From ce9bba3cb5409844f8f3d7dcc235a9ea30cad090 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 20 Dec 2013 17:40:40 -0800 Subject: Sync from Google-internal development. --- upb/pb/decoder.h | 461 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 403 insertions(+), 58 deletions(-) (limited to 'upb/pb/decoder.h') diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index c645688..4529324 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -1,102 +1,447 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009-2010 Google Inc. See LICENSE for details. + * Copyright (c) 2009-2013 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * upb::Decoder implements a high performance, streaming decoder for protobuf - * data that works by parsing input data one buffer at a time and calling into - * a upb::Handlers. + * upb::pb::Decoder implements a high performance, streaming, resumable decoder + * for the binary protobuf format. */ #ifndef UPB_DECODER_H_ #define UPB_DECODER_H_ +#include "upb/table.int.h" #include "upb/sink.h" +#ifdef __cplusplus +namespace upb { +namespace pb { +class CodeCache; +class Decoder; +class DecoderMethod; +} // namespace pb +} // namespace upb + +typedef upb::pb::CodeCache upb_pbcodecache; +typedef upb::pb::Decoder upb_pbdecoder; +typedef upb::pb::DecoderMethod upb_pbdecodermethod; +#else +struct upb_pbdecoder; +struct upb_pbdecodermethod; +struct upb_pbcodecache; + +typedef struct upb_pbdecoder upb_pbdecoder; +typedef struct upb_pbdecodermethod upb_pbdecodermethod; +typedef struct upb_pbcodecache upb_pbcodecache; +#endif + // The maximum that any submessages can be nested. Matches proto2's limit. -// At the moment this specifies the size of several statically-sized arrays -// and therefore setting it high will cause more memory to be used. Will -// be replaced by a runtime-configurable limit and dynamically-resizing arrays. -// TODO: make this a runtime-settable property of Decoder. +// This specifies the size of the decoder's statically-sized array and therefore +// setting it high will cause the upb::pb::Decoder object to be larger. +// +// If necessary we can add a runtime-settable property to Decoder that allow +// this to be larger than the compile-time setting, but this would add +// complexity, particularly since we would have to decide how/if to give users +// the ability to set a custom memory allocation function. #define UPB_DECODER_MAX_NESTING 64 +// Internal-only struct used by the decoder. +typedef struct { #ifdef __cplusplus -namespace upb { -namespace pb { + private: +#endif + // Space optimization note: we store two pointers here that the JIT + // doesn't need at all; the upb_handlers* inside the sink and + // the dispatch table pointer. We can optimze so that the JIT uses + // smaller stack frames than the interpreter. The only thing we need + // to guarantee is that the fallback routines can find end_ofs. -// Frame type that encapsulates decoder state. -class Decoder; +#ifdef __cplusplus + char sink[sizeof(upb_sink)]; +#else + upb_sink sink; +#endif + // The absolute stream offset of the end-of-frame delimiter. + // Non-delimited frames (groups and non-packed repeated fields) reuse the + // delimiter of their parent, even though the frame may not end there. + // + // NOTE: the JIT stores a slightly different value here for non-top frames. + // It stores the value relative to the end of the enclosed message. But the + // top frame is still stored the same way, which is important for ensuring + // that calls from the JIT into C work correctly. + uint64_t end_ofs; + const uint32_t *base; + uint32_t groupnum; + upb_inttable *dispatch; // Not used by the JIT. +} upb_pbdecoder_frame; -// Resets the sink of the Decoder. This must be called at least once before -// the decoder can be used. It may only be called with the decoder is in a -// state where it was just created or reset. The given sink must be from the -// same pipeline as this decoder. -inline bool ResetDecoderSink(Decoder* d, Sink* sink); +#ifdef __cplusplus -// Gets the handlers suitable for parsing protobuf data according to the given -// destination handlers. The protobuf schema to parse is taken from dest. -inline const upb::Handlers *GetDecoderHandlers(const upb::Handlers *dest, - bool allowjit, - const void *owner); +// Represents the code to parse a protobuf according to a specific schema, +// optionally bound to a set of destination handlers. +class upb::pb::DecoderMethod /* : public upb::RefCounted */ { + public: + // From upb::ReferenceCounted. + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void* from, const void* to) const; + void CheckRef(const void* owner) const; -// Returns true if these handlers represent a upb::pb::Decoder. -bool IsDecoder(const upb::Handlers *h); + // The schema that this method parses. Never NULL. + const MessageDef* schema() const; -// Returns true if IsDecoder(h) and the given handlers have JIT code. -inline bool HasJitCode(const upb::Handlers* h); + // The destination handlers that are statically bound to this method. + // This method is only capable of outputting to a sink that uses these + // handlers. + // + // Will be NULL if this method is not statically bound. + const Handlers* dest_handlers() const; -// Returns the destination handlers if IsDecoder(h), otherwise returns NULL. -const upb::Handlers* GetDestHandlers(const upb::Handlers* h); + // The input handlers for this decoder method. + const BytesHandler* input_handler() const; -} // namespace pb -} // namespace upb + // Whether this method is native. + bool is_native() const; -typedef upb::pb::Decoder upb_pbdecoder; + // Convenience method for generating a DecoderMethod without explicitly + // creating a CodeCache. + static reffed_ptr NewForDestHandlers( + const upb::Handlers *dest); -extern "C" { + private: + UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod); #else -struct upb_pbdecoder; -typedef struct upb_pbdecoder upb_pbdecoder; +struct upb_pbdecodermethod { +#endif + upb_refcounted base; + + // While compiling, the base is relative in "ofs", after compiling it is + // absolute in "ptr". + union { + uint32_t ofs; // PC offset of method. + void *ptr; // Pointer to bytecode or machine code for this method. + } code_base; + + // The decoder method group to which this method belongs. We own a ref. + // Owning a ref on the entire group is more coarse-grained than is strictly + // necessary; all we truly require is that methods we directly reference + // outlive us, while the group could contain many other messages we don't + // require. But the group represents the messages that were + // allocated+compiled together, so it makes the most sense to free them + // together also. + const upb_refcounted *group; + + // Whether this method is native code or bytecode. + bool is_native_; + + // The handler one calls to invoke this method. + upb_byteshandler input_handler_; + + // The message type that this method is parsing. + const upb_msgdef *schema_; + + // The destination handlers this method is bound to, or NULL if this method + // can be bound to a destination handlers instance at runtime. + // + // If non-NULL, we own a ref. + const upb_handlers *dest_handlers_; + + // The dispatch table layout is: + // [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ] + // + // If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup + // (UPB_MAX_FIELDNUMBER + fieldnum) and jump there. + // + // We need two wire types because of packed/non-packed compatibility. A + // primitive repeated field can use either wire type and be valid. While we + // could key the table on fieldnum+wiretype, the table would be 8x sparser. + // + // Storing two wire types in the primary value allows us to quickly rule out + // the second wire type without needing to do a separate lookup (this case is + // less common than an unknown field). + upb_inttable dispatch; +}; + +#ifdef __cplusplus + +// A Decoder receives binary protobuf data on its input sink and pushes the +// decoded data to its output sink. +class upb::pb::Decoder { + public: + // Constructs a decoder instance for the given method, which must outlive this + // decoder. Any errors during parsing will be set on the given status, which + // must also outlive this decoder. + Decoder(const DecoderMethod* method, Status* status); + ~Decoder(); + + // Returns the DecoderMethod this decoder is parsing from. + // TODO(haberman): Do users need to be able to rebind this? + const DecoderMethod* method() const; + + // Resets the state of the decoder. + void Reset(); + + // Resets the output sink of the Decoder. + // The given sink must match method()->schema() as well as + // method()->dest_handlers() if the latter is non-NULL. + // + // This must be called at least once before the decoder can be used. It may + // only be called with the decoder is in a state where it was just created or + // reset with pipeline.Reset(). The given sink must be from the same pipeline + // as this decoder. + bool ResetOutput(Sink* sink); + + // The sink on which this decoder receives input. + BytesSink* input(); + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(Decoder); +#else +struct upb_pbdecoder { +#endif + // Our input sink. + upb_bytessink input_; + + // The decoder method we are parsing with (owned). + const upb_pbdecodermethod *method_; + + size_t call_len; + const uint32_t *pc, *last; + + // Current input buffer and its stream offset. + const char *buf, *ptr, *end, *checkpoint; + + // End of the delimited region, relative to ptr, or NULL if not in this buf. + const char *delim_end; + + // End of the delimited region, relative to ptr, or end if not in this buf. + const char *data_end; + + // Overall stream offset of "buf." + uint64_t bufstart_ofs; + + // How many bytes past the end of the user buffer we want to skip. + size_t skip; + + // Buffer for residual bytes not parsed from the previous buffer. + // The maximum number of residual bytes we require is 12; a five-byte + // unknown tag plus an eight-byte value, less one because the value + // is only a partial value. + char residual[12]; + char *residual_end; + + // Stores the user buffer passed to our decode function. + const char *buf_param; + size_t size_param; + +#ifdef UPB_USE_JIT_X64 + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; + + const void *saved_rsp; +#endif + + upb_status *status; + + // Our internal stack. + upb_pbdecoder_frame *top, *limit; + upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING]; +#ifdef UPB_USE_JIT_X64 + // Each native stack frame needs two pointers, plus we need a few frames for + // the enter/exit trampolines. + const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10]; +#else + const uint32_t *callstack[UPB_DECODER_MAX_NESTING]; +#endif +}; + +#ifdef __cplusplus + +// A class for caching protobuf processing code, whether bytecode for the +// interpreted decoder or machine code for the JIT. +// +// This class is not thread-safe. +class upb::pb::CodeCache { + public: + CodeCache(); + ~CodeCache(); + + // Whether the cache is allowed to generate machine code. Defaults to true. + // There is no real reason to turn it off except for testing or if you are + // having a specific problem with the JIT. + // + // Note that allow_jit = true does not *guarantee* that the code will be JIT + // compiled. If this platform is not supported or the JIT was not compiled + // in, the code may still be interpreted. + bool allow_jit() const; + + // This may only be called when the object is first constructed, and prior to + // any code generation, otherwise returns false and does nothing. + bool set_allow_jit(bool allow); + + // Returns a DecoderMethod that can push data to the given handlers. + // If a suitable method already exists, it will be returned from the cache. + // + // Specifying the destination handlers here allows the DecoderMethod to be + // statically bound to the destination handlers if possible, which can allow + // more efficient decoding. However the returned method may or may not + // actually be statically bound. But in all cases, the returned method can + // push data to the given handlers. + const DecoderMethod *GetDecoderMethodForDestHandlers( + const upb::Handlers *handlers); + + // If/when someone needs to explicitly create a dynamically-bound + // DecoderMethod*, we can add a method to get it here. + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache); +#else +struct upb_pbcodecache { #endif + bool allow_jit_; + + // Array of mgroups. + upb_inttable groups; +}; + + +#ifdef __cplusplus +extern "C" { +#endif + +void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method, + upb_status *status); +void upb_pbdecoder_uninit(upb_pbdecoder *d); +void upb_pbdecoder_reset(upb_pbdecoder *d); +const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); +bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink); +upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d); -// C API. -const upb_frametype *upb_pbdecoder_getframetype(); -bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink *sink); -const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, - bool allowjit, - const void *owner); -bool upb_pbdecoder_isdecoder(const upb_handlers *h); -bool upb_pbdecoder_hasjitcode(const upb_handlers *h); -const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h); +void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner); +void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner); +void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m, + const void *from, const void *to); +void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m, + const void *owner); +const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m); +const upb_handlers *upb_pbdecodermethod_desthandlers( + const upb_pbdecodermethod *m); +const upb_byteshandler *upb_pbdecodermethod_inputhandler( + const upb_pbdecodermethod *m); +bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m); +const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers( + const upb_handlers *dest, const void *owner); -// C++ implementation details. ///////////////////////////////////////////////// +void upb_pbcodecache_init(upb_pbcodecache *c); +void upb_pbcodecache_uninit(upb_pbcodecache *c); +bool upb_pbcodecache_allowjit(const upb_pbcodecache *c); +bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow); +const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers( + upb_pbcodecache *c, const upb_handlers *handlers); + +#ifdef __cplusplus +} /* extern "C" */ +#endif #ifdef __cplusplus -} // extern "C" namespace upb { +template<> +class Pointer { + public: + explicit Pointer(pb::DecoderMethod* ptr) : ptr_(ptr) {} + operator pb::DecoderMethod*() { return ptr_; } + operator RefCounted*() { return UPB_UPCAST(ptr_); } + private: + pb::DecoderMethod* ptr_; +}; + +template<> +class Pointer { + public: + explicit Pointer(const pb::DecoderMethod* ptr) : ptr_(ptr) {} + operator const pb::DecoderMethod*() { return ptr_; } + operator const RefCounted*() { return UPB_UPCAST(ptr_); } + private: + const pb::DecoderMethod* ptr_; +}; + namespace pb { -inline bool ResetDecoderSink(Decoder* r, Sink* sink) { - return upb_pbdecoder_resetsink(r, sink); + +inline Decoder::Decoder(const DecoderMethod* m, Status* s) { + upb_pbdecoder_init(this, m, s); +} +inline Decoder::~Decoder() { + upb_pbdecoder_uninit(this); +} +inline const DecoderMethod* Decoder::method() const { + return upb_pbdecoder_method(this); +} +inline void Decoder::Reset() { + upb_pbdecoder_reset(this); +} +inline bool Decoder::ResetOutput(Sink* sink) { + return upb_pbdecoder_resetoutput(this, sink); +} +inline BytesSink* Decoder::input() { + return upb_pbdecoder_input(this); +} + +inline void DecoderMethod::Ref(const void *owner) const { + upb_pbdecodermethod_ref(this, owner); } -inline const upb::Handlers* GetDecoderHandlers(const upb::Handlers* dest, - bool allowjit, - const void* owner) { - return upb_pbdecoder_gethandlers(dest, allowjit, owner); +inline void DecoderMethod::Unref(const void *owner) const { + upb_pbdecodermethod_unref(this, owner); } -inline bool IsDecoder(const upb::Handlers* h) { - return upb_pbdecoder_isdecoder(h); +inline void DecoderMethod::DonateRef(const void *from, const void *to) const { + upb_pbdecodermethod_donateref(this, from, to); } -inline bool HasJitCode(const upb::Handlers* h) { - return upb_pbdecoder_hasjitcode(h); +inline void DecoderMethod::CheckRef(const void *owner) const { + upb_pbdecodermethod_checkref(this, owner); } -inline const upb::Handlers* GetDestHandlers(const upb::Handlers* h) { - return upb_pbdecoder_getdesthandlers(h); +inline const MessageDef* DecoderMethod::schema() const { + return upb_pbdecodermethod_schema(this); } +inline const Handlers* DecoderMethod::dest_handlers() const { + return upb_pbdecodermethod_desthandlers(this); +} +inline const BytesHandler* DecoderMethod::input_handler() const { + return upb_pbdecodermethod_inputhandler(this); +} +inline bool DecoderMethod::is_native() const { + return upb_pbdecodermethod_isnative(this); +} +// static +inline reffed_ptr DecoderMethod::NewForDestHandlers( + const Handlers *dest) { + const upb_pbdecodermethod *m = + upb_pbdecodermethod_newfordesthandlers(dest, &m); + return reffed_ptr(m, &m); +} + +inline CodeCache::CodeCache() { + upb_pbcodecache_init(this); +} +inline CodeCache::~CodeCache() { + upb_pbcodecache_uninit(this); +} +inline bool CodeCache::allow_jit() const { + return upb_pbcodecache_allowjit(this); +} +inline bool CodeCache::set_allow_jit(bool allow) { + return upb_pbcodecache_setallowjit(this, allow); +} +inline const DecoderMethod* CodeCache::GetDecoderMethodForDestHandlers( + const upb::Handlers* handlers) { + return upb_pbcodecache_getdecodermethodfordesthandlers(this, handlers); +} + } // namespace pb } // namespace upb -#endif + +#endif // __cplusplus #endif /* UPB_DECODER_H_ */ -- cgit v1.2.3