/* * upb - a minimalist implementation of protocol buffers. * * Copyright (c) 2009-2010 Google Inc. See LICENSE for details. * Author: Josh Haberman * * upb_decoder implements a high performance, streaming decoder for protobuf * data that works by getting its input data from a upb_byteregion and calling * into a upb_handlers. */ #ifndef UPB_DECODER_H_ #define UPB_DECODER_H_ #include #include "upb/bytestream.h" #include "upb/sink.h" #ifdef __cplusplus extern "C" { #endif /* upb_decoderplan ************************************************************/ // A decoderplan contains whatever data structures and generated (JIT-ted) code // are necessary to decode protobuf data of a specific type to a specific set // of handlers. By generating the plan ahead of time, we avoid having to // redo this work every time we decode. // // A decoderplan is threadsafe, meaning that it can be used concurrently by // different upb_decoders in different threads. However, the upb_decoders are // *not* thread-safe. struct _upb_decoderplan; typedef struct _upb_decoderplan upb_decoderplan; // TODO(haberman): // - add support for letting any message in the plan be at the top level. // - make this object a handlers instead (when bytesrc/bytesink are merged // into handlers). // - add support for sharing code with previously-built plans/handlers. upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit); void upb_decoderplan_unref(upb_decoderplan *p); // Returns true if the plan contains JIT-ted code. This may not be the same as // the "allowjit" parameter to the constructor if support for JIT-ting was not // compiled in. bool upb_decoderplan_hasjitcode(upb_decoderplan *p); /* upb_decoder ****************************************************************/ struct dasm_State; typedef struct { const upb_fielddef *f; uint64_t end_ofs; uint32_t group_fieldnum; // UINT32_MAX for non-groups. bool is_sequence; // frame represents seq or submsg? (f might be both). bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX // (strings aren't pushed). } upb_decoder_frame; typedef struct _upb_decoder { upb_decoderplan *plan; upb_byteregion *input; // Input data (serialized), not owned. upb_status status; // Where we store errors that occur. // Where we push parsed data. // TODO(haberman): make this a pointer and make upb_decoder_resetinput() take // one of these instead of a void*. upb_sink sink; // Our internal stack. upb_decoder_frame *top, *limit; upb_decoder_frame stack[UPB_MAX_NESTING]; // Current input buffer and its stream offset. const char *buf, *ptr, *end; uint64_t bufstart_ofs; // End of the delimited region, relative to ptr, or NULL if not in this buf. const char *delim_end; // True if the top stack frame represents a packed field. bool top_is_packed; #ifdef UPB_USE_JIT_X64 // For JIT, which doesn't do bounds checks in the middle of parsing a field. const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) // Used momentarily by the generated code to store a value while a user // function is called. uint32_t tmp_len; #endif // For exiting the decoder on error. jmp_buf exitjmp; } upb_decoder; void upb_decoder_init(upb_decoder *d); void upb_decoder_uninit(upb_decoder *d); // Resets the plan that the decoder will parse from. "msg_offset" indicates // which message from the plan will be used as the top-level message. // // This will also reset the decoder's input to be uninitialized -- // upb_decoder_resetinput() must be called before parsing can occur. The plan // must live until the decoder is destroyed or reset to a different plan. // // Must be called before upb_decoder_resetinput() or upb_decoder_decode(). void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p); // Resets the input of an already-allocated decoder. This puts it in a state // where it has not seen any data, and expects the next data to be from the // beginning of a new protobuf. Decoders must have their input reset before // they can be used. A decoder can have its input reset multiple times. // "input" must live until the decoder is destroyed or has it input reset // again. "c" is the closure that will be passed to the handlers. // // Must be called before upb_decoder_decode(). void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c); // Decodes serialized data (calling handlers as the data is parsed), returning // the success of the operation (call upb_decoder_status() for details). upb_success_t upb_decoder_decode(upb_decoder *d); INLINE const upb_status *upb_decoder_status(upb_decoder *d) { return &d->status; } // Implementation details struct _upb_decoderplan { // The top-level handlers that this plan calls into. We own a ref. const upb_handlers *handlers; #ifdef UPB_USE_JIT_X64 // JIT-generated machine code (else NULL). char *jit_code; size_t jit_size; char *debug_info; // For storing upb_jitmsginfo, which contains per-msg runtime data needed // by the JIT. // Maps upb_handlers* -> upb_jitmsginfo. upb_inttable msginfo; // The following members are used only while the JIT is being built. // This pointer is allocated by dasm_init() and freed by dasm_free(). struct dasm_State *dynasm; // For storing pclabel bases while we are building the JIT. // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base upb_inttable pclabels; // This is not the same as len(pclabels) because the table only contains base // offsets for each def, but each def can have many pclabels. uint32_t pclabel_count; #endif }; #ifdef __cplusplus } /* extern "C" */ #endif #endif /* UPB_DECODER_H_ */