From 3d0c7c45da5b72a88bfb03dc5ce3384b7f01cef6 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 18 Nov 2014 15:21:50 -0800 Subject: Sync to Google-internal development. --- upb/pb/encoder.h | 163 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 133 insertions(+), 30 deletions(-) (limited to 'upb/pb/encoder.h') diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h index 563b78d..2df5797 100644 --- a/upb/pb/encoder.h +++ b/upb/pb/encoder.h @@ -7,52 +7,155 @@ * Implements a set of upb_handlers that write protobuf data to the binary wire * format. * - * For messages that have any submessages, the encoder needs a buffer - * containing the submessage sizes, so they can be properly written at the - * front of each message. Note that groups do *not* have this requirement. + * This encoder implementation does not have any access to any out-of-band or + * precomputed lengths for submessages, so it must buffer submessages internally + * before it can emit the first byte. */ #ifndef UPB_ENCODER_H_ #define UPB_ENCODER_H_ -#include "upb/upb.h" -#include "upb/bytestream.h" +#include "upb/sink.h" #ifdef __cplusplus -extern "C" { +namespace upb { +namespace pb { +class Encoder; +} // namespace pb +} // namespace upb #endif -/* upb_encoder ****************************************************************/ +UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder); -// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol -// buffer binary wire format. -struct upb_encoder; -typedef struct upb_encoder upb_encoder; +#define UPB_PBENCODER_MAX_NESTING 100 -upb_encoder *upb_encoder_new(upb_msgdef *md); -void upb_encoder_free(upb_encoder *e); +/* upb::pb::Encoder ***********************************************************/ -// Resets the given upb_encoder such that is is ready to begin encoding, -// outputting data to "bytesink" (which must live until the encoder is -// reset or destroyed). 
-void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); +// The output buffer is divided into segments; a segment is a string of data +// that is "ready to go" -- it does not need any varint lengths inserted into +// the middle. The seams between segments are where varints will be inserted +// once they are known. +// +// We also use the concept of a "run", which is a range of encoded bytes that +// occur at a single submessage level. Every segment contains one or more runs. +// +// A segment can span messages. Consider: +// +// .--Submessage lengths---------. +// | | | +// | V V +// V | |--------------- | |----------------- +// Submessages: | |----------------------------------------------- +// Top-level msg: ------------------------------------------------------------ +// +// Segments: ----- ------------------- ----------------- +// Runs: *---- *--------------*--- *---------------- +// (* marks the start) +// +// Note that the top-level message is not in any segment because it does not +// have any length preceding it. +// +// A segment is only interrupted when another length needs to be inserted. So +// observe how the second segment spans both the inner submessage and part of +// the next enclosing message. +typedef struct { + UPB_PRIVATE_FOR_CPP + uint32_t msglen; // The length to varint-encode before this segment. + uint32_t seglen; // Length of the segment. +} upb_pb_encoder_segment; -// Returns the upb_sink to which data can be written. The sink is invalidated -// when the encoder is reset or destroyed. Note that if the client wants to -// encode any length-delimited submessages it must first call -// upb_encoder_buildsizes() below. -upb_sink *upb_encoder_sink(upb_encoder *e); +UPB_DEFINE_CLASS0(upb::pb::Encoder, + public: + Encoder(const upb::Handlers* handlers); + ~Encoder(); -// Call prior to pushing any data with embedded submessages. "src" must yield -// exactly the same data as what will next be encoded, but in reverse order. 
-// The encoder iterates over this data in order to determine the sizes of the -// submessages. If any errors are returned by the upb_src, the status will -// be saved in *status. If the client is sure that the upb_src will not throw -// any errors, "status" may be NULL. -void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); + static reffed_ptr NewHandlers(const upb::MessageDef* msg); + + // Resets the state of the encoder, so that it will expect to begin a new + // message. + void Reset(); + + // Resets the output pointer which will serve as our closure. + void ResetOutput(BytesSink* output); + + // The input to the encoder. + Sink* input(); + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(Encoder); +, +UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE( + // Our input and output. + upb_sink input_; + upb_bytessink *output_; + + // The "subclosure" -- used as the inner closure as part of the bytessink + // protocol. + void *subc; + + // The output buffer and limit, and our current write position. "buf" + // initially points to "initbuf", but is dynamically allocated if we need to + // grow beyond the initial size. + char *buf, *ptr, *limit; + + // The beginning of the current run, or undefined if we are at the top level. + char *runbegin; + + // The list of segments we are accumulating. + upb_pb_encoder_segment *segbuf, *segptr, *seglimit; + + // The stack of enclosing submessages. Each entry in the stack points to the + // segment where this submessage's length is being accumulated. + int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit; + + // Depth of startmsg/endmsg calls. + int depth; + + // Initial buffers for the output buffer and segment buffer. If we outgrow + // these we will dynamically allocate bigger ones. 
+ char initbuf[256]; + upb_pb_encoder_segment seginitbuf[32]; +))); + +UPB_BEGIN_EXTERN_C + +const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, + const void *owner); +void upb_pb_encoder_reset(upb_pb_encoder *e); +upb_sink *upb_pb_encoder_input(upb_pb_encoder *p); +void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h); +void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output); +void upb_pb_encoder_uninit(upb_pb_encoder *e); + +UPB_END_EXTERN_C #ifdef __cplusplus -} /* extern "C" */ + +namespace upb { +namespace pb { +inline Encoder::Encoder(const upb::Handlers* handlers) { + upb_pb_encoder_init(this, handlers); +} +inline Encoder::~Encoder() { + upb_pb_encoder_uninit(this); +} +inline void Encoder::Reset() { + upb_pb_encoder_reset(this); +} +inline void Encoder::ResetOutput(BytesSink* output) { + upb_pb_encoder_resetoutput(this, output); +} +inline Sink* Encoder::input() { + return upb_pb_encoder_input(this); +} +inline reffed_ptr Encoder::NewHandlers( + const upb::MessageDef *md) { + const Handlers* h = upb_pb_encoder_newhandlers(md, &h); + return reffed_ptr(h, &h); +} +} // namespace pb +} // namespace upb + #endif #endif /* UPB_ENCODER_H_ */ -- cgit v1.2.3