From b5f5ee867e6c91b77490dc8894236f17a47bde00 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 23 Nov 2011 16:19:22 -0800 Subject: Refinement of upb_bytesrc interface. Added a upb_byteregion that tracks a region of the input buffer; decoders use this instead of using a upb_bytesrc directly. upb_byteregion is also used as the way of passing a string to a upb_handlers callback. This symmetry makes decoders compose better; if you want to take a parsed string and decode it as something else, you can take the string directly from the callback and feed it as input to another parser. A commented-out version of a pinning interface is present; I decline to actually implement it (and accept its extra complexity) until/unless it is clear that it is actually a win. But it is included as a proof-of-concept, to show that it fits well with the existing interface. --- upb/pb/decoder.h | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) (limited to 'upb/pb/decoder.h') diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 2232c52..c35bec4 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -5,7 +5,7 @@ * Author: Josh Haberman * * upb_decoder implements a high performance, streaming decoder for protobuf - * data that works by getting its input data from a upb_bytesrc and calling + * data that works by getting its input data from a upb_byteregion and calling * into a upb_handlers. */ @@ -26,24 +26,14 @@ extern "C" { struct dasm_State; typedef struct _upb_decoder { - upb_bytesrc *bytesrc; // Source of our serialized data. - upb_dispatcher dispatcher; // Dispatcher to which we push parsed data. - upb_status *status; // Where we will store any errors that occur. - upb_strref strref; // For passing string data to callbacks. - - // Offsets for the bytesrc region we currently have ref'd. - uint64_t refstart_ofs, refend_ofs; + upb_byteregion *input; // Input data (serialized). + upb_dispatcher dispatcher; // Dispatcher to which we push parsed data. + upb_status *status; // Where we will store any errors that occur. + upb_byteregion str_byteregion; // For passing string data to callbacks. // Current input buffer and its stream offset. const char *buf, *ptr, *end; - uint64_t bufstart_ofs, bufend_ofs; - - // Stream offset for the end of the top-level message, if any. - uint64_t end_ofs; - - // Buf offset as of which we've delivered calbacks; needed for rollback if - // a callback returns UPB_BREAK. - const char *completed_ptr; + uint64_t bufstart_ofs; // End of the delimited region, relative to ptr, or NULL if not in this buf. const char *delim_end; @@ -65,10 +55,6 @@ typedef struct _upb_decoder { sigjmp_buf exitjmp; } upb_decoder; -// Used for frames that have no specific end offset: groups, repeated primitive -// fields inside groups, and the top-level message. -#define UPB_NONDELIMITED UINT64_MAX - // Initializes/uninitializes a decoder for calling into the given handlers // or to write into the given msgdef, given its accessors). Takes a ref // on the handlers. @@ -77,13 +63,13 @@ void upb_decoder_uninit(upb_decoder *d); // Resets the internal state of an already-allocated decoder. This puts it in a // state where it has not seen any data, and expects the next data to be from -// the beginning of a new protobuf. Parsers must be reset before they can be -// used. A decoder can be reset multiple times. -// -// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message. -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs, - uint64_t end_ofs, void *closure); +// the beginning of a new protobuf. Decoders must be reset before they can be +// used. A decoder can be reset multiple times. "input" must live until the +// decoder is reset again (or destroyed). +void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure); +// Decodes serialized data (calling handlers as the data is parsed) until error +// or EOF (see *status for details). void upb_decoder_decode(upb_decoder *d, upb_status *status); #ifdef __cplusplus -- cgit v1.2.3