From 036fe6bb0673930cd5e8450532abe3b7acffb94e Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 16 Jan 2010 21:47:47 -0800 Subject: Flesh out implementation of upb_sizebuilder. --- src/upb.h | 8 +-- src/upb_decoder.c | 20 +++---- src/upb_encoder.c | 161 +++++++++++++++++++++++++++++++++++++++++------------- src/upb_encoder.h | 37 ++++++++++++- src/upb_sink.h | 22 ++++++-- 5 files changed, 189 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/upb.h b/src/upb.h index b680d7b..4fb5773 100644 --- a/src/upb.h +++ b/src/upb.h @@ -272,14 +272,14 @@ enum upb_status_code { // The input byte stream ended in the middle of a record. UPB_STATUS_NEED_MORE_DATA = 1, - // The user value callback opted to stop parsing. - UPB_STATUS_USER_CANCELLED = 2, - // An unrecoverable error occurred. UPB_STATUS_ERROR = -1, // A varint went for 10 bytes without terminating. - UPB_ERROR_UNTERMINATED_VARINT = -2 + UPB_ERROR_UNTERMINATED_VARINT = -2, + + // The max nesting level (UPB_MAX_NESTING) was exceeded. + UPB_ERROR_MAX_NESTING_EXCEEDED = -3 }; #define UPB_ERRORMSG_MAXLEN 256 diff --git a/src/upb_decoder.c b/src/upb_decoder.c index f07d901..440eaef 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -207,10 +207,8 @@ INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, } -/** - * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version - * handles 1 and 2 byte varints). - */ +// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version +// handles 1 and 2 byte varints). const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, uint64_t *val, upb_status *status) { @@ -367,10 +365,8 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { } -/** - * Pushes a new stack frame for a submessage with the given len (which will - * be zero if the submessage is a group). 
- */ +// Pushes a new stack frame for a submessage with the given len (which will +// be zero if the submessage is a group). static const uint8_t *push(upb_decoder *d, const uint8_t *start, uint32_t submsg_len, upb_fielddef *f, upb_status *status) @@ -378,7 +374,7 @@ static const uint8_t *push(upb_decoder *d, const uint8_t *start, d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_STATUS_ERROR, + upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", UPB_MAX_NESTING); return NULL; @@ -391,10 +387,8 @@ static const uint8_t *push(upb_decoder *d, const uint8_t *start, return get_msgend(d, start); } -/** - * Pops a stack frame, returning a pointer for where the next submsg should - * end (or a pointer that is out of range for a group). - */ +// Pops a stack frame, returning a pointer for where the next submsg should +// end (or a pointer that is out of range for a group). static const void *pop(upb_decoder *d, const uint8_t *start) { d->top--; diff --git a/src/upb_encoder.c b/src/upb_encoder.c index 9b8b213..6d57acc 100644 --- a/src/upb_encoder.c +++ b/src/upb_encoder.c @@ -5,9 +5,11 @@ */ #include "upb_encoder.h" + +#include #include "descriptor.h" -/* Functions for calculating sizes. *******************************************/ +/* Functions for calculating sizes of wire values. ****************************/ static size_t upb_v_uint64_t_size(uint64_t val) { #ifdef __GNUC__ @@ -103,9 +105,9 @@ static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val) return uint64_end; } -/* Functions to write .proto values. ******************************************/ +/* Functions to write and calculate sizes for .proto values. ******************/ -/* Performs zig-zag encoding, which is used by sint32 and sint64. */ +// Performs zig-zag encoding, which is used by sint32 and sint64. 
static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } @@ -167,7 +169,7 @@ T(FLOAT, f, uint32_t, float, _float) { #undef PUT #undef T -uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) +static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) { #define CASE(t, member_name) \ case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); @@ -191,11 +193,127 @@ uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) #undef CASE } -uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t fn, upb_wire_type_t wt) +static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) +{ +#define CASE(t, member_name) \ + case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: assert(false); return 0; + } +#undef CASE +} + +static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, + upb_wire_type_t wt) +{ + return upb_put_UINT32(buf, wt | (num << 3)); +} + +static uint32_t _upb_get_tag_size(upb_field_number_t num) +{ + return upb_get_UINT32_size(num << 3); +} + + +/* upb_sizebuilder ************************************************************/ + +struct upb_sizebuilder { + // Accumulating size for the current level. + uint32_t size; + + // Stack of sizes for our current nesting. + uint32_t stack[UPB_MAX_NESTING], *top, *limit; + + // Vector of sizes. + uint32_t *sizes; + int sizes_len; + int sizes_size; + + upb_status status; +}; + +// upb_sink callbacks. 
+static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, + upb_value val) +{ + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += _upb_get_value_size(f->type, val); + sb->size += size; + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end) +{ + (void)str; // String data itself is not used. + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(start >= 0) { + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += upb_get_UINT32_size(end - start); + sb->size += size; + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f) +{ + (void)f; // Unused (we calculate tag size and delimiter in endcb). + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + *sb->top = sb->size; + sb->top++; + sb->size = 0; + if(sb->top == sb->limit) { + upb_seterr(&sb->status, UPB_ERROR_MAX_NESTING_EXCEEDED, + "Nesting exceeded maximum (%d levels)\n", + UPB_MAX_NESTING); + return UPB_SINK_STOP; + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f) { - return upb_put_UINT32(buf, wt | (fn << 3)); + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(sb->sizes_len == sb->sizes_size) { + sb->sizes_size *= 2; + sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes)); + } + sb->sizes[sb->sizes_len++] = sb->size; + sb->top--; + // The size according to the parent includes the tag size and delimiter of + // the submessage. + sb->size += upb_get_UINT32_size(sb->size); + sb->size += _upb_get_tag_size(f->number); + // Include size accumulated in parent before child began. 
+ sb->size += *sb->top; + return UPB_SINK_CONTINUE; } +upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { + _upb_sizebuilder_valuecb, + _upb_sizebuilder_strcb, + _upb_sizebuilder_startcb, + _upb_sizebuilder_endcb +}; + /* upb_sink callbacks *********************************************************/ @@ -283,34 +401,3 @@ upb_sink_callbacks _upb_encoder_sink_vtbl = { _upb_encoder_endcb }; - -/* Public Interface ***********************************************************/ - -size_t upb_get_encoded_size(upb_value v, upb_fielddef *f) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); - switch(f->type) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return 0; - } -#undef CASE -} - -size_t upb_get_encoded_tag_size(uint32_t fieldnum) { - return upb_v_uint64_t_size((uint64_t)fieldnum << 3); -} - diff --git a/src/upb_encoder.h b/src/upb_encoder.h index 1322e21..1137c7f 100644 --- a/src/upb_encoder.h +++ b/src/upb_encoder.h @@ -20,13 +20,48 @@ extern "C" { #endif +/* upb_sizebuilder ************************************************************/ + +// A upb_sizebuilder performs a pre-pass on data to be serialized that gathers +// the sizes of submessages. This size data is required for serialization, +// because we have to know at the beginning of a submessage how many encoded +// bytes the submessage will represent. +struct upb_sizebuilder; +typedef struct upb_sizebuilder upb_sizebuilder; + +upb_sizebuilder *upb_sizebuilder_new(); +void upb_sizebuilder_free(upb_sizebuilder *sb); + +// Returns a sink that must be used to perform the pre-pass. 
Note that the +// pre-pass *must* occur in the opposite order from the actual encode that +// follows, and the data *must* be identical both times (except for the +// reversed order). +upb_sink *upb_sizebuilder_sink(upb_sizebuilder *sb); + + +/* upb_encoder ****************************************************************/ + +// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol +// buffer binary wire format. struct upb_encoder; typedef struct upb_encoder upb_encoder; upb_encoder *upb_encoder_new(); void upb_encoder_free(upb_encoder *s); -void upb_encoder_reset(upb_encoder *s, uint32_t *sizes); +// Resets the given upb_encoder such that it is ready to begin encoding. The +// upb_sizebuilder "sb" is used to determine submessage sizes; it must have +// previously been initialized by feeding it the same data in reverse order. +// "sb" may be null if and only if the data contains no submessages; groups +// are ok and do not require sizes to be precalculated. The upb_bytesink +// "out" is where the encoded output data will be sent. +// +// Both "sb" and "out" must live until the encoder is either reset or freed. +void upb_encoder_reset(upb_encoder *s, upb_sizebuilder *sb, upb_bytesink *out); + +// The upb_sink to which data can be sent to be encoded. Note that this data +// must be identical to the data that was previously given to the sizebuilder +// (if any). upb_sink *upb_encoder_sink(upb_encoder *s); #ifdef __cplusplus diff --git a/src/upb_sink.h b/src/upb_sink.h index f117107..41e5275 100644 --- a/src/upb_sink.h +++ b/src/upb_sink.h @@ -98,9 +98,15 @@ typedef struct upb_sink_callbacks { upb_end_cb end_cb; } upb_sink_callbacks; -// We could potentially define these later to also be capable of calling a C++ -// virtual method instead of doing the virtual dispatch manually.
This would -// make it possible to write C++ sinks in a more natural style without loss of +// These macros implement a mini virtual function dispatch for upb_sink instances. +// This allows functions that call upb_sinks to just write: +// +// upb_sink_onvalue(sink, field, val); +// +// The macro will handle the virtual function lookup and dispatch. We could +// potentially define these later to also be capable of calling a C++ virtual +// method instead of doing the virtual dispatch manually. This would make it +// possible to write C++ sinks in a more natural style without loss of // efficiency. We could have a flag in upb_sink defining whether it is a C // sink or a C++ one. #define upb_sink_onvalue(s, f, val) s->vtbl->value_cb(s, f, val) @@ -123,10 +129,18 @@ INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { // // The two simplest kinds of sinks are "write to string" and "write to FILE*". +// A forward declaration solely for the benefit of declaring upb_byte_cb below. +// Always prefer upb_bytesink (without the "struct" keyword) instead. +struct _upb_bytesink; + // The single bytesink callback; it takes the bytes to be written and returns // how many were successfully written. If zero is returned, it indicates that // no more bytes can be accepted right now. -//typedef size_t (*upb_byte_cb)(upb_bytesink *s, upb_strptr str); +typedef size_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str); + +typedef struct _upb_bytesink { + upb_byte_cb *cb; +} upb_bytesink; #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3