summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2010-01-16 21:47:47 -0800
committerJoshua Haberman <joshua@reverberate.org>2010-01-16 21:47:47 -0800
commit036fe6bb0673930cd5e8450532abe3b7acffb94e (patch)
tree652770f222f592542745060d61be96ad2ec0427e
parent611afe9c6928ea814abd37c4b3cc2869a6ed5efd (diff)
Flesh out implementation of upb_sizebuilder.
-rw-r--r--src/upb.h8
-rw-r--r--src/upb_decoder.c20
-rw-r--r--src/upb_encoder.c161
-rw-r--r--src/upb_encoder.h37
-rw-r--r--src/upb_sink.h22
5 files changed, 189 insertions, 59 deletions
diff --git a/src/upb.h b/src/upb.h
index b680d7b..4fb5773 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -272,14 +272,14 @@ enum upb_status_code {
// The input byte stream ended in the middle of a record.
UPB_STATUS_NEED_MORE_DATA = 1,
- // The user value callback opted to stop parsing.
- UPB_STATUS_USER_CANCELLED = 2,
-
// An unrecoverable error occurred.
UPB_STATUS_ERROR = -1,
// A varint went for 10 bytes without terminating.
- UPB_ERROR_UNTERMINATED_VARINT = -2
+ UPB_ERROR_UNTERMINATED_VARINT = -2,
+
+ // The max nesting level (UPB_MAX_NESTING) was exceeded.
+ UPB_ERROR_MAX_NESTING_EXCEEDED = -3
};
#define UPB_ERRORMSG_MAXLEN 256
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index f07d901..440eaef 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -207,10 +207,8 @@ INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end,
}
-/**
- * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
- * handles 1 and 2 byte varints).
- */
+// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
+// handles 1 and 2 byte varints).
const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end,
uint64_t *val, upb_status *status)
{
@@ -367,10 +365,8 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
}
-/**
- * Pushes a new stack frame for a submessage with the given len (which will
- * be zero if the submessage is a group).
- */
+// Pushes a new stack frame for a submessage with the given len (which will
+// be zero if the submessage is a group).
static const uint8_t *push(upb_decoder *d, const uint8_t *start,
uint32_t submsg_len, upb_fielddef *f,
upb_status *status)
@@ -378,7 +374,7 @@ static const uint8_t *push(upb_decoder *d, const uint8_t *start,
d->top->field = f;
d->top++;
if(d->top >= d->limit) {
- upb_seterr(status, UPB_STATUS_ERROR,
+ upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED,
"Nesting exceeded maximum (%d levels)\n",
UPB_MAX_NESTING);
return NULL;
@@ -391,10 +387,8 @@ static const uint8_t *push(upb_decoder *d, const uint8_t *start,
return get_msgend(d, start);
}
-/**
- * Pops a stack frame, returning a pointer for where the next submsg should
- * end (or a pointer that is out of range for a group).
- */
+// Pops a stack frame, returning a pointer for where the next submsg should
+// end (or a pointer that is out of range for a group).
static const void *pop(upb_decoder *d, const uint8_t *start)
{
d->top--;
diff --git a/src/upb_encoder.c b/src/upb_encoder.c
index 9b8b213..6d57acc 100644
--- a/src/upb_encoder.c
+++ b/src/upb_encoder.c
@@ -5,9 +5,11 @@
*/
#include "upb_encoder.h"
+
+#include <stdlib.h>
#include "descriptor.h"
-/* Functions for calculating sizes. *******************************************/
+/* Functions for calculating sizes of wire values. ****************************/
static size_t upb_v_uint64_t_size(uint64_t val) {
#ifdef __GNUC__
@@ -103,9 +105,9 @@ static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
return uint64_end;
}
-/* Functions to write .proto values. ******************************************/
+/* Functions to write and calculate sizes for .proto values. ******************/
-/* Performs zig-zag encoding, which is used by sint32 and sint64. */
+// Performs zig-zag encoding, which is used by sint32 and sint64.
static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
@@ -167,7 +169,7 @@ T(FLOAT, f, uint32_t, float, _float) {
#undef PUT
#undef T
-uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
+static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
{
#define CASE(t, member_name) \
case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
@@ -191,11 +193,127 @@ uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
#undef CASE
}
-uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t fn, upb_wire_type_t wt)
+static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
+{
+#define CASE(t, member_name) \
+ case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
+ switch(ft) {
+ CASE(DOUBLE, _double)
+ CASE(FLOAT, _float)
+ CASE(INT32, int32)
+ CASE(INT64, int64)
+ CASE(UINT32, uint32)
+ CASE(UINT64, uint64)
+ CASE(SINT32, int32)
+ CASE(SINT64, int64)
+ CASE(FIXED32, uint32)
+ CASE(FIXED64, uint64)
+ CASE(SFIXED32, int32)
+ CASE(SFIXED64, int64)
+ CASE(BOOL, _bool)
+ CASE(ENUM, int32)
+ default: assert(false); return 0;
+ }
+#undef CASE
+}
+
+static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
+ upb_wire_type_t wt)
+{
+ return upb_put_UINT32(buf, wt | (num << 3));
+}
+
+static uint32_t _upb_get_tag_size(upb_field_number_t num)
+{
+ return upb_get_UINT32_size(num << 3);
+}
+
+
+/* upb_sizebuilder ************************************************************/
+
+struct upb_sizebuilder {
+ // Accumulating size for the current level.
+ uint32_t size;
+
+ // Stack of sizes for our current nesting.
+ uint32_t stack[UPB_MAX_NESTING], *top, *limit;
+
+ // Vector of sizes.
+ uint32_t *sizes;
+ int sizes_len;
+ int sizes_size;
+
+ upb_status status;
+};
+
+// upb_sink callbacks.
+static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
+ upb_value val)
+{
+ upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+ uint32_t size = 0;
+ size += _upb_get_tag_size(f->number);
+ size += _upb_get_value_size(f->type, val);
+ sb->size += size;
+ return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
+ upb_strptr str,
+ int32_t start, uint32_t end)
+{
+ (void)str; // String data itself is not used.
+ upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+ if(start >= 0) {
+ uint32_t size = 0;
+ size += _upb_get_tag_size(f->number);
+ size += upb_get_UINT32_size(end - start);
+ sb->size += size;
+ }
+ return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f)
+{
+ (void)f; // Unused (we calculate tag size and delimiter in endcb).
+ upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+ *sb->top = sb->size;
+ sb->top++;
+ sb->size = 0;
+ if(sb->top == sb->limit) {
+ upb_seterr(&sb->status, UPB_ERROR_MAX_NESTING_EXCEEDED,
+ "Nesting exceeded maximum (%d levels)\n",
+ UPB_MAX_NESTING);
+ return UPB_SINK_STOP;
+ }
+ return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f)
{
- return upb_put_UINT32(buf, wt | (fn << 3));
+ upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+ if(sb->sizes_len == sb->sizes_size) {
+ sb->sizes_size *= 2;
+ sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes));
+ }
+ sb->sizes[sb->sizes_len++] = sb->size;
+ sb->top--;
+ // The size according to the parent includes the tag size and delimiter of
+ // the submessage.
+ sb->size += upb_get_UINT32_size(sb->size);
+ sb->size += _upb_get_tag_size(f->number);
+ // Include size accumulated in parent before child began.
+ sb->size += *sb->top;
+ return UPB_SINK_CONTINUE;
}
+upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
+ _upb_sizebuilder_valuecb,
+ _upb_sizebuilder_strcb,
+ _upb_sizebuilder_startcb,
+ _upb_sizebuilder_endcb
+};
+
/* upb_sink callbacks *********************************************************/
@@ -283,34 +401,3 @@ upb_sink_callbacks _upb_encoder_sink_vtbl = {
_upb_encoder_endcb
};
-
-/* Public Interface ***********************************************************/
-
-size_t upb_get_encoded_size(upb_value v, upb_fielddef *f)
-{
-#define CASE(t, member_name) \
- case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
- switch(f->type) {
- CASE(DOUBLE, _double)
- CASE(FLOAT, _float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, _bool)
- CASE(ENUM, int32)
- default: assert(false); return 0;
- }
-#undef CASE
-}
-
-size_t upb_get_encoded_tag_size(uint32_t fieldnum) {
- return upb_v_uint64_t_size((uint64_t)fieldnum << 3);
-}
-
diff --git a/src/upb_encoder.h b/src/upb_encoder.h
index 1322e21..1137c7f 100644
--- a/src/upb_encoder.h
+++ b/src/upb_encoder.h
@@ -20,13 +20,48 @@
extern "C" {
#endif
+/* upb_sizebuilder ************************************************************/
+
+// A upb_sizebuilder performs a pre-pass on data to be serialized that gathers
+// the sizes of submessages. This size data is required for serialization,
+// because we have to know at the beginning of a submessage how many encoded
+// bytes the submessage will represent.
+struct upb_sizebuilder;
+typedef struct upb_sizebuilder upb_sizebuilder;
+
+upb_sizebuilder *upb_sizebuilder_new();
+void upb_sizebuilder_free(upb_sizebuilder *sb);
+
+// Returns a sink that must be used to perform the pre-pass. Note that the
+// pre-pass *must* occur in the opposite order from the actual encode that
+// follows, and the data *must* be identical both times (except for the
+// reversed order).
+upb_sink *upb_sizebuilder_sink(upb_sizebuilder *sb);
+
+
+/* upb_encoder ****************************************************************/
+
+// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
+// buffer binary wire format.
struct upb_encoder;
typedef struct upb_encoder upb_encoder;
upb_encoder *upb_encoder_new();
void upb_encoder_free(upb_encoder *s);
-void upb_encoder_reset(upb_encoder *s, uint32_t *sizes);
+// Resets the given upb_encoder such that it is ready to begin encoding. The
+// upb_sizebuilder "sb" is used to determine submessage sizes; it must have
+// previously been initialized by feeding it the same data in reverse order.
+// "sb" may be null if and only if the data contains no submessages; groups
+// are ok and do not require sizes to be precalculated. The upb_bytesink
+// "out" is where the encoded output data will be sent.
+//
+// Both "sb" and "out" must live until the encoder is either reset or freed.
+void upb_encoder_reset(upb_encoder *s, upb_sizebuilder *sb, upb_bytesink *out);
+
+// The upb_sink to which data can be sent to be encoded. Note that this data
+// must be identical to the data that was previously given to the sizebuilder
+// (if any).
upb_sink *upb_encoder_sink(upb_encoder *s);
#ifdef __cplusplus
diff --git a/src/upb_sink.h b/src/upb_sink.h
index f117107..41e5275 100644
--- a/src/upb_sink.h
+++ b/src/upb_sink.h
@@ -98,9 +98,15 @@ typedef struct upb_sink_callbacks {
upb_end_cb end_cb;
} upb_sink_callbacks;
-// We could potentially define these later to also be capable of calling a C++
-// virtual method instead of doing the virtual dispatch manually. This would
-// make it possible to write C++ sinks in a more natural style without loss of
+// These macros implement a mini virtual function dispatch for upb_sink instances.
+// This allows functions that call upb_sinks to just write:
+//
+// upb_sink_onvalue(sink, field, val);
+//
+// The macro will handle the virtual function lookup and dispatch. We could
+// potentially define these later to also be capable of calling a C++ virtual
+// method instead of doing the virtual dispatch manually. This would make it
+// possible to write C++ sinks in a more natural style without loss of
// efficiency. We could have a flag in upb_sink defining whether it is a C
// sink or a C++ one.
#define upb_sink_onvalue(s, f, val) s->vtbl->value_cb(s, f, val)
@@ -123,10 +129,18 @@ INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) {
//
// The two simplest kinds of sinks are "write to string" and "write to FILE*".
+// A forward declaration solely for the benefit of declaring upb_byte_cb below.
+// Always prefer upb_bytesink (without the "struct" keyword) instead.
+struct _upb_bytesink;
+
// The single bytesink callback; it takes the bytes to be written and returns
// how many were successfully written. If zero is returned, it indicates that
// no more bytes can be accepted right now.
-//typedef size_t (*upb_byte_cb)(upb_bytesink *s, upb_strptr str);
+typedef size_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str);
+
+typedef struct _upb_bytesink {
+ upb_byte_cb *cb;
+} upb_bytesink;
#ifdef __cplusplus
} /* extern "C" */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback