summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-08-01 18:02:57 -0700
committerJoshua Haberman <joshua@reverberate.org>2009-08-01 18:02:57 -0700
commit85f6cecb80b48d13dd6c0886c07d38eda0a8f1dd (patch)
tree8b66cceb887cd57deabad2c07d018930d576170c /src
parent4bcdea25f03e529223ed8ae080bdf5aba71d816d (diff)
Added calculation of sizes for serialization (untested).
Diffstat (limited to 'src')
-rw-r--r--src/upb_msg.c117
-rw-r--r--src/upb_msg.h58
-rw-r--r--src/upb_parse.h27
-rw-r--r--src/upb_serialize.h42
4 files changed, 208 insertions, 36 deletions
diff --git a/src/upb_msg.c b/src/upb_msg.c
index bdebe0d..513c0c5 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -9,6 +9,7 @@
#include "descriptor.h"
#include "upb_msg.h"
#include "upb_parse.h"
+#include "upb_serialize.h"
/* Rounds p up to the next multiple of t. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
@@ -248,7 +249,7 @@ void upb_msg_reuse_submsg(void **msg, struct upb_msg *m)
if(!*msg) *msg = upb_msgdata_new(m);
}
-/* Serialization/Deserialization. ********************************************/
+/* Parsing. ******************************************************************/
static upb_field_type_t tag_cb(void *udata, struct upb_tag *tag,
void **user_field_desc)
@@ -390,6 +391,120 @@ void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *str, bool byref)
}
}
+/* Serialization. ************************************************************/
+
+/* We store the message sizes linearly in post-order (size of parent after sizes
+ * of children) for a right-to-left traversal of the message tree. Iterating
+ * over this in reverse gives us a pre-order (size of parent before sizes of
+ * children) left-to-right traversal, which is what we want for serializing. */
+struct upb_msgsizes {
+ /* Number of entries of 'sizes' currently in use. */
+ int len;
+ /* Number of entries 'sizes' has room for (its allocated capacity). */
+ int size;
+ /* Growable array of serialized message sizes; the most recently completed
+  * message is always the last entry in use. */
+ size_t *sizes;
+};
+
+/* Defined below -- this and get_valuesize are mutually recursive. */
+static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
+ struct upb_msg *m);
+
+/* Computes how many bytes a single field value occupies once serialized.  The
+ * field's own tag is *not* counted here; the caller adds it separately.
+ * Submessages and groups recurse into get_msgsize(), which also records their
+ * sizes in 'sizes'. */
+static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
+                            struct upb_msg_field *f,
+                            google_protobuf_FieldDescriptorProto *fd)
+{
+  switch(f->type) {
+    /* Scalars: defer to the per-type size helper for the stored value. */
+#define SCALAR_SIZE(type, member) \
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type: \
+      return upb_get_ ## type ## _size(*p.member);
+    SCALAR_SIZE(DOUBLE, _double)
+    SCALAR_SIZE(FLOAT, _float)
+    SCALAR_SIZE(INT32, int32)
+    SCALAR_SIZE(INT64, int64)
+    SCALAR_SIZE(UINT32, uint32)
+    SCALAR_SIZE(UINT64, uint64)
+    SCALAR_SIZE(SINT32, int32)
+    SCALAR_SIZE(SINT64, int64)
+    SCALAR_SIZE(FIXED32, uint32)
+    SCALAR_SIZE(FIXED64, uint64)
+    SCALAR_SIZE(SFIXED32, int32)
+    SCALAR_SIZE(SFIXED64, int64)
+    SCALAR_SIZE(BOOL, _bool)
+    SCALAR_SIZE(ENUM, int32)
+#undef SCALAR_SIZE
+
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: {
+      /* A submessage is its length prefix followed by its body. */
+      size_t body_size = get_msgsize(sizes, p.msg, f->ref.msg);
+      size_t prefix_size = upb_get_INT32_size(body_size);
+      return prefix_size + body_size;
+    }
+
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP: {
+      /* A group has no length prefix; its body is closed by an end-group tag
+       * that carries the same field number. */
+      size_t closing_tag_size = upb_get_tag_size(fd->number);
+      size_t body_size = get_msgsize(sizes, p.msg, f->ref.msg);
+      return closing_tag_size + body_size;
+    }
+
+    /* NOTE(review): no STRING or BYTES case is present, so such fields would
+     * land here -- confirm whether that is intended at this stage. */
+    default:
+      assert(false);  /* Internal corruption. */
+      return 0;
+  }
+}
+
+/* Computes the serialized size of the message 'data' (of type 'm'), recursing
+ * into submessages through get_valuesize().  Besides returning the total, it
+ * appends the size of every message it visits to 'sizes' (children before
+ * their parent), because all of this intermediate information is needed again
+ * later.  Terminates the process if the 'sizes' array cannot be grown. */
+static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
+                          struct upb_msg *m)
+{
+  size_t size = 0;
+  /* We iterate over fields and arrays in reverse order. */
+  for(int32_t i = m->num_fields - 1; i >= 0; i--) {
+    struct upb_msg_field *f = &m->fields[i];
+    google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
+    if(!upb_msg_isset(data, f)) continue;
+    union upb_value_ptr p = upb_msg_getptr(data, f);
+    if(upb_isarray(f)) {
+      for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) {
+        union upb_value_ptr elem = upb_array_getelementptr((*p.arr), j, f->type);
+        /* TODO: for packed arrays tag size goes outside the loop. */
+        size += upb_get_tag_size(fd->number);
+        size += get_valuesize(sizes, elem, f, fd);
+      }
+    } else {
+      size += upb_get_tag_size(fd->number);
+      size += get_valuesize(sizes, p, f, fd);
+    }
+  }
+  /* Grow the 'sizes' array if it is full.  upb_msgsizes_init() starts out with
+   * a capacity of zero, so doubling alone would never make any room. */
+  assert(sizes->len <= sizes->size);
+  if(sizes->len == sizes->size) {
+    int new_size = sizes->size > 0 ? sizes->size * 2 : 16;
+    size_t *new_sizes = realloc(sizes->sizes, new_size * sizeof(*new_sizes));
+    /* There is no way to report failure from here, and carrying on would
+     * write through a NULL pointer; stop deterministically instead. */
+    if(!new_sizes) abort();
+    sizes->sizes = new_sizes;
+    sizes->size = new_size;
+  }
+  /* Add our size (already added our children, so post-order). */
+  sizes->sizes[sizes->len++] = size;
+  return size;
+}
+
+/* Walks the message 'data' (of type 'm') and records its size, and the size
+ * of every submessage, in 'sizes'.  The total computed by the walk is not
+ * returned here; it ends up as the last entry recorded in 'sizes'. */
+void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data, struct upb_msg *m)
+{
+  (void)get_msgsize(sizes, data, m);
+}
+
+/* Puts 'sizes' into the empty state: no entries recorded and no storage
+ * allocated yet. */
+void upb_msgsizes_init(struct upb_msgsizes *sizes)
+{
+  *sizes = (struct upb_msgsizes){ .len = 0, .size = 0, .sizes = NULL };
+}
+
+/* Releases the storage owned by 'sizes' and puts it back into the empty
+ * state, so that no stale pointer or count is left behind and freeing the
+ * same object a second time is harmless. */
+void upb_msgsizes_free(struct upb_msgsizes *sizes)
+{
+  free(sizes->sizes);
+  sizes->sizes = NULL;
+  sizes->size = 0;
+  sizes->len = 0;
+}
+
+/* Returns the size of the top-level message, which is the last entry recorded
+ * because sizes are stored in post-order.  upb_msgsizes_read() must have been
+ * called first: with nothing recorded there is no last entry to return, which
+ * is checked by the assertion below. */
+size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes)
+{
+  assert(sizes->len > 0);
+  return sizes->sizes[sizes->len-1];
+}
+
+/* Comparison. ***************************************************************/
+
bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
upb_field_type_t type)
{
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 4ce94a6..8699fbf 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -215,32 +215,39 @@ INLINE bool upb_isarray(struct upb_msg_field *f) {
/* "Set" flag reading and writing. *******************************************/
+/* Please note that these functions do not perform any memory management or in
+ * any way ensure that the fields are valid. They *only* test/set/clear a bit
+ * that indicates whether the field is set or not. */
+
+/* Returns the byte offset where we store whether this field is set. */
INLINE size_t upb_isset_offset(uint32_t field_index) {
return field_index / 8;
}
+/* Returns the mask within the appropriate byte that selects the set bit. */
INLINE uint8_t upb_isset_mask(uint32_t field_index) {
return 1 << (field_index % 8);
}
-/* Functions for reading and writing the "set" flags in the msg. Note that
- * these do not perform memory management associated with any dynamic memory
- * these fields may be referencing. These *only* set and test the flags. */
+/* Sets the set bit for this field in the given message. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
{
((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
}
+/* Clears the set bit for this field in the given message. */
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
{
((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
}
+/* Tests whether the given field is set. */
INLINE bool upb_msg_isset(void *s, struct upb_msg_field *f)
{
return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
}
+/* Returns true if *all* required fields are set, false otherwise. */
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
{
int num_fields = m->num_required_fields;
@@ -253,6 +260,7 @@ INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
return true;
}
+/* Clears the set bit for all fields. */
INLINE void upb_msg_clear(void *s, struct upb_msg *m)
{
memset(s, 0, m->set_flags_bytes);
@@ -304,7 +312,7 @@ void upb_msg_reuse_array(struct upb_array **arr, uint32_t size,
/* Reuse a submessage of the given type. */
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
-/* Serialization/Deserialization. ********************************************/
+/* Parsing. ******************************************************************/
/* This is all just a layer on top of the stream-oriented facility in
* upb_parse.h. */
@@ -352,6 +360,48 @@ upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
* above. "byref" works as in upb_msg_parse_init(). */
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref);
+/* Serialization *************************************************************/
+
+/* For messages that contain any submessages, we must do a pre-pass on the
+ * message tree to discover the size of all submessages. This is necessary
+ * because when serializing, the message length has to precede the message data
+ * itself.
+ *
+ * We can calculate these sizes once and reuse them as long as the message is
+ * known not to have changed. */
+struct upb_msgsizes;
+
+/* Initialize/free a upb_msgsizes for the given message. */
+void upb_msgsizes_init(struct upb_msgsizes *sizes);
+void upb_msgsizes_free(struct upb_msgsizes *sizes);
+
+/* Given a previously initialized sizes, recurse over the message and store its
+ * sizes in 'sizes'. */
+void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data,
+ struct upb_msg *m);
+
+/* Returns the total size of the serialized message given in sizes. Must be
+ * preceded by a call to upb_msgsizes_read. */
+size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes);
+
+struct upb_msg_serialize_state;
+
+/* Initializes the state of serialization. The provided message must not
+ * change between the upb_msgsizes_read() call that was used to construct
+ * "sizes" and the serialization being fully completed. */
+void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s);
+void upb_msg_serialize_free(struct upb_msg_serialize_state *s);
+void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
+ struct upb_msg *m, struct upb_msgsizes *sizes);
+
+/* Serializes the next set of bytes into buf (which has size len). Returns
+ * UPB_STATUS_OK if serialization is complete, or UPB_STATUS_NEED_MORE_DATA
+ * if there is more data from the message left to be serialized.
+ *
+ * The number of bytes written to buf is returned in *read. This will be
+ * equal to len unless we finished serializing. */
+upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
+ void *buf, size_t len, size_t *read);
/* Text dump *****************************************************************/
diff --git a/src/upb_parse.h b/src/upb_parse.h
index ca18937..193c307 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -164,8 +164,7 @@ INLINE upb_status_t upb_get_v_uint32_t(uint8_t *buf, uint8_t *end,
{
uint64_t val64;
UPB_CHECK(upb_get_v_uint64_t(buf, end, &val64, outbuf));
- /* TODO: should we throw an error if any of the high bits in val64 are set? */
- *val = (uint32_t)val64;
+ *val = (uint32_t)val64; /* Discard the high bits. */
return UPB_STATUS_OK;
}
@@ -245,18 +244,18 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
GET(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t)
-T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; }
-T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; }
-T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
-T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
-T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); }
-T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); }
-T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
-T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
-T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; }
-T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; }
-T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; }
-T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; }
+T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; }
+T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; }
+T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
+T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
+T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); }
+T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); }
+T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
+T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
+T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; }
+T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; }
+T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; }
+T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; }
T(DOUBLE, f, uint64_t, double, _double) {
union upb_value v;
v.uint64 = s;
diff --git a/src/upb_serialize.h b/src/upb_serialize.h
index b785e0e..ec735c2 100644
--- a/src/upb_serialize.h
+++ b/src/upb_serialize.h
@@ -44,11 +44,19 @@ INLINE upb_status_t upb_put_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t val,
return UPB_STATUS_OK;
}
-/* Puts a varint -- called when we only have 32 bits of data. */
+/* Puts an unsigned 32-bit varint, verbatim. The value is zero-extended, so
+ * the high 32 bits of the 64-bit varint are never set. */
INLINE upb_status_t upb_put_v_uint32_t(uint8_t *buf, uint8_t *end,
uint32_t val, uint8_t **outbuf)
{
- return UPB_STATUS_OK;
+ return upb_put_v_uint64_t(buf, end, val, outbuf);
+}
+
+/* Puts a signed 32-bit value as a varint.  The value is first widened to 64
+ * bits with sign extension, so that what goes on the wire is identical to
+ * what a 64-bit signed integer holding the same value would produce. */
+INLINE upb_status_t upb_put_v_int32_t(uint8_t *buf, uint8_t *end,
+                                      int32_t val, uint8_t **outbuf)
+{
+  int64_t extended = val;
+  return upb_put_v_uint64_t(buf, end, extended, outbuf);
+}
INLINE void upb_put32(uint8_t *buf, uint32_t val) {
@@ -157,18 +165,18 @@ INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
PUT(type, v_or_f, wire_t, val_t, member_name) \
VTOWV(type, wire_t, val_t)
-T(INT32, v, uint32_t, int32_t, int32) { return (uint32_t)s; }
-T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; }
-T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
-T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
-T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); }
-T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); }
-T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
-T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
-T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; }
-T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; }
-T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; }
-T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; }
+T(INT32, v, uint32_t, int32_t, int32) { return (uint32_t)s; }
+T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; }
+T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
+T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
+T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); }
+/* Serializing must zig-zag *encode* the value; decoding is the parser's job. */
+T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); }
+T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
+T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
+T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; }
+T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; }
+T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; }
+T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; }
T(DOUBLE, f, uint64_t, double, _double) {
union upb_value v;
v._double = s;
@@ -183,9 +191,9 @@ T(FLOAT, f, uint32_t, float, _float) {
#undef PUT
#undef T
-/* Functions to get sizes of serialized values without serializing. ***********/
-
-
+/* Returns the number of bytes the tag for field number 'fieldnum' occupies
+ * once serialized.  The tag is the field number shifted left by three bits
+ * (the low bits hold the wire type, which cannot change the length) and is
+ * written as a varint.  Marked INLINE like the other functions defined in
+ * this header, so that including the header from more than one source file
+ * does not define the function more than once. */
+INLINE size_t upb_get_tag_size(uint32_t fieldnum) {
+  return upb_v_uint64_t_size((uint64_t)fieldnum << 3);
+}
#ifdef __cplusplus
} /* extern "C" */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback