summaryrefslogtreecommitdiff
path: root/pbstream.c
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-02-23 23:10:52 -0800
committerJoshua Haberman <joshua@reverberate.org>2009-02-23 23:10:52 -0800
commitfaf6b5f3bfe9cae65817c8f94dc8770323276a92 (patch)
tree8a29f7cb807babe22f9e8854674ce38abfe68b75 /pbstream.c
parent75ee6df2ff6509e0d8d92e26c613cfb24d3828e2 (diff)
Massive changes (practically a rewrite). More compact and minimal.
Still doesn't actually work yet, but much closer.
Diffstat (limited to 'pbstream.c')
-rw-r--r--pbstream.c613
1 files changed, 240 insertions, 373 deletions
diff --git a/pbstream.c b/pbstream.c
index 070e429..dd91bb8 100644
--- a/pbstream.c
+++ b/pbstream.c
@@ -4,7 +4,6 @@
* Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
*/
-#include <limits.h>
#include <string.h>
#include "pbstream.h"
@@ -17,427 +16,295 @@
#define unlikely(x) (x)
#endif
-/* An array, indexed by pbstream_type, that indicates what wire type is
- * expected for the given pbstream type. */
-static enum pbstream_wire_type expected_wire_type[] = {
- PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_DOUBLE,
- PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_FLOAT,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_INT32,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_INT64,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_UINT32,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_UINT64,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_SINT32,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_SINT64,
- PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_FIXED32,
- PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_FIXED64,
- PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_SFIXED32,
- PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_SFIXED64,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_BOOL,
- PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_STRING,
- PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_BYTES,
- PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_ENUM,
- PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_MESSAGE
-};
+/* Lowest-level functions -- these read integers from the input buffer.
+ * To avoid branches, none of these do bounds checking. So we force clients
+ * to overallocate their buffers by >=9 bytes. */
-/* Reads a varint starting at buf (but not past end), storing the result
- * in out_value. Returns whether the operation was successful. */
-enum pbstream_status get_varint(char **buf, char *end, uint64_t *out_value)
+static pbstream_status_t get_v_uint64_t(char **buf, char *end, uint64_t *val)
{
- *out_value = 0;
- int bitpos = 0;
- char *b = *buf;
-
- /* Because we don't check for buffer overrun inside the loop, we require
- * that callers use a buffer that is overallocated by at least 9 bytes (the
- * maximum we can overrun before the bitpos check catches the problem). */
- for(; *b & 0x80 && bitpos < 64; bitpos += 7, b++)
- *out_value |= (uint64_t)(*b & 0x7F) << bitpos;
-
- /* If bitpos is 63 (as it will be if this was a nine-byte varint) this will
- * throw away the middle six bits of the final byte. We don't bother warning
- * about this. */
- *out_value |= (uint64_t)(*b & 0x7F) << bitpos;
- b++;
-
- if(unlikely(bitpos >= 64)) return PBSTREAM_ERROR_UNTERMINATED_VARINT;
- if(unlikely(b > end)) return PBSTREAM_STATUS_INCOMPLETE;
- *buf = b;
- return PBSTREAM_STATUS_OK;
+ uint8_t* ptr = (uint8_t*)*buf;
+ uint32_t b;
+ uint32_t part0 = 0, part1 = 0, part2 = 0;
+
+ b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ return PBSTREAM_ERROR_UNTERMINATED_VARINT;
+
+done:
+ *buf = (char*)ptr;
+ *val = (uint64_t)part0 | ((uint64_t)part1 << 28) | ((uint64_t)part2 << 56);
+ return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE : PBSTREAM_STATUS_OK;
}
-/* TODO: the little-endian versions of these functions don't respect alignment.
- * While it's hard to believe that this could be less efficient than the
- * alternative (the big endian implementation), this deserves some tests and
- * measurements to be sure. */
-enum pbstream_status get_32_le(char **buf, char *end, uint32_t *out_value)
+static pbstream_status_t get_v_uint32_t(char **buf, char *end, uint32_t *val)
{
- char *b = *buf;
- char *int32_end = b+4;
- if(unlikely(int32_end > end)) return PBSTREAM_STATUS_INCOMPLETE;
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- *out_value = *(uint32_t*)b;
-#else
- *out_value = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
-#endif
- *buf = int32_end;
- return PBSTREAM_STATUS_OK;
+ uint8_t* ptr = (uint8_t*)*buf;
+ uint32_t b;
+ uint32_t result;
+
+ b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result = (b & 0x7F) << 28; if (!(b & 0x80)) goto done;
+ return PBSTREAM_ERROR_UNTERMINATED_VARINT;
+
+done:
+ *buf = (char*)ptr;
+ *val = result;
+ return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE: PBSTREAM_STATUS_OK;
}
-bool get_64_le(char **buf, char *end, uint64_t *out_value)
+static pbstream_status_t get_f_uint32_t(char **buf, char *end, uint32_t *val)
{
- char *b = *buf;
- char *int64_end = b+8;
- if(unlikely(int64_end > end)) return PBSTREAM_STATUS_INCOMPLETE;
+ uint8_t *b = (uint8_t*)*buf;
#if __BYTE_ORDER == __LITTLE_ENDIAN
- *out_value = *(uint64_t*)buf;
+ *val = *(uint32_t*)b; /* likely unaligned, TODO: verify performance. */
#else
- *out_value = (b[0]) | (b[1] << 8 ) | (b[2] << 16) | (b[3] << 24) |
- (b[4] << 32) | (b[5] << 40) | (b[6] << 48) | (b[7] << 56);
+ *val = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
#endif
- *buf = int64_end;
- return PBSTREAM_STATUS_OK;
+ *buf = (char*)b + sizeof(uint32_t);
+ return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE : PBSTREAM_STATUS_OK;
}
-int32_t zigzag_decode_32(uint32_t n)
+static pbstream_status_t get_f_uint64_t(char **buf, char *end, uint64_t *val)
{
- return (n >> 1) ^ -(int32_t)(n & 1);
+ uint8_t *b = (uint8_t*)*buf;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ *val = *(uint64_t*)buf; /* likely unaligned, TODO: verify performance. */
+#else
+ *val = (b[0]) | (b[1] << 8 ) | (b[2] << 16) | (b[3] << 24) |
+ (b[4] << 32) | (b[5] << 40) | (b[6] << 48) | (b[7] << 56);
+#endif
+ *buf = (char*)b + sizeof(uint64_t);
+ return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE : PBSTREAM_STATUS_OK;
}
-int64_t zigzag_decode_64(uint64_t n)
-{
- return (n >> 1) ^ -(int64_t)(n & 1);
-}
+static int32_t zz_decode_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
+static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
-/* Parses the next field-number/wire-value pair from the stream of bytes
- * starting at *buf, without reading past end. Stores the parsed and wire
- * value in *field_number and *wire_value, respectively.
- *
- * Returns a status indicating whether the operation was successful. If the
- * return status is STATUS_INCOMPLETE, returns the number of additional bytes
- * requred in *need_more_bytes. Updates *buf to point past the end of the
- * parsed data if the operation was successful.
- */
-enum pbstream_status pbstream_parse_wire_value(
- char **buf, char *end,
- pbstream_field_number_t *field_number,
- struct pbstream_wire_value *wire_value,
- int *need_more_bytes)
-{
- char *b = *buf; /* Our local buf pointer -- only update buf if we succeed. */
-
-#define DECODE(dest, func) \
- do { \
- enum pbstream_status status = func(&b, end, &dest); \
- if(unlikely(status != PBSTREAM_STATUS_OK)) { \
- *need_more_bytes = 0; /* This only arises below in this function. */ \
- return status; \
- } \
+#define CHECK(func) do { \
+ pbstream_wire_type_t status = func; \
+ if(status != PBSTREAM_STATUS_OK) return status; \
} while (0)
- uint64_t key;
- DECODE(key, get_varint);
+/* WVTOV() generates a function:
+ * void wvtov_TYPE(wire_t src, val_t *dst, size_t offset)
+ * (macro invoker defines the body of the function). */
+#define WVTOV(type, wire_t, val_t) \
+ static void wvtov_ ## type(wire_t s, val_t *d, size_t offset)
+
+/* GET() generates a function:
+ * pbstream_status_t get_TYPE(char **buf, char *end, size_t offset,
+ * pbstream_value *dst) */
+#define GET(type, v_or_f, wire_t, val_t, member_name) \
+ static pbstream_status_t get_ ## type(char **buf, char *end, size_t offset, \
+ struct pbstream_value *d) { \
+ wire_t tmp; \
+ CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp)); \
+ wvtov_ ## type(tmp, &d->v.member_name, offset); \
+ return PBSTREAM_STATUS_OK; \
+ }
- *field_number = key >> 3;
- wire_value->type = key & 0x07;
+#define T(type, v_or_f, wire_t, val_t, member_name) \
+ WVTOV(type, wire_t, val_t); /* prototype for GET below */ \
+ GET(type, v_or_f, wire_t, val_t, member_name) \
+ WVTOV(type, wire_t, val_t)
+
+T(DOUBLE, f, uint64_t, double, _double){ memcpy(d, &s, sizeof(double)); }
+T(FLOAT, f, uint32_t, float, _float) { memcpy(d, &s, sizeof(float)); }
+T(INT32, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
+T(INT64, v, uint64_t, int64_t, int64) { *d = (int64_t)s; }
+T(UINT32, v, uint32_t, uint32_t, uint32) { *d = s; }
+T(UINT64, v, uint64_t, uint64_t, uint64) { *d = s; }
+T(SINT32, v, uint32_t, int32_t, int32) { *d = zz_decode_32(s); }
+T(SINT64, v, uint64_t, int64_t, int64) { *d = zz_decode_64(s); }
+T(FIXED32, f, uint32_t, uint32_t, uint32) { *d = s; }
+T(FIXED64, f, uint64_t, uint64_t, uint64) { *d = s; }
+T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; }
+T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; }
+T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; }
+T(ENUM, v, uint32_t, int32_t, _enum) { *d = (int32_t)s; }
+
+#define T_DELIMITED(type) \
+ T(type, v, uint32_t, struct pbstream_delimited, delimited) { \
+ d->offset = offset; \
+ d->len = s; \
+ }
+T_DELIMITED(STRING); /* We leave UTF-8 validation to the client. */
+T_DELIMITED(BYTES);
+T_DELIMITED(MESSAGE);
+#undef WVTOV
+#undef GET
+#undef T
+#undef T_DELIMITED
+
+struct pbstream_type_info {
+ pbstream_wire_type_t expected_wire_type;
+ pbstream_status_t (*get)(char **buf, char *end, size_t offset,
+ struct pbstream_value *d);
+};
+static struct pbstream_type_info type_info[] = {
+ {PBSTREAM_WIRE_TYPE_64BIT, get_DOUBLE},
+ {PBSTREAM_WIRE_TYPE_32BIT, get_FLOAT},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_INT32},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_INT64},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_UINT32},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_UINT64},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_SINT32},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_SINT64},
+ {PBSTREAM_WIRE_TYPE_32BIT, get_FIXED32},
+ {PBSTREAM_WIRE_TYPE_64BIT, get_FIXED64},
+ {PBSTREAM_WIRE_TYPE_32BIT, get_SFIXED32},
+ {PBSTREAM_WIRE_TYPE_64BIT, get_SFIXED64},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_BOOL},
+ {PBSTREAM_WIRE_TYPE_DELIMITED, get_STRING},
+ {PBSTREAM_WIRE_TYPE_DELIMITED, get_BYTES},
+ {PBSTREAM_WIRE_TYPE_VARINT, get_ENUM},
+ {PBSTREAM_WIRE_TYPE_DELIMITED, get_MESSAGE}
+};
- switch(wire_value->type) {
- case PBSTREAM_WIRE_TYPE_VARINT:
- DECODE(wire_value->v.varint, get_varint);
- break;
+static pbstream_status_t parse_tag(char **buf, char *end, struct pbstream_tag *tag)
+{
+ uint32_t tag_int;
+ CHECK(get_v_uint32_t(buf, end, &tag_int));
+ tag->wire_type = tag_int & 0x07;
+ tag->field_number = tag_int >> 3;
+ return PBSTREAM_STATUS_OK;
+}
+static pbstream_status_t parse_unknown_value(
+ char **buf, char *end, int buf_offset,
+ struct pbstream_wire_value *wv)
+{
+#define DECODE(dest, func) CHECK(func(buf, end, &dest))
+ switch(wv->type) {
+ case PBSTREAM_WIRE_TYPE_VARINT:
+ DECODE(wv->v.varint, get_v_uint64_t); break;
case PBSTREAM_WIRE_TYPE_64BIT:
- DECODE(wire_value->v._64bit, get_64_le);
- break;
-
- case PBSTREAM_WIRE_TYPE_STRING: {
- uint64_t string_len;
- DECODE(string_len, get_varint);
- if (unlikely(string_len > INT_MAX)) {
- /* TODO: notice this and fail. */
- }
- wire_value->v.string.len = (int)string_len;
- if(b + wire_value->v.string.len > end) {
- *need_more_bytes = b + wire_value->v.string.len - end;
- return PBSTREAM_STATUS_INCOMPLETE;
- }
- wire_value->v.string.data = b;
- b += wire_value->v.string.len;
+ DECODE(wv->v._64bit, get_f_uint64_t); break;
+ case PBSTREAM_WIRE_TYPE_32BIT:
+ DECODE(wv->v._32bit, get_f_uint32_t); break;
+ case PBSTREAM_WIRE_TYPE_DELIMITED: {
+ uint32_t len;
+ wv->v.delimited.offset = buf_offset;
+ DECODE(len, get_v_uint32_t);
+ wv->v.delimited.len = (size_t)len;
break;
}
-
case PBSTREAM_WIRE_TYPE_START_GROUP:
case PBSTREAM_WIRE_TYPE_END_GROUP:
/* TODO (though these are deprecated, so not high priority). */
break;
-
- case PBSTREAM_WIRE_TYPE_32BIT:
- DECODE(wire_value->v._32bit, get_32_le);
- break;
}
-
- *buf = b;
- return true;
+ return PBSTREAM_STATUS_OK;
+#undef DECODE
}
-/* Translates from a wire value to a .proto value. The caller should have
- * already checked that the wire_value is of the correct type. The pbstream
- * type must not be PBSTREAM_TYPE_MESSAGE. This operation always succeeds. */
-void pbstream_translate_field(struct pbstream_wire_value *wire_value,
- enum pbstream_type type,
- struct pbstream_value *out_value)
-{
- out_value->type = type;
- switch(type) {
- case PBSTREAM_TYPE_DOUBLE:
- memcpy(&out_value->v._double, &wire_value->v._64bit, sizeof(double));
- break;
-
- case PBSTREAM_TYPE_FLOAT:
- memcpy(&out_value->v._float, &wire_value->v._32bit, sizeof(float));
- break;
-
- case PBSTREAM_TYPE_INT32:
- out_value->v.int32 = (int32_t)wire_value->v.varint;
- break;
-
- case PBSTREAM_TYPE_INT64:
- out_value->v.int64 = (int64_t)zigzag_decode_64(wire_value->v.varint);
- break;
-
- case PBSTREAM_TYPE_UINT32:
- out_value->v.uint32 = (uint32_t)wire_value->v.varint;
- break;
-
- case PBSTREAM_TYPE_UINT64:
- out_value->v.uint64 = (uint64_t)wire_value->v.varint;
- break;
-
- case PBSTREAM_TYPE_SINT32:
- out_value->v.int32 = zigzag_decode_32(wire_value->v.varint);
- break;
-
- case PBSTREAM_TYPE_SINT64:
- out_value->v.int64 = zigzag_decode_64(wire_value->v.varint);
- break;
-
- case PBSTREAM_TYPE_FIXED32:
- out_value->v.int32 = wire_value->v._32bit;
- break;
-
- case PBSTREAM_TYPE_FIXED64:
- out_value->v.int64 = wire_value->v._64bit;
- break;
-
- case PBSTREAM_TYPE_SFIXED32:
- out_value->v.int32 = (int32_t)wire_value->v._32bit;
- break;
-
- case PBSTREAM_TYPE_SFIXED64:
- out_value->v.int64 = (int64_t)wire_value->v._64bit;
- break;
-
- case PBSTREAM_TYPE_BOOL:
- out_value->v._bool = (bool)wire_value->v.varint;
- break;
-
- case PBSTREAM_TYPE_STRING:
- out_value->v.string.data = wire_value->v.string.data;
- out_value->v.string.len = wire_value->v.string.len;
- /* TODO: validate UTF-8? */
- break;
-
- case PBSTREAM_TYPE_BYTES:
- out_value->v.bytes.data = wire_value->v.string.data;
- out_value->v.bytes.len = wire_value->v.string.len;
- break;
+#define CALLBACK(s, func, ...) do { \
+ if(s->callbacks.func) s->callbacks.func(__VA_ARGS__); \
+ } while (0)
- case PBSTREAM_TYPE_ENUM:
- out_value->v._enum = (bool)wire_value->v.varint;
- break;
+#define NONFATAL_ERROR(s, code) do { \
+ if(s->ignore_nonfatal_errors) CALLBACK(s, error_callback, code); \
+ else return code; \
+ } while (0)
- case PBSTREAM_TYPE_MESSAGE:
- /* Should never happen. */
- break;
- }
+static struct pbstream_field_descriptor *find_field_descriptor(
+ struct pbstream_message_descriptor* md,
+ pbstream_field_number_t field_number)
+{
+ /* Likely will want to replace linear search with something better. */
+ for (int i = 0; i < md->fields_len; i++)
+ if (md->fields[i].field_number == field_number) return &md->fields[i];
+ return NULL;
}
-/* Given a wire value that was just parsed and a matching field descriptor,
- * processes the given value and performs the appropriate actions. These
- * actions include:
- * - checking that the wire type is as expected
- * - converting the wire type to a .proto type
- * - entering a sub-message, if that is in fact what this field implies.
- *
- * This function also calls user callbacks pertaining to any of the above at
- * the appropriate times. */
-void process_value(struct pbstream_parse_state *s,
- struct pbstream_wire_value *wire_value,
- struct pbstream_field_descriptor *field_descriptor)
+/* Process actions associated with the end of a [sub-]message. */
+pbstream_status_t process_message_end(struct pbstream_parse_state *s)
{
- /* Check that the wire type is appropriate for this .proto type. */
- if(unlikely(wire_value->type != expected_wire_type[field_descriptor->type])) {
- /* Type mismatch. */
- if(s->callbacks.error_callback) {
- /* TODO: a nice formatted message. */
- s->callbacks.error_callback(PBSTREAM_ERROR_MISMATCHED_TYPE, NULL,
- s->offset, false);
+ struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack);
+ /* A submessage that doesn't end exactly on a field boundary indicates
+ * corruption. */
+ if(unlikely(s->offset != frame->end_offset))
+ return PBSTREAM_ERROR_BAD_SUBMESSAGE_END;
+
+ /* Check required fields. */
+ struct pbstream_message_descriptor *md = frame->message_descriptor;
+ for(int i = 0; i < md->fields_len; i++) {
+ struct pbstream_field_descriptor *fd = &md->fields[i];
+ if(fd->seen_field_num && !frame->seen_fields[fd->seen_field_num] &&
+ fd->cardinality == PBSTREAM_CARDINALITY_REQUIRED) {
+ NONFATAL_ERROR(s, PBSTREAM_ERROR_MISSING_REQUIRED_FIELD);
}
+ }
+ RESIZE_DYNARRAY(s->stack, s->stack_len-1);
+ return PBSTREAM_STATUS_OK;
+}
- /* Report the wire value we parsed as an unknown value. */
- if(s->callbacks.unknown_value_callback) {
- s->callbacks.unknown_value_callback(field_descriptor->field_number,
- wire_value,
- s->user_data);
- }
- return;
+/* Parses and processes the next value from buf (but not past end). */
+pbstream_status_t parse_field(struct pbstream_parse_state *s,
+ char *buf, char *end,
+ pbstream_field_number_t *fieldnum,
+ struct pbstream_value *val,
+ struct pbstream_wire_value *wv)
+{
+ struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack);
+ struct pbstream_message_descriptor *md = frame->message_descriptor;
+ struct pbstream_tag tag;
+ struct pbstream_field_descriptor *fd;
+ struct pbstream_type_info *info;
+ char *b = buf;
+
+ if(unlikely(s->offset >= frame->end_offset)) return process_message_end(s);
+
+ CHECK(parse_tag(&b, end, &tag));
+ size_t val_offset = s->offset + (b-buf);
+ fd = find_field_descriptor(md, tag.field_number);
+ if(unlikely(!fd)) goto unknown_value;
+ info = &type_info[fd->type];
+
+ /* Check type and cardinality. */
+ if(unlikely(tag.wire_type != info->expected_wire_type)) {
+ NONFATAL_ERROR(s, PBSTREAM_ERROR_MISMATCHED_TYPE);
+ goto unknown_value;
+ }
+ if(fd->seen_field_num > 0) {
+ if(unlikely(frame->seen_fields[fd->seen_field_num]))
+ NONFATAL_ERROR(s, PBSTREAM_ERROR_DUPLICATE_FIELD);
+ frame->seen_fields[fd->seen_field_num] = true;
}
- if(field_descriptor->type == PBSTREAM_TYPE_MESSAGE) {
+ if(unlikely(fd->type == PBSTREAM_TYPE_MESSAGE)) {
/* We're entering a sub-message. */
- if(s->callbacks.begin_message_callback) {
- s->callbacks.begin_message_callback(field_descriptor->d.message,
- s->user_data);
- }
-
- /* Push and initialize a new stack frame. */
+ CHECK(info->get(&b, end, val_offset, val));
RESIZE_DYNARRAY(s->stack, s->stack_len+1);
struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack);
- frame->message_descriptor = field_descriptor->d.message;
- frame->end_offset = 0; /* TODO: set this correctly. */
+ frame->message_descriptor = fd->d.message;
+ frame->end_offset = val->v.delimited.offset + val->v.delimited.len;
+ s->offset = wv->v.delimited.offset; /* skip past only the tag. */
int num_seen_fields = frame->message_descriptor->num_seen_fields;
INIT_DYNARRAY(frame->seen_fields, num_seen_fields, num_seen_fields);
} else {
/* This is a scalar value. */
- struct pbstream_value value;
- pbstream_translate_field(wire_value, field_descriptor->type, &value);
- if(s->callbacks.value_callback) {
- s->callbacks.value_callback(field_descriptor, &value, s->user_data);
- }
- }
-}
-
-struct pbstream_field_descriptor *find_field_descriptor_by_number(
- struct pbstream_message_descriptor* message_descriptor,
- pbstream_field_number_t field_number)
-{
- /* Currently a linear search -- could be optimized to do a binary search, hash
- * table lookup, or any other number of clever things you might imagine. */
- for (int i = 0; i < message_descriptor->fields_len; i++)
- if (message_descriptor->fields[i].field_number == field_number)
- return &message_descriptor->fields[i];
- return NULL;
-}
-
-/* Parses and processes the next value from *buf (but not past end), returning
- * a status indicating whether the operation succeeded, and calling appropriate
- * callbacks. If more data is needed to parse the last partial field, returns
- * how many more bytes are needed in need_more_bytes. Updates *buf to point
- * past the parsed value if the operation succeeds. */
-enum pbstream_status pbstream_parse_field(struct pbstream_parse_state *s,
- char **buf, char *end,
- int *need_more_bytes)
-{
- struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack);
- struct pbstream_message_descriptor *message_descriptor =
- frame->message_descriptor;
- pbstream_field_number_t field_number;
- struct pbstream_wire_value wire_value;
- enum pbstream_status status;
-
- /* Decode the raw wire data. */
- status = pbstream_parse_wire_value(buf, end, &field_number, &wire_value,
- need_more_bytes);
-
- if(unlikely(status != PBSTREAM_STATUS_OK)) {
- if(status == PBSTREAM_ERROR_UNTERMINATED_VARINT &&
- s->callbacks.error_callback) {
- /* TODO: a nice formatted message. */
- s->callbacks.error_callback(PBSTREAM_ERROR_UNTERMINATED_VARINT, NULL,
- s->offset, true);
- }
- s->fatal_error = true;
- return status;
- }
-
- /* Find the corresponding field definition from the .proto file. */
- struct pbstream_field_descriptor *field_descriptor;
- field_descriptor = find_field_descriptor_by_number(message_descriptor,
- field_number);
-
- if(likely(field_descriptor != NULL)) {
- if(field_descriptor->seen_field_num > 0) { /* for non-repeated fields */
- /* Check that this field has not been seen before. */
- if(frame->seen_fields[field_descriptor->seen_field_num]) {
- if(s->callbacks.error_callback)
- s->callbacks.error_callback(PBSTREAM_ERROR_DUPLICATE_FIELD, NULL,
- s->offset, false);
- return PBSTREAM_STATUS_ERROR;
- }
- /* Mark the field as seen. */
- frame->seen_fields[field_descriptor->seen_field_num] = true;
- }
- process_value(s, &wire_value, field_descriptor);
- } else {
- /* This field was not defined in the .proto file. */
- if(s->callbacks.unknown_value_callback)
- s->callbacks.unknown_value_callback(field_number, &wire_value,
- s->user_data);
+ *fieldnum = tag.field_number;
+ val->type = fd->type;
+ CHECK(info->get(&b, end, val_offset, val));
+ s->offset += (b-buf);
}
return PBSTREAM_STATUS_OK;
-}
-
-/* Process actions associated with the end of a submessage. This includes:
- * - emitting default values for all optional elements (either explicit
- * defaults or implicit defaults).
- * - emitting errors for any required fields that were not seen.
- * - calling the user's callback.
- * - popping the stack frame. */
-void process_submessage_end(struct pbstream_parse_state *s)
-{
- /* TODO: emit default values for optional elements. either explicit defaults
- * (specified in the .proto file) or implicit defaults (which are specified
- * in the pbstream definition, by type. */
-
- /* TODO: emit errors for required fields that were not seen. */
-
- /* Process the end of message by calling the user's callback and popping
- * our stack frame. */
- if(s->callbacks.end_message_callback)
- s->callbacks.end_message_callback(s->user_data);
-
- /* Pop the stack frame associated with this submessage. */
- RESIZE_DYNARRAY(s->stack, s->stack_len-1);
-}
-
-enum pbstream_status pbstream_parse(struct pbstream_parse_state *s,
- char *buf_start, int buf_len,
- int *consumed_bytes, int *need_more_bytes)
-{
- char *buf = buf_start;
- char *end = buf_start + buf_len;
- int buf_start_offset = s->offset;
- enum pbstream_status status = PBSTREAM_STATUS_OK;
-
- while(buf < end) {
- /* Check for a submessage ending. */
- while(s->offset >= DYNARRAY_GET_TOP(s->stack)->end_offset) {
- /* A submessage that doesn't end exactly on a field boundary indicates
- * corruption. */
- if(unlikely(s->offset != DYNARRAY_GET_TOP(s->stack)->end_offset)) {
- if(s->callbacks.error_callback) {
- s->callbacks.error_callback(PBSTREAM_ERROR_BAD_SUBMESSAGE_END, NULL,
- s->offset, true);
- }
- s->fatal_error = true;
- break;
- }
- process_submessage_end(s);
- }
- status = pbstream_parse_field(s, &buf, end, need_more_bytes);
- if(status != PBSTREAM_STATUS_OK)
- break;
- s->offset = buf_start_offset + (buf - buf_start);
- }
- return status;
+unknown_value:
+ wv->type = tag.wire_type;
+ CHECK(parse_unknown_value(&b, end, val_offset, wv));
+ s->offset += (b-buf);
+ return PBSTREAM_STATUS_OK;
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback