From faf6b5f3bfe9cae65817c8f94dc8770323276a92 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 23 Feb 2009 23:10:52 -0800 Subject: Massive changes (practically a rewrite). More compact and minimal. Still doesn't actually work yet, but much closer. --- pbstream.c | 613 ++++++++++++++++++++++++------------------------------------- 1 file changed, 240 insertions(+), 373 deletions(-) (limited to 'pbstream.c') diff --git a/pbstream.c b/pbstream.c index 070e429..dd91bb8 100644 --- a/pbstream.c +++ b/pbstream.c @@ -4,7 +4,6 @@ * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. */ -#include #include #include "pbstream.h" @@ -17,427 +16,295 @@ #define unlikely(x) (x) #endif -/* An array, indexed by pbstream_type, that indicates what wire type is - * expected for the given pbstream type. */ -static enum pbstream_wire_type expected_wire_type[] = { - PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_DOUBLE, - PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_FLOAT, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_INT32, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_INT64, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_UINT32, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_UINT64, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_SINT32, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_SINT64, - PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_FIXED32, - PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_FIXED64, - PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_SFIXED32, - PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_SFIXED64, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_BOOL, - PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_STRING, - PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_BYTES, - PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_ENUM, - PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_MESSAGE -}; +/* Lowest-level functions -- these read integers from the input buffer. + * To avoid branches, none of these do bounds checking. So we force clients + * to overallocate their buffers by >=9 bytes. */ -/* Reads a varint starting at buf (but not past end), storing the result - * in out_value. Returns whether the operation was successful. */ -enum pbstream_status get_varint(char **buf, char *end, uint64_t *out_value) +static pbstream_status_t get_v_uint64_t(char **buf, char *end, uint64_t *val) { - *out_value = 0; - int bitpos = 0; - char *b = *buf; - - /* Because we don't check for buffer overrun inside the loop, we require - * that callers use a buffer that is overallocated by at least 9 bytes (the - * maximum we can overrun before the bitpos check catches the problem). */ - for(; *b & 0x80 && bitpos < 64; bitpos += 7, b++) - *out_value |= (uint64_t)(*b & 0x7F) << bitpos; - - /* If bitpos is 63 (as it will be if this was a nine-byte varint) this will - * throw away the middle six bits of the final byte. We don't bother warning - * about this. */ - *out_value |= (uint64_t)(*b & 0x7F) << bitpos; - b++; - - if(unlikely(bitpos >= 64)) return PBSTREAM_ERROR_UNTERMINATED_VARINT; - if(unlikely(b > end)) return PBSTREAM_STATUS_INCOMPLETE; - *buf = b; - return PBSTREAM_STATUS_OK; + uint8_t* ptr = (uint8_t*)*buf; + uint32_t b; + uint32_t part0 = 0, part1 = 0, part2 = 0; + + b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + return PBSTREAM_ERROR_UNTERMINATED_VARINT; + +done: + *buf = (char*)ptr; + *val = (uint64_t)part0 | ((uint64_t)part1 << 28) | ((uint64_t)part2 << 56); + return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE : PBSTREAM_STATUS_OK; } -/* TODO: the little-endian versions of these functions don't respect alignment. - * While it's hard to believe that this could be less efficient than the - * alternative (the big endian implementation), this deserves some tests and - * measurements to be sure. */ -enum pbstream_status get_32_le(char **buf, char *end, uint32_t *out_value) +static pbstream_status_t get_v_uint32_t(char **buf, char *end, uint32_t *val) { - char *b = *buf; - char *int32_end = b+4; - if(unlikely(int32_end > end)) return PBSTREAM_STATUS_INCOMPLETE; -#if __BYTE_ORDER == __LITTLE_ENDIAN - *out_value = *(uint32_t*)b; -#else - *out_value = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); -#endif - *buf = int32_end; - return PBSTREAM_STATUS_OK; + uint8_t* ptr = (uint8_t*)*buf; + uint32_t b; + uint32_t result; + + b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); result = (b & 0x7F) << 28; if (!(b & 0x80)) goto done; + return PBSTREAM_ERROR_UNTERMINATED_VARINT; + +done: + *buf = (char*)ptr; + *val = result; + return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE: PBSTREAM_STATUS_OK; } -bool get_64_le(char **buf, char *end, uint64_t *out_value) +static pbstream_status_t get_f_uint32_t(char **buf, char *end, uint32_t *val) { - char *b = *buf; - char *int64_end = b+8; - if(unlikely(int64_end > end)) return PBSTREAM_STATUS_INCOMPLETE; + uint8_t *b = (uint8_t*)*buf; #if __BYTE_ORDER == __LITTLE_ENDIAN - *out_value = *(uint64_t*)buf; + *val = *(uint32_t*)b; /* likely unaligned, TODO: verify performance. */ #else - *out_value = (b[0]) | (b[1] << 8 ) | (b[2] << 16) | (b[3] << 24) | - (b[4] << 32) | (b[5] << 40) | (b[6] << 48) | (b[7] << 56); + *val = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); #endif - *buf = int64_end; - return PBSTREAM_STATUS_OK; + *buf = (char*)b + sizeof(uint32_t); + return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE : PBSTREAM_STATUS_OK; } -int32_t zigzag_decode_32(uint32_t n) +static pbstream_status_t get_f_uint64_t(char **buf, char *end, uint64_t *val) { - return (n >> 1) ^ -(int32_t)(n & 1); + uint8_t *b = (uint8_t*)*buf; +#if __BYTE_ORDER == __LITTLE_ENDIAN + *val = *(uint64_t*)buf; /* likely unaligned, TODO: verify performance. */ +#else + *val = (b[0]) | (b[1] << 8 ) | (b[2] << 16) | (b[3] << 24) | + (b[4] << 32) | (b[5] << 40) | (b[6] << 48) | (b[7] << 56); +#endif + *buf = (char*)b + sizeof(uint64_t); + return unlikely(*buf > end) ? PBSTREAM_STATUS_INCOMPLETE : PBSTREAM_STATUS_OK; } -int64_t zigzag_decode_64(uint64_t n) -{ - return (n >> 1) ^ -(int64_t)(n & 1); -} +static int32_t zz_decode_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -/* Parses the next field-number/wire-value pair from the stream of bytes - * starting at *buf, without reading past end. Stores the parsed and wire - * value in *field_number and *wire_value, respectively. - * - * Returns a status indicating whether the operation was successful. If the - * return status is STATUS_INCOMPLETE, returns the number of additional bytes - * requred in *need_more_bytes. Updates *buf to point past the end of the - * parsed data if the operation was successful. - */ -enum pbstream_status pbstream_parse_wire_value( - char **buf, char *end, - pbstream_field_number_t *field_number, - struct pbstream_wire_value *wire_value, - int *need_more_bytes) -{ - char *b = *buf; /* Our local buf pointer -- only update buf if we succeed. */ - -#define DECODE(dest, func) \ - do { \ - enum pbstream_status status = func(&b, end, &dest); \ - if(unlikely(status != PBSTREAM_STATUS_OK)) { \ - *need_more_bytes = 0; /* This only arises below in this function. */ \ - return status; \ - } \ +#define CHECK(func) do { \ + pbstream_wire_type_t status = func; \ + if(status != PBSTREAM_STATUS_OK) return status; \ } while (0) - uint64_t key; - DECODE(key, get_varint); +/* WVTOV() generates a function: + * void wvtov_TYPE(wire_t src, val_t *dst, size_t offset) + * (macro invoker defines the body of the function). */ +#define WVTOV(type, wire_t, val_t) \ + static void wvtov_ ## type(wire_t s, val_t *d, size_t offset) + +/* GET() generates a function: + * pbstream_status_t get_TYPE(char **buf, char *end, size_t offset, + * pbstream_value *dst) */ +#define GET(type, v_or_f, wire_t, val_t, member_name) \ + static pbstream_status_t get_ ## type(char **buf, char *end, size_t offset, \ + struct pbstream_value *d) { \ + wire_t tmp; \ + CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp)); \ + wvtov_ ## type(tmp, &d->v.member_name, offset); \ + return PBSTREAM_STATUS_OK; \ + } - *field_number = key >> 3; - wire_value->type = key & 0x07; +#define T(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ + GET(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t) + +T(DOUBLE, f, uint64_t, double, _double){ memcpy(d, &s, sizeof(double)); } +T(FLOAT, f, uint32_t, float, _float) { memcpy(d, &s, sizeof(float)); } +T(INT32, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { *d = (int64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { *d = s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { *d = s; } +T(SINT32, v, uint32_t, int32_t, int32) { *d = zz_decode_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { *d = zz_decode_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { *d = s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { *d = s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; } +T(ENUM, v, uint32_t, int32_t, _enum) { *d = (int32_t)s; } + +#define T_DELIMITED(type) \ + T(type, v, uint32_t, struct pbstream_delimited, delimited) { \ + d->offset = offset; \ + d->len = s; \ + } +T_DELIMITED(STRING); /* We leave UTF-8 validation to the client. */ +T_DELIMITED(BYTES); +T_DELIMITED(MESSAGE); +#undef WVTOV +#undef GET +#undef T +#undef T_DELIMITED + +struct pbstream_type_info { + pbstream_wire_type_t expected_wire_type; + pbstream_status_t (*get)(char **buf, char *end, size_t offset, + struct pbstream_value *d); +}; +static struct pbstream_type_info type_info[] = { + {PBSTREAM_WIRE_TYPE_64BIT, get_DOUBLE}, + {PBSTREAM_WIRE_TYPE_32BIT, get_FLOAT}, + {PBSTREAM_WIRE_TYPE_VARINT, get_INT32}, + {PBSTREAM_WIRE_TYPE_VARINT, get_INT64}, + {PBSTREAM_WIRE_TYPE_VARINT, get_UINT32}, + {PBSTREAM_WIRE_TYPE_VARINT, get_UINT64}, + {PBSTREAM_WIRE_TYPE_VARINT, get_SINT32}, + {PBSTREAM_WIRE_TYPE_VARINT, get_SINT64}, + {PBSTREAM_WIRE_TYPE_32BIT, get_FIXED32}, + {PBSTREAM_WIRE_TYPE_64BIT, get_FIXED64}, + {PBSTREAM_WIRE_TYPE_32BIT, get_SFIXED32}, + {PBSTREAM_WIRE_TYPE_64BIT, get_SFIXED64}, + {PBSTREAM_WIRE_TYPE_VARINT, get_BOOL}, + {PBSTREAM_WIRE_TYPE_DELIMITED, get_STRING}, + {PBSTREAM_WIRE_TYPE_DELIMITED, get_BYTES}, + {PBSTREAM_WIRE_TYPE_VARINT, get_ENUM}, + {PBSTREAM_WIRE_TYPE_DELIMITED, get_MESSAGE} +}; - switch(wire_value->type) { - case PBSTREAM_WIRE_TYPE_VARINT: - DECODE(wire_value->v.varint, get_varint); - break; +static pbstream_status_t parse_tag(char **buf, char *end, struct pbstream_tag *tag) +{ + uint32_t tag_int; + CHECK(get_v_uint32_t(buf, end, &tag_int)); + tag->wire_type = tag_int & 0x07; + tag->field_number = tag_int >> 3; + return PBSTREAM_STATUS_OK; +} +static pbstream_status_t parse_unknown_value( + char **buf, char *end, int buf_offset, + struct pbstream_wire_value *wv) +{ +#define DECODE(dest, func) CHECK(func(buf, end, &dest)) + switch(wv->type) { + case PBSTREAM_WIRE_TYPE_VARINT: + DECODE(wv->v.varint, get_v_uint64_t); break; case PBSTREAM_WIRE_TYPE_64BIT: - DECODE(wire_value->v._64bit, get_64_le); - break; - - case PBSTREAM_WIRE_TYPE_STRING: { - uint64_t string_len; - DECODE(string_len, get_varint); - if (unlikely(string_len > INT_MAX)) { - /* TODO: notice this and fail. */ - } - wire_value->v.string.len = (int)string_len; - if(b + wire_value->v.string.len > end) { - *need_more_bytes = b + wire_value->v.string.len - end; - return PBSTREAM_STATUS_INCOMPLETE; - } - wire_value->v.string.data = b; - b += wire_value->v.string.len; + DECODE(wv->v._64bit, get_f_uint64_t); break; + case PBSTREAM_WIRE_TYPE_32BIT: + DECODE(wv->v._32bit, get_f_uint32_t); break; + case PBSTREAM_WIRE_TYPE_DELIMITED: { + uint32_t len; + wv->v.delimited.offset = buf_offset; + DECODE(len, get_v_uint32_t); + wv->v.delimited.len = (size_t)len; break; } - case PBSTREAM_WIRE_TYPE_START_GROUP: case PBSTREAM_WIRE_TYPE_END_GROUP: /* TODO (though these are deprecated, so not high priority). */ break; - - case PBSTREAM_WIRE_TYPE_32BIT: - DECODE(wire_value->v._32bit, get_32_le); - break; } - - *buf = b; - return true; + return PBSTREAM_STATUS_OK; +#undef DECODE } -/* Translates from a wire value to a .proto value. The caller should have - * already checked that the wire_value is of the correct type. The pbstream - * type must not be PBSTREAM_TYPE_MESSAGE. This operation always succeeds. */ -void pbstream_translate_field(struct pbstream_wire_value *wire_value, - enum pbstream_type type, - struct pbstream_value *out_value) -{ - out_value->type = type; - switch(type) { - case PBSTREAM_TYPE_DOUBLE: - memcpy(&out_value->v._double, &wire_value->v._64bit, sizeof(double)); - break; - - case PBSTREAM_TYPE_FLOAT: - memcpy(&out_value->v._float, &wire_value->v._32bit, sizeof(float)); - break; - - case PBSTREAM_TYPE_INT32: - out_value->v.int32 = (int32_t)wire_value->v.varint; - break; - - case PBSTREAM_TYPE_INT64: - out_value->v.int64 = (int64_t)zigzag_decode_64(wire_value->v.varint); - break; - - case PBSTREAM_TYPE_UINT32: - out_value->v.uint32 = (uint32_t)wire_value->v.varint; - break; - - case PBSTREAM_TYPE_UINT64: - out_value->v.uint64 = (uint64_t)wire_value->v.varint; - break; - - case PBSTREAM_TYPE_SINT32: - out_value->v.int32 = zigzag_decode_32(wire_value->v.varint); - break; - - case PBSTREAM_TYPE_SINT64: - out_value->v.int64 = zigzag_decode_64(wire_value->v.varint); - break; - - case PBSTREAM_TYPE_FIXED32: - out_value->v.int32 = wire_value->v._32bit; - break; - - case PBSTREAM_TYPE_FIXED64: - out_value->v.int64 = wire_value->v._64bit; - break; - - case PBSTREAM_TYPE_SFIXED32: - out_value->v.int32 = (int32_t)wire_value->v._32bit; - break; - - case PBSTREAM_TYPE_SFIXED64: - out_value->v.int64 = (int64_t)wire_value->v._64bit; - break; - - case PBSTREAM_TYPE_BOOL: - out_value->v._bool = (bool)wire_value->v.varint; - break; - - case PBSTREAM_TYPE_STRING: - out_value->v.string.data = wire_value->v.string.data; - out_value->v.string.len = wire_value->v.string.len; - /* TODO: validate UTF-8? */ - break; - - case PBSTREAM_TYPE_BYTES: - out_value->v.bytes.data = wire_value->v.string.data; - out_value->v.bytes.len = wire_value->v.string.len; - break; +#define CALLBACK(s, func, ...) do { \ + if(s->callbacks.func) s->callbacks.func(__VA_ARGS__); \ + } while (0) - case PBSTREAM_TYPE_ENUM: - out_value->v._enum = (bool)wire_value->v.varint; - break; +#define NONFATAL_ERROR(s, code) do { \ + if(s->ignore_nonfatal_errors) CALLBACK(s, error_callback, code); \ + else return code; \ + } while (0) - case PBSTREAM_TYPE_MESSAGE: - /* Should never happen. */ - break; - } +static struct pbstream_field_descriptor *find_field_descriptor( + struct pbstream_message_descriptor* md, + pbstream_field_number_t field_number) +{ + /* Likely will want to replace linear search with something better. */ + for (int i = 0; i < md->fields_len; i++) + if (md->fields[i].field_number == field_number) return &md->fields[i]; + return NULL; } -/* Given a wire value that was just parsed and a matching field descriptor, - * processes the given value and performs the appropriate actions. These - * actions include: - * - checking that the wire type is as expected - * - converting the wire type to a .proto type - * - entering a sub-message, if that is in fact what this field implies. - * - * This function also calls user callbacks pertaining to any of the above at - * the appropriate times. */ -void process_value(struct pbstream_parse_state *s, - struct pbstream_wire_value *wire_value, - struct pbstream_field_descriptor *field_descriptor) +/* Process actions associated with the end of a [sub-]message. */ +pbstream_status_t process_message_end(struct pbstream_parse_state *s) { - /* Check that the wire type is appropriate for this .proto type. */ - if(unlikely(wire_value->type != expected_wire_type[field_descriptor->type])) { - /* Type mismatch. */ - if(s->callbacks.error_callback) { - /* TODO: a nice formatted message. */ - s->callbacks.error_callback(PBSTREAM_ERROR_MISMATCHED_TYPE, NULL, - s->offset, false); + struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack); + /* A submessage that doesn't end exactly on a field boundary indicates + * corruption. */ + if(unlikely(s->offset != frame->end_offset)) + return PBSTREAM_ERROR_BAD_SUBMESSAGE_END; + + /* Check required fields. */ + struct pbstream_message_descriptor *md = frame->message_descriptor; + for(int i = 0; i < md->fields_len; i++) { + struct pbstream_field_descriptor *fd = &md->fields[i]; + if(fd->seen_field_num && !frame->seen_fields[fd->seen_field_num] && + fd->cardinality == PBSTREAM_CARDINALITY_REQUIRED) { + NONFATAL_ERROR(s, PBSTREAM_ERROR_MISSING_REQUIRED_FIELD); } + } + RESIZE_DYNARRAY(s->stack, s->stack_len-1); + return PBSTREAM_STATUS_OK; +} - /* Report the wire value we parsed as an unknown value. */ - if(s->callbacks.unknown_value_callback) { - s->callbacks.unknown_value_callback(field_descriptor->field_number, - wire_value, - s->user_data); - } - return; +/* Parses and processes the next value from buf (but not past end). */ +pbstream_status_t parse_field(struct pbstream_parse_state *s, + char *buf, char *end, + pbstream_field_number_t *fieldnum, + struct pbstream_value *val, + struct pbstream_wire_value *wv) +{ + struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack); + struct pbstream_message_descriptor *md = frame->message_descriptor; + struct pbstream_tag tag; + struct pbstream_field_descriptor *fd; + struct pbstream_type_info *info; + char *b = buf; + + if(unlikely(s->offset >= frame->end_offset)) return process_message_end(s); + + CHECK(parse_tag(&b, end, &tag)); + size_t val_offset = s->offset + (b-buf); + fd = find_field_descriptor(md, tag.field_number); + if(unlikely(!fd)) goto unknown_value; + info = &type_info[fd->type]; + + /* Check type and cardinality. */ + if(unlikely(tag.wire_type != info->expected_wire_type)) { + NONFATAL_ERROR(s, PBSTREAM_ERROR_MISMATCHED_TYPE); + goto unknown_value; + } + if(fd->seen_field_num > 0) { + if(unlikely(frame->seen_fields[fd->seen_field_num])) + NONFATAL_ERROR(s, PBSTREAM_ERROR_DUPLICATE_FIELD); + frame->seen_fields[fd->seen_field_num] = true; } - if(field_descriptor->type == PBSTREAM_TYPE_MESSAGE) { + if(unlikely(fd->type == PBSTREAM_TYPE_MESSAGE)) { /* We're entering a sub-message. */ - if(s->callbacks.begin_message_callback) { - s->callbacks.begin_message_callback(field_descriptor->d.message, - s->user_data); - } - - /* Push and initialize a new stack frame. */ + CHECK(info->get(&b, end, val_offset, val)); RESIZE_DYNARRAY(s->stack, s->stack_len+1); struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack); - frame->message_descriptor = field_descriptor->d.message; - frame->end_offset = 0; /* TODO: set this correctly. */ + frame->message_descriptor = fd->d.message; + frame->end_offset = val->v.delimited.offset + val->v.delimited.len; + s->offset = wv->v.delimited.offset; /* skip past only the tag. */ int num_seen_fields = frame->message_descriptor->num_seen_fields; INIT_DYNARRAY(frame->seen_fields, num_seen_fields, num_seen_fields); } else { /* This is a scalar value. */ - struct pbstream_value value; - pbstream_translate_field(wire_value, field_descriptor->type, &value); - if(s->callbacks.value_callback) { - s->callbacks.value_callback(field_descriptor, &value, s->user_data); - } - } -} - -struct pbstream_field_descriptor *find_field_descriptor_by_number( - struct pbstream_message_descriptor* message_descriptor, - pbstream_field_number_t field_number) -{ - /* Currently a linear search -- could be optimized to do a binary search, hash - * table lookup, or any other number of clever things you might imagine. */ - for (int i = 0; i < message_descriptor->fields_len; i++) - if (message_descriptor->fields[i].field_number == field_number) - return &message_descriptor->fields[i]; - return NULL; -} - -/* Parses and processes the next value from *buf (but not past end), returning - * a status indicating whether the operation succeeded, and calling appropriate - * callbacks. If more data is needed to parse the last partial field, returns - * how many more bytes are needed in need_more_bytes. Updates *buf to point - * past the parsed value if the operation succeeds. */ -enum pbstream_status pbstream_parse_field(struct pbstream_parse_state *s, - char **buf, char *end, - int *need_more_bytes) -{ - struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack); - struct pbstream_message_descriptor *message_descriptor = - frame->message_descriptor; - pbstream_field_number_t field_number; - struct pbstream_wire_value wire_value; - enum pbstream_status status; - - /* Decode the raw wire data. */ - status = pbstream_parse_wire_value(buf, end, &field_number, &wire_value, - need_more_bytes); - - if(unlikely(status != PBSTREAM_STATUS_OK)) { - if(status == PBSTREAM_ERROR_UNTERMINATED_VARINT && - s->callbacks.error_callback) { - /* TODO: a nice formatted message. */ - s->callbacks.error_callback(PBSTREAM_ERROR_UNTERMINATED_VARINT, NULL, - s->offset, true); - } - s->fatal_error = true; - return status; - } - - /* Find the corresponding field definition from the .proto file. */ - struct pbstream_field_descriptor *field_descriptor; - field_descriptor = find_field_descriptor_by_number(message_descriptor, - field_number); - - if(likely(field_descriptor != NULL)) { - if(field_descriptor->seen_field_num > 0) { /* for non-repeated fields */ - /* Check that this field has not been seen before. */ - if(frame->seen_fields[field_descriptor->seen_field_num]) { - if(s->callbacks.error_callback) - s->callbacks.error_callback(PBSTREAM_ERROR_DUPLICATE_FIELD, NULL, - s->offset, false); - return PBSTREAM_STATUS_ERROR; - } - /* Mark the field as seen. */ - frame->seen_fields[field_descriptor->seen_field_num] = true; - } - process_value(s, &wire_value, field_descriptor); - } else { - /* This field was not defined in the .proto file. */ - if(s->callbacks.unknown_value_callback) - s->callbacks.unknown_value_callback(field_number, &wire_value, - s->user_data); + *fieldnum = tag.field_number; + val->type = fd->type; + CHECK(info->get(&b, end, val_offset, val)); + s->offset += (b-buf); } return PBSTREAM_STATUS_OK; -} - -/* Process actions associated with the end of a submessage. This includes: - * - emitting default values for all optional elements (either explicit - * defaults or implicit defaults). - * - emitting errors for any required fields that were not seen. - * - calling the user's callback. - * - popping the stack frame. */ -void process_submessage_end(struct pbstream_parse_state *s) -{ - /* TODO: emit default values for optional elements. either explicit defaults - * (specified in the .proto file) or implicit defaults (which are specified - * in the pbstream definition, by type. */ - - /* TODO: emit errors for required fields that were not seen. */ - - /* Process the end of message by calling the user's callback and popping - * our stack frame. */ - if(s->callbacks.end_message_callback) - s->callbacks.end_message_callback(s->user_data); - - /* Pop the stack frame associated with this submessage. */ - RESIZE_DYNARRAY(s->stack, s->stack_len-1); -} - -enum pbstream_status pbstream_parse(struct pbstream_parse_state *s, - char *buf_start, int buf_len, - int *consumed_bytes, int *need_more_bytes) -{ - char *buf = buf_start; - char *end = buf_start + buf_len; - int buf_start_offset = s->offset; - enum pbstream_status status = PBSTREAM_STATUS_OK; - - while(buf < end) { - /* Check for a submessage ending. */ - while(s->offset >= DYNARRAY_GET_TOP(s->stack)->end_offset) { - /* A submessage that doesn't end exactly on a field boundary indicates - * corruption. */ - if(unlikely(s->offset != DYNARRAY_GET_TOP(s->stack)->end_offset)) { - if(s->callbacks.error_callback) { - s->callbacks.error_callback(PBSTREAM_ERROR_BAD_SUBMESSAGE_END, NULL, - s->offset, true); - } - s->fatal_error = true; - break; - } - process_submessage_end(s); - } - status = pbstream_parse_field(s, &buf, end, need_more_bytes); - if(status != PBSTREAM_STATUS_OK) - break; - s->offset = buf_start_offset + (buf - buf_start); - } - return status; +unknown_value: + wv->type = tag.wire_type; + CHECK(parse_unknown_value(&b, end, val_offset, wv)); + s->offset += (b-buf); + return PBSTREAM_STATUS_OK; } -- cgit v1.2.3