From f813688f3ed0880e4db168c6b1baf9039d39eeee Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 30 Jun 2009 19:45:32 -0700 Subject: Major revision to upb_parse. --- tests.c | 14 +-- upb.h | 21 ++++- upb_parse.c | 304 +++++++++++++++++++++++++++++------------------------------- upb_parse.h | 66 +++++++------ 4 files changed, 214 insertions(+), 191 deletions(-) diff --git a/tests.c b/tests.c index 128e43b..de8e951 100644 --- a/tests.c +++ b/tests.c @@ -10,7 +10,7 @@ void test_get_v_uint64_t() uint8_t zero[] = {0x00}; void *zero_buf = zero; uint64_t zero_val = 0; - status = get_v_uint64_t(&zero_buf, sizeof(zero), &zero_val); + status = get_v_uint64_t(&zero_buf, zero + sizeof(zero), &zero_val); assert(status == UPB_STATUS_OK); assert(zero_val == 0); assert(zero_buf == zero + sizeof(zero)); @@ -18,7 +18,7 @@ void test_get_v_uint64_t() uint8_t one[] = {0x01}; void *one_buf = one; uint64_t one_val = 0; - status = get_v_uint64_t(&one_buf, sizeof(one), &one_val); + status = get_v_uint64_t(&one_buf, one + sizeof(one), &one_val); assert(status == UPB_STATUS_OK); assert(one_val == 1); assert(one_buf == one + sizeof(one)); @@ -26,7 +26,7 @@ void test_get_v_uint64_t() uint8_t twobyte[] = {0xAC, 0x02}; void *twobyte_buf = twobyte; uint64_t twobyte_val = 0; - status = get_v_uint64_t(&twobyte_buf, sizeof(twobyte), &twobyte_val); + status = get_v_uint64_t(&twobyte_buf, twobyte + sizeof(twobyte), &twobyte_val); assert(status == UPB_STATUS_OK); assert(twobyte_val == 300); assert(twobyte_buf == twobyte + sizeof(twobyte)); @@ -34,7 +34,7 @@ void test_get_v_uint64_t() uint8_t tenbyte[] = {0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x7F}; void *tenbyte_buf = tenbyte; uint64_t tenbyte_val = 0; - status = get_v_uint64_t(&tenbyte_buf, sizeof(tenbyte), &tenbyte_val); + status = get_v_uint64_t(&tenbyte_buf, tenbyte + sizeof(tenbyte), &tenbyte_val); assert(status == UPB_STATUS_OK); assert(tenbyte_val == 0x89101c305080c101); assert(tenbyte_buf == tenbyte + sizeof(tenbyte)); @@ -42,12 +42,12 @@ void test_get_v_uint64_t() uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01}; void *elevenbyte_buf = elevenbyte; uint64_t elevenbyte_val = 0; - status = get_v_uint64_t(&elevenbyte_buf, sizeof(elevenbyte), &elevenbyte_val); + status = get_v_uint64_t(&elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val); assert(status == UPB_ERROR_UNTERMINATED_VARINT); - status = get_v_uint64_t(&elevenbyte_buf, sizeof(elevenbyte)-1, &elevenbyte_val); + status = get_v_uint64_t(&elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val); /* Byte 10 is 0x80, so we know it's unterminated. */ assert(status == UPB_ERROR_UNTERMINATED_VARINT); - status = get_v_uint64_t(&elevenbyte_buf, sizeof(elevenbyte)-2, &elevenbyte_val); + status = get_v_uint64_t(&elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val); assert(status == UPB_STATUS_NEED_MORE_DATA); } diff --git a/upb.h b/upb.h index 3281a0e..39ca0de 100644 --- a/upb.h +++ b/upb.h @@ -16,6 +16,15 @@ extern "C" { #endif +/* Branch prediction hints for GCC. */ +#ifdef __GNUC__ +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + /* inline if possible, emit standalone code if required. */ #define INLINE static inline @@ -113,6 +122,9 @@ typedef enum upb_status { // The input byte stream ended in the middle of a record. UPB_STATUS_NEED_MORE_DATA = 1, + // The user value callback opted to stop parsing. + UPB_STATUS_USER_CANCELLED = 2, + // A varint did not terminate before hitting 64 bits. UPB_ERROR_UNTERMINATED_VARINT = -1, @@ -126,9 +138,16 @@ typedef enum upb_status { UPB_ERROR_OVERFLOW = -4, // An "end group" tag was encountered in an inappropriate place. - UPB_ERROR_SPURIOUS_END_GROUP = -5 + UPB_ERROR_SPURIOUS_END_GROUP = -5, + + UPB_ERROR_ILLEGAL = -6 } upb_status_t; +#define UPB_CHECK(func) do { \ + upb_status_t status = func; \ + if(status != UPB_STATUS_OK) return status; \ + } while (0) + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb_parse.c b/upb_parse.c index 46d6d86..673241d 100644 --- a/upb_parse.c +++ b/upb_parse.c @@ -12,112 +12,92 @@ #include #include "descriptor.h" -/* Branch prediction hints for GCC. */ -#ifdef __GNUC__ -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) -#else -#define likely(x) (x) -#define unlikely(x) (x) -#endif - -#define CHECK(func) do { \ - upb_status_t status = func; \ - if(status != UPB_STATUS_OK) return status; \ - } while (0) - /* Lowest-level functions -- these read integers from the input buffer. */ -static size_t min(size_t a, size_t b) { return a < b ? a : b; } +static void *check_end(uint8_t *buf, void *end, size_t maxlen, + upb_status_t *bound_error) +{ + void *maxend = buf + maxlen; + if(end < maxend) { + *bound_error = UPB_STATUS_NEED_MORE_DATA; + return end; + } else { + *bound_error = UPB_ERROR_UNTERMINATED_VARINT; + return maxend; + } +} -static upb_status_t get_v_uint64_t(void *restrict *buf, size_t len, +static upb_status_t get_v_uint64_t(void *restrict *buf, void *end, uint64_t *restrict val) { - uint32_t bitpos, bytes = min(len, 10); uint8_t *b = *buf; - uint8_t *end = b + bytes; + upb_status_t bound_error; + end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */ uint8_t last = 0x80; *val = 0; - for(bitpos = 0; b < end && (last & 0x80); b++, bitpos += 7) + for(int bitpos = 0; b < (uint8_t*)end && (last & 0x80); b++, bitpos += 7) *val |= ((uint64_t)((last = *b) & 0x7F)) << bitpos; - if(unlikely(last & 0x80)) { - return bytes < 10 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT; - } else { - *buf = b; - return UPB_STATUS_OK; - } + if(unlikely(last & 0x80)) return bound_error; + *buf = b; + return UPB_STATUS_OK; } -static upb_status_t skip_v_uint64_t(void **buf, size_t len) +static upb_status_t skip_v_uint64_t(void **buf, void *end) { - uint32_t bytes = min(len, 10); uint8_t *b = *buf; - uint8_t *end = b + bytes; + upb_status_t bound_error; + end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */ uint8_t last = 0x80; - for(; b < end && (last & 0x80); b++) + for(; b < (uint8_t*)end && (last & 0x80); b++) last = *b; - if(unlikely(b == end)) { - return bytes < 10 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT; - } else { - *buf = b; - return UPB_STATUS_OK; - } + if(unlikely(last & 0x80)) return bound_error; + *buf = b; + return UPB_STATUS_OK; } -static upb_status_t get_v_uint32_t(void *restrict *buf, size_t len, +static upb_status_t get_v_uint32_t(void *restrict *buf, void *end, uint32_t *restrict val) { - uint32_t bitpos, bytes = min(len, 5); uint8_t *b = *buf; - uint8_t *end = b + bytes; + upb_status_t bound_error; + end = check_end(b, end, 5, &bound_error); /* 2**32 is a 5-byte varint. */ uint8_t last = 0x80; *val = 0; - for(bitpos = 0; b < end && (last & 0x80); b++, bitpos += 7) + for(int bitpos = 0; b < (uint8_t*)end && (last & 0x80); b++, bitpos += 7) *val |= ((uint32_t)((last = *b) & 0x7F)) << bitpos; - if(unlikely(b == end)) { - return bytes < 5 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT; - } else { - *buf = b; - return UPB_STATUS_OK; - } + if(unlikely(last & 0x80)) return bound_error; + *buf = b; + return UPB_STATUS_OK; } -#define SHL(val, bits) ((uint32_t)val << bits) -static upb_status_t get_f_uint32_t(void *restrict *buf, size_t len, +static upb_status_t get_f_uint32_t(void *restrict *buf, void *end, uint32_t *restrict val) { - const uint8_t size = sizeof(uint32_t); - if(unlikely(len < size)) return UPB_STATUS_NEED_MORE_DATA; uint8_t *b = *buf; + void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t); + if(unlikely(uint32_end > end)) return UPB_STATUS_NEED_MORE_DATA; #if UPB_UNALIGNED_READS_OK *val = *(uint32_t*)b; #else +#define SHL(val, bits) ((uint32_t)val << bits) *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24); -#endif - b += size; - *buf = b; - return UPB_STATUS_OK; -} #undef SHL - -static upb_status_t skip_f_uint32_t(void **buf, size_t len) -{ - const uint8_t size = sizeof(uint32_t); - if(unlikely(len < size)) return UPB_STATUS_NEED_MORE_DATA; - *buf = (char*)*buf + size; +#endif + *buf = uint32_end; return UPB_STATUS_OK; } -static upb_status_t get_f_uint64_t(void *restrict *buf, size_t len, +static upb_status_t get_f_uint64_t(void *restrict *buf, void *end, uint64_t *restrict val) { - if(unlikely(len < sizeof(uint64_t))) return UPB_STATUS_NEED_MORE_DATA; + void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t); + if(unlikely(uint64_end > end)) return UPB_STATUS_NEED_MORE_DATA; #if UPB_UNALIGNED_READS_OK *val = *(uint64_t*)*buf; - *buf = (char*)*buf + sizeof(uint64_t); + *buf = uint64_end; #else uint32_t lo32, hi32; get_f_uint32_t(buf, &lo32); @@ -127,11 +107,19 @@ static upb_status_t get_f_uint64_t(void *restrict *buf, size_t len, return UPB_STATUS_OK; } -static upb_status_t skip_f_uint64_t(void **buf, size_t len) +static upb_status_t skip_f_uint32_t(void **buf, void *end) { - const uint8_t size = sizeof(uint64_t); - if(unlikely(len < size)) return UPB_STATUS_NEED_MORE_DATA; - *buf = (char*)*buf + size; + void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t); + if(unlikely(uint32_end > end)) return UPB_STATUS_NEED_MORE_DATA; + *buf = uint32_end; + return UPB_STATUS_OK; +} + +static upb_status_t skip_f_uint64_t(void **buf, void *end) +{ + void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t); + if(unlikely(uint64_end > end)) return UPB_STATUS_NEED_MORE_DATA; + *buf = uint64_end; return UPB_STATUS_OK; } @@ -145,9 +133,9 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } static void wvtov_ ## type(wire_t s, val_t *d) #define GET(type, v_or_f, wire_t, val_t, member_name) \ - static upb_status_t get_ ## type(void **buf, size_t len, val_t *d) { \ + static upb_status_t get_ ## type(void **buf, void *end, val_t *d) { \ wire_t tmp; \ - CHECK(get_ ## v_or_f ## _ ## wire_t(buf, len, &tmp)); \ + UPB_CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp)); \ wvtov_ ## type(tmp, d); \ return UPB_STATUS_OK; \ } @@ -187,9 +175,6 @@ struct upb_type_info upb_type_info[] = { [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT}, [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT}, [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {0,0,0}, [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED}, [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, @@ -197,69 +182,51 @@ struct upb_type_info upb_type_info[] = { [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT}, [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT}, [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {0,0,0}, }; -upb_status_t upb_parse_tag(void **buf, size_t len, struct upb_tag *tag) +upb_status_t upb_parse_tag(void **buf, void *end, struct upb_tag *tag) { uint32_t tag_int; - CHECK(get_v_uint32_t(buf, len, &tag_int)); + UPB_CHECK(get_v_uint32_t(buf, end, &tag_int)); tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return UPB_STATUS_OK; } -upb_status_t upb_parse_wire_value(void *buf, size_t len, size_t *offset, - upb_wire_type_t wt, +upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt, union upb_wire_value *wv) { -#define READ(expr) CHECK(expr); *offset += ((char*)b-(char*)buf) - void *b = buf; switch(wt) { - case UPB_WIRE_TYPE_VARINT: READ(get_v_uint64_t(&b, len, &wv->varint)); break; - case UPB_WIRE_TYPE_64BIT: READ(get_f_uint64_t(&b, len, &wv->_64bit)); break; - case UPB_WIRE_TYPE_32BIT: READ(get_f_uint32_t(&b, len, &wv->_32bit)); break; - case UPB_WIRE_TYPE_DELIMITED: - READ(get_v_uint32_t(&b, len, &wv->_32bit)); - size_t new_offset = *offset + wv->_32bit; - if (new_offset < *offset) return UPB_ERROR_OVERFLOW; - *offset = new_offset; - break; - case UPB_WIRE_TYPE_START_GROUP: - case UPB_WIRE_TYPE_END_GROUP: break; + case UPB_WIRE_TYPE_VARINT: UPB_CHECK(get_v_uint64_t(buf, end, &wv->varint)); break; + case UPB_WIRE_TYPE_64BIT: UPB_CHECK(get_f_uint64_t(buf, end, &wv->_64bit)); break; + case UPB_WIRE_TYPE_32BIT: UPB_CHECK(get_f_uint32_t(buf, end, &wv->_32bit)); break; + default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */ } return UPB_STATUS_OK; } -upb_status_t upb_skip_wire_value(void *buf, size_t len, size_t *offset, - upb_wire_type_t wt) +upb_status_t upb_skip_wire_value(void **buf, void *end, upb_wire_type_t wt) { - void *b = buf; switch(wt) { - case UPB_WIRE_TYPE_VARINT: READ(skip_v_uint64_t(&b, len)); break; - case UPB_WIRE_TYPE_64BIT: READ(skip_f_uint64_t(&b, len)); break; - case UPB_WIRE_TYPE_32BIT: READ(skip_f_uint32_t(&b, len)); break; - case UPB_WIRE_TYPE_DELIMITED: { - /* Have to get (not skip) the length to skip the bytes. */ - uint32_t delim_len; - READ(get_v_uint32_t(&b, len, &delim_len)); - size_t new_offset = *offset + delim_len; - if (new_offset < *offset) return UPB_ERROR_OVERFLOW; - *offset = new_offset; - break; - } + case UPB_WIRE_TYPE_VARINT: UPB_CHECK(skip_v_uint64_t(buf, end)); break; + case UPB_WIRE_TYPE_64BIT: UPB_CHECK(skip_f_uint64_t(buf, end)); break; + case UPB_WIRE_TYPE_32BIT: UPB_CHECK(skip_f_uint32_t(buf, end)); break; case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */ case UPB_WIRE_TYPE_END_GROUP: break; + default: return UPB_ERROR_ILLEGAL; } return UPB_STATUS_OK; -#undef READ } -upb_status_t upb_parse_value(void **b, size_t len, upb_field_type_t ft, +upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft, union upb_value *v) { #define CASE(t, member_name) \ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ - return get_ ## t(b, len, &v->member_name); + return get_ ## t(buf, end, &v->member_name); switch(ft) { CASE(DOUBLE, _double) CASE(FLOAT, _float) @@ -275,11 +242,7 @@ upb_status_t upb_parse_value(void **b, size_t len, upb_field_type_t ft, CASE(SFIXED64, int64) CASE(BOOL, _bool) CASE(ENUM, int32) - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: - return get_INT32(b, len, &v->int32); - default: return 0; /* Including GROUP -- groups have no value. */ + default: return UPB_ERROR_ILLEGAL; } #undef CASE } @@ -291,8 +254,6 @@ void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size) state->stack = state->top = malloc(stack_bytes); state->limit = (struct upb_parse_stack_frame*)((char*)state->stack + stack_bytes); state->udata_size = udata_size; - state->done = false; - state->packed_end_offset = 0; } void upb_parse_state_free(struct upb_parse_state *state) @@ -318,60 +279,89 @@ static upb_status_t push_stack_frame(struct upb_parse_state *s, size_t end, return UPB_STATUS_OK; } -#if 0 +static upb_status_t parse_delimited(struct upb_parse_state *s, + struct upb_tag *tag, + void **buf, void *end, + size_t base_offset) +{ + int32_t delim_len; + void *user_field_desc; + void *bufstart = *buf; + + /* Whether we are parsing or skipping the field, we always need to parse + * the length. */ + UPB_CHECK(get_INT32(buf, end, &delim_len)); + upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc); + if(unlikely(*buf < bufstart)) return UPB_ERROR_OVERFLOW; + if(unlikely(*buf > end && + ft != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE)) { + /* Streaming submessages is ok, but for other delimited types (string, + * bytes, and packed arrays) we require that all the delimited data is + * available. This could be relaxed if desired. */ + return UPB_STATUS_NEED_MORE_DATA; + } + + if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { + UPB_CHECK(push_stack_frame(s, base_offset + delim_len, user_field_desc)); + } else { + void *delim_end = (char*)*buf + delim_len; + if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || + ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) { + struct upb_string str = {.data = *buf, .byte_len = delim_len}; + s->str_cb(s, &str, user_field_desc); + *buf = delim_end; + } else { + /* Packed Array. */ + while(*buf < delim_end) + UPB_CHECK(s->value_cb(s, buf, end, ft, user_field_desc)); + } + } + return UPB_STATUS_OK; +} + +static upb_status_t parse_nondelimited(struct upb_parse_state *s, + struct upb_tag *tag, + void **buf, void *end) +{ + /* Simple value or begin group. */ + void *user_field_desc; + upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc); + if(ft == 0) { + UPB_CHECK(upb_skip_wire_value(buf, end, tag->wire_type)); + } else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) { + /* No length specified, an "end group" tag will mark the end. */ + UPB_CHECK(push_stack_frame(s, 0, user_field_desc)); + } else { + UPB_CHECK(s->value_cb(s, buf, end, ft, user_field_desc)); + } + return UPB_STATUS_OK; +} + upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len, size_t *read) { - size_t start_offset = s->offset; - size_t end_offset = start_offset + len; - while(!s->done && s->offset < end_offset) { - while(s->offset >= s->top->end_offset) pop_stack_frame(s); - while(s->packed_end_offset > s->offset) { - /* Parse a packed field entry. */ + void *end = (char*)buf + len; + *read = 0; + while(buf < end) { + while(s->offset >= s->top->end_offset) { + if(s->offset != s->top->end_offset) return UPB_ERROR_BAD_SUBMESSAGE_END; + pop_stack_frame(s); } struct upb_tag tag; - void *b = buf; - CHECK(upb_parse_tag(&b, len, &tag)); - int tag_bytes = ((char*)b - (char*)buf); - s->offset += tag_bytes; - buf = b; + void *bufstart = buf; + UPB_CHECK(upb_parse_tag(&buf, end, &tag)); if(unlikely(tag.wire_type == UPB_WIRE_TYPE_END_GROUP)) { if(unlikely(s->top->end_offset != 0)) return UPB_ERROR_SPURIOUS_END_GROUP; pop_stack_frame(s); - continue; - } - - void *user_field_desc; - //upb_field_type_t ft = s->tag_cb(s, &tag, &user_field_desc); - if(ft == 0) { - CHECK(upb_skip_wire_value(b, &s->offset, tag.wire_type)); - } else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) { - /* No length specified, an "end group" tag will mark the end. */ - push_stack_frame(s, 0, user_field_desc); + } else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { + parse_delimited(s, &tag, &buf, end, s->offset + (char*)buf - (char*)bufstart); } else { - /* For all other cases we parse the next value. */ - union upb_value v; - CHECK(upb_parse_value(&b, ft, &v)); - if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { - /* The value we parsed is the length of the submessage. */ - push_stack_frame(s, s->offset + v.delim_len, user_field_desc); - } else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || - ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) { - s->value_cb(s, &v, b, user_field_desc); - b = (char*)b + v.delim_len; - } else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { - /* Delimited data which is not a string, bytes, or a submessage. - * It must be a packed array. */ - s->packed_type = ft; - s->packed_end_offset = s->offset + v.delim_len; - } else { - /* The common case: a simple value. */ - s->value_cb(s, &v, b, user_field_desc); - } + parse_nondelimited(s, &tag, &buf, end); } + size_t bytes_read = ((char*)buf - (char*)bufstart); + *read += bytes_read; + s->offset += bytes_read; } - *read = s->offset - start_offset; return UPB_STATUS_OK; } -#endif diff --git a/upb_parse.h b/upb_parse.h index fbc5898..9d3d52d 100644 --- a/upb_parse.h +++ b/upb_parse.h @@ -29,14 +29,32 @@ void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size); void upb_parse_state_free(struct upb_parse_state *state); /* The callback that is called immediately after a tag has been parsed. The - * client must either advance the stream beyond the corresponding value or - * return an error to indicate that the stream should rewind to before the - * tag. + * client should determine whether it wants to parse or skip the corresponding + * value. If it wants to parse it, it must discover and return the correct + * .proto type (the tag only contains the wire type) and check that the wire + * type is appropriate for the .proto type. To skip the value (which means + * skipping all submessages, in the case of a submessage), the callback should + * return zero. */ +typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s, + struct upb_tag *tag, + void **user_field_desc); + +/* The callback that is called when a regular value (ie. not a string or + * submessage) is encountered which the client has opted to parse (by not + * returning 0 from the tag_cb). The client must parse the value and update + * buf accordingly, returning success or failure. * - * The client advances the stream beyond the corresponding value by either - * parsing the value or skipping it. */ -typedef upb_field_type_t (*upb_tag_cb)(void **buf, struct upb_parse_state *s, - struct upb_tag *tag); + * Note that this callback can be called several times in a row for a single + * call to tag_cb in the case of packed arrays. */ +typedef upb_status_t (*upb_value_cb)(struct upb_parse_state *s, + void **buf, void *end, + upb_field_type_t type, + void *user_field_desc); + +/* The callback that is called when a string is parsed. */ +typedef upb_status_t (*upb_str_cb)(struct upb_parse_state *s, + struct upb_string *str, + void *user_field_desc); /* Callbacks that are called when a submessage begins and ends, respectively. * Both are called with the submessage's stack frame at the top of the stack. */ @@ -55,13 +73,11 @@ struct upb_parse_state { size_t offset; struct upb_parse_stack_frame *stack, *top, *limit; size_t udata_size; /* How many bytes the user gets in each frame. */ - bool done; /* Any callback can abort processing by setting done=true. */ - /* These are only set if we're in the middle of a packed array. */ - size_t packed_end_offset; /* 0 if not in a packed array. */ - upb_field_type_t packed_type; - upb_tag_cb tag_cb; + upb_tag_cb tag_cb; + upb_value_cb value_cb; + upb_str_cb str_cb; upb_submsg_start_cb submsg_start_cb; - upb_submsg_end_cb submsg_end_cb; + upb_submsg_end_cb submsg_end_cb; }; /* Parses up to len bytes of protobuf data out of buf, calling cb as needed. @@ -71,37 +87,35 @@ struct upb_parse_state { upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len, size_t *read); -/* Low-level parsing functions. ***********************************************/ - -/* Parses a single tag from the character data starting at buf, and updates - * buf to point one past the bytes that were consumed. buf will be incremented - * by at most ten bytes. */ -upb_status_t upb_parse_tag(void **buf, size_t len, struct upb_tag *tag); - extern upb_wire_type_t upb_expected_wire_types[]; /* Returns true if wt is the correct on-the-wire type for ft. */ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_expected_wire_types[ft] == wt; } +/* Data-consuming functions (to be called from value cb). *********************/ + +/* Parses a single tag from the character data starting at buf, and updates + * buf to point one past the bytes that were consumed. buf will be incremented + * by at most ten bytes. */ +upb_status_t upb_parse_tag(void **buf, void *end, struct upb_tag *tag); + /* Parses and converts a value from the character data starting at buf. The * caller must have previously checked that the wire type is appropriate for * this field type. For delimited data, buf is advanced to the beginning of * the delimited data, not the end. */ -upb_status_t upb_parse_value(void **buf, size_t len, upb_field_type_t ft, - union upb_value *value); +upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft, + union upb_value *v); /* Parses a wire value with the given type (which must have been obtained from * a tag that was just parsed) and adds the number of bytes that were consumed * to *offset. For delimited types, offset is advanced past the delimited * data. */ -upb_status_t upb_parse_wire_value(void *buf, size_t len, size_t *offset, - upb_wire_type_t wt, +upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt, union upb_wire_value *wv); /* Like the above, but discards the wire value instead of saving it. */ -upb_status_t upb_skip_wire_value(void *buf, size_t len, size_t *offset, - upb_wire_type_t wt); +upb_status_t upb_skip_wire_value(void **buf, void *end, upb_wire_type_t wt); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3