From 1e388b0af3d7c2384cd350a349ba756914620466 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 20 Jul 2009 10:52:37 -0700 Subject: Significant, experimental changes (setjmp/longjmp, group handling). --- Makefile | 2 +- benchmark/benchmark.cc | 30 +-- benchmark/google_messages.proto.bin | 130 ----------- src/upb_msg.c | 63 ++---- src/upb_msg.h | 6 + src/upb_parse.c | 440 +++++++++++++++++------------------- src/upb_parse.h | 46 ++-- tests/test_table.cc | 1 + 8 files changed, 270 insertions(+), 448 deletions(-) delete mode 100644 benchmark/google_messages.proto.bin diff --git a/Makefile b/Makefile index 2cffa4f..3d4642d 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CC=gcc CXX=g++ CFLAGS=-std=c99 -CPPFLAGS=-O3 -Wall -Wextra -pedantic -g -DUPB_UNALIGNED_READS_OK -fomit-frame-pointer -Idescriptor -Isrc -Itests -I. +CPPFLAGS=-O0 -Wall -Wextra -pedantic -g -DUPB_UNALIGNED_READS_OK -DNDEBUG -Idescriptor -Isrc -Itests -I. OBJ=src/upb_parse.o src/upb_table.o src/upb_msg.o src/upb_enum.o src/upb_context.o \ src/upb_string.o descriptor/descriptor.o SRC=src/*.c src/*.h descriptor/*.c descriptor/*.h tests/*.c tests/*.h tools/*.c tools/*.h diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index 10713a8..95709bc 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -73,19 +73,19 @@ int main () upb_msgdata_free(data, m, true); upb_context_free(&c); - benchmarks::SpeedMessage2 msg; - std::string stlstr(str.ptr, str.byte_len); - upb_strfree(str); - total = 0; - before = clock(); - for(int i = 0; i < 2000; i++) { - if(!msg.ParseFromString(stlstr)) { - fprintf(stderr, "Error parsing with proto2.\n"); - return 1; - } - total += str.byte_len; - } - elapsed = ((double)clock() - before) / CLOCKS_PER_SEC; - fprintf(stderr, "proto2 parsed %sB, ", eng(total, 3, false)); - fprintf(stderr, "%sB/s\n", eng(total/elapsed, 3, false)); + //benchmarks::SpeedMessage2 msg; + //std::string stlstr(str.ptr, str.byte_len); + //upb_strfree(str); + //total = 0; + //before = clock(); + //for(int i = 0; i < 2000; i++) { + // if(!msg.ParseFromString(stlstr)) { + // fprintf(stderr, "Error parsing with proto2.\n"); + // return 1; + // } + // total += str.byte_len; + //} + //elapsed = ((double)clock() - before) / CLOCKS_PER_SEC; + //fprintf(stderr, "proto2 parsed %sB, ", eng(total, 3, false)); + //fprintf(stderr, "%sB/s\n", eng(total/elapsed, 3, false)); } diff --git a/benchmark/google_messages.proto.bin b/benchmark/google_messages.proto.bin deleted file mode 100644 index 0531d7e..0000000 --- a/benchmark/google_messages.proto.bin +++ /dev/null @@ -1,130 +0,0 @@ - -ï -google_speed.proto -benchmarks"ø - SpeedMessage1 -field1 (  -field9 (  -field18 (  -field80P (:false -field81Q (:true -field2 ( -field3 ( -field280˜ ( -field6 (:0 -field22 ( -field4 (  -field5 ( -field59; (:false -field7 (  -field16 ( -field130‚ (:0 -field12 (:true -field17 (:true -field13 (:true -field14 (:true -field104h (:0 -field100d (:0 -field101e (:0 -field102f (  -field103g (  -field29 (:0 -field30 (:false -field60< (:-1 -field271 (:-1 -field272 (:-1 -field150– ( -field23 (:0 -field24 (:false -field25 (:04 -field15 ( 2#.benchmarks.SpeedMessage1SubMessage -field78N ( -field67C (:0 -field68D ( -field128€ (:0( -field129 ( :xxxxxxxxxxxxxxxxxxxxx -field131ƒ (:0"¢ -SpeedMessage1SubMessage -field1 (:0 -field2 (:0 -field3 (:0 -field15 (  -field12 (:true -field13 ( -field14 ( -field16 ( -field19 (:2 -field20 (:true -field28 (:true -field21 ( -field22 ( -field23 (:false -field206Î (:false -field203Ë ( -field204Ì ( -field205Í (  -field207Ï ( -field300¬ ("Ê - SpeedMessage2 -field1 (  -field3 ( -field4 ( -field30 ( -field75K (:false -field6 (  -field2 (  -field21 (:0 -field71G ( -field25 ( -field109m (:0 -field210Ò (:0 -field211Ó (:0 -field212Ô (:0 -field213Õ (:0 -field216Ø (:0 -field217Ù (:0 -field218Ú (:0 -field220Ü (:0 -field221Ý (:0 -field222Þ (:0 -field63? (0 -group1 - ( -2 .benchmarks.SpeedMessage2.Group1 -field128€ (  -field131ƒ ( -field127 (  -field129 ( -field130‚ ( -field205Í (:false -field206Î (:falseà -Group1 -field11 ( -field26 ( -field12 (  -field13 (  -field14 (  -field15 ( -field5 ( -field27 (  -field28 ( -field29 (  -field16 (  -field22 (  -field73I ( -field20 (:0 -field24 ( 8 -field31 ( 2'.benchmarks.SpeedMessage2GroupedMessage"ß -SpeedMessage2GroupedMessage -field1 ( -field2 ( -field3 (:0 -field4 ( -field5 ( -field6 (:true -field7 (:false -field8 ( -field9 ( -field10 - ( -field11 ( \ No newline at end of file diff --git a/src/upb_msg.c b/src/upb_msg.c index ed2a851..5b50541 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -256,26 +256,12 @@ void upb_msg_reuse_submsg(void **msg, struct upb_msg *m) /* Serialization/Deserialization. ********************************************/ -/* We use this as our "user_data" for each frame of the parsing stack. */ -struct parse_frame_data { - struct upb_msg *m; - void *data; -}; - -static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m, - void *data) -{ - struct parse_frame_data *frame = (void*)&s->top->user_data; - frame->m = m; - frame->data = data; -} - -static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag, +static upb_field_type_t tag_cb(void *udata, struct upb_tag tag, void **user_field_desc) { - struct parse_frame_data *frame = (void*)&s->top->user_data; - struct upb_msg_field *f = upb_msg_fieldbynum(frame->m, tag->field_number); - if(!f || !upb_check_type(tag->wire_type, f->type)) + struct upb_msg_parse_state *s = udata; + struct upb_msg_field *f = upb_msg_fieldbynum(s->top->m, tag.field_number); + if(!f || !upb_check_type(tag.wire_type, f->type)) return 0; /* Skip unknown or fields of the wrong type. */ *user_field_desc = f; return f->type; @@ -299,23 +285,21 @@ static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f) return p; } -static upb_status_t value_cb(struct upb_parse_state *s, void **buf, void *end, - void *user_field_desc) +static void *value_cb(void *udata, void *buf, void *end, + void *user_field_desc, jmp_buf errjmp) { - struct parse_frame_data *frame = (void*)&s->top->user_data; + struct upb_msg_parse_state *s = udata; struct upb_msg_field *f = user_field_desc; - union upb_value_ptr p = get_value_ptr(frame->data, f); - UPB_CHECK(upb_parse_value(buf, end, f->type, p)); - return UPB_STATUS_OK; + union upb_value_ptr p = get_value_ptr(s->top->data, f); + return upb_parse_value(buf, end, f->type, p, errjmp); } -static upb_status_t str_cb(struct upb_parse_state *_s, struct upb_string *str, +static upb_status_t str_cb(void *udata, struct upb_string *str, void *user_field_desc) { - struct upb_msg_parse_state *s = (void*)_s; - struct parse_frame_data *frame = (void*)&s->s.top->user_data; + struct upb_msg_parse_state *s = udata; struct upb_msg_field *f = user_field_desc; - union upb_value_ptr p = get_value_ptr(frame->data, f); + union upb_value_ptr p = get_value_ptr(s->top->data, f); if(s->byref) { upb_msg_reuse_strref(p.str); **p.str = *str; @@ -326,29 +310,30 @@ static upb_status_t str_cb(struct upb_parse_state *_s, struct upb_string *str, return UPB_STATUS_OK; } -static void submsg_start_cb(struct upb_parse_state *_s, void *user_field_desc) +static void submsg_start_cb(void *udata, void *user_field_desc) { - struct upb_msg_parse_state *s = (void*)_s; + struct upb_msg_parse_state *s = udata; struct upb_msg_field *f = user_field_desc; - struct parse_frame_data *frame = (void*)&s->s.top->user_data; - // TODO: find a non-hacky way to get a pointer to the old frame. - struct parse_frame_data *oldframe = (void*)((char*)s->s.top - s->s.udata_size); - union upb_value_ptr p = get_value_ptr(oldframe->data, f); + union upb_value_ptr p = get_value_ptr(s->top->data, f); assert(f->ref.msg); upb_msg_reuse_submsg(p.msg, f->ref.msg); - set_frame_data(&s->s, f->ref.msg, *p.msg); - if(!s->merge) upb_msg_clear(frame->data, f->ref.msg); + s->top++; + s->top->m = f->ref.msg; + s->top->data = *p.msg; + if(!s->merge) upb_msg_clear(s->top->data, s->top->m); } void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg, struct upb_msg *m, bool merge, bool byref) { - upb_parse_reset(&s->s); + upb_parse_reset(&s->s, s); s->merge = merge; s->byref = byref; if(!merge && msg == NULL) msg = upb_msgdata_new(m); upb_msg_clear(msg, m); - set_frame_data(&s->s, m, msg); + s->top = s->stack; + s->top->m = m; + s->top->data = msg; s->s.tag_cb = tag_cb; s->s.value_cb = value_cb; s->s.str_cb = str_cb; @@ -358,7 +343,7 @@ void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg, void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg, struct upb_msg *m, bool merge, bool byref) { - upb_parse_init(&s->s, sizeof(struct parse_frame_data)); + upb_parse_init(&s->s, s); upb_msg_parse_reset(s, msg, m, merge, byref); } diff --git a/src/upb_msg.h b/src/upb_msg.h index 1d41805..14fc870 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -331,11 +331,17 @@ void upb_msg_reuse_submsg(void **msg, struct upb_msg *m); /* This is all just a layer on top of the stream-oriented facility in * upb_parse.h. */ +struct upb_msg_parse_frame { + struct upb_msg *m; + void *data; +}; + struct upb_msg_parse_state { struct upb_parse_state s; bool merge; bool byref; struct upb_msg *m; + struct upb_msg_parse_frame stack[UPB_MAX_NESTING], *top; }; /* Initializes/frees a message parser. The parser will write the data to the diff --git a/src/upb_parse.c b/src/upb_parse.c index ca28ccc..fa37a56 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -6,147 +6,149 @@ #include "upb_parse.h" -#include #include #include -#include + +/* May want to move this to upb.c if enough other things warrant it. */ #include "descriptor.h" +#define alignof(t) offsetof(struct { char c; t x; }, x) +struct upb_type_info upb_type_info[] = { + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = {alignof(double), sizeof(double), UPB_WIRE_TYPE_64BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = {alignof(float), sizeof(float), UPB_WIRE_TYPE_32BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_START_GROUP}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32]= {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_32BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED}, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED}, +}; /* Lowest-level functions -- these read integers from the input buffer. */ -static void *check_end(uint8_t *buf, void *end, size_t maxlen, - upb_status_t *bound_error) +inline +static void *get_v_uint64_t(void *restrict _buf, void *_end, + uint64_t *restrict val, jmp_buf errjmp) { - void *maxend = buf + maxlen; - if(end < maxend) { - *bound_error = UPB_STATUS_NEED_MORE_DATA; - return end; - } else { - *bound_error = UPB_ERROR_UNTERMINATED_VARINT; - return maxend; - } -} - -inline static upb_status_t get_v_uint64_t(void *restrict *buf, void *end, - uint64_t *restrict val) -{ - uint8_t *b = *buf; - - if((*b & 0x80) == 0) { + uint8_t *buf = _buf, *end = _end; + if((*buf & 0x80) == 0) { /* Single-byte varint -- very common case. */ - *buf = b + 1; - *val = *b & 0x7f; - return UPB_STATUS_OK; - } else if(b <= (uint8_t*)end && (*(b+1) & 0x80) == 0) { + *val = *buf & 0x7f; + return buf + 1; + } else if(buf <= end && (*(buf+1) & 0x80) == 0) { /* Two-byte varint. */ - *buf = b + 2; - *val = (b[0] & 0x7f) | ((b[1] & 0x7f) << 7); - return UPB_STATUS_OK; - } else if(b + 10 <= (uint8_t*)end) { + *val = (buf[0] & 0x7f) | ((buf[1] & 0x7f) << 7); + return buf + 2; + } else if(buf + 10 <= end) { /* >2-byte varint, fast path. */ - uint64_t cont = *(uint64_t*)(b+2) | 0x7f7f7f7f7f7f7f7fULL; + uint64_t cont = *(uint64_t*)(buf+2) | 0x7f7f7f7f7f7f7f7fULL; int num_bytes = __builtin_ffsll(~cont) / 8; uint32_t part0 = 0, part1 = 0, part2 = 0; switch(num_bytes) { - default: return UPB_ERROR_UNTERMINATED_VARINT; - case 8: part2 |= (b[9] & 0x7F) << 7; - case 7: part2 |= (b[8] & 0x7F); - case 6: part1 |= (b[7] & 0x7F) << 21; - case 5: part1 |= (b[6] & 0x7F) << 14; - case 4: part1 |= (b[5] & 0x7F) << 7; - case 3: part1 |= (b[4] & 0x7F); - case 2: part0 |= (b[3] & 0x7F) << 21; - case 1: part0 |= (b[2] & 0x7F) << 14; - part0 |= (b[1] & 0x7F) << 7; - part0 |= (b[0] & 0x7F); + default: longjmp(errjmp, UPB_ERROR_UNTERMINATED_VARINT); + case 8: part2 |= (buf[9] & 0x7F) << 7; + case 7: part2 |= (buf[8] & 0x7F); + case 6: part1 |= (buf[7] & 0x7F) << 21; + case 5: part1 |= (buf[6] & 0x7F) << 14; + case 4: part1 |= (buf[5] & 0x7F) << 7; + case 3: part1 |= (buf[4] & 0x7F); + case 2: part0 |= (buf[3] & 0x7F) << 21; + case 1: part0 |= (buf[2] & 0x7F) << 14; + part0 |= (buf[1] & 0x7F) << 7; + part0 |= (buf[0] & 0x7F); } - *buf = b + num_bytes + 2; *val = (uint64_t)part0 | ((uint64_t)part1 << 28) | ((uint64_t)part2 << 56); - return UPB_STATUS_OK; + return buf + num_bytes + 2; } else { /* >2-byte varint, slow path. */ uint8_t last = 0x80; *val = 0; - for(int bitpos = 0; b < (uint8_t*)end && (last & 0x80); b++, bitpos += 7) - *val |= ((uint64_t)((last = *b) & 0x7F)) << bitpos; - if(last & 0x80) return UPB_STATUS_NEED_MORE_DATA; - *buf = b; - return UPB_STATUS_OK; + for(int bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7) + *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; + if(last & 0x80) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA); + return buf; } } -static upb_status_t skip_v_uint64_t(void **buf, void *end) +static void *skip_v_uint64_t(void *_buf, void *_end, jmp_buf errjmp) { - uint8_t *b = *buf; - upb_status_t bound_error; - end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */ + /* TODO: optimize. */ + uint8_t *buf = _buf, *end = _end; uint8_t last = 0x80; - for(; b < (uint8_t*)end && (last & 0x80); b++) - last = *b; + for(; buf < end && (last & 0x80); buf++) + last = *buf; - if(last & 0x80) return bound_error; - *buf = b; - return UPB_STATUS_OK; + if(last & 0x80) { + upb_status_t err = + buf == end ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT; + longjmp(errjmp, err); + } + return buf; } -static upb_status_t get_v_uint32_t(void *restrict *buf, void *end, - uint32_t *restrict val) +static void *get_v_uint32_t(void *restrict buf, void *end, + uint32_t *restrict val, jmp_buf errjmp) { uint64_t val64; - UPB_CHECK(get_v_uint64_t(buf, end, &val64)); + void *outbuf = get_v_uint64_t(buf, end, &val64, errjmp); + /* TODO: should we throw an error if any of the high bits in val64 are set? */ *val = (uint32_t)val64; - return UPB_STATUS_OK; + return outbuf; } -static upb_status_t get_f_uint32_t(void *restrict *buf, void *end, - uint32_t *restrict val) +static void *get_f_uint32_t(void *restrict buf, void *end, + uint32_t *restrict val, jmp_buf errjmp) { - uint8_t *b = *buf; - void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t); - if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; + void *uint32_end = (uint8_t*)buf + sizeof(uint32_t); + if(uint32_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA); #if UPB_UNALIGNED_READS_OK - *val = *(uint32_t*)b; + *val = *(uint32_t*)buf; #else #define SHL(val, bits) ((uint32_t)val << bits) *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24); #undef SHL #endif - *buf = uint32_end; - return UPB_STATUS_OK; + return uint32_end; } -static upb_status_t get_f_uint64_t(void *restrict *buf, void *end, - uint64_t *restrict val) +static void *get_f_uint64_t(void *restrict buf, void *end, + uint64_t *restrict val, jmp_buf errjmp) { - void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t); - if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; + void *uint64_end = (uint8_t*)buf + sizeof(uint64_t); + if(uint64_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA); #if UPB_UNALIGNED_READS_OK - *val = *(uint64_t*)*buf; - *buf = uint64_end; + *val = *(uint64_t*)buf; #else - uint32_t lo32, hi32; - get_f_uint32_t(buf, &lo32, end); - get_f_uint32_t(buf, &hi32, end); - *val = lo32 | ((uint64_t)hi32 << 32); +#define SHL(val, bits) ((uint64_t)val << bits) + *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24) | + SHL(b[4], 32) | SHL(b[5], 40) | SHL(b[6], 48) | SHL(b[7], 56) | +#undef SHL #endif - return UPB_STATUS_OK; + return uint64_end; } -static upb_status_t skip_f_uint32_t(void **buf, void *end) +static void *skip_f_uint32_t(void *buf, void *end, jmp_buf errjmp) { - void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t); - if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; - *buf = uint32_end; - return UPB_STATUS_OK; + void *uint32_end = (uint8_t*)buf + sizeof(uint32_t); + if(uint32_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA); + return uint32_end; } -static upb_status_t skip_f_uint64_t(void **buf, void *end) +static void *skip_f_uint64_t(void *buf, void *end, jmp_buf errjmp) { - void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t); - if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; - *buf = uint64_end; - return UPB_STATUS_OK; + void *uint64_end = (uint8_t*)buf + sizeof(uint64_t); + if(uint64_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA); + return uint64_end; } static int32_t zz_decode_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } @@ -159,11 +161,11 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } static void wvtov_ ## type(wire_t s, val_t *d) #define GET(type, v_or_f, wire_t, val_t, member_name) \ - static upb_status_t get_ ## type(void **buf, void *end, val_t *d) { \ + static void *get_ ## type(void *buf, void *end, val_t *d, jmp_buf errjmp) { \ wire_t tmp; \ - UPB_CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp)); \ + void *outbuf = get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, errjmp); \ wvtov_ ## type(tmp, d); \ - return UPB_STATUS_OK; \ + return outbuf; \ } #define T(type, v_or_f, wire_t, val_t, member_name) \ @@ -189,70 +191,46 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } #undef GET #undef T -#define alignof(t) offsetof(struct { char c; t x; }, x) - -/* May want to move this to upb.c if enough other things warrant it. */ -struct upb_type_info upb_type_info[] = { - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = {alignof(double), sizeof(double), UPB_WIRE_TYPE_64BIT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = {alignof(float), sizeof(float), UPB_WIRE_TYPE_32BIT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_START_GROUP}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32]= {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_32BIT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED}, - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED}, -}; - -static upb_status_t parse_tag(void **buf, void *end, struct upb_tag *tag) +static void *parse_tag(void *buf, void *end, struct upb_tag *tag, jmp_buf errjmp) { uint32_t tag_int; - UPB_CHECK(get_v_uint32_t(buf, end, &tag_int)); + void *outbuf = get_v_uint32_t(buf, end, &tag_int, errjmp); tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; - return UPB_STATUS_OK; + return outbuf; } -upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt, - union upb_wire_value *wv) +void *upb_parse_wire_value(void *buf, void *end, upb_wire_type_t wt, + union upb_wire_value *wv, jmp_buf errjmp) { switch(wt) { - case UPB_WIRE_TYPE_VARINT: UPB_CHECK(get_v_uint64_t(buf, end, &wv->varint)); break; - case UPB_WIRE_TYPE_64BIT: UPB_CHECK(get_f_uint64_t(buf, end, &wv->_64bit)); break; - case UPB_WIRE_TYPE_32BIT: UPB_CHECK(get_f_uint32_t(buf, end, &wv->_32bit)); break; - default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */ + case UPB_WIRE_TYPE_VARINT: return get_v_uint64_t(buf, end, &wv->varint, errjmp); + case UPB_WIRE_TYPE_64BIT: return get_f_uint64_t(buf, end, &wv->_64bit, errjmp); + case UPB_WIRE_TYPE_32BIT: return get_f_uint32_t(buf, end, &wv->_32bit, errjmp); + default: longjmp(errjmp, UPB_ERROR_ILLEGAL); /* Doesn't handle delimited, groups. */ } - return UPB_STATUS_OK; } -static upb_status_t skip_wire_value(void **buf, void *end, upb_wire_type_t wt) +static void *skip_wire_value(void *buf, void *end, upb_wire_type_t wt, + jmp_buf errjmp) { switch(wt) { - case UPB_WIRE_TYPE_VARINT: UPB_CHECK(skip_v_uint64_t(buf, end)); break; - case UPB_WIRE_TYPE_64BIT: UPB_CHECK(skip_f_uint64_t(buf, end)); break; - case UPB_WIRE_TYPE_32BIT: UPB_CHECK(skip_f_uint32_t(buf, end)); break; + case UPB_WIRE_TYPE_VARINT: return skip_v_uint64_t(buf, end, errjmp); + case UPB_WIRE_TYPE_64BIT: return skip_f_uint64_t(buf, end, errjmp); + case UPB_WIRE_TYPE_32BIT: return skip_f_uint32_t(buf, end, errjmp); case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */ case UPB_WIRE_TYPE_END_GROUP: break; - default: return UPB_ERROR_ILLEGAL; + default: longjmp(errjmp, UPB_ERROR_ILLEGAL); } - return UPB_STATUS_OK; + return buf; } -upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft, - union upb_value_ptr v) +void *upb_parse_value(void *buf, void *end, upb_field_type_t ft, + union upb_value_ptr v, jmp_buf errjmp) { #define CASE(t, member_name) \ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ - return get_ ## t(buf, end, v.member_name); + return get_ ## t(buf, end, v.member_name, errjmp); switch(ft) { CASE(DOUBLE, _double) CASE(FLOAT, _float) @@ -268,28 +246,25 @@ upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft, CASE(SFIXED64, int64) CASE(BOOL, _bool) CASE(ENUM, int32) - default: return UPB_ERROR_ILLEGAL; + default: longjmp(errjmp, UPB_ERROR_ILLEGAL); } #undef CASE } -void upb_parse_reset(struct upb_parse_state *state) +void upb_parse_reset(struct upb_parse_state *state, void *udata) { - state->offset = 0; state->top = state->stack; + state->limit = &state->stack[UPB_MAX_NESTING]; /* The top-level message is not delimited (we can keep receiving data for - * it indefinitely). */ - state->top->end_offset = SIZE_MAX; + * it indefinitely), so we treat it like a group. */ + *state->top = 0; + state->udata = udata; } -void upb_parse_init(struct upb_parse_state *state, size_t udata_size) +void upb_parse_init(struct upb_parse_state *state, void *udata) { memset(state, 0, sizeof(struct upb_parse_state)); /* Clear all callbacks. */ - size_t stack_bytes = (sizeof(*state->stack) + udata_size) * UPB_MAX_NESTING; - state->stack = malloc(stack_bytes); - state->limit = (struct upb_parse_stack_frame*)((char*)state->stack + stack_bytes); - state->udata_size = udata_size; - upb_parse_reset(state); + upb_parse_reset(state, udata); } void upb_parse_free(struct upb_parse_state *state) @@ -297,112 +272,105 @@ void upb_parse_free(struct upb_parse_state *state) free(state->stack); } -static size_t pop_stack_frame(struct upb_parse_state *s) +static void *pop_stack_frame(struct upb_parse_state *s, + uint8_t *buf, uint8_t *submsg_end) { - if(s->submsg_end_cb) s->submsg_end_cb(s); + if(s->submsg_end_cb) s->submsg_end_cb(s->udata); + uint32_t final_submsg_len = *s->top - (buf - submsg_end); s->top--; - s->top = (struct upb_parse_stack_frame*)((char*)s->top - s->udata_size); - return s->top->end_offset; + *s->top -= final_submsg_len; + return (char*)buf + (*s->top > 0 ? *s->top : 0); } -static upb_status_t push_stack_frame(struct upb_parse_state *s, size_t end, - void *user_field_desc, size_t *end_offset) +/* Returns the next end offset. */ +static void *push_stack_frame(struct upb_parse_state *s, + uint8_t *buf, uint8_t *submsg_end, uint32_t len, + void *user_field_desc, jmp_buf errjmp) { + *s->top -= len; + if(*s->top < 0) *s->top -= (buf - submsg_end); s->top++; - s->top = (struct upb_parse_stack_frame*)((char*)s->top + s->udata_size); - if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW; - s->top->end_offset = end; - *end_offset = end; - if(s->submsg_start_cb) s->submsg_start_cb(s, user_field_desc); - return UPB_STATUS_OK; -} - -static upb_status_t parse_delimited(struct upb_parse_state *s, - struct upb_tag *tag, - void **buf, void *end, - size_t base_offset, size_t *end_offset) -{ - int32_t delim_len; - void *user_field_desc; - void *bufstart = *buf; - - /* Whether we are parsing or skipping the field, we always need to parse - * the length. */ - UPB_CHECK(get_INT32(buf, end, &delim_len)); - upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc); - if(*buf < bufstart) return UPB_ERROR_OVERFLOW; - if(*buf > end && ft != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { - /* Streaming submessages is ok, but for other delimited types (string, - * bytes, and packed arrays) we require that all the delimited data is - * available. This could be relaxed if desired. */ - return UPB_STATUS_NEED_MORE_DATA; - } - - if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { - base_offset += ((char*)*buf - (char*)bufstart); - UPB_CHECK(push_stack_frame(s, base_offset + delim_len, user_field_desc, end_offset)); - } else { - void *delim_end = (char*)*buf + delim_len; - if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || - ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) { - struct upb_string str = {.ptr = *buf, .byte_len = delim_len}; - s->str_cb(s, &str, user_field_desc); - *buf = delim_end; - } else { - /* Packed Array. */ - while(*buf < delim_end) - UPB_CHECK(s->value_cb(s, buf, end, user_field_desc)); - } - } - return UPB_STATUS_OK; + if(s->top > s->limit) longjmp(errjmp, UPB_ERROR_STACK_OVERFLOW); + *s->top = len; + if(s->submsg_start_cb) s->submsg_start_cb(s->udata, user_field_desc); + return (char*)buf + *s->top; } -static upb_status_t parse_nondelimited(struct upb_parse_state *s, - struct upb_tag *tag, - void **buf, void *end, - size_t *end_offset) +upb_status_t upb_isstringtype(upb_field_type_t type) { - /* Simple value or begin group. */ - void *user_field_desc; - upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc); - if(ft == 0) { - UPB_CHECK(skip_wire_value(buf, end, tag->wire_type)); - } else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) { - /* No length specified, an "end group" tag will mark the end. */ - UPB_CHECK(push_stack_frame(s, UINT32_MAX, user_field_desc, end_offset)); - } else { - UPB_CHECK(s->value_cb(s, buf, end, user_field_desc)); - } - return UPB_STATUS_OK; + return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || + type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES; } -upb_status_t upb_parse(struct upb_parse_state *restrict s, void *buf, size_t len, - size_t *restrict read) +upb_status_t upb_parse(struct upb_parse_state *s, void *_buf, size_t len, + size_t *read) { - void *end = (char*)buf + len; - size_t offset = s->offset; - size_t end_offset = s->top->end_offset; + uint8_t *buf = _buf; + uint8_t *volatile completed = buf; + uint8_t *const start = buf; + /* Error handling with setjmp/longjmp (saves repeated error code checks, and + * lets us use function return values for something more useful). */ + jmp_buf errjmp; + upb_status_t status = UPB_STATUS_OK; + if((status = setjmp(errjmp)) != 0) goto done; + + uint8_t *end = buf + len; + uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0); while(buf < end) { struct upb_tag tag; - void *bufstart = buf; - UPB_CHECK(parse_tag(&buf, end, &tag)); + buf = parse_tag(buf, end, &tag, errjmp); if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) { - if(end_offset != UINT32_MAX) - return UPB_ERROR_SPURIOUS_END_GROUP; - end_offset = pop_stack_frame(s); - } else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { - UPB_CHECK(parse_delimited( - s, &tag, &buf, end, offset + (char*)buf - (char*)bufstart, &end_offset)); - } else { - UPB_CHECK(parse_nondelimited(s, &tag, &buf, end, &end_offset)); + submsg_end = pop_stack_frame(s, buf, submsg_end); + completed = buf; + continue; } - offset += ((char*)buf - (char*)bufstart); - while(offset >= end_offset) { - if(offset != end_offset) return UPB_ERROR_BAD_SUBMESSAGE_END; - end_offset = pop_stack_frame(s); + /* Don't handle START_GROUP here, so client can skip group via tag_cb. */ + void *user_field_desc; + + upb_field_type_t ft = s->tag_cb(s->udata, tag, &user_field_desc); + if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { + int32_t delim_len; + buf = get_INT32(buf, end, &delim_len, errjmp); + uint8_t *delim_end = buf + delim_len; + + if(delim_end > end) { /* String ends beyond the data we have. */ + if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { + /* Streaming the body of a message is ok. */ + } else { + /* String, bytes, and packed arrays must have all data present. */ + status = UPB_STATUS_NEED_MORE_DATA; + goto done; + } + } + + if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { + submsg_end = push_stack_frame(s, buf, submsg_end, delim_len, user_field_desc, errjmp); + } else { /* Delimited data for which we require (and have) all data. */ + if(ft == 0) { + /* Do nothing -- client has elected to skip. */ + } else if(upb_isstringtype(ft)) { + struct upb_string str = {.ptr = (char*)buf, .byte_len = delim_len}; + s->str_cb(s->udata, &str, user_field_desc); + } else { /* Packed Array. */ + while(buf < delim_end) + buf = s->value_cb(s->udata, buf, end, user_field_desc, errjmp); + } + buf = delim_end; + } + } else { /* Scalar (non-delimited) value. */ + if(ft == 0) /* Client elected to skip. */ + buf = skip_wire_value(buf, end, tag.wire_type, errjmp); + else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) + submsg_end = push_stack_frame(s, buf, submsg_end, 0, user_field_desc, errjmp); + else + buf = s->value_cb(s->udata, buf, end, user_field_desc, errjmp); } + + while(buf == submsg_end) submsg_end = pop_stack_frame(s, buf, submsg_end); + completed = buf; } - *read = offset - s->offset; - s->offset = offset; - return UPB_STATUS_OK; + +done: + *read = (char*)completed - (char*)start; + return status; } diff --git a/src/upb_parse.h b/src/upb_parse.h index 4600c04..4a432d8 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -12,8 +12,9 @@ #ifndef UPB_PARSE_H_ #define UPB_PARSE_H_ -#include +#include #include +#include #include "upb.h" #ifdef __cplusplus @@ -63,8 +64,8 @@ struct upb_parse_state; /* Initialize and free (respectively) the given parse state, which must have * been previously allocated. udata_size specifies how much space will be * available at parse_stack_frame.user_data in each frame for user data. */ -void upb_parse_init(struct upb_parse_state *state, size_t udata_size); -void upb_parse_reset(struct upb_parse_state *state); +void upb_parse_init(struct upb_parse_state *state, void *udata); +void upb_parse_reset(struct upb_parse_state *state, void *udata); void upb_parse_free(struct upb_parse_state *state); /* The callback that is called immediately after a tag has been parsed. The @@ -74,8 +75,8 @@ void upb_parse_free(struct upb_parse_state *state); * type is appropriate for the .proto type. To skip the value (which means * skipping all submessages, in the case of a submessage), the callback should * return zero. */ -typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s, - struct upb_tag *tag, +typedef upb_field_type_t (*upb_tag_cb)(void *udata, + struct upb_tag tag, void **user_field_desc); /* The callback that is called when a regular value (ie. not a string or @@ -85,34 +86,25 @@ typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s, * * Note that this callback can be called several times in a row for a single * call to tag_cb in the case of packed arrays. */ -typedef upb_status_t (*upb_value_cb)(struct upb_parse_state *s, - void **buf, void *end, - void *user_field_desc); +typedef void *(*upb_value_cb)(void *udata, void *buf, void *end, + void *user_field_desc, jmp_buf errjmp); /* The callback that is called when a string is parsed. */ -typedef upb_status_t (*upb_str_cb)(struct upb_parse_state *s, +typedef upb_status_t (*upb_str_cb)(void *udata, struct upb_string *str, void *user_field_desc); /* Callbacks that are called when a submessage begins and ends, respectively. * Both are called with the submessage's stack frame at the top of the stack. */ -typedef void (*upb_submsg_start_cb)(struct upb_parse_state *s, +typedef void (*upb_submsg_start_cb)(void *udata, void *user_field_desc); -typedef void (*upb_submsg_end_cb)(struct upb_parse_state *s); - -/* Each stack frame (one for each level of submessages/groups) has this format, - * where user_data has as many bytes allocated as specified when initialized. */ -struct upb_parse_stack_frame { - size_t end_offset; /* 0 indicates that this is a group. */ -#ifndef __cplusplus /* Temporary hack since C++ doesn't support flex arrays. */ - char user_data[]; -#endif -}; +typedef void (*upb_submsg_end_cb)(void *udata); struct upb_parse_state { - size_t offset; - struct upb_parse_stack_frame *stack, *top, *limit; - size_t udata_size; /* How many bytes the user gets in each frame. */ + /* For delimited submsgs, counts from the submsg len down to zero. + * For group submsgs, counts from zero down to the negative len. */ + int32_t stack[UPB_MAX_NESTING], *top, *limit; + void *udata; upb_tag_cb tag_cb; upb_value_cb value_cb; upb_str_cb str_cb; @@ -144,14 +136,14 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { /* Parses and converts a value from the character data starting at buf. The * caller must have previously checked that the wire type is appropriate for * this field type. */ -upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft, - union upb_value_ptr v); +void *upb_parse_value(void *buf, void *end, upb_field_type_t ft, + union upb_value_ptr v, jmp_buf errjmp); /* Parses a wire value with the given type (which must have been obtained from * a tag that was just parsed) and adds the number of bytes that were consumed * to *offset. */ -upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt, - union upb_wire_value *wv); +void *upb_parse_wire_value(void *buf, void *end, upb_wire_type_t wt, + union upb_wire_value *wv, jmp_buf errjmp); #ifdef __cplusplus } /* extern "C" */ diff --git a/tests/test_table.cc b/tests/test_table.cc index 0337eaa..24a93c3 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -1,4 +1,5 @@ +#undef NDEBUG /* ensure tests always assert. */ #include "upb_table.h" #include "test_util.h" #include -- cgit v1.2.3