From d1f78c88faafea7e672c7c45e20f6f040942a92a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 3 Jun 2009 22:06:24 -0700 Subject: A bunch more work, a fast table for field lookup. --- upb_parse.c | 189 +++++++++++++++--------------------------------------------- 1 file changed, 47 insertions(+), 142 deletions(-) (limited to 'upb_parse.c') diff --git a/upb_parse.c b/upb_parse.c index c0fc007..458876e 100644 --- a/upb_parse.c +++ b/upb_parse.c @@ -8,6 +8,7 @@ #include #include +#include "descriptor.h" /* Branch prediction hints for GCC. */ #ifdef __GNUC__ @@ -138,14 +139,10 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } static void wvtov_ ## type(wire_t s, val_t *d) #define GET(type, v_or_f, wire_t, val_t, member_name) \ - static upb_status_t get_ ## type(struct upb_parse_state *s, \ - uint8_t *buf, \ - struct upb_tagged_value *d) { \ + static upb_status_t get_ ## type(uint8_t **buf, union upb_value *d) { \ wire_t tmp; \ - uint8_t *b = buf; \ - CHECK(get_ ## v_or_f ## _ ## wire_t(&b, &tmp)); \ - wvtov_ ## type(tmp, &d->v.member_name); \ - s->offset += (b-buf); \ + CHECK(get_ ## v_or_f ## _ ## wire_t(buf, &tmp)); \ + wvtov_ ## type(tmp, &d->member_name); \ return UPB_STATUS_OK; \ } @@ -172,65 +169,25 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } #undef GET #undef T -static void wvtov_delimited(uint32_t s, struct upb_delimited *d, size_t o) -{ - d->offset = o; - d->len = s; -} - -/* Use BYTES version for both STRING and BYTES, leave UTF-8 checks to client. */ -static upb_status_t get_BYTES(struct upb_parse_state *s, uint8_t *buf, - struct upb_tagged_value *d) { - uint32_t tmp; - uint8_t *b = buf; - CHECK(get_v_uint32_t(&b, &tmp)); - s->offset += (b-buf); /* advance past length varint. */ - wvtov_delimited(tmp, &d->v.delimited, s->offset); - size_t new_offset = s->offset + d->v.delimited.len; /* skip bytes */ - if (unlikely(new_offset < s->offset)) return UPB_ERROR_OVERFLOW; - s->offset = new_offset; - return UPB_STATUS_OK; -} - -static upb_status_t get_MESSAGE(struct upb_parse_state *s, uint8_t *buf, - struct upb_tagged_value *d) { - /* We're entering a sub-message. */ - uint32_t tmp; - uint8_t *b = buf; - CHECK(get_v_uint32_t(&b, &tmp)); - s->offset += (b-buf); /* advance past length varint. */ - wvtov_delimited(tmp, &d->v.delimited, s->offset); - /* Unlike STRING and BYTES, we *don't* advance past delimited here. */ - if (unlikely(++s->top == s->limit)) return UPB_ERROR_STACK_OVERFLOW; - s->top->fieldset = d->field->fieldset; - s->top->end_offset = d->v.delimited.offset + d->v.delimited.len; - if (unlikely(s->top->end_offset < s->offset)) return UPB_ERROR_OVERFLOW; - return UPB_STATUS_OK; -} - -struct upb_type_info { - upb_wire_type_t expected_wire_type; - upb_status_t (*get)(struct upb_parse_state *s, uint8_t *buf, - struct upb_tagged_value *d); -}; -static struct upb_type_info type_info[] = { - {UPB_WIRE_TYPE_64BIT, get_DOUBLE}, - {UPB_WIRE_TYPE_32BIT, get_FLOAT}, - {UPB_WIRE_TYPE_VARINT, get_INT32}, - {UPB_WIRE_TYPE_VARINT, get_INT64}, - {UPB_WIRE_TYPE_VARINT, get_UINT32}, - {UPB_WIRE_TYPE_VARINT, get_UINT64}, - {UPB_WIRE_TYPE_VARINT, get_SINT32}, - {UPB_WIRE_TYPE_VARINT, get_SINT64}, - {UPB_WIRE_TYPE_32BIT, get_FIXED32}, - {UPB_WIRE_TYPE_64BIT, get_FIXED64}, - {UPB_WIRE_TYPE_32BIT, get_SFIXED32}, - {UPB_WIRE_TYPE_64BIT, get_SFIXED64}, - {UPB_WIRE_TYPE_VARINT, get_BOOL}, - {UPB_WIRE_TYPE_DELIMITED, get_BYTES}, - {UPB_WIRE_TYPE_DELIMITED, get_BYTES}, - {UPB_WIRE_TYPE_VARINT, get_ENUM}, - {UPB_WIRE_TYPE_DELIMITED, get_MESSAGE} +upb_wire_type_t upb_expected_wire_types[] = { + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = UPB_WIRE_TYPE_64BIT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = UPB_WIRE_TYPE_32BIT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = UPB_WIRE_TYPE_VARINT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = UPB_WIRE_TYPE_VARINT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = UPB_WIRE_TYPE_VARINT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = UPB_WIRE_TYPE_64BIT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = UPB_WIRE_TYPE_32BIT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = UPB_WIRE_TYPE_VARINT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = UPB_WIRE_TYPE_DELIMITED, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = UPB_WIRE_TYPE_DELIMITED, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = -1, /* TODO */ + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = UPB_WIRE_TYPE_DELIMITED, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = UPB_WIRE_TYPE_VARINT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = UPB_WIRE_TYPE_VARINT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32] = UPB_WIRE_TYPE_32BIT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64] = UPB_WIRE_TYPE_64BIT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = UPB_WIRE_TYPE_VARINT, + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = UPB_WIRE_TYPE_VARINT, }; upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag) @@ -249,22 +206,17 @@ upb_status_t parse_wire_value(uint8_t *buf, size_t *offset, #define READ(expr) CHECK(expr); *offset += (b-buf) uint8_t *b = buf; switch(wt) { - case UPB_WIRE_TYPE_VARINT: - READ(get_v_uint64_t(&b, &wv->varint)); break; - case UPB_WIRE_TYPE_64BIT: - READ(get_f_uint64_t(&b, &wv->_64bit)); break; - case UPB_WIRE_TYPE_32BIT: - READ(get_f_uint32_t(&b, &wv->_32bit)); break; + case UPB_WIRE_TYPE_VARINT: READ(get_v_uint64_t(&b, &wv->varint)); break; + case UPB_WIRE_TYPE_64BIT: READ(get_f_uint64_t(&b, &wv->_64bit)); break; + case UPB_WIRE_TYPE_32BIT: READ(get_f_uint32_t(&b, &wv->_32bit)); break; case UPB_WIRE_TYPE_DELIMITED: - wv->delimited.offset = *offset; - READ(get_v_uint32_t(&b, &wv->delimited.len)); - size_t new_offset = *offset + wv->delimited.len; + READ(get_v_uint32_t(&b, &wv->_32bit)); + size_t new_offset = *offset + wv->_32bit; if (new_offset < *offset) return UPB_ERROR_OVERFLOW; *offset += new_offset; break; case UPB_WIRE_TYPE_START_GROUP: - case UPB_WIRE_TYPE_END_GROUP: - return UPB_ERROR_GROUP; /* deprecated, no plans to support. */ + case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */ } return UPB_STATUS_OK; } @@ -274,12 +226,9 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset, { uint8_t *b = buf; switch(wt) { - case UPB_WIRE_TYPE_VARINT: - READ(skip_v_uint64_t(&b)); break; - case UPB_WIRE_TYPE_64BIT: - READ(skip_f_uint64_t(&b)); break; - case UPB_WIRE_TYPE_32BIT: - READ(skip_f_uint32_t(&b)); break; + case UPB_WIRE_TYPE_VARINT: READ(skip_v_uint64_t(&b)); break; + case UPB_WIRE_TYPE_64BIT: READ(skip_f_uint64_t(&b)); break; + case UPB_WIRE_TYPE_32BIT: READ(skip_f_uint32_t(&b)); break; case UPB_WIRE_TYPE_DELIMITED: { /* Have to get (not skip) the length to skip the bytes. */ uint32_t len; @@ -290,71 +239,27 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset, break; } case UPB_WIRE_TYPE_START_GROUP: - case UPB_WIRE_TYPE_END_GROUP: - return UPB_ERROR_GROUP; /* deprecated, no plans to support. */ + case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */ } return UPB_STATUS_OK; #undef READ } -/* Parses and processes the next value from buf. */ -upb_status_t upb_parse_field(struct upb_parse_state *s, - uint8_t *buf, - upb_field_number_t *fieldnum, - struct upb_tagged_value *val, - struct upb_tagged_wire_value *wv) +upb_status_t upb_parse_value(uint8_t **b, upb_field_type_t ft, + union upb_value *v) { - /* Check for end-of-message at the current stack depth. */ - if(unlikely(s->offset >= s->top->end_offset)) { - /* If the end offset isn't an exact field boundary, the pb is corrupt. */ - if(unlikely(s->offset != s->top->end_offset)) - return UPB_ERROR_BAD_SUBMESSAGE_END; - s->top--; - return UPB_STATUS_SUBMESSAGE_END; +#define CASE(t) \ + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: return get_ ## t(b, v); + switch(ft) { + CASE(DOUBLE) CASE(FLOAT) CASE(INT64) CASE(UINT64) CASE(INT32) CASE(FIXED64) + CASE(FIXED32) CASE(BOOL) CASE(UINT32) CASE(ENUM) CASE(SFIXED32) + CASE(SFIXED64) CASE(SINT32) CASE(SINT64) + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: + return get_UINT32(b, v); + default: return UPB_ERROR; /* Including GROUP. */ } - - struct upb_tag tag; - uint8_t *b = buf; - CHECK(parse_tag(&b, &tag)); - s->offset += (b-buf); - struct upb_field *fd = upb_find_field(s->top->fieldset, - tag.field_number); - upb_status_t unknown_value_status; - if(unlikely(!fd)) { - unknown_value_status = UPB_ERROR_UNKNOWN_VALUE; - goto unknown_value; - } - struct upb_type_info *info = &type_info[fd->type]; - if(unlikely(tag.wire_type != info->expected_wire_type)) { - unknown_value_status = UPB_ERROR_MISMATCHED_TYPE; - goto unknown_value; - } - - *fieldnum = tag.field_number; - val->field = fd; - CHECK(info->get(s, b, val)); - return UPB_STATUS_OK; - -unknown_value: - wv->type = tag.wire_type; - CHECK(parse_wire_value(buf, &s->offset, tag.wire_type, &wv->v)); - return unknown_value_status; -} - -void upb_init_parser( - struct upb_parse_state *state, - struct upb_fieldset *toplevel_fieldset) -{ - state->offset = 0; - state->top = state->stack; - state->limit = state->top + UPB_MAX_STACK; - state->top->fieldset = toplevel_fieldset; - state->top->end_offset = SIZE_MAX; -} - -static int compare_fields(const void *f1, const void *f2) -{ - return ((struct upb_field*)f1)->field_number - - ((struct upb_field*)f2)->field_number; +#undef CASE } -- cgit v1.2.3