From e373367fb70d4f432db1d3e9c21f5e0d93950e56 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 25 Jul 2009 15:46:34 -0700 Subject: Move parsing functions to header file in anticipation of code generation (want them inlined). --- src/upb_msg.c | 21 +++++----- src/upb_parse.c | 122 +++++--------------------------------------------------- src/upb_parse.h | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 141 insertions(+), 123 deletions(-) (limited to 'src') diff --git a/src/upb_msg.c b/src/upb_msg.c index f589d3d..6d6d934 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -231,7 +231,7 @@ void upb_msg_reuse_array(struct upb_array **arr, uint32_t size, upb_field_type_t } struct mm_upb_array *a = (void*)*arr; if(a->size < size) { - size = max(16, round_up_to_pow2(size)); + size = max(4, round_up_to_pow2(size)); size_t type_size = upb_type_info[t].size; a->a.elements._void = realloc(a->a.elements._void, size * type_size); /* Zero any newly initialized memory. */ @@ -246,7 +246,6 @@ void upb_msg_reuse_strref(struct upb_string **str) { upb_msg_reuse_str(str, 0); void upb_msg_reuse_submsg(void **msg, struct upb_msg *m) { if(!*msg) *msg = upb_msgdata_new(m); - else upb_msg_clear(*msg, m); /* Clears set bits, leaves pointers. */ } /* Serialization/Deserialization. ********************************************/ @@ -275,7 +274,6 @@ static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f) p = upb_array_getelementptr(*p.arr, len, f->type); assert(p._void); } - upb_msg_set(data, f); assert(p._void); return p; } @@ -286,8 +284,9 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end, struct upb_msg_parse_state *s = udata; struct upb_msg_field *f = user_field_desc; union upb_value_ptr p = get_value_ptr(s->top->data, f); + upb_msg_set(s->top->data, f); UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf)); - google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, s->top->m); + //google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, s->top->m); //upb_text_printfield(&s->p, *fd->name, f->type, upb_deref(p, f->type), stdout); return UPB_STATUS_OK; } @@ -297,6 +296,7 @@ static void str_cb(void *udata, struct upb_string *str, void *user_field_desc) struct upb_msg_parse_state *s = udata; struct upb_msg_field *f = user_field_desc; union upb_value_ptr p = get_value_ptr(s->top->data, f); + upb_msg_set(s->top->data, f); if(s->byref) { upb_msg_reuse_strref(p.str); **p.str = *str; @@ -312,13 +312,16 @@ static void submsg_start_cb(void *udata, void *user_field_desc) { struct upb_msg_parse_state *s = udata; struct upb_msg_field *f = user_field_desc; - union upb_value_ptr p = get_value_ptr(s->top->data, f); - assert(f->ref.msg); - upb_msg_reuse_submsg(p.msg, f->ref.msg); + struct upb_msg *m = f->ref.msg; + void *data = s->top->data; /* The message from the existing frame. */ + union upb_value_ptr p = get_value_ptr(data, f); + upb_msg_reuse_submsg(p.msg, m); + if(!upb_msg_isset(data, f) || !s->merge) + upb_msg_clear(*p.msg, m); + upb_msg_set(data, f); s->top++; - s->top->m = f->ref.msg; + s->top->m = m; s->top->data = *p.msg; - if(!s->merge) upb_msg_clear(s->top->data, s->top->m); //upb_text_push(&s->p, *s->top->m->descriptor->name, stdout); } diff --git a/src/upb_parse.c b/src/upb_parse.c index 96b7647..4e1f4a5 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -33,21 +33,13 @@ struct upb_type_info upb_type_info[] = { TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*) }; -/* Lowest-level functions -- these read integers from the input buffer. */ - -inline -static upb_status_t get_v_uint64_t(uint8_t *restrict buf, uint8_t *end, - uint64_t *restrict val, uint8_t **outbuf) +/* This is called by the inline version of the function if the varint turns out + * to be >= 2 bytes. */ +upb_status_t upb_get_v_uint64_t_full(uint8_t *restrict buf, uint8_t *end, + uint64_t *restrict val, + uint8_t **outbuf) { - if((*buf & 0x80) == 0) { - /* Single-byte varint -- very common case. */ - *val = *buf & 0x7f; - *outbuf = buf + 1; - } else if(buf <= end && (*(buf+1) & 0x80) == 0) { - /* Two-byte varint. */ - *val = (buf[0] & 0x7f) | ((buf[1] & 0x7f) << 7); - *outbuf = buf + 2; - } else if(buf + 10 <= end) { + if(buf + 10 <= end) { /* >2-byte varint, fast path. */ uint64_t cont = *(uint64_t*)(buf+2) | 0x7f7f7f7f7f7f7f7fULL; int num_bytes = __builtin_ffsll(~cont) / 8; @@ -95,49 +87,6 @@ static upb_status_t skip_v_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf return UPB_STATUS_OK; } -static upb_status_t get_v_uint32_t(uint8_t *restrict buf, uint8_t *end, - uint32_t *restrict val, uint8_t **outbuf) -{ - uint64_t val64; - UPB_CHECK(get_v_uint64_t(buf, end, &val64, outbuf)); - /* TODO: should we throw an error if any of the high bits in val64 are set? */ - *val = (uint32_t)val64; - return UPB_STATUS_OK; -} - -static upb_status_t get_f_uint32_t(uint8_t *restrict buf, uint8_t *end, - uint32_t *restrict val, uint8_t **outbuf) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; -#if UPB_UNALIGNED_READS_OK - *val = *(uint32_t*)buf; -#else -#define SHL(val, bits) ((uint32_t)val << bits) - *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24); -#undef SHL -#endif - *outbuf = uint32_end; - return UPB_STATUS_OK; -} - -static upb_status_t get_f_uint64_t(uint8_t *restrict buf, uint8_t *end, - uint64_t *restrict val, uint8_t **outbuf) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; -#if UPB_UNALIGNED_READS_OK - *val = *(uint64_t*)buf; -#else -#define SHL(val, bits) ((uint64_t)val << bits) - *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24) | - SHL(b[4], 32) | SHL(b[5], 40) | SHL(b[6], 48) | SHL(b[7], 56) | -#undef SHL -#endif - *outbuf = uint64_end; - return UPB_STATUS_OK; -} - static upb_status_t skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf) { uint8_t *uint32_end = buf + sizeof(uint32_t); @@ -154,62 +103,13 @@ static upb_status_t skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t **outbuf return UPB_STATUS_OK; } -static int32_t zz_decode_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - -/* Functions for reading wire values and converting them to values. These - * are generated with macros because they follow a higly consistent pattern. */ - -#define WVTOV(type, wire_t, val_t) \ - static void wvtov_ ## type(wire_t s, val_t *d) - -#define GET(type, v_or_f, wire_t, val_t, member_name) \ - static upb_status_t get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, uint8_t **outbuf) { \ - wire_t tmp; \ - UPB_CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, outbuf)); \ - wvtov_ ## type(tmp, d); \ - return UPB_STATUS_OK; \ - } - -#define T(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ - GET(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t) - -T(DOUBLE, f, uint64_t, double, _double) { memcpy(d, &s, sizeof(double)); } -T(FLOAT, f, uint32_t, float, _float) { memcpy(d, &s, sizeof(float)); } -T(INT32, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { *d = (int64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { *d = s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { *d = s; } -T(SINT32, v, uint32_t, int32_t, int32) { *d = zz_decode_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { *d = zz_decode_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { *d = s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { *d = s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; } -T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; } -T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } -#undef WVTOV -#undef GET -#undef T - -static upb_status_t parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag, uint8_t **outbuf) -{ - uint32_t tag_int; - UPB_CHECK(get_v_uint32_t(buf, end, &tag_int, outbuf)); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return UPB_STATUS_OK; -} - upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, union upb_wire_value *wv, uint8_t **outbuf) { switch(wt) { - case UPB_WIRE_TYPE_VARINT: return get_v_uint64_t(buf, end, &wv->varint, outbuf); - case UPB_WIRE_TYPE_64BIT: return get_f_uint64_t(buf, end, &wv->_64bit, outbuf); - case UPB_WIRE_TYPE_32BIT: return get_f_uint32_t(buf, end, &wv->_32bit, outbuf); + case UPB_WIRE_TYPE_VARINT: return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf); + case UPB_WIRE_TYPE_64BIT: return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf); + case UPB_WIRE_TYPE_32BIT: return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf); default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */ } } @@ -232,7 +132,7 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, { #define CASE(t, member_name) \ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ - return get_ ## t(buf, end, v.member_name, outbuf); + return upb_get_ ## t(buf, end, v.member_name, outbuf); switch(ft) { CASE(DOUBLE, _double) CASE(FLOAT, _float) @@ -324,7 +224,7 @@ upb_status_t upb_parse(struct upb_parse_state *s, void *_buf, size_t len, upb_field_type_t ft = tag_cb(udata, &tag, &user_field_desc); if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { int32_t delim_len; - UPB_CHECK(get_INT32(buf, end, &delim_len, &buf)); + UPB_CHECK(upb_get_INT32(buf, end, &delim_len, &buf)); uint8_t *delim_end = buf + delim_len; if(delim_end > end) { /* String ends beyond the data we have. */ diff --git a/src/upb_parse.h b/src/upb_parse.h index f675697..9e95c9c 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -132,9 +132,8 @@ upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len, extern upb_wire_type_t upb_expected_wire_types[]; /* Returns true if wt is the correct on-the-wire type for ft. */ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - /* With packed arrays, anything can be delimited (except groups). */ - return (wt == UPB_WIRE_TYPE_DELIMITED) || upb_type_info[ft].expected_wire_type == wt; - ; // && ft != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ); + /* This doesn't currently support packed arrays. */ + return upb_type_info[ft].expected_wire_type == wt; } /* Data-consuming functions (to be called from value cb). *********************/ @@ -151,6 +150,122 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, union upb_wire_value *wv, uint8_t **outbuf); +/* Low-level parsing functions. **********************************************/ + +upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, + uint8_t **outbuf); + +INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val, + uint8_t **outbuf) +{ + /* We inline these two common cases (short varints), if that fails we + * dispatch to the full (non-inlined) version. */ + if((*buf & 0x80) == 0) { + /* Single-byte varint -- very common case. */ + *val = *buf & 0x7f; + *outbuf = buf + 1; + return UPB_STATUS_OK; + } else if(buf <= end && (*(buf+1) & 0x80) == 0) { + /* Two-byte varint. */ + *val = (buf[0] & 0x7f) | ((buf[1] & 0x7f) << 7); + *outbuf = buf + 2; + return UPB_STATUS_OK; + } else { + return upb_get_v_uint64_t_full(buf, end, val, outbuf); + } +} + +INLINE upb_status_t upb_get_v_uint32_t(uint8_t *buf, uint8_t *end, + uint32_t *val, uint8_t **outbuf) +{ + uint64_t val64; + UPB_CHECK(upb_get_v_uint64_t(buf, end, &val64, outbuf)); + /* TODO: should we throw an error if any of the high bits in val64 are set? */ + *val = (uint32_t)val64; + return UPB_STATUS_OK; +} + +INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end, + uint32_t *val, uint8_t **outbuf) +{ + uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; +#if UPB_UNALIGNED_READS_OK + *val = *(uint32_t*)buf; +#else +#define SHL(val, bits) ((uint32_t)val << bits) + *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24); +#undef SHL +#endif + *outbuf = uint32_end; + return UPB_STATUS_OK; +} + +INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end, + uint64_t *val, uint8_t **outbuf) +{ + uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; +#if UPB_UNALIGNED_READS_OK + *val = *(uint64_t*)buf; +#else +#define SHL(val, bits) ((uint64_t)val << bits) + *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24) | + SHL(b[4], 32) | SHL(b[5], 40) | SHL(b[6], 48) | SHL(b[7], 56) | +#undef SHL +#endif + *outbuf = uint64_end; + return UPB_STATUS_OK; +} + +INLINE int32_t zz_decode_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +INLINE int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } + + +#define WVTOV(type, wire_t, val_t) \ + INLINE void upb_wvtov_ ## type(wire_t s, val_t *d) + +#define GET(type, v_or_f, wire_t, val_t, member_name) \ + INLINE upb_status_t upb_get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, uint8_t **outbuf) { \ + wire_t tmp; \ + UPB_CHECK(upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, outbuf)); \ + upb_wvtov_ ## type(tmp, d); \ + return UPB_STATUS_OK; \ + } + +#define T(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ + GET(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t) + +T(DOUBLE, f, uint64_t, double, _double) { memcpy(d, &s, sizeof(double)); } +T(FLOAT, f, uint32_t, float, _float) { memcpy(d, &s, sizeof(float)); } +T(INT32, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { *d = (int64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { *d = s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { *d = s; } +T(SINT32, v, uint32_t, int32_t, int32) { *d = zz_decode_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { *d = zz_decode_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { *d = s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { *d = s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; } +T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; } +#undef WVTOV +#undef GET +#undef T + +INLINE upb_status_t parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag, + uint8_t **outbuf) +{ + uint32_t tag_int; + UPB_CHECK(upb_get_v_uint32_t(buf, end, &tag_int, outbuf)); + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return UPB_STATUS_OK; +} + #ifdef __cplusplus } /* extern "C" */ #endif -- cgit v1.2.3