From e252432a4176b6524e8c064673459e947ba11cb7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 14 Nov 2009 13:20:21 -0800 Subject: Refactoring: split defs into their own file, move private parsing funcs out of .h file. --- src/upb_parse.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) (limited to 'src/upb_parse.c') diff --git a/src/upb_parse.c b/src/upb_parse.c index 4ae2202..d1d535a 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -9,6 +9,198 @@ #include #include +/* Functions to read wire values. *********************************************/ + +// These functions are internal to the parser, but might be moved into an +// internal header file if we at some point in the future opt to do code +// generation, because the generated code would want to inline these functions. +// The same applies to the functions to read .proto values below. + +uint8_t *upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, + struct upb_status *status); + +// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). +INLINE uint8_t *upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val, + struct upb_status *status) +{ + // We inline this common case (1-byte varints), if that fails we dispatch to + // the full (non-inlined) version. + if((*buf & 0x80) == 0) { + *val = *buf & 0x7f; + return buf + 1; + } else { + return upb_get_v_uint64_t_full(buf, end, val, status); + } +} + +// Gets a varint -- called when we only need 32 bits of it. +INLINE uint8_t *upb_get_v_uint32_t(uint8_t *buf, uint8_t *end, + uint32_t *val, struct upb_status *status) +{ + uint64_t val64; + uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); + *val = (uint32_t)val64; // Discard the high bits. + return ret; +} + +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). +INLINE uint8_t *upb_get_f_uint32_t(uint8_t *buf, uint8_t *end, + uint32_t *val, struct upb_status *status) +{ + uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; + } +#if UPB_UNALIGNED_READS_OK + *val = *(uint32_t*)buf; +#else +#define SHL(val, bits) ((uint32_t)val << bits) + *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24); +#undef SHL +#endif + return uint32_end; +} + +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). +INLINE uint8_t *upb_get_f_uint64_t(uint8_t *buf, uint8_t *end, + uint64_t *val, struct upb_status *status) +{ + uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; + } +#if UPB_UNALIGNED_READS_OK + *val = *(uint64_t*)buf; +#else +#define SHL(val, bits) ((uint64_t)val << bits) + *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) | + SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56); +#undef SHL +#endif + return uint64_end; +} + +INLINE uint8_t *upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end, + struct upb_status *status) +{ + uint8_t *const maxend = buf + 10; + uint8_t last = 0x80; + for(; buf < (uint8_t*)end && (last & 0x80); buf++) + last = *buf; + + if(buf >= end && buf <= maxend && (last & 0x80)) { + status->code = UPB_STATUS_NEED_MORE_DATA; + buf = end; + } else if(buf > maxend) { + status->code = UPB_ERROR_UNTERMINATED_VARINT; + buf = end; + } + return buf; +} + +INLINE uint8_t *upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, + struct upb_status *status) +{ + uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; + } + return uint32_end; +} + +INLINE uint8_t *upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, + struct upb_status *status) +{ + uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; + } + return uint64_end; +} + +/* Functions to read .proto values. *******************************************/ + +// Performs zig-zag decoding, which is used by sint32 and sint64. +INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } + +// Use macros to define a set of two functions for each .proto type: +// +// // Reads and converts a .proto value from buf, placing it in d. +// // "end" indicates the end of the current buffer (if the buffer does +// // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). +// // On success, a pointer will be returned to the first byte that was +// // not consumed. +// uint8_t *upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d, +// struct upb_status *status); +// +// // Given an already read wire value s (source), convert it to a .proto +// // value and return it. +// int32_t upb_wvtov_INT32(uint32_t s); +// +// These are the most efficient functions to call if you want to decode a value +// for a known type. + +#define WVTOV(type, wire_t, val_t) \ + INLINE val_t upb_wvtov_ ## type(wire_t s) + +#define GET(type, v_or_f, wire_t, val_t, member_name) \ + INLINE uint8_t *upb_get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, \ + struct upb_status *status) { \ + wire_t tmp = 0; \ + uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \ + *d = upb_wvtov_ ## type(tmp); \ + return ret; \ + } + +#define T(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ + GET(type, v_or_f, wire_t, val_t, member_name) \ + WVTOV(type, wire_t, val_t) + +T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } +T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; } +T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; } +T(DOUBLE, f, uint64_t, double, _double) { + union upb_value v; + v.uint64 = s; + return v._double; +} +T(FLOAT, f, uint32_t, float, _float) { + union upb_value v; + v.uint32 = s; + return v._float; +} + +#undef WVTOV +#undef GET +#undef T + +// Parses a tag, places the result in *tag. +INLINE uint8_t *parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag, + struct upb_status *status) +{ + uint32_t tag_int; + uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return ret; +} + + /** * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version * handles 1 and 2 byte varints). -- cgit v1.2.3