From 4a99abba123fc1d2bef62778846a1f27b2012de0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 6 May 2011 13:18:38 -0700 Subject: Refactor varint encoding/decoding. --- src/upb_decoder.c | 2 +- src/upb_decoder_x86.dasc | 25 +------ src/upb_varint.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++ src/upb_varint_decoder.h | 155 --------------------------------------- tests/test_varint.c | 2 +- 5 files changed, 191 insertions(+), 180 deletions(-) create mode 100644 src/upb_varint.h delete mode 100644 src/upb_varint_decoder.h diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 0ce18e7..d952954 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -9,7 +9,7 @@ #include #include #include "upb_decoder.h" -#include "upb_varint_decoder.h" +#include "upb_varint.h" #ifdef UPB_USE_JIT_X64 #define Dst_DECL upb_decoder *d diff --git a/src/upb_decoder_x86.dasc b/src/upb_decoder_x86.dasc index 4088eba..d02f7d1 100644 --- a/src/upb_decoder_x86.dasc +++ b/src/upb_decoder_x86.dasc @@ -249,28 +249,7 @@ void upb_reg_jit_gdb(upb_decoder *d) { |.endmacro #include -#include "upb_varint_decoder.h" - -static size_t upb_value_size(uint64_t val) { -#ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. -#else - int high_bit = 0; - uint64_t tmp = val; - while(tmp >>= 1) high_bit++; -#endif - return val == 0 ? 1 : high_bit / 8 + 1; -} - -static uint64_t upb_encode_varint(uint64_t val) -{ - uint64_t ret = 0; - for (int bitpos = 0; val; bitpos+=8, val >>=7) { - if (bitpos > 0) ret |= (1 << (bitpos-1)); - ret |= (val & 0x7f) << bitpos; - } - return ret; -} +#include "upb_varint.h" // PTR should point to the beginning of the tag. 
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, @@ -493,7 +472,7 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) { for(int i = 0; i < num_keys; i++) { uint32_t key = keys[i]; upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key); - uint32_t tag = upb_encode_varint(key); + uint32_t tag = upb_vencode(key); if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f); last_tag = tag; last_f = f; diff --git a/src/upb_varint.h b/src/upb_varint.h new file mode 100644 index 0000000..7ca93ec --- /dev/null +++ b/src/upb_varint.h @@ -0,0 +1,187 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * A number of routines for varint manipulation (we keep them all around to + * have multiple approaches available for benchmarking). + */ + +#ifndef UPB_VARINT_DECODER_H_ +#define UPB_VARINT_DECODER_H_ + +#include "upb.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Decoding *******************************************************************/ + +// All decoding functions return this struct by value. +typedef struct { + const char *p; // NULL if the varint was unterminated. + uint64_t val; +} upb_decoderet; + +// A basic branch-based decoder, uses 32-bit values to get good performance +// on 32-bit architectures (but performs well on 64-bits also). 
+//
+// Reads up to ten bytes starting at p, stopping at the first byte whose
+// continuation bit (0x80) is clear.  No length is passed in, so the caller
+// must guarantee that the bytes examined are readable.
+//
+// Returns {p, val} where p points one past the last byte consumed and val is
+// the decoded value, or {NULL, 0} if all ten bytes had the continuation bit
+// set (unterminated varint).
+INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
+  upb_decoderet r = {NULL, 0};
+  uint32_t low, high = 0;
+  uint32_t b;
+  // Bytes 1-4 contribute payload bits 0-27, all of which land in "low".
+  b = *(p++); low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  // Byte 5 straddles the 32-bit boundary: its low 4 payload bits go to the
+  // top of "low" (the rest are shifted out of the 32-bit value) and its high
+  // 3 payload bits become bits 0-2 of "high".
+  b = *(p++); low  |= (b & 0x7f) << 28;
+              high  = (b & 0x7f) >>  4; if(!(b & 0x80)) goto done;
+  // Bytes 6-10 fill the rest of "high".  For byte 10 only the lowest payload
+  // bit survives the 32-bit shift by 31; the others are discarded.
+  b = *(p++); high |= (b & 0x7f) <<  3; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
+  // Ten bytes consumed and still no terminator: report failure (r.p == NULL).
+  return r;
+
+done:
+  r.val = ((uint64_t)high << 32) | low;
+  r.p = p;
+  return r;
+}
+
+// Like the previous, but uses 64-bit values.
+//
+// Same contract as upb_vdecode_branch32(): examines at most ten bytes, returns
+// {one-past-last-byte, value} on success and {NULL, 0} if no byte had its
+// continuation bit clear.  Each byte's 7 payload bits are OR-ed into a single
+// 64-bit accumulator at shifts 0, 7, ..., 63; for the tenth byte only the
+// lowest payload bit fits below bit 64.
+INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
+  uint64_t val;
+  uint64_t b;
+  upb_decoderet r = {(void*)0, 0};
+  b = *(p++); val  = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
+  // Unterminated after ten bytes: r still holds {NULL, 0}.
+  return r;
+
+done:
+  r.val = val;
+  r.p = p;
+  return r;
+}
+
+// Given an encoded varint v, returns an integer with a single bit set that
+// indicates the end of the varint.
+// Subtracting one from this value will
+// yield a mask that leaves only bits that are part of the varint.  Returns
+// 0 if the varint is unterminated.
+//
+// v holds up to eight encoded bytes; the bit returned is the 0x80 position of
+// the first byte (counting from the least-significant byte of v) whose
+// continuation bit is clear.
+INLINE uint64_t upb_get_vstopbit(uint64_t v) {
+  // Force every payload bit to 1 so only the continuation bits of v remain
+  // significant.
+  uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
+  // Adding 1 carries through the low run of 1s and sets the lowest zero bit
+  // of cbits; AND-ing with ~cbits isolates exactly that bit.  If all eight
+  // continuation bits are set, cbits is all ones, cbits+1 wraps to 0 and the
+  // result is 0.
+  return ~cbits & (cbits+1);
+}
+// Mask covering every bit below the stop bit.  When upb_get_vstopbit() returns
+// 0 the unsigned subtraction wraps and the mask is all ones.
+INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; }
+
+// Decodes a varint of at most 8 bytes without branching (except for error).
+//
+// Always copies 8 bytes from r.p, whatever the length of the varint, so that
+// many bytes must be readable.  The decoded bits are shifted left by 14 and
+// OR-ed into r.val, i.e. r.val is expected to already hold 14 low bits (the
+// upb_vdecode_check2_* wrappers pass in two pre-decoded bytes).
+// NOTE(review): treating the byte at r.p as the least-significant byte of b
+// presumes a little-endian host -- confirm.
+//
+// Returns {r.p + bytes consumed, combined value}, or {NULL, 0} if none of the
+// 8 bytes terminates the varint.
+INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+  uint64_t b;
+  memcpy(&b, r.p, sizeof(b));
+  uint64_t stop_bit = upb_get_vstopbit(b);
+  // Drop everything at and above the stop bit (bytes past the varint).
+  b &= (stop_bit - 1);
+  // Squeeze out the continuation-bit gaps: merge 7-bit groups pairwise into
+  // 14-bit, then 28-bit, then one 56-bit field by shifting the upper half of
+  // each pair down by 1, 2 and 4 bits.
+  b =  ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
+  b =  ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
+  b =  ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
+  // The error check comes after the arithmetic so the common path has a
+  // single branch.
+  if (stop_bit == 0) {
+    // Error: unterminated varint.
+    upb_decoderet err_r = {(void*)0, 0};
+    return err_r;
+  }
+  // stop_bit sits at bit 8k+7 of byte k, so (ctz + 1) / 8 == k + 1, the
+  // number of bytes this varint occupied.
+  upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                        r.val | (b << 14)};
+  return my_r;
+}
+
+// Another implementation of the previous.
+//
+// Same inputs, outputs and 8-byte read as upb_vdecode_max8_wright().  Instead
+// of shifting the upper half of each pair down, each step adds a multiple of
+// the lower half (x2, x4, x16 in total) so the lower half moves up to meet
+// the upper half.  The packed value therefore ends up 7 bits above bit 0,
+// which is why the final shift is 7 rather than 14.
+INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+  uint64_t b;
+  memcpy(&b, r.p, sizeof(b));
+  uint64_t stop_bit = upb_get_vstopbit(b);
+  // Clear all continuation bits and everything past the end of the varint.
+  b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
+  b +=       b & 0x007f007f007f007fULL;
+  b +=  3 * (b & 0x0000ffff0000ffffULL);
+  b += 15 * (b & 0x00000000ffffffffULL);
+  if (stop_bit == 0) {
+    // Error: unterminated varint.
+    upb_decoderet err_r = {(void*)0, 0};
+    return err_r;
+  }
+  // (ctz + 1) / 8 is the number of bytes consumed (see the wright variant).
+  upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                        r.val | (b << 7)};
+  return my_r;
+}
+
+// Template for a function that checks the first two bytes with branching
+// and dispatches 2-10 bytes with a separate function.
+//
+// The generated upb_vdecode_check2_<name>(p) copies two bytes from p into a
+// zeroed 64-bit scratch value.  If the first byte terminates the varint it
+// returns {p + 1, 7-bit value}; if the second does it returns {p + 2, 14-bit
+// value}.  Otherwise it hands {p + 2, 14 bits decoded so far} to
+// decode_max8_function, which decodes the remaining bytes and may return
+// {NULL, 0} for an unterminated varint.
+// NOTE(review): reading the first byte from the low 8 bits of b presumes a
+// little-endian host -- confirm.
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) {              \
+  uint64_t b = 0;                                                              \
+  upb_decoderet r = {p, 0};                                                    \
+  memcpy(&b, r.p, 2);                                                          \
+  if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }          \
+  r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);                                    \
+  r.p = p + 2;                                                                 \
+  if ((b & 0x8000) == 0) return r;                                             \
+  return decode_max8_function(r);                                              \
+}
+
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
+#undef UPB_VARINT_DECODER_CHECK2
+
+// Our canonical functions for decoding varints, based on the currently
+// favored best-performing implementations.
+INLINE upb_decoderet upb_vdecode_fast(const char *p) {
+  // Use check2_massimino where long is 64 bits, branch32 otherwise.  The
+  // condition is a compile-time constant, so only one call survives.
+  if (sizeof(long) == 8)
+    return upb_vdecode_check2_massimino(p);
+  else
+    return upb_vdecode_branch32(p);
+}
+
+// Canonical "at most 8 more bytes" decoder; r carries the position and the
+// bits decoded so far, exactly as for upb_vdecode_max8_massimino().
+INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
+  return upb_vdecode_max8_massimino(r);
+}
+
+
+/* Encoding *******************************************************************/
+
+// Returns the number of bytes needed to hold val, i.e. the index of its
+// highest non-zero byte plus one.  Zero is reported as occupying one byte.
+INLINE size_t upb_value_size(uint64_t val) {
+  // Handle zero up front: __builtin_clzll(0) has an undefined result, so it
+  // must not be evaluated for that input.
+  if (val == 0) return 1;
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of the top set bit.
+#else
+  int high_bit = 0;
+  uint64_t tmp = val;
+  while(tmp >>= 1) high_bit++;
+#endif
+  return high_bit / 8 + 1;
+}
+
+// Encodes val as a varint and returns the encoded bytes packed into a
+// uint64_t, first byte in the least-significant position.  A 32-bit input
+// needs at most 5 bytes, so the result always fits.  val == 0 yields 0, which
+// is also the one-byte encoding of zero.
+//
+// Currently only works with 32-bit varints.
+INLINE uint64_t upb_vencode(uint32_t val) {
+  uint64_t ret = 0;
+  for (int bitpos = 0; val; bitpos+=8, val >>=7) {
+    // More payload follows, so set the continuation bit (bit 7) of the
+    // previously written byte.  Both shifts are done in 64 bits: at
+    // bitpos == 32 a plain int/uint32_t shift would overflow.
+    if (bitpos > 0) ret |= ((uint64_t)1 << (bitpos-1));
+    ret |= (uint64_t)(val & 0x7f) << bitpos;
+  }
+  return ret;
+}
+
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_VARINT_DECODER_H_ */
diff --git a/src/upb_varint_decoder.h b/src/upb_varint_decoder.h
deleted file mode 100644
index d7af90a..0000000
--- a/src/upb_varint_decoder.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- * - * Copyright (c) 2011 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * A number of routines for varint decoding (we keep them all around to have - * multiple approaches available for benchmarking). All of these functions - * require the buffer to have at least 10 bytes available; if we don't know - * for sure that there are 10 bytes, then there is only one viable option - * (branching on every byte). - */ - -#ifndef UPB_VARINT_DECODER_H_ -#define UPB_VARINT_DECODER_H_ - -#include "upb.h" -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// All decoding functions return this struct by value. -typedef struct { - const char *p; // NULL if the varint was unterminated. - uint64_t val; -} upb_decoderet; - -// A basic branch-based decoder, uses 32-bit values to get good performance -// on 32-bit architectures (but performs well on 64-bits also). -INLINE upb_decoderet upb_vdecode_branch32(const char *p) { - upb_decoderet r = {NULL, 0}; - uint32_t low, high = 0; - uint32_t b; - b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = ((uint64_t)high << 32) | low; - r.p = p; - return r; -} - -// Like the previous, but uses 64-bit values. 
-INLINE upb_decoderet upb_vdecode_branch64(const char *p) { - uint64_t val; - uint64_t b; - upb_decoderet r = {(void*)0, 0}; - b = *(p++); val = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = val; - r.p = p; - return r; -} - -// Decodes a varint of at most 8 bytes without branching (except for error). -INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { - uint64_t b; - memcpy(&b, r.p, sizeof(b)); - uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL; - uint64_t stop_bit = ~cbits & (cbits+1); - b &= (stop_bit - 1); - b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f); - b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff); - b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff); - if (stop_bit == 0) { - // Error: unterminated varint. - upb_decoderet err_r = {(void*)0, 0}; - return err_r; - } - upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8), - r.val | (b << 14)}; - return my_r; -} - -// Another implementation of the previous. 
-INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { - uint64_t b; - memcpy(&b, r.p, sizeof(b)); - uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL; - uint64_t stop_bit = ~cbits & (cbits + 1); - b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1); - b += b & 0x007f007f007f007fULL; - b += 3 * (b & 0x0000ffff0000ffffULL); - b += 15 * (b & 0x00000000ffffffffULL); - if (stop_bit == 0) { - // Error: unterminated varint. - upb_decoderet err_r = {(void*)0, 0}; - return err_r; - } - upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8), - r.val | (b << 7)}; - return my_r; -} - -// Template for a function that checks the first two bytes with branching -// and dispatches 2-10 bytes with a separate function. -#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \ - uint64_t b = 0; \ - upb_decoderet r = {p, 0}; \ - memcpy(&b, r.p, 2); \ - if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \ - r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \ - r.p = p + 2; \ - if ((b & 0x8000) == 0) return r; \ - return decode_max8_function(r); \ -} - -UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright); -UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); -#undef UPB_VARINT_DECODER_CHECK2 - -// Our canonical functions for decoding varints, based on the currently -// favored best-performing implementations. -INLINE upb_decoderet upb_vdecode_fast(const char *p) { - // Use nobranch2 on 64-bit, branch32 on 32-bit. 
- if (sizeof(long) == 8) - return upb_vdecode_check2_massimino(p); - else - return upb_vdecode_branch32(p); -} - -INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { - return upb_vdecode_max8_massimino(r); -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_VARINT_DECODER_H_ */ diff --git a/tests/test_varint.c b/tests/test_varint.c index f0a8993..9790295 100644 --- a/tests/test_varint.c +++ b/tests/test_varint.c @@ -4,7 +4,7 @@ * Copyright (c) 2011 Google Inc. See LICENSE for details. */ -#include "upb_varint_decoder.h" +#include "upb_varint.h" #include "upb_test.h" static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) { -- cgit v1.2.3