summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joshua Haberman <joshua@reverberate.org> 2011-02-10 23:37:47 -0800
committer: Joshua Haberman <joshua@reverberate.org> 2011-02-10 23:37:47 -0800
commit: ee84a7da167d2211066c4a663d41febdf9544438 (patch)
tree: e5e8eaae95ae0e81a027077585bea658aa57235f
parent: bd1dfd397e02546b4d5a45fac76f3fb1bf685172 (diff)
Add (but do not activate) an SSE varint decoder.
-rw-r--r-- stream/upb_decoder.c 56
-rw-r--r-- tests/tests.c 8
2 files changed, 55 insertions, 9 deletions
diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c
index 7da8993..4a43c4b 100644
--- a/stream/upb_decoder.c
+++ b/stream/upb_decoder.c
@@ -16,8 +16,47 @@
// The key fast-path varint-decoding routine. Here we can assume we have at
// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of
// possibilities for optimization/experimentation here.
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val,
- upb_status *status) {
+
+#ifdef USE_SSE_VARINT_DECODING
+#include <emmintrin.h>
+
+// This works, but is empirically slower than the branchy version below. Why?
+// Most varints are very short. Next step: use branches for 1/2-byte varints,
+// but use the SSE version for 3-10 byte varints.
+INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
+ const char *p = *ptr;
+ __m128i val128 = _mm_loadu_si128((void*)p);
+ unsigned int continuation_bits = _mm_movemask_epi8(val128);
+ unsigned int bsr_val = ~continuation_bits;
+ int varint_length = __builtin_ffs(bsr_val);
+ if (varint_length > 10) {
+ upb_seterr(s, UPB_ERROR, "Unterminated varint");
+ return false;
+ }
+
+ uint16_t twob;
+ memcpy(&twob, p, 2);
+ twob &= 0x7f7f;
+ twob = ((twob & 0xff00) >> 1) | (twob & 0xff);
+
+ uint64_t eightb;
+ memcpy(&eightb, p + 2, 8);
+ eightb &= 0x7f7f7f7f7f7f7f7f;
+ eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff);
+ eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff);
+ eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff);
+
+ uint64_t all_bits = twob | (eightb << 14);
+ int varint_bits = varint_length * 7;
+ uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
+ *val = all_bits & mask;
+ *ptr = p + varint_length;
+ return true;
+}
+
+#else
+
+INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
const char *p = *ptr;
uint32_t low, high = 0;
uint32_t b;
@@ -33,14 +72,17 @@ INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val,
b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
- upb_seterr(status, UPB_ERROR, "Unterminated varint");
+ upb_seterr(s, UPB_ERROR, "Unterminated varint");
return false;
+
done:
- *ptr = p;
*val = ((uint64_t)high << 32) | low;
+ *ptr = p;
return true;
}
+#endif
+
/* Decoding/Buffering of individual values ************************************/
@@ -163,7 +205,7 @@ done:
}
INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) {
- if (s->len >= UPB_MAX_VARINT_ENCODED_SIZE) {
+ if (s->len >= 16) {
// Common (fast) case.
uint64_t val64;
const char *p = s->ptr;
@@ -315,7 +357,9 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val));
} else if (!upb_check_type(tag.wire_type, f->type)) {
// TODO: put more details in this error msg.
- upb_seterr(status, UPB_ERROR, "Field had incorrect type.");
+ upb_seterr(status, UPB_ERROR, "Field had incorrect type, name: " UPB_STRFMT, UPB_STRARG(f->name));
+ upb_printerr(status);
+ *(int*)0 = 0;
goto err;
}
diff --git a/tests/tests.c b/tests/tests.c
index 17e00f3..c691b18 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -17,17 +17,18 @@ static void test_get_v_uint64_t()
{
#define TEST(name, bytes, val) {\
upb_status status = UPB_STATUS_INIT; \
- const char name[] = bytes; \
+ const char name[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \
const char *name ## _buf = name; \
uint64_t name ## _val = 0; \
upb_decode_varint_fast(&name ## _buf, &name ## _val, &status); \
ASSERT(upb_ok(&status)); \
ASSERT(name ## _val == val); \
- ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
+ ASSERT(name ## _buf == name + sizeof(name) - 16); /* - 16: 15 padding bytes + NUL terminator */ \
}
TEST(zero, "\x00", 0ULL);
TEST(one, "\x01", 1ULL);
+ TEST(twob, "\x81\x14", 0xa01ULL);
TEST(twob, "\x81\x03", 0x181ULL);
TEST(threeb, "\x81\x83\x07", 0x1c181ULL);
TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181ULL);
@@ -39,7 +40,7 @@ static void test_get_v_uint64_t()
TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
#undef TEST
- char twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+ char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
const char *twelvebyte_buf = twelvebyte;
uint64_t twelvebyte_val = 0;
upb_status status = UPB_STATUS_INIT;
@@ -214,6 +215,7 @@ static void test_upb_symtab() {
}
upb_status status = UPB_STATUS_INIT;
upb_parsedesc(s, descriptor, &status);
+ upb_printerr(&status);
ASSERT(upb_ok(&status));
upb_status_uninit(&status);
upb_string_unref(descriptor);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback