// MurmurHash2 string hashing for upb_table.c.
//
// Provenance: this code was added to upb_table.c by commit
// 9c21992286a5b6a3559f0ae9e191b417a8e4c06c (Joshua Haberman,
// Sat, 20 Jun 2009 22:46:40 -0700), "Added MurmurHash for string hashing
// (not used yet)."  The same commit removed the trailing
// "Emit definition for inline functions." comment from upb_table.c and added
// the following description to the header comment of upb_table.h:
//
//   This file defines very fast int->struct (inttable) and string->struct
//   (strtable) hash tables. The struct can be of any size, and it is stored
//   in the table itself, for cache-friendly performance.
//
//   The table uses internal chaining with Brent's variation (inspired by the
//   Lua implementation of hash tables). The hash function for strings is
//   Austin Appleby's "MurmurHash."

#include <stddef.h>
#include <stdint.h>
#include <string.h>

//-----------------------------------------------------------------------------
// Building blocks shared by both MurmurHash2 variants below.

// Multiplier and shift used by every mixing step.  Per the original author
// these were generated offline; they are not really 'magic', they just happen
// to work well.
static const uint32_t murmur2_m = 0x5bd1e995;
static const int murmur2_r = 24;

// Reads the four bytes starting at p as a native-endian 32-bit word.
// memcpy is used instead of dereferencing a casted pointer so that the read
// is well-defined for any alignment, does not violate the effective-type
// (strict aliasing) rules, and does not discard the const qualifier.
static uint32_t murmur2_load32(const uint8_t *p)
{
  uint32_t word;
  memcpy(&word, p, sizeof(word));
  return word;
}

// Packs the first n bytes at p into the low bytes of a word, with p[0] in the
// least significant position.  n must be in the range 0..3; the result is 0
// when n is 0 (p is not read in that case).
static uint32_t murmur2_pack(const uint8_t *p, size_t n)
{
  uint32_t word = 0;
  while (n > 0) {
    n--;
    word = (word << 8) | p[n];
  }
  return word;
}

// Folds one 32-bit block k into the running hash h and returns the new hash.
static uint32_t murmur2_mix(uint32_t h, uint32_t k)
{
  k *= murmur2_m;
  k ^= k >> murmur2_r;
  k *= murmur2_m;
  return (h * murmur2_m) ^ k;
}

// Folds the last len bytes (len must be 0..3) at data into h.  When len is 0
// the hash is returned untouched: no multiply is applied.
static uint32_t murmur2_tail(uint32_t h, const uint8_t *data, size_t len)
{
  if (len == 0) {
    return h;
  }
  return (h ^ murmur2_pack(data, len)) * murmur2_m;
}

// Final mixes of the hash to ensure the last few bytes are well-incorporated.
static uint32_t murmur2_finish(uint32_t h)
{
  h ^= h >> 13;
  h *= murmur2_m;
  h ^= h >> 15;
  return h;
}

#ifdef UPB_UNALIGNED_READS_OK
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
// Reformatted and C99-ified by Joshua Haberman.
//
// Hashes the len bytes starting at key, using seed as the initial state, and
// returns a 32-bit hash.
//
// This variant consumes the input four bytes at a time starting directly at
// key, whatever its alignment.  (The upstream code assumed sizeof(int) == 4;
// in upb that limitation is removed by using uint32_t.)
//
// Limitations:
//  1. It will not work incrementally.
//  2. It will not produce the same results on little-endian and big-endian
//     machines, because each 4-byte block is read in native byte order.
static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
{
  const uint8_t *data = key;

  // Initialize the hash to a 'random' value.  Only the low 32 bits of len
  // take part, which is what the implicit conversion did before.
  uint32_t h = seed ^ (uint32_t)len;

  // Mix 4 bytes at a time into the hash.
  for (; len >= 4; data += 4, len -= 4) {
    h = murmur2_mix(h, murmur2_load32(data));
  }

  // Handle the last few bytes of the input array, then finalize.
  return murmur2_finish(murmur2_tail(h, data, len));
}

#else  // !UPB_UNALIGNED_READS_OK

//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but every 4-byte read starts on a 4-byte
// boundary - should be safer on certain platforms.
// Performance will be lower than MurmurHash2.
//
// Hashes the len bytes starting at key, using seed as the initial state, and
// returns a 32-bit hash.  When key is misaligned, the leading bytes up to the
// next 4-byte boundary are gathered one at a time and every block is then
// stitched together from two neighbouring aligned words.  The stitching
// shifts place earlier bytes in the less significant positions, i.e. they
// follow little-endian byte order.
static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
{
  const uint8_t *data = key;
  uint32_t h = seed ^ (uint32_t)len;
  const unsigned align = (unsigned)((uintptr_t)data & 3);

  if (align == 0 || len < 4) {
    // Either the input is already aligned, or it is too short to contain a
    // whole block (the loop then never runs and no word is read).
    for (; len >= 4; data += 4, len -= 4) {
      h = murmur2_mix(h, murmur2_load32(data));
    }
    return murmur2_finish(murmur2_tail(h, data, len));
  }

  // Misaligned input of at least 4 bytes.  'lead' bytes precede the next
  // 4-byte boundary; align is 1..3 here, so both shift counts are 8, 16 or 24.
  const unsigned lead = 4 - align;
  const unsigned sr = 8 * align;
  const unsigned sl = 8 * lead;

  // Pre-load the carry word: the lead bytes are parked in its high bytes so
  // that (t >> sr) later drops them into the low bytes of the next block.
  uint32_t t = murmur2_pack(data, lead) << sr;
  data += lead;
  len -= lead;

  // Mix: each block is the pending bytes of the previous word followed by the
  // first 'align' bytes of the current aligned word.
  while (len >= 4) {
    const uint32_t d = murmur2_load32(data);
    h = murmur2_mix(h, (t >> sr) | (d << sl));
    t = d;
    data += 4;
    len -= 4;
  }

  // Handle leftover data in the carry word ('lead' bytes are still pending).
  if (len >= align) {
    // Enough input remains to complete one more full block.
    const uint32_t d = murmur2_pack(data, align);
    h = murmur2_mix(h, (t >> sr) | (d << sl));
    data += align;
    len -= align;

    // Handle tail bytes.
    h = murmur2_tail(h, data, len);
  } else {
    // Fewer than four bytes in total remain (pending + len): they form the
    // tail.  At least one byte is pending, so the multiply always applies.
    const uint32_t d = murmur2_pack(data, len);
    h ^= (t >> sr) | (d << sl);
    h *= murmur2_m;
  }

  return murmur2_finish(h);
}

#endif  // UPB_UNALIGNED_READS_OK