diff options
author | Josh Haberman <jhaberman@gmail.com> | 2013-10-24 12:43:19 -0700 |
---|---|---|
committer | Josh Haberman <jhaberman@gmail.com> | 2013-10-24 12:43:19 -0700 |
commit | 26d98ca94f2f049e8767b4a9a33d185a3d7ea0fd (patch) | |
tree | 340bcf495f06ed05c9f3fb423f210caf4edce2b1 /upb/table.c | |
parent | 61109fca1f967771c21dc7184aee35f3b439c577 (diff) |
Merge from Google-internal development:
- rewritten decoder; interpreted decoder is bytecode-based,
JIT decoder no longer falls back to the interpreter.
- C++ improvements: C++11-compatible iterators, upb::reffed_ptr
for RAII refcounting, better upcast/downcast support.
- removed the gross upb_value abstraction from public upb.h.
Diffstat (limited to 'upb/table.c')
-rw-r--r-- | upb/table.c | 146 |
1 files changed, 99 insertions, 47 deletions
diff --git a/upb/table.c b/upb/table.c index 40f841d..0b43fd9 100644 --- a/upb/table.c +++ b/upb/table.c @@ -7,23 +7,31 @@ * Implementation is heavily inspired by Lua's ltable.c. */ -#include "upb/table.h" +#include "upb/table.int.h" #include <stdlib.h> #include <string.h> #define UPB_MAXARRSIZE 16 // 64k. +// From Chromium. +#define ARRAY_SIZE(x) \ + ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) + static const double MAX_LOAD = 0.85; -// The minimum percentage of an array part that we will allow. This is a -// speed/memory-usage tradeoff (though it's not straightforward because of +// The minimum utilization of the array part of a mixed hash/array table. This +// is a speed/memory-usage tradeoff (though it's not straightforward because of // cache effects). The lower this is, the more memory we'll use. static const double MIN_DENSITY = 0.1; -int upb_log2(uint64_t v) { +bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } + +int log2ceil(uint64_t v) { int ret = 0; + bool pow2 = is_pow2(v); while (v >>= 1) ret++; + ret = pow2 ? ret : ret + 1; // Ceiling. return UPB_MIN(UPB_MAXARRSIZE, ret); } @@ -49,9 +57,9 @@ static bool isfull(upb_table *t) { return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD; } -static bool init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) { +static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) { t->count = 0; - t->type = type; + t->ctype = ctype; t->size_lg2 = size_lg2; t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0; size_t bytes = upb_table_size(t) * sizeof(upb_tabent); @@ -88,7 +96,7 @@ static bool lookup(const upb_table *t, upb_tabkey key, upb_value *v, const upb_tabent *e = findentry(t, key, hash, eql); if (e) { if (v) { - _upb_value_setval(v, e->val, t->type); + _upb_value_setval(v, e->val, t->ctype); } return true; } else { @@ -100,7 +108,7 @@ static bool lookup(const upb_table *t, upb_tabkey key, upb_value *v, static void insert(upb_table *t, upb_tabkey key, upb_value val, hashfunc_t *hash, eqlfunc_t *eql) { assert(findentry(t, key, hash, eql) == NULL); - assert(val.type == t->type); + assert(val.ctype == t->ctype); t->count++; upb_tabent *mainpos_e = (upb_tabent*)hash(t, key); upb_tabent *our_e = mainpos_e; @@ -145,7 +153,7 @@ static bool rm(upb_table *t, upb_tabkey key, upb_value *val, // Element to remove is at the head of its chain. t->count--; if (val) { - _upb_value_setval(val, chain->val, t->type); + _upb_value_setval(val, chain->val, t->ctype); } if (chain->next) { upb_tabent *move = (upb_tabent*)chain->next; @@ -164,7 +172,7 @@ static bool rm(upb_table *t, upb_tabkey key, upb_value *val, if (chain->next) { // Found element to remove. if (val) { - _upb_value_setval(val, chain->next->val, t->type); + _upb_value_setval(val, chain->next->val, t->ctype); } upb_tabent *rm = (upb_tabent*)chain->next; if (removed) *removed = rm->key; @@ -204,8 +212,8 @@ static bool streql(upb_tabkey k1, upb_tabkey k2) { return strcmp(k1.str, k2.str) == 0; } -bool upb_strtable_init(upb_strtable *t, upb_ctype_t type) { - return init(&t->t, type, 2); +bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) { + return init(&t->t, ctype, 2); } void upb_strtable_uninit(upb_strtable *t) { @@ -218,7 +226,7 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { if (isfull(&t->t)) { // Need to resize. New table of double the size, add old elements to it. upb_strtable new_table; - if (!init(&new_table.t, t->t.type, t->t.size_lg2 + 1)) + if (!init(&new_table.t, t->t.ctype, t->t.size_lg2 + 1)) return false; upb_strtable_iter i; upb_strtable_begin(&i, t); @@ -267,6 +275,21 @@ static bool inteql(upb_tabkey k1, upb_tabkey k2) { return k1.num == k2.num; } +static _upb_value *inttable_val(upb_inttable *t, uintptr_t key) { + if (key < t->array_size) { + return upb_arrhas(t->array[key]) ? (_upb_value*)&t->array[key] : NULL; + } else { + upb_tabent *e = + (upb_tabent*)findentry(&t->t, upb_intkey(key), &upb_inthash, &inteql); + return e ? &e->val : NULL; + } +} + +static const _upb_value *inttable_val_const(const upb_inttable *t, + uintptr_t key) { + return inttable_val((upb_inttable*)t, key); +} + size_t upb_inttable_count(const upb_inttable *t) { return t->t.count + t->array_count; } @@ -285,9 +308,9 @@ static void check(upb_inttable *t) { #endif } -bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type, +bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype, size_t asize, int hsize_lg2) { - if (!init(&t->t, type, hsize_lg2)) return false; + if (!init(&t->t, ctype, hsize_lg2)) return false; // Always make the array part at least 1 long, so that we know key 0 // won't be in the hash part, which simplifies things. t->array_size = UPB_MAX(1, asize); @@ -303,8 +326,8 @@ bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type, return true; } -bool upb_inttable_init(upb_inttable *t, upb_ctype_t type) { - return upb_inttable_sizedinit(t, type, 0, 4); +bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) { + return upb_inttable_sizedinit(t, ctype, 0, 4); } void upb_inttable_uninit(upb_inttable *t) { @@ -322,12 +345,12 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { if (isfull(&t->t)) { // Need to resize the hash part, but we re-use the array part. upb_table new_table; - if (!init(&new_table, t->t.type, t->t.size_lg2 + 1)) + if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1)) return false; const upb_tabent *e; for (e = begin(&t->t); e; e = next(&t->t, e)) { upb_value v; - _upb_value_setval(&v, e->val, t->t.type); + _upb_value_setval(&v, e->val, t->t.ctype); insert(&new_table, e->key, v, &upb_inthash, &inteql); } @@ -343,15 +366,17 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { } bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) { - if (key < t->array_size) { - bool ret = upb_arrhas(t->array[key]); - if (ret && v) { - _upb_value_setval(v, t->array[key], t->t.type); - } - return ret; - } else { - return lookup(&t->t, upb_intkey(key), v, &upb_inthash, &inteql); - } + const _upb_value *table_v = inttable_val_const(t, key); + if (!table_v) return false; + if (v) _upb_value_setval(v, *table_v, t->t.ctype); + return true; +} + +bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) { + _upb_value *table_v = inttable_val(t, key); + if (!table_v) return false; + *table_v = val.val; + return true; } bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { @@ -360,7 +385,7 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { if (upb_arrhas(t->array[key])) { t->array_count--; if (val) { - _upb_value_setval(val, t->array[key], t->t.type); + _upb_value_setval(val, t->array[key], t->t.ctype); } ((upb_value*)t->array)[key] = upb_value_uint64(-1); success = true; @@ -400,31 +425,58 @@ bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { } void upb_inttable_compact(upb_inttable *t) { - // Find the largest power of two that satisfies the MIN_DENSITY definition. + // Create a power-of-two histogram of the table keys. int counts[UPB_MAXARRSIZE + 1] = {0}; + uintptr_t max_key = 0; upb_inttable_iter i; - for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) - counts[upb_log2(upb_inttable_iter_key(&i))]++; - // Int part must always be at least 1 entry large to catch lookups of key 0. - // Key 0 must always be in the array part because "0" in the hash part - // denotes an empty entry. - int count = UPB_MAX(upb_inttable_count(t), 1); - int size; - for (size = UPB_MAXARRSIZE; size > 1; size--) { - count -= counts[size]; - if (count >= (1 << size) * MIN_DENSITY) break; + upb_inttable_begin(&i, t); + for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { + uintptr_t key = upb_inttable_iter_key(&i); + if (key > max_key) { + max_key = key; + } + counts[log2ceil(key)]++; } + int arr_size; + int arr_count = upb_inttable_count(t); + + if (upb_inttable_count(t) >= max_key * MIN_DENSITY) { + // We can put 100% of the entries in the array part. + arr_size = max_key + 1; + } else { + // Find the largest power of two that satisfies the MIN_DENSITY definition. + for (int size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) { + arr_size = 1 << size_lg2; + arr_count -= counts[size_lg2]; + if (arr_count >= arr_size * MIN_DENSITY) { + break; + } + } + } + + // Array part must always be at least 1 entry large to catch lookups of key + // 0. Key 0 must always be in the array part because "0" in the hash part + // denotes an empty entry. + arr_size = UPB_MAX(arr_size, 1); + // Insert all elements into new, perfectly-sized table. - upb_inttable new_table; - int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD; + int hash_count = upb_inttable_count(t) - arr_count; + int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0; + int hashsize_lg2 = log2ceil(hash_size); + assert(hash_count >= 0); - upb_inttable_sizedinit(&new_table, t->t.type, size, upb_log2(hashsize)); - for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) - upb_inttable_insert( - &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i)); + upb_inttable new_t; + upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2); + upb_inttable_begin(&i, t); + for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { + uintptr_t k = upb_inttable_iter_key(&i); + upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i)); + } + assert(new_t.array_size == arr_size); + assert(new_t.t.size_lg2 == hashsize_lg2); upb_inttable_uninit(t); - *t = new_table; + *t = new_t; } void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) { |