summaryrefslogtreecommitdiff
path: root/upb/table.c
diff options
context:
space:
mode:
authorJosh Haberman <haberman@google.com>2013-02-15 16:27:18 -0800
committerJosh Haberman <haberman@google.com>2013-02-15 16:27:18 -0800
commit7d3e2bd2c4cfd1296d1d6f996d7548de26540d41 (patch)
treeb4b35967b3322c65cfb1a32220e8718de09d85fc /upb/table.c
parentea198bdcf947ba4bd51474bdd4f7b82b5e4cf41d (diff)
Sync with 8 months of Google-internal development.
Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink).
Diffstat (limited to 'upb/table.c')
-rw-r--r--upb/table.c208
1 files changed, 148 insertions, 60 deletions
diff --git a/upb/table.c b/upb/table.c
index 1cf944a..21457a0 100644
--- a/upb/table.c
+++ b/upb/table.c
@@ -5,14 +5,10 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*
* Implementation is heavily inspired by Lua's ltable.c.
- *
- * TODO: for table iteration we use (array - 1) in several places; is this
- * undefined behavior? If so find a better solution.
*/
#include "upb/table.h"
-#include <assert.h>
#include <stdlib.h>
#include <string.h>
@@ -35,47 +31,56 @@ int upb_log2(uint64_t v) {
return UPB_MIN(UPB_MAXARRSIZE, ret);
}
+char *upb_strdup(const char *s) {
+ size_t n = strlen(s) + 1;
+ char *p = malloc(n);
+ if (p) memcpy(p, s, n);
+ return p;
+}
+
static upb_tabkey upb_strkey(const char *str) {
upb_tabkey k;
k.str = (char*)str;
return k;
}
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
-typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
+typedef const upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2);
/* Base table (shared code) ***************************************************/
-static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
-
static bool upb_table_isfull(upb_table *t) {
return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
}
-static bool upb_table_init(upb_table *t, uint8_t size_lg2) {
+static bool upb_table_init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) {
t->count = 0;
+ t->type = type;
t->size_lg2 = size_lg2;
+ t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
size_t bytes = upb_table_size(t) * sizeof(upb_tabent);
- t->mask = upb_table_size(t) - 1;
- t->entries = malloc(bytes);
- if (!t->entries) return false;
- memset(t->entries, 0, bytes);
+ if (bytes > 0) {
+ t->entries = malloc(bytes);
+ if (!t->entries) return false;
+ memset((void*)t->entries, 0, bytes);
+ } else {
+ t->entries = NULL;
+ }
return true;
}
-static void upb_table_uninit(upb_table *t) { free(t->entries); }
-
-static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; }
+static void upb_table_uninit(upb_table *t) { free((void*)t->entries); }
-static upb_tabent *upb_table_emptyent(const upb_table *t) {
- upb_tabent *e = t->entries + upb_table_size(t);
+static upb_tabent *upb_table_emptyent(upb_table *t) {
+ upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t);
while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
}
-static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
- upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
- upb_tabent *e = hash(t, key);
+static const upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
+ upb_hashfunc_t *hash,
+ upb_eqlfunc_t *eql) {
+ if (t->size_lg2 == 0) return NULL;
+ const upb_tabent *e = hash(t, key);
if (upb_tabent_isempty(e)) return NULL;
while (1) {
if (eql(e->key, key)) return &e->val;
@@ -86,14 +91,19 @@ static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
// The given key must not already exist in the table.
static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
- (void)eql;
assert(upb_table_lookup(t, key, hash, eql) == NULL);
+ assert(val.type == t->type);
t->count++;
- upb_tabent *mainpos_e = hash(t, key);
+ upb_tabent *mainpos_e = (upb_tabent*)hash(t, key);
upb_tabent *our_e = mainpos_e;
- if (!upb_tabent_isempty(mainpos_e)) { // Collision.
+ if (upb_tabent_isempty(mainpos_e)) {
+ // Our main position is empty; use it.
+ our_e->next = NULL;
+ } else {
+ // Collision.
upb_tabent *new_e = upb_table_emptyent(t);
- upb_tabent *chain = hash(t, mainpos_e->key); // Head of collider's chain.
+ // Head of collider's chain.
+ upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key);
if (chain == mainpos_e) {
// Existing ent is in its main posisiton (it has the same hash as us, and
// is the head of our chain). Insert to new ent and append to this chain.
@@ -105,7 +115,10 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
// chain). This implies that no existing ent in the table has our hash.
// Evict it (updating its chain) and use its ent for head of our chain.
*new_e = *mainpos_e; // copies next.
- while (chain->next != mainpos_e) chain = chain->next;
+ while (chain->next != mainpos_e) {
+ chain = (upb_tabent*)chain->next;
+ assert(chain);
+ }
chain->next = new_e;
our_e = mainpos_e;
our_e->next = NULL;
@@ -117,27 +130,35 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
}
static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
+ upb_tabkey *removed,
upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
- upb_tabent *chain = hash(t, key);
+ upb_tabent *chain = (upb_tabent*)hash(t, key);
+ if (upb_tabent_isempty(chain)) return false;
if (eql(chain->key, key)) {
+ // Element to remove is at the head of its chain.
t->count--;
if (val) *val = chain->val;
if (chain->next) {
- upb_tabent *move = chain->next;
+ upb_tabent *move = (upb_tabent*)chain->next;
*chain = *move;
+ *removed = move->key;
move->key.num = 0; // Make the slot empty.
} else {
+ *removed = chain->key;
chain->key.num = 0; // Make the slot empty.
}
return true;
} else {
+ // Element to remove is either in a non-head position or not in the table.
while (chain->next && !eql(chain->next->key, key))
- chain = chain->next;
+ chain = (upb_tabent*)chain->next;
if (chain->next) {
// Found element to remove.
if (val) *val = chain->next->val;
- chain->next->key.num = 0;
- chain->next = chain->next->next;
+ upb_tabent *remove = (upb_tabent*)chain->next;
+ *removed = remove->key;
+ remove->key.num = 0;
+ chain->next = remove->next;
t->count--;
return true;
} else {
@@ -146,13 +167,16 @@ static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
}
}
-static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) {
- upb_tabent *end = t->entries + upb_table_size(t);
+static const upb_tabent *upb_table_next(const upb_table *t,
+ const upb_tabent *e) {
+ const upb_tabent *end = t->entries + upb_table_size(t);
do { if (++e == end) return NULL; } while(e->key.num == 0);
return e;
}
-static upb_tabent *upb_table_begin(const upb_table *t) {
+// TODO: is calculating t->entries - 1 undefined behavior? If so find a better
+// solution.
+static const upb_tabent *upb_table_begin(const upb_table *t) {
return upb_table_next(t, t->entries - 1);
}
@@ -161,7 +185,7 @@ static upb_tabent *upb_table_begin(const upb_table *t) {
// A simple "subclass" of upb_table that only adds a hash function for strings.
-static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
+static const upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
// Could avoid the strlen() by using a hash function that terminates on NULL.
return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask);
}
@@ -170,11 +194,13 @@ static bool upb_streql(upb_tabkey k1, upb_tabkey k2) {
return strcmp(k1.str, k2.str) == 0;
}
-bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); }
+bool upb_strtable_init(upb_strtable *t, upb_ctype_t type) {
+ return upb_table_init(&t->t, type, 2);
+}
void upb_strtable_uninit(upb_strtable *t) {
for (size_t i = 0; i < upb_table_size(&t->t); i++)
- free(t->t.entries[i].key.str);
+ free((void*)t->t.entries[i].key.str);
upb_table_uninit(&t->t);
}
@@ -182,7 +208,8 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
if (upb_table_isfull(&t->t)) {
// Need to resize. New table of double the size, add old elements to it.
upb_strtable new_table;
- if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false;
+ if (!upb_table_init(&new_table.t, t->t.type, t->t.size_lg2 + 1))
+ return false;
upb_strtable_iter i;
upb_strtable_begin(&i, t);
for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
@@ -192,15 +219,23 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
upb_strtable_uninit(t);
*t = new_table;
}
- if ((k = strdup(k)) == NULL) return false;
+ if ((k = upb_strdup(k)) == NULL) return false;
upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql);
return true;
}
-upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
+const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql);
}
+bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
+ upb_tabkey removed;
+ bool found = upb_table_remove(
+ &t->t, upb_strkey(key), val, &removed, &upb_strhash, &upb_streql);
+ if (found) free((void*)removed.str);
+ return found;
+}
+
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
i->t = t;
i->e = upb_table_begin(&t->t);
@@ -224,8 +259,9 @@ size_t upb_inttable_count(const upb_inttable *t) {
return t->t.count + t->array_count;
}
-bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
- if (!upb_table_init(&t->t, hsize_lg2)) return false;
+bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type,
+ size_t asize, int hsize_lg2) {
+ if (!upb_table_init(&t->t, type, hsize_lg2)) return false;
// Always make the array part at least 1 long, so that we know key 0
// won't be in the hash part, which simplifies things.
t->array_size = UPB_MAX(1, asize);
@@ -236,17 +272,32 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
upb_table_uninit(&t->t);
return false;
}
- memset(t->array, 0xff, array_bytes);
+ memset((void*)t->array, 0xff, array_bytes);
return true;
}
-bool upb_inttable_init(upb_inttable *t) {
- return upb_inttable_sizedinit(t, 0, 4);
+bool upb_inttable_init(upb_inttable *t, upb_ctype_t type) {
+ return upb_inttable_sizedinit(t, type, 0, 4);
}
void upb_inttable_uninit(upb_inttable *t) {
upb_table_uninit(&t->t);
- free(t->array);
+ free((void*)t->array);
+}
+
+static void upb_inttable_check(upb_inttable *t) {
+ UPB_UNUSED(t);
+#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
+ // This check is very expensive (makes inserts/deletes O(N)).
+ size_t count = 0;
+ upb_inttable_iter i;
+ upb_inttable_begin(&i, t);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
+ const upb_value *v = upb_inttable_lookup(t, upb_inttable_iter_key(&i));
+ assert(v);
+ }
+ assert(count == upb_inttable_count(t));
+#endif
}
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
@@ -254,45 +305,78 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
if (key < t->array_size) {
assert(!upb_arrhas(t->array[key]));
t->array_count++;
- t->array[key] = val;
+ ((upb_value*)t->array)[key] = val;
} else {
if (upb_table_isfull(&t->t)) {
// Need to resize the hash part, but we re-use the array part.
upb_table new_table;
- if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false;
- upb_tabent *e;
+ if (!upb_table_init(&new_table, t->t.type, t->t.size_lg2 + 1))
+ return false;
+ const upb_tabent *e;
for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e))
upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql);
+
+ assert(t->t.count == new_table.count);
+
upb_table_uninit(&t->t);
t->t = new_table;
}
upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
}
+ upb_inttable_check(t);
return true;
}
-upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
+const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
if (key < t->array_size) {
- upb_value *v = &t->array[key];
+ const upb_value *v = &t->array[key];
return upb_arrhas(*v) ? v : NULL;
}
return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql);
}
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
+ bool success;
if (key < t->array_size) {
if (upb_arrhas(t->array[key])) {
t->array_count--;
if (val) *val = t->array[key];
- t->array[key] = upb_value_uint64(-1);
- return true;
+ ((upb_value*)t->array)[key] = upb_value_uint64(-1);
+ success = true;
} else {
- return false;
+ success = false;
}
} else {
- return upb_table_remove(
- &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
+ upb_tabkey removed;
+ success = upb_table_remove(
+ &t->t, upb_intkey(key), val, &removed, &upb_inthash, &upb_inteql);
}
+ upb_inttable_check(t);
+ return success;
+}
+
+bool upb_inttable_push(upb_inttable *t, upb_value val) {
+ return upb_inttable_insert(t, upb_inttable_count(t), val);
+}
+
+upb_value upb_inttable_pop(upb_inttable *t) {
+ upb_value val;
+ bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
+ UPB_ASSERT_VAR(ok, ok);
+ return val;
+}
+
+bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
+ return upb_inttable_insert(t, (uintptr_t)key, val);
+}
+
+const upb_value *upb_inttable_lookupptr(const upb_inttable *t,
+ const void *key) {
+ return upb_inttable_lookup(t, (uintptr_t)key);
+}
+
+bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
+ return upb_inttable_remove(t, (uintptr_t)key, val);
}
void upb_inttable_compact(upb_inttable *t) {
@@ -301,7 +385,10 @@ void upb_inttable_compact(upb_inttable *t) {
upb_inttable_iter i;
for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
counts[upb_log2(upb_inttable_iter_key(&i))]++;
- int count = upb_inttable_count(t);
+ // Int part must always be at least 1 entry large to catch lookups of key 0.
+ // Key 0 must always be in the array part because "0" in the hash part
+ // denotes an empty entry.
+ int count = UPB_MAX(upb_inttable_count(t), 1);
int size;
for (size = UPB_MAXARRSIZE; size > 1; size--) {
count -= counts[size];
@@ -311,7 +398,8 @@ void upb_inttable_compact(upb_inttable *t) {
// Insert all elements into new, perfectly-sized table.
upb_inttable new_table;
int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD;
- upb_inttable_sizedinit(&new_table, size, upb_log2(hashsize) + 1);
+
+ upb_inttable_sizedinit(&new_table, t->t.type, size, upb_log2(hashsize));
for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
upb_inttable_insert(
&new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i));
@@ -352,7 +440,7 @@ void upb_inttable_next(upb_inttable_iter *iter) {
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
+uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
@@ -403,7 +491,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
+uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
const uint8_t * data = (const uint8_t *)key;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback