Sync with 8 months of Google-internal development.

Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink).
author: Josh Haberman <haberman@google.com> 2013-02-15 16:27:18 -0800
committer: Josh Haberman <haberman@google.com> 2013-02-15 16:27:18 -0800
commit: 7d3e2bd2c4cfd1296d1d6f996d7548de26540d41 (patch)
tree: b4b35967b3322c65cfb1a32220e8718de09d85fc /upb/table.c
parent: ea198bdcf947ba4bd51474bdd4f7b82b5e4cf41d (diff)
1 files changed, 148 insertions, 60 deletions
diff --git a/upb/table.c b/upb/table.c
index 1cf944a..21457a0 100644
--- a/upb/table.c
+++ b/upb/table.c
@@ -5,14 +5,10 @@
  * Author: Josh Haberman <jhaberman@gmail.com>
  *
  * Implementation is heavily inspired by Lua's ltable.c.
- *
- * TODO: for table iteration we use (array - 1) in several places; is this
- * undefined behavior?  If so find a better solution.
  */
 
 #include "upb/table.h"
 
-#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -35,47 +31,56 @@ int upb_log2(uint64_t v) {
   return UPB_MIN(UPB_MAXARRSIZE, ret);
 }
 
+char *upb_strdup(const char *s) {
+  size_t n = strlen(s) + 1;
+  char *p = malloc(n);
+  if (p) memcpy(p, s, n);
+  return p;
+}
+
 static upb_tabkey upb_strkey(const char *str) {
   upb_tabkey k;
   k.str = (char*)str;
   return k;
 }
 
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
-typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
+typedef const upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
 typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2);
 
 /* Base table (shared code) ***************************************************/
 
-static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
-
 static bool upb_table_isfull(upb_table *t) {
   return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
 }
 
-static bool upb_table_init(upb_table *t, uint8_t size_lg2) {
+static bool upb_table_init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) {
   t->count = 0;
+  t->type = type;
   t->size_lg2 = size_lg2;
+  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
   size_t bytes = upb_table_size(t) * sizeof(upb_tabent);
-  t->mask = upb_table_size(t) - 1;
-  t->entries = malloc(bytes);
-  if (!t->entries) return false;
-  memset(t->entries, 0, bytes);
+  if (bytes > 0) {
+    t->entries = malloc(bytes);
+    if (!t->entries) return false;
+    memset((void*)t->entries, 0, bytes);
+  } else {
+    t->entries = NULL;
+  }
   return true;
 }
 
-static void upb_table_uninit(upb_table *t) { free(t->entries); }
-
-static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; }
+static void upb_table_uninit(upb_table *t) { free((void*)t->entries); }
 
-static upb_tabent *upb_table_emptyent(const upb_table *t) {
-  upb_tabent *e = t->entries + upb_table_size(t);
+static upb_tabent *upb_table_emptyent(upb_table *t) {
+  upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t);
   while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
 }
 
-static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
-                                   upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
-  upb_tabent *e = hash(t, key);
+static const upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
+                                         upb_hashfunc_t *hash,
+                                         upb_eqlfunc_t *eql) {
+  if (t->size_lg2 == 0) return NULL;
+  const upb_tabent *e = hash(t, key);
   if (upb_tabent_isempty(e)) return NULL;
   while (1) {
     if (eql(e->key, key)) return &e->val;
@@ -86,14 +91,19 @@ static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
 // The given key must not already exist in the table.
 static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
                              upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
-  (void)eql;
   assert(upb_table_lookup(t, key, hash, eql) == NULL);
+  assert(val.type == t->type);
   t->count++;
-  upb_tabent *mainpos_e = hash(t, key);
+  upb_tabent *mainpos_e = (upb_tabent*)hash(t, key);
   upb_tabent *our_e = mainpos_e;
-  if (!upb_tabent_isempty(mainpos_e)) {  // Collision.
+  if (upb_tabent_isempty(mainpos_e)) {
+    // Our main position is empty; use it.
+    our_e->next = NULL;
+  } else {
+    // Collision.
     upb_tabent *new_e = upb_table_emptyent(t);
-    upb_tabent *chain = hash(t, mainpos_e->key);  // Head of collider's chain.
+    // Head of collider's chain.
+    upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key);
     if (chain == mainpos_e) {
       // Existing ent is in its main posisiton (it has the same hash as us, and
       // is the head of our chain).  Insert to new ent and append to this chain.
@@ -105,7 +115,10 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
       // chain).  This implies that no existing ent in the table has our hash.
       // Evict it (updating its chain) and use its ent for head of our chain.
       *new_e = *mainpos_e;  // copies next.
-      while (chain->next != mainpos_e) chain = chain->next;
+      while (chain->next != mainpos_e) {
+        chain = (upb_tabent*)chain->next;
+        assert(chain);
+      }
       chain->next = new_e;
       our_e = mainpos_e;
       our_e->next = NULL;
@@ -117,27 +130,35 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
 }
 
 static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
+                             upb_tabkey *removed,
                              upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
-  upb_tabent *chain = hash(t, key);
+  upb_tabent *chain = (upb_tabent*)hash(t, key);
+  if (upb_tabent_isempty(chain)) return false;
   if (eql(chain->key, key)) {
+    // Element to remove is at the head of its chain.
     t->count--;
     if (val) *val = chain->val;
     if (chain->next) {
-      upb_tabent *move = chain->next;
+      upb_tabent *move = (upb_tabent*)chain->next;
       *chain = *move;
+      *removed = move->key;
       move->key.num = 0;  // Make the slot empty.
     } else {
+      *removed = chain->key;
       chain->key.num = 0;  // Make the slot empty.
     }
     return true;
   } else {
+    // Element to remove is either in a non-head position or not in the table.
     while (chain->next && !eql(chain->next->key, key))
-      chain = chain->next;
+      chain = (upb_tabent*)chain->next;
     if (chain->next) {
       // Found element to remove.
       if (val) *val = chain->next->val;
-      chain->next->key.num = 0;
-      chain->next = chain->next->next;
+      upb_tabent *remove = (upb_tabent*)chain->next;
+      *removed = remove->key;
+      remove->key.num = 0;
+      chain->next = remove->next;
       t->count--;
       return true;
     } else {
@@ -146,13 +167,16 @@ static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
   }
 }
 
-static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) {
-  upb_tabent *end = t->entries + upb_table_size(t);
+static const upb_tabent *upb_table_next(const upb_table *t,
+                                        const upb_tabent *e) {
+  const upb_tabent *end = t->entries + upb_table_size(t);
   do { if (++e == end) return NULL; } while(e->key.num == 0);
   return e;
 }
 
-static upb_tabent *upb_table_begin(const upb_table *t) {
+// TODO: is calculating t->entries - 1 undefined behavior?  If so find a better
+// solution.
+static const upb_tabent *upb_table_begin(const upb_table *t) {
   return upb_table_next(t, t->entries - 1);
 }
 
@@ -161,7 +185,7 @@ static upb_tabent *upb_table_begin(const upb_table *t) {
 
 // A simple "subclass" of upb_table that only adds a hash function for strings.
 
-static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
+static const upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
   // Could avoid the strlen() by using a hash function that terminates on NULL.
   return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask);
 }
@@ -170,11 +194,13 @@ static bool upb_streql(upb_tabkey k1, upb_tabkey k2) {
   return strcmp(k1.str, k2.str) == 0;
 }
 
-bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); }
+bool upb_strtable_init(upb_strtable *t, upb_ctype_t type) {
+  return upb_table_init(&t->t, type, 2);
+}
 
 void upb_strtable_uninit(upb_strtable *t) {
   for (size_t i = 0; i < upb_table_size(&t->t); i++)
-    free(t->t.entries[i].key.str);
+    free((void*)t->t.entries[i].key.str);
   upb_table_uninit(&t->t);
 }
 
@@ -182,7 +208,8 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
   if (upb_table_isfull(&t->t)) {
     // Need to resize.  New table of double the size, add old elements to it.
     upb_strtable new_table;
-    if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false;
+    if (!upb_table_init(&new_table.t, t->t.type, t->t.size_lg2 + 1))
+      return false;
     upb_strtable_iter i;
     upb_strtable_begin(&i, t);
     for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
@@ -192,15 +219,23 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
     upb_strtable_uninit(t);
     *t = new_table;
   }
-  if ((k = strdup(k)) == NULL) return false;
+  if ((k = upb_strdup(k)) == NULL) return false;
   upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql);
   return true;
 }
 
-upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
+const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
   return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql);
 }
 
+bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
+  upb_tabkey removed;
+  bool found = upb_table_remove(
+      &t->t, upb_strkey(key), val, &removed, &upb_strhash, &upb_streql);
+  if (found) free((void*)removed.str);
+  return found;
+}
+
 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
   i->t = t;
   i->e = upb_table_begin(&t->t);
@@ -224,8 +259,9 @@ size_t upb_inttable_count(const upb_inttable *t) {
   return t->t.count + t->array_count;
 }
 
-bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
-  if (!upb_table_init(&t->t, hsize_lg2)) return false;
+bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type,
+                            size_t asize, int hsize_lg2) {
+  if (!upb_table_init(&t->t, type, hsize_lg2)) return false;
   // Always make the array part at least 1 long, so that we know key 0
   // won't be in the hash part, which simplifies things.
   t->array_size = UPB_MAX(1, asize);
@@ -236,17 +272,32 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
     upb_table_uninit(&t->t);
     return false;
   }
-  memset(t->array, 0xff, array_bytes);
+  memset((void*)t->array, 0xff, array_bytes);
   return true;
 }
 
-bool upb_inttable_init(upb_inttable *t) {
-  return upb_inttable_sizedinit(t, 0, 4);
+bool upb_inttable_init(upb_inttable *t, upb_ctype_t type) {
+  return upb_inttable_sizedinit(t, type, 0, 4);
 }
 
 void upb_inttable_uninit(upb_inttable *t) {
   upb_table_uninit(&t->t);
-  free(t->array);
+  free((void*)t->array);
+}
+
+static void upb_inttable_check(upb_inttable *t) {
+  UPB_UNUSED(t);
+#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
+  // This check is very expensive (makes inserts/deletes O(N)).
+  size_t count = 0;
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, t);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
+    const upb_value *v = upb_inttable_lookup(t, upb_inttable_iter_key(&i));
+    assert(v);
+  }
+  assert(count == upb_inttable_count(t));
+#endif
 }
 
 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
@@ -254,45 +305,78 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
   if (key < t->array_size) {
     assert(!upb_arrhas(t->array[key]));
     t->array_count++;
-    t->array[key] = val;
+    ((upb_value*)t->array)[key] = val;
   } else {
     if (upb_table_isfull(&t->t)) {
       // Need to resize the hash part, but we re-use the array part.
       upb_table new_table;
-      if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false;
-      upb_tabent *e;
+      if (!upb_table_init(&new_table, t->t.type, t->t.size_lg2 + 1))
+        return false;
+      const upb_tabent *e;
       for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e))
         upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql);
+
+      assert(t->t.count == new_table.count);
+
       upb_table_uninit(&t->t);
       t->t = new_table;
     }
     upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
   }
+  upb_inttable_check(t);
   return true;
 }
 
-upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
+const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
   if (key < t->array_size) {
-    upb_value *v = &t->array[key];
+    const upb_value *v = &t->array[key];
     return upb_arrhas(*v) ? v : NULL;
   }
   return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql);
 }
 
 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
+  bool success;
   if (key < t->array_size) {
     if (upb_arrhas(t->array[key])) {
       t->array_count--;
       if (val) *val = t->array[key];
-      t->array[key] = upb_value_uint64(-1);
-      return true;
+      ((upb_value*)t->array)[key] = upb_value_uint64(-1);
+      success = true;
     } else {
-      return false;
+      success = false;
     }
   } else {
-    return upb_table_remove(
-        &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
+    upb_tabkey removed;
+    success = upb_table_remove(
+        &t->t, upb_intkey(key), val, &removed, &upb_inthash, &upb_inteql);
   }
+  upb_inttable_check(t);
+  return success;
+}
+
+bool upb_inttable_push(upb_inttable *t, upb_value val) {
+  return upb_inttable_insert(t, upb_inttable_count(t), val);
+}
+
+upb_value upb_inttable_pop(upb_inttable *t) {
+  upb_value val;
+  bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
+  UPB_ASSERT_VAR(ok, ok);
+  return val;
+}
+
+bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
+  return upb_inttable_insert(t, (uintptr_t)key, val);
+}
+
+const upb_value *upb_inttable_lookupptr(const upb_inttable *t,
+                                        const void *key) {
+  return upb_inttable_lookup(t, (uintptr_t)key);
+}
+
+bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
+  return upb_inttable_remove(t, (uintptr_t)key, val);
 }
 
 void upb_inttable_compact(upb_inttable *t) {
@@ -301,7 +385,10 @@ void upb_inttable_compact(upb_inttable *t) {
   upb_inttable_iter i;
   for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
     counts[upb_log2(upb_inttable_iter_key(&i))]++;
-  int count = upb_inttable_count(t);
+  // Int part must always be at least 1 entry large to catch lookups of key 0.
+  // Key 0 must always be in the array part because "0" in the hash part
+  // denotes an empty entry.
+  int count = UPB_MAX(upb_inttable_count(t), 1);
   int size;
   for (size = UPB_MAXARRSIZE; size > 1; size--) {
     count -= counts[size];
@@ -311,7 +398,8 @@ void upb_inttable_compact(upb_inttable *t) {
   // Insert all elements into new, perfectly-sized table.
   upb_inttable new_table;
   int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD;
-  upb_inttable_sizedinit(&new_table, size, upb_log2(hashsize) + 1);
+
+  upb_inttable_sizedinit(&new_table, t->t.type, size, upb_log2(hashsize));
   for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
     upb_inttable_insert(
         &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i));
@@ -352,7 +440,7 @@ void upb_inttable_next(upb_inttable_iter *iter) {
 //   1. It will not work incrementally.
 //   2. It will not produce the same results on little-endian and big-endian
 //      machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
+uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
   // 'm' and 'r' are mixing constants generated offline.
   // They're not really 'magic', they just happen to work well.
   const uint32_t m = 0x5bd1e995;
@@ -403,7 +491,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
 
 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
 
-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
+uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
   const uint32_t m = 0x5bd1e995;
   const int32_t r = 24;
   const uint8_t * data = (const uint8_t *)key;
author	Josh Haberman <haberman@google.com>	2013-02-15 16:27:18 -0800
committer	Josh Haberman <haberman@google.com>	2013-02-15 16:27:18 -0800
commit	7d3e2bd2c4cfd1296d1d6f996d7548de26540d41 (patch)
tree	b4b35967b3322c65cfb1a32220e8718de09d85fc /upb/table.c
parent	ea198bdcf947ba4bd51474bdd4f7b82b5e4cf41d (diff)