summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--upb.h12
-rw-r--r--upb_struct.h12
-rw-r--r--upb_table.c257
-rw-r--r--upb_table.h107
4 files changed, 221 insertions, 167 deletions
diff --git a/upb.h b/upb.h
index 36abcaa..a144218 100644
--- a/upb.h
+++ b/upb.h
@@ -10,6 +10,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h> /* for size_t. */
+#include <string.h>
#ifdef __cplusplus
extern "C" {
@@ -28,6 +29,17 @@ extern "C" {
/* The maximum that any submessages can be nested. Matches proto2's limit. */
#define UPB_MAX_NESTING 64
+/* Represents a string or bytes. */
+struct upb_string {
+ void *data;
+ uint32_t byte_len;
+};
+
+INLINE bool upb_string_eql(struct upb_string *s1, struct upb_string *s2) {
+ return s1->byte_len == s2->byte_len &&
+ memcmp(s1->data, s2->data, s1->byte_len) == 0;
+}
+
/* A list of types as they are encoded on-the-wire. */
enum upb_wire_type {
UPB_WIRE_TYPE_VARINT = 0,
diff --git a/upb_struct.h b/upb_struct.h
index d46b007..72a1d21 100644
--- a/upb_struct.h
+++ b/upb_struct.h
@@ -67,29 +67,23 @@ struct upb_struct_field *upb_struct_find_field_by_number(
/* Variable-length data (strings and arrays).**********************************/
-/* Represents a string or bytes. */
-struct upb_string {
- size_t byte_len;
- void *data;
-};
-
/* Represents an array (a repeated field) of any type. The interpretation of
* the data in the array depends on the type. */
struct upb_array {
- size_t len; /* Measured in elements. */
void *data; /* Size of individual elements is based on type. */
+ uint32_t len; /* Measured in elements. */
};
/* A generic array of structs, using void* instead of specific types. */
struct upb_struct_array {
- size_t len;
void **elements;
+ uint32_t len;
};
/* An array of strings. */
struct upb_string_array {
- size_t len;
struct upb_string **elements;
+ uint32_t len;
};
/* Specific arrays of all the primitive types. */
diff --git a/upb_table.c b/upb_table.c
index 00adc01..13ebb70 100644
--- a/upb_table.c
+++ b/upb_table.c
@@ -10,9 +10,153 @@
#include <stdlib.h>
#include <string.h>
+static const upb_inttable_key_t EMPTYENT = 0;
+static const double MAX_LOAD = 0.85;
+
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+
+static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
+static struct upb_inttable_entry *intent(struct upb_inttable *t, uint32_t i) {
+ return (struct upb_inttable_entry*)((char*)t->t.entries + i*t->t.entry_size);
+}
+static struct upb_strtable_entry *strent(struct upb_strtable *t, uint32_t i) {
+ return (struct upb_strtable_entry*)((char*)t->t.entries + i*t->t.entry_size);
+}
+
+void upb_table_init(struct upb_table *t, uint32_t size, uint16_t entry_size)
+{
+ t->count = 0;
+ t->entry_size = entry_size;
+ t->size_lg2 = 1;
+ while(size >>= 1) t->size_lg2++;
+ t->size_lg2 = max(t->size_lg2, 4); /* Min size of 16. */
+ t->entries = malloc(upb_table_size(t) * t->entry_size);
+}
+
+void upb_inttable_init(struct upb_inttable *t, uint32_t size, uint16_t entsize)
+{
+ upb_table_init(&t->t, size, entsize);
+ for(uint32_t i = 0; i < upb_table_size(&t->t); i++)
+ intent(t, i)->key = EMPTYENT;
+}
+
+void upb_strtable_init(struct upb_strtable *t, uint32_t size, uint16_t entsize)
+{
+ upb_table_init(&t->t, size, entsize);
+ for(uint32_t i = 0; i < upb_table_size(&t->t); i++)
+ strent(t, i)->key.data = NULL;
+}
+
+void upb_table_free(struct upb_table *t) { free(t->entries); }
+void upb_inttable_free(struct upb_inttable *t) { upb_table_free(&t->t); }
+void upb_strtable_free(struct upb_strtable *t) { upb_table_free(&t->t); }
+
+void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key)
+{
+ uint32_t hash =
+ MurmurHash2(key->data, key->byte_len, 0) & (upb_strtable_size(t)-1);
+ while(1) {
+ struct upb_strtable_entry *e =
+ (struct upb_strtable_entry*)(char*)t->t.entries + hash*t->t.entry_size;
+ if(e->key.data == NULL) return NULL;
+ else if(upb_string_eql(&e->key, key)) return e;
+ hash = e->next;
+ }
+}
+
+static struct upb_inttable_entry *find_empty_slot(struct upb_inttable *table)
+{
+ /* TODO: does it matter that this is biased towards the front of the table? */
+ for(uint32_t i = 0; i < upb_inttable_size(table); i++) {
+ struct upb_inttable_entry *e = intent(table, i);
+ if(e->key == EMPTYENT) return e;
+ }
+ assert(false);
+ return NULL;
+}
+
+static void *maybe_resize(struct upb_table *t) {
+ if((double)++t->count / upb_table_size(t) > MAX_LOAD) {
+ void *old_entries = t->entries;
+ t->size_lg2++; /* Double the table size. */
+ t->entries = malloc(upb_table_size(t) * t->entry_size);
+ return old_entries;
+ } else {
+ return NULL; /* No resize necessary. */
+ }
+}
+
+static void intinsert(void *table, struct upb_inttable_entry *e, uint32_t size)
+{
+ /* TODO */
+#if 0
+ struct upb_inttable_entry *e, *table_e;
+ e = upb_inttable_entry_get(entries, i, entry_size);
+ table_e = upb_inttable_mainpos2(table, e->key);
+ if(table_e->key != UPB_EMPTY_ENTRY) { /* Collision. */
+ if(table_e == upb_inttable_mainpos2(table, table_e->key)) {
+ /* Existing element is in its main posisiton. Find an empty slot to
+ * place our new element and append it to this key's chain. */
+ struct upb_inttable_entry *empty = find_empty_slot(table);
+ while (table_e->next) table_e = table_e->next;
+ table_e->next = empty;
+ table_e = empty;
+ } else {
+ /* Existing element is not in its main position. Move it to an empty
+ * slot and put our element in its main position. */
+ struct upb_inttable_entry *empty, *colliding_key_mainpos;
+ empty = find_empty_slot(table);
+ colliding_key_mainpos = upb_inttable_mainpos2(table, table_e->key);
+ assert(colliding_key_mainpos->key != UPB_EMPTY_ENTRY);
+ assert(colliding_key_mainpos->next);
+ memcpy(empty, table_e, entry_size); /* next is copied also. */
+ while(1) {
+ assert(colliding_key_mainpos->next);
+ if(colliding_key_mainpos->next == table_e) {
+ colliding_key_mainpos->next = empty;
+ break;
+ }
+ }
+ /* table_e remains set to our mainpos. */
+ }
+ }
+ memcpy(table_e, e, entry_size);
+ table_e->next = NULL;
+#endif
+}
+
+void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e)
+{
+ void *new_entries = maybe_resize(&t->t);
+ if(new_entries) { /* Are we doing a resize? */
+ for(uint32_t i = 0; i < (upb_inttable_size(t)>>1); i++) {
+ struct upb_inttable_entry *old_e = intent(t, i);
+ if(old_e->key != EMPTYENT) intinsert(new_entries, old_e, t->t.entry_size);
+ }
+ }
+ intinsert(t->t.entries, e, t->t.entry_size);
+}
+
+static void strinsert(void *table, struct upb_strtable_entry *e, uint32_t size)
+{
+ /* TODO */
+}
+
+void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e)
+{
+ void *new_entries = maybe_resize(&t->t);
+ if(new_entries) { /* Are we doing a resize? */
+ for(uint32_t i = 0; i < (upb_strtable_size(t)>>1); i++) {
+ struct upb_strtable_entry *old_e = strent(t, i);
+ if(old_e->key.data != NULL) strinsert(new_entries, old_e, t->t.entry_size);
+ }
+ }
+ strinsert(t->t.entries, e, t->t.entry_size);
+}
+
#ifdef UPB_UNALIGNED_READS_OK
//-----------------------------------------------------------------------------
-// MurmurHash2, by Austin Appleby
+// MurmurHash2, by Austin Appleby (released as public domain).
// Reformatted and C99-ified by Joshua Haberman.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
@@ -183,114 +327,3 @@ static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
#undef MIX
#endif // UPB_UNALIGNED_READS_OK
-
-static int compare_entries(const void *f1, const void *f2)
-{
- return ((struct upb_inttable_entry*)f1)->key -
- ((struct upb_inttable_entry*)f2)->key;
-}
-
-static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
-
-static uint32_t round_up_to_pow2(uint32_t v)
-{
- /* cf. Bit Twiddling Hacks:
- * http://www-graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
- v--;
- v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
- v++;
- return v;
-}
-
-static struct upb_inttable_entry *find_empty_slot(struct upb_inttable *table)
-{
- /* TODO: does it matter that this is biased towards the front of the table? */
- for(uint32_t i = 0; i < table->size; i++) {
- struct upb_inttable_entry *e =
- upb_inttable_entry_get(table->entries, i, table->entry_size);
- if(e->key == UPB_EMPTY_ENTRY) return e;
- }
- assert(false);
- return NULL;
-}
-
-void upb_inttable_init(struct upb_inttable *table, void *entries,
- int num_entries, int entry_size)
-{
- qsort(entries, num_entries, entry_size, compare_entries);
-
- /* Find the largest n for which at least half the keys <n are used. We
- * make sure our table size is at least n. This allows all keys <n to be
- * in their main position (as if it were an array) and only numbers >n might
- * possibly have collisions. Start at 8 to avoid noise of small numbers. */
- upb_inttable_key_t n = 0, maybe_n;
- bool all_in_array = true;
- for(int i = 0; i < num_entries; i++) {
- struct upb_inttable_entry *e =
- upb_inttable_entry_get(entries, i, entry_size);
- maybe_n = e->key;
- if(maybe_n > 8 && maybe_n/(i+1) >= 2) {
- all_in_array = false;
- break;
- }
- n = maybe_n;
- }
-
- /* TODO: measure, tweak, optimize this choice of table size. Possibly test
- * (at runtime) maximum chain length for each proposed size. */
- uint32_t min_size_by_load = all_in_array ? n : (double)num_entries / 0.85;
- uint32_t min_size = max(n, min_size_by_load);
- table->size = round_up_to_pow2(min_size);
- table->entry_size = entry_size;
- table->entries = malloc(table->size * entry_size);
-
- /* Initialize to empty. */
- for(size_t i = 0; i < table->size; i++) {
- struct upb_inttable_entry *e =
- upb_inttable_entry_get(table->entries, i, entry_size);
- e->key = UPB_EMPTY_ENTRY;
- e->next = NULL;
- }
-
- /* Insert the elements. */
- for(int i = 0; i < num_entries; i++) {
- struct upb_inttable_entry *e, *table_e;
- e = upb_inttable_entry_get(entries, i, entry_size);
- table_e = upb_inttable_mainpos(table, e->key);
- if(table_e->key != UPB_EMPTY_ENTRY) { /* Collision. */
- if(table_e == upb_inttable_mainpos(table, table_e->key)) {
- /* Existing element is in its main posisiton. Find an empty slot to
- * place our new element and append it to this key's chain. */
- struct upb_inttable_entry *empty = find_empty_slot(table);
- while (table_e->next) table_e = table_e->next;
- table_e->next = empty;
- table_e = empty;
- } else {
- /* Existing element is not in its main position. Move it to an empty
- * slot and put our element in its main position. */
- struct upb_inttable_entry *empty, *colliding_key_mainpos;
- empty = find_empty_slot(table);
- colliding_key_mainpos = upb_inttable_mainpos(table, table_e->key);
- assert(colliding_key_mainpos->key != UPB_EMPTY_ENTRY);
- assert(colliding_key_mainpos->next);
- memcpy(empty, table_e, entry_size); /* next is copied also. */
- while(1) {
- assert(colliding_key_mainpos->next);
- if(colliding_key_mainpos->next == table_e) {
- colliding_key_mainpos->next = empty;
- break;
- }
- }
- /* table_e remains set to our mainpos. */
- }
- }
- memcpy(table_e, e, entry_size);
- table_e->next = NULL;
- }
-}
-
-void upb_inttable_free(struct upb_inttable *table)
-{
- free(table->entries);
-}
-
diff --git a/upb_table.h b/upb_table.h
index 2a8e4f2..f405e05 100644
--- a/upb_table.h
+++ b/upb_table.h
@@ -21,70 +21,85 @@
extern "C" {
#endif
-/* Inttables are keyed by 32-bit integer. */
-typedef int32_t upb_inttable_key_t;
-
-#define UPB_EMPTY_ENTRY (upb_inttable_key_t)-1
+typedef uint32_t upb_inttable_key_t;
struct upb_inttable_entry {
upb_inttable_key_t key;
- struct upb_inttable_entry *next; /* Internal chaining. */
+ uint32_t next; /* Internal chaining. */
};
-struct upb_inttable {
- uint32_t size; /* Is a power of two. */
- uint32_t entry_size; /* How big is each entry? */
+/* TODO: consider storing the hash in the entry. This would avoid the need to
+ * rehash on table resizes, but more importantly could possibly improve lookup
+ * performance by letting us compare hashes before comparing lengths or the
+ * strings themselves. */
+struct upb_strtable_entry {
+ struct upb_string key;
+ uint32_t next; /* Internal chaining. */
+};
+
+struct upb_table {
void *entries;
+ uint32_t count; /* How many elements are currently in the table? */
+ uint16_t entry_size; /* How big is each entry? */
+ uint8_t size_lg2; /* The table is 2^size_lg2 in size. */
};
-/* Builds an int32_t -> <entry> table, optimized for very fast lookup by
- * number. table is a pointer to a previously allocated upb_inttable.
- * entries points to an array of the desired entries themselves, each of size
- * entry_size. The table is allocated in dynamic memory, and does not reference
- * the data in entries. Entries may be modified by the function.
- *
- * The table must be freed with upb_inttable_free. */
-void upb_inttable_init(struct upb_inttable *table, void *entries,
- int num_entries, int entry_size);
+struct upb_strtable {
+ struct upb_table t;
+};
-/* Frees any data that was allocated by upb_inttable_init. */
+struct upb_inttable {
+ struct upb_table t;
+};
+
+/* Initialize and free a table, respectively. Specify the initial size
+ * with 'size' (the size will be increased as necessary). Entry size
+ * specifies how many bytes each entry in the table is. */
+void upb_inttable_init(struct upb_inttable *table,
+ uint32_t size, uint16_t entry_size);
void upb_inttable_free(struct upb_inttable *table);
+void upb_strtable_init(struct upb_strtable *table,
+ uint32_t size, uint16_t entry_size);
+void upb_strtable_free(struct upb_strtable *table);
-INLINE struct upb_inttable_entry *upb_inttable_entry_get(
- void *entries, int32_t pos, int entry_size) {
- return (struct upb_inttable_entry*)(((char*)entries) + pos*entry_size);
+INLINE uint32_t upb_table_size(struct upb_table *t) { return 1 << t->size_lg2; }
+INLINE uint32_t upb_inttable_size(struct upb_inttable *t) {
+ return upb_table_size(&t->t);
}
-
-INLINE struct upb_inttable_entry *upb_inttable_mainpos2(
- struct upb_inttable *table, upb_inttable_key_t key, int32_t entry_size) {
- /* Identity hash for integers. */
- int32_t pos = key & (table->size-1);
- return upb_inttable_entry_get(table->entries, pos, entry_size);
+INLINE uint32_t upb_strtable_size(struct upb_strtable *t) {
+ return upb_table_size(&t->t);
}
-INLINE struct upb_inttable_entry *upb_inttable_mainpos(
- struct upb_inttable *table, upb_inttable_key_t key) {
- return upb_inttable_mainpos2(table, key, table->entry_size);
+/* Inserts the given key into the hashtable with the given value. The key must
+ * not already exist in the hash table. The data will be copied from e into
+ * the hashtable (the amount of data copied comes from entry_size when the
+ * table was constructed). Therefore the data at val may be freed once the
+ * call returns. */
+void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e);
+void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e);
+
+INLINE uint32_t upb_inttable_hash(struct upb_inttable *t, upb_inttable_key_t k) {
+ return k & (upb_inttable_size(t)-1); /* Identity hash for ints. */
}
-/* Lookups up key in this table. Inlined because this is in the critical path
- * of parsing. */
-INLINE void *upb_inttable_lookup(struct upb_inttable *table,
- int32_t key,
- int32_t entry_size) {
- /* TODO: experiment with Cuckoo Hashing, which can perform lookups without
- * any branches, cf:
- * http://www.cs.cmu.edu/~damon2006/pdf/zukowski06archconscioushashing.pdf.
- * The tradeoff is having to calculate a second hash for every lookup, which
- * will hurt the simple array case. */
- struct upb_inttable_entry *e = upb_inttable_mainpos2(table, key, entry_size);
- do {
- if (key == e->key) return e;
- e = e->next;
- } while (e);
- return NULL; /* Not found. */
+/* Looks up key in this table. Inlined because this is in the critical path
+ * of parsing. We have the caller specify the entry_size because fixing
+ * this as a literal (instead of reading table->entry_size) gives the
+ * compiler more ability to optimize. */
+INLINE void *upb_inttable_lookup(struct upb_inttable *t,
+ uint32_t key, uint32_t entry_size) {
+ uint32_t hash = upb_inttable_hash(t, key);
+ while(1) {
+ struct upb_inttable_entry *e =
+ (struct upb_inttable_entry*)(char*)t->t.entries + hash*entry_size;
+ if(e->key == 0) return NULL;
+ else if(e->key == key) return e;
+ hash = e->next;
+ }
}
+void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback