From 0124bfd8fe87cb38fe367478d18f449edce8a0ae Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 21 Jun 2009 17:58:23 -0700 Subject: More work on inttable/strtable (not finished). --- upb_table.c | 257 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 145 insertions(+), 112 deletions(-) (limited to 'upb_table.c') diff --git a/upb_table.c b/upb_table.c index 00adc01..13ebb70 100644 --- a/upb_table.c +++ b/upb_table.c @@ -10,9 +10,153 @@ #include #include +static const upb_inttable_key_t EMPTYENT = 0; +static const double MAX_LOAD = 0.85; + +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); + +static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; } +static struct upb_inttable_entry *intent(struct upb_inttable *t, uint32_t i) { + return (struct upb_inttable_entry*)((char*)t->t.entries + i*t->t.entry_size); +} +static struct upb_strtable_entry *strent(struct upb_strtable *t, uint32_t i) { + return (struct upb_strtable_entry*)((char*)t->t.entries + i*t->t.entry_size); +} + +void upb_table_init(struct upb_table *t, uint32_t size, uint16_t entry_size) +{ + t->count = 0; + t->entry_size = entry_size; + t->size_lg2 = 1; + while(size >>= 1) t->size_lg2++; + t->size_lg2 = max(t->size_lg2, 4); /* Min size of 16. */ + t->entries = malloc(upb_table_size(t) * t->entry_size); +} + +void upb_inttable_init(struct upb_inttable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); + for(uint32_t i = 0; i < upb_table_size(&t->t); i++) + intent(t, i)->key = EMPTYENT; +} + +void upb_strtable_init(struct upb_strtable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); + for(uint32_t i = 0; i < upb_table_size(&t->t); i++) + strent(t, i)->key.data = NULL; +} + +void upb_table_free(struct upb_table *t) { free(t->entries); } +void upb_inttable_free(struct upb_inttable *t) { upb_table_free(&t->t); } +void upb_strtable_free(struct upb_strtable *t) { upb_table_free(&t->t); } + +void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key) +{ + uint32_t hash = + MurmurHash2(key->data, key->byte_len, 0) & (upb_strtable_size(t)-1); + while(1) { + struct upb_strtable_entry *e = + (struct upb_strtable_entry*)(char*)t->t.entries + hash*t->t.entry_size; + if(e->key.data == NULL) return NULL; + else if(upb_string_eql(&e->key, key)) return e; + hash = e->next; + } +} + +static struct upb_inttable_entry *find_empty_slot(struct upb_inttable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 0; i < upb_inttable_size(table); i++) { + struct upb_inttable_entry *e = intent(table, i); + if(e->key == EMPTYENT) return e; + } + assert(false); + return NULL; +} + +static void *maybe_resize(struct upb_table *t) { + if((double)++t->count / upb_table_size(t) > MAX_LOAD) { + void *old_entries = t->entries; + t->size_lg2++; /* Double the table size. */ + t->entries = malloc(upb_table_size(t) * t->entry_size); + return old_entries; + } else { + return NULL; /* No resize necessary. */ + } +} + +static void intinsert(void *table, struct upb_inttable_entry *e, uint32_t size) +{ + /* TODO */ +#if 0 + struct upb_inttable_entry *e, *table_e; + e = upb_inttable_entry_get(entries, i, entry_size); + table_e = upb_inttable_mainpos2(table, e->key); + if(table_e->key != UPB_EMPTY_ENTRY) { /* Collision. */ + if(table_e == upb_inttable_mainpos2(table, table_e->key)) { + /* Existing element is in its main posisiton. Find an empty slot to + * place our new element and append it to this key's chain. */ + struct upb_inttable_entry *empty = find_empty_slot(table); + while (table_e->next) table_e = table_e->next; + table_e->next = empty; + table_e = empty; + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + struct upb_inttable_entry *empty, *colliding_key_mainpos; + empty = find_empty_slot(table); + colliding_key_mainpos = upb_inttable_mainpos2(table, table_e->key); + assert(colliding_key_mainpos->key != UPB_EMPTY_ENTRY); + assert(colliding_key_mainpos->next); + memcpy(empty, table_e, entry_size); /* next is copied also. */ + while(1) { + assert(colliding_key_mainpos->next); + if(colliding_key_mainpos->next == table_e) { + colliding_key_mainpos->next = empty; + break; + } + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, entry_size); + table_e->next = NULL; +#endif +} + +void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e) +{ + void *new_entries = maybe_resize(&t->t); + if(new_entries) { /* Are we doing a resize? */ + for(uint32_t i = 0; i < (upb_inttable_size(t)>>1); i++) { + struct upb_inttable_entry *old_e = intent(t, i); + if(old_e->key != EMPTYENT) intinsert(new_entries, old_e, t->t.entry_size); + } + } + intinsert(t->t.entries, e, t->t.entry_size); +} + +static void strinsert(void *table, struct upb_strtable_entry *e, uint32_t size) +{ + /* TODO */ +} + +void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e) +{ + void *new_entries = maybe_resize(&t->t); + if(new_entries) { /* Are we doing a resize? */ + for(uint32_t i = 0; i < (upb_strtable_size(t)>>1); i++) { + struct upb_strtable_entry *old_e = strent(t, i); + if(old_e->key.data != NULL) strinsert(new_entries, old_e, t->t.entry_size); + } + } + strinsert(t->t.entries, e, t->t.entry_size); +} + #ifdef UPB_UNALIGNED_READS_OK //----------------------------------------------------------------------------- -// MurmurHash2, by Austin Appleby +// MurmurHash2, by Austin Appleby (released as public domain). // Reformatted and C99-ified by Joshua Haberman. // Note - This code makes a few assumptions about how your machine behaves - // 1. We can read a 4-byte value from any address without crashing @@ -183,114 +327,3 @@ static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) #undef MIX #endif // UPB_UNALIGNED_READS_OK - -static int compare_entries(const void *f1, const void *f2) -{ - return ((struct upb_inttable_entry*)f1)->key - - ((struct upb_inttable_entry*)f2)->key; -} - -static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; } - -static uint32_t round_up_to_pow2(uint32_t v) -{ - /* cf. Bit Twiddling Hacks: - * http://www-graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ - v--; - v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; - v++; - return v; -} - -static struct upb_inttable_entry *find_empty_slot(struct upb_inttable *table) -{ - /* TODO: does it matter that this is biased towards the front of the table? */ - for(uint32_t i = 0; i < table->size; i++) { - struct upb_inttable_entry *e = - upb_inttable_entry_get(table->entries, i, table->entry_size); - if(e->key == UPB_EMPTY_ENTRY) return e; - } - assert(false); - return NULL; -} - -void upb_inttable_init(struct upb_inttable *table, void *entries, - int num_entries, int entry_size) -{ - qsort(entries, num_entries, entry_size, compare_entries); - - /* Find the largest n for which at least half the keys n might - * possibly have collisions. Start at 8 to avoid noise of small numbers. */ - upb_inttable_key_t n = 0, maybe_n; - bool all_in_array = true; - for(int i = 0; i < num_entries; i++) { - struct upb_inttable_entry *e = - upb_inttable_entry_get(entries, i, entry_size); - maybe_n = e->key; - if(maybe_n > 8 && maybe_n/(i+1) >= 2) { - all_in_array = false; - break; - } - n = maybe_n; - } - - /* TODO: measure, tweak, optimize this choice of table size. Possibly test - * (at runtime) maximum chain length for each proposed size. */ - uint32_t min_size_by_load = all_in_array ? n : (double)num_entries / 0.85; - uint32_t min_size = max(n, min_size_by_load); - table->size = round_up_to_pow2(min_size); - table->entry_size = entry_size; - table->entries = malloc(table->size * entry_size); - - /* Initialize to empty. */ - for(size_t i = 0; i < table->size; i++) { - struct upb_inttable_entry *e = - upb_inttable_entry_get(table->entries, i, entry_size); - e->key = UPB_EMPTY_ENTRY; - e->next = NULL; - } - - /* Insert the elements. */ - for(int i = 0; i < num_entries; i++) { - struct upb_inttable_entry *e, *table_e; - e = upb_inttable_entry_get(entries, i, entry_size); - table_e = upb_inttable_mainpos(table, e->key); - if(table_e->key != UPB_EMPTY_ENTRY) { /* Collision. */ - if(table_e == upb_inttable_mainpos(table, table_e->key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - struct upb_inttable_entry *empty = find_empty_slot(table); - while (table_e->next) table_e = table_e->next; - table_e->next = empty; - table_e = empty; - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. */ - struct upb_inttable_entry *empty, *colliding_key_mainpos; - empty = find_empty_slot(table); - colliding_key_mainpos = upb_inttable_mainpos(table, table_e->key); - assert(colliding_key_mainpos->key != UPB_EMPTY_ENTRY); - assert(colliding_key_mainpos->next); - memcpy(empty, table_e, entry_size); /* next is copied also. */ - while(1) { - assert(colliding_key_mainpos->next); - if(colliding_key_mainpos->next == table_e) { - colliding_key_mainpos->next = empty; - break; - } - } - /* table_e remains set to our mainpos. */ - } - } - memcpy(table_e, e, entry_size); - table_e->next = NULL; - } -} - -void upb_inttable_free(struct upb_inttable *table) -{ - free(table->entries); -} - -- cgit v1.2.3