From 7ccb32c30577df22043d05126144f1dd5829255f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 22 Jun 2009 12:46:14 -0700 Subject: Finished hashtable implementation, not yet tested. --- upb_table.c | 172 +++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 105 insertions(+), 67 deletions(-) (limited to 'upb_table.c') diff --git a/upb_table.c b/upb_table.c index 13ebb70..9753088 100644 --- a/upb_table.c +++ b/upb_table.c @@ -16,11 +16,13 @@ static const double MAX_LOAD = 0.85; static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; } + +/* We use 1-based indexes into the table so that 0 can be "NULL". */ static struct upb_inttable_entry *intent(struct upb_inttable *t, uint32_t i) { - return (struct upb_inttable_entry*)((char*)t->t.entries + i*t->t.entry_size); + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); } static struct upb_strtable_entry *strent(struct upb_strtable *t, uint32_t i) { - return (struct upb_strtable_entry*)((char*)t->t.entries + i*t->t.entry_size); + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); } void upb_table_init(struct upb_table *t, uint32_t size, uint16_t entry_size) @@ -30,128 +32,164 @@ void upb_table_init(struct upb_table *t, uint32_t size, uint16_t entry_size) t->size_lg2 = 1; while(size >>= 1) t->size_lg2++; t->size_lg2 = max(t->size_lg2, 4); /* Min size of 16. */ - t->entries = malloc(upb_table_size(t) * t->entry_size); + size_t bytes = upb_table_size(t) * t->entry_size; + t->entries = malloc(bytes); + memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ } void upb_inttable_init(struct upb_inttable *t, uint32_t size, uint16_t entsize) { upb_table_init(&t->t, size, entsize); - for(uint32_t i = 0; i < upb_table_size(&t->t); i++) - intent(t, i)->key = EMPTYENT; } void upb_strtable_init(struct upb_strtable *t, uint32_t size, uint16_t entsize) { upb_table_init(&t->t, size, entsize); - for(uint32_t i = 0; i < upb_table_size(&t->t); i++) - strent(t, i)->key.data = NULL; } void upb_table_free(struct upb_table *t) { free(t->entries); } void upb_inttable_free(struct upb_inttable *t) { upb_table_free(&t->t); } void upb_strtable_free(struct upb_strtable *t) { upb_table_free(&t->t); } +static uint32_t strtable_bucket(struct upb_strtable *t, struct upb_string *key) +{ + uint32_t hash = MurmurHash2(key->data, key->byte_len, 0); + return (hash & (upb_strtable_size(t)-1)) + 1; +} + void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key) { - uint32_t hash = - MurmurHash2(key->data, key->byte_len, 0) & (upb_strtable_size(t)-1); - while(1) { - struct upb_strtable_entry *e = - (struct upb_strtable_entry*)(char*)t->t.entries + hash*t->t.entry_size; - if(e->key.data == NULL) return NULL; - else if(upb_string_eql(&e->key, key)) return e; - hash = e->next; - } + uint32_t bucket = strtable_bucket(t, key); + struct upb_strtable_entry *e; + do { + e = strent(t, bucket); + if(upb_string_eql(&e->key, key)) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; } -static struct upb_inttable_entry *find_empty_slot(struct upb_inttable *table) +static uint32_t empty_intbucket(struct upb_inttable *table) { /* TODO: does it matter that this is biased towards the front of the table? */ - for(uint32_t i = 0; i < upb_inttable_size(table); i++) { + for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { struct upb_inttable_entry *e = intent(table, i); - if(e->key == EMPTYENT) return e; + if(e->key == EMPTYENT) return i; } assert(false); - return NULL; + return 0; } -static void *maybe_resize(struct upb_table *t) { - if((double)++t->count / upb_table_size(t) > MAX_LOAD) { - void *old_entries = t->entries; - t->size_lg2++; /* Double the table size. */ - t->entries = malloc(upb_table_size(t) * t->entry_size); - return old_entries; - } else { - return NULL; /* No resize necessary. */ - } -} - -static void intinsert(void *table, struct upb_inttable_entry *e, uint32_t size) +static void intinsert(struct upb_inttable *t, struct upb_inttable_entry *e) { - /* TODO */ -#if 0 - struct upb_inttable_entry *e, *table_e; - e = upb_inttable_entry_get(entries, i, entry_size); - table_e = upb_inttable_mainpos2(table, e->key); - if(table_e->key != UPB_EMPTY_ENTRY) { /* Collision. */ - if(table_e == upb_inttable_mainpos2(table, table_e->key)) { + uint32_t bucket = upb_inttable_bucket(t, e->key); + struct upb_inttable_entry *table_e = intent(t, bucket); + if(table_e->key != EMPTYENT) { /* Collision. */ + if(bucket == upb_inttable_bucket(t, table_e->key)) { /* Existing element is in its main posisiton. Find an empty slot to * place our new element and append it to this key's chain. */ - struct upb_inttable_entry *empty = find_empty_slot(table); - while (table_e->next) table_e = table_e->next; - table_e->next = empty; - table_e = empty; + uint32_t empty_bucket = empty_intbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = intent(t, table_e->next-1); + table_e->next = empty_bucket; + table_e = intent(t, empty_bucket); } else { /* Existing element is not in its main position. Move it to an empty * slot and put our element in its main position. */ - struct upb_inttable_entry *empty, *colliding_key_mainpos; - empty = find_empty_slot(table); - colliding_key_mainpos = upb_inttable_mainpos2(table, table_e->key); - assert(colliding_key_mainpos->key != UPB_EMPTY_ENTRY); - assert(colliding_key_mainpos->next); - memcpy(empty, table_e, entry_size); /* next is copied also. */ + uint32_t empty_bucket = empty_intbucket(t); + uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); + memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + struct upb_inttable_entry *evictee_e = intent(t, evictee_bucket); while(1) { - assert(colliding_key_mainpos->next); - if(colliding_key_mainpos->next == table_e) { - colliding_key_mainpos->next = empty; + assert(evictee_e->key != UPB_EMPTY_ENTRY); + assert(evictee_e->next != END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; break; } } /* table_e remains set to our mainpos. */ } } - memcpy(table_e, e, entry_size); - table_e->next = NULL; -#endif + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; } void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e) { - void *new_entries = maybe_resize(&t->t); - if(new_entries) { /* Are we doing a resize? */ - for(uint32_t i = 0; i < (upb_inttable_size(t)>>1); i++) { + if((double)++t->t.count / upb_inttable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + struct upb_inttable new_table; + upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); + for(uint32_t i = 1; i <= upb_inttable_size(t)/2; i++) { struct upb_inttable_entry *old_e = intent(t, i); - if(old_e->key != EMPTYENT) intinsert(new_entries, old_e, t->t.entry_size); + if(old_e->key != EMPTYENT) intinsert(&new_table, old_e); } + upb_inttable_free(t); + *t = new_table; } - intinsert(t->t.entries, e, t->t.entry_size); + intinsert(t->t.entries, e); } -static void strinsert(void *table, struct upb_strtable_entry *e, uint32_t size) +static uint32_t empty_strbucket(struct upb_strtable *table) { - /* TODO */ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { + struct upb_strtable_entry *e = strent(table, i); + if(e->key.byte_len == 0) return i; + } + assert(false); + return 0; +} + +static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e) +{ + uint32_t bucket = strtable_bucket(t, &e->key); + struct upb_strtable_entry *table_e = strent(t, bucket); + if(table_e->key.byte_len != 0) { /* Collision. */ + if(bucket == strtable_bucket(t, &table_e->key)) { + /* Existing element is in its main posisiton. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_strbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = strent(t, table_e->next-1); + table_e->next = empty_bucket; + table_e = strent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + uint32_t empty_bucket = empty_strbucket(t); + uint32_t evictee_bucket = strtable_bucket(t, &table_e->key); + memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + struct upb_strtable_entry *evictee_e = strent(t, evictee_bucket); + while(1) { + assert(evictee_e->key != UPB_EMPTY_ENTRY); + assert(evictee_e->next != END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; } void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e) { - void *new_entries = maybe_resize(&t->t); - if(new_entries) { /* Are we doing a resize? */ - for(uint32_t i = 0; i < (upb_strtable_size(t)>>1); i++) { + if((double)++t->t.count / upb_strtable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + struct upb_strtable new_table; + upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); + for(uint32_t i = 1; i <= upb_strtable_size(t)/2; i++) { struct upb_strtable_entry *old_e = strent(t, i); - if(old_e->key.data != NULL) strinsert(new_entries, old_e, t->t.entry_size); + if(old_e->key.byte_len != 0) strinsert(&new_table, old_e); } + upb_strtable_free(t); + *t = new_table; } - strinsert(t->t.entries, e, t->t.entry_size); + strinsert(t->t.entries, e); } #ifdef UPB_UNALIGNED_READS_OK -- cgit v1.2.3