diff options
Diffstat (limited to 'upb/table.h')
-rw-r--r-- | upb/table.h | 238 |
1 files changed, 97 insertions, 141 deletions
diff --git a/upb/table.h b/upb/table.h index 0c0a785..f6bff66 100644 --- a/upb/table.h +++ b/upb/table.h @@ -4,13 +4,16 @@ * Copyright (c) 2009 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * This file defines very fast int->struct (inttable) and string->struct - * (strtable) hash tables. The struct can be of any size, and it is stored - * in the table itself, for cache-friendly performance. + * This file defines very fast int->upb_value (inttable) and string->upb_value + * (strtable) hash tables. * - * The table uses internal chaining with Brent's variation (inspired by the - * Lua implementation of hash tables). The hash function for strings is - * Austin Appleby's "MurmurHash." + * The table uses chained scatter with Brent's variation (inspired by the Lua + * implementation of hash tables). The hash function for strings is Austin + * Appleby's "MurmurHash." + * + * The inttable uses uintptr_t as its key, which guarantees it can be used to + * store pointers or integers of at least 32 bits (upb isn't really useful on + * systems where sizeof(void*) < 4). * * This header is internal to upb; its interface should not be considered * public or stable. @@ -19,52 +22,30 @@ #ifndef UPB_TABLE_H_ #define UPB_TABLE_H_ -#include <assert.h> #include <stddef.h> +#include <stdint.h> #include "upb.h" #ifdef __cplusplus extern "C" { #endif -#define UPB_END_OF_CHAIN (uint32_t)-1 - -typedef struct { - bool has_entry:1; - // The rest of the bits are the user's. -} upb_inttable_value; - -typedef struct { - uint32_t key; - uint32_t next; // Internal chaining. -} upb_inttable_header; - -typedef struct { - upb_inttable_header hdr; - upb_inttable_value val; -} upb_inttable_entry; - -// TODO: consider storing the hash in the entry. This would avoid the need to -// rehash on table resizes, but more importantly could possibly improve lookup -// performance by letting us compare hashes before comparing lengths or the -// strings themselves. -typedef struct { - char *key; // We own, nullz. TODO: store explicit len? - uint32_t next; // Internal chaining. -} upb_strtable_header; +typedef union { + uintptr_t num; + char *str; // We own, nullz. +} upb_tabkey; -typedef struct { - upb_strtable_header hdr; - uint32_t val; // Val is at least 32 bits. -} upb_strtable_entry; +typedef struct _upb_tabent { + upb_tabkey key; + upb_value val; + struct _upb_tabent *next; // Internal chaining. +} upb_tabent; typedef struct { - void *entries; // Hash table. - uint32_t count; // Number of entries in the hash part. - uint32_t mask; // Mask to turn hash value -> bucket. - uint16_t entry_size; // Size of each entry. - uint16_t value_size; // Size of each value. - uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. + upb_tabent *entries; // Hash table. + size_t count; // Number of entries in the hash part. + size_t mask; // Mask to turn hash value -> bucket. + uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. } upb_table; typedef struct { @@ -72,149 +53,124 @@ typedef struct { } upb_strtable; typedef struct { - upb_table t; - void *array; // Array part of the table. - uint32_t array_size; // Array part size. - uint32_t array_count; // Array part number of elements. + upb_table t; // For entries that don't fit in the array part. + upb_value *array; // Array part of the table. + size_t array_size; // Array part size. + size_t array_count; // Array part number of elements. } upb_inttable; -// Initialize and free a table, respectively. Specify the initial size -// with 'size' (the size will be increased as necessary). Value size -// specifies how many bytes each value in the table is. -// -// WARNING! The lowest bit of every entry is reserved by the hash table. -// It will always be overwritten when you insert, and must not be modified -// when looked up! -void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size); -void upb_inttable_free(upb_inttable *table); -void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size); -void upb_strtable_free(upb_strtable *table); - -// Number of values in the hash table. -INLINE uint32_t upb_table_count(const upb_table *t) { return t->count; } -INLINE uint32_t upb_inttable_count(const upb_inttable *t) { - return t->array_count + upb_table_count(&t->t); -} -INLINE uint32_t upb_strtable_count(const upb_strtable *t) { - return upb_table_count(&t->t); +INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } + +INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { + return t->entries + ((uint32_t)key.num & t->mask); } -// Inserts the given key into the hashtable with the given value. The key must -// not already exist in the hash table. The data will be copied from val into -// the hashtable (the amount of data copied comes from value_size when the -// table was constructed). Therefore the data at val may be freed once the -// call returns. For string tables, the table takes ownership of the string. -// -// WARNING: the lowest bit of val is reserved and will be overwritten! -void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val); -// TODO: may want to allow for more complex keys with custom hash/comparison -// functions. -void upb_strtable_insert(upb_strtable *t, const char *key, const void *val); -void upb_inttable_compact(upb_inttable *t); +INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; } -INLINE uint32_t _upb_inttable_bucket(const upb_inttable *t, uint32_t k) { - uint32_t bucket = k & t->t.mask; // Identity hash for ints. - assert(bucket != UPB_END_OF_CHAIN); - return bucket; -} +// Initialize and uninitialize a table, respectively. If memory allocation +// failed, false is returned that the table is uninitialized. +bool upb_inttable_init(upb_inttable *table); +bool upb_strtable_init(upb_strtable *table); +void upb_inttable_uninit(upb_inttable *table); +void upb_strtable_uninit(upb_strtable *table); -// Returns true if this key belongs in the array part of the table. -INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) { - return (k < t->array_size); -} +// Returns the number of values in the table. +size_t upb_inttable_count(const upb_inttable *t); +INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } -// Looks up key in this table, returning a pointer to the user's inserted data. -// We have the caller specify the entry_size because fixing this as a literal -// (instead of reading table->entry_size) gives the compiler more ability to -// optimize. +// Inserts the given key into the hashtable with the given value. The key must +// not already exist in the hash table. For string tables, the key must be +// NULL-terminated, and the table will make an internal copy of the key. +// Inttables must not insert a value of UINTPTR_MAX. // -// Note: All returned pointers are invalidated by inserts! -INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key, - size_t entry_size, size_t value_size) { - upb_inttable_value *arrval = - (upb_inttable_value*)UPB_INDEX(t->array, key, value_size); - if (_upb_inttable_isarrkey(t, key)) { - return (arrval->has_entry) ? arrval : NULL; +// If a table resize was required but memory allocation failed, false is +// returned and the table is unchanged. +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val); +bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); + +// Looks up key in this table, returning a pointer to the table's internal copy +// of the user's inserted data, or NULL if this key is not in the table. The +// user is free to modify the given upb_value, which will be reflected in any +// future lookups of this key. The returned pointer is invalidated by inserts. +upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); +upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); + +// Removes an item from the table. Returns true if the remove was successful, +// and stores the removed item in *val if non-NULL. +bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); + +// Optimizes the table for the current set of entries, for both memory use and +// lookup time. Client should call this after all entries have been inserted; +// inserting more entries is legal, but will likely require a table resize. +void upb_inttable_compact(upb_inttable *t); + +// A special-case inlinable version of the lookup routine for 32-bit integers. +INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) { + if (key < t->array_size) { + upb_value *v = &t->array[key]; + return upb_arrhas(*v) ? v : NULL; } - uint32_t bucket = _upb_inttable_bucket(t, key); - upb_inttable_entry *e = - (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size); - while (1) { - if (e->hdr.key == key) { - return &e->val; - } - if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL; - e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size); + for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { + if ((uint32_t)e->key.num == key) return &e->val; + if (e->next == NULL) return NULL; } } -INLINE size_t _upb_inttable_entrysize(size_t value_size) { - return upb_align_up(sizeof(upb_inttable_header) + value_size, 8); -} - -INLINE void *upb_inttable_fastlookup(const upb_inttable *t, uint32_t key, - uint32_t value_size) { - return _upb_inttable_fastlookup( - t, key, _upb_inttable_entrysize(value_size), value_size); -} - -INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { - return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size); -} - -void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len); -void *upb_strtable_lookup(const upb_strtable *t, const char *key); - /* upb_strtable_iter **********************************************************/ // Strtable iteration. Order is undefined. Insertions invalidate iterators. // upb_strtable_iter i; -// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) { +// upb_strtable_begin(&i, t); +// for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { // const char *key = upb_strtable_iter_key(&i); // const myval *val = upb_strtable_iter_value(&i); // // ... // } typedef struct { const upb_strtable *t; - upb_strtable_entry *e; + upb_tabent *e; } upb_strtable_iter; void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); void upb_strtable_next(upb_strtable_iter *i); INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; } INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) { - return i->e->hdr.key; + return i->e->key.str; } -INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) { - return &i->e->val; +INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) { + return i->e->val; } /* upb_inttable_iter **********************************************************/ // Inttable iteration. Order is undefined. Insertions invalidate iterators. -// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); -// i = upb_inttable_next(t, i)) { +// upb_inttable_iter i; +// upb_inttable_begin(&i, t); +// for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { // // ... // } typedef struct { - uint32_t key; - upb_inttable_value *value; + const upb_inttable *t; + union { + upb_tabent *ent; // For hash iteration. + upb_value *val; // For array iteration. + } ptr; + uintptr_t arrkey; bool array_part; } upb_inttable_iter; -upb_inttable_iter upb_inttable_begin(const upb_inttable *t); -upb_inttable_iter upb_inttable_next(const upb_inttable *t, - upb_inttable_iter iter); -INLINE bool upb_inttable_done(upb_inttable_iter iter) { - return iter.value == NULL; +void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t); +void upb_inttable_next(upb_inttable_iter *i); +INLINE bool upb_inttable_done(upb_inttable_iter *i) { + return i->ptr.ent == NULL; } -INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) { - return iter.key; +INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) { + return i->array_part ? i->arrkey : i->ptr.ent->key.num; } -INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) { - return iter.value; +INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) { + return i->array_part ? *i->ptr.val : i->ptr.ent->val; } #ifdef __cplusplus |