From fd1cc566253819100a416b5be37547c75fecf314 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 16 Dec 2014 15:23:29 -0800 Subject: Modified strtable to support length-delimited string keys. Allows for arbitrary binary data, e.g., to support strings from other languages as key values. --- upb/bindings/lua/table.c | 2 +- upb/table.c | 64 +++++++++++++++++++++++++++--------------------- upb/table.int.h | 38 +++++++++++++++++++++++++--- 3 files changed, 71 insertions(+), 33 deletions(-) diff --git a/upb/bindings/lua/table.c b/upb/bindings/lua/table.c index febadf0..e15382b 100644 --- a/upb/bindings/lua/table.c +++ b/upb/bindings/lua/table.c @@ -68,7 +68,7 @@ static void lupbtable_pushent(lua_State *L, const upb_tabent *e, if (inttab) { lua_pushnumber(L, e->key.num); } else { - lua_pushstring(L, e->key.str); + lua_pushlstring(L, e->key.s.str, e->key.s.length); } lua_setfield(L, -2, "key"); lupbtable_pushval(L, e->val, ctype); diff --git a/upb/table.c b/upb/table.c index 63bb068..aa99fe2 100644 --- a/upb/table.c +++ b/upb/table.c @@ -36,31 +36,28 @@ int log2ceil(uint64_t v) { } char *upb_strdup(const char *s) { - size_t n = strlen(s) + 1; + return upb_strdup2(s, strlen(s)); +} + +char *upb_strdup2(const char *s, size_t len) { + // Always null-terminate, even if binary data; but don't rely on the input to + // have a null-terminating byte since it may be a raw binary buffer. + size_t n = len + 1; char *p = malloc(n); - if (p) memcpy(p, s, n); + if (p) memcpy(p, s, len); + p[len] = 0; return p; } // A type to represent the lookup key of either a strtable or an inttable. -// This is like upb_tabkey, but can carry a size also to allow lookups of -// non-NULL-terminated strings (we don't store string lengths in the table). typedef struct { upb_tabkey key; - uint32_t len; // For string keys only. } lookupkey_t; -static lookupkey_t strkey(const char *str) { - lookupkey_t k; - k.key.str = (char*)str; - k.len = strlen(str); - return k; -} - static lookupkey_t strkey2(const char *str, size_t len) { lookupkey_t k; - k.key.str = (char*)str; - k.len = len; + k.key.s.str = (char*)str; + k.key.s.length = len; return k; } @@ -242,11 +239,12 @@ static size_t begin(const upb_table *t) { // A simple "subclass" of upb_table that only adds a hash function for strings. static uint32_t strhash(upb_tabkey key) { - return MurmurHash2(key.str, strlen(key.str), 0); + return MurmurHash2(key.s.str, key.s.length, 0); } static bool streql(upb_tabkey k1, lookupkey_t k2) { - return strncmp(k1.str, k2.key.str, k2.len) == 0 && k1.str[k2.len] == '\0'; + return k1.s.length == k2.key.s.length && + memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0; } bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) { @@ -255,7 +253,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) { void upb_strtable_uninit(upb_strtable *t) { for (size_t i = 0; i < upb_table_size(&t->t); i++) - free((void*)t->t.entries[i].key.str); + free((void*)t->t.entries[i].key.s.str); uninit(&t->t); } @@ -266,26 +264,30 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) { upb_strtable_iter i; upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_strtable_insert( - &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i)); + upb_strtable_insert2( + &new_table, + upb_strtable_iter_key(&i), + upb_strtable_iter_keylength(&i), + upb_strtable_iter_value(&i)); } upb_strtable_uninit(t); *t = new_table; return true; } -bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { +bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len, + upb_value v) { if (isfull(&t->t)) { // Need to resize. New table of double the size, add old elements to it. if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) { return false; } } - if ((k = upb_strdup(k)) == NULL) return false; + if ((k = upb_strdup2(k, len)) == NULL) return false; - lookupkey_t key = strkey(k); - uint32_t hash = MurmurHash2(key.key.str, key.len, 0); - insert(&t->t, strkey(k), v, hash, &strhash, &streql); + lookupkey_t key = strkey2(k, len); + uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0); + insert(&t->t, key, v, hash, &strhash, &streql); return true; } @@ -295,11 +297,12 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, return lookup(&t->t, strkey2(key, len), v, hash, &streql); } -bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) { +bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len, + upb_value *val) { uint32_t hash = MurmurHash2(key, strlen(key), 0); upb_tabkey tabkey; - if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) { - free((void*)tabkey.str); + if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { + free((void*)tabkey.s.str); return true; } else { return false; @@ -328,7 +331,12 @@ bool upb_strtable_done(const upb_strtable_iter *i) { const char *upb_strtable_iter_key(upb_strtable_iter *i) { assert(!upb_strtable_done(i)); - return str_tabent(i)->key.str; + return str_tabent(i)->key.s.str; +} + +size_t upb_strtable_iter_keylength(upb_strtable_iter *i) { + assert(!upb_strtable_done(i)); + return str_tabent(i)->key.s.length; } upb_value upb_strtable_iter_value(const upb_strtable_iter *i) { diff --git a/upb/table.int.h b/upb/table.int.h index 049ad3a..e851e90 100644 --- a/upb/table.int.h +++ b/upb/table.int.h @@ -97,6 +97,9 @@ typedef struct { // Like strdup(), which isn't always available since it's not ANSI C. char *upb_strdup(const char *s); +// Variant that works with a length-delimited rather than NULL-delimited string, +// as supported by strtable. +char *upb_strdup2(const char *s, size_t len); UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val, upb_ctype_t ctype) { @@ -151,12 +154,24 @@ FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR); typedef union { uintptr_t num; - const char *str; // We own, nullz. + struct { + // We own this. NULL-terminated but may also contain binary data; see + // explicit length below. + // TODO: move the length to the start of the string in order to reduce + // tabkey's size (to one machine word) in a way that supports static + // initialization. + const char *str; + size_t length; + } s; } upb_tabkey; #define UPB_TABKEY_NUM(n) {n} #ifdef UPB_C99 -#define UPB_TABKEY_STR(s) {.str = s} +// Given that |s| is a string literal, sizeof(s) gives us a +// compile-time-constant strlen(). We must ensure that this works for static +// data initializers. +#define UPB_TABKEY_STR(strval) { .s = { .str = strval, \ + .length = sizeof(strval) - 1 } } #endif // TODO(haberman): C++ #define UPB_TABKEY_NONE {0} @@ -262,7 +277,14 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { // If a table resize was required but memory allocation failed, false is // returned and the table is unchanged. bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val); -bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); +bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len, + upb_value val); + +// For NULL-terminated strings. +UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key, + upb_value val) { + return upb_strtable_insert2(t, key, strlen(key), val); +} // Looks up key in this table, returning "true" if the key was found. // If v is non-NULL, copies the value for this key into *v. @@ -279,7 +301,14 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key, // Removes an item from the table. Returns true if the remove was successful, // and stores the removed item in *val if non-NULL. bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); -bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val); +bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len, + upb_value *val); + +// For NULL-terminated strings. +UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key, + upb_value *v) { + return upb_strtable_remove2(t, key, strlen(key), v); +} // Updates an existing entry in an inttable. If the entry does not exist, // returns false and does nothing. Unlike insert/remove, this does not @@ -373,6 +402,7 @@ void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); void upb_strtable_next(upb_strtable_iter *i); bool upb_strtable_done(const upb_strtable_iter *i); const char *upb_strtable_iter_key(upb_strtable_iter *i); +size_t upb_strtable_iter_keylength(upb_strtable_iter *i); upb_value upb_strtable_iter_value(const upb_strtable_iter *i); void upb_strtable_iter_setdone(upb_strtable_iter *i); bool upb_strtable_iter_isequal(const upb_strtable_iter *i1, -- cgit v1.2.3