From e29bf964d1716398e8354a50f506906a307298e5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 12:15:31 -0700 Subject: Tests for string and fleshed out implementation. --- Makefile | 15 ++++++++----- core/upb_string.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++------- core/upb_string.h | 40 ++++++++++++++++++++++++---------- tests/test_string.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ tests/test_table.cc | 13 ++++++++++- 5 files changed, 161 insertions(+), 26 deletions(-) create mode 100644 tests/test_string.c diff --git a/Makefile b/Makefile index ca4f940..1f977b4 100644 --- a/Makefile +++ b/Makefile @@ -86,22 +86,25 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -TESTS=tests/tests \ +TESTS=tests/test_string \ + tests/test_table +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ tests/test_table \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb $(TESTS): core/libupb.a -#VALGRIND=valgrind --leak-check=full --error-exitcode=1 -VALGRIND= +VALGRIND=valgrind --leak-check=full --error-exitcode=1 +#VALGRIND= test: tests @echo Running all tests under valgrind. - $(VALGRIND) ./tests/tests # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/t.* ; do \ - if [ -f ./$$test ] ; then \ + @for test in tests/*; do \ + if [ -x ./$$test ] ; then \ echo $(VALGRIND) ./$$test: \\c; \ $(VALGRIND) ./$$test; \ fi \ diff --git a/core/upb_string.c b/core/upb_string.c index 91ab9ae..f9af9e9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -7,8 +7,11 @@ #include "upb_string.h" #include - -#define UPB_STRING_UNFINALIZED -1 +#ifdef __GLIBC__ +#include +#elif defined(__APPLE__) +#include +#endif static uint32_t upb_round_up_pow2(uint32_t v) { // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 @@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) { upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; + str->cached_mem = NULL; +#ifndef UPB_HAVE_MSIZE str->size = 0; - str->len = UPB_STRING_UNFINALIZED; +#endif + str->src = NULL; upb_atomic_refcount_init(&str->refcount, 1); return str; } +uint32_t upb_string_size(upb_string *str) { +#ifdef __GLIBC__ + return malloc_usable_size(str->cached_mem); +#elif defined(__APPLE__) + return malloc_size(str->cached_mem); +#else + return str->size; +#endif +} + +static void upb_string_release(upb_string *str) { + if(str->src) { + upb_string_unref(str->src); + str->src = NULL; + } +} + void _upb_string_free(upb_string *str) { - if(str->ptr) free(str->ptr); + if(str->cached_mem) free(str->cached_mem); + upb_string_release(str); free(str); } +upb_string *upb_string_tryrecycle(upb_string *str) { + if(str == NULL || upb_atomic_read(&str->refcount) > 1) { + return upb_string_new(); + } else { + str->ptr = NULL; + upb_string_release(str); + return str; + } +} + char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->len == UPB_STRING_UNFINALIZED); - if (str->size < len) { - str->size = upb_round_up_pow2(len); - str->ptr = realloc(str->ptr, str->size); + assert(str->ptr == NULL); + uint32_t size = upb_string_size(str); + if (size < len) { + size = upb_round_up_pow2(len); + str->cached_mem = realloc(str->cached_mem, size); +#ifndef UPB_HAVE_MSIZE + str->size = size; +#endif } str->len = len; + str->ptr = str->cached_mem; return str->ptr; } + +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len) { + assert(str->ptr == NULL); + str->src = upb_string_getref(target_str); + str->ptr = upb_string_getrobuf(target_str) + start; + str->len = len; +} diff --git a/core/upb_string.h b/core/upb_string.h index 770dba7..7ec3d48 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -16,8 +16,6 @@ * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. */ @@ -37,16 +35,31 @@ extern "C" { // All members of this struct are private, and may only be read/written through // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { + // The pointer to our currently active data. This may be memory we own + // or a pointer into memory we don't own. char *ptr; + + // If non-NULL, this is a block of memory we own. We keep this cached even + // if "ptr" is currently aliasing memory we don't own. + char *cached_mem; + + // The effective length of the string (the bytes at ptr). int32_t len; +#ifndef UPB_HAVE_MSIZE + // How many bytes are allocated in cached_mem. + // + // Many platforms have a function that can tell you the size of a block + // that was previously malloc'd. In this case we can avoid storing the + // size explicitly. uint32_t size; +#endif + + // The string's refcount. upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_atomic_refcount_t reader_count; - } extra; + + // Used if this is a slice of another string, NULL otherwise. We own a ref + // on src. + struct _upb_string *src; }; // Returns a newly-created, empty, non-finalized string. When the string is no @@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len); +// Sketch of an API for allowing upb_strings to reference external, unowned +// data. Waiting for a clear use case before actually implementing it. +// // Makes the string "str" a reference to the given string data. The caller // guarantees that the given string data will not change or be deleted until // a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); +// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +// void upb_string_detach(upb_string *str); // Allows using upb_strings in printf, ie: // upb_strptr str = UPB_STRLIT("Hello, World!\n"); @@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { } // Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); +INLINE upb_string *upb_strdupc(const char *src) { + return upb_strduplen(src, strlen(src)); +} // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); diff --git a/tests/test_string.c b/tests/test_string.c new file mode 100644 index 0000000..4fdab6c --- /dev/null +++ b/tests/test_string.c @@ -0,0 +1,56 @@ + +#undef NDEBUG /* ensure tests always assert. */ +#include "upb_string.h" + +char static_str[] = "Static string."; + +int main() { + upb_string *str = upb_string_new(); + assert(str != NULL); + upb_string_unref(str); + + // Can also create a string by tryrecycle(NULL). + str = upb_string_tryrecycle(NULL); + assert(str != NULL); + + upb_strcpyc(str, static_str); + assert(upb_string_len(str) == (sizeof(static_str) - 1)); + const char *robuf = upb_string_getrobuf(str); + assert(robuf != NULL); + assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + upb_string_endread(str); + + upb_string *str2 = upb_string_tryrecycle(str); + // No other referents, so should return the same string. + assert(str2 == str); + + // Write a shorter string, the same memory should be reused. + upb_strcpyc(str, "XX"); + const char *robuf2 = upb_string_getrobuf(str); + assert(robuf2 == robuf); + assert(memcmp(robuf2, "XX", 2) == 0); + + // Make string alias part of another string. + str2 = upb_strdupc("WXYZ"); + upb_string_substr(str, str2, 1, 2); + assert(upb_string_len(str) == 2); + assert(upb_string_len(str2) == 4); + // The two string should be aliasing the same data. + const char *robuf3 = upb_string_getrobuf(str); + const char *robuf4 = upb_string_getrobuf(str2); + assert(robuf3 == robuf4 + 1); + // The aliased string should have an extra ref. + assert(upb_atomic_read(&str2->refcount) == 2); + + // Recycling str should eliminate the extra ref. + str = upb_string_tryrecycle(str); + assert(upb_atomic_read(&str2->refcount) == 1); + + // Resetting str should reuse its old data. + upb_strcpyc(str, "XX"); + const char *robuf5 = upb_string_getrobuf(str); + assert(robuf5 == robuf); + + upb_string_unref(str); + upb_string_unref(str2); +} diff --git a/tests/test_table.cc b/tests/test_table.cc index 37e14a8..47d5e57 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -12,6 +12,8 @@ #include #include +bool benchmark = false; + using std::string; using std::vector; @@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries) } } + if(!benchmark) { + upb_inttable_free(&table); + return; + } + /* Test performance. We only test lookups for keys that are known to exist. */ uintptr_t x = 0; const unsigned int iterations = 0xFFFFFF; @@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num) return buf; } -int main() +int main(int argc, char *argv[]) { + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--benchmark") == 0) benchmark = true; + } + vector keys; keys.push_back("google.protobuf.FileDescriptorSet"); keys.push_back("google.protobuf.FileDescriptorProto"); -- cgit v1.2.3