From e29bf964d1716398e8354a50f506906a307298e5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 12:15:31 -0700 Subject: Tests for string and fleshed out implementation. --- core/upb_string.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++------- core/upb_string.h | 40 +++++++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 19 deletions(-) (limited to 'core') diff --git a/core/upb_string.c b/core/upb_string.c index 91ab9ae..f9af9e9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -7,8 +7,11 @@ #include "upb_string.h" #include - -#define UPB_STRING_UNFINALIZED -1 +#ifdef __GLIBC__ +#include +#elif defined(__APPLE__) +#include +#endif static uint32_t upb_round_up_pow2(uint32_t v) { // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 @@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) { upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; + str->cached_mem = NULL; +#ifndef UPB_HAVE_MSIZE str->size = 0; - str->len = UPB_STRING_UNFINALIZED; +#endif + str->src = NULL; upb_atomic_refcount_init(&str->refcount, 1); return str; } +uint32_t upb_string_size(upb_string *str) { +#ifdef __GLIBC__ + return malloc_usable_size(str->cached_mem); +#elif defined(__APPLE__) + return malloc_size(str->cached_mem); +#else + return str->size; +#endif +} + +static void upb_string_release(upb_string *str) { + if(str->src) { + upb_string_unref(str->src); + str->src = NULL; + } +} + void _upb_string_free(upb_string *str) { - if(str->ptr) free(str->ptr); + if(str->cached_mem) free(str->cached_mem); + upb_string_release(str); free(str); } +upb_string *upb_string_tryrecycle(upb_string *str) { + if(str == NULL || upb_atomic_read(&str->refcount) > 1) { + return upb_string_new(); + } else { + str->ptr = NULL; + upb_string_release(str); + return str; + } +} + char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->len == UPB_STRING_UNFINALIZED); - if (str->size < len) { - str->size = upb_round_up_pow2(len); - str->ptr = realloc(str->ptr, str->size); + assert(str->ptr == NULL); + uint32_t size = upb_string_size(str); + if (size < len) { + size = upb_round_up_pow2(len); + str->cached_mem = realloc(str->cached_mem, size); +#ifndef UPB_HAVE_MSIZE + str->size = size; +#endif } str->len = len; + str->ptr = str->cached_mem; return str->ptr; } + +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len) { + assert(str->ptr == NULL); + str->src = upb_string_getref(target_str); + str->ptr = upb_string_getrobuf(target_str) + start; + str->len = len; +} diff --git a/core/upb_string.h b/core/upb_string.h index 770dba7..7ec3d48 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -16,8 +16,6 @@ * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. */ @@ -37,16 +35,31 @@ extern "C" { // All members of this struct are private, and may only be read/written through // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { + // The pointer to our currently active data. This may be memory we own + // or a pointer into memory we don't own. char *ptr; + + // If non-NULL, this is a block of memory we own. We keep this cached even + // if "ptr" is currently aliasing memory we don't own. + char *cached_mem; + + // The effective length of the string (the bytes at ptr). int32_t len; +#ifndef UPB_HAVE_MSIZE + // How many bytes are allocated in cached_mem. + // + // Many platforms have a function that can tell you the size of a block + // that was previously malloc'd. In this case we can avoid storing the + // size explicitly. uint32_t size; +#endif + + // The string's refcount. upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_atomic_refcount_t reader_count; - } extra; + + // Used if this is a slice of another string, NULL otherwise. We own a ref + // on src. + struct _upb_string *src; }; // Returns a newly-created, empty, non-finalized string. When the string is no @@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len); +// Sketch of an API for allowing upb_strings to reference external, unowned +// data. Waiting for a clear use case before actually implementing it. +// // Makes the string "str" a reference to the given string data. The caller // guarantees that the given string data will not change or be deleted until // a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); +// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +// void upb_string_detach(upb_string *str); // Allows using upb_strings in printf, ie: // upb_strptr str = UPB_STRLIT("Hello, World!\n"); @@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { } // Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); +INLINE upb_string *upb_strdupc(const char *src) { + return upb_strduplen(src, strlen(src)); +} // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); -- cgit v1.2.3