summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 15
-rw-r--r-- core/upb_string.c 63
-rw-r--r-- core/upb_string.h 40
-rw-r--r-- tests/test_string.c 56
-rw-r--r-- tests/test_table.cc 13
5 files changed, 161 insertions, 26 deletions
diff --git a/Makefile b/Makefile
index ca4f940..1f977b4 100644
--- a/Makefile
+++ b/Makefile
@@ -86,22 +86,25 @@ tests/test.proto.pb: tests/test.proto
# TODO: replace with upbc
protoc tests/test.proto -otests/test.proto.pb
-TESTS=tests/tests \
+TESTS=tests/test_string \
+ tests/test_table
+tests: $(TESTS)
+
+OTHER_TESTS=tests/tests \
tests/test_table \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2 \
tests/test.proto.pb
$(TESTS): core/libupb.a
-#VALGRIND=valgrind --leak-check=full --error-exitcode=1
-VALGRIND=
+VALGRIND=valgrind --leak-check=full --error-exitcode=1
+#VALGRIND=
test: tests
@echo Running all tests under valgrind.
- $(VALGRIND) ./tests/tests
# Needs to be rewritten to separate the benchmark.
# valgrind --error-exitcode=1 ./tests/test_table
- @for test in tests/t.* ; do \
- if [ -f ./$$test ] ; then \
+ @for test in tests/*; do \
+ if [ -x ./$$test ] ; then \
echo $(VALGRIND) ./$$test: \\c; \
$(VALGRIND) ./$$test; \
fi \
diff --git a/core/upb_string.c b/core/upb_string.c
index 91ab9ae..f9af9e9 100644
--- a/core/upb_string.c
+++ b/core/upb_string.c
@@ -7,8 +7,11 @@
#include "upb_string.h"
#include <stdlib.h>
-
-#define UPB_STRING_UNFINALIZED -1
+#ifdef __GLIBC__
+#include <malloc.h>
+#elif defined(__APPLE__)
+#include <malloc/malloc.h>
+#endif
static uint32_t upb_round_up_pow2(uint32_t v) {
// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
@@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) {
upb_string *upb_string_new() {
upb_string *str = malloc(sizeof(*str));
str->ptr = NULL;
+ str->cached_mem = NULL;
+#ifndef UPB_HAVE_MSIZE
str->size = 0;
- str->len = UPB_STRING_UNFINALIZED;
+#endif
+ str->src = NULL;
upb_atomic_refcount_init(&str->refcount, 1);
return str;
}
+uint32_t upb_string_size(upb_string *str) {  // Capacity, in bytes, of str->cached_mem.
+#ifdef __GLIBC__  // glibc: ask the allocator (declared in <malloc.h>).
+ return malloc_usable_size(str->cached_mem);  // NOTE(review): size_t result is narrowed to uint32_t.
+#elif defined(__APPLE__)  // Apple: allocator query declared in <malloc/malloc.h>.
+ return malloc_size(str->cached_mem);
+#else  // Otherwise: use the size recorded by upb_string_getrwbuf().
+ return str->size;  // NOTE(review): this field exists only when UPB_HAVE_MSIZE is undefined -- confirm the two guards agree.
+#endif
+}
+
+// Drops the reference this string holds on its source string, if any, and
+// clears the src field.
+static void upb_string_release(upb_string *str) {
+  if (str->src == NULL) return;  // Not a substring: nothing to drop.
+  upb_string_unref(str->src);
+  str->src = NULL;
+}
+
void _upb_string_free(upb_string *str) {
- if(str->ptr) free(str->ptr);
+ if(str->cached_mem) free(str->cached_mem);
+ upb_string_release(str);
free(str);
}
+// Returns a string with no active data.  If "str" is non-NULL and its
+// refcount is at most 1, it is reset in place (ptr cleared, source reference
+// dropped) and returned; otherwise a newly created string is returned.
+upb_string *upb_string_tryrecycle(upb_string *str) {
+  if (str != NULL && upb_atomic_read(&str->refcount) <= 1) {
+    // Sole reference: reset in place.  cached_mem is left untouched so that
+    // a later upb_string_getrwbuf() can reuse it.
+    str->ptr = NULL;
+    upb_string_release(str);
+    return str;
+  }
+  return upb_string_new();
+}
+
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
- assert(str->len == UPB_STRING_UNFINALIZED);
- if (str->size < len) {
- str->size = upb_round_up_pow2(len);
- str->ptr = realloc(str->ptr, str->size);
+ assert(str->ptr == NULL);
+ uint32_t size = upb_string_size(str);
+ if (size < len) {
+ size = upb_round_up_pow2(len);
+ str->cached_mem = realloc(str->cached_mem, size);
+#ifndef UPB_HAVE_MSIZE
+ str->size = size;
+#endif
}
str->len = len;
+ str->ptr = str->cached_mem;
return str->ptr;
}
+
+// Makes "str" a substring of "target_str": the "len" bytes starting at offset
+// "start".  No data is copied; "str" points into target_str's data and holds
+// a reference on target_str until upb_string_release() drops it.  "str" must
+// not currently have active data (ptr == NULL).
+void upb_string_substr(upb_string *str, upb_string *target_str,
+                       upb_strlen_t start, upb_strlen_t len) {
+  assert(str->ptr == NULL);
+  // A string aliasing itself would end up holding a reference on itself.
+  assert(str != target_str);
+  // The aliased range must lie entirely within the source string.
+  assert(start + len <= upb_string_len(target_str));
+  str->src = upb_string_getref(target_str);
+  str->ptr = upb_string_getrobuf(target_str) + start;
+  str->len = len;
+}
diff --git a/core/upb_string.h b/core/upb_string.h
index 770dba7..7ec3d48 100644
--- a/core/upb_string.h
+++ b/core/upb_string.h
@@ -16,8 +16,6 @@
* without having to reallocate the upb_string.
* - strings can be substrings of other strings (owning a ref on the source
* string).
- * - strings can refer to memory that they do not own, in which case we avoid
- * copies if possible (the exact strategy for doing this can vary).
* - strings are not thread-safe by default, but can be made so by calling a
* function. This is not the default because it causes extra CPU overhead.
*/
@@ -37,16 +35,31 @@ extern "C" {
// All members of this struct are private, and may only be read/written through
// the associated functions. Also, strings may *only* be allocated on the heap.
struct _upb_string {
+ // The pointer to our currently active data. This may be memory we own
+ // or a pointer into memory we don't own. NULL when no data is active.
char *ptr;
+
+ // If non-NULL, this is a block of memory we own. We keep this cached even
+ // if "ptr" is currently aliasing memory we don't own.
+ char *cached_mem;
+
+ // The effective length of the string (the number of bytes at ptr).
int32_t len;
+#ifndef UPB_HAVE_MSIZE
+ // How many bytes are allocated in cached_mem.
+ //
+ // Many platforms have a function that can tell you the size of a block
+ // that was previously malloc'd. When UPB_HAVE_MSIZE is defined this field
+ // is omitted and the size is not stored explicitly.
uint32_t size;
+#endif
+
+ // The string's refcount.
upb_atomic_refcount_t refcount;
- union {
- // Used if this is a slice of another string.
- struct _upb_string *src;
- // Used if this string is referencing external unowned memory.
- upb_atomic_refcount_t reader_count;
- } extra;
+
+ // Used if this is a slice of another string, NULL otherwise. We own a ref
+ // on src.
+ struct _upb_string *src;
};
// Returns a newly-created, empty, non-finalized string. When the string is no
@@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len);
+// Sketch of an API for allowing upb_strings to reference external, unowned
+// data. Waiting for a clear use case before actually implementing it.
+//
// Makes the string "str" a reference to the given string data. The caller
// guarantees that the given string data will not change or be deleted until
// a matching call to upb_string_detach().
-void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
-void upb_string_detach(upb_string *str);
+// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
+// void upb_string_detach(upb_string *str);
// Allows using upb_strings in printf, ie:
// upb_strptr str = UPB_STRLIT("Hello, World!\n");
@@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
}
// Like upb_strdup(), but duplicates a C NULL-terminated string.
-upb_string *upb_strdupc(const char *src);
+INLINE upb_string *upb_strdupc(const char *src) {
+  upb_strlen_t len = strlen(src);  // Length excludes the terminating NUL.
+  return upb_strduplen(src, len);
+}
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
diff --git a/tests/test_string.c b/tests/test_string.c
new file mode 100644
index 0000000..4fdab6c
--- /dev/null
+++ b/tests/test_string.c
@@ -0,0 +1,56 @@
+
+#undef NDEBUG /* ensure tests always assert. */
+#include "upb_string.h"
+
+char static_str[] = "Static string.";
+
+// Exercises creation, recycling, buffer reuse and substring aliasing of
+// upb_string.  Every check is an assert, so the program aborts on failure.
+int main() {
+ upb_string *str = upb_string_new();
+ assert(str != NULL);
+ upb_string_unref(str);
+
+ // Can also create a string by tryrecycle(NULL).
+ str = upb_string_tryrecycle(NULL);
+ assert(str != NULL);
+
+ upb_strcpyc(str, static_str);
+ assert(upb_string_len(str) == (sizeof(static_str) - 1));
+ const char *robuf = upb_string_getrobuf(str);
+ assert(robuf != NULL);
+ assert(memcmp(robuf, static_str, upb_string_len(str)) == 0);
+ upb_string_endread(str);
+
+ upb_string *str2 = upb_string_tryrecycle(str);
+ // No other referents, so should return the same string.
+ assert(str2 == str);
+
+ // Write a shorter string, the same memory should be reused.
+ upb_strcpyc(str, "XX");
+ const char *robuf2 = upb_string_getrobuf(str);
+ assert(robuf2 == robuf);
+ assert(memcmp(robuf2, "XX", 2) == 0);
+
+ // Make string alias part of another string.
+ str2 = upb_strdupc("WXYZ");
+ // upb_string_substr() requires a string with no active data, and str still
+ // holds "XX", so recycle it first.  We hold the only ref, so the same
+ // object comes back.
+ str = upb_string_tryrecycle(str);
+ upb_string_substr(str, str2, 1, 2);
+ assert(upb_string_len(str) == 2);
+ assert(upb_string_len(str2) == 4);
+ // The two strings should be aliasing the same data.
+ const char *robuf3 = upb_string_getrobuf(str);
+ const char *robuf4 = upb_string_getrobuf(str2);
+ assert(robuf3 == robuf4 + 1);
+ // The aliased string should have an extra ref.
+ assert(upb_atomic_read(&str2->refcount) == 2);
+
+ // Recycling str should eliminate the extra ref.
+ str = upb_string_tryrecycle(str);
+ assert(upb_atomic_read(&str2->refcount) == 1);
+
+ // Resetting str should reuse its old data.
+ upb_strcpyc(str, "XX");
+ const char *robuf5 = upb_string_getrobuf(str);
+ assert(robuf5 == robuf);
+
+ upb_string_unref(str);
+ upb_string_unref(str2);
+}
diff --git a/tests/test_table.cc b/tests/test_table.cc
index 37e14a8..47d5e57 100644
--- a/tests/test_table.cc
+++ b/tests/test_table.cc
@@ -12,6 +12,8 @@
#include <sys/resource.h>
#include <iostream>
+bool benchmark = false;
+
using std::string;
using std::vector;
@@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries)
}
}
+ if(!benchmark) {
+ upb_inttable_free(&table);
+ return;
+ }
+
/* Test performance. We only test lookups for keys that are known to exist. */
uintptr_t x = 0;
const unsigned int iterations = 0xFFFFFF;
@@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num)
return buf;
}
-int main()
+int main(int argc, char *argv[])
{
+ for (int i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
+ }
+
vector<string> keys;
keys.push_back("google.protobuf.FileDescriptorSet");
keys.push_back("google.protobuf.FileDescriptorProto");
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback