From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. --- core/upb_string.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 core/upb_string.c (limited to 'core/upb_string.c') diff --git a/core/upb_string.c b/core/upb_string.c new file mode 100644 index 0000000..91ab9ae --- /dev/null +++ b/core/upb_string.c @@ -0,0 +1,47 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_string.h" + +#include + +#define UPB_STRING_UNFINALIZED -1 + +static uint32_t upb_round_up_pow2(uint32_t v) { + // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +upb_string *upb_string_new() { + upb_string *str = malloc(sizeof(*str)); + str->ptr = NULL; + str->size = 0; + str->len = UPB_STRING_UNFINALIZED; + upb_atomic_refcount_init(&str->refcount, 1); + return str; +} + +void _upb_string_free(upb_string *str) { + if(str->ptr) free(str->ptr); + free(str); +} + +char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { + assert(str->len == UPB_STRING_UNFINALIZED); + if (str->size < len) { + str->size = upb_round_up_pow2(len); + str->ptr = realloc(str->ptr, str->size); + } + str->len = len; + return str->ptr; +} -- cgit v1.2.3 From e29bf964d1716398e8354a50f506906a307298e5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 12:15:31 -0700 Subject: Tests for string and fleshed out implementation. 
--- Makefile | 15 ++++++++----- core/upb_string.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++------- core/upb_string.h | 40 ++++++++++++++++++++++++---------- tests/test_string.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ tests/test_table.cc | 13 ++++++++++- 5 files changed, 161 insertions(+), 26 deletions(-) create mode 100644 tests/test_string.c (limited to 'core/upb_string.c') diff --git a/Makefile b/Makefile index ca4f940..1f977b4 100644 --- a/Makefile +++ b/Makefile @@ -86,22 +86,25 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -TESTS=tests/tests \ +TESTS=tests/test_string \ + tests/test_table +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ tests/test_table \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb $(TESTS): core/libupb.a -#VALGRIND=valgrind --leak-check=full --error-exitcode=1 -VALGRIND= +VALGRIND=valgrind --leak-check=full --error-exitcode=1 +#VALGRIND= test: tests @echo Running all tests under valgrind. - $(VALGRIND) ./tests/tests # Needs to be rewritten to separate the benchmark. 
# valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/t.* ; do \ - if [ -f ./$$test ] ; then \ + @for test in tests/*; do \ + if [ -x ./$$test ] ; then \ echo $(VALGRIND) ./$$test: \\c; \ $(VALGRIND) ./$$test; \ fi \ diff --git a/core/upb_string.c b/core/upb_string.c index 91ab9ae..f9af9e9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -7,8 +7,11 @@ #include "upb_string.h" #include - -#define UPB_STRING_UNFINALIZED -1 +#ifdef __GLIBC__ +#include +#elif defined(__APPLE__) +#include +#endif static uint32_t upb_round_up_pow2(uint32_t v) { // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 @@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) { upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; + str->cached_mem = NULL; +#ifndef UPB_HAVE_MSIZE str->size = 0; - str->len = UPB_STRING_UNFINALIZED; +#endif + str->src = NULL; upb_atomic_refcount_init(&str->refcount, 1); return str; } +uint32_t upb_string_size(upb_string *str) { +#ifdef __GLIBC__ + return malloc_usable_size(str->cached_mem); +#elif defined(__APPLE__) + return malloc_size(str->cached_mem); +#else + return str->size; +#endif +} + +static void upb_string_release(upb_string *str) { + if(str->src) { + upb_string_unref(str->src); + str->src = NULL; + } +} + void _upb_string_free(upb_string *str) { - if(str->ptr) free(str->ptr); + if(str->cached_mem) free(str->cached_mem); + upb_string_release(str); free(str); } +upb_string *upb_string_tryrecycle(upb_string *str) { + if(str == NULL || upb_atomic_read(&str->refcount) > 1) { + return upb_string_new(); + } else { + str->ptr = NULL; + upb_string_release(str); + return str; + } +} + char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->len == UPB_STRING_UNFINALIZED); - if (str->size < len) { - str->size = upb_round_up_pow2(len); - str->ptr = realloc(str->ptr, str->size); + assert(str->ptr == NULL); + uint32_t size = upb_string_size(str); + if (size < 
len) { + size = upb_round_up_pow2(len); + str->cached_mem = realloc(str->cached_mem, size); +#ifndef UPB_HAVE_MSIZE + str->size = size; +#endif } str->len = len; + str->ptr = str->cached_mem; return str->ptr; } + +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len) { + assert(str->ptr == NULL); + str->src = upb_string_getref(target_str); + str->ptr = upb_string_getrobuf(target_str) + start; + str->len = len; +} diff --git a/core/upb_string.h b/core/upb_string.h index 770dba7..7ec3d48 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -16,8 +16,6 @@ * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. */ @@ -37,16 +35,31 @@ extern "C" { // All members of this struct are private, and may only be read/written through // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { + // The pointer to our currently active data. This may be memory we own + // or a pointer into memory we don't own. char *ptr; + + // If non-NULL, this is a block of memory we own. We keep this cached even + // if "ptr" is currently aliasing memory we don't own. + char *cached_mem; + + // The effective length of the string (the bytes at ptr). int32_t len; +#ifndef UPB_HAVE_MSIZE + // How many bytes are allocated in cached_mem. + // + // Many platforms have a function that can tell you the size of a block + // that was previously malloc'd. In this case we can avoid storing the + // size explicitly. uint32_t size; +#endif + + // The string's refcount. 
upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_atomic_refcount_t reader_count; - } extra; + + // Used if this is a slice of another string, NULL otherwise. We own a ref + // on src. + struct _upb_string *src; }; // Returns a newly-created, empty, non-finalized string. When the string is no @@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len); +// Sketch of an API for allowing upb_strings to reference external, unowned +// data. Waiting for a clear use case before actually implementing it. +// // Makes the string "str" a reference to the given string data. The caller // guarantees that the given string data will not change or be deleted until // a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); +// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +// void upb_string_detach(upb_string *str); // Allows using upb_strings in printf, ie: // upb_strptr str = UPB_STRLIT("Hello, World!\n"); @@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { } // Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); +INLINE upb_string *upb_strdupc(const char *src) { + return upb_strduplen(src, strlen(src)); +} // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); diff --git a/tests/test_string.c b/tests/test_string.c new file mode 100644 index 0000000..4fdab6c --- /dev/null +++ b/tests/test_string.c @@ -0,0 +1,56 @@ + +#undef NDEBUG /* ensure tests always assert. 
*/ +#include "upb_string.h" + +char static_str[] = "Static string."; + +int main() { + upb_string *str = upb_string_new(); + assert(str != NULL); + upb_string_unref(str); + + // Can also create a string by tryrecycle(NULL). + str = upb_string_tryrecycle(NULL); + assert(str != NULL); + + upb_strcpyc(str, static_str); + assert(upb_string_len(str) == (sizeof(static_str) - 1)); + const char *robuf = upb_string_getrobuf(str); + assert(robuf != NULL); + assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + upb_string_endread(str); + + upb_string *str2 = upb_string_tryrecycle(str); + // No other referents, so should return the same string. + assert(str2 == str); + + // Write a shorter string, the same memory should be reused. + upb_strcpyc(str, "XX"); + const char *robuf2 = upb_string_getrobuf(str); + assert(robuf2 == robuf); + assert(memcmp(robuf2, "XX", 2) == 0); + + // Make string alias part of another string. + str2 = upb_strdupc("WXYZ"); + upb_string_substr(str, str2, 1, 2); + assert(upb_string_len(str) == 2); + assert(upb_string_len(str2) == 4); + // The two string should be aliasing the same data. + const char *robuf3 = upb_string_getrobuf(str); + const char *robuf4 = upb_string_getrobuf(str2); + assert(robuf3 == robuf4 + 1); + // The aliased string should have an extra ref. + assert(upb_atomic_read(&str2->refcount) == 2); + + // Recycling str should eliminate the extra ref. + str = upb_string_tryrecycle(str); + assert(upb_atomic_read(&str2->refcount) == 1); + + // Resetting str should reuse its old data. 
+ upb_strcpyc(str, "XX"); + const char *robuf5 = upb_string_getrobuf(str); + assert(robuf5 == robuf); + + upb_string_unref(str); + upb_string_unref(str2); +} diff --git a/tests/test_table.cc b/tests/test_table.cc index 37e14a8..47d5e57 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -12,6 +12,8 @@ #include #include +bool benchmark = false; + using std::string; using std::vector; @@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries) } } + if(!benchmark) { + upb_inttable_free(&table); + return; + } + /* Test performance. We only test lookups for keys that are known to exist. */ uintptr_t x = 0; const unsigned int iterations = 0xFFFFFF; @@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num) return buf; } -int main() +int main(int argc, char *argv[]) { + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--benchmark") == 0) benchmark = true; + } + vector keys; keys.push_back("google.protobuf.FileDescriptorSet"); keys.push_back("google.protobuf.FileDescriptorProto"); -- cgit v1.2.3 From 2ef013126c682a44d15554ea7a04144fc9a10fed Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 13:28:47 -0700 Subject: Fleshed out upb_string further. Now upb_def's only unresolved references are upb_src. 
--- Makefile | 3 ++- core/upb_def.c | 11 +++++------ core/upb_string.c | 30 +++++++++++++++++++++++++++++- core/upb_string.h | 46 +++++++++++++++++++++++++++++++++++++--------- tests/test_string.c | 17 +++++++++++++++-- 5 files changed, 88 insertions(+), 19 deletions(-) (limited to 'core/upb_string.c') diff --git a/Makefile b/Makefile index 1f977b4..2abe0c7 100644 --- a/Makefile +++ b/Makefile @@ -87,7 +87,8 @@ tests/test.proto.pb: tests/test.proto protoc tests/test.proto -otests/test.proto.pb TESTS=tests/test_string \ - tests/test_table + tests/test_table \ + tests/test_def tests: $(TESTS) OTHER_TESTS=tests/tests \ diff --git a/core/upb_def.c b/core/upb_def.c index bfab738..1f57c70 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -44,13 +44,12 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ static upb_string *upb_join(upb_string *base, upb_string *name) { - upb_string *joined = upb_strdup(base); - upb_strlen_t len = upb_string_len(joined); - if(len > 0) { - upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; + if (upb_string_len(base) == 0) { + return upb_string_getref(name); + } else { + return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, + UPB_STRARG(base), UPB_STRARG(name)); } - upb_strcat(joined, name); - return joined; } // Qualify the defname for all defs starting with offset "start" with "str". 
diff --git a/core/upb_string.c b/core/upb_string.c index f9af9e9..2f487aa 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -82,7 +82,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { } str->len = len; str->ptr = str->cached_mem; - return str->ptr; + return str->cached_mem; } void upb_string_substr(upb_string *str, upb_string *target_str, @@ -92,3 +92,31 @@ void upb_string_substr(upb_string *str, upb_string *target_str, str->ptr = upb_string_getrobuf(target_str) + start; str->len = len; } + +void upb_string_vprintf(upb_string *str, const char *format, va_list args) { + // Try once without reallocating. We have to va_copy because we might have + // to call vsnprintf again. + uint32_t size = UPB_MAX(upb_string_size(str), 16); + char *buf = upb_string_getrwbuf(str, size); + va_list args_copy; + va_copy(args_copy, args); + uint32_t true_size = vsnprintf(buf, size, format, args_copy); + va_end(args_copy); + + if (true_size > size) { + // Need to reallocate. + str = upb_string_tryrecycle(str); + buf = upb_string_getrwbuf(str, true_size); + vsnprintf(buf, true_size, format, args); + } + str->len = true_size; +} + +upb_string *upb_string_asprintf(const char *format, ...) { + upb_string *str = upb_string_new(); + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); + return str; +} diff --git a/core/upb_string.h b/core/upb_string.h index 7ec3d48..5cc0eaf 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -25,6 +25,7 @@ #include #include +#include #include "upb_atomic.h" #include "upb.h" @@ -37,7 +38,7 @@ extern "C" { struct _upb_string { // The pointer to our currently active data. This may be memory we own // or a pointer into memory we don't own. - char *ptr; + const char *ptr; // If non-NULL, this is a block of memory we own. We keep this cached even // if "ptr" is currently aliasing memory we don't own. 
@@ -111,16 +112,25 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // } upb_string *upb_string_tryrecycle(upb_string *str); -// The three options for setting the contents of a string. These may only be -// called when a string is first created or recycled; once other functions have -// been called on the string, these functions are not allowed until the string -// is recycled. +// The options for setting the contents of a string. These may only be called +// when a string is first created or recycled; once other functions have been +// called on the string, these functions are not allowed until the string is +// recycled. // Gets a pointer suitable for writing to the string, which is guaranteed to // have at least "len" bytes of data available. The size of the string will // become "len". char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); +// Replaces the contents of str with the contents of the given printf. +void upb_string_vprintf(upb_string *str, const char *format, va_list args); +INLINE void upb_string_printf(upb_string *str, const char *format, ...) { + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); +} + // Sets the contents of "str" to be the given substring of "target_str", to // which the caller must own a ref. void upb_string_substr(upb_string *str, upb_string *target_str, @@ -144,7 +154,7 @@ void upb_string_substr(upb_string *str, upb_string *target_str, /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. +// overflow. For the most part these only use the public upb_string interface. // More efficient than upb_strcmp if all you need is to test equality. INLINE bool upb_streql(upb_string *s1, upb_string *s2) { @@ -163,6 +173,17 @@ INLINE bool upb_streql(upb_string *s1, upb_string *s2) { // Like strcmp(). 
int upb_strcmp(upb_string *s1, upb_string *s2); +// Compare a upb_string with memory or a NULL-terminated C string. +INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) { + return len == upb_string_len(str) && + memcmp(upb_string_getrobuf(str), buf, len) == 0; +} + +INLINE bool upb_streqlc(upb_string *str, const void *buf) { + // Could be made one-pass. + return upb_streqllen(str, buf, strlen((const char*)buf)); +} + // Like upb_strcpy, but copies from a buffer and length. INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { memcpy(upb_string_getrwbuf(dest, len), src, len); @@ -175,10 +196,10 @@ INLINE void upb_strcpy(upb_string *dest, upb_string *src) { } // Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_string *dest, const char *src) { +INLINE void upb_strcpyc(upb_string *dest, const void *src) { // This does two passes over src, but that is necessary unless we want to // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); + upb_strcpylen(dest, src, strlen((const char*)src)); } // Returns a new string whose contents are a copy of s. @@ -200,11 +221,18 @@ INLINE upb_string *upb_strdupc(const char *src) { void upb_strcat(upb_string *s, upb_string *append); // Returns a new string that is a substring of the given string. -upb_string *upb_strslice(upb_string *s, int offset, int len); +INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { + upb_string *str = upb_string_new(); + upb_string_substr(str, s, offset, len); + return str; +} // Reads an entire file into a newly-allocated string. upb_string *upb_strreadfile(const char *filename); +// Returns a new string with the contents of the given printf. 
+upb_string *upb_string_asprintf(const char *format, ...); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tests/test_string.c b/tests/test_string.c index 4fdab6c..5e6e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -17,7 +17,7 @@ int main() { assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); assert(robuf != NULL); - assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + assert(upb_streqlc(str, static_str)); upb_string_endread(str); upb_string *str2 = upb_string_tryrecycle(str); @@ -28,7 +28,7 @@ int main() { upb_strcpyc(str, "XX"); const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); - assert(memcmp(robuf2, "XX", 2) == 0); + assert(upb_streqlc(str, "XX")); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -51,6 +51,19 @@ int main() { const char *robuf5 = upb_string_getrobuf(str); assert(robuf5 == robuf); + // Resetting str to something very long should require new data to be + // allocated. + str = upb_string_tryrecycle(str); + const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; + upb_strcpyc(str, longstring); + const char *robuf6 = upb_string_getrobuf(str); + assert(robuf6 != robuf); + assert(upb_streqlc(str, longstring)); + + // Test printf. + str = upb_string_tryrecycle(str); + upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + upb_string_unref(str); upb_string_unref(str2); } -- cgit v1.2.3 From ae0beee2854b977f472d48cd149b880b074b59c5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 19:37:47 -0700 Subject: Fixed upb_string error with strange vsnprintf() behavior. 
--- core/upb.c | 9 +++++++++ core/upb.h | 1 + core/upb_def.c | 49 +++++++++++++++++++++++++++++++++++++------------ core/upb_string.c | 13 +++++++++---- tests/test_string.c | 9 +++++++++ 5 files changed, 65 insertions(+), 16 deletions(-) (limited to 'core/upb_string.c') diff --git a/core/upb.c b/core/upb.c index 9ed5617..d581bbe 100644 --- a/core/upb.c +++ b/core/upb.c @@ -64,3 +64,12 @@ void upb_status_reset(upb_status *status) { upb_string_unref(status->str); status->str = NULL; } + +void upb_printerr(upb_status *status) { + if(status->str) { + fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", + status->code, UPB_STRARG(status->str)); + } else { + fprintf(stderr, "code: %d, no msg\n", status->code); + } +} diff --git a/core/upb.h b/core/upb.h index 630d9e1..13317bb 100644 --- a/core/upb.h +++ b/core/upb.h @@ -200,6 +200,7 @@ INLINE void upb_status_init(upb_status *status) { status->str = NULL; } +void upb_printerr(upb_status *status); void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); diff --git a/core/upb_def.c b/core/upb_def.c index 0f48559..2b2916e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -21,7 +21,7 @@ typedef struct { static void upb_deflist_init(upb_deflist *l) { l->size = 8; - l->defs = malloc(l->size); + l->defs = malloc(l->size * sizeof(void*)); l->len = 0; } @@ -34,7 +34,7 @@ static void upb_deflist_uninit(upb_deflist *l) { static void upb_deflist_push(upb_deflist *l, upb_def *d) { if(l->len == l->size) { l->size *= 2; - l->defs = realloc(l->defs, l->size); + l->defs = realloc(l->defs, l->size * sizeof(void*)); } l->defs[l->len++] = d; } @@ -238,6 +238,7 @@ static void upb_enumdef_free(upb_enumdef *e) { free(e); } +// google.protobuf.EnumValueDescriptorProto. 
static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) { int32_t number = -1; @@ -245,13 +246,13 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); - break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: name = upb_string_tryrecycle(name); CHECKSRC(upb_src_getstr(src, name)); break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &number)); + break; default: CHECKSRC(upb_src_skipval(src)); break; @@ -278,6 +279,7 @@ err: return false; } +// google.protobuf.EnumDescriptorProto. static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); @@ -290,8 +292,11 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: e->base.fqname = upb_string_tryrecycle(e->base.fqname); CHECKSRC(upb_src_getstr(src, e->base.fqname)); + break; case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); CHECK(upb_addenum_val(src, e, status)); + CHECKSRC(upb_src_endmsg(src)); break; default: upb_src_skipval(src); @@ -729,8 +734,10 @@ err: // We need to free all defs from "tmptab." 
upb_rwlock_unlock(&s->lock); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) + e = upb_strtable_next(&tmptab, &e->e)) { + fprintf(stderr, "Unreffing def: '" UPB_STRFMT "'\n", UPB_STRARG(e->e.key)); upb_def_unref(e->def); + } upb_strtable_free(&tmptab); return false; } @@ -914,10 +921,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) key = upb_baredecoder_readv32(d); d->wire_type = key & 0x7; d->field.number = key >> 3; + fprintf(stderr, "field num: %d, wire_type: %d\n", d->field.number, d->wire_type); if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. d->delimited_len = upb_baredecoder_readv32(d); + fprintf(stderr, "delimited size: %d\n", d->delimited_len); } return &d->field; } @@ -944,6 +953,7 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) *val.uint32 = upb_baredecoder_readf32(d); break; default: + *(char*)0 = 0; assert(false); } return true; @@ -951,19 +961,24 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) static bool upb_baredecoder_skipval(upb_baredecoder *d) { - upb_value val; - return upb_baredecoder_getval(d, upb_value_addrof(&val)); + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + d->offset += d->delimited_len; + return true; + } else { + upb_value val; + return upb_baredecoder_getval(d, upb_value_addrof(&val)); + } } static bool upb_baredecoder_startmsg(upb_baredecoder *d) { - *(d->top++) = d->offset + d->delimited_len; + *(++d->top) = d->offset + d->delimited_len; return true; } static bool upb_baredecoder_endmsg(upb_baredecoder *d) { - d->offset = *(--d->top); + d->offset = *(d->top--); return true; } @@ -980,7 +995,9 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); + d->offset = 0; d->top = &d->stack[0]; + *(d->top) = upb_string_len(d->input); 
upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; } @@ -1001,9 +1018,17 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) // TODO: allow upb_strings to be static or on the stack. upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); - upb_status status; + upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); - assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); + + if(!upb_ok(&status)) { + // upb itself is corrupt. + upb_printerr(&status); + upb_symtab_unref(symtab); + abort(); + } + fprintf(stderr, "Claims to have succeeded\n"); + upb_printerr(&status); } diff --git a/core/upb_string.c b/core/upb_string.c index 2f487aa..3563c9e 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -87,6 +87,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len) { + if(str->ptr) *(char*)0 = 0; assert(str->ptr == NULL); str->src = upb_string_getref(target_str); str->ptr = upb_string_getrobuf(target_str) + start; @@ -103,11 +104,15 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { uint32_t true_size = vsnprintf(buf, size, format, args_copy); va_end(args_copy); - if (true_size > size) { - // Need to reallocate. + if (true_size >= size) { + // Need to reallocate. We reallocate even if the sizes were equal, + // because snprintf excludes the terminating NULL from its count. + // We don't care about the terminating NULL, but snprintf might + // bail out of printing even other characters if it doesn't have + // enough space to write the NULL also. 
str = upb_string_tryrecycle(str); - buf = upb_string_getrwbuf(str, true_size); - vsnprintf(buf, true_size, format, args); + buf = upb_string_getrwbuf(str, true_size + 1); + vsnprintf(buf, true_size + 1, format, args); } str->len = true_size; } diff --git a/tests/test_string.c b/tests/test_string.c index 5869b70..46f35b9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -32,6 +32,7 @@ int main() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); + str = upb_string_tryrecycle(str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -63,9 +64,17 @@ int main() { // Test printf. str = upb_string_tryrecycle(str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + assert(upb_streqlc(str, "Number: 5, String: YO!")); + + // Test asprintf + upb_string *str3 = upb_string_asprintf("Yo %s: " UPB_STRFMT "\n", + "Josh", UPB_STRARG(str)); + const char expected[] = "Yo Josh: Number: 5, String: YO!\n"; + assert(upb_streqlc(str3, expected)); upb_string_unref(str); upb_string_unref(str2); + upb_string_unref(str3); // Unref of NULL is harmless. upb_string_unref(NULL); -- cgit v1.2.3 From 7a6a702792e769366a8852fc90dbea9cfc9e01c0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 11 Jul 2010 18:53:27 -0700 Subject: Allow static upb_strings. This can allow strings to reference static data, and reduced the memory footprint of test_def by about 10% (3k). 
--- core/upb_def.c | 4 +--- core/upb_string.c | 12 ++++++++--- core/upb_string.h | 57 +++++++++++++++++++++++++++++++++++++++++++++---- descriptor/descriptor.c | 7 ++++-- descriptor/descriptor.h | 5 +++-- tests/test_string.c | 33 +++++++++++++++++++++++++++- 6 files changed, 103 insertions(+), 15 deletions(-) (limited to 'core/upb_string.c') diff --git a/core/upb_def.c b/core/upb_def.c index b9402c5..c0d72db 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -1018,12 +1018,10 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // TODO: allow upb_strings to be static or on the stack. - upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); - upb_baredecoder *decoder = upb_baredecoder_new(descriptor); + upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); upb_baredecoder_free(decoder); - upb_string_unref(descriptor); if(!upb_ok(&status)) { // upb itself is corrupt. diff --git a/core/upb_string.c b/core/upb_string.c index 3563c9e..ca3c669 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,12 +61,12 @@ void _upb_string_free(upb_string *str) { } upb_string *upb_string_tryrecycle(upb_string *str) { - if(str == NULL || upb_atomic_read(&str->refcount) > 1) { - return upb_string_new(); - } else { + if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); return str; + } else { + return upb_string_new(); } } @@ -125,3 +125,9 @@ upb_string *upb_string_asprintf(const char *format, ...) 
{ va_end(args); return str; } + +upb_string *upb_strdup(upb_string *s) { + upb_string *str = upb_string_new(); + upb_strcpy(str, s); + return str; +} diff --git a/core/upb_string.h b/core/upb_string.h index 5cc0eaf..65ba404 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -63,6 +63,17 @@ struct _upb_string { struct _upb_string *src; }; +// Internal-only initializer for upb_string instances. +#ifdef UPB_HAVE_MSIZE +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, {refcount}, NULL} +#else +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, 0, {refcount}, NULL} +#endif + +// Special pseudo-refcounts for static/stack-allocated strings, respectively. +#define _UPB_STRING_REFCOUNT_STATIC -1 +#define _UPB_STRING_REFCOUNT_STACK -2 + // Returns a newly-created, empty, non-finalized string. When the string is no // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); @@ -72,15 +83,21 @@ void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" // can be NULL, in which case this is a no-op. INLINE void upb_string_unref(upb_string *str) { - if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); + if (str && upb_atomic_read(&str->refcount) > 0 && + upb_atomic_unref(&str->refcount)) { + _upb_string_free(str); + } } +upb_string *upb_strdup(upb_string *s); // Forward-declare. + // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. INLINE upb_string *upb_string_getref(upb_string *str) { - // If/when we support stack-allocated strings, this will have to allocate - // a new string if the given string is on the stack. - upb_atomic_ref(&str->refcount); + int refcount = upb_atomic_read(&str->refcount); + if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); + // We don't ref the special <0 refcount for static strings. 
+ if (refcount > 0) upb_atomic_ref(&str->refcount); return str; } @@ -151,6 +168,38 @@ void upb_string_substr(upb_string *str, upb_string *target_str, #define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) #define UPB_STRFMT "%.*s" +// Macros for constructing upb_string objects statically or on the stack. These +// can be used like: +// +// upb_string static_str = UPB_STATIC_STRING("Foo"); +// +// int main() { +// upb_string stack_str = UPB_STACK_STRING("Foo"); +// // Now: +// // upb_streql(&static_str, &stack_str) == true +// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true +// } +// +// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays, +// but you must not change the underlying data once you've passed the string on: +// +// void foo() { +// char data[] = "ABC123"; +// upb_string stack_str = UPB_STACK_STRING(data); +// bar(&stack_str); +// data[0] = 'B'; // NOT ALLOWED!! +// } +// +// TODO: should the stack business just be like attach/detach? The latter seems +// more flexible, though it does require a stack allocation. Maybe put this off +// until there is a clear use case. 
+#define UPB_STATIC_STRING(str) \ + _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STATIC_STRING_LEN(str, len) \ + _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STACK_STRING(str) _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) + /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer diff --git a/descriptor/descriptor.c b/descriptor/descriptor.c index cd50a16..ee6b25b 100644 --- a/descriptor/descriptor.c +++ b/descriptor/descriptor.c @@ -1,4 +1,6 @@ -unsigned char descriptor_pb[] = { +#include "descriptor.h" + +static unsigned char descriptor_pb[] = { 0x0a, 0x9b, 0x1b, 0x0a, 0x1b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x67, 0x6f, @@ -291,4 +293,5 @@ unsigned char descriptor_pb[] = { 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x48, 0x01 }; -unsigned int descriptor_pb_len = 3486; +static const unsigned int descriptor_pb_len = 3486; +upb_string descriptor_str = UPB_STATIC_STRING_LEN(descriptor_pb, sizeof(descriptor_pb)); // Byte array has no trailing NUL, so use the full sizeof. 
diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index b598a9a..f6d3ca3 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -11,12 +11,13 @@ #ifndef UPB_DESCRIPTOR_H_ #define UPB_DESCRIPTOR_H_ +#include "upb_string.h" + #ifdef __cplusplus extern "C" { #endif -extern unsigned char descriptor_pb[]; -extern unsigned int descriptor_pb_len; +extern upb_string descriptor_str; #ifdef __cplusplus } /* extern "C" */ diff --git a/tests/test_string.c b/tests/test_string.c index 46f35b9..7c9ed02 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -3,8 +3,33 @@ #include "upb_string.h" char static_str[] = "Static string."; +upb_string static_upbstr = 
UPB_STATIC_STRING(static_str); -int main() { +static void test_static() { + // Static string is initialized appropriately. + assert(upb_streql(&static_upbstr, UPB_STRLIT("Static string."))); + + // Taking a ref on a static string returns the same string, and repeated + // refs don't get the string in a confused state. + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + + // Unreffing a static string does nothing (is not harmful). + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + + // Recycling a static string returns a new string (that can be modified). + upb_string *str = upb_string_tryrecycle(&static_upbstr); + assert(str != &static_upbstr); + + upb_string_unref(str); +} + +static void test_dynamic() { upb_string *str = upb_string_new(); assert(str != NULL); upb_string_unref(str); @@ -29,6 +54,7 @@ int main() { const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); assert(upb_streqlc(str, "XX")); + assert(upb_streql(str, UPB_STRLIT("XX"))); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -79,3 +105,8 @@ int main() { // Unref of NULL is harmless. upb_string_unref(NULL); } + +int main() { + test_static(); + test_dynamic(); +} -- cgit v1.2.3 From af9d691a344746b15fb1df2e454273b637d20433 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 15:05:57 -0700 Subject: Added Xcode project. 
--- core/upb_string.c | 2 +- stream/upb_textprinter.c | 2 +- upb.xcodeproj/project.pbxproj | 497 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 499 insertions(+), 2 deletions(-) create mode 100644 upb.xcodeproj/project.pbxproj (limited to 'core/upb_string.c') diff --git a/core/upb_string.c b/core/upb_string.c index ca3c669..93686f5 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -71,7 +71,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { } char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->ptr == NULL); + // assert(str->ptr == NULL); uint32_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 0f0357a..11ad6a8 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -7,7 +7,7 @@ #include "upb_textprinter.h" #include -#include +#include #include "upb_def.h" #include "upb_string.h" diff --git a/upb.xcodeproj/project.pbxproj b/upb.xcodeproj/project.pbxproj new file mode 100644 index 0000000..8b4eb4e --- /dev/null +++ b/upb.xcodeproj/project.pbxproj @@ -0,0 +1,497 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */; }; + 420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */; }; + 420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */; }; + 42BD1D6E11F2500D0076AD28 /* upb.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3211F24E4C0076AD28 /* upb.c */; }; + 42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2911F24E4C0076AD28 /* upb_def.c */; }; + 42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */; }; + 42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2E11F24E4C0076AD28 /* upb_string.c */; }; + 42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3011F24E4C0076AD28 /* upb_table.c */; }; + 42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */; }; + 42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4011F24EA30076AD28 /* upb_stdio.c */; }; + 42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */; }; + 42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */ = {isa = PBXBuildFile; fileRef = 42BD1D8D11F251820076AD28 /* descriptor_const.h */; }; + 42BD1D9111F251820076AD28 /* descriptor.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D8E11F251820076AD28 /* descriptor.c */; }; + 42BD1D9211F251820076AD28 /* descriptor.h in Headers */ = {isa 
= PBXBuildFile; fileRef = 42BD1D8F11F251820076AD28 /* descriptor.h */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 42BD1D5B11F24F920076AD28 /* upbcore */; + remoteInfo = upbcore; + }; + 420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 42BD1D6311F24FBA0076AD28 /* upbstream */; + remoteInfo = upbstream; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXFileReference section */ + 420E6F1811F2589F001DA8FE /* test_decoder */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = test_decoder; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D2811F24E4C0076AD28 /* upb_atomic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_atomic.h; path = core/upb_atomic.h; sourceTree = ""; }; + 42BD1D2911F24E4C0076AD28 /* upb_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_def.c; path = core/upb_def.c; sourceTree = ""; }; + 42BD1D2A11F24E4C0076AD28 /* upb_def.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_def.h; path = core/upb_def.h; sourceTree = ""; }; + 42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream_vtbl.h; path = core/upb_stream_vtbl.h; sourceTree = ""; }; + 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stream.c; path = core/upb_stream.c; sourceTree = ""; }; + 42BD1D2D11F24E4C0076AD28 /* upb_stream.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream.h; path = core/upb_stream.h; sourceTree = ""; }; + 42BD1D2E11F24E4C0076AD28 /* upb_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_string.c; path = core/upb_string.c; sourceTree = ""; }; + 42BD1D2F11F24E4C0076AD28 /* upb_string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_string.h; path = core/upb_string.h; sourceTree = ""; }; + 42BD1D3011F24E4C0076AD28 /* upb_table.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_table.c; path = core/upb_table.c; sourceTree = ""; }; + 42BD1D3111F24E4C0076AD28 /* upb_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_table.h; path = core/upb_table.h; sourceTree = ""; }; + 42BD1D3211F24E4C0076AD28 /* upb.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb.c; path = core/upb.c; sourceTree = ""; }; + 42BD1D3311F24E4C0076AD28 /* upb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb.h; path = core/upb.h; sourceTree = ""; }; + 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_decoder.c; path = stream/upb_decoder.c; sourceTree = ""; }; + 42BD1D3F11F24EA30076AD28 /* upb_decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_decoder.h; path = stream/upb_decoder.h; sourceTree = ""; }; + 42BD1D4011F24EA30076AD28 /* upb_stdio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stdio.c; path = stream/upb_stdio.c; sourceTree = ""; }; + 42BD1D4111F24EA30076AD28 /* upb_stdio.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stdio.h; path = 
stream/upb_stdio.h; sourceTree = ""; }; + 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_textprinter.c; path = stream/upb_textprinter.c; sourceTree = ""; }; + 42BD1D4311F24EA30076AD28 /* upb_textprinter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_textprinter.h; path = stream/upb_textprinter.h; sourceTree = ""; }; + 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_decoder.c; path = tests/test_decoder.c; sourceTree = ""; }; + 42BD1D5011F24F3E0076AD28 /* test_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_def.c; path = tests/test_def.c; sourceTree = ""; }; + 42BD1D5111F24F3E0076AD28 /* test_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_string.c; path = tests/test_string.c; sourceTree = ""; }; + 42BD1D5211F24F3E0076AD28 /* test_table.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = test_table.cc; path = tests/test_table.cc; sourceTree = ""; }; + 42BD1D5311F24F3E0076AD28 /* test_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = test_util.h; path = tests/test_util.h; sourceTree = ""; }; + 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbcore.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbstream.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D8D11F251820076AD28 /* descriptor_const.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = descriptor_const.h; path = 
descriptor/descriptor_const.h; sourceTree = ""; }; + 42BD1D8E11F251820076AD28 /* descriptor.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = descriptor.c; path = descriptor/descriptor.c; sourceTree = ""; }; + 42BD1D8F11F251820076AD28 /* descriptor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = descriptor.h; path = descriptor/descriptor.h; sourceTree = ""; }; + C6A0FF2C0290799A04C91782 /* upb.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = upb.1; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 420E6F1611F2589F001DA8FE /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */, + 420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D5A11F24F920076AD28 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6211F24FBA0076AD28 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* upb */ = { + isa = PBXGroup; + children = ( + 08FB7795FE84155DC02AAC07 /* Source */, + C6A0FF2B0290797F04C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = upb; + sourceTree = ""; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 42BD1D8A11F251670076AD28 /* descriptor */, + 42BD1D4711F24EB20076AD28 /* tests */, + 42BD1D3B11F24E810076AD28 /* stream */, + 42BD1D3A11F24E5F0076AD28 /* core */, + ); + name = Source; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = 
PBXGroup; + children = ( + 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */, + 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */, + 420E6F1811F2589F001DA8FE /* test_decoder */, + ); + name = Products; + sourceTree = ""; + }; + 42BD1D3A11F24E5F0076AD28 /* core */ = { + isa = PBXGroup; + children = ( + 42BD1D2811F24E4C0076AD28 /* upb_atomic.h */, + 42BD1D2911F24E4C0076AD28 /* upb_def.c */, + 42BD1D2A11F24E4C0076AD28 /* upb_def.h */, + 42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */, + 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */, + 42BD1D2D11F24E4C0076AD28 /* upb_stream.h */, + 42BD1D2E11F24E4C0076AD28 /* upb_string.c */, + 42BD1D2F11F24E4C0076AD28 /* upb_string.h */, + 42BD1D3011F24E4C0076AD28 /* upb_table.c */, + 42BD1D3111F24E4C0076AD28 /* upb_table.h */, + 42BD1D3211F24E4C0076AD28 /* upb.c */, + 42BD1D3311F24E4C0076AD28 /* upb.h */, + ); + name = core; + sourceTree = ""; + }; + 42BD1D3B11F24E810076AD28 /* stream */ = { + isa = PBXGroup; + children = ( + 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */, + 42BD1D3F11F24EA30076AD28 /* upb_decoder.h */, + 42BD1D4011F24EA30076AD28 /* upb_stdio.c */, + 42BD1D4111F24EA30076AD28 /* upb_stdio.h */, + 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */, + 42BD1D4311F24EA30076AD28 /* upb_textprinter.h */, + ); + name = stream; + sourceTree = ""; + }; + 42BD1D4711F24EB20076AD28 /* tests */ = { + isa = PBXGroup; + children = ( + 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */, + 42BD1D5011F24F3E0076AD28 /* test_def.c */, + 42BD1D5111F24F3E0076AD28 /* test_string.c */, + 42BD1D5211F24F3E0076AD28 /* test_table.cc */, + 42BD1D5311F24F3E0076AD28 /* test_util.h */, + ); + name = tests; + sourceTree = ""; + }; + 42BD1D8A11F251670076AD28 /* descriptor */ = { + isa = PBXGroup; + children = ( + 42BD1D8D11F251820076AD28 /* descriptor_const.h */, + 42BD1D8E11F251820076AD28 /* descriptor.c */, + 42BD1D8F11F251820076AD28 /* descriptor.h */, + ); + name = descriptor; + sourceTree = ""; + }; + C6A0FF2B0290797F04C91782 /* Documentation */ = { + isa = 
PBXGroup; + children = ( + C6A0FF2C0290799A04C91782 /* upb.1 */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 42BD1D5811F24F920076AD28 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */, + 42BD1D9211F251820076AD28 /* descriptor.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6011F24FBA0076AD28 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 420E6F1711F2589F001DA8FE /* test_decoder */ = { + isa = PBXNativeTarget; + buildConfigurationList = 420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */; + buildPhases = ( + 420E6F1511F2589F001DA8FE /* Sources */, + 420E6F1611F2589F001DA8FE /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + 420E6F3411F2598D001DA8FE /* PBXTargetDependency */, + 420E6F3611F2598D001DA8FE /* PBXTargetDependency */, + ); + name = test_decoder; + productName = test_decoder; + productReference = 420E6F1811F2589F001DA8FE /* test_decoder */; + productType = "com.apple.product-type.tool"; + }; + 42BD1D5B11F24F920076AD28 /* upbcore */ = { + isa = PBXNativeTarget; + buildConfigurationList = 42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */; + buildPhases = ( + 42BD1D5811F24F920076AD28 /* Headers */, + 42BD1D5911F24F920076AD28 /* Sources */, + 42BD1D5A11F24F920076AD28 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = upbcore; + productName = libupbcore; + productReference = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */; + productType = "com.apple.product-type.library.static"; + }; + 42BD1D6311F24FBA0076AD28 /* upbstream */ = { + isa = PBXNativeTarget; + 
buildConfigurationList = 42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */; + buildPhases = ( + 42BD1D6011F24FBA0076AD28 /* Headers */, + 42BD1D6111F24FBA0076AD28 /* Sources */, + 42BD1D6211F24FBA0076AD28 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = upbstream; + productName = libupbstream; + productReference = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */; + productType = "com.apple.product-type.library.static"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* upb */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 42BD1D5B11F24F920076AD28 /* upbcore */, + 42BD1D6311F24FBA0076AD28 /* upbstream */, + 420E6F1711F2589F001DA8FE /* test_decoder */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 420E6F1511F2589F001DA8FE /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D5911F24F920076AD28 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 42BD1D6E11F2500D0076AD28 /* upb.c in Sources */, + 42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */, + 42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */, + 42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */, + 42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */, + 42BD1D9111F251820076AD28 /* descriptor.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6111F24FBA0076AD28 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 
42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */, + 42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */, + 42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 420E6F3411F2598D001DA8FE /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 42BD1D5B11F24F920076AD28 /* upbcore */; + targetProxy = 420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */; + }; + 420E6F3611F2598D001DA8FE /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 42BD1D6311F24FBA0076AD28 /* upbstream */; + targetProxy = 420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 1DEB928A08733DD80010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Debug; + }; + 1DEB928B08733DD80010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Release; + }; + 420E6F1A11F258A0001DA8FE /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + PREBINDING = NO; + PRODUCT_NAME = test_decoder; + }; + name = Debug; + }; + 
420E6F1B11F258A0001DA8FE /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + PREBINDING = NO; + PRODUCT_NAME = test_decoder; + ZERO_LINK = NO; + }; + name = Release; + }; + 42BD1D5D11F24F930076AD28 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbcore; + }; + name = Debug; + }; + 42BD1D5E11F24F930076AD28 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbcore; + ZERO_LINK = NO; + }; + name = Release; + }; + 42BD1D6511F24FBA0076AD28 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbstream; + }; + name = Debug; + }; + 42BD1D6611F24FBA0076AD28 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbstream; + ZERO_LINK = NO; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList 
section */ + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB928A08733DD80010E9CD /* Debug */, + 1DEB928B08733DD80010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 420E6F1A11F258A0001DA8FE /* Debug */, + 420E6F1B11F258A0001DA8FE /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 42BD1D5D11F24F930076AD28 /* Debug */, + 42BD1D5E11F24F930076AD28 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 42BD1D6511F24FBA0076AD28 /* Debug */, + 42BD1D6611F24FBA0076AD28 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} -- cgit v1.2.3 From 678799082b9775e601a09af9aa68e59fc1c64f6f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:23:52 -0700 Subject: Stream decoding benchmark. 
--- Makefile | 24 ++++++-- benchmarks/parsestream.upb_table.c | 113 +++++++++++++++++++++++++++++++++++++ core/upb_stream.h | 3 +- core/upb_string.c | 18 ++++++ stream/upb_byteio.h | 43 -------------- 5 files changed, 152 insertions(+), 49 deletions(-) create mode 100644 benchmarks/parsestream.upb_table.c delete mode 100644 stream/upb_byteio.h (limited to 'core/upb_string.c') diff --git a/Makefile b/Makefile index 749c5a7..203bed6 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -154,10 +154,10 @@ tests/tests: core/libupb.a tools/upbc: core/libupb.a # Benchmarks -UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byref +#UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table \ +# benchmarks/b.parsetostruct_googlemessage2.upb_table +UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \ + benchmarks/b.parsestream_googlemessage2.upb_table BENCHMARKS=$(UPB_BENCHMARKS) \ benchmarks/b.parsetostruct_googlemessage1.proto2_table \ @@ -204,6 +204,20 @@ benchmarks/b.parsetostruct_googlemessage2.upb_table_byref: \ -DMESSAGE_FILE=\"google_message2.dat\" \ -DBYREF=true $(LIBUPB) +benchmarks/b.parsestream_googlemessage1.upb_table \ +benchmarks/b.parsestream_googlemessage2.upb_table: \ + benchmarks/parsestream.upb_table.c $(LIBUPB) benchmarks/google_messages.proto.pb + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage1.upb_table $< \ + 
-DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message1.dat\" \ + $(LIBUPB) + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage2.upb_table $< \ + -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message2.dat\" \ + $(LIBUPB) + benchmarks/b.parsetostruct_googlemessage1.proto2_table \ benchmarks/b.parsetostruct_googlemessage2.proto2_table: \ benchmarks/parsetostruct.proto2_table.cc benchmarks/google_messages.pb.cc diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c new file mode 100644 index 0000000..c6acad9 --- /dev/null +++ b/benchmarks/parsestream.upb_table.c @@ -0,0 +1,113 @@ + +#include "main.c" + +#include "upb_def.h" +#include "upb_decoder.h" +#include "upb_strstream.h" + +static upb_stringsrc *stringsrc; +static upb_string *input_str; +static upb_string *tmp_str; +static upb_msgdef *def; +static upb_decoder *decoder; + +static bool initialize() +{ + // Initialize upb state, decode descriptor. 
+ upb_status status = UPB_STATUS_INIT; + upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); + upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds_str == NULL) { + fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"), + upb_printerr(&status); + return false; + } + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds_str); + upb_def *fds_def = upb_symtab_lookup( + s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def)); + upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_symtab_addfds(s, upb_decoder_src(d), &status); + + if(!upb_ok(&status)) { + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":"); + upb_printerr(&status); + return false; + } + + upb_string_unref(fds_str); + upb_decoder_free(d); + upb_stringsrc_free(ssrc); + upb_def_unref(fds_def); + + def = upb_downcast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME))); + if(!def) { + fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", + UPB_STRARG(UPB_STRLIT(MESSAGE_NAME))); + return false; + } + upb_symtab_unref(s); + + // Read the message data itself. 
+ input_str = upb_strreadfile(MESSAGE_FILE); + if(input_str == NULL) { + fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); + return false; + } + tmp_str = NULL; + decoder = upb_decoder_new(def); + stringsrc = upb_stringsrc_new(); + return true; +} + +static void cleanup() +{ + upb_string_unref(input_str); + upb_string_unref(tmp_str); + upb_def_unref(UPB_UPCAST(def)); + upb_decoder_free(decoder); + upb_stringsrc_free(stringsrc); +} + +static size_t run(int i) +{ + (void)i; + upb_status status = UPB_STATUS_INIT; + upb_stringsrc_reset(stringsrc, input_str); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc)); + upb_src *src = upb_decoder_src(decoder); + upb_fielddef *f; + upb_string *str = NULL; + int depth = 0; + while(1) { + while((f = upb_src_getdef(src)) != NULL) { + if(upb_issubmsg(f)) { + upb_src_startmsg(src); + ++depth; + } else if(upb_isstring(f)) { + tmp_str = upb_string_tryrecycle(str); + upb_src_getstr(src, tmp_str); + } else { + // Primitive type. + upb_value val; + upb_src_getval(src, upb_value_addrof(&val)); + } + } + // If we're not EOF now, the loop terminated due to an error. + if (!upb_src_eof(src)) goto err; + if (depth == 0) break; + --depth; + upb_src_endmsg(src); + } + if(!upb_ok(&status)) goto err; + return upb_string_len(input_str); + +err: + fprintf(stderr, "Decode error"); + upb_printerr(&status); + return 0; +} diff --git a/core/upb_stream.h b/core/upb_stream.h index b7400c5..861bd1c 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -128,7 +128,8 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. +// data in "str" with more data. The call fails if fewer than len bytes are +// available in the stream. 
bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. diff --git a/core/upb_string.c b/core/upb_string.c index 93686f5..847a3ee 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -131,3 +131,21 @@ upb_string *upb_strdup(upb_string *s) { upb_strcpy(str, s); return str; } + +upb_string *upb_strreadfile(const char *filename) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + if(fseek(f, 0, SEEK_END) != 0) goto error; + long size = ftell(f); + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + upb_string *s = upb_string_new(); + char *buf = upb_string_getrwbuf(s, size); + if(fread(buf, size, 1, f) != 1) goto error; + fclose(f); + return s; + +error: + fclose(f); + return NULL; +} diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h deleted file mode 100644 index 69a28b3..0000000 --- a/stream/upb_byteio.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file contains upb_bytesrc and upb_bytesink implementations for common - * interfaces like strings, UNIX fds, and FILE*. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_BYTEIO_H -#define UPB_BYTEIO_H - -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_stringsrc **************************************************************/ - -struct upb_stringsrc; -typedef struct upb_stringsrc upb_stringsrc; - -// Create/free a stringsrc. -upb_stringsrc *upb_stringsrc_new(); -void upb_stringsrc_free(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesrc *upb_stringsrc_bytesrc(); - - -/* upb_fdsrc ******************************************************************/ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif -- cgit v1.2.3 From b471ca6b81b88dc23aae6a53345d94d9a2714a7c Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 6 Dec 2010 15:52:40 -0800 Subject: The last major revision to the upb_stream protocol. Sources and sinks communicate by means of a upb_handlers object, which encapsulates a set of handler callbacks and will possibly offer richer semantics in the future like giving specific fields different callbacks. The upb_handlers protocol supports delegation, so sets of handlers can be written in reusable ways. For example, if a set of handlers is written to handle a specific .proto type, those handlers can be used whether that type is at the top level or whether it is a sub-message of a higher-level type. Delegation allows the streaming protocol to properly compose. --- Makefile | 41 +++++---- core/upb_stream.c | 55 ------------ core/upb_stream.h | 167 +++++++++++++++++++++++------------ core/upb_stream_vtbl.h | 235 +++++++++++++++++++++---------------------------- core/upb_string.c | 9 ++ core/upb_string.h | 7 +- 6 files changed, 249 insertions(+), 265 deletions(-) delete mode 100644 core/upb_stream.c (limited to 'core/upb_string.c') diff --git a/Makefile b/Makefile index 131b3c0..5c6598c 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ CXX=g++ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. 
CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) -LDLIBS=-lpthread +LDLIBS=-lpthread core/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 LDFLAGS += -L/usr/local/lib -llua @@ -47,16 +47,27 @@ clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* - rm -rf tests/tests tests/t.* tests/test_table + rm -rf $(TESTS) tests/t.* rm -rf descriptor/descriptor.pb rm -rf tools/upbc deps cd lang_ext/python && python setup.py clean --all +-include deps +deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) + @./gen-deps.sh $(SRC) + # The core library (core/libupb.a) -SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ - core/upb_msg.c \ - descriptor/descriptor.c +SRC=core/upb.c \ + core/upb_table.c \ + core/upb_string.c \ + descriptor/descriptor.c \ +# core/upb_def.c \ +# core/upb_msg.c \ +# stream/upb_decoder.c \ +# stream/upb_stdio.c \ +# stream/upb_strstream.c \ +# stream/upb_textprinter.c + $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
OTHERSRC=src/upb_encoder.c src/upb_text.c @@ -101,15 +112,16 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_def \ - tests/test_decoder \ - tests/t.test_vs_proto2.googlemessage1 \ - tests/t.test_vs_proto2.googlemessage2 \ - tests/test.proto.pb + tests/test_stream \ +# tests/test_def \ +# tests/test_decoder \ +# tests/t.test_vs_proto2.googlemessage1 \ +# tests/t.test_vs_proto2.googlemessage2 \ +# tests/test.proto.pb tests: $(TESTS) OTHER_TESTS=tests/tests \ -$(TESTS): core/libupb.a +$(TESTS): $(LIBUPB) VALGRIND=valgrind --leak-check=full --error-exitcode=1 #VALGRIND= @@ -118,7 +130,7 @@ test: tests @set -e # Abort on error. # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/*; do \ + @for test in $(TESTS); do \ if [ -x ./$$test ] ; then \ echo !!! $(VALGRIND) ./$$test; \ $(VALGRIND) ./$$test || exit 1; \ @@ -247,6 +259,3 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \ -DMESSAGE_HFILE=\"google_messages.pb.h\" \ benchmarks/google_messages.pb.cc -lprotobuf -lpthread --include deps -deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) - @./gen-deps.sh $(SRC) diff --git a/core/upb_stream.c b/core/upb_stream.c deleted file mode 100644 index 0d47392..0000000 --- a/core/upb_stream.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
- */ - -#include "upb_stream.h" - -#include "upb_def.h" - -#define CHECKSRC(x) if(!x) goto src_err -#define CHECKSINK(x) if(!x) goto sink_err - -void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { - upb_fielddef *f; - upb_string *str = NULL; - int depth = 0; - while(1) { - while((f = upb_src_getdef(src)) != NULL) { - CHECKSINK(upb_sink_putdef(sink, f)); - if(upb_issubmsg(f)) { - upb_src_startmsg(src); - upb_sink_startmsg(sink); - ++depth; - } else if(upb_isstring(f)) { - str = upb_string_tryrecycle(str); - CHECKSRC(upb_src_getstr(src, str)); - CHECKSINK(upb_sink_putstr(sink, str)); - } else { - // Primitive type. - upb_value val; - CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); - CHECKSINK(upb_sink_putval(sink, val)); - } - } - // If we're not EOF now, the loop terminated due to an error. - CHECKSRC(upb_src_eof(src)); - if (depth == 0) break; - --depth; - upb_src_endmsg(src); - upb_sink_endmsg(sink); - } - upb_string_unref(str); - return; - -src_err: - upb_string_unref(str); - upb_copyerr(status, upb_src_status(src)); - return; - -sink_err: - upb_string_unref(str); - upb_copyerr(status, upb_sink_status(sink)); - return; -} diff --git a/core/upb_stream.h b/core/upb_stream.h index cd00c1e..1eb111e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -19,7 +19,7 @@ #ifndef UPB_SRCSINK_H #define UPB_SRCSINK_H -#include "upb_stream_vtbl.h" +#include "upb.h" #ifdef __cplusplus extern "C" { @@ -28,98 +28,149 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -/* upb_sink *******************************************************************/ +/* upb_handlers ***************************************************************/ -// A upb_sink is a component that receives a stream of protobuf data. -// It is an abstract interface that is implemented either by the system or -// by users. -// -// TODO: unknown fields. 
+// upb_handlers define the interface by which a upb_src passes data to a +// upb_sink. -// Constants that a sink returns to indicate to its caller whether it should +// Constants that a handler returns to indicate to its caller whether it should // continue or not. typedef enum { // Caller should continue sending values to the sink. - UPB_SINK_CONTINUE, + UPB_CONTINUE, - // Return from upb_sink_putdef() to skip the next value (which may be a - // submessage). - UPB_SINK_SKIP, + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIP, // Caller should stop sending values; check sink status for details. // If processing resumes later, it should resume with the next value. - UPB_SINK_STOP, -} upb_sinkret_t; - -// Puts the given fielddef into the stream. -upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); - -// Puts the given value into the stream. -upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); -upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); - -// Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a -// client could have a submessage already serialized, and therefore put it -// as a string instead of its individual elements. -upb_sinkret_t upb_sink_startmsg(upb_sink *sink); -upb_sinkret_t upb_sink_endmsg(upb_sink *sink); - -// Returns the current error status for the stream. -upb_status *upb_sink_status(upb_sink *sink); - - -/* upb_src ********************************************************************/ - -// A upb_src is a resumable push parser for protobuf data. It works by first -// accepting registration of a upb_sink to which it will push data, then -// in a second phase is parses the actual data. + UPB_STOP, + + // When returned from a startsubmsg handler, indicates that the submessage + // should be handled by a different set of handlers, which have been + // registered on the provided upb_handlers object. 
May not be returned + // from any other callback. + UPB_DELEGATE, +} upb_flow_t; + +// upb_handlers +struct _upb_handlers; +typedef struct _upb_handlers upb_handlers; + +typedef void (*upb_startmsg_handler_t)(void *closure); +typedef void (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_value_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_value val); +typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_handlers *delegate_to); +typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, + upb_field_number_t fieldnum, + upb_value val); + +// An empty set of handlers, for convenient copy/paste: // - -// Sets the given sink as the target of this src. It will be called when the -// upb_src_parse() is run. -void upb_src_setsink(upb_src *src, upb_sink *sink); - -// Pushes data from this src to the previously registered sink, returning -// true if all data was processed. If false is returned, check -// upb_src_status() for details; if it is a resumable status, upb_src_run -// may be called again to resume processing. -bool upb_src_run(upb_src *src); +// static void startmsg(void *closure) { +// // Called when the top-level message begins. +// } +// +// static void endmsg(void *closure) { +// // Called when the top-level message ends. +// } +// +// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { +// // Called for every value in the stream. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f, +// upb_handlers *delegate_to) { +// // Called when a submessage begins; can delegate by returning UPB_DELEGATE. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t endsubmsg(void *closure) { +// // Called when a submessage ends. 
+// return UPB_CONTINUE; +// } +// +// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, +// upb_value val) { +// // Called when an unknown value is encountered. +// return UPB_CONTINUE; +// } +typedef struct { + upb_startmsg_handler_t startmsg; + upb_endmsg_handler_t endmsg; + upb_value_handler_t value; + upb_startsubmsg_handler_t startsubmsg; + upb_endsubmsg_handler_t endsubmsg; + upb_unknownval_handler_t unknownval; +} upb_handlerset; + +// Functions to register handlers on a upb_handlers object. +INLINE void upb_handlers_init(upb_handlers *h); +INLINE void upb_handlers_uninit(upb_handlers *h); +INLINE void upb_handlers_reset(upb_handlers *h); +INLINE bool upb_handlers_isempty(upb_handlers *h); +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); + +// An object that transparently handles delegation so that the caller needs +// only follow the protocol as if delegation did not exist. +struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE void upb_dispatch_startmsg(upb_dispatcher *d); +INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, upb_value val); /* upb_bytesrc ****************************************************************/ +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; + // Returns the next string in the stream. false is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof. 
-bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing // data in "str" with more data. The call fails if fewer than len bytes are // available in the stream. -bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file // until a read has failed due to eof. It cannot preemptively tell you that // the next call will fail due to eof. Since these are the semantics that C // and UNIX provide, we're stuck with them if we want to support e.g. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); +INLINE bool upb_bytesrc_eof(upb_bytesrc *src); /* upb_bytesink ***************************************************************/ +struct _upb_bytesink; +typedef struct _upb_bytesink upb_bytesink; + // Puts the given string. Returns the number of bytes that were actually // consumed, which may be fewer than were in the string, or <0 on error. -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. -upb_status *upb_bytesink_status(upb_bytesink *sink); - -/* Utility functions **********************************************************/ - -// Streams data from src to sink until EOF or error. 
-void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status); +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); +#include "upb_stream_vtbl.h" #ifdef __cplusplus } /* extern "C" */ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 96f6cfe..91464a7 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -5,59 +5,21 @@ * interfaces. Only components that are implementing these interfaces need * to worry about this file. * - * This is tedious; this is the place in upb where I most wish I had a C++ - * feature. In C++ the compiler would generate this all for me. If there's - * any consolation, it's that I have a bit of flexibility you don't have in - * C++: I could, with preprocessor magic alone "de-virtualize" this interface - * for a particular source file. Say I had a C file that called a upb_src, - * but didn't want to pay the virtual function overhead. I could define: - * - * #define upb_src_getdef(src) upb_decoder_getdef((upb_decoder*)src) - * #define upb_src_stargmsg(src) upb_decoder_startmsg(upb_decoder*)src) - * // etc. - * - * The source file is compatible with the regular upb_src interface, but here - * we bind it to a particular upb_src (upb_decoder), which could lead to - * improved performance at a loss of flexibility for this one upb_src client. - * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. */ #ifndef UPB_SRCSINK_VTBL_H_ #define UPB_SRCSINK_VTBL_H_ -#include "upb.h" +#include +#include "upb_stream.h" #ifdef __cplusplus extern "C" { #endif -struct upb_src; -typedef struct upb_src upb_src; -struct upb_sink; -typedef struct upb_sink upb_sink; -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - // Typedefs for function pointers to all of the virtual functions. -// upb_src. 
-typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_getstr_fptr)(upb_src *src, upb_string *str); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); - -// upb_sink. -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_putstr_fptr)(upb_sink *sink, upb_string *str); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); - // upb_bytesrc. typedef bool (*upb_bytesrc_get_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); @@ -68,23 +30,6 @@ typedef bool (*upb_bytesrc_append_fptr)( typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); // Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_getstr_fptr getstr; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; -} upb_src_vtable; - -typedef struct { - upb_sink_putdef_fptr putdef; - upb_sink_putval_fptr putval; - upb_sink_putstr_fptr putstr; - upb_sink_startmsg_fptr startmsg; - upb_sink_endmsg_fptr endmsg; -} upb_sink_vtable; - typedef struct { upb_bytesrc_get_fptr get; upb_bytesrc_append_fptr append; @@ -97,42 +42,18 @@ typedef struct { // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
-struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_sink { - upb_sink_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_bytesrc { +struct _upb_bytesrc { upb_bytesrc_vtable *vtbl; upb_status status; bool eof; }; -struct upb_bytesink { +struct _upb_bytesink { upb_bytesink_vtable *vtbl; upb_status status; bool eof; }; -INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - -INLINE void upb_sink_init(upb_sink *s, upb_sink_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; @@ -146,46 +67,6 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { } // Implementation of virtual function dispatch. -INLINE struct _upb_fielddef *upb_src_getdef(upb_src *src) { - return src->vtbl->getdef(src); -} -INLINE bool upb_src_getval(upb_src *src, upb_valueptr val) { - return src->vtbl->getval(src, val); -} -INLINE bool upb_src_getstr(upb_src *src, upb_string *str) { - return src->vtbl->getstr(src, str); -} -INLINE bool upb_src_skipval(upb_src *src) { return src->vtbl->skipval(src); } -INLINE bool upb_src_startmsg(upb_src *src) { return src->vtbl->startmsg(src); } -INLINE bool upb_src_endmsg(upb_src *src) { return src->vtbl->endmsg(src); } - -// Implementation of type-specific upb_src accessors. If we encounter a upb_src -// where these can be implemented directly in a measurably more efficient way, -// we can make these part of the vtable also. -// -// For <64-bit types we have to use a temporary to accommodate baredecoder, -// which does not know the actual width of the type. 
-INLINE bool upb_src_getbool(upb_src *src, bool *_bool) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *_bool = val._bool; - return ret; -} - -INLINE bool upb_src_getint32(upb_src *src, int32_t *i32) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *i32 = val.int32; - return ret; -} - -// TODO. -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); // upb_bytesrc INLINE bool upb_bytesrc_get( @@ -198,24 +79,108 @@ INLINE bool upb_bytesrc_append( return bytesrc->vtbl->append(bytesrc, str, len); } -// upb_sink -INLINE bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def) { - return sink->vtbl->putdef(sink, def); +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_handlers +struct _upb_handlers { + upb_handlerset *set; + void *closure; +}; + +INLINE void upb_handlers_init(upb_handlers *h) { + (void)h; +} +INLINE void upb_handlers_uninit(upb_handlers *h) { + (void)h; +} + +INLINE void upb_handlers_reset(upb_handlers *h) { + h->set = NULL; + h->closure = NULL; +} + +INLINE bool upb_handlers_isempty(upb_handlers *h) { + return !h->set && !h->closure; +} + +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + h->set = set; +} + +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure) { + h->closure = closure; +} + +// upb_dispatcher +typedef struct { + upb_handlers handlers; + int depth; +} upb_dispatcher_frame; + +struct _upb_dispatcher { + upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit; +}; + +INLINE void upb_dispatcher_init(upb_dispatcher *d) { + d->limit = d->stack + sizeof(d->stack); } 
-INLINE bool upb_sink_putval(upb_sink *sink, upb_value val) { - return sink->vtbl->putval(sink, val); + +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { + d->top = d->stack; + d->top->depth = 1; // Never want to trigger end-of-delegation. + d->top->handlers = *h; } -INLINE bool upb_sink_putstr(upb_sink *sink, upb_string *str) { - return sink->vtbl->putstr(sink, str); + +INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE bool upb_sink_startmsg(upb_sink *sink) { - return sink->vtbl->startmsg(sink); + +INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->endmsg(d->top->handlers.closure); } -INLINE bool upb_sink_endmsg(upb_sink *sink) { - return sink->vtbl->endmsg(sink); + +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f) { + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_handlers_reset(&handlers); + upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers); + assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers)); + if (ret == UPB_DELEGATE) { + ++d->top; + d->top->handlers = handlers; + d->top->depth = 0; + d->top->handlers.set->startmsg(d->top->handlers.closure); + ret = UPB_CONTINUE; + } + ++d->top->depth; + upb_handlers_uninit(&handlers); + return ret; +} + +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + if (--d->top->depth == 0) { + d->top->handlers.set->endmsg(d->top->handlers.closure); + --d->top; + } + return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } -INLINE upb_status *upb_sink_status(upb_sink *sink) { return &sink->status; } +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, + struct _upb_fielddef *f, + upb_value val) { + return d->top->handlers.set->value(d->top->handlers.closure, f, val); +} + +INLINE upb_flow_t 
upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val) { + return d->top->handlers.set->unknownval(d->top->handlers.closure, + fieldnum, val); +} // upb_bytesink INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { diff --git a/core/upb_string.c b/core/upb_string.c index 847a3ee..4f5f5c2 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -29,6 +29,7 @@ upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; str->cached_mem = NULL; + str->len = 0; #ifndef UPB_HAVE_MSIZE str->size = 0; #endif @@ -132,6 +133,14 @@ upb_string *upb_strdup(upb_string *s) { return str; } +void upb_strcat(upb_string *s, upb_string *append) { + uint32_t old_size = upb_string_len(s); + uint32_t append_size = upb_string_len(append); + uint32_t new_size = old_size + append_size; + char *buf = upb_string_getrwbuf(s, new_size); + memcpy(buf + old_size, upb_string_getrobuf(append), append_size); +} + upb_string *upb_strreadfile(const char *filename) { FILE *f = fopen(filename, "rb"); if(!f) return NULL; diff --git a/core/upb_string.h b/core/upb_string.h index bd89f67..ee345e3 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -18,6 +18,11 @@ * string). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. + * + * Reference-counted strings have recently fallen out of favor because of the + * performance impacts of doing thread-safe reference counting with atomic + * operations. We side-step this issue by not performing atomic operations + * unless the string has been marked thread-safe. */ #ifndef UPB_STRING_H @@ -34,7 +39,7 @@ extern "C" { #endif // All members of this struct are private, and may only be read/written through -// the associated functions. Also, strings may *only* be allocated on the heap. +// the associated functions. struct _upb_string { // The pointer to our currently active data. 
This may be memory we own // or a pointer into memory we don't own. -- cgit v1.2.3 From bcc688a303439c758a47da9f0eb1c064ece6ce09 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 20:37:04 -0800 Subject: upb_def compiles again! --- core/upb.c | 2 +- core/upb.h | 37 ++++--- core/upb_def.c | 283 +++++++++++++++++++++++++++++++------------------ core/upb_msg.c | 13 ++- core/upb_stream.h | 62 +++++++---- core/upb_stream_vtbl.h | 88 ++++++++++----- core/upb_string.c | 2 +- core/upb_string.h | 13 +-- 8 files changed, 325 insertions(+), 175 deletions(-) (limited to 'core/upb_string.c') diff --git a/core/upb.c b/core/upb.c index c396323..2f715d0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -45,7 +45,7 @@ void upb_seterr(upb_status *status, enum upb_status_code code, { if(upb_ok(status)) { // The first error is the most interesting. status->code = code; - status->str = upb_string_tryrecycle(status->str); + upb_string_recycle(&status->str); va_list args; va_start(args, msg); upb_string_vprintf(status->str, msg, args); diff --git a/core/upb.h b/core/upb.h index 2057d60..64bc88c 100644 --- a/core/upb.h +++ b/core/upb.h @@ -126,14 +126,20 @@ struct _upb_array; typedef struct _upb_array upb_array; struct _upb_msg; typedef struct _upb_msg upb_msg; +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; -typedef uint32_t upb_strlen_t; +typedef int32_t upb_strlen_t; +#define UPB_STRLEN_MAX INT32_MAX // The type of a upb_value. This is like a upb_fieldtype_t, but adds the // constant UPB_VALUETYPE_ARRAY to represent an array. typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_ARRAY 32 +#define UPB_VALUETYPE_BYTESRC 32 +#define UPB_VALUETYPE_RAW 33 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
typedef struct { @@ -146,6 +152,7 @@ typedef struct { uint64_t uint64; bool _bool; upb_string *str; + upb_bytesrc *bytesrc; upb_msg *msg; upb_array *arr; upb_atomic_refcount_t *refcount; @@ -167,21 +174,27 @@ typedef struct { #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == UPB_TYPE(proto_type)); \ + assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ return val.val.membername; \ } \ - void upb_value_ ## name(upb_value *val, ctype cval) { \ - SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + void upb_value_set ## name(upb_value *val, ctype cval) { \ + SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } -UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); -UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, INT32); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); -UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); -UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); +UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); +UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); + +void upb_value_setraw(upb_value *val, uint64_t cval) { + SET_TYPE(val->type, UPB_VALUETYPE_RAW); + val->val.uint64 = cval; +} // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. 
diff --git a/core/upb_def.c b/core/upb_def.c index 4320fb6..4f12dbe 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -228,6 +228,10 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } +static upb_def *upb_deflist_last(upb_deflist *l) { + return l->defs[l->len-1]; +} + // Qualify the defname for all defs starting with offset "start" with "str". static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { @@ -238,8 +242,14 @@ static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) } } +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). typedef struct { upb_string *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. 
int start; } upb_defbuilder_frame; @@ -250,6 +260,10 @@ struct _upb_defbuilder { uint32_t number; upb_string *name; + bool saw_number; + bool saw_name; + + upb_fielddef *f; }; typedef struct _upb_defbuilder upb_defbuilder; @@ -259,6 +273,28 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_defbuilder_init(upb_defbuilder *b) { + upb_deflist_init(&b->defs); + b->stack_len = 0; + b->name = NULL; +} + +static void upb_defbuilder_uninit(upb_defbuilder *b) { + upb_string_unref(b->name); + upb_deflist_uninit(&b->defs); +} + +static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { + if (b->stack_len <= 1) return NULL; + int index = b->stack[b->stack_len-1].start - 1; + assert(index >= 0); + return upb_downcast_msgdef(b->defs.defs[index]); +} + +static upb_def *upb_defbuilder_last(upb_defbuilder *b) { + return upb_deflist_last(&b->defs); +} + // Start/end handlers for FileDescriptorProto and DescriptorProto (the two // entities that have names and can contain sub-definitions. void upb_defbuilder_startcontainer(upb_defbuilder *b) { @@ -291,9 +327,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( @@ -308,19 +343,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( return UPB_DELEGATE; default: // TODO: services and extensions. 
- return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -333,9 +368,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( @@ -345,20 +379,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: upb_defbuilder_register_FileDescriptorProto(b, h); return UPB_DELEGATE; - default: - return UPB_SKIP; } + return UPB_SKIPSUBMSG; } static void upb_defbuilder_register_FileDescriptorSet( upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorSet_value, &upb_defbuilder_FileDescriptorSet_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -406,18 +439,20 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { - b->number = -1; - b->name = NULL; +static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + b->saw_number = false; + b->saw_name = false; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - b->name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); + upb_string_unref(b->name); + upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); @@ -428,34 +463,37 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { - if(b->name == NULL || b->number == -1) { - upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - goto err; +static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + if(!b->saw_number || !b->saw_name) { + //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); + //goto err; + return; } - upb_ntoi_ent ntoi_ent = {{name, 0}, number}; - upb_iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; + upb_iton_ent iton_ent = {{b->number, 0}, b->name}; + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. 
- return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumValueDescriptorProto_startmsg, &upb_enumdef_EnumValueDescriptorProto_endmsg, &upb_enumdef_EnumValueDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } // google.protobuf.EnumDescriptorProto. -void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { +void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); @@ -463,42 +501,51 @@ void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { upb_deflist_push(&b->defs, UPB_UPCAST(e)); } -void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { - assert(e->base.fqname); +void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + assert(upb_defbuilder_last(b)->fqname != NULL); } -static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: { + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_string_unref(e->base.fqname); - e->base.fqname = upb_value_getstr(val); + e->base.fqname = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: return BEGIN_SUBMSG; + default: + return UPB_CONTINUE; } - return UPB_CONTINUE; } -static upb_flow_t 
upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: upb_enumdef_register_EnumValueDescriptorProto(b, h); return UPB_DELEGATE; + default: + return UPB_SKIPSUBMSG; } - return UPB_SKIP; } static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumDescriptorProto_startmsg, &upb_enumdef_EnumDescriptorProto_endmsg, &upb_enumdef_EnumDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + &upb_enumdef_EnumDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -529,56 +576,71 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(upb_defbuilder *b) { +static void upb_fielddef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; f->name = NULL; f->def = NULL; f->owned = false; - f->msgdef = m; + f->msgdef = upb_defbuilder_top(b); b->f = f; } -static void upb_fielddef_endmsg(upb_defbuilder *b) { +static void upb_fielddef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_fielddef *f = b->f; // TODO: verify that all required fields were present. assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. 
+ upb_msgdef *m = upb_defbuilder_top(b); upb_itof_ent itof_ent = {{f->number, 0}, f}; upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); - return true; } -static upb_flow_t upb_fielddef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { - switch(parsed_f->number) { +static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - f->type = upb_value_getint32(val); + b->f->type = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - f->label = upb_value_getint32(val); + b->f->label = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - f->number = upb_value_getint32(val); + b->f->number = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); + upb_string_unref(b->f->name); + b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if(b->f->def) upb_def_unref(b->f->def); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->owned = true; break; } } return UPB_CONTINUE; } +static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_fielddef_startmsg, + &upb_fielddef_endmsg, + &upb_fielddef_value, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b); +} + /* upb_msgdef *****************************************************************/ @@ -596,21 
+658,24 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(upb_defbuilder *b) { +static void upb_msgdef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b); } -static void upb_msgdef_endmsg(upb_defbuilder *b) { - upb_msgdef *m = upb_downcast_msgdef(upb_deflist_stacktop(&m->defs)); +static void upb_msgdef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return UPB_ERROR; + //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + //return UPB_ERROR; + return; } // Create an ordering over the fields. 
@@ -651,51 +716,57 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - return UPB_CONTINUE; + //return UPB_CONTINUE; } -static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_defbuilder_setscopename(upb_value_getstr(val)); - break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: { + upb_msgdef *m = upb_defbuilder_top(b); + upb_string_unref(m->base.fqname); + m->base.fqname = upb_string_getref(upb_value_getstr(val)); + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; default: // TODO: extensions. 
- return UPB_SKIP; + return UPB_CONTINUE; } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, +static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - upb_register_FieldDescriptorProto(b, h); + upb_fielddef_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_register_EnumDescriptorProto(b, h); + upb_enumdef_register_EnumDescriptorProto(b, h); return UPB_DELEGATE; break; default: - return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_msgdef_startmsg, &upb_msgdef_endmsg, &upb_msgdef_value, &upb_msgdef_startsubmsg, - } - upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -884,7 +955,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either // succeeds or fails. Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, +bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, bool allow_redef, upb_status *status) { upb_rwlock_wrlock(&s->lock); @@ -892,9 +963,9 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Build a table of the defs we mean to add, for duplicate detection and name // resolution. 
upb_strtable tmptab; - upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); - for (uint32_t i = 0; i < defs->len; i++) { - upb_def *def = defs->defs[i]; + upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); + for (int i = 0; i < num_defs; i++) { + upb_def *def = defs[i]; upb_symtab_ent e = {{def->fqname, 0}, def}; // Redefinition is never allowed within a single FileDescriptorSet. @@ -909,13 +980,13 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Pass ownership from the deflist to the strtable. upb_strtable_insert(&tmptab, &e.e); - defs->defs[i] = NULL; + defs[i] = NULL; } // TODO: process the list of extensions by modifying entries from // tmptab in-place (copying them from the symtab first if necessary). - CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); + if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; // The defs in tmptab have been vetted, and can be added to the symtab // without causing errors. Now add all tmptab defs to the symtab, @@ -946,6 +1017,7 @@ err: upb_def_unref(e->def); } upb_strtable_free(&tmptab); + for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]); return false; } @@ -1026,20 +1098,18 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { - upb_defbuilder *b = upb_defbuilder_new(); - upb_defbuilder_register_handlers(b, upb_src_gethandlers(src)); + upb_defbuilder b; + upb_defbuilder_init(&b); + //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); + upb_defbuilder_register_FileDescriptorSet(&b, NULL); if(!upb_src_run(src)) { upb_copyerr(status, upb_src_status(src)); + upb_defbuilder_uninit(&b); return; } - upb_symtab_add_defs(s, b->defs, b->defs_len, false, status); - upb_deflist_uninit(&defs); + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_defbuilder_uninit(&b); return; - -src_err: - upb_copyerr(status, upb_src_status(src)); 
-err: - upb_deflist_uninit(&defs); } @@ -1074,8 +1144,10 @@ err: // complicated to support on big-endian machines. typedef struct { + upb_src src; upb_string *input; upb_strlen_t offset; + upb_dispatcher dispatcher; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -1121,9 +1193,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_dispatch_startmsg(&d->dispatcher); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. - while(d->offset >= *d->top) { + while(d->offset >= *top) { upb_dispatch_endsubmsg(&d->dispatcher); - d->offset = *(d->top--); + d->offset = *(top--); } uint32_t key = upb_baredecoder_readv64(d); @@ -1134,16 +1206,16 @@ bool upb_baredecoder_run(upb_baredecoder *d) { uint32_t delim_len = upb_baredecoder_readv32(d); // We don't know if it's a string or a submessage; deliver first as // string. - str = upb_string_tryrecycle(str); - upb_string_substr(str, d->input, d->offset, d->delimited_len); + upb_string_recycle(&str); + upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == UPB_TREAT_AS_SUBMSG) { + if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { // Should deliver as a submessage instead. 
upb_dispatch_startsubmsg(&d->dispatcher, &f); - *(++d->top) = d->offset + delimited_len; + *(++top) = d->offset + delim_len; } else { - d->offset += delimited_len; + d->offset += delim_len; } } else { upb_value v; @@ -1167,23 +1239,24 @@ bool upb_baredecoder_run(upb_baredecoder *d) { } } upb_dispatch_endmsg(&d->dispatcher); + return true; } -static upb_src_vtable upb_baredecoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_baredecoder_getdef, - (upb_src_getval_fptr)&upb_baredecoder_getval, - (upb_src_getstr_fptr)&upb_baredecoder_getstr, - (upb_src_skipval_fptr)&upb_baredecoder_skipval, - (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, -}; - static upb_baredecoder *upb_baredecoder_new(upb_string *str) { + //static upb_src_vtable vtbl = { + // (upb_src_getdef_fptr)&upb_baredecoder_getdef, + // (upb_src_getval_fptr)&upb_baredecoder_getval, + // (upb_src_getstr_fptr)&upb_baredecoder_getstr, + // (upb_src_skipval_fptr)&upb_baredecoder_skipval, + // (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, + // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, + //}; upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); d->offset = 0; - upb_src_init(&d->src, &upb_baredecoder_src_vtbl); + upb_dispatcher_init(&d->dispatcher); + //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_msg.c b/core/upb_msg.c index 75f7a35..a0a5196 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -7,6 +7,8 @@ */ #include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" void _upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { @@ -108,10 +110,13 @@ upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, upb_status *status) { - (void)msg; - (void)md; - (void)str; - (void)status; + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, str); + upb_decoder *d = upb_decoder_new(md); + 
upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_decoder_free(d); + upb_stringsrc_free(ssrc); } void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, diff --git a/core/upb_stream.h b/core/upb_stream.h index c96c544..9ae69de 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,13 +39,16 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). - UPB_SKIP, + // An error occurred; check status for details. + UPB_ERROR, - // Caller should stop sending values; check sink status for details. + // Processing should stop for now, but could be resumed later. // If processing resumes later, it should resume with the next value. - UPB_STOP, + UPB_SUSPEND, + + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIPSUBMSG, // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been @@ -117,6 +120,9 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +// TODO: for clients that want to increase efficiency by preventing bytesrcs +// from automatically being converted to strings in the value callback. 
+// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); // An object that transparently handles delegation so that the caller needs @@ -140,21 +146,30 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; +bool upb_src_run(upb_src *src); +upb_status *upb_src_status(upb_src *src); -/* upb_bytesrc ****************************************************************/ - -struct _upb_bytesrc; -typedef struct _upb_bytesrc upb_bytesrc; -// Returns the next string in the stream. false is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +/* upb_bytesrc ****************************************************************/ -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. The call fails if fewer than len bytes are -// available in the stream. -INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// Reads up to "count" bytes into "buf", returning the total number of bytes +// read. If <0, indicates error (check upb_bytesrc_status for details). +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count); + +// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing +// existing string data (which avoids a copy). +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count); + +// A convenience function for getting all the remaining data in a upb_bytesrc +// as a upb_string. Returns false and sets "status" if the operation fails. 
+INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status); +INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, + upb_status *status) { + return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); +} // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file @@ -164,14 +179,21 @@ INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t l INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); INLINE bool upb_bytesrc_eof(upb_bytesrc *src); + /* upb_bytesink ***************************************************************/ struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +// Writes up to "count" bytes from "buf", returning the total number of bytes +// written. If <0, indicates error (check upb_bytesink_status() for details). +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count); + +// Puts the given string, which may alias the string data (which avoids a +// copy). Returns the number of bytes that were actually, consumed, which may +// be fewer than were in the string, or <0 on error. +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 91464a7..c0cf04f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -20,23 +20,33 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. +// upb_src +struct _upb_src { +}; +typedef struct { +} upb_src_vtbl; + // upb_bytesrc. 
-typedef bool (*upb_bytesrc_get_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); +typedef upb_strlen_t (*upb_bytesrc_read_fptr)( + upb_bytesrc *src, void *buf, upb_strlen_t count); +typedef bool (*upb_bytesrc_getstr_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t count); // upb_bytesink. -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); +typedef upb_strlen_t (*upb_bytesink_write_fptr)( + upb_bytesink *bytesink, void *buf, upb_strlen_t count); +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str); // Vtables for the above interfaces. typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; + upb_bytesrc_read_fptr read; + upb_bytesrc_getstr_fptr getstr; } upb_bytesrc_vtable; typedef struct { - upb_bytesink_put_fptr put; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; } upb_bytesink_vtable; // "Base Class" definitions; components that implement these interfaces should @@ -69,19 +79,56 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { // Implementation of virtual function dispatch. // upb_bytesrc -INLINE bool upb_bytesrc_get( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { - return bytesrc->vtbl->get(bytesrc, str, minlen); -} +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count) { + return src->vtbl->read(src, buf, count); +} + +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count) { + return src->vtbl->getstr(src, str, count); +} + +INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + // We start with a getstr, because that could possibly alias data instead of + // copying. 
+ if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + // Trade-off between number of read calls and amount of overallocation. + const size_t bufsize = 4096; + while (!upb_bytesrc_eof(src)) { + upb_strlen_t len = upb_string_len(str); + char *buf = upb_string_getrwbuf(str, len + bufsize); + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); + if (read < 0) goto error; + // Resize to proper size. + upb_string_getrwbuf(str, len + read); + } + return true; -INLINE bool upb_bytesrc_append( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { - return bytesrc->vtbl->append(bytesrc, str, len); +error: + upb_copyerr(status, upb_bytesrc_status(src)); + return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_bytesink +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count) { + return sink->vtbl->write(sink, buf, count); +} + +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} + +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; @@ -182,17 +229,6 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, fieldnum, val); } -// upb_bytesink -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { - return sink->vtbl->put(sink, str); -} -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { - return &sink->status; -} - -// upb_bytesink - - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/core/upb_string.c b/core/upb_string.c index 4f5f5c2..b243dfd 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -73,7 +73,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { // assert(str->ptr == NULL); - uint32_t size = 
upb_string_size(str); + upb_strlen_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); str->cached_mem = realloc(str->cached_mem, size); diff --git a/core/upb_string.h b/core/upb_string.h index ee345e3..f82603b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -119,20 +119,21 @@ INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The returned string is either "str" if it could be -// recycled or a newly created string if "str" has other references. +// data written to it. After the function returns, "str" points to a writable +// string, which is either the original string if it had no other references +// or a newly created string if it did have other references. // -// As a special case, passing NULL will allocate a new string. This is -// convenient for the pattern: +// As a special case, passing a pointer to NULL will allocate a new string. +// This is convenient for the pattern: // // upb_string *str = NULL; // while (x) { // if (y) { -// str = upb_string_tryrecycle(str); +// upb_string_recycle(&str); // upb_src_getstr(str); // } // } -upb_string *upb_string_tryrecycle(upb_string *str); +upb_string *upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been -- cgit v1.2.3 From a695b92ccea4b82180ae45d21d7ed4445f7d0769 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 19:18:22 -0800 Subject: Debugging test_def, it's close to working again! 
--- Makefile | 10 +++--- core/upb_def.c | 80 +++++++++++++++++++++++++++-------------- core/upb_stream.h | 21 ++++++++--- core/upb_stream_vtbl.h | 96 ++++++++++++++++++++++++++++++++++++++++---------- core/upb_string.c | 8 ++--- core/upb_string.h | 2 +- tests/test_def.c | 1 + tests/test_string.c | 19 +++++----- 8 files changed, 171 insertions(+), 66 deletions(-) (limited to 'core/upb_string.c') diff --git a/Makefile b/Makefile index 42c7d41..af79363 100644 --- a/Makefile +++ b/Makefile @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a @@ -112,13 +112,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_stream \ -# tests/test_def \ + tests/test_def \ +# tests/test_stream \ # tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb -tests: $(TESTS) +tests: $(LIBUPB) $(TESTS) OTHER_TESTS=tests/tests \ $(TESTS): $(LIBUPB) diff --git a/core/upb_def.c b/core/upb_def.c index 79b6632..a935930 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -319,6 +319,18 @@ void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { } // Handlers for google.protobuf.FileDescriptorProto. 
+static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} + static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { @@ -353,8 +365,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_startmsg, + &upb_defbuilder_FileDescriptorProto_endmsg, &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; @@ -457,9 +469,11 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); upb_string_getref(upb_value_getstr(val)); + b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); + b->saw_number = true; break; default: break; @@ -507,8 +521,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { } static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { - upb_defbuilder *b = _b; - assert(upb_defbuilder_last(b)->fqname != NULL); + (void)_b; + assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL); return UPB_CONTINUE; } @@ -627,10 +641,8 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); - b->f->def = 
UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; } @@ -720,6 +732,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { m->size = offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); } + free(sorted_fields); if (max_align > 0) m->size = upb_align_up(m->size, max_align); @@ -1131,6 +1144,7 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) // * keeping a pointer to the upb_fielddef* and reading it later (the same // upb_fielddef is reused over and over). // * detecting errors in the input (we trust that our input is known-good). +// * skipping the rest of the submessage (UPB_SKIPSUBMSG). // // It also does not support any of the follow protobuf features: // * packed fields. @@ -1189,18 +1203,27 @@ static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) return val; } -bool upb_baredecoder_run(upb_baredecoder *d) { +static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_baredecoder *d = (upb_baredecoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers); +} + +static void upb_baredecoder_run(upb_src *src, upb_status *status) { + upb_baredecoder *d = (upb_baredecoder*)src; + assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; upb_strlen_t stack[UPB_MAX_NESTING]; upb_strlen_t *top = &stack[0]; *top = upb_string_len(d->input); d->offset = 0; - upb_dispatch_startmsg(&d->dispatcher); +#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; + + CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. 
while(d->offset >= *top) { - upb_dispatch_endsubmsg(&d->dispatcher); + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); d->offset = *(top--); } @@ -1216,9 +1239,11 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { + upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(ret); + if(ret == BEGIN_SUBMSG) { // Should deliver as a submessage instead. - upb_dispatch_startsubmsg(&d->dispatcher, &f); + CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f)); *(++top) = d->offset + delim_len; } else { d->offset += delim_len; @@ -1228,11 +1253,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { switch(wt) { case UPB_WIRE_TYPE_VARINT: upb_value_setraw(&v, upb_baredecoder_readv64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_64BIT: upb_value_setraw(&v, upb_baredecoder_readf64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_32BIT: upb_value_setraw(&v, upb_baredecoder_readf32(d)); @@ -1241,28 +1264,33 @@ bool upb_baredecoder_run(upb_baredecoder *d) { assert(false); abort(); } - upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } } - upb_dispatch_endmsg(&d->dispatcher); - return true; + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + printf("SUCCESS!!\n"); + upb_string_unref(str); + return; + +err: + upb_copyerr(status, d->dispatcher.top->handlers.status); + upb_printerr(d->dispatcher.top->handlers.status); + upb_printerr(status); + upb_string_unref(str); + printf("ERROR!!\n"); } static upb_baredecoder *upb_baredecoder_new(upb_string *str) { - //static upb_src_vtable vtbl = { - // (upb_src_getdef_fptr)&upb_baredecoder_getdef, - // (upb_src_getval_fptr)&upb_baredecoder_getval, - // (upb_src_getstr_fptr)&upb_baredecoder_getstr, - // (upb_src_skipval_fptr)&upb_baredecoder_skipval, - // 
(upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, - //}; + static upb_src_vtbl vtbl = { + &upb_baredecoder_sethandlers, + &upb_baredecoder_run, + }; upb_baredecoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); d->input = upb_string_getref(str); d->offset = 0; upb_dispatcher_init(&d->dispatcher); - //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 66bfec2..cf01a5f 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -136,8 +136,8 @@ struct _upb_dispatcher; typedef struct _upb_dispatcher upb_dispatcher; INLINE void upb_dispatcher_init(upb_dispatcher *d); INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE void upb_dispatch_startmsg(upb_dispatcher *d); -INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, @@ -151,8 +151,21 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); -void upb_src_run(upb_src *src, upb_status *status); +// upb_src_sethandlers() must be called once and only once before upb_src_run() +// is called. This sets up the callbacks that will handle the parse. A +// upb_src that is fully initialized except for the call to +// upb_src_sethandlers() is called "prepared" -- this is useful for library +// functions that want to consume the output of a generic upb_src. +// Calling sethandlers() multiple times is an error and will trigger an abort(). 
+INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); + +// Runs the src, calling the callbacks that were registered with +// upb_src_sethandlers(), and returning the status of the operation in +// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a +// non-blocking socket) or a resumable error; in both cases upb_src_run can be +// called again later. TRYAGAIN could come from either the src (input buffers +// are empty) or the handlers (output buffers are full). +INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index d017177..e462122 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -13,6 +13,7 @@ #include #include "upb_stream.h" +#include "upb_string.h" #ifdef __cplusplus extern "C" { @@ -21,10 +22,8 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. // upb_src -struct _upb_src { -}; -typedef struct { -} upb_src_vtbl; +typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers); +typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. typedef upb_strlen_t (*upb_bytesrc_read_fptr)( @@ -42,42 +41,65 @@ typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( typedef struct { upb_bytesrc_read_fptr read; upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtable; +} upb_bytesrc_vtbl; typedef struct { upb_bytesink_write_fptr write; upb_bytesink_putstr_fptr putstr; -} upb_bytesink_vtable; +} upb_bytesink_vtbl; + +typedef struct { + upb_src_sethandlers_fptr sethandlers; + upb_src_run_fptr run; +} upb_src_vtbl; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
struct _upb_bytesrc { - upb_bytesrc_vtable *vtbl; + upb_bytesrc_vtbl *vtbl; upb_status status; bool eof; }; struct _upb_bytesink { - upb_bytesink_vtable *vtbl; + upb_bytesink_vtbl *vtbl; upb_status status; bool eof; }; -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { +struct _upb_src { + upb_src_vtbl *vtbl; +}; + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } +INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { + s->vtbl = vtbl; +} + // Implementation of virtual function dispatch. +// upb_src +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) { + src->vtbl->sethandlers(src, handlers); +} + +INLINE void upb_src_run(upb_src *src, upb_status *status) { + src->vtbl->run(src, status); +} + // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count) { @@ -152,7 +174,41 @@ INLINE bool upb_handlers_isempty(upb_handlers *h) { return !h->set && !h->closure; } +INLINE upb_flow_t upb_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)closure; + (void)f; + (void)val; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)closure; + (void)f; + (void)delegate_to; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + if (!set->startmsg) set->startmsg = &upb_nop; + if 
(!set->endmsg) set->endmsg = &upb_nop; + if (!set->value) set->value = &upb_value_nop; + if (!set->startsubmsg) set->startsubmsg = &upb_startsubmsg_nop; + if (!set->endsubmsg) set->endsubmsg = &upb_nop; + if (!set->unknownval) set->unknownval = &upb_unknownval_nop; h->set = set; } @@ -182,16 +238,19 @@ INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { d->top->handlers = *h; } -INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->startmsg(d->top->handlers.closure); + return d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->endmsg(d->top->handlers.closure); + return d->top->handlers.set->endmsg(d->top->handlers.closure); } +// TODO: several edge cases to fix: +// - delegated start returns UPB_BREAK, should replay the start on resume. +// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg. 
INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f) { upb_handlers handlers; @@ -203,17 +262,18 @@ INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, ++d->top; d->top->handlers = handlers; d->top->depth = 0; - d->top->handlers.set->startmsg(d->top->handlers.closure); - ret = UPB_CONTINUE; + ret = d->top->handlers.set->startmsg(d->top->handlers.closure); } - ++d->top->depth; + if (ret == UPB_CONTINUE) ++d->top->depth; upb_handlers_uninit(&handlers); return ret; } INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + upb_flow_t ret; if (--d->top->depth == 0) { - d->top->handlers.set->endmsg(d->top->handlers.closure); + ret = d->top->handlers.set->endmsg(d->top->handlers.closure); + if (ret != UPB_CONTINUE) return ret; --d->top; } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); diff --git a/core/upb_string.c b/core/upb_string.c index b243dfd..e9ff0d9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,13 +61,13 @@ void _upb_string_free(upb_string *str) { free(str); } -upb_string *upb_string_tryrecycle(upb_string *str) { +void upb_string_recycle(upb_string **_str) { + upb_string *str = *_str; if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); - return str; } else { - return upb_string_new(); + *_str = upb_string_new(); } } @@ -111,7 +111,7 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { // We don't care about the terminating NULL, but snprintf might // bail out of printing even other characters if it doesn't have // enough space to write the NULL also. 
- str = upb_string_tryrecycle(str); + upb_string_recycle(&str); buf = upb_string_getrwbuf(str, true_size + 1); vsnprintf(buf, true_size + 1, format, args); } diff --git a/core/upb_string.h b/core/upb_string.h index f82603b..1f4b20c 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -133,7 +133,7 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // upb_src_getstr(str); // } // } -upb_string *upb_string_recycle(upb_string **str); +void upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been diff --git a/tests/test_def.c b/tests/test_def.c index 732835d..5be0672 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,6 +10,7 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { + printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); diff --git a/tests/test_string.c b/tests/test_string.c index 7c9ed02..6446806 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -23,7 +23,8 @@ static void test_static() { upb_string_unref(&static_upbstr); // Recycling a static string returns a new string (that can be modified). - upb_string *str = upb_string_tryrecycle(&static_upbstr); + upb_string *str = &static_upbstr; + upb_string_recycle(&str); assert(str != &static_upbstr); upb_string_unref(str); @@ -34,8 +35,9 @@ static void test_dynamic() { assert(str != NULL); upb_string_unref(str); - // Can also create a string by tryrecycle(NULL). - str = upb_string_tryrecycle(NULL); + // Can also create a string by recycle(NULL). 
+ str = NULL; + upb_string_recycle(&str); assert(str != NULL); upb_strcpyc(str, static_str); @@ -45,7 +47,8 @@ static void test_dynamic() { assert(upb_streqlc(str, static_str)); upb_string_endread(str); - upb_string *str2 = upb_string_tryrecycle(str); + upb_string *str2 = str; + upb_string_recycle(&str2); // No other referents, so should return the same string. assert(str2 == str); @@ -58,7 +61,7 @@ static void test_dynamic() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -70,7 +73,7 @@ static void test_dynamic() { assert(upb_atomic_read(&str2->refcount) == 2); // Recycling str should eliminate the extra ref. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); assert(upb_atomic_read(&str2->refcount) == 1); // Resetting str should reuse its old data. @@ -80,7 +83,7 @@ static void test_dynamic() { // Resetting str to something very long should require new data to be // allocated. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; upb_strcpyc(str, longstring); const char *robuf6 = upb_string_getrobuf(str); @@ -88,7 +91,7 @@ static void test_dynamic() { assert(upb_streqlc(str, longstring)); // Test printf. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); assert(upb_streqlc(str, "Number: 5, String: YO!")); -- cgit v1.2.3 From c9df91b04a429f9324afeefece28f21e7078e3ac Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 01:03:02 -0800 Subject: upb bootstraps again! and with no memory leaks! 
--- core/upb_def.c | 40 +++++++++++++++++----------------------- core/upb_def.h | 2 +- core/upb_stream_vtbl.h | 1 + core/upb_string.c | 1 + tests/test_def.c | 4 +--- tests/test_string.c | 11 +++++++++++ 6 files changed, 32 insertions(+), 27 deletions(-) (limited to 'core/upb_string.c') diff --git a/core/upb_def.c b/core/upb_def.c index a935930..c21843e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -429,7 +429,7 @@ typedef struct _upb_unresolveddef { static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = str; + def->name = upb_string_getref(str); return def; } @@ -445,6 +445,7 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { static void upb_enumdef_free(upb_enumdef *e) { upb_enum_iter i; for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + // Frees the ref taken when the string was parsed. upb_string_unref(upb_enum_iter_name(i)); } upb_strtable_free(&e->ntoi); @@ -468,7 +469,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); - upb_string_getref(upb_value_getstr(val)); + b->name = upb_string_getref(upb_value_getstr(val)); b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: @@ -495,6 +496,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. 
+ b->name = NULL; return UPB_CONTINUE; } @@ -641,7 +643,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - if(b->f->def) upb_def_unref(b->f->def); + upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; @@ -847,6 +849,7 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, return e; } else { // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. upb_string *sym_str = upb_string_new(); int baselen = upb_string_len(base); while(1) { @@ -1212,21 +1215,14 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { upb_baredecoder *d = (upb_baredecoder*)src; assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; - upb_strlen_t stack[UPB_MAX_NESTING]; + upb_strlen_t stack[UPB_MAX_NESTING] = {UPB_STRLEN_MAX}; upb_strlen_t *top = &stack[0]; - *top = upb_string_len(d->input); d->offset = 0; #define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { - // Detect end-of-submessage. - while(d->offset >= *top) { - CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); - d->offset = *(top--); - } - uint32_t key = upb_baredecoder_readv64(d); upb_fielddef f; f.number = key >> 3; @@ -1266,22 +1262,22 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { } CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } + // Detect end-of-submessage. 
+ while(d->offset >= *top) { + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); + d->offset = *(top--); + } } CHECK(upb_dispatch_endmsg(&d->dispatcher)); - printf("SUCCESS!!\n"); upb_string_unref(str); return; err: upb_copyerr(status, d->dispatcher.top->handlers.status); - upb_printerr(d->dispatcher.top->handlers.status); - upb_printerr(status); upb_string_unref(str); - printf("ERROR!!\n"); } -static upb_baredecoder *upb_baredecoder_new(upb_string *str) -{ +static upb_baredecoder *upb_baredecoder_new(upb_string *str) { static upb_src_vtbl vtbl = { &upb_baredecoder_sethandlers, &upb_baredecoder_run, @@ -1294,19 +1290,16 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) return d; } -static void upb_baredecoder_free(upb_baredecoder *d) -{ +static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); free(d); } -static upb_src *upb_baredecoder_src(upb_baredecoder *d) -{ +static upb_src *upb_baredecoder_src(upb_baredecoder *d) { return &d->src; } -void upb_symtab_add_descriptorproto(upb_symtab *symtab) -{ +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // For the moment we silently decline to perform the operation if the symbols // already exist in the symtab. Revisit this when we have a better story // about whether syms in a table can be replaced. 
@@ -1329,4 +1322,5 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) upb_symtab_unref(symtab); abort(); } + upb_status_uninit(&status); } diff --git a/core/upb_def.h b/core/upb_def.h index 9eb961a..d9bab97 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -77,7 +77,7 @@ INLINE void upb_def_ref(upb_def *def) { if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); } INLINE void upb_def_unref(upb_def *def) { - if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); + if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); } /* upb_fielddef ***************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index e462122..fd71b2d 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -275,6 +275,7 @@ INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { ret = d->top->handlers.set->endmsg(d->top->handlers.closure); if (ret != UPB_CONTINUE) return ret; --d->top; + assert(d->top >= d->stack); } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } diff --git a/core/upb_string.c b/core/upb_string.c index e9ff0d9..c599728 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -67,6 +67,7 @@ void upb_string_recycle(upb_string **_str) { str->ptr = NULL; upb_string_release(str); } else { + upb_string_unref(str); *_str = upb_string_new(); } } diff --git a/tests/test_def.c b/tests/test_def.c index 5be0672..2d2658f 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,13 +10,10 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { - printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); - printf("Size: %zd\n", sizeof(upb_ntof_ent)); - upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); @@ -24,4 +21,5 @@ int main() { 
upb_def_unref(fds); upb_string_unref(str); upb_symtab_unref(s); + return 0; } diff --git a/tests/test_string.c b/tests/test_string.c index 6446806..ef0e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -40,6 +40,17 @@ static void test_dynamic() { upb_string_recycle(&str); assert(str != NULL); + // Take a ref and recycle; should create a new string and release a ref + // on the old one. + upb_string *strcp = upb_string_getref(str); + assert(strcp == str); + assert(upb_atomic_read(&str->refcount) == 2); + upb_string_recycle(&str); + assert(strcp != str); + assert(upb_atomic_read(&str->refcount) == 1); + assert(upb_atomic_read(&strcp->refcount) == 1); + upb_string_unref(strcp); + upb_strcpyc(str, static_str); assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); -- cgit v1.2.3