From be5ddd8a645eaa949a8d500718257fb7cb71cf44 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 19:25:39 -0700 Subject: Tweaks to upb_src/upb_sink interfaces. --- tests/test_table.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/tests/test_table.cc b/tests/test_table.cc index 47d806c..37e14a8 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -1,7 +1,7 @@ #undef NDEBUG /* ensure tests always assert. */ #include "upb_table.h" -#include "upb_data.h" +#include "upb_string.h" #include "test_util.h" #include #include @@ -45,7 +45,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) all.insert(key); strtable_entry e; e.value = key[0]; - upb_strptr str = upb_strduplen(key.c_str(), key.size()); + upb_string *str = upb_strduplen(key.c_str(), key.size()); e.e.key = str; upb_strtable_insert(&table, &e.e); upb_string_unref(str); // The table still owns a ref. @@ -55,7 +55,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) /* Test correctness. */ for(uint32_t i = 0; i < keys.size(); i++) { const string& key = keys[i]; - upb_strptr str = upb_strduplen(key.c_str(), key.size()); + upb_string *str = upb_strduplen(key.c_str(), key.size()); strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, str); if(m.find(key) != m.end()) { /* Assume map implementation is correct. */ assert(e); @@ -71,7 +71,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) strtable_entry *e; for(e = (strtable_entry*)upb_strtable_begin(&table); e; e = (strtable_entry*)upb_strtable_next(&table, &e->e)) { - string tmp(upb_string_getrobuf(e->e.key), upb_strlen(e->e.key)); + string tmp(upb_string_getrobuf(e->e.key), upb_string_len(e->e.key)); std::set::iterator i = all.find(tmp); assert(i != all.end()); all.erase(i); -- cgit v1.2.3 From e29bf964d1716398e8354a50f506906a307298e5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 12:15:31 -0700 Subject: Tests for string and fleshed out implementation. --- Makefile | 15 ++++++++----- core/upb_string.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++------- core/upb_string.h | 40 ++++++++++++++++++++++++---------- tests/test_string.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ tests/test_table.cc | 13 ++++++++++- 5 files changed, 161 insertions(+), 26 deletions(-) create mode 100644 tests/test_string.c (limited to 'tests') diff --git a/Makefile b/Makefile index ca4f940..1f977b4 100644 --- a/Makefile +++ b/Makefile @@ -86,22 +86,25 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -TESTS=tests/tests \ +TESTS=tests/test_string \ + tests/test_table +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ tests/test_table \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb $(TESTS): core/libupb.a -#VALGRIND=valgrind --leak-check=full --error-exitcode=1 -VALGRIND= +VALGRIND=valgrind --leak-check=full --error-exitcode=1 +#VALGRIND= test: tests @echo Running all tests under valgrind. - $(VALGRIND) ./tests/tests # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/t.* ; do \ - if [ -f ./$$test ] ; then \ + @for test in tests/*; do \ + if [ -x ./$$test ] ; then \ echo $(VALGRIND) ./$$test: \\c; \ $(VALGRIND) ./$$test; \ fi \ diff --git a/core/upb_string.c b/core/upb_string.c index 91ab9ae..f9af9e9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -7,8 +7,11 @@ #include "upb_string.h" #include - -#define UPB_STRING_UNFINALIZED -1 +#ifdef __GLIBC__ +#include +#elif defined(__APPLE__) +#include +#endif static uint32_t upb_round_up_pow2(uint32_t v) { // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 @@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) { upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; + str->cached_mem = NULL; +#ifndef UPB_HAVE_MSIZE str->size = 0; - str->len = UPB_STRING_UNFINALIZED; +#endif + str->src = NULL; upb_atomic_refcount_init(&str->refcount, 1); return str; } +uint32_t upb_string_size(upb_string *str) { +#ifdef __GLIBC__ + return malloc_usable_size(str->cached_mem); +#elif defined(__APPLE__) + return malloc_size(str->cached_mem); +#else + return str->size; +#endif +} + +static void upb_string_release(upb_string *str) { + if(str->src) { + upb_string_unref(str->src); + str->src = NULL; + } +} + void _upb_string_free(upb_string *str) { - if(str->ptr) free(str->ptr); + if(str->cached_mem) free(str->cached_mem); + upb_string_release(str); free(str); } +upb_string *upb_string_tryrecycle(upb_string *str) { + if(str == NULL || upb_atomic_read(&str->refcount) > 1) { + return upb_string_new(); + } else { + str->ptr = NULL; + upb_string_release(str); + return str; + } +} + char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->len == UPB_STRING_UNFINALIZED); - if (str->size < len) { - str->size = upb_round_up_pow2(len); - str->ptr = realloc(str->ptr, str->size); + assert(str->ptr == NULL); + uint32_t size = upb_string_size(str); + if (size < len) { + size = upb_round_up_pow2(len); + str->cached_mem = realloc(str->cached_mem, size); +#ifndef UPB_HAVE_MSIZE + str->size = size; +#endif } str->len = len; + str->ptr = str->cached_mem; return str->ptr; } + +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len) { + assert(str->ptr == NULL); + str->src = upb_string_getref(target_str); + str->ptr = upb_string_getrobuf(target_str) + start; + str->len = len; +} diff --git a/core/upb_string.h b/core/upb_string.h index 770dba7..7ec3d48 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -16,8 +16,6 @@ * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. */ @@ -37,16 +35,31 @@ extern "C" { // All members of this struct are private, and may only be read/written through // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { + // The pointer to our currently active data. This may be memory we own + // or a pointer into memory we don't own. char *ptr; + + // If non-NULL, this is a block of memory we own. We keep this cached even + // if "ptr" is currently aliasing memory we don't own. + char *cached_mem; + + // The effective length of the string (the bytes at ptr). int32_t len; +#ifndef UPB_HAVE_MSIZE + // How many bytes are allocated in cached_mem. + // + // Many platforms have a function that can tell you the size of a block + // that was previously malloc'd. In this case we can avoid storing the + // size explicitly. uint32_t size; +#endif + + // The string's refcount. upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_atomic_refcount_t reader_count; - } extra; + + // Used if this is a slice of another string, NULL otherwise. We own a ref + // on src. + struct _upb_string *src; }; // Returns a newly-created, empty, non-finalized string. When the string is no @@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len); +// Sketch of an API for allowing upb_strings to reference external, unowned +// data. Waiting for a clear use case before actually implementing it. +// // Makes the string "str" a reference to the given string data. The caller // guarantees that the given string data will not change or be deleted until // a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); +// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +// void upb_string_detach(upb_string *str); // Allows using upb_strings in printf, ie: // upb_strptr str = UPB_STRLIT("Hello, World!\n"); @@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { } // Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); +INLINE upb_string *upb_strdupc(const char *src) { + return upb_strduplen(src, strlen(src)); +} // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); diff --git a/tests/test_string.c b/tests/test_string.c new file mode 100644 index 0000000..4fdab6c --- /dev/null +++ b/tests/test_string.c @@ -0,0 +1,56 @@ + +#undef NDEBUG /* ensure tests always assert. */ +#include "upb_string.h" + +char static_str[] = "Static string."; + +int main() { + upb_string *str = upb_string_new(); + assert(str != NULL); + upb_string_unref(str); + + // Can also create a string by tryrecycle(NULL). + str = upb_string_tryrecycle(NULL); + assert(str != NULL); + + upb_strcpyc(str, static_str); + assert(upb_string_len(str) == (sizeof(static_str) - 1)); + const char *robuf = upb_string_getrobuf(str); + assert(robuf != NULL); + assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + upb_string_endread(str); + + upb_string *str2 = upb_string_tryrecycle(str); + // No other referents, so should return the same string. + assert(str2 == str); + + // Write a shorter string, the same memory should be reused. + upb_strcpyc(str, "XX"); + const char *robuf2 = upb_string_getrobuf(str); + assert(robuf2 == robuf); + assert(memcmp(robuf2, "XX", 2) == 0); + + // Make string alias part of another string. + str2 = upb_strdupc("WXYZ"); + upb_string_substr(str, str2, 1, 2); + assert(upb_string_len(str) == 2); + assert(upb_string_len(str2) == 4); + // The two string should be aliasing the same data. + const char *robuf3 = upb_string_getrobuf(str); + const char *robuf4 = upb_string_getrobuf(str2); + assert(robuf3 == robuf4 + 1); + // The aliased string should have an extra ref. + assert(upb_atomic_read(&str2->refcount) == 2); + + // Recycling str should eliminate the extra ref. + str = upb_string_tryrecycle(str); + assert(upb_atomic_read(&str2->refcount) == 1); + + // Resetting str should reuse its old data. + upb_strcpyc(str, "XX"); + const char *robuf5 = upb_string_getrobuf(str); + assert(robuf5 == robuf); + + upb_string_unref(str); + upb_string_unref(str2); +} diff --git a/tests/test_table.cc b/tests/test_table.cc index 37e14a8..47d5e57 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -12,6 +12,8 @@ #include #include +bool benchmark = false; + using std::string; using std::vector; @@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries) } } + if(!benchmark) { + upb_inttable_free(&table); + return; + } + /* Test performance. We only test lookups for keys that are known to exist. */ uintptr_t x = 0; const unsigned int iterations = 0xFFFFFF; @@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num) return buf; } -int main() +int main(int argc, char *argv[]) { + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--benchmark") == 0) benchmark = true; + } + vector keys; keys.push_back("google.protobuf.FileDescriptorSet"); keys.push_back("google.protobuf.FileDescriptorProto"); -- cgit v1.2.3 From 2ef013126c682a44d15554ea7a04144fc9a10fed Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 13:28:47 -0700 Subject: Fleshed out upb_string further. Now upb_def's only unresolved references are upb_src. --- Makefile | 3 ++- core/upb_def.c | 11 +++++------ core/upb_string.c | 30 +++++++++++++++++++++++++++++- core/upb_string.h | 46 +++++++++++++++++++++++++++++++++++++--------- tests/test_string.c | 17 +++++++++++++++-- 5 files changed, 88 insertions(+), 19 deletions(-) (limited to 'tests') diff --git a/Makefile b/Makefile index 1f977b4..2abe0c7 100644 --- a/Makefile +++ b/Makefile @@ -87,7 +87,8 @@ tests/test.proto.pb: tests/test.proto protoc tests/test.proto -otests/test.proto.pb TESTS=tests/test_string \ - tests/test_table + tests/test_table \ + tests/test_def tests: $(TESTS) OTHER_TESTS=tests/tests \ diff --git a/core/upb_def.c b/core/upb_def.c index bfab738..1f57c70 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -44,13 +44,12 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ static upb_string *upb_join(upb_string *base, upb_string *name) { - upb_string *joined = upb_strdup(base); - upb_strlen_t len = upb_string_len(joined); - if(len > 0) { - upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; + if (upb_string_len(base) == 0) { + return upb_string_getref(name); + } else { + return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, + UPB_STRARG(base), UPB_STRARG(name)); } - upb_strcat(joined, name); - return joined; } // Qualify the defname for all defs starting with offset "start" with "str". diff --git a/core/upb_string.c b/core/upb_string.c index f9af9e9..2f487aa 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -82,7 +82,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { } str->len = len; str->ptr = str->cached_mem; - return str->ptr; + return str->cached_mem; } void upb_string_substr(upb_string *str, upb_string *target_str, @@ -92,3 +92,31 @@ void upb_string_substr(upb_string *str, upb_string *target_str, str->ptr = upb_string_getrobuf(target_str) + start; str->len = len; } + +void upb_string_vprintf(upb_string *str, const char *format, va_list args) { + // Try once without reallocating. We have to va_copy because we might have + // to call vsnprintf again. + uint32_t size = UPB_MAX(upb_string_size(str), 16); + char *buf = upb_string_getrwbuf(str, size); + va_list args_copy; + va_copy(args_copy, args); + uint32_t true_size = vsnprintf(buf, size, format, args_copy); + va_end(args_copy); + + if (true_size > size) { + // Need to reallocate. + str = upb_string_tryrecycle(str); + buf = upb_string_getrwbuf(str, true_size); + vsnprintf(buf, true_size, format, args); + } + str->len = true_size; +} + +upb_string *upb_string_asprintf(const char *format, ...) { + upb_string *str = upb_string_new(); + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); + return str; +} diff --git a/core/upb_string.h b/core/upb_string.h index 7ec3d48..5cc0eaf 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -25,6 +25,7 @@ #include #include +#include #include "upb_atomic.h" #include "upb.h" @@ -37,7 +38,7 @@ extern "C" { struct _upb_string { // The pointer to our currently active data. This may be memory we own // or a pointer into memory we don't own. - char *ptr; + const char *ptr; // If non-NULL, this is a block of memory we own. We keep this cached even // if "ptr" is currently aliasing memory we don't own. @@ -111,16 +112,25 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // } upb_string *upb_string_tryrecycle(upb_string *str); -// The three options for setting the contents of a string. These may only be -// called when a string is first created or recycled; once other functions have -// been called on the string, these functions are not allowed until the string -// is recycled. +// The options for setting the contents of a string. These may only be called +// when a string is first created or recycled; once other functions have been +// called on the string, these functions are not allowed until the string is +// recycled. // Gets a pointer suitable for writing to the string, which is guaranteed to // have at least "len" bytes of data available. The size of the string will // become "len". char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); +// Replaces the contents of str with the contents of the given printf. +void upb_string_vprintf(upb_string *str, const char *format, va_list args); +INLINE void upb_string_printf(upb_string *str, const char *format, ...) { + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); +} + // Sets the contents of "str" to be the given substring of "target_str", to // which the caller must own a ref. void upb_string_substr(upb_string *str, upb_string *target_str, @@ -144,7 +154,7 @@ void upb_string_substr(upb_string *str, upb_string *target_str, /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. +// overflow. For the most part these only use the public upb_string interface. // More efficient than upb_strcmp if all you need is to test equality. INLINE bool upb_streql(upb_string *s1, upb_string *s2) { @@ -163,6 +173,17 @@ INLINE bool upb_streql(upb_string *s1, upb_string *s2) { // Like strcmp(). int upb_strcmp(upb_string *s1, upb_string *s2); +// Compare a upb_string with memory or a NULL-terminated C string. +INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) { + return len == upb_string_len(str) && + memcmp(upb_string_getrobuf(str), buf, len) == 0; +} + +INLINE bool upb_streqlc(upb_string *str, const void *buf) { + // Could be made one-pass. + return upb_streqllen(str, buf, strlen((const char*)buf)); +} + // Like upb_strcpy, but copies from a buffer and length. INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { memcpy(upb_string_getrwbuf(dest, len), src, len); @@ -175,10 +196,10 @@ INLINE void upb_strcpy(upb_string *dest, upb_string *src) { } // Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_string *dest, const char *src) { +INLINE void upb_strcpyc(upb_string *dest, const void *src) { // This does two passes over src, but that is necessary unless we want to // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); + upb_strcpylen(dest, src, strlen((const char*)src)); } // Returns a new string whose contents are a copy of s. @@ -200,11 +221,18 @@ INLINE upb_string *upb_strdupc(const char *src) { void upb_strcat(upb_string *s, upb_string *append); // Returns a new string that is a substring of the given string. -upb_string *upb_strslice(upb_string *s, int offset, int len); +INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { + upb_string *str = upb_string_new(); + upb_string_substr(str, s, offset, len); + return str; +} // Reads an entire file into a newly-allocated string. upb_string *upb_strreadfile(const char *filename); +// Returns a new string with the contents of the given printf. +upb_string *upb_string_asprintf(const char *format, ...); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tests/test_string.c b/tests/test_string.c index 4fdab6c..5e6e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -17,7 +17,7 @@ int main() { assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); assert(robuf != NULL); - assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + assert(upb_streqlc(str, static_str)); upb_string_endread(str); upb_string *str2 = upb_string_tryrecycle(str); @@ -28,7 +28,7 @@ int main() { upb_strcpyc(str, "XX"); const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); - assert(memcmp(robuf2, "XX", 2) == 0); + assert(upb_streqlc(str, "XX")); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -51,6 +51,19 @@ int main() { const char *robuf5 = upb_string_getrobuf(str); assert(robuf5 == robuf); + // Resetting str to something very long should require new data to be + // allocated. + str = upb_string_tryrecycle(str); + const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; + upb_strcpyc(str, longstring); + const char *robuf6 = upb_string_getrobuf(str); + assert(robuf6 != robuf); + assert(upb_streqlc(str, longstring)); + + // Test printf. + str = upb_string_tryrecycle(str); + upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + upb_string_unref(str); upb_string_unref(str2); } -- cgit v1.2.3 From db6c7387bc1df49deac41155a173e33017a75ed8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 18:11:24 -0700 Subject: Incremental progress towards getting upb_def to bootstrap. --- Makefile | 3 +- core/upb.c | 9 ++--- core/upb.h | 7 +++- core/upb_def.c | 102 ++++++++++++++++++++++--------------------------- core/upb_def.h | 62 +++++++++++++++++++++--------- core/upb_stream_vtbl.h | 1 + core/upb_table.c | 2 +- tests/test_string.c | 3 ++ 8 files changed, 105 insertions(+), 84 deletions(-) (limited to 'tests') diff --git a/Makefile b/Makefile index 568dcad..2b2a269 100644 --- a/Makefile +++ b/Makefile @@ -48,12 +48,13 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ descriptor/descriptor.c +$(SRC): perf-cppflags # Parts of core that are yet to be converted. OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC diff --git a/core/upb.c b/core/upb.c index a98512d..9ed5617 100644 --- a/core/upb.c +++ b/core/upb.c @@ -44,12 +44,11 @@ void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...) { if(upb_ok(status)) { // The first error is the most interesting. - status->str = upb_string_new(); - char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); status->code = code; + status->str = upb_string_tryrecycle(status->str); va_list args; va_start(args, msg); - vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); + upb_string_vprintf(status->str, msg, args); va_end(args); } } @@ -57,10 +56,10 @@ void upb_seterr(upb_status *status, enum upb_status_code code, void upb_copyerr(upb_status *to, upb_status *from) { to->code = from->code; - to->str = upb_string_getref(from->str); + if(from->str) to->str = upb_string_getref(from->str); } -void upb_reset(upb_status *status) { +void upb_status_reset(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 230e638..630d9e1 100644 --- a/core/upb.h +++ b/core/upb.h @@ -195,7 +195,12 @@ INLINE bool upb_ok(upb_status *status) { return status->code == UPB_STATUS_OK; } -void upb_reset(upb_status *status); +INLINE void upb_status_init(upb_status *status) { + status->code = UPB_STATUS_OK; + status->str = NULL; +} + +void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index cc4fd80..0f48559 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -155,8 +155,9 @@ static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, } else { open_defs[num_open_defs++] = m; } - for(int i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter iter = upb_msg_begin(m); + for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { + upb_fielddef *f = upb_msg_iter_field(iter); upb_def *def = f->def; if(upb_issubmsg(f) && def->is_cyclic) { upb_msgdef *sub_m = upb_downcast_msgdef(def); @@ -230,16 +231,6 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ -typedef struct { - upb_strtable_entry e; - uint32_t value; -} ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_string *string; -} iton_ent; - static void upb_enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -271,8 +262,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } - ntoi_ent ntoi_ent = {{name, 0}, number}; - iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{name, 0}, number}; + upb_iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the @@ -291,11 +282,14 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + e->base.fqname = upb_string_tryrecycle(e->base.fqname); + CHECKSRC(upb_src_getstr(src, e->base.fqname)); case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: CHECK(upb_addenum_val(src, e, status)); break; @@ -304,37 +298,25 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) break; } } + assert(e->base.fqname); upb_deflist_push(defs, UPB_UPCAST(e)); return true; +src_err: + upb_copyerr(status, upb_src_status(src)); err: upb_enumdef_free(e); return false; } -static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { - iter->state = ent; - iter->name = ent->e.key; - iter->val = ent->value; -} - -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. - ntoi_ent *ent = upb_strtable_begin(&e->ntoi); - iter->e = e; - fill_iter(iter, ent); + return upb_inttable_begin(&e->iton); } -void upb_enum_next(upb_enum_iter *iter) { - ntoi_ent *ent = iter->state; - assert(ent); - ent = upb_strtable_next(&iter->e->ntoi, &ent->e); - iter->state = ent; - if(ent) fill_iter(iter, ent); -} - -bool upb_enum_done(upb_enum_iter *iter) { - return iter->state == NULL; +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { + assert(iter); + return upb_inttable_next(&e->iton, &iter->e); } @@ -346,7 +328,7 @@ static void upb_fielddef_free(upb_fielddef *f) { static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); - if(upb_hasdef(f) && f->owned) { + if(f->owned) { upb_def_unref(f->def); } } @@ -354,6 +336,8 @@ static void upb_fielddef_uninit(upb_fielddef *f) { static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) { upb_fielddef *f = malloc(sizeof(*f)); + f->number = -1; + f->name = NULL; f->def = NULL; f->owned = false; upb_fielddef *parsed_f; @@ -388,6 +372,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) } CHECKSRC(upb_src_eof(src)); // TODO: verify that all required fields were present. + assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. @@ -461,9 +446,9 @@ err: static void upb_msgdef_free(upb_msgdef *m) { - for (upb_field_count_t i = 0; i < m->num_fields; i++) - upb_fielddef_uninit(&m->fields[i]); - free(m->fields); + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_uninit(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -479,6 +464,13 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { upb_def_ref(def); } +upb_msg_iter upb_msg_begin(upb_msgdef *m) { + return upb_inttable_begin(&m->itof); +} + +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { + return upb_inttable_next(&m->itof, &iter->e); +} /* symtab internal ***********************************************************/ @@ -601,8 +593,9 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } else { UPB_UPCAST(m)->search_depth = ++depth; bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_issubmsg(f)) continue; upb_def *sub_def = f->def; upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); @@ -632,8 +625,9 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Type names are resolved relative to the message in which they appear. upb_string *base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_hasdef(f)) continue; // No resolving necessary. upb_string *name = upb_downcast_unresolveddef(f->def)->name; @@ -873,7 +867,6 @@ typedef struct { upb_wire_type_t wire_type; upb_strlen_t delimited_len; upb_strlen_t stack[UPB_MAX_NESTING], *top; - upb_string *str; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -929,6 +922,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) return &d->field; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + d->offset += d->delimited_len; + return true; +} + static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { switch(d->wire_type) { @@ -950,11 +949,6 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) return true; } -static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { - upb_string_substr(str, d->input, d->offset, d->delimited_len); - return true; -} - static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -986,7 +980,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); - d->str = upb_string_new(); d->top = &d->stack[0]; upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; @@ -995,7 +988,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); - upb_string_unref(d->str); free(d); } @@ -1004,11 +996,8 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) return &d->src; } -upb_symtab *upb_get_descriptor_symtab() +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // TODO: implement sharing of symtabs, so that successive calls to this - // function will return the same symtab. - upb_symtab *symtab = upb_symtab_new(); // TODO: allow upb_strings to be static or on the stack. upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); @@ -1017,5 +1006,4 @@ upb_symtab *upb_get_descriptor_symtab() assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); - return symtab; } diff --git a/core/upb_def.h b/core/upb_def.h index 5c8c11e..82d8520 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -135,11 +135,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; - size_t size; - upb_field_count_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; // Required fields have the lowest set bytemasks. - upb_fielddef *fields; // We have exclusive ownership of these. // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -170,6 +165,21 @@ INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +// Iteration over fields. The order is undefined. +// upb_msg_iter i; +// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// // ... +// } +typedef upb_itof_ent *upb_msg_iter; + +upb_msg_iter upb_msg_begin(upb_msgdef *m); +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); +INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; } + +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { + return iter->f; +} + /* upb_enumdef ****************************************************************/ typedef struct _upb_enumdef { @@ -178,6 +188,16 @@ typedef struct _upb_enumdef { upb_inttable iton; } upb_enumdef; +typedef struct { + upb_strtable_entry e; + uint32_t value; +} upb_ntoi_ent; + +typedef struct { + upb_inttable_entry e; + upb_string *string; +} upb_iton_ent; + typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. @@ -186,18 +206,22 @@ upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { // // ... // } -typedef struct { - upb_enumdef *e; - void *state; // Internal iteration state. - upb_string *name; - upb_enumval_t val; -} upb_enum_iter; -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); -void upb_enum_next(upb_enum_iter *iter); -bool upb_enum_done(upb_enum_iter *iter); +typedef upb_iton_ent *upb_enum_iter; + +upb_enum_iter upb_enum_begin(upb_enumdef *e); +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); +INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; } + +INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { + return iter->string; +} +INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { + return iter->e.key; +} + /* upb_symtab *****************************************************************/ @@ -252,10 +276,10 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); -// Returns a symtab that defines google.protobuf.DescriptorProto and all other -// types that are defined in descriptor.proto. This allows you to load other -// proto types. The caller owns a ref on the returned symtab. -upb_symtab *upb_get_descriptor_symtab(); +// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. +// This is necessary for bootstrapping, since these are the upb_defs that +// specify other defs and allow them to be loaded. +void upb_symtab_add_descriptorproto(upb_symtab *s); /* upb_def casts **************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 52172d2..ba2670e 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -88,6 +88,7 @@ struct upb_bytesrc { INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; + upb_status_init(&s->status); #ifndef DEBUG // TODO: initialize debug-mode checking. #endif diff --git a/core/upb_table.c b/core/upb_table.c index b91776c..b860204 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -179,7 +179,7 @@ static void strinsert(upb_strtable *t, upb_strtable_entry *e) memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ upb_strtable_entry *evictee_e = strent(t, evictee_bucket); while(1) { - assert(!upb_string_isnull(evictee_e->key)); + assert(evictee_e->key); assert(evictee_e->next != UPB_END_OF_CHAIN); if(evictee_e->next == bucket) { evictee_e->next = empty_bucket; diff --git a/tests/test_string.c b/tests/test_string.c index 5e6e2a9..5869b70 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -66,4 +66,7 @@ int main() { upb_string_unref(str); upb_string_unref(str2); + + // Unref of NULL is harmless. + upb_string_unref(NULL); } -- cgit v1.2.3 From ae0beee2854b977f472d48cd149b880b074b59c5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 19:37:47 -0700 Subject: Fixed upb_string error with strange vsnprintf() behavior. --- core/upb.c | 9 +++++++++ core/upb.h | 1 + core/upb_def.c | 49 +++++++++++++++++++++++++++++++++++++------------ core/upb_string.c | 13 +++++++++---- tests/test_string.c | 9 +++++++++ 5 files changed, 65 insertions(+), 16 deletions(-) (limited to 'tests') diff --git a/core/upb.c b/core/upb.c index 9ed5617..d581bbe 100644 --- a/core/upb.c +++ b/core/upb.c @@ -64,3 +64,12 @@ void upb_status_reset(upb_status *status) { upb_string_unref(status->str); status->str = NULL; } + +void upb_printerr(upb_status *status) { + if(status->str) { + fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", + status->code, UPB_STRARG(status->str)); + } else { + fprintf(stderr, "code: %d, no msg\n", status->code); + } +} diff --git a/core/upb.h b/core/upb.h index 630d9e1..13317bb 100644 --- a/core/upb.h +++ b/core/upb.h @@ -200,6 +200,7 @@ INLINE void upb_status_init(upb_status *status) { status->str = NULL; } +void upb_printerr(upb_status *status); void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); diff --git a/core/upb_def.c b/core/upb_def.c index 0f48559..2b2916e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -21,7 +21,7 @@ typedef struct { static void upb_deflist_init(upb_deflist *l) { l->size = 8; - l->defs = malloc(l->size); + l->defs = malloc(l->size * sizeof(void*)); l->len = 0; } @@ -34,7 +34,7 @@ static void upb_deflist_uninit(upb_deflist *l) { static void upb_deflist_push(upb_deflist *l, upb_def *d) { if(l->len == l->size) { l->size *= 2; - l->defs = realloc(l->defs, l->size); + l->defs = realloc(l->defs, l->size * sizeof(void*)); } l->defs[l->len++] = d; } @@ -238,6 +238,7 @@ static void upb_enumdef_free(upb_enumdef *e) { free(e); } +// google.protobuf.EnumValueDescriptorProto. static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) { int32_t number = -1; @@ -245,13 +246,13 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); - break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: name = upb_string_tryrecycle(name); CHECKSRC(upb_src_getstr(src, name)); break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &number)); + break; default: CHECKSRC(upb_src_skipval(src)); break; @@ -278,6 +279,7 @@ err: return false; } +// google.protobuf.EnumDescriptorProto. static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); @@ -290,8 +292,11 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: e->base.fqname = upb_string_tryrecycle(e->base.fqname); CHECKSRC(upb_src_getstr(src, e->base.fqname)); + break; case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); CHECK(upb_addenum_val(src, e, status)); + CHECKSRC(upb_src_endmsg(src)); break; default: upb_src_skipval(src); @@ -729,8 +734,10 @@ err: // We need to free all defs from "tmptab." upb_rwlock_unlock(&s->lock); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) + e = upb_strtable_next(&tmptab, &e->e)) { + fprintf(stderr, "Unreffing def: '" UPB_STRFMT "'\n", UPB_STRARG(e->e.key)); upb_def_unref(e->def); + } upb_strtable_free(&tmptab); return false; } @@ -914,10 +921,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) key = upb_baredecoder_readv32(d); d->wire_type = key & 0x7; d->field.number = key >> 3; + fprintf(stderr, "field num: %d, wire_type: %d\n", d->field.number, d->wire_type); if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. d->delimited_len = upb_baredecoder_readv32(d); + fprintf(stderr, "delimited size: %d\n", d->delimited_len); } return &d->field; } @@ -944,6 +953,7 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) *val.uint32 = upb_baredecoder_readf32(d); break; default: + *(char*)0 = 0; assert(false); } return true; @@ -951,19 +961,24 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) static bool upb_baredecoder_skipval(upb_baredecoder *d) { - upb_value val; - return upb_baredecoder_getval(d, upb_value_addrof(&val)); + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + d->offset += d->delimited_len; + return true; + } else { + upb_value val; + return upb_baredecoder_getval(d, upb_value_addrof(&val)); + } } static bool upb_baredecoder_startmsg(upb_baredecoder *d) { - *(d->top++) = d->offset + d->delimited_len; + *(++d->top) = d->offset + d->delimited_len; return true; } static bool upb_baredecoder_endmsg(upb_baredecoder *d) { - d->offset = *(--d->top); + d->offset = *(d->top--); return true; } @@ -980,7 +995,9 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); + d->offset = 0; d->top = &d->stack[0]; + *(d->top) = upb_string_len(d->input); upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; } @@ -1001,9 +1018,17 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) // TODO: allow upb_strings to be static or on the stack. upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); - upb_status status; + upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); - assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); + + if(!upb_ok(&status)) { + // upb itself is corrupt. + upb_printerr(&status); + upb_symtab_unref(symtab); + abort(); + } + fprintf(stderr, "Claims to have succeeded\n"); + upb_printerr(&status); } diff --git a/core/upb_string.c b/core/upb_string.c index 2f487aa..3563c9e 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -87,6 +87,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len) { + if(str->ptr) *(char*)0 = 0; assert(str->ptr == NULL); str->src = upb_string_getref(target_str); str->ptr = upb_string_getrobuf(target_str) + start; @@ -103,11 +104,15 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { uint32_t true_size = vsnprintf(buf, size, format, args_copy); va_end(args_copy); - if (true_size > size) { - // Need to reallocate. + if (true_size >= size) { + // Need to reallocate. We reallocate even if the sizes were equal, + // because snprintf excludes the terminating NULL from its count. + // We don't care about the terminating NULL, but snprintf might + // bail out of printing even other characters if it doesn't have + // enough space to write the NULL also. str = upb_string_tryrecycle(str); - buf = upb_string_getrwbuf(str, true_size); - vsnprintf(buf, true_size, format, args); + buf = upb_string_getrwbuf(str, true_size + 1); + vsnprintf(buf, true_size + 1, format, args); } str->len = true_size; } diff --git a/tests/test_string.c b/tests/test_string.c index 5869b70..46f35b9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -32,6 +32,7 @@ int main() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); + str = upb_string_tryrecycle(str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -63,9 +64,17 @@ int main() { // Test printf. str = upb_string_tryrecycle(str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + assert(upb_streqlc(str, "Number: 5, String: YO!")); + + // Test asprintf + upb_string *str3 = upb_string_asprintf("Yo %s: " UPB_STRFMT "\n", + "Josh", UPB_STRARG(str)); + const char expected[] = "Yo Josh: Number: 5, String: YO!\n"; + assert(upb_streqlc(str3, expected)); upb_string_unref(str); upb_string_unref(str2); + upb_string_unref(str3); // Unref of NULL is harmless. upb_string_unref(NULL); -- cgit v1.2.3 From c7a95061a7c02ffeebd71eeb56bf19fc1c1797dd Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 20:13:06 -0700 Subject: Successfully bootstraps!! --- core/upb.c | 2 +- core/upb.h | 2 +- core/upb_def.c | 27 ++++++++++++++------------- tests/test_def.c | 24 ++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) create mode 100644 tests/test_def.c (limited to 'tests') diff --git a/core/upb.c b/core/upb.c index d581bbe..c396323 100644 --- a/core/upb.c +++ b/core/upb.c @@ -59,7 +59,7 @@ void upb_copyerr(upb_status *to, upb_status *from) if(from->str) to->str = upb_string_getref(from->str); } -void upb_status_reset(upb_status *status) { +void upb_clearerr(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 13317bb..b605fd9 100644 --- a/core/upb.h +++ b/core/upb.h @@ -201,7 +201,7 @@ INLINE void upb_status_init(upb_status *status) { } void upb_printerr(upb_status *status); -void upb_status_reset(upb_status *status); +void upb_clearerr(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index 2b2916e..b9402c5 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -211,7 +211,9 @@ static void upb_def_uninit(upb_def *def) { typedef struct _upb_unresolveddef { upb_def base; - // The target type name. This may or may not be fully qualified. + // The target type name. This may or may not be fully qualified. It is + // tempting to want to use base.fqname for this, but that will be qualified + // which is inappropriate for a name we still have to resolve. upb_string *name; } upb_unresolveddef; @@ -224,6 +226,7 @@ static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { } static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { + upb_string_unref(def->name); upb_def_uninit(&def->base); free(def); } @@ -232,6 +235,10 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ static void upb_enumdef_free(upb_enumdef *e) { + upb_enum_iter i; + for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + upb_string_unref(upb_enum_iter_name(i)); + } upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); upb_def_uninit(&e->base); @@ -328,14 +335,11 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { /* upb_fielddef ***************************************************************/ static void upb_fielddef_free(upb_fielddef *f) { - free(f); -} - -static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); if(f->owned) { upb_def_unref(f->def); } + free(f); } static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) @@ -453,7 +457,7 @@ static void upb_msgdef_free(upb_msgdef *m) { upb_msg_iter i; for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_uninit(upb_msg_iter_field(i)); + upb_fielddef_free(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -487,7 +491,7 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: package = upb_string_tryrecycle(package); CHECKSRC(upb_src_getstr(src, package)); break; @@ -589,6 +593,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) "in a cycle of length %d, which exceeds the maximum type " "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, UPB_MAX_TYPE_CYCLE_LEN); + return false; } return true; } else if(UPB_UPCAST(m)->search_depth > 0) { @@ -664,7 +669,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; // The findcycles() call will decrement the external refcount of the - if(!upb_symtab_findcycles(m, 0, status)) return false; + upb_symtab_findcycles(m, 0, status); upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); } @@ -735,7 +740,6 @@ err: upb_rwlock_unlock(&s->lock); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; e = upb_strtable_next(&tmptab, &e->e)) { - fprintf(stderr, "Unreffing def: '" UPB_STRFMT "'\n", UPB_STRARG(e->e.key)); upb_def_unref(e->def); } upb_strtable_free(&tmptab); @@ -921,12 +925,10 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) key = upb_baredecoder_readv32(d); d->wire_type = key & 0x7; d->field.number = key >> 3; - fprintf(stderr, "field num: %d, wire_type: %d\n", d->field.number, d->wire_type); if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. d->delimited_len = upb_baredecoder_readv32(d); - fprintf(stderr, "delimited size: %d\n", d->delimited_len); } return &d->field; } @@ -1026,9 +1028,8 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) if(!upb_ok(&status)) { // upb itself is corrupt. upb_printerr(&status); + upb_clearerr(&status); upb_symtab_unref(symtab); abort(); } - fprintf(stderr, "Claims to have succeeded\n"); - upb_printerr(&status); } diff --git a/tests/test_def.c b/tests/test_def.c new file mode 100644 index 0000000..e6f95d7 --- /dev/null +++ b/tests/test_def.c @@ -0,0 +1,24 @@ + +#undef NDEBUG /* ensure tests always assert. */ +#include "upb_def.h" +#include + +int main() { + upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); + + int count; + upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); + for (int i = 0; i < count; i++) { + upb_def_unref(defs[i]); + } + free(defs); + + upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); + upb_def *fds = upb_symtab_lookup(s, str); + assert(fds != NULL); + assert(upb_dyncast_msgdef(fds) != NULL); + upb_def_unref(fds); + upb_string_unref(str); + upb_symtab_unref(s); +} -- cgit v1.2.3 From 7a6a702792e769366a8852fc90dbea9cfc9e01c0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 11 Jul 2010 18:53:27 -0700 Subject: Allow static upb_strings. This can allow strings to reference static data, and reduced the memory footprint of test_def by about 10% (3k). --- core/upb_def.c | 4 +--- core/upb_string.c | 12 ++++++++--- core/upb_string.h | 57 +++++++++++++++++++++++++++++++++++++++++++++---- descriptor/descriptor.c | 7 ++++-- descriptor/descriptor.h | 5 +++-- tests/test_string.c | 33 +++++++++++++++++++++++++++- 6 files changed, 103 insertions(+), 15 deletions(-) (limited to 'tests') diff --git a/core/upb_def.c b/core/upb_def.c index b9402c5..c0d72db 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -1018,12 +1018,10 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // TODO: allow upb_strings to be static or on the stack. - upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); - upb_baredecoder *decoder = upb_baredecoder_new(descriptor); + upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); upb_baredecoder_free(decoder); - upb_string_unref(descriptor); if(!upb_ok(&status)) { // upb itself is corrupt. diff --git a/core/upb_string.c b/core/upb_string.c index 3563c9e..ca3c669 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,12 +61,12 @@ void _upb_string_free(upb_string *str) { } upb_string *upb_string_tryrecycle(upb_string *str) { - if(str == NULL || upb_atomic_read(&str->refcount) > 1) { - return upb_string_new(); - } else { + if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); return str; + } else { + return upb_string_new(); } } @@ -125,3 +125,9 @@ upb_string *upb_string_asprintf(const char *format, ...) { va_end(args); return str; } + +upb_string *upb_strdup(upb_string *s) { + upb_string *str = upb_string_new(); + upb_strcpy(str, s); + return str; +} diff --git a/core/upb_string.h b/core/upb_string.h index 5cc0eaf..65ba404 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -63,6 +63,17 @@ struct _upb_string { struct _upb_string *src; }; +// Internal-only initializer for upb_string instances. +#ifdef UPB_HAVE_MSIZE +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, {refcount}, NULL} +#else +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, 0, {refcount}, NULL} +#endif + +// Special pseudo-refcounts for static/stack-allocated strings, respectively. +#define _UPB_STRING_REFCOUNT_STATIC -1 +#define _UPB_STRING_REFCOUNT_STACK -2 + // Returns a newly-created, empty, non-finalized string. When the string is no // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); @@ -72,15 +83,21 @@ void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" // can be NULL, in which case this is a no-op. INLINE void upb_string_unref(upb_string *str) { - if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); + if (str && upb_atomic_read(&str->refcount) > 0 && + upb_atomic_unref(&str->refcount)) { + _upb_string_free(str); + } } +upb_string *upb_strdup(upb_string *s); // Forward-declare. + // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. INLINE upb_string *upb_string_getref(upb_string *str) { - // If/when we support stack-allocated strings, this will have to allocate - // a new string if the given string is on the stack. - upb_atomic_ref(&str->refcount); + int refcount = upb_atomic_read(&str->refcount); + if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); + // We don't ref the special <0 refcount for static strings. + if (refcount > 0) upb_atomic_ref(&str->refcount); return str; } @@ -151,6 +168,38 @@ void upb_string_substr(upb_string *str, upb_string *target_str, #define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) #define UPB_STRFMT "%.*s" +// Macros for constructing upb_string objects statically or on the stack. These +// can be used like: +// +// upb_string static_str = UPB_STATIC_STRING("Foo"); +// +// int main() { +// upb_string stack_str = UPB_STACK_STRING("Foo"); +// // Now: +// // upb_streql(&static_str, &stack_str) == true +// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true +// } +// +// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays, +// but you must not change the underlying data once you've passed the string on: +// +// void foo() { +// char data[] = "ABC123"; +// upb_string stack_str = UPB_STACK_STR(data); +// bar(&stack_str); +// data[0] = "B"; // NOT ALLOWED!! +// } +// +// TODO: should the stack business just be like attach/detach? The latter seems +// more flexible, though it does require a stack allocation. Maybe put this off +// until there is a clear use case. +#define UPB_STATIC_STRING(str) \ + _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STATIC_STRING_LEN(str, len) \ + _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STACK_STRING(str) _UPB_STRING_INIT(str, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) + /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer diff --git a/descriptor/descriptor.c b/descriptor/descriptor.c index cd50a16..ee6b25b 100644 --- a/descriptor/descriptor.c +++ b/descriptor/descriptor.c @@ -1,4 +1,6 @@ -unsigned char descriptor_pb[] = { +#include "descriptor.h" + +static unsigned char descriptor_pb[] = { 0x0a, 0x9b, 0x1b, 0x0a, 0x1b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x67, 0x6f, @@ -291,4 +293,5 @@ unsigned char descriptor_pb[] = { 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x48, 0x01 }; -unsigned int descriptor_pb_len = 3486; +static const unsigned int descriptor_pb_len = 3486; +upb_string descriptor_str = UPB_STATIC_STRING(descriptor_pb); diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index b598a9a..f6d3ca3 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -11,12 +11,13 @@ #ifndef UPB_DESCRIPTOR_H_ #define UPB_DESCRIPTOR_H_ +#include "upb_string.h" + #ifdef __cplusplus extern "C" { #endif -extern unsigned char descriptor_pb[]; -extern unsigned int descriptor_pb_len; +extern upb_string descriptor_str; #ifdef __cplusplus } /* extern "C" */ diff --git a/tests/test_string.c b/tests/test_string.c index 46f35b9..7c9ed02 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -3,8 +3,33 @@ #include "upb_string.h" char static_str[] = "Static string."; +upb_string static_upbstr = UPB_STATIC_STRING(static_str); -int main() { +static void test_static() { + // Static string is initialized appropriately. + assert(upb_streql(&static_upbstr, UPB_STRLIT("Static string."))); + + // Taking a ref on a static string returns the same string, and repeated + // refs don't get the string in a confused state. + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + + // Unreffing a static string does nothing (is not harmful). + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + + // Recycling a static string returns a new string (that can be modified). + upb_string *str = upb_string_tryrecycle(&static_upbstr); + assert(str != &static_upbstr); + + upb_string_unref(str); +} + +static void test_dynamic() { upb_string *str = upb_string_new(); assert(str != NULL); upb_string_unref(str); @@ -29,6 +54,7 @@ int main() { const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); assert(upb_streqlc(str, "XX")); + assert(upb_streql(str, UPB_STRLIT("XX"))); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -79,3 +105,8 @@ int main() { // Unref of NULL is harmless. upb_string_unref(NULL); } + +int main() { + test_static(); + test_dynamic(); +} -- cgit v1.2.3 From 79de3ca9e48877dfc506c9a486e1e5599c2312f9 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 13:49:50 -0700 Subject: Add forgotten test_decoder.c. --- tests/test_decoder.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/test_decoder.c (limited to 'tests') diff --git a/tests/test_decoder.c b/tests/test_decoder.c new file mode 100644 index 0000000..0e6f19c --- /dev/null +++ b/tests/test_decoder.c @@ -0,0 +1,32 @@ + +#include "upb_decoder.h" +#include "upb_textprinter.h" +#include "upb_stdio.h" + +int main() { + upb_symtab *symtab = upb_symtab_new(); + upb_symtab_add_descriptorproto(symtab); + upb_def *fds = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + + upb_stdio *in = upb_stdio_new(); + upb_stdio_reset(in, stdin); + upb_stdio *out = upb_stdio_new(); + upb_stdio_reset(out, stdout); + upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); + upb_decoder_reset(d, upb_stdio_bytesrc(in)); + upb_textprinter *p = upb_textprinter_new(); + upb_textprinter_reset(p, upb_stdio_bytesink(out), false); + + upb_status status = UPB_STATUS_INIT; + upb_streamdata(upb_decoder_src(d), upb_textprinter_sink(p), &status); + + assert(upb_ok(&status)); + + upb_stdio_free(in); + upb_stdio_free(out); + upb_decoder_free(d); + upb_textprinter_free(p); + upb_def_unref(fds); + upb_symtab_unref(symtab); +} -- cgit v1.2.3 From 21ee24a7300dbdabef707457d2407b4f9187603b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 18:59:01 -0700 Subject: Updated Lua extension to handle fielddefs. --- core/upb_def.c | 1 + core/upb_def.h | 22 ++++++++++------- core/upb_table.c | 2 +- lang_ext/lua/upb.c | 71 +++++++++++++++++++++++++++++++++++++++++++----------- tests/test_def.c | 2 ++ 5 files changed, 74 insertions(+), 24 deletions(-) (limited to 'tests') diff --git a/core/upb_def.c b/core/upb_def.c index 1feaf9d..e40e1f0 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -355,6 +355,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->name = NULL; f->def = NULL; f->owned = false; + f->msgdef = m; upb_fielddef *parsed_f; int32_t tmp; while((parsed_f = upb_src_getdef(src))) { diff --git a/core/upb_def.h b/core/upb_def.h index ae9e0fa..5c19a7a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -87,23 +87,27 @@ INLINE void upb_def_unref(upb_def *def) { // is either a field of a upb_msgdef or contained inside a upb_extensiondef. // It is also reference-counted. typedef struct _upb_fielddef { - upb_atomic_refcount_t refcount; - upb_string *name; - upb_field_number_t number; - upb_field_type_t type; - upb_label_t label; upb_value default_value; + upb_string *name; + + struct _upb_msgdef *msgdef; + // For the case of an enum or a submessage, points to the def for that type. upb_def *def; - // True if we own a ref on "def" (above). This is true unless this edge is - // part of a cycle. - bool owned; + upb_atomic_refcount_t refcount; + uint32_t byte_offset; // Where in a upb_msg to find the data. // These are set only when this fielddef is part of a msgdef. - uint32_t byte_offset; // Where in a upb_msg to find the data. upb_field_count_t field_index; // Indicates set bit. + + upb_field_number_t number; + upb_field_type_t type; + upb_label_t label; + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; } upb_fielddef; // A variety of tests about the type of a field. diff --git a/core/upb_table.c b/core/upb_table.c index b860204..a6e0a56 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -28,7 +28,7 @@ void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) { t->count = 0; t->entry_size = entry_size; - t->size_lg2 = 1; + t->size_lg2 = 0; while(size >>= 1) t->size_lg2++; size_t bytes = upb_table_size(t) * t->entry_size; t->mask = upb_table_size(t) - 1; diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index bfc1355..a16a187 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -15,10 +15,14 @@ // We cache all the lua objects (userdata) we vend in a weak table, indexed by // the C pointer of the object they are caching. -typedef void (*lupb_unref)(void *cobj); +typedef void (*lupb_cb)(void *cobj); + +static void lupb_nop(void *foo) { + (void)foo; +} static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, - lupb_unref unref) { + lupb_cb ref, lupb_cb unref) { // Lookup our cache in the registry (we don't put our objects in the registry // directly because we need our cache to be a weak table). lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache"); @@ -40,6 +44,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, lua_pushlightuserdata(L, cobj); lua_pushvalue(L, -2); lua_rawset(L, -4); + ref(cobj); } else { unref(cobj); } @@ -73,29 +78,21 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { luaL_error(L, "unknown deftype %d", def->type); type_name = NULL; // Placate the compiler. } - return lupb_cache_getorcreate(L, def, type_name, lupb_def_unref); + return lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref); } +// msgdef + static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { return luaL_checkudata(L, narg, "upb.msgdef"); } -static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.enumdef"); -} - static int lupb_msgdef_gc(lua_State *L) { lupb_def *ldef = lupb_msgdef_check(L, 1); upb_def_unref(ldef->def); return 0; } -static int lupb_enumdef_gc(lua_State *L) { - lupb_def *ldef = lupb_enumdef_check(L, 1); - upb_def_unref(ldef->def); - return 0; -} - static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} @@ -105,6 +102,18 @@ static const struct luaL_Reg lupb_msgdef_m[] = { {NULL, NULL} }; +// enumdef + +static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.enumdef"); +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_def *ldef = lupb_enumdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + static const struct luaL_Reg lupb_enumdef_mm[] = { {"__gc", lupb_enumdef_gc}, {NULL, NULL} @@ -115,6 +124,40 @@ static const struct luaL_Reg lupb_enumdef_m[] = { }; +/* lupb_fielddef **************************************************************/ + +typedef struct { + upb_fielddef *field; +} lupb_fielddef; + +static void lupb_fielddef_ref(void *cobj) { + upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef)); +} + +static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) { + lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop); +} + +static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.fielddef"); +} + +static int lupb_fielddef_gc(lua_State *L) { + lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); + upb_def_unref(UPB_UPCAST(lfielddef->field->msgdef)); + return 0; +} + +static const struct luaL_Reg lupb_fielddef_mm[] = { + {"__gc", lupb_fielddef_gc}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_fielddef_m[] = { + {NULL, NULL} +}; + + /* lupb_symtab ****************************************************************/ typedef struct { @@ -193,7 +236,7 @@ static const struct luaL_Reg lupb_symtab_mm[] = { static int lupb_symtab_new(lua_State *L) { upb_symtab *s = upb_symtab_new(); - lupb_cache_getorcreate(L, s, "upb.symtab", lupb_symtab_unref); + lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref); return 1; } diff --git a/tests/test_def.c b/tests/test_def.c index e6f95d7..732835d 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -14,6 +14,8 @@ int main() { } free(defs); + printf("Size: %zd\n", sizeof(upb_ntof_ent)); + upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); -- cgit v1.2.3 From a9e998159c5ac8c4f2644b5ed0eda2e8ff1f8706 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 2 Aug 2010 10:25:24 -0700 Subject: Fleshed out upb_msg: test_vs_proto2 compiles but fails. --- Makefile | 10 ++-- core/upb.h | 98 ++++++++++++++++++++++++++++++++++---- core/upb_atomic.h | 4 ++ core/upb_def.c | 65 ++++++++++++++++++++++++- core/upb_def.h | 28 +++++++++-- core/upb_msg.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ core/upb_msg.h | 114 ++++++++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 8 ++-- stream/upb_strstream.h | 2 +- tests/test_vs_proto2.cc | 54 ++++++++++++--------- 10 files changed, 452 insertions(+), 54 deletions(-) create mode 100644 core/upb_msg.c (limited to 'tests') diff --git a/Makefile b/Makefile index 203bed6..131b3c0 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ + core/upb_msg.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -101,14 +102,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ tests/test_def \ - tests/test_decoder -tests: $(TESTS) - -OTHER_TESTS=tests/tests \ - tests/test_table \ + tests/test_decoder \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ $(TESTS): core/libupb.a VALGRIND=valgrind --leak-check=full --error-exitcode=1 diff --git a/core/upb.h b/core/upb.h index b605fd9..7ee0469 100644 --- a/core/upb.h +++ b/core/upb.h @@ -80,24 +80,16 @@ enum upb_wire_type { typedef uint8_t upb_wire_type_t; -// Value type as defined in a .proto file. eg. string, int32, etc. The +// Type of a field as defined in a .proto file. eg. string, int32, etc. The // integers that represent this are defined by descriptor.proto. Note that // descriptor.proto reserves "0" for errors, and we use it to represent // exceptional circumstances. -typedef uint8_t upb_field_type_t; +typedef uint8_t upb_fieldtype_t; // For referencing the type constants tersely. #define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type #define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} - // Info for a given field type. typedef struct { uint8_t align; @@ -129,6 +121,10 @@ typedef union { struct _upb_string; typedef struct _upb_string upb_string; +struct _upb_array; +typedef struct _upb_array upb_array; +struct _upb_msg; +typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; @@ -142,6 +138,11 @@ typedef union { uint32_t uint32; uint64_t uint64; bool _bool; + upb_string *str; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; } upb_value; // A pointer to a .proto value. The owner must have an out-of-band way of @@ -155,13 +156,90 @@ typedef union { uint32_t *uint32; uint64_t *uint64; bool *_bool; + upb_string **str; + upb_msg **msg; + upb_array **arr; + void *_void; } upb_valueptr; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + INLINE upb_valueptr upb_value_addrof(upb_value *val) { upb_valueptr ptr = {&val->_double}; return ptr; } +// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to +// know the value type to perform this operation, because we need to know how +// much memory to copy. +INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { + upb_value val; + +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + return val; + +#undef CASE +} + +// Writes a upb_value to a upb_value_ptr location. We need to know the value +// type to perform this operation, because we need to know how much memory to +// copy. +INLINE void upb_value_write(upb_valueptr ptr, upb_value val, + upb_fieldtype_t ft) { +#define CASE(t, member_name) \ + case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + +#undef CASE +} + // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { diff --git a/core/upb_atomic.h b/core/upb_atomic.h index 01fc8a2..1cd848b 100644 --- a/core/upb_atomic.h +++ b/core/upb_atomic.h @@ -127,6 +127,10 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { Implement them or compile with UPB_THREAD_UNSAFE. #endif +INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) { + return upb_atomic_read(a) == 1; +} + /* Reader/Writer lock. ********************************************************/ #ifdef UPB_THREAD_UNSAFE diff --git a/core/upb_def.c b/core/upb_def.c index e117455..1c8fbdc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -12,6 +12,16 @@ #define CHECKSRC(x) if(!(x)) goto src_err #define CHECK(x) if(!(x)) goto err +/* Rounds p up to the next multiple of t. */ +static size_t upb_align_up(size_t val, size_t align) { + return val % align == 0 ? val : val + align - (val % align); +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; +} + // A little dynamic array for storing a growing list of upb_defs. typedef struct { upb_def **defs; @@ -409,6 +419,19 @@ src_err: /* upb_msgdef *****************************************************************/ +static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { + // Sort by data size (ascending) to reduce padding. + size_t size1 = upb_types[f1->type].size; + size_t size2 = upb_types[f2->type].size; + if (size1 != size2) return size1 - size2; + // Otherwise return in number order (just so we get a reproduceable order. + return f1->number - f2->number; +} + +static int upb_compare_fields(const void *f1, const void *f2) { + return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +} + // Processes a google.protobuf.DescriptorProto, adding defs to "defs." static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) { @@ -418,7 +441,6 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); int32_t start_count = defs->len; - upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { @@ -451,6 +473,45 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); goto err; } + + + // Create an ordering over the fields. + upb_field_count_t n = upb_msgdef_numfields(m); + upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); + upb_field_count_t field = 0; + upb_msg_iter i; + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + sorted_fields[field++]= upb_msg_iter_field(i); + } + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); + + // Assign offsets in the msg. + m->set_flags_bytes = upb_div_round_up(n, 8); + m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes; + + size_t max_align = 0; + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + upb_type_info *type_info = &upb_types[f->type]; + + // This identifies the set bit. When we implement is_initialized (a + // general check about whether all required bits are set) we will probably + // want to use a different ordering that puts all the required bits + // together. + f->field_index = i; + + // General alignment rules are: each member must be at an address that is a + // multiple of that type's alignment. Also, the size of the structure as a + // whole must be a multiple of the greatest alignment of any member. + size_t offset = upb_align_up(m->size, type_info->align); + // Offsets are relative to the end of the refcount. + f->byte_offset = offset - sizeof(upb_atomic_refcount_t); + m->size = offset + type_info->size; + max_align = UPB_MAX(max_align, type_info->align); + } + + if (max_align > 0) m->size = upb_align_up(m->size, max_align); + upb_deflist_qualify(defs, m->base.fqname, start_count); upb_deflist_push(defs, UPB_UPCAST(m)); return true; @@ -664,7 +725,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, } // Check the type of the found def. - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); return false; diff --git a/core/upb_def.h b/core/upb_def.h index 3294a8d..9eb961a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -103,7 +103,7 @@ typedef struct _upb_fielddef { upb_field_count_t field_index; // Indicates set bit. upb_field_number_t number; - upb_field_type_t type; + upb_fieldtype_t type; upb_label_t label; // True if we own a ref on "def" (above). This is true unless this edge is // part of a cycle. @@ -112,10 +112,10 @@ typedef struct _upb_fielddef { // A variety of tests about the type of a field. INLINE bool upb_issubmsg(upb_fielddef *f) { - return upb_issubmsgtype(f->type); + return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE); } INLINE bool upb_isstring(upb_fielddef *f) { - return upb_isstringtype(f->type); + return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES); } INLINE bool upb_isarray(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); @@ -125,6 +125,19 @@ INLINE bool upb_hasdef(upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } +INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { + if (upb_isarray(f)) { + return UPB_VALUETYPE_ARRAY; + } else { + return f->type; + } +} + +INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { + assert(upb_isarray(f)); + return f->type; +} + INLINE bool upb_field_ismm(upb_fielddef *f) { return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); } @@ -139,6 +152,8 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; + uint32_t size; + uint32_t set_flags_bytes; // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -169,9 +184,14 @@ INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) { + return upb_strtable_count(&m->ntof); +} + // Iteration over fields. The order is undefined. // upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { +// upb_fielddef *f = upb_msg_iter_field(i); // // ... // } typedef upb_itof_ent *upb_msg_iter; diff --git a/core/upb_msg.c b/core/upb_msg.c new file mode 100644 index 0000000..75f7a35 --- /dev/null +++ b/core/upb_msg.c @@ -0,0 +1,123 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * Data structure for storing a message of protobuf data. + */ + +#include "upb_msg.h" + +void _upb_elem_free(upb_value v, upb_fielddef *f) { + switch(f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + _upb_msg_free(v.msg, upb_downcast_msgdef(f->def)); + break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + _upb_string_free(v.str); + break; + default: + abort(); + } +} + +void _upb_field_free(upb_value v, upb_fielddef *f) { + if (upb_isarray(f)) { + _upb_array_free(v.arr, f); + } else { + _upb_elem_free(v, f); + } +} + +upb_msg *upb_msg_new(upb_msgdef *md) { + upb_msg *msg = malloc(md->size); + // Clear all set bits and cached pointers. + memset(msg, 0, md->size); + upb_atomic_refcount_init(&msg->refcount, 1); + return msg; +} + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { + // Need to release refs on all sub-objects. + upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f); + } + free(msg); +} + +upb_array *upb_array_new(void) { + upb_array *arr = malloc(sizeof(*arr)); + upb_atomic_refcount_init(&arr->refcount, 1); + arr->size = 0; + arr->len = 0; + arr->elements._void = NULL; + return arr; +} + +void _upb_array_free(upb_array *arr, upb_fielddef *f) { + if (upb_elem_ismm(f)) { + // Need to release refs on sub-objects. + upb_valuetype_t type = upb_elem_valuetype(f); + for (upb_arraylen_t i = 0; i < arr->size; i++) { + upb_valueptr p = _upb_array_getptr(arr, f, i); + _upb_elem_unref(upb_value_read(p, type), f); + } + } + if (arr->elements._void) free(arr->elements._void); + free(arr); +} + +upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) { + upb_value v; + switch(type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + v.msg = upb_msg_new(upb_downcast_msgdef(f->def)); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + v.str = upb_string_new(); + case UPB_VALUETYPE_ARRAY: + v.arr = upb_array_new(); + default: + abort(); + } + return v; +} + +static void upb_field_recycle(upb_value val) { + (void)val; +} + +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, + upb_valuetype_t type) { + if (val._void == NULL || !upb_atomic_only(val.refcount)) { + if (val._void != NULL) upb_atomic_unref(val.refcount); + val = upb_field_new(f, type); + upb_value_write(p, val, type); + } else { + upb_field_recycle(val); + } + return val; +} + +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} + +void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} diff --git a/core/upb_msg.h b/core/upb_msg.h index 5215bd9..2db67c0 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -9,14 +9,39 @@ #ifndef UPB_MSG_H #define UPB_MSG_H +#include "upb.h" +#include "upb_def.h" +#include + #ifdef __cplusplus extern "C" { #endif -typedef struct { +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f, + upb_valuetype_t type); + +INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); } + +void _upb_field_free(upb_value v, upb_fielddef *f); +void _upb_elem_free(upb_value v, upb_fielddef *f); +INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) { + assert(upb_field_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_field_free(v, f); +} +INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_elem_free(v, f); +} + +/* upb_array ******************************************************************/ + +typedef uint32_t upb_arraylen_t; +struct _upb_array { upb_atomic_refcount_t refcount; - uint32_t len; - uint32_t size; + upb_arraylen_t len; + upb_arraylen_t size; upb_valueptr elements; }; @@ -31,29 +56,70 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); } +INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, + uint32_t elem) { + upb_valueptr p; + p._void = &a->elements.uint8[elem * upb_types[f->type].size]; + return p; +} + INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { assert(elem < upb_array_len(a)); return upb_value_read(_upb_array_getptr(a, f, elem), f->type); } // For string or submessages, will release a ref on the previously set value. +// and take a ref on the new value. The array must already be at least "elem" +// long; to append use append_mutable. INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, upb_value val) { + assert(elem < upb_array_len(a)); + upb_valueptr p = _upb_array_getptr(a, f, elem); + if (upb_elem_ismm(f)) { + _upb_elem_unref(upb_value_read(p, f->type), f); + _upb_value_ref(val); + } + upb_value_write(p, val, f->type); } -// Append an element with the default value, returning it. For strings or -// submessages, this will try to reuse previously allocated memory. -INLINE upb_value upb_array_append_mutable(upb_array *a, upb_fielddef *f) { +INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { + if (a->len == a->size) { + a->len *= 2; + a->elements._void = realloc(a->elements._void, + a->len * upb_types[f->type].size); + } } -typedef struct { +// Append an element to an array of string or submsg with the default value, +// returning it. This will try to reuse previously allocated memory. +INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_array_resize(a, f); + upb_valueptr p = _upb_array_getptr(a, f, a->len++); + upb_valuetype_t type = upb_elem_valuetype(f); + upb_value val = upb_value_read(p, type); + val = upb_field_tryrecycle(p, val, f, type); + return val; +} + + +/* upb_msg ********************************************************************/ + +struct _upb_msg { upb_atomic_refcount_t refcount; uint8_t data[4]; // We allocate the appropriate amount per message. -} upb_msg; +}; // Creates a new msg of the given type. upb_msg *upb_msg_new(upb_msgdef *md); +// Returns a pointer to the given field. +INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p; + p._void = &msg->data[f->byte_offset]; + return p; +} + void _upb_msg_free(upb_msg *msg, upb_msgdef *md); INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); @@ -65,6 +131,10 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; } +INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { + msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); +} + // Returns the current value of the given field if set, or the default value if // not set. INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { @@ -79,12 +149,29 @@ INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { // Otherwise sets it and returns an empty instance, attempting to reuse any // previously allocated memory. INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + upb_value val = upb_value_read(p, type); + if (!upb_msg_has(msg, f)) { + upb_msg_sethas(msg, f); + val = upb_field_tryrecycle(p, val, f, type); + } + return val; } // Sets the current value of the field. If this is a string, array, or // submessage field, releases a ref on the value (if any) that was previously // set. INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) { + _upb_field_unref(upb_value_read(p, type), f); + _upb_value_ref(val); + } + upb_msg_sethas(msg, f); + upb_value_write(p, val, upb_field_valuetype(f)); } // Unsets all field values back to their defaults. @@ -92,6 +179,17 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } +// A convenience function for decoding an entire protobuf all at once, without +// having to worry about setting up the appropriate objects. +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + +// A convenience function for encoding an entire protobuf all at once. If an +// error occurs, the null string is returned and the status object contains +// the error. +void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 7591f78..c35212e 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,8 +14,10 @@ // Returns true if the give wire type and field type combination is valid, // taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - return (1 << wt) & upb_types[ft].allowed_wire_types; +static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) { + // TODO: need to take into account the label; only repeated fields are + // allowed to use packed encoding. + return (1 << wt) & upb_types[f->type].allowed_wire_types; } // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -358,7 +360,7 @@ again: // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; - } else if (!upb_check_type(wire_type, f->type)) { + } else if (!upb_check_type(wire_type, f)) { // This is a recoverable error condition. We skip the value but also // return NULL and report the error. upb_decoder_skipval(d); diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h index fa9bace..d01d21f 100644 --- a/stream/upb_strstream.h +++ b/stream/upb_strstream.h @@ -31,7 +31,7 @@ void upb_stringsrc_free(upb_stringsrc *s); void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); // Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. -upb_bytesrc *upb_stringsrc_bytesrc(); +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); /* upb_stringsink *************************************************************/ diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 9083788..9446b8f 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -4,9 +4,10 @@ #include #include #include -#include "upb_data.h" +#include "upb_msg.h" #include "upb_def.h" #include "upb_decoder.h" +#include "upb_strstream.h" int num_assertions = 0; #define ASSERT(expr) do { \ @@ -25,7 +26,7 @@ void compare_arrays(const google::protobuf::Reflection *r, upb_msg *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); - upb_arrayptr arr = upb_msg_get(upb_msg, upb_f).arr; + upb_array *arr = upb_msg_get(upb_msg, upb_f).arr; ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { upb_value v = upb_array_get(arr, upb_f, i); @@ -63,7 +64,7 @@ void compare_arrays(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -116,7 +117,7 @@ void compare_values(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetString(proto2_msg, proto2_f); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -133,9 +134,10 @@ void compare(const google::protobuf::Message& proto2_msg, const google::protobuf::Reflection *r = proto2_msg.GetReflection(); const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor(); - ASSERT((upb_field_count_t)d->field_count() == upb_md->num_fields); - for(upb_field_count_t i = 0; i < upb_md->num_fields; i++) { - upb_fielddef *upb_f = &upb_md->fields[i]; + ASSERT((upb_field_count_t)d->field_count() == upb_msgdef_numfields(upb_md)); + upb_msg_iter i; + for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) { + upb_fielddef *upb_f = upb_msg_iter_field(i); const google::protobuf::FieldDescriptor *proto2_f = d->FindFieldByNumber(upb_f->number); // Make sure the definitions are equal. @@ -143,7 +145,7 @@ void compare(const google::protobuf::Message& proto2_msg, ASSERT(proto2_f); ASSERT(upb_f->number == proto2_f->number()); ASSERT(std::string(upb_string_getrobuf(upb_f->name), - upb_strlen(upb_f->name)) == + upb_string_len(upb_f->name)) == proto2_f->name()); ASSERT(upb_f->type == proto2_f->type()); ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated()); @@ -166,10 +168,10 @@ void compare(const google::protobuf::Message& proto2_msg, void parse_and_compare(MESSAGE_CIDENT *proto2_msg, upb_msg *upb_msg, upb_msgdef *upb_md, - upb_strptr str) + upb_string *str) { // Parse to both proto2 and upb. - ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_strlen(str))); + ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str))); upb_status status = UPB_STATUS_INIT; upb_msg_decodestr(upb_msg, upb_md, str, &status); ASSERT(upb_ok(&status)); @@ -194,22 +196,32 @@ int main(int argc, char *argv[]) // Initialize upb state, parse descriptor. upb_status status = UPB_STATUS_INIT; - upb_symtab *c = upb_symtab_new(); - upb_strptr fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); - if(upb_string_isnull(fds)) { + upb_symtab *symtab = upb_symtab_new(); + upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds == NULL) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return 1; } - upb_symtab_add_desc(c, fds, &status); + upb_symtab_add_descriptorproto(symtab); + upb_def *fds_msgdef = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds); + upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef)); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc)); + upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status); if(!upb_ok(&status)) { - fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": %s.\n", - status.msg); + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); + upb_printerr(&status); return 1; } upb_string_unref(fds); + upb_decoder_free(decoder); + upb_stringsrc_free(ssrc); - upb_strptr proto_name = upb_strdupc(MESSAGE_NAME); - upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(c, proto_name)); + upb_string *proto_name = upb_strdupc(MESSAGE_NAME); + upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name)); if(!def) { fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", UPB_STRARG(proto_name)); @@ -218,8 +230,8 @@ int main(int argc, char *argv[]) upb_string_unref(proto_name); // Read the message data itself. - upb_strptr str = upb_strreadfile(MESSAGE_FILE); - if(upb_string_isnull(str)) { + upb_string *str = upb_strreadfile(MESSAGE_FILE); + if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return 1; } @@ -234,7 +246,7 @@ int main(int argc, char *argv[]) upb_msg_unref(upb_msg, def); upb_def_unref(UPB_UPCAST(def)); upb_string_unref(str); - upb_symtab_unref(c); + upb_symtab_unref(symtab); return 0; } -- cgit v1.2.3 From a695b92ccea4b82180ae45d21d7ed4445f7d0769 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 19:18:22 -0800 Subject: Debugging test_def, it's close to working again! --- Makefile | 10 +++--- core/upb_def.c | 80 +++++++++++++++++++++++++++-------------- core/upb_stream.h | 21 ++++++++--- core/upb_stream_vtbl.h | 96 ++++++++++++++++++++++++++++++++++++++++---------- core/upb_string.c | 8 ++--- core/upb_string.h | 2 +- tests/test_def.c | 1 + tests/test_string.c | 19 +++++----- 8 files changed, 171 insertions(+), 66 deletions(-) (limited to 'tests') diff --git a/Makefile b/Makefile index 42c7d41..af79363 100644 --- a/Makefile +++ b/Makefile @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a @@ -112,13 +112,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_stream \ -# tests/test_def \ + tests/test_def \ +# tests/test_stream \ # tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb -tests: $(TESTS) +tests: $(LIBUPB) $(TESTS) OTHER_TESTS=tests/tests \ $(TESTS): $(LIBUPB) diff --git a/core/upb_def.c b/core/upb_def.c index 79b6632..a935930 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -319,6 +319,18 @@ void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { } // Handlers for google.protobuf.FileDescriptorProto. +static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} + static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { @@ -353,8 +365,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_startmsg, + &upb_defbuilder_FileDescriptorProto_endmsg, &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; @@ -457,9 +469,11 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); upb_string_getref(upb_value_getstr(val)); + b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); + b->saw_number = true; break; default: break; @@ -507,8 +521,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { } static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { - upb_defbuilder *b = _b; - assert(upb_defbuilder_last(b)->fqname != NULL); + (void)_b; + assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL); return UPB_CONTINUE; } @@ -627,10 +641,8 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); - b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; } @@ -720,6 +732,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { m->size = offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); } + free(sorted_fields); if (max_align > 0) m->size = upb_align_up(m->size, max_align); @@ -1131,6 +1144,7 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) // * keeping a pointer to the upb_fielddef* and reading it later (the same // upb_fielddef is reused over and over). // * detecting errors in the input (we trust that our input is known-good). +// * skipping the rest of the submessage (UPB_SKIPSUBMSG). // // It also does not support any of the follow protobuf features: // * packed fields. @@ -1189,18 +1203,27 @@ static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) return val; } -bool upb_baredecoder_run(upb_baredecoder *d) { +static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_baredecoder *d = (upb_baredecoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers); +} + +static void upb_baredecoder_run(upb_src *src, upb_status *status) { + upb_baredecoder *d = (upb_baredecoder*)src; + assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; upb_strlen_t stack[UPB_MAX_NESTING]; upb_strlen_t *top = &stack[0]; *top = upb_string_len(d->input); d->offset = 0; - upb_dispatch_startmsg(&d->dispatcher); +#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; + + CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. while(d->offset >= *top) { - upb_dispatch_endsubmsg(&d->dispatcher); + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); d->offset = *(top--); } @@ -1216,9 +1239,11 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { + upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(ret); + if(ret == BEGIN_SUBMSG) { // Should deliver as a submessage instead. - upb_dispatch_startsubmsg(&d->dispatcher, &f); + CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f)); *(++top) = d->offset + delim_len; } else { d->offset += delim_len; @@ -1228,11 +1253,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { switch(wt) { case UPB_WIRE_TYPE_VARINT: upb_value_setraw(&v, upb_baredecoder_readv64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_64BIT: upb_value_setraw(&v, upb_baredecoder_readf64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_32BIT: upb_value_setraw(&v, upb_baredecoder_readf32(d)); @@ -1241,28 +1264,33 @@ bool upb_baredecoder_run(upb_baredecoder *d) { assert(false); abort(); } - upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } } - upb_dispatch_endmsg(&d->dispatcher); - return true; + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + printf("SUCCESS!!\n"); + upb_string_unref(str); + return; + +err: + upb_copyerr(status, d->dispatcher.top->handlers.status); + upb_printerr(d->dispatcher.top->handlers.status); + upb_printerr(status); + upb_string_unref(str); + printf("ERROR!!\n"); } static upb_baredecoder *upb_baredecoder_new(upb_string *str) { - //static upb_src_vtable vtbl = { - // (upb_src_getdef_fptr)&upb_baredecoder_getdef, - // (upb_src_getval_fptr)&upb_baredecoder_getval, - // (upb_src_getstr_fptr)&upb_baredecoder_getstr, - // (upb_src_skipval_fptr)&upb_baredecoder_skipval, - // (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, - //}; + static upb_src_vtbl vtbl = { + &upb_baredecoder_sethandlers, + &upb_baredecoder_run, + }; upb_baredecoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); d->input = upb_string_getref(str); d->offset = 0; upb_dispatcher_init(&d->dispatcher); - //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 66bfec2..cf01a5f 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -136,8 +136,8 @@ struct _upb_dispatcher; typedef struct _upb_dispatcher upb_dispatcher; INLINE void upb_dispatcher_init(upb_dispatcher *d); INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE void upb_dispatch_startmsg(upb_dispatcher *d); -INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, @@ -151,8 +151,21 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); -void upb_src_run(upb_src *src, upb_status *status); +// upb_src_sethandlers() must be called once and only once before upb_src_run() +// is called. This sets up the callbacks that will handle the parse. A +// upb_src that is fully initialized except for the call to +// upb_src_sethandlers() is called "prepared" -- this is useful for library +// functions that want to consume the output of a generic upb_src. +// Calling sethandlers() multiple times is an error and will trigger an abort(). +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); + +// Runs the src, calling the callbacks that were registered with +// upb_src_sethandlers(), and returning the status of the operation in +// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a +// non-blocking socket) or a resumable error; in both cases upb_src_run can be +// called again later. TRYAGAIN could come from either the src (input buffers +// are empty) or the handlers (output buffers are full). +INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index d017177..e462122 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -13,6 +13,7 @@ #include #include "upb_stream.h" +#include "upb_string.h" #ifdef __cplusplus extern "C" { @@ -21,10 +22,8 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. // upb_src -struct _upb_src { -}; -typedef struct { -} upb_src_vtbl; +typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers); +typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. typedef upb_strlen_t (*upb_bytesrc_read_fptr)( @@ -42,42 +41,65 @@ typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( typedef struct { upb_bytesrc_read_fptr read; upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtable; +} upb_bytesrc_vtbl; typedef struct { upb_bytesink_write_fptr write; upb_bytesink_putstr_fptr putstr; -} upb_bytesink_vtable; +} upb_bytesink_vtbl; + +typedef struct { + upb_src_sethandlers_fptr sethandlers; + upb_src_run_fptr run; +} upb_src_vtbl; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. struct _upb_bytesrc { - upb_bytesrc_vtable *vtbl; + upb_bytesrc_vtbl *vtbl; upb_status status; bool eof; }; struct _upb_bytesink { - upb_bytesink_vtable *vtbl; + upb_bytesink_vtbl *vtbl; upb_status status; bool eof; }; -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { +struct _upb_src { + upb_src_vtbl *vtbl; +}; + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } +INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { + s->vtbl = vtbl; +} + // Implementation of virtual function dispatch. +// upb_src +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) { + src->vtbl->sethandlers(src, handlers); +} + +INLINE void upb_src_run(upb_src *src, upb_status *status) { + src->vtbl->run(src, status); +} + // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count) { @@ -152,7 +174,41 @@ INLINE bool upb_handlers_isempty(upb_handlers *h) { return !h->set && !h->closure; } +INLINE upb_flow_t upb_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)closure; + (void)f; + (void)val; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)closure; + (void)f; + (void)delegate_to; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + if (!set->startmsg) set->startmsg = &upb_nop; + if (!set->endmsg) set->endmsg = &upb_nop; + if (!set->value) set->value = &upb_value_nop; + if (!set->startsubmsg) set->startsubmsg = &upb_startsubmsg_nop; + if (!set->endsubmsg) set->endsubmsg = &upb_nop; + if (!set->unknownval) set->unknownval = &upb_unknownval_nop; h->set = set; } @@ -182,16 +238,19 @@ INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { d->top->handlers = *h; } -INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->startmsg(d->top->handlers.closure); + return d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->endmsg(d->top->handlers.closure); + return d->top->handlers.set->endmsg(d->top->handlers.closure); } +// TODO: several edge cases to fix: +// - delegated start returns UPB_BREAK, should replay the start on resume. +// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg. INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f) { upb_handlers handlers; @@ -203,17 +262,18 @@ INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, ++d->top; d->top->handlers = handlers; d->top->depth = 0; - d->top->handlers.set->startmsg(d->top->handlers.closure); - ret = UPB_CONTINUE; + ret = d->top->handlers.set->startmsg(d->top->handlers.closure); } - ++d->top->depth; + if (ret == UPB_CONTINUE) ++d->top->depth; upb_handlers_uninit(&handlers); return ret; } INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + upb_flow_t ret; if (--d->top->depth == 0) { - d->top->handlers.set->endmsg(d->top->handlers.closure); + ret = d->top->handlers.set->endmsg(d->top->handlers.closure); + if (ret != UPB_CONTINUE) return ret; --d->top; } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); diff --git a/core/upb_string.c b/core/upb_string.c index b243dfd..e9ff0d9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,13 +61,13 @@ void _upb_string_free(upb_string *str) { free(str); } -upb_string *upb_string_tryrecycle(upb_string *str) { +void upb_string_recycle(upb_string **_str) { + upb_string *str = *_str; if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); - return str; } else { - return upb_string_new(); + *_str = upb_string_new(); } } @@ -111,7 +111,7 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { // We don't care about the terminating NULL, but snprintf might // bail out of printing even other characters if it doesn't have // enough space to write the NULL also. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); buf = upb_string_getrwbuf(str, true_size + 1); vsnprintf(buf, true_size + 1, format, args); } diff --git a/core/upb_string.h b/core/upb_string.h index f82603b..1f4b20c 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -133,7 +133,7 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // upb_src_getstr(str); // } // } -upb_string *upb_string_recycle(upb_string **str); +void upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been diff --git a/tests/test_def.c b/tests/test_def.c index 732835d..5be0672 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,6 +10,7 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { + printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); diff --git a/tests/test_string.c b/tests/test_string.c index 7c9ed02..6446806 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -23,7 +23,8 @@ static void test_static() { upb_string_unref(&static_upbstr); // Recycling a static string returns a new string (that can be modified). - upb_string *str = upb_string_tryrecycle(&static_upbstr); + upb_string *str = &static_upbstr; + upb_string_recycle(&str); assert(str != &static_upbstr); upb_string_unref(str); @@ -34,8 +35,9 @@ static void test_dynamic() { assert(str != NULL); upb_string_unref(str); - // Can also create a string by tryrecycle(NULL). - str = upb_string_tryrecycle(NULL); + // Can also create a string by recycle(NULL). + str = NULL; + upb_string_recycle(&str); assert(str != NULL); upb_strcpyc(str, static_str); @@ -45,7 +47,8 @@ static void test_dynamic() { assert(upb_streqlc(str, static_str)); upb_string_endread(str); - upb_string *str2 = upb_string_tryrecycle(str); + upb_string *str2 = str; + upb_string_recycle(&str2); // No other referents, so should return the same string. assert(str2 == str); @@ -58,7 +61,7 @@ static void test_dynamic() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -70,7 +73,7 @@ static void test_dynamic() { assert(upb_atomic_read(&str2->refcount) == 2); // Recycling str should eliminate the extra ref. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); assert(upb_atomic_read(&str2->refcount) == 1); // Resetting str should reuse its old data. @@ -80,7 +83,7 @@ static void test_dynamic() { // Resetting str to something very long should require new data to be // allocated. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; upb_strcpyc(str, longstring); const char *robuf6 = upb_string_getrobuf(str); @@ -88,7 +91,7 @@ static void test_dynamic() { assert(upb_streqlc(str, longstring)); // Test printf. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); assert(upb_streqlc(str, "Number: 5, String: YO!")); -- cgit v1.2.3 From c9df91b04a429f9324afeefece28f21e7078e3ac Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 01:03:02 -0800 Subject: upb bootstraps again! and with no memory leaks! --- core/upb_def.c | 40 +++++++++++++++++----------------------- core/upb_def.h | 2 +- core/upb_stream_vtbl.h | 1 + core/upb_string.c | 1 + tests/test_def.c | 4 +--- tests/test_string.c | 11 +++++++++++ 6 files changed, 32 insertions(+), 27 deletions(-) (limited to 'tests') diff --git a/core/upb_def.c b/core/upb_def.c index a935930..c21843e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -429,7 +429,7 @@ typedef struct _upb_unresolveddef { static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = str; + def->name = upb_string_getref(str); return def; } @@ -445,6 +445,7 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { static void upb_enumdef_free(upb_enumdef *e) { upb_enum_iter i; for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + // Frees the ref taken when the string was parsed. upb_string_unref(upb_enum_iter_name(i)); } upb_strtable_free(&e->ntoi); @@ -468,7 +469,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); - upb_string_getref(upb_value_getstr(val)); + b->name = upb_string_getref(upb_value_getstr(val)); b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: @@ -495,6 +496,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. + b->name = NULL; return UPB_CONTINUE; } @@ -641,7 +643,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - if(b->f->def) upb_def_unref(b->f->def); + upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; @@ -847,6 +849,7 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, return e; } else { // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. upb_string *sym_str = upb_string_new(); int baselen = upb_string_len(base); while(1) { @@ -1212,21 +1215,14 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { upb_baredecoder *d = (upb_baredecoder*)src; assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; - upb_strlen_t stack[UPB_MAX_NESTING]; + upb_strlen_t stack[UPB_MAX_NESTING] = {UPB_STRLEN_MAX}; upb_strlen_t *top = &stack[0]; - *top = upb_string_len(d->input); d->offset = 0; #define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { - // Detect end-of-submessage. - while(d->offset >= *top) { - CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); - d->offset = *(top--); - } - uint32_t key = upb_baredecoder_readv64(d); upb_fielddef f; f.number = key >> 3; @@ -1266,22 +1262,22 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { } CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } + // Detect end-of-submessage. + while(d->offset >= *top) { + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); + d->offset = *(top--); + } } CHECK(upb_dispatch_endmsg(&d->dispatcher)); - printf("SUCCESS!!\n"); upb_string_unref(str); return; err: upb_copyerr(status, d->dispatcher.top->handlers.status); - upb_printerr(d->dispatcher.top->handlers.status); - upb_printerr(status); upb_string_unref(str); - printf("ERROR!!\n"); } -static upb_baredecoder *upb_baredecoder_new(upb_string *str) -{ +static upb_baredecoder *upb_baredecoder_new(upb_string *str) { static upb_src_vtbl vtbl = { &upb_baredecoder_sethandlers, &upb_baredecoder_run, @@ -1294,19 +1290,16 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) return d; } -static void upb_baredecoder_free(upb_baredecoder *d) -{ +static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); free(d); } -static upb_src *upb_baredecoder_src(upb_baredecoder *d) -{ +static upb_src *upb_baredecoder_src(upb_baredecoder *d) { return &d->src; } -void upb_symtab_add_descriptorproto(upb_symtab *symtab) -{ +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // For the moment we silently decline to perform the operation if the symbols // already exist in the symtab. Revisit this when we have a better story // about whether syms in a table can be replaced. @@ -1329,4 +1322,5 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) upb_symtab_unref(symtab); abort(); } + upb_status_uninit(&status); } diff --git a/core/upb_def.h b/core/upb_def.h index 9eb961a..d9bab97 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -77,7 +77,7 @@ INLINE void upb_def_ref(upb_def *def) { if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); } INLINE void upb_def_unref(upb_def *def) { - if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); + if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); } /* upb_fielddef ***************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index e462122..fd71b2d 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -275,6 +275,7 @@ INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { ret = d->top->handlers.set->endmsg(d->top->handlers.closure); if (ret != UPB_CONTINUE) return ret; --d->top; + assert(d->top >= d->stack); } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } diff --git a/core/upb_string.c b/core/upb_string.c index e9ff0d9..c599728 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -67,6 +67,7 @@ void upb_string_recycle(upb_string **_str) { str->ptr = NULL; upb_string_release(str); } else { + upb_string_unref(str); *_str = upb_string_new(); } } diff --git a/tests/test_def.c b/tests/test_def.c index 5be0672..2d2658f 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,13 +10,10 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { - printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); - printf("Size: %zd\n", sizeof(upb_ntof_ent)); - upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); @@ -24,4 +21,5 @@ int main() { upb_def_unref(fds); upb_string_unref(str); upb_symtab_unref(s); + return 0; } diff --git a/tests/test_string.c b/tests/test_string.c index 6446806..ef0e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -40,6 +40,17 @@ static void test_dynamic() { upb_string_recycle(&str); assert(str != NULL); + // Take a ref and recycle; should create a new string and release a ref + // on the old one. + upb_string *strcp = upb_string_getref(str); + assert(strcp == str); + assert(upb_atomic_read(&str->refcount) == 2); + upb_string_recycle(&str); + assert(strcp != str); + assert(upb_atomic_read(&str->refcount) == 1); + assert(upb_atomic_read(&strcp->refcount) == 1); + upb_string_unref(strcp); + upb_strcpyc(str, static_str); assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); -- cgit v1.2.3 From 2ea9737e5d5b085eef6fe762c7e86a7161b9d231 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 01:06:19 -0800 Subject: Added test_stream.c for testing upb_stream.h. --- Makefile | 2 +- tests/test_stream.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 tests/test_stream.c (limited to 'tests') diff --git a/Makefile b/Makefile index af79363..04779c0 100644 --- a/Makefile +++ b/Makefile @@ -113,7 +113,7 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ tests/test_def \ -# tests/test_stream \ + tests/test_stream \ # tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ diff --git a/tests/test_stream.c b/tests/test_stream.c new file mode 100644 index 0000000..b6d511c --- /dev/null +++ b/tests/test_stream.c @@ -0,0 +1,127 @@ + +#undef NDEBUG /* ensure tests always assert. */ +#include "upb_stream.h" +#include "upb_string.h" + +typedef struct { + upb_string *str; + bool should_delegate; +} test_data; + +extern upb_handlerset test_handlers; + +static void strappendf(upb_string *s, const char *format, ...) { + upb_string *str = upb_string_new(); + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); + upb_strcat(s, str); + upb_string_unref(str); +} + +static upb_flow_t startmsg(void *closure) { + test_data *d = closure; + strappendf(d->str, "startmsg\n"); + return UPB_CONTINUE; +} + +static upb_flow_t endmsg(void *closure) { + test_data *d = closure; + strappendf(d->str, "endmsg\n"); + return UPB_CONTINUE; +} + +static upb_flow_t value(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)f; + test_data *d = closure; + strappendf(d->str, "value, %lld\n", upb_value_getint64(val)); + return UPB_CONTINUE; +} + +static upb_flow_t startsubmsg(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)f; + test_data *d = closure; + strappendf(d->str, "startsubmsg\n"); + if (d->should_delegate) { + upb_register_handlerset(delegate_to, &test_handlers); + upb_set_handler_closure(delegate_to, closure, NULL); + return UPB_DELEGATE; + } else { + return UPB_CONTINUE; + } +} + +static upb_flow_t endsubmsg(void *closure) { + test_data *d = closure; + strappendf(d->str, "endsubmsg\n"); + return UPB_CONTINUE; +} + +static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)val; + test_data *d = closure; + strappendf(d->str, "unknownval, %d\n", fieldnum); + return UPB_CONTINUE; +} + +upb_handlerset test_handlers = { + &startmsg, + &endmsg, + &value, + &startsubmsg, + &endsubmsg, + &unknownval, +}; + +static void test_dispatcher() { + test_data data; + data.should_delegate = false; + data.str = upb_string_new(); + upb_handlers h; + upb_handlers_init(&h); + upb_handlers_reset(&h); + upb_register_handlerset(&h, &test_handlers); + upb_set_handler_closure(&h, &data, NULL); + upb_dispatcher d; + upb_dispatcher_init(&d); + upb_dispatcher_reset(&d, &h); + + upb_dispatch_startmsg(&d); + upb_value val; + upb_value_setint64(&val, 5); + upb_dispatch_value(&d, NULL, val); + upb_dispatch_startsubmsg(&d, NULL); + data.should_delegate = true; + upb_dispatch_startsubmsg(&d, NULL); + data.should_delegate = false; + upb_dispatch_startsubmsg(&d, NULL); + upb_dispatch_value(&d, NULL, val); + upb_dispatch_endsubmsg(&d); + upb_dispatch_endsubmsg(&d); + upb_dispatch_endsubmsg(&d); + upb_dispatch_endmsg(&d); + + upb_string expected = UPB_STACK_STRING( + "startmsg\n" + "value, 5\n" + "startsubmsg\n" + "startsubmsg\n" + "startmsg\n" // Because of the delegation. + "startsubmsg\n" + "value, 5\n" + "endsubmsg\n" + "endmsg\n" // Because of the delegation. + "endsubmsg\n" + "endsubmsg\n" + "endmsg\n"); + assert(upb_streql(data.str, &expected)); + upb_string_unref(data.str); +} + +int main() { + test_dispatcher(); + return 0; +} -- cgit v1.2.3 From 02a8cdfff29d6a17836847490a06dfe535855d52 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 23:22:33 -0800 Subject: Fixes to decoder, stdio, textprinter. --- core/upb_stream_vtbl.h | 6 ++--- stream/upb_decoder.c | 68 ++++++++++++++++++++++++++++++++---------------- stream/upb_stdio.c | 38 ++++++++++++++++++--------- stream/upb_textprinter.c | 4 +-- tests/test_decoder.c | 10 +++++-- 5 files changed, 85 insertions(+), 41 deletions(-) (limited to 'tests') diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 8e8971f..a6990bc 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -34,10 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)( // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef bool (*upb_bytesink_putstr_fptr)( +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( upb_bytesink *bytesink, upb_string *str, upb_status *status); typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_status *status, const char *fmt, va_list args); + upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); // Vtables for the above interfaces. typedef struct { @@ -153,7 +153,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { va_list args; va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); + upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); va_end(args); return ret; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index e60915f..a7a2c76 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -18,22 +18,24 @@ // possibilities for optimization/experimentation here. INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *status) { + const char *p = *ptr; uint32_t low, high = 0; uint32_t b; - b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; done: + *ptr = p; *val = ((uint64_t)high << 32) | low; return true; } @@ -50,7 +52,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - ssize_t end_offset; // For groups, 0. + size_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -73,7 +75,7 @@ struct upb_decoder { upb_string *buf; // The offset within the overall stream represented by the *beginning* of buf. - upb_strlen_t buf_stream_offset; + size_t buf_stream_offset; }; typedef struct { @@ -98,7 +100,7 @@ static upb_flow_t upb_pop(upb_decoder *d); // Constant used to signal that the submessage is a group and therefore we // don't know its end offset. This cannot be the offset of a real submessage // end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET -1 +#define UPB_GROUP_END_OFFSET 0 #define UPB_MAX_VARINT_ENCODED_SIZE 10 // Called only from the slow path, this function copies the next "len" bytes @@ -132,12 +134,12 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, } // Wait for end-of-submessage or end-of-buffer, whichever comes first. - ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - ssize_t submsg_remaining = + size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; + size_t submsg_remaining = d->top->end_offset - d->buf_stream_offset - offset_in_buf; if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining > submsg_remaining) { + buf_remaining < submsg_remaining) { s->len = buf_remaining; } else { // Check that non of our subtraction overflowed. @@ -165,13 +167,16 @@ static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, upb_seterr(d->status, UPB_ERROR, "Varint was unterminated after 10 bytes.\n"); return false; + } else if (d->status->code == UPB_EOF && bitpos == 0) { + // Regular EOF. + return false; } else if (d->status->code == UPB_EOF && (byte & 0x80)) { upb_seterr(d->status, UPB_ERROR, "Provided data ended in the middle of a varint.\n"); return false; } else { // Success. - upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } } @@ -210,7 +215,7 @@ INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { const char *p = s->ptr; if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; upb_dstate_advance(s, p - s->ptr); - upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } else { return upb_decode_varint_slow(d, s, val); @@ -245,6 +250,7 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) return false; } + upb_value_setstr(val, *str); return true; } @@ -259,7 +265,7 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { } static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, - upb_strlen_t submsg_len, upb_fieldtype_t type) { + upb_value submsg_len, upb_fieldtype_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { @@ -268,7 +274,7 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); return upb_dispatch_startsubmsg(&d->dispatcher, f); } @@ -280,6 +286,7 @@ static upb_flow_t upb_pop(upb_decoder *d) { void upb_decoder_run(upb_src *src, upb_status *status) { upb_decoder *d = (upb_decoder*)src; + d->status = status; // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. @@ -299,9 +306,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { if (!upb_decode_tag(d, &state, &tag)) { if (status->code == UPB_EOF && d->top == d->stack) { // Normal end-of-file. + upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; } else { + if (status->code == UPB_EOF) { + upb_seterr(status, UPB_ERROR, + "Input ended in the middle of a submessage."); + } goto err; } } @@ -352,7 +364,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); + CHECK_FLOW(upb_push(d, &state, f, val, f->type)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): @@ -397,9 +409,21 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { upb_dispatcher_init(&d->dispatcher); d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; return d; } +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { + d->bytesrc = bytesrc; + d->top = &d->stack[0]; + d->top->msgdef = d->toplevel_msgdef; + d->top->end_offset = SIZE_MAX; // never want to end top-level message. + upb_string_unref(d->buf); + d->buf = NULL; +} + void upb_decoder_free(upb_decoder *d) { free(d); } + +upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 7923664..8857677 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -23,6 +23,9 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) { stdio->file = file; } + +/* upb_bytesrc methods ********************************************************/ + static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status) { upb_stdio *stdio = (upb_stdio*)src; @@ -50,18 +53,27 @@ static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, return true; } -int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { + +/* upb_bytesink methods *******************************************************/ + +upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { - // Error or EOF. - stdio->bytesink.eof = feof(stdio->file); - if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesink.status, UPB_ERROR, - "Error writing to stdio stream."); - return 0; - } + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); + upb_strlen_t written = vfprintf(stdio->file, fmt, args); + if (written < 0) { + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; } return written; } @@ -72,13 +84,15 @@ upb_stdio *upb_stdio_new() { upb_stdio_getstr, }; - //static upb_bytesink_vtbl bytesink_vtbl = { - // upb_stdio_put - //}; + static upb_bytesink_vtbl bytesink_vtbl = { + NULL, + upb_stdio_putstr, + upb_stdio_vprintf + }; upb_stdio *stdio = malloc(sizeof(*stdio)); upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); - //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); + upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); return stdio; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 7025494..531da12 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -90,7 +90,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): // TODO: escaping. - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); break; @@ -107,7 +107,7 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_textprinter *p = _p; upb_textprinter_startfield(p, f); p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); return UPB_CONTINUE; } diff --git a/tests/test_decoder.c b/tests/test_decoder.c index 0e6f19c..ed5a77e 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -16,13 +16,19 @@ int main() { upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); upb_decoder_reset(d, upb_stdio_bytesrc(in)); upb_textprinter *p = upb_textprinter_new(); - upb_textprinter_reset(p, upb_stdio_bytesink(out), false); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false); + upb_src *src = upb_decoder_src(d); + upb_src_sethandlers(src, &handlers); upb_status status = UPB_STATUS_INIT; - upb_streamdata(upb_decoder_src(d), upb_textprinter_sink(p), &status); + upb_src_run(src, &status); + upb_printerr(&status); assert(upb_ok(&status)); + upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); -- cgit v1.2.3 From 9aa7e559d634a3ecf087ee376f82704e2290f478 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 30 Jan 2011 16:28:37 -0800 Subject: Fixes to decoder and textprinter: it works (for some input)! A protobuf -> text stream for descriptor.proto now outputs the same text as proto2. --- stream/upb_decoder.c | 98 ++++++++++++++++++++++++------------------------ stream/upb_textprinter.c | 19 ++++------ tests/test_decoder.c | 8 +++- 3 files changed, 62 insertions(+), 63 deletions(-) (limited to 'tests') diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index a7a2c76..3a279a1 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -51,7 +51,6 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } // upb_decoder_frame is one frame of that stack. typedef struct { upb_msgdef *msgdef; - upb_fielddef *field; size_t end_offset; // For groups, 0. } upb_decoder_frame; @@ -82,29 +81,37 @@ typedef struct { // Our current position in the data buffer. const char *ptr; - // Number of bytes available at ptr, until either end-of-buf or - // end-of-submessage (whichever is smaller). + // End of this submessage, relative to *ptr. + const char *submsg_end; + + // Number of bytes available at ptr. size_t len; // Msgdef for the current level. upb_msgdef *msgdef; } upb_dstate; +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. +#define UPB_GROUP_END_OFFSET 0 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { s->ptr += len; s->len -= len; } -static upb_flow_t upb_pop(upb_decoder *d); +INLINE void upb_dstate_setmsgend(upb_decoder *d, upb_dstate *s) { + s->submsg_end = (d->top->end_offset == UPB_GROUP_END_OFFSET) ? + (void*)UINTPTR_MAX : + upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); +} -// Constant used to signal that the submessage is a group and therefore we -// don't know its end offset. This cannot be the offset of a real submessage -// end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET 0 -#define UPB_MAX_VARINT_ENCODED_SIZE 10 +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s); // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "len" appropriately. +// from the stream to "data", adjusting the dstate appropriately. static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, upb_dstate *s) { while (1) { @@ -112,41 +119,17 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, memcpy(data, s->ptr, to_copy); upb_dstate_advance(s, to_copy); bytes_wanted -= to_copy; - if (bytes_wanted == 0) return true; - - // Did "len" indicate end-of-submessage or end-of-buffer? - ssize_t buf_offset = - d->buf ? ((const char*)s->ptr - upb_string_getrobuf(d->buf)) : 0; - if (d->top->end_offset > 0 && - d->top->end_offset == d->buf_stream_offset + buf_offset) { - // End-of-submessage. - if (bytes_wanted > 0) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); - return false; - } - if (upb_pop(d) != UPB_CONTINUE) return false; - } else { - // End-of-buffer. - if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); - upb_string_recycle(&d->buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - s->ptr = upb_string_getrobuf(d->buf); + if (bytes_wanted == 0) { + upb_dstate_setmsgend(d, s); + return true; } - // Wait for end-of-submessage or end-of-buffer, whichever comes first. - size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - size_t submsg_remaining = - d->top->end_offset - d->buf_stream_offset - offset_in_buf; - if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining < submsg_remaining) { - s->len = buf_remaining; - } else { - // Check that non of our subtraction overflowed. - assert(d->top->end_offset > d->buf_stream_offset); - assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); - s->len = submsg_remaining; - } + // Get next buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + s->ptr = upb_string_getrobuf(d->buf); + s->len = upb_string_len(d->buf); } } @@ -266,7 +249,6 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, upb_value submsg_len, upb_fieldtype_t type) { - d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); @@ -274,13 +256,16 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); + upb_dstate_setmsgend(d, s); return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static upb_flow_t upb_pop(upb_decoder *d) { +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s) { d->top--; + upb_dstate_setmsgend(d, s); return upb_dispatch_endsubmsg(&d->dispatcher); } @@ -290,7 +275,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. - upb_dstate state = {NULL, 0, d->top->msgdef}; + upb_dstate state = {NULL, (void*)0x1, 0, d->top->msgdef}; upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG @@ -301,6 +286,15 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Main loop: executed once per tag/field pair. while(1) { + // Check for end-of-submessage. + while (state.ptr >= state.submsg_end) { + if (state.ptr > state.submsg_end) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); + goto err; + } + CHECK_FLOW(upb_pop(d, &state)); + } + // Parse/handle tag. upb_tag tag; if (!upb_decode_tag(d, &state, &tag)) { @@ -308,6 +302,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Normal end-of-file. upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + upb_string_unref(str); return; } else { if (status->code == UPB_EOF) { @@ -322,12 +317,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // since most types will read a varint here. upb_value val; switch (tag.wire_type) { + case UPB_WIRE_TYPE_START_GROUP: + break; // Nothing to do now, below we will push appropriately. case UPB_WIRE_TYPE_END_GROUP: if(d->top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK_FLOW(upb_pop(d)); + CHECK_FLOW(upb_pop(d, &state)); continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: @@ -383,6 +380,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { } err: + upb_string_unref(str); if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); } @@ -417,12 +415,14 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { d->bytesrc = bytesrc; d->top = &d->stack[0]; d->top->msgdef = d->toplevel_msgdef; - d->top->end_offset = SIZE_MAX; // never want to end top-level message. + // Never want to end top-level message, so treat it like a group. + d->top->end_offset = UPB_GROUP_END_OFFSET; upb_string_unref(d->buf); d->buf = NULL; } void upb_decoder_free(upb_decoder *d) { + upb_string_unref(d->buf); free(d); } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 531da12..894a1ea 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -30,14 +30,6 @@ err: return -1; } -static int upb_textprinter_startfield(upb_textprinter *p, upb_fielddef *f) { - upb_textprinter_indent(p); - CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); - return 0; -err: - return -1; -} - static int upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) { CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); @@ -52,7 +44,8 @@ err: static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, upb_value val) { upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); #define CASE(fmtstr, member) \ CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; switch(f->type) { @@ -105,11 +98,13 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_handlers *delegate_to) { (void)delegate_to; upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); - p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT " {", UPB_STRARG(f->name))); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); + p->indent_depth++; return UPB_CONTINUE; +err: + return UPB_BREAK; } static upb_flow_t upb_textprinter_endsubmsg(void *_p) diff --git a/tests/test_decoder.c b/tests/test_decoder.c index ed5a77e..f48472d 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -25,14 +25,18 @@ int main() { upb_status status = UPB_STATUS_INIT; upb_src_run(src, &status); - upb_printerr(&status); assert(upb_ok(&status)); - + upb_status_uninit(&status); upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); upb_textprinter_free(p); upb_def_unref(fds); upb_symtab_unref(symtab); + + // Prevent C library from holding buffers open, so Valgrind doesn't see + // memory leaks. + fclose(stdin); + fclose(stdout); } -- cgit v1.2.3