From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. --- core/upb_string.h | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 core/upb_string.h (limited to 'core/upb_string.h') diff --git a/core/upb_string.h b/core/upb_string.h new file mode 100644 index 0000000..770dba7 --- /dev/null +++ b/core/upb_string.h @@ -0,0 +1,194 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * This file defines a simple string type. The overriding goal of upb_string + * is to avoid memcpy(), malloc(), and free() wherever possible, while + * keeping both CPU and memory overhead low. Throughout upb there are + * situations where one wants to reference all or part of another string + * without copying. upb_string provides APIs for doing this. + * + * Characteristics of upb_string: + * - strings are reference-counted. + * - strings are logically immutable. + * - if a string has no other referents, it can be "recycled" into a new string + * without having to reallocate the upb_string. + * - strings can be substrings of other strings (owning a ref on the source + * string). + * - strings can refer to memory that they do not own, in which case we avoid + * copies if possible (the exact strategy for doing this can vary). + * - strings are not thread-safe by default, but can be made so by calling a + * function. This is not the default because it causes extra CPU overhead. + */ + +#ifndef UPB_STRING_H +#define UPB_STRING_H + +#include +#include +#include "upb_atomic.h" +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// All members of this struct are private, and may only be read/written through +// the associated functions. Also, strings may *only* be allocated on the heap. 
+struct _upb_string { + char *ptr; + int32_t len; + uint32_t size; + upb_atomic_refcount_t refcount; + union { + // Used if this is a slice of another string. + struct _upb_string *src; + // Used if this string is referencing external unowned memory. + upb_atomic_refcount_t reader_count; + } extra; +}; + +// Returns a newly-created, empty, non-finalized string. When the string is no +// longer needed, it should be unref'd, never freed directly. +upb_string *upb_string_new(); + +void _upb_string_free(upb_string *str); + +// Releases a ref on the given string, which may free the memory. "str" +// can be NULL, in which case this is a no-op. +INLINE void upb_string_unref(upb_string *str) { + if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); +} + +// Returns a string with the same contents as "str". The caller owns a ref on +// the returned string, which may or may not be the same object as "str. +INLINE upb_string *upb_string_getref(upb_string *str) { + // If/when we support stack-allocated strings, this will have to allocate + // a new string if the given string is on the stack. + upb_atomic_ref(&str->refcount); + return str; +} + +// Returns the length of the string. +INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } + +// Use to read the bytes of the string. The caller *must* call +// upb_string_endread() after the data has been read. The window between +// upb_string_getrobuf() and upb_string_endread() should be kept as short as +// possible, because any pending upb_string_detach() may be blocked until +// upb_string_endread() is called. No other functions may be called on the +// string during this window except upb_string_len(). +INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } +INLINE void upb_string_endread(upb_string *str) { (void)str; } + +// Attempts to recycle the string "str" so it may be reused and have different +// data written to it. 
The returned string is either "str" if it could be +// recycled or a newly created string if "str" has other references. +// +// As a special case, passing NULL will allocate a new string. This is +// convenient for the pattern: +// +// upb_string *str = NULL; +// while (x) { +// if (y) { +// str = upb_string_tryrecycle(str); +// upb_src_getstr(str); +// } +// } +upb_string *upb_string_tryrecycle(upb_string *str); + +// The three options for setting the contents of a string. These may only be +// called when a string is first created or recycled; once other functions have +// been called on the string, these functions are not allowed until the string +// is recycled. + +// Gets a pointer suitable for writing to the string, which is guaranteed to +// have at least "len" bytes of data available. The size of the string will +// become "len". +char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); + +// Sets the contents of "str" to be the given substring of "target_str", to +// which the caller must own a ref. +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len); + +// Makes the string "str" a reference to the given string data. The caller +// guarantees that the given string data will not change or be deleted until +// a matching call to upb_string_detach(). +void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +void upb_string_detach(upb_string *str); + +// Allows using upb_strings in printf, ie: +// upb_strptr str = UPB_STRLIT("Hello, World!\n"); +// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ +#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) +#define UPB_STRFMT "%.*s" + +/* upb_string library functions ***********************************************/ + +// Named like their counterparts, these are all safe against buffer +// overflow. These only use the public upb_string interface. + +// More efficient than upb_strcmp if all you need is to test equality. 
+INLINE bool upb_streql(upb_string *s1, upb_string *s2) { + upb_strlen_t len = upb_string_len(s1); + if(len != upb_string_len(s2)) { + return false; + } else { + bool ret = + memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; + upb_string_endread(s1); + upb_string_endread(s2); + return ret; + } +} + +// Like strcmp(). +int upb_strcmp(upb_string *s1, upb_string *s2); + +// Like upb_strcpy, but copies from a buffer and length. +INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { + memcpy(upb_string_getrwbuf(dest, len), src, len); +} + +// Replaces the contents of "dest" with the contents of "src". +INLINE void upb_strcpy(upb_string *dest, upb_string *src) { + upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src)); + upb_string_endread(src); +} + +// Like upb_strcpy, but copies from a NULL-terminated string. +INLINE void upb_strcpyc(upb_string *dest, const char *src) { + // This does two passes over src, but that is necessary unless we want to + // repeatedly re-allocate dst, which seems worse. + upb_strcpylen(dest, src, strlen(src)); +} + +// Returns a new string whose contents are a copy of s. +upb_string *upb_strdup(upb_string *s); + +// Like upb_strdup(), but duplicates a given buffer and length. +INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { + upb_string *s = upb_string_new(); + upb_strcpylen(s, src, len); + return s; +} + +// Like upb_strdup(), but duplicates a C NULL-terminated string. +upb_string *upb_strdupc(const char *src); + +// Appends 'append' to 's' in-place, resizing s if necessary. +void upb_strcat(upb_string *s, upb_string *append); + +// Returns a new string that is a substring of the given string. +upb_string *upb_strslice(upb_string *s, int offset, int len); + +// Reads an entire file into a newly-allocated string. 
+upb_string *upb_strreadfile(const char *filename); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From e29bf964d1716398e8354a50f506906a307298e5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 12:15:31 -0700 Subject: Tests for string and fleshed out implementation. --- Makefile | 15 ++++++++----- core/upb_string.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++------- core/upb_string.h | 40 ++++++++++++++++++++++++---------- tests/test_string.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ tests/test_table.cc | 13 ++++++++++- 5 files changed, 161 insertions(+), 26 deletions(-) create mode 100644 tests/test_string.c (limited to 'core/upb_string.h') diff --git a/Makefile b/Makefile index ca4f940..1f977b4 100644 --- a/Makefile +++ b/Makefile @@ -86,22 +86,25 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -TESTS=tests/tests \ +TESTS=tests/test_string \ + tests/test_table +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ tests/test_table \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb $(TESTS): core/libupb.a -#VALGRIND=valgrind --leak-check=full --error-exitcode=1 -VALGRIND= +VALGRIND=valgrind --leak-check=full --error-exitcode=1 +#VALGRIND= test: tests @echo Running all tests under valgrind. - $(VALGRIND) ./tests/tests # Needs to be rewritten to separate the benchmark. 
# valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/t.* ; do \ - if [ -f ./$$test ] ; then \ + @for test in tests/*; do \ + if [ -x ./$$test ] ; then \ echo $(VALGRIND) ./$$test: \\c; \ $(VALGRIND) ./$$test; \ fi \ diff --git a/core/upb_string.c b/core/upb_string.c index 91ab9ae..f9af9e9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -7,8 +7,11 @@ #include "upb_string.h" #include - -#define UPB_STRING_UNFINALIZED -1 +#ifdef __GLIBC__ +#include +#elif defined(__APPLE__) +#include +#endif static uint32_t upb_round_up_pow2(uint32_t v) { // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 @@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) { upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; + str->cached_mem = NULL; +#ifndef UPB_HAVE_MSIZE str->size = 0; - str->len = UPB_STRING_UNFINALIZED; +#endif + str->src = NULL; upb_atomic_refcount_init(&str->refcount, 1); return str; } +uint32_t upb_string_size(upb_string *str) { +#ifdef __GLIBC__ + return malloc_usable_size(str->cached_mem); +#elif defined(__APPLE__) + return malloc_size(str->cached_mem); +#else + return str->size; +#endif +} + +static void upb_string_release(upb_string *str) { + if(str->src) { + upb_string_unref(str->src); + str->src = NULL; + } +} + void _upb_string_free(upb_string *str) { - if(str->ptr) free(str->ptr); + if(str->cached_mem) free(str->cached_mem); + upb_string_release(str); free(str); } +upb_string *upb_string_tryrecycle(upb_string *str) { + if(str == NULL || upb_atomic_read(&str->refcount) > 1) { + return upb_string_new(); + } else { + str->ptr = NULL; + upb_string_release(str); + return str; + } +} + char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->len == UPB_STRING_UNFINALIZED); - if (str->size < len) { - str->size = upb_round_up_pow2(len); - str->ptr = realloc(str->ptr, str->size); + assert(str->ptr == NULL); + uint32_t size = upb_string_size(str); + if (size < 
len) { + size = upb_round_up_pow2(len); + str->cached_mem = realloc(str->cached_mem, size); +#ifndef UPB_HAVE_MSIZE + str->size = size; +#endif } str->len = len; + str->ptr = str->cached_mem; return str->ptr; } + +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len) { + assert(str->ptr == NULL); + str->src = upb_string_getref(target_str); + str->ptr = upb_string_getrobuf(target_str) + start; + str->len = len; +} diff --git a/core/upb_string.h b/core/upb_string.h index 770dba7..7ec3d48 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -16,8 +16,6 @@ * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. */ @@ -37,16 +35,31 @@ extern "C" { // All members of this struct are private, and may only be read/written through // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { + // The pointer to our currently active data. This may be memory we own + // or a pointer into memory we don't own. char *ptr; + + // If non-NULL, this is a block of memory we own. We keep this cached even + // if "ptr" is currently aliasing memory we don't own. + char *cached_mem; + + // The effective length of the string (the bytes at ptr). int32_t len; +#ifndef UPB_HAVE_MSIZE + // How many bytes are allocated in cached_mem. + // + // Many platforms have a function that can tell you the size of a block + // that was previously malloc'd. In this case we can avoid storing the + // size explicitly. uint32_t size; +#endif + + // The string's refcount. 
upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_atomic_refcount_t reader_count; - } extra; + + // Used if this is a slice of another string, NULL otherwise. We own a ref + // on src. + struct _upb_string *src; }; // Returns a newly-created, empty, non-finalized string. When the string is no @@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len); +// Sketch of an API for allowing upb_strings to reference external, unowned +// data. Waiting for a clear use case before actually implementing it. +// // Makes the string "str" a reference to the given string data. The caller // guarantees that the given string data will not change or be deleted until // a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); +// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +// void upb_string_detach(upb_string *str); // Allows using upb_strings in printf, ie: // upb_strptr str = UPB_STRLIT("Hello, World!\n"); @@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { } // Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); +INLINE upb_string *upb_strdupc(const char *src) { + return upb_strduplen(src, strlen(src)); +} // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); diff --git a/tests/test_string.c b/tests/test_string.c new file mode 100644 index 0000000..4fdab6c --- /dev/null +++ b/tests/test_string.c @@ -0,0 +1,56 @@ + +#undef NDEBUG /* ensure tests always assert. 
*/ +#include "upb_string.h" + +char static_str[] = "Static string."; + +int main() { + upb_string *str = upb_string_new(); + assert(str != NULL); + upb_string_unref(str); + + // Can also create a string by tryrecycle(NULL). + str = upb_string_tryrecycle(NULL); + assert(str != NULL); + + upb_strcpyc(str, static_str); + assert(upb_string_len(str) == (sizeof(static_str) - 1)); + const char *robuf = upb_string_getrobuf(str); + assert(robuf != NULL); + assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + upb_string_endread(str); + + upb_string *str2 = upb_string_tryrecycle(str); + // No other referents, so should return the same string. + assert(str2 == str); + + // Write a shorter string, the same memory should be reused. + upb_strcpyc(str, "XX"); + const char *robuf2 = upb_string_getrobuf(str); + assert(robuf2 == robuf); + assert(memcmp(robuf2, "XX", 2) == 0); + + // Make string alias part of another string. + str2 = upb_strdupc("WXYZ"); + upb_string_substr(str, str2, 1, 2); + assert(upb_string_len(str) == 2); + assert(upb_string_len(str2) == 4); + // The two strings should be aliasing the same data. + const char *robuf3 = upb_string_getrobuf(str); + const char *robuf4 = upb_string_getrobuf(str2); + assert(robuf3 == robuf4 + 1); + // The aliased string should have an extra ref. + assert(upb_atomic_read(&str2->refcount) == 2); + + // Recycling str should eliminate the extra ref. + str = upb_string_tryrecycle(str); + assert(upb_atomic_read(&str2->refcount) == 1); + + // Resetting str should reuse its old data. 
+ upb_strcpyc(str, "XX"); + const char *robuf5 = upb_string_getrobuf(str); + assert(robuf5 == robuf); + + upb_string_unref(str); + upb_string_unref(str2); +} diff --git a/tests/test_table.cc b/tests/test_table.cc index 37e14a8..47d5e57 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -12,6 +12,8 @@ #include #include +bool benchmark = false; + using std::string; using std::vector; @@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries) } } + if(!benchmark) { + upb_inttable_free(&table); + return; + } + /* Test performance. We only test lookups for keys that are known to exist. */ uintptr_t x = 0; const unsigned int iterations = 0xFFFFFF; @@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num) return buf; } -int main() +int main(int argc, char *argv[]) { + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--benchmark") == 0) benchmark = true; + } + vector keys; keys.push_back("google.protobuf.FileDescriptorSet"); keys.push_back("google.protobuf.FileDescriptorProto"); -- cgit v1.2.3 From 2ef013126c682a44d15554ea7a04144fc9a10fed Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 13:28:47 -0700 Subject: Fleshed out upb_string further. Now upb_def's only unresolved references are upb_src. 
--- Makefile | 3 ++- core/upb_def.c | 11 +++++------ core/upb_string.c | 30 +++++++++++++++++++++++++++++- core/upb_string.h | 46 +++++++++++++++++++++++++++++++++++++--------- tests/test_string.c | 17 +++++++++++++++-- 5 files changed, 88 insertions(+), 19 deletions(-) (limited to 'core/upb_string.h') diff --git a/Makefile b/Makefile index 1f977b4..2abe0c7 100644 --- a/Makefile +++ b/Makefile @@ -87,7 +87,8 @@ tests/test.proto.pb: tests/test.proto protoc tests/test.proto -otests/test.proto.pb TESTS=tests/test_string \ - tests/test_table + tests/test_table \ + tests/test_def tests: $(TESTS) OTHER_TESTS=tests/tests \ diff --git a/core/upb_def.c b/core/upb_def.c index bfab738..1f57c70 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -44,13 +44,12 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ static upb_string *upb_join(upb_string *base, upb_string *name) { - upb_string *joined = upb_strdup(base); - upb_strlen_t len = upb_string_len(joined); - if(len > 0) { - upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; + if (upb_string_len(base) == 0) { + return upb_string_getref(name); + } else { + return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, + UPB_STRARG(base), UPB_STRARG(name)); } - upb_strcat(joined, name); - return joined; } // Qualify the defname for all defs starting with offset "start" with "str". 
diff --git a/core/upb_string.c b/core/upb_string.c index f9af9e9..2f487aa 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -82,7 +82,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { } str->len = len; str->ptr = str->cached_mem; - return str->ptr; + return str->cached_mem; } void upb_string_substr(upb_string *str, upb_string *target_str, @@ -92,3 +92,31 @@ void upb_string_substr(upb_string *str, upb_string *target_str, str->ptr = upb_string_getrobuf(target_str) + start; str->len = len; } + +void upb_string_vprintf(upb_string *str, const char *format, va_list args) { + // Try once without reallocating. We have to va_copy because we might have + // to call vsnprintf again. + uint32_t size = UPB_MAX(upb_string_size(str), 16); + char *buf = upb_string_getrwbuf(str, size); + va_list args_copy; + va_copy(args_copy, args); + uint32_t true_size = vsnprintf(buf, size, format, args_copy); + va_end(args_copy); + + if (true_size > size) { + // Need to reallocate. + str = upb_string_tryrecycle(str); + buf = upb_string_getrwbuf(str, true_size); + vsnprintf(buf, true_size, format, args); + } + str->len = true_size; +} + +upb_string *upb_string_asprintf(const char *format, ...) { + upb_string *str = upb_string_new(); + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); + return str; +} diff --git a/core/upb_string.h b/core/upb_string.h index 7ec3d48..5cc0eaf 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -25,6 +25,7 @@ #include #include +#include #include "upb_atomic.h" #include "upb.h" @@ -37,7 +38,7 @@ extern "C" { struct _upb_string { // The pointer to our currently active data. This may be memory we own // or a pointer into memory we don't own. - char *ptr; + const char *ptr; // If non-NULL, this is a block of memory we own. We keep this cached even // if "ptr" is currently aliasing memory we don't own. 
@@ -111,16 +112,25 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // } upb_string *upb_string_tryrecycle(upb_string *str); -// The three options for setting the contents of a string. These may only be -// called when a string is first created or recycled; once other functions have -// been called on the string, these functions are not allowed until the string -// is recycled. +// The options for setting the contents of a string. These may only be called +// when a string is first created or recycled; once other functions have been +// called on the string, these functions are not allowed until the string is +// recycled. // Gets a pointer suitable for writing to the string, which is guaranteed to // have at least "len" bytes of data available. The size of the string will // become "len". char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); +// Replaces the contents of str with the contents of the given printf. +void upb_string_vprintf(upb_string *str, const char *format, va_list args); +INLINE void upb_string_printf(upb_string *str, const char *format, ...) { + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); +} + // Sets the contents of "str" to be the given substring of "target_str", to // which the caller must own a ref. void upb_string_substr(upb_string *str, upb_string *target_str, @@ -144,7 +154,7 @@ void upb_string_substr(upb_string *str, upb_string *target_str, /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. +// overflow. For the most part these only use the public upb_string interface. // More efficient than upb_strcmp if all you need is to test equality. INLINE bool upb_streql(upb_string *s1, upb_string *s2) { @@ -163,6 +173,17 @@ INLINE bool upb_streql(upb_string *s1, upb_string *s2) { // Like strcmp(). 
int upb_strcmp(upb_string *s1, upb_string *s2); +// Compare a upb_string with memory or a NULL-terminated C string. +INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) { + return len == upb_string_len(str) && + memcmp(upb_string_getrobuf(str), buf, len) == 0; +} + +INLINE bool upb_streqlc(upb_string *str, const void *buf) { + // Could be made one-pass. + return upb_streqllen(str, buf, strlen((const char*)buf)); +} + // Like upb_strcpy, but copies from a buffer and length. INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { memcpy(upb_string_getrwbuf(dest, len), src, len); @@ -175,10 +196,10 @@ INLINE void upb_strcpy(upb_string *dest, upb_string *src) { } // Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_string *dest, const char *src) { +INLINE void upb_strcpyc(upb_string *dest, const void *src) { // This does two passes over src, but that is necessary unless we want to // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); + upb_strcpylen(dest, src, strlen((const char*)src)); } // Returns a new string whose contents are a copy of s. @@ -200,11 +221,18 @@ INLINE upb_string *upb_strdupc(const char *src) { void upb_strcat(upb_string *s, upb_string *append); // Returns a new string that is a substring of the given string. -upb_string *upb_strslice(upb_string *s, int offset, int len); +INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { + upb_string *str = upb_string_new(); + upb_string_substr(str, s, offset, len); + return str; +} // Reads an entire file into a newly-allocated string. upb_string *upb_strreadfile(const char *filename); +// Returns a new string with the contents of the given printf. 
+upb_string *upb_string_asprintf(const char *format, ...); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tests/test_string.c b/tests/test_string.c index 4fdab6c..5e6e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -17,7 +17,7 @@ int main() { assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); assert(robuf != NULL); - assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + assert(upb_streqlc(str, static_str)); upb_string_endread(str); upb_string *str2 = upb_string_tryrecycle(str); @@ -28,7 +28,7 @@ int main() { upb_strcpyc(str, "XX"); const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); - assert(memcmp(robuf2, "XX", 2) == 0); + assert(upb_streqlc(str, "XX")); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -51,6 +51,19 @@ int main() { const char *robuf5 = upb_string_getrobuf(str); assert(robuf5 == robuf); + // Resetting str to something very long should require new data to be + // allocated. + str = upb_string_tryrecycle(str); + const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; + upb_strcpyc(str, longstring); + const char *robuf6 = upb_string_getrobuf(str); + assert(robuf6 != robuf); + assert(upb_streqlc(str, longstring)); + + // Test printf. + str = upb_string_tryrecycle(str); + upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + upb_string_unref(str); upb_string_unref(str2); } -- cgit v1.2.3 From 7a6a702792e769366a8852fc90dbea9cfc9e01c0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 11 Jul 2010 18:53:27 -0700 Subject: Allow static upb_strings. This can allow strings to reference static data, and reduced the memory footprint of test_def by about 10% (3k). 
--- core/upb_def.c | 4 +--- core/upb_string.c | 12 ++++++++--- core/upb_string.h | 57 +++++++++++++++++++++++++++++++++++++++++++++---- descriptor/descriptor.c | 7 ++++-- descriptor/descriptor.h | 5 +++-- tests/test_string.c | 33 +++++++++++++++++++++++++++- 6 files changed, 103 insertions(+), 15 deletions(-) (limited to 'core/upb_string.h') diff --git a/core/upb_def.c b/core/upb_def.c index b9402c5..c0d72db 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -1018,12 +1018,10 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // TODO: allow upb_strings to be static or on the stack. - upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); - upb_baredecoder *decoder = upb_baredecoder_new(descriptor); + upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); upb_baredecoder_free(decoder); - upb_string_unref(descriptor); if(!upb_ok(&status)) { // upb itself is corrupt. diff --git a/core/upb_string.c b/core/upb_string.c index 3563c9e..ca3c669 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,12 +61,12 @@ void _upb_string_free(upb_string *str) { } upb_string *upb_string_tryrecycle(upb_string *str) { - if(str == NULL || upb_atomic_read(&str->refcount) > 1) { - return upb_string_new(); - } else { + if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); return str; + } else { + return upb_string_new(); } } @@ -125,3 +125,9 @@ upb_string *upb_string_asprintf(const char *format, ...) 
{ va_end(args); return str; } + +upb_string *upb_strdup(upb_string *s) { + upb_string *str = upb_string_new(); + upb_strcpy(str, s); + return str; +} diff --git a/core/upb_string.h b/core/upb_string.h index 5cc0eaf..65ba404 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -63,6 +63,17 @@ struct _upb_string { struct _upb_string *src; }; +// Internal-only initializer for upb_string instances. +#ifdef UPB_HAVE_MSIZE +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, {refcount}, NULL} +#else +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, 0, {refcount}, NULL} +#endif + +// Special pseudo-refcounts for static/stack-allocated strings, respectively. +#define _UPB_STRING_REFCOUNT_STATIC -1 +#define _UPB_STRING_REFCOUNT_STACK -2 + // Returns a newly-created, empty, non-finalized string. When the string is no // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); @@ -72,15 +83,21 @@ void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" // can be NULL, in which case this is a no-op. INLINE void upb_string_unref(upb_string *str) { - if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); + if (str && upb_atomic_read(&str->refcount) > 0 && + upb_atomic_unref(&str->refcount)) { + _upb_string_free(str); + } } +upb_string *upb_strdup(upb_string *s); // Forward-declare. + // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. INLINE upb_string *upb_string_getref(upb_string *str) { - // If/when we support stack-allocated strings, this will have to allocate - // a new string if the given string is on the stack. - upb_atomic_ref(&str->refcount); + int refcount = upb_atomic_read(&str->refcount); + if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); + // We don't ref the special <0 refcount for static strings. 
+ if (refcount > 0) upb_atomic_ref(&str->refcount); return str; } @@ -151,6 +168,38 @@ void upb_string_substr(upb_string *str, upb_string *target_str, #define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) #define UPB_STRFMT "%.*s" +// Macros for constructing upb_string objects statically or on the stack. These +// can be used like: +// +// upb_string static_str = UPB_STATIC_STRING("Foo"); +// +// int main() { +// upb_string stack_str = UPB_STACK_STRING("Foo"); +// // Now: +// // upb_streql(&static_str, &stack_str) == true +// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true +// } +// +// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays, +// but you must not change the underlying data once you've passed the string on: +// +// void foo() { +// char data[] = "ABC123"; +// upb_string stack_str = UPB_STACK_STRING(data); +// bar(&stack_str); +// data[0] = 'B'; // NOT ALLOWED!! +// } +// +// TODO: should the stack business just be like attach/detach? The latter seems +// more flexible, though it does require a stack allocation. Maybe put this off +// until there is a clear use case. 
+#define UPB_STATIC_STRING(str) \ + _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STATIC_STRING_LEN(str, len) \ + _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STACK_STRING(str) _UPB_STRING_INIT(str, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) + /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer diff --git a/descriptor/descriptor.c b/descriptor/descriptor.c index cd50a16..ee6b25b 100644 --- a/descriptor/descriptor.c +++ b/descriptor/descriptor.c @@ -1,4 +1,6 @@ -unsigned char descriptor_pb[] = { +#include "descriptor.h" + +static unsigned char descriptor_pb[] = { 0x0a, 0x9b, 0x1b, 0x0a, 0x1b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x67, 0x6f, @@ -291,4 +293,5 @@ unsigned char descriptor_pb[] = { 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x48, 0x01 }; -unsigned int descriptor_pb_len = 3486; +static const unsigned int descriptor_pb_len = 3486; +upb_string descriptor_str = UPB_STATIC_STRING(descriptor_pb); diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index b598a9a..f6d3ca3 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -11,12 +11,13 @@ #ifndef UPB_DESCRIPTOR_H_ #define UPB_DESCRIPTOR_H_ +#include "upb_string.h" + #ifdef __cplusplus extern "C" { #endif -extern unsigned char descriptor_pb[]; -extern unsigned int descriptor_pb_len; +extern upb_string descriptor_str; #ifdef __cplusplus } /* extern "C" */ diff --git a/tests/test_string.c b/tests/test_string.c index 46f35b9..7c9ed02 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -3,8 +3,33 @@ #include "upb_string.h" char static_str[] = "Static string."; +upb_string static_upbstr = 
UPB_STATIC_STRING(static_str); -int main() { +static void test_static() { + // Static string is initialized appropriately. + assert(upb_streql(&static_upbstr, UPB_STRLIT("Static string."))); + + // Taking a ref on a static string returns the same string, and repeated + // refs don't get the string in a confused state. + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + + // Unreffing a static string does nothing (is not harmful). + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + + // Recycling a static string returns a new string (that can be modified). + upb_string *str = upb_string_tryrecycle(&static_upbstr); + assert(str != &static_upbstr); + + upb_string_unref(str); +} + +static void test_dynamic() { upb_string *str = upb_string_new(); assert(str != NULL); upb_string_unref(str); @@ -29,6 +54,7 @@ int main() { const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); assert(upb_streqlc(str, "XX")); + assert(upb_streql(str, UPB_STRLIT("XX"))); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -79,3 +105,8 @@ int main() { // Unref of NULL is harmless. upb_string_unref(NULL); } + +int main() { + test_static(); + test_dynamic(); +} -- cgit v1.2.3 From 5871ed0d02ff69b20b65f577dd3be18a2e92dec7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 18 Jul 2010 22:45:15 -0700 Subject: First go at Lua bindings. 
--- Makefile | 10 +++ core/upb_def.c | 4 +- core/upb_def.h | 8 +- core/upb_string.h | 5 +- lang_ext/lua/upb.c | 254 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 274 insertions(+), 7 deletions(-) create mode 100644 lang_ext/lua/upb.c (limited to 'core/upb_string.h') diff --git a/Makefile b/Makefile index 10ef96d..749c5a7 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,13 @@ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread +ifeq ($(shell uname), Darwin) + CPPFLAGS += -I/usr/include/lua5.1 + LDFLAGS += -L/usr/local/lib -llua +else + CFLAGS += $(strip $(shell pkg-config --silence-errors --cflags lua || pkg-config --cflags lua5.1)) + LDFLAGS += $(strip $(shell pkg-config --silence-errors --libs lua || pkg-config --libs lua5.1)) +endif LIBUPB=core/libupb.a LIBUPB_PIC=core/libupb_pic.a @@ -59,6 +66,9 @@ core/upb_def.o: core/upb_def.c core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC +lang_ext/lua/upb.so: lang_ext/lua/upb.lo + $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a + STATICOBJ=$(patsubst %.c,%.o,$(SRC)) SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC)) diff --git a/core/upb_def.c b/core/upb_def.c index fd00895..0d97982 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -190,7 +190,7 @@ void _upb_def_cyclic_ref(upb_def *def) { upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); } -static void upb_def_init(upb_def *def, upb_def_type type) { +static void upb_def_init(upb_def *def, upb_deftype type) { def->type = type; def->is_cyclic = 0; // We detect this later, after resolving refs. 
def->search_depth = 0; @@ -779,7 +779,7 @@ void _upb_symtab_free(upb_symtab *s) free(s); } -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) { upb_rwlock_rdlock(&s->lock); int total = upb_strtable_count(&s->symtab); diff --git a/core/upb_def.h b/core/upb_def.h index 9cdc54d..ae9e0fa 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -48,15 +48,15 @@ typedef enum { // For specifying that defs of any type are requsted from getdefs. UPB_DEF_ANY = -1 -} upb_def_type; +} upb_deftype; // This typedef is more space-efficient than declaring an enum var directly. -typedef int8_t upb_def_type_t; +typedef int8_t upb_deftype_t; typedef struct { upb_string *fqname; // Fully qualified. upb_atomic_refcount_t refcount; - upb_def_type_t type; + upb_deftype_t type; // The is_cyclic flag could go in upb_msgdef instead of here, because only // messages can be involved in cycles. However, putting them here is free @@ -265,7 +265,7 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); // caller owns the returned array (which is of length *count) as well as a ref // to each symbol inside. If type is UPB_DEF_ANY then defs of all types are // returned, otherwise only defs of the required type are returned. -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); // "fds" is a upb_src that will yield data from the // google.protobuf.FileDescriptorSet message type. 
upb_symtab_addfds() adds diff --git a/core/upb_string.h b/core/upb_string.h index 65ba404..bd89f67 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -197,7 +197,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) #define UPB_STATIC_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STACK_STRING(str) _UPB_STRING_INIT(str, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STACK_STRING(str) \ + _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STACK_STRING_LEN(str, len) \ + _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c new file mode 100644 index 0000000..ac7f188 --- /dev/null +++ b/lang_ext/lua/upb.c @@ -0,0 +1,254 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * A Lua extension for upb. + */ + +#include "lauxlib.h" +#include "upb_def.h" + +/* lupb_def *******************************************************************/ + +// All the def types share the same C layout, even though they are different Lua +// types with different metatables. 
+typedef struct { + upb_def *def; +} lupb_def; + +static void lupb_pushnewdef(lua_State *L, upb_def *def) { + lupb_def *ldef = lua_newuserdata(L, sizeof(lupb_def)); + ldef->def = def; + const char *type_name; + switch(def->type) { + case UPB_DEF_MSG: + type_name = "upb.msgdef"; + break; + case UPB_DEF_ENUM: + type_name = "upb.enumdef"; + break; + default: + luaL_error(L, "unknown deftype %d", def->type); + } + luaL_getmetatable(L, type_name); + lua_setmetatable(L, -2); +} + +static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.msgdef"); +} + +static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.enumdef"); +} + +static int lupb_msgdef_gc(lua_State *L) { + lupb_def *ldef = lupb_msgdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_def *ldef = lupb_enumdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + +static const struct luaL_Reg lupb_msgdef_methods[] = { + {"__gc", lupb_msgdef_gc}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_enumdef_methods[] = { + {"__gc", lupb_enumdef_gc}, + {NULL, NULL} +}; + + +/* lupb_symtab ****************************************************************/ + +// lupb_symtab caches the Lua objects it vends (defs) via lookup or resolve. +// It does this (instead of creating a new Lua object every time) for two +// reasons: +// * it uses less memory, because we can reuse existing objects. +// * it gives the expected equality semantics, eg. symtab[sym] == symtab[sym]. +// +// The downside is a bit of complexity. We need a place to store these +// cached defs; the only good answer is in the metatable. This means we need +// a new metatable for every symtab instance (instead of one shared by all +// instances). Since this is different than the regular pattern, we can't +// use luaL_checkudata(), we have to implement it ourselves. 
+typedef struct { + upb_symtab *symtab; +} lupb_symtab; + +static int lupb_symtab_gc(lua_State *L); + +// Inherits a ref on the symtab. +static void lupb_pushnewsymtab(lua_State *L, upb_symtab *symtab) { + lupb_symtab *lsymtab = lua_newuserdata(L, sizeof(lupb_symtab)); + lsymtab->symtab = symtab; + // Create its metatable (see note above about mt-per-object). + lua_createtable(L, 0, 1); + luaL_getmetatable(L, "upb.symtab"); + lua_setfield(L, -2, "__index"); // Uses the type metatable to find methods. + lua_pushcfunction(L, lupb_symtab_gc); + lua_setfield(L, -2, "__gc"); + + // Put this metatable in the registry so we can find it for type validation. + lua_pushlightuserdata(L, lsymtab); + lua_pushvalue(L, -2); + lua_rawset(L, LUA_REGISTRYINDEX); + + // Set the symtab's metatable. + lua_setmetatable(L, -2); +} + +// Checks that narg is a proper lupb_symtab object. If it is, leaves its +// metatable on the stack for cache lookups/updates. +lupb_symtab *lupb_symtab_check(lua_State *L, int narg) { + lupb_symtab *symtab = lua_touserdata(L, narg); + if (symtab != NULL) { + if (lua_getmetatable(L, narg)) { + // We use a metatable-per-object to support memoization of defs. + lua_pushlightuserdata(L, symtab); + lua_rawget(L, LUA_REGISTRYINDEX); + if (lua_rawequal(L, -1, -2)) { // Does it have the correct mt? + lua_pop(L, 1); // Remove one copy of the mt, keep the other. + return symtab; + } + } + } + luaL_typerror(L, narg, "upb.symtab"); + return NULL; // Placate the compiler; luaL_typerror will longjmp out of here. +} + +static int lupb_symtab_gc(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab_unref(s->symtab); + + // Remove its metatable from the registry. + lua_pushlightuserdata(L, s); + lua_pushnil(L); + lua_rawset(L, LUA_REGISTRYINDEX); + return 0; +} + +// "mt" is the index of the metatable, -1 is the fqname of this def. +// Leaves the Lua object for the def at the top of the stack. +// Inherits a ref on "def". 
+static void lupb_symtab_getorcreate(lua_State *L, upb_def *def, int mt) { + // We may have this def cached, in which case we should return the same Lua + // object (as long as the value in the underlying symtab has not changed. + lua_rawget(L, mt); + if (!lua_isnil(L, -1)) { + // Def is cached, make sure it hasn't changed. + lupb_def *ldef = lua_touserdata(L, -1); + if (!ldef) luaL_error(L, "upb's internal cache is corrupt."); + if (ldef->def == def) { + // Cache is good, we can just return the cached value. + upb_def_unref(def); + return; + } + } + // Cached entry didn't exist or wasn't good. + lua_pop(L, 1); // Remove bad cached value. + lupb_pushnewdef(L, def); + + // Set it in the cache. + lua_pushvalue(L, 2); // push name (arg to this function). + lua_pushvalue(L, -2); // push the new def. + lua_rawset(L, mt); // set in the cache (the mt). +} + +static int lupb_symtab_lookup(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_def *def = upb_symtab_lookup(s->symtab, &namestr); + if (!def) { + // There shouldn't be a value in our cache either because the symtab + // currently provides no API for deleting syms from a table. In case + // this changes in the future, we explicitly delete from the cache here. + lua_pushvalue(L, 2); // push name (arg to this function). + lua_pushnil(L); + lua_rawset(L, -3); // lupb_symtab_check() left our mt on the stack. + + // Return nil because the symbol was not found. + lua_pushnil(L); + return 1; + } else { + lua_pushvalue(L, 2); + lupb_symtab_getorcreate(L, def, 3); + return 1; + } +} + +static int lupb_symtab_getdefs(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_deftype_t type = luaL_checkint(L, 2); + int count; + upb_def **defs = upb_symtab_getdefs(s->symtab, &count, type); + + // Create the table in which we will return the defs. 
+ lua_createtable(L, 0, count); + int ret = lua_gettop(L); + + for (int i = 0; i < count; i++) { + upb_def *def = defs[i]; + // Look it up in the cache by name. + upb_string *name = def->fqname; + lua_pushlstring(L, upb_string_getrobuf(name), upb_string_len(name)); + lua_pushvalue(L, -1); // Push it again since the getorcreate consumes one. + lupb_symtab_getorcreate(L, def, 3); + + // Add it to our return table. + lua_settable(L, ret); + } + return 1; +} + +static int lupb_symtab_add_descriptorproto(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab_add_descriptorproto(s->symtab); + return 0; // No args to return. +} + +static const struct luaL_Reg lupb_symtab_methods[] = { + {"add_descriptorproto", lupb_symtab_add_descriptorproto}, + //{"addfds", lupb_symtab_addfds}, + {"getdefs", lupb_symtab_getdefs}, + {"lookup", lupb_symtab_lookup}, + //{"resolve", lupb_symtab_resolve}, + {NULL, NULL} +}; + + +/* lupb toplevel **************************************************************/ + +static int lupb_symtab_new(lua_State *L) { + upb_symtab *s = upb_symtab_new(); + lupb_pushnewsymtab(L, s); + return 1; +} + +static const struct luaL_Reg lupb_toplevel_methods[] = { + {"symtab", lupb_symtab_new}, + {NULL, NULL} +}; + +int luaopen_upb(lua_State *L) { + luaL_newmetatable(L, "upb.msgdef"); + luaL_register(L, NULL, lupb_msgdef_methods); + + luaL_newmetatable(L, "upb.enumdef"); + luaL_register(L, NULL, lupb_enumdef_methods); + + luaL_newmetatable(L, "upb.symtab"); + luaL_register(L, NULL, lupb_symtab_methods); + + luaL_register(L, "upb", lupb_toplevel_methods); + return 1; // Return package table. +} -- cgit v1.2.3 From b471ca6b81b88dc23aae6a53345d94d9a2714a7c Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 6 Dec 2010 15:52:40 -0800 Subject: The last major revision to the upb_stream protocol. 
Sources and sinks communicate by means of a upb_handlers object, which encapsulates a set of handler callbacks and will possibly offer richer semantics in the future like giving specific fields different callbacks. The upb_handlers protocol supports delegation, so sets of handlers can be written in reusable ways. For example, if a set of handlers is written to handle a specific .proto type, those handlers can be used whether that type is at the top level or whether it is a sub-message of a higher-level type. Delegation allows the streaming protocol to properly compose. --- Makefile | 41 +++++---- core/upb_stream.c | 55 ------------ core/upb_stream.h | 167 +++++++++++++++++++++++------------ core/upb_stream_vtbl.h | 235 +++++++++++++++++++++---------------------------- core/upb_string.c | 9 ++ core/upb_string.h | 7 +- 6 files changed, 249 insertions(+), 265 deletions(-) delete mode 100644 core/upb_stream.c (limited to 'core/upb_string.h') diff --git a/Makefile b/Makefile index 131b3c0..5c6598c 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ CXX=g++ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. 
CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) -LDLIBS=-lpthread +LDLIBS=-lpthread core/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 LDFLAGS += -L/usr/local/lib -llua @@ -47,16 +47,27 @@ clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* - rm -rf tests/tests tests/t.* tests/test_table + rm -rf $(TESTS) tests/t.* rm -rf descriptor/descriptor.pb rm -rf tools/upbc deps cd lang_ext/python && python setup.py clean --all +-include deps +deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) + @./gen-deps.sh $(SRC) + # The core library (core/libupb.a) -SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ - core/upb_msg.c \ - descriptor/descriptor.c +SRC=core/upb.c \ + core/upb_table.c \ + core/upb_string.c \ + descriptor/descriptor.c \ +# core/upb_def.c \ +# core/upb_msg.c \ +# stream/upb_decoder.c \ +# stream/upb_stdio.c \ +# stream/upb_strstream.c \ +# stream/upb_textprinter.c + $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
OTHERSRC=src/upb_encoder.c src/upb_text.c @@ -101,15 +112,16 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_def \ - tests/test_decoder \ - tests/t.test_vs_proto2.googlemessage1 \ - tests/t.test_vs_proto2.googlemessage2 \ - tests/test.proto.pb + tests/test_stream \ +# tests/test_def \ +# tests/test_decoder \ +# tests/t.test_vs_proto2.googlemessage1 \ +# tests/t.test_vs_proto2.googlemessage2 \ +# tests/test.proto.pb tests: $(TESTS) OTHER_TESTS=tests/tests \ -$(TESTS): core/libupb.a +$(TESTS): $(LIBUPB) VALGRIND=valgrind --leak-check=full --error-exitcode=1 #VALGRIND= @@ -118,7 +130,7 @@ test: tests @set -e # Abort on error. # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/*; do \ + @for test in $(TESTS); do \ if [ -x ./$$test ] ; then \ echo !!! $(VALGRIND) ./$$test; \ $(VALGRIND) ./$$test || exit 1; \ @@ -247,6 +259,3 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \ -DMESSAGE_HFILE=\"google_messages.pb.h\" \ benchmarks/google_messages.pb.cc -lprotobuf -lpthread --include deps -deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) - @./gen-deps.sh $(SRC) diff --git a/core/upb_stream.c b/core/upb_stream.c deleted file mode 100644 index 0d47392..0000000 --- a/core/upb_stream.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
- */ - -#include "upb_stream.h" - -#include "upb_def.h" - -#define CHECKSRC(x) if(!x) goto src_err -#define CHECKSINK(x) if(!x) goto sink_err - -void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { - upb_fielddef *f; - upb_string *str = NULL; - int depth = 0; - while(1) { - while((f = upb_src_getdef(src)) != NULL) { - CHECKSINK(upb_sink_putdef(sink, f)); - if(upb_issubmsg(f)) { - upb_src_startmsg(src); - upb_sink_startmsg(sink); - ++depth; - } else if(upb_isstring(f)) { - str = upb_string_tryrecycle(str); - CHECKSRC(upb_src_getstr(src, str)); - CHECKSINK(upb_sink_putstr(sink, str)); - } else { - // Primitive type. - upb_value val; - CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); - CHECKSINK(upb_sink_putval(sink, val)); - } - } - // If we're not EOF now, the loop terminated due to an error. - CHECKSRC(upb_src_eof(src)); - if (depth == 0) break; - --depth; - upb_src_endmsg(src); - upb_sink_endmsg(sink); - } - upb_string_unref(str); - return; - -src_err: - upb_string_unref(str); - upb_copyerr(status, upb_src_status(src)); - return; - -sink_err: - upb_string_unref(str); - upb_copyerr(status, upb_sink_status(sink)); - return; -} diff --git a/core/upb_stream.h b/core/upb_stream.h index cd00c1e..1eb111e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -19,7 +19,7 @@ #ifndef UPB_SRCSINK_H #define UPB_SRCSINK_H -#include "upb_stream_vtbl.h" +#include "upb.h" #ifdef __cplusplus extern "C" { @@ -28,98 +28,149 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -/* upb_sink *******************************************************************/ +/* upb_handlers ***************************************************************/ -// A upb_sink is a component that receives a stream of protobuf data. -// It is an abstract interface that is implemented either by the system or -// by users. -// -// TODO: unknown fields. 
+// upb_handlers define the interface by which a upb_src passes data to a +// upb_sink. -// Constants that a sink returns to indicate to its caller whether it should +// Constants that a handler returns to indicate to its caller whether it should // continue or not. typedef enum { // Caller should continue sending values to the sink. - UPB_SINK_CONTINUE, + UPB_CONTINUE, - // Return from upb_sink_putdef() to skip the next value (which may be a - // submessage). - UPB_SINK_SKIP, + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIP, // Caller should stop sending values; check sink status for details. // If processing resumes later, it should resume with the next value. - UPB_SINK_STOP, -} upb_sinkret_t; - -// Puts the given fielddef into the stream. -upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); - -// Puts the given value into the stream. -upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); -upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); - -// Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a -// client could have a submessage already serialized, and therefore put it -// as a string instead of its individual elements. -upb_sinkret_t upb_sink_startmsg(upb_sink *sink); -upb_sinkret_t upb_sink_endmsg(upb_sink *sink); - -// Returns the current error status for the stream. -upb_status *upb_sink_status(upb_sink *sink); - - -/* upb_src ********************************************************************/ - -// A upb_src is a resumable push parser for protobuf data. It works by first -// accepting registration of a upb_sink to which it will push data, then -// in a second phase is parses the actual data. + UPB_STOP, + + // When returned from a startsubmsg handler, indicates that the submessage + // should be handled by a different set of handlers, which have been + // registered on the provided upb_handlers object. 
May not be returned + // from any other callback. + UPB_DELEGATE, +} upb_flow_t; + +// upb_handlers +struct _upb_handlers; +typedef struct _upb_handlers upb_handlers; + +typedef void (*upb_startmsg_handler_t)(void *closure); +typedef void (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_value_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_value val); +typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_handlers *delegate_to); +typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, + upb_field_number_t fieldnum, + upb_value val); + +// An empty set of handlers, for convenient copy/paste: // - -// Sets the given sink as the target of this src. It will be called when the -// upb_src_parse() is run. -void upb_src_setsink(upb_src *src, upb_sink *sink); - -// Pushes data from this src to the previously registered sink, returning -// true if all data was processed. If false is returned, check -// upb_src_status() for details; if it is a resumable status, upb_src_run -// may be called again to resume processing. -bool upb_src_run(upb_src *src); +// static void startmsg(void *closure) { +// // Called when the top-level message begins. +// } +// +// static void endmsg(void *closure) { +// // Called when the top-level message ends. +// } +// +// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { +// // Called for every value in the stream. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f, +// upb_handlers *delegate_to) { +// // Called when a submessage begins; can delegate by returning UPB_DELEGATE. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t endsubmsg(void *closure) { +// // Called when a submessage ends. 
+// return UPB_CONTINUE; +// } +// +// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, +// upb_value val) { +// // Called when an unknown value is encountered. +// return UPB_CONTINUE; +// } +typedef struct { + upb_startmsg_handler_t startmsg; + upb_endmsg_handler_t endmsg; + upb_value_handler_t value; + upb_startsubmsg_handler_t startsubmsg; + upb_endsubmsg_handler_t endsubmsg; + upb_unknownval_handler_t unknownval; +} upb_handlerset; + +// Functions to register handlers on a upb_handlers object. +INLINE void upb_handlers_init(upb_handlers *h); +INLINE void upb_handlers_uninit(upb_handlers *h); +INLINE void upb_handlers_reset(upb_handlers *h); +INLINE bool upb_handlers_isempty(upb_handlers *h); +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); + +// An object that transparently handles delegation so that the caller needs +// only follow the protocol as if delegation did not exist. +struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE void upb_dispatch_startmsg(upb_dispatcher *d); +INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, upb_value val); /* upb_bytesrc ****************************************************************/ +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; + // Returns the next string in the stream. false is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof. 
-bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing // data in "str" with more data. The call fails if fewer than len bytes are // available in the stream. -bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file // until a read has failed due to eof. It cannot preemptively tell you that // the next call will fail due to eof. Since these are the semantics that C // and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); +INLINE bool upb_bytesrc_eof(upb_bytesrc *src); /* upb_bytesink ***************************************************************/ +struct _upb_bytesink; +typedef struct _upb_bytesink upb_bytesink; + // Puts the given string. Returns the number of bytes that were actually, // consumed, which may be fewer than were in the string, or <0 on error. -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. -upb_status *upb_bytesink_status(upb_bytesink *sink); - -/* Utility functions **********************************************************/ - -// Streams data from src to sink until EOF or error. 
-void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status); +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); +#include "upb_stream_vtbl.h" #ifdef __cplusplus } /* extern "C" */ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 96f6cfe..91464a7 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -5,59 +5,21 @@ * interfaces. Only components that are implementing these interfaces need * to worry about this file. * - * This is tedious; this is the place in upb where I most wish I had a C++ - * feature. In C++ the compiler would generate this all for me. If there's - * any consolation, it's that I have a bit of flexibility you don't have in - * C++: I could, with preprocessor magic alone "de-virtualize" this interface - * for a particular source file. Say I had a C file that called a upb_src, - * but didn't want to pay the virtual function overhead. I could define: - * - * #define upb_src_getdef(src) upb_decoder_getdef((upb_decoder*)src) - * #define upb_src_stargmsg(src) upb_decoder_startmsg(upb_decoder*)src) - * // etc. - * - * The source file is compatible with the regular upb_src interface, but here - * we bind it to a particular upb_src (upb_decoder), which could lead to - * improved performance at a loss of flexibility for this one upb_src client. - * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. */ #ifndef UPB_SRCSINK_VTBL_H_ #define UPB_SRCSINK_VTBL_H_ -#include "upb.h" +#include +#include "upb_stream.h" #ifdef __cplusplus extern "C" { #endif -struct upb_src; -typedef struct upb_src upb_src; -struct upb_sink; -typedef struct upb_sink upb_sink; -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - // Typedefs for function pointers to all of the virtual functions. -// upb_src. 
-typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_getstr_fptr)(upb_src *src, upb_string *str); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); - -// upb_sink. -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_putstr_fptr)(upb_sink *sink, upb_string *str); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); - // upb_bytesrc. typedef bool (*upb_bytesrc_get_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); @@ -68,23 +30,6 @@ typedef bool (*upb_bytesrc_append_fptr)( typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); // Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_getstr_fptr getstr; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; -} upb_src_vtable; - -typedef struct { - upb_sink_putdef_fptr putdef; - upb_sink_putval_fptr putval; - upb_sink_putstr_fptr putstr; - upb_sink_startmsg_fptr startmsg; - upb_sink_endmsg_fptr endmsg; -} upb_sink_vtable; - typedef struct { upb_bytesrc_get_fptr get; upb_bytesrc_append_fptr append; @@ -97,42 +42,18 @@ typedef struct { // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
-struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_sink { - upb_sink_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_bytesrc { +struct _upb_bytesrc { upb_bytesrc_vtable *vtbl; upb_status status; bool eof; }; -struct upb_bytesink { +struct _upb_bytesink { upb_bytesink_vtable *vtbl; upb_status status; bool eof; }; -INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - -INLINE void upb_sink_init(upb_sink *s, upb_sink_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; @@ -146,46 +67,6 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { } // Implementation of virtual function dispatch. -INLINE struct _upb_fielddef *upb_src_getdef(upb_src *src) { - return src->vtbl->getdef(src); -} -INLINE bool upb_src_getval(upb_src *src, upb_valueptr val) { - return src->vtbl->getval(src, val); -} -INLINE bool upb_src_getstr(upb_src *src, upb_string *str) { - return src->vtbl->getstr(src, str); -} -INLINE bool upb_src_skipval(upb_src *src) { return src->vtbl->skipval(src); } -INLINE bool upb_src_startmsg(upb_src *src) { return src->vtbl->startmsg(src); } -INLINE bool upb_src_endmsg(upb_src *src) { return src->vtbl->endmsg(src); } - -// Implementation of type-specific upb_src accessors. If we encounter a upb_src -// where these can be implemented directly in a measurably more efficient way, -// we can make these part of the vtable also. -// -// For <64-bit types we have to use a temporary to accommodate baredecoder, -// which does not know the actual width of the type. 
-INLINE bool upb_src_getbool(upb_src *src, bool *_bool) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *_bool = val._bool; - return ret; -} - -INLINE bool upb_src_getint32(upb_src *src, int32_t *i32) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *i32 = val.int32; - return ret; -} - -// TODO. -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); // upb_bytesrc INLINE bool upb_bytesrc_get( @@ -198,24 +79,108 @@ INLINE bool upb_bytesrc_append( return bytesrc->vtbl->append(bytesrc, str, len); } -// upb_sink -INLINE bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def) { - return sink->vtbl->putdef(sink, def); +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_handlers +struct _upb_handlers { + upb_handlerset *set; + void *closure; +}; + +INLINE void upb_handlers_init(upb_handlers *h) { + (void)h; +} +INLINE void upb_handlers_uninit(upb_handlers *h) { + (void)h; +} + +INLINE void upb_handlers_reset(upb_handlers *h) { + h->set = NULL; + h->closure = NULL; +} + +INLINE bool upb_handlers_isempty(upb_handlers *h) { + return !h->set && !h->closure; +} + +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + h->set = set; +} + +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure) { + h->closure = closure; +} + +// upb_dispatcher +typedef struct { + upb_handlers handlers; + int depth; +} upb_dispatcher_frame; + +struct _upb_dispatcher { + upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit; +}; + +INLINE void upb_dispatcher_init(upb_dispatcher *d) { + d->limit = d->stack + sizeof(d->stack); } 
-INLINE bool upb_sink_putval(upb_sink *sink, upb_value val) { - return sink->vtbl->putval(sink, val); + +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { + d->top = d->stack; + d->top->depth = 1; // Never want to trigger end-of-delegation. + d->top->handlers = *h; } -INLINE bool upb_sink_putstr(upb_sink *sink, upb_string *str) { - return sink->vtbl->putstr(sink, str); + +INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE bool upb_sink_startmsg(upb_sink *sink) { - return sink->vtbl->startmsg(sink); + +INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->endmsg(d->top->handlers.closure); } -INLINE bool upb_sink_endmsg(upb_sink *sink) { - return sink->vtbl->endmsg(sink); + +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f) { + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_handlers_reset(&handlers); + upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers); + assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers)); + if (ret == UPB_DELEGATE) { + ++d->top; + d->top->handlers = handlers; + d->top->depth = 0; + d->top->handlers.set->startmsg(d->top->handlers.closure); + ret = UPB_CONTINUE; + } + ++d->top->depth; + upb_handlers_uninit(&handlers); + return ret; +} + +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + if (--d->top->depth == 0) { + d->top->handlers.set->endmsg(d->top->handlers.closure); + --d->top; + } + return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } -INLINE upb_status *upb_sink_status(upb_sink *sink) { return &sink->status; } +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, + struct _upb_fielddef *f, + upb_value val) { + return d->top->handlers.set->value(d->top->handlers.closure, f, val); +} + +INLINE upb_flow_t 
upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val) { + return d->top->handlers.set->unknownval(d->top->handlers.closure, + fieldnum, val); +} // upb_bytesink INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { diff --git a/core/upb_string.c b/core/upb_string.c index 847a3ee..4f5f5c2 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -29,6 +29,7 @@ upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; str->cached_mem = NULL; + str->len = 0; #ifndef UPB_HAVE_MSIZE str->size = 0; #endif @@ -132,6 +133,14 @@ upb_string *upb_strdup(upb_string *s) { return str; } +void upb_strcat(upb_string *s, upb_string *append) { + uint32_t old_size = upb_string_len(s); + uint32_t append_size = upb_string_len(append); + uint32_t new_size = old_size + append_size; + char *buf = upb_string_getrwbuf(s, new_size); + memcpy(buf + old_size, upb_string_getrobuf(append), append_size); +} + upb_string *upb_strreadfile(const char *filename) { FILE *f = fopen(filename, "rb"); if(!f) return NULL; diff --git a/core/upb_string.h b/core/upb_string.h index bd89f67..ee345e3 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -18,6 +18,11 @@ * string). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. + * + * Reference-counted strings have recently fallen out of favor because of the + * performance impacts of doing thread-safe reference counting with atomic + * operations. We side-step this issue by not performing atomic operations + * unless the string has been marked thread-safe. */ #ifndef UPB_STRING_H @@ -34,7 +39,7 @@ extern "C" { #endif // All members of this struct are private, and may only be read/written through -// the associated functions. Also, strings may *only* be allocated on the heap. +// the associated functions. struct _upb_string { // The pointer to our currently active data. 
This may be memory we own // or a pointer into memory we don't own. -- cgit v1.2.3 From bcc688a303439c758a47da9f0eb1c064ece6ce09 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 20:37:04 -0800 Subject: upb_def compiles again! --- core/upb.c | 2 +- core/upb.h | 37 ++++--- core/upb_def.c | 283 +++++++++++++++++++++++++++++++------------------ core/upb_msg.c | 13 ++- core/upb_stream.h | 62 +++++++---- core/upb_stream_vtbl.h | 88 ++++++++++----- core/upb_string.c | 2 +- core/upb_string.h | 13 +-- 8 files changed, 325 insertions(+), 175 deletions(-) (limited to 'core/upb_string.h') diff --git a/core/upb.c b/core/upb.c index c396323..2f715d0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -45,7 +45,7 @@ void upb_seterr(upb_status *status, enum upb_status_code code, { if(upb_ok(status)) { // The first error is the most interesting. status->code = code; - status->str = upb_string_tryrecycle(status->str); + upb_string_recycle(&status->str); va_list args; va_start(args, msg); upb_string_vprintf(status->str, msg, args); diff --git a/core/upb.h b/core/upb.h index 2057d60..64bc88c 100644 --- a/core/upb.h +++ b/core/upb.h @@ -126,14 +126,20 @@ struct _upb_array; typedef struct _upb_array upb_array; struct _upb_msg; typedef struct _upb_msg upb_msg; +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; -typedef uint32_t upb_strlen_t; +typedef int32_t upb_strlen_t; +#define UPB_STRLEN_MAX INT32_MAX // The type of a upb_value. This is like a upb_fieldtype_t, but adds the // constant UPB_VALUETYPE_ARRAY to represent an array. typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_ARRAY 32 +#define UPB_VALUETYPE_BYTESRC 32 +#define UPB_VALUETYPE_RAW 33 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
typedef struct { @@ -146,6 +152,7 @@ typedef struct { uint64_t uint64; bool _bool; upb_string *str; + upb_bytesrc *bytesrc; upb_msg *msg; upb_array *arr; upb_atomic_refcount_t *refcount; @@ -167,21 +174,27 @@ typedef struct { #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == UPB_TYPE(proto_type)); \ + assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ return val.val.membername; \ } \ - void upb_value_ ## name(upb_value *val, ctype cval) { \ - SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + void upb_value_set ## name(upb_value *val, ctype cval) { \ + SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } -UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); -UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, INT32); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); -UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); -UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); +UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); +UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); + +void upb_value_setraw(upb_value *val, uint64_t cval) { + SET_TYPE(val->type, UPB_VALUETYPE_RAW); + val->val.uint64 = cval; +} // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. 
diff --git a/core/upb_def.c b/core/upb_def.c index 4320fb6..4f12dbe 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -228,6 +228,10 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } +static upb_def *upb_deflist_last(upb_deflist *l) { + return l->defs[l->len-1]; +} + // Qualify the defname for all defs starting with offset "start" with "str". static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { @@ -238,8 +242,14 @@ static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) } } +// We keep a stack of all the message scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). typedef struct { upb_string *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1.
int start; } upb_defbuilder_frame; @@ -250,6 +260,10 @@ struct _upb_defbuilder { uint32_t number; upb_string *name; + bool saw_number; + bool saw_name; + + upb_fielddef *f; }; typedef struct _upb_defbuilder upb_defbuilder; @@ -259,6 +273,28 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_defbuilder_init(upb_defbuilder *b) { + upb_deflist_init(&b->defs); + b->stack_len = 0; + b->name = NULL; +} + +static void upb_defbuilder_uninit(upb_defbuilder *b) { + upb_string_unref(b->name); + upb_deflist_uninit(&b->defs); +} + +static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { + if (b->stack_len <= 1) return NULL; + int index = b->stack[b->stack_len-1].start - 1; + assert(index >= 0); + return upb_downcast_msgdef(b->defs.defs[index]); +} + +static upb_def *upb_defbuilder_last(upb_defbuilder *b) { + return upb_deflist_last(&b->defs); +} + // Start/end handlers for FileDescriptorProto and DescriptorProto (the two // entities that have names and can contain sub-definitions. void upb_defbuilder_startcontainer(upb_defbuilder *b) { @@ -291,9 +327,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( @@ -308,19 +343,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( return UPB_DELEGATE; default: // TODO: services and extensions. 
- return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -333,9 +368,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( @@ -345,20 +379,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: upb_defbuilder_register_FileDescriptorProto(b, h); return UPB_DELEGATE; - default: - return UPB_SKIP; } + return UPB_SKIPSUBMSG; } static void upb_defbuilder_register_FileDescriptorSet( upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorSet_value, &upb_defbuilder_FileDescriptorSet_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -406,18 +439,20 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { - b->number = -1; - b->name = NULL; +static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + b->saw_number = false; + b->saw_name = false; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - b->name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); + upb_string_unref(b->name); + upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); @@ -428,34 +463,37 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { - if(b->name == NULL || b->number == -1) { - upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - goto err; +static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + if(!b->saw_number || !b->saw_name) { + //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); + //goto err; + return; } - upb_ntoi_ent ntoi_ent = {{name, 0}, number}; - upb_iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; + upb_iton_ent iton_ent = {{b->number, 0}, b->name}; + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. 
- return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumValueDescriptorProto_startmsg, &upb_enumdef_EnumValueDescriptorProto_endmsg, &upb_enumdef_EnumValueDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } // google.protobuf.EnumDescriptorProto. -void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { +void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); @@ -463,42 +501,51 @@ void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { upb_deflist_push(&b->defs, UPB_UPCAST(e)); } -void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { - assert(e->base.fqname); +void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + assert(upb_defbuilder_last(b)->fqname != NULL); } -static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: { + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_string_unref(e->base.fqname); - e->base.fqname = upb_value_getstr(val); + e->base.fqname = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: return BEGIN_SUBMSG; + default: + return UPB_CONTINUE; } - return UPB_CONTINUE; } -static upb_flow_t 
upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: upb_enumdef_register_EnumValueDescriptorProto(b, h); return UPB_DELEGATE; + default: + return UPB_SKIPSUBMSG; } - return UPB_SKIP; } static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumDescriptorProto_startmsg, &upb_enumdef_EnumDescriptorProto_endmsg, &upb_enumdef_EnumDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + &upb_enumdef_EnumDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -529,56 +576,71 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(upb_defbuilder *b) { +static void upb_fielddef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; f->name = NULL; f->def = NULL; f->owned = false; - f->msgdef = m; + f->msgdef = upb_defbuilder_top(b); b->f = f; } -static void upb_fielddef_endmsg(upb_defbuilder *b) { +static void upb_fielddef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_fielddef *f = b->f; // TODO: verify that all required fields were present. assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. 
+ upb_msgdef *m = upb_defbuilder_top(b); upb_itof_ent itof_ent = {{f->number, 0}, f}; upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); - return true; } -static upb_flow_t upb_fielddef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { - switch(parsed_f->number) { +static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - f->type = upb_value_getint32(val); + b->f->type = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - f->label = upb_value_getint32(val); + b->f->label = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - f->number = upb_value_getint32(val); + b->f->number = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); + upb_string_unref(b->f->name); + b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if(b->f->def) upb_def_unref(b->f->def); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->owned = true; break; } } return UPB_CONTINUE; } +static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_fielddef_startmsg, + &upb_fielddef_endmsg, + &upb_fielddef_value, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b); +} + /* upb_msgdef *****************************************************************/ @@ -596,21 
+658,24 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(upb_defbuilder *b) { +static void upb_msgdef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b); } -static void upb_msgdef_endmsg(upb_defbuilder *b) { - upb_msgdef *m = upb_downcast_msgdef(upb_deflist_stacktop(&m->defs)); +static void upb_msgdef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return UPB_ERROR; + //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + //return UPB_ERROR; + return; } // Create an ordering over the fields. 
@@ -651,51 +716,57 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - return UPB_CONTINUE; + //return UPB_CONTINUE; } -static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_defbuilder_setscopename(upb_value_getstr(val)); - break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: { + upb_msgdef *m = upb_defbuilder_top(b); + upb_string_unref(m->base.fqname); + m->base.fqname = upb_string_getref(upb_value_getstr(val)); + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; default: // TODO: extensions. 
- return UPB_SKIP; + return UPB_CONTINUE; } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, +static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - upb_register_FieldDescriptorProto(b, h); + upb_fielddef_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_register_EnumDescriptorProto(b, h); + upb_enumdef_register_EnumDescriptorProto(b, h); return UPB_DELEGATE; break; default: - return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_msgdef_startmsg, &upb_msgdef_endmsg, &upb_msgdef_value, &upb_msgdef_startsubmsg, - } - upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -884,7 +955,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either // succeeds or fails. Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, +bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, bool allow_redef, upb_status *status) { upb_rwlock_wrlock(&s->lock); @@ -892,9 +963,9 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Build a table of the defs we mean to add, for duplicate detection and name // resolution. 
upb_strtable tmptab; - upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); - for (uint32_t i = 0; i < defs->len; i++) { - upb_def *def = defs->defs[i]; + upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); + for (int i = 0; i < num_defs; i++) { + upb_def *def = defs[i]; upb_symtab_ent e = {{def->fqname, 0}, def}; // Redefinition is never allowed within a single FileDescriptorSet. @@ -909,13 +980,13 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Pass ownership from the deflist to the strtable. upb_strtable_insert(&tmptab, &e.e); - defs->defs[i] = NULL; + defs[i] = NULL; } // TODO: process the list of extensions by modifying entries from // tmptab in-place (copying them from the symtab first if necessary). - CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); + if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; // The defs in tmptab have been vetted, and can be added to the symtab // without causing errors. Now add all tmptab defs to the symtab, @@ -946,6 +1017,7 @@ err: upb_def_unref(e->def); } upb_strtable_free(&tmptab); + for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]); return false; } @@ -1026,20 +1098,18 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { - upb_defbuilder *b = upb_defbuilder_new(); - upb_defbuilder_register_handlers(b, upb_src_gethandlers(src)); + upb_defbuilder b; + upb_defbuilder_init(&b); + //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); + upb_defbuilder_register_FileDescriptorSet(&b, NULL); if(!upb_src_run(src)) { upb_copyerr(status, upb_src_status(src)); + upb_defbuilder_uninit(&b); return; } - upb_symtab_add_defs(s, b->defs, b->defs_len, false, status); - upb_deflist_uninit(&defs); + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_defbuilder_uninit(&b); return; - -src_err: - upb_copyerr(status, upb_src_status(src)); 
-err: - upb_deflist_uninit(&defs); } @@ -1074,8 +1144,10 @@ err: // complicated to support on big-endian machines. typedef struct { + upb_src src; upb_string *input; upb_strlen_t offset; + upb_dispatcher dispatcher; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -1121,9 +1193,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_dispatch_startmsg(&d->dispatcher); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. - while(d->offset >= *d->top) { + while(d->offset >= *top) { upb_dispatch_endsubmsg(&d->dispatcher); - d->offset = *(d->top--); + d->offset = *(top--); } uint32_t key = upb_baredecoder_readv64(d); @@ -1134,16 +1206,16 @@ bool upb_baredecoder_run(upb_baredecoder *d) { uint32_t delim_len = upb_baredecoder_readv32(d); // We don't know if it's a string or a submessage; deliver first as // string. - str = upb_string_tryrecycle(str); - upb_string_substr(str, d->input, d->offset, d->delimited_len); + upb_string_recycle(&str); + upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == UPB_TREAT_AS_SUBMSG) { + if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { // Should deliver as a submessage instead. 
upb_dispatch_startsubmsg(&d->dispatcher, &f); - *(++d->top) = d->offset + delimited_len; + *(++top) = d->offset + delim_len; } else { - d->offset += delimited_len; + d->offset += delim_len; } } else { upb_value v; @@ -1167,23 +1239,24 @@ bool upb_baredecoder_run(upb_baredecoder *d) { } } upb_dispatch_endmsg(&d->dispatcher); + return true; } -static upb_src_vtable upb_baredecoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_baredecoder_getdef, - (upb_src_getval_fptr)&upb_baredecoder_getval, - (upb_src_getstr_fptr)&upb_baredecoder_getstr, - (upb_src_skipval_fptr)&upb_baredecoder_skipval, - (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, -}; - static upb_baredecoder *upb_baredecoder_new(upb_string *str) { + //static upb_src_vtable vtbl = { + // (upb_src_getdef_fptr)&upb_baredecoder_getdef, + // (upb_src_getval_fptr)&upb_baredecoder_getval, + // (upb_src_getstr_fptr)&upb_baredecoder_getstr, + // (upb_src_skipval_fptr)&upb_baredecoder_skipval, + // (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, + // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, + //}; upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); d->offset = 0; - upb_src_init(&d->src, &upb_baredecoder_src_vtbl); + upb_dispatcher_init(&d->dispatcher); + //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_msg.c b/core/upb_msg.c index 75f7a35..a0a5196 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -7,6 +7,8 @@ */ #include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" void _upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { @@ -108,10 +110,13 @@ upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, upb_status *status) { - (void)msg; - (void)md; - (void)str; - (void)status; + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, str); + upb_decoder *d = upb_decoder_new(md); + 
upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_decoder_free(d); + upb_stringsrc_free(ssrc); } void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, diff --git a/core/upb_stream.h b/core/upb_stream.h index c96c544..9ae69de 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,13 +39,16 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). - UPB_SKIP, + // An error occurred; check status for details. + UPB_ERROR, - // Caller should stop sending values; check sink status for details. + // Processing should stop for now, but could be resumed later. // If processing resumes later, it should resume with the next value. - UPB_STOP, + UPB_SUSPEND, + + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIPSUBMSG, // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been @@ -117,6 +120,9 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +// TODO: for clients that want to increase efficiency by preventing bytesrcs +// from automatically being converted to strings in the value callback. 
+// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); // An object that transparently handles delegation so that the caller needs @@ -140,21 +146,30 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; +bool upb_src_run(upb_src *src); +upb_status *upb_src_status(upb_src *src); -/* upb_bytesrc ****************************************************************/ - -struct _upb_bytesrc; -typedef struct _upb_bytesrc upb_bytesrc; -// Returns the next string in the stream. false is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +/* upb_bytesrc ****************************************************************/ -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. The call fails if fewer than len bytes are -// available in the stream. -INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// Reads up to "count" bytes into "buf", returning the total number of bytes +// read. If <0, indicates error (check upb_bytesrc_status for details). +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count); + +// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing +// existing string data (which avoids a copy). +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count); + +// A convenience function for getting all the remaining data in a upb_bytesrc +// as a upb_string. Returns false and sets "status" if the operation fails. 
+INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status); +INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, + upb_status *status) { + return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); +} // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file @@ -164,14 +179,21 @@ INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t l INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); INLINE bool upb_bytesrc_eof(upb_bytesrc *src); + /* upb_bytesink ***************************************************************/ struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +// Writes up to "count" bytes from "buf", returning the total number of bytes +// written. If <0, indicates error (check upb_bytesink_status() for details). +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count); + +// Puts the given string, which may alias the string data (which avoids a +// copy). Returns the number of bytes that were actually, consumed, which may +// be fewer than were in the string, or <0 on error. +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 91464a7..c0cf04f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -20,23 +20,33 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. +// upb_src +struct _upb_src { +}; +typedef struct { +} upb_src_vtbl; + // upb_bytesrc. 
-typedef bool (*upb_bytesrc_get_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); +typedef upb_strlen_t (*upb_bytesrc_read_fptr)( + upb_bytesrc *src, void *buf, upb_strlen_t count); +typedef bool (*upb_bytesrc_getstr_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t count); // upb_bytesink. -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); +typedef upb_strlen_t (*upb_bytesink_write_fptr)( + upb_bytesink *bytesink, void *buf, upb_strlen_t count); +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str); // Vtables for the above interfaces. typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; + upb_bytesrc_read_fptr read; + upb_bytesrc_getstr_fptr getstr; } upb_bytesrc_vtable; typedef struct { - upb_bytesink_put_fptr put; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; } upb_bytesink_vtable; // "Base Class" definitions; components that implement these interfaces should @@ -69,19 +79,56 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { // Implementation of virtual function dispatch. // upb_bytesrc -INLINE bool upb_bytesrc_get( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { - return bytesrc->vtbl->get(bytesrc, str, minlen); -} +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count) { + return src->vtbl->read(src, buf, count); +} + +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count) { + return src->vtbl->getstr(src, str, count); +} + +INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + // We start with a getstr, because that could possibly alias data instead of + // copying. 
+ if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + // Trade-off between number of read calls and amount of overallocation. + const size_t bufsize = 4096; + while (!upb_bytesrc_eof(src)) { + upb_strlen_t len = upb_string_len(str); + char *buf = upb_string_getrwbuf(str, len + bufsize); + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); + if (read < 0) goto error; + // Resize to proper size. + upb_string_getrwbuf(str, len + read); + } + return true; -INLINE bool upb_bytesrc_append( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { - return bytesrc->vtbl->append(bytesrc, str, len); +error: + upb_copyerr(status, upb_bytesrc_status(src)); + return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_bytesink +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count) { + return sink->vtbl->write(sink, buf, count); +} + +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} + +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; @@ -182,17 +229,6 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, fieldnum, val); } -// upb_bytesink -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { - return sink->vtbl->put(sink, str); -} -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { - return &sink->status; -} - -// upb_bytesink - - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/core/upb_string.c b/core/upb_string.c index 4f5f5c2..b243dfd 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -73,7 +73,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { // assert(str->ptr == NULL); - uint32_t size = 
upb_string_size(str); + upb_strlen_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); str->cached_mem = realloc(str->cached_mem, size); diff --git a/core/upb_string.h b/core/upb_string.h index ee345e3..f82603b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -119,20 +119,21 @@ INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The returned string is either "str" if it could be -// recycled or a newly created string if "str" has other references. +// data written to it. After the function returns, "str" points to a writable +// string, which is either the original string if it had no other references +// or a newly created string if it did have other references. // -// As a special case, passing NULL will allocate a new string. This is -// convenient for the pattern: +// As a special case, passing a pointer to NULL will allocate a new string. +// This is convenient for the pattern: // // upb_string *str = NULL; // while (x) { // if (y) { -// str = upb_string_tryrecycle(str); +// upb_string_recycle(&str); // upb_src_getstr(str); // } // } -upb_string *upb_string_tryrecycle(upb_string *str); +upb_string *upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been -- cgit v1.2.3 From a695b92ccea4b82180ae45d21d7ed4445f7d0769 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 19:18:22 -0800 Subject: Debugging test_def, it's close to working again! 
--- Makefile | 10 +++--- core/upb_def.c | 80 +++++++++++++++++++++++++++-------------- core/upb_stream.h | 21 ++++++++--- core/upb_stream_vtbl.h | 96 ++++++++++++++++++++++++++++++++++++++++---------- core/upb_string.c | 8 ++--- core/upb_string.h | 2 +- tests/test_def.c | 1 + tests/test_string.c | 19 +++++----- 8 files changed, 171 insertions(+), 66 deletions(-) (limited to 'core/upb_string.h') diff --git a/Makefile b/Makefile index 42c7d41..af79363 100644 --- a/Makefile +++ b/Makefile @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a @@ -112,13 +112,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_stream \ -# tests/test_def \ + tests/test_def \ +# tests/test_stream \ # tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb -tests: $(TESTS) +tests: $(LIBUPB) $(TESTS) OTHER_TESTS=tests/tests \ $(TESTS): $(LIBUPB) diff --git a/core/upb_def.c b/core/upb_def.c index 79b6632..a935930 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -319,6 +319,18 @@ void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { } // Handlers for google.protobuf.FileDescriptorProto. 
+static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} + static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { @@ -353,8 +365,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_startmsg, + &upb_defbuilder_FileDescriptorProto_endmsg, &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; @@ -457,9 +469,11 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); upb_string_getref(upb_value_getstr(val)); + b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); + b->saw_number = true; break; default: break; @@ -507,8 +521,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { } static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { - upb_defbuilder *b = _b; - assert(upb_defbuilder_last(b)->fqname != NULL); + (void)_b; + assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL); return UPB_CONTINUE; } @@ -627,10 +641,8 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); - b->f->def = 
UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; } @@ -720,6 +732,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { m->size = offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); } + free(sorted_fields); if (max_align > 0) m->size = upb_align_up(m->size, max_align); @@ -1131,6 +1144,7 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) // * keeping a pointer to the upb_fielddef* and reading it later (the same // upb_fielddef is reused over and over). // * detecting errors in the input (we trust that our input is known-good). +// * skipping the rest of the submessage (UPB_SKIPSUBMSG). // // It also does not support any of the follow protobuf features: // * packed fields. @@ -1189,18 +1203,27 @@ static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) return val; } -bool upb_baredecoder_run(upb_baredecoder *d) { +static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_baredecoder *d = (upb_baredecoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers); +} + +static void upb_baredecoder_run(upb_src *src, upb_status *status) { + upb_baredecoder *d = (upb_baredecoder*)src; + assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; upb_strlen_t stack[UPB_MAX_NESTING]; upb_strlen_t *top = &stack[0]; *top = upb_string_len(d->input); d->offset = 0; - upb_dispatch_startmsg(&d->dispatcher); +#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; + + CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. 
while(d->offset >= *top) { - upb_dispatch_endsubmsg(&d->dispatcher); + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); d->offset = *(top--); } @@ -1216,9 +1239,11 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { + upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(ret); + if(ret == BEGIN_SUBMSG) { // Should deliver as a submessage instead. - upb_dispatch_startsubmsg(&d->dispatcher, &f); + CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f)); *(++top) = d->offset + delim_len; } else { d->offset += delim_len; @@ -1228,11 +1253,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { switch(wt) { case UPB_WIRE_TYPE_VARINT: upb_value_setraw(&v, upb_baredecoder_readv64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_64BIT: upb_value_setraw(&v, upb_baredecoder_readf64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_32BIT: upb_value_setraw(&v, upb_baredecoder_readf32(d)); @@ -1241,28 +1264,33 @@ bool upb_baredecoder_run(upb_baredecoder *d) { assert(false); abort(); } - upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } } - upb_dispatch_endmsg(&d->dispatcher); - return true; + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + printf("SUCCESS!!\n"); + upb_string_unref(str); + return; + +err: + upb_copyerr(status, d->dispatcher.top->handlers.status); + upb_printerr(d->dispatcher.top->handlers.status); + upb_printerr(status); + upb_string_unref(str); + printf("ERROR!!\n"); } static upb_baredecoder *upb_baredecoder_new(upb_string *str) { - //static upb_src_vtable vtbl = { - // (upb_src_getdef_fptr)&upb_baredecoder_getdef, - // (upb_src_getval_fptr)&upb_baredecoder_getval, - // (upb_src_getstr_fptr)&upb_baredecoder_getstr, - // (upb_src_skipval_fptr)&upb_baredecoder_skipval, - // 
(upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, - //}; + static upb_src_vtbl vtbl = { + &upb_baredecoder_sethandlers, + &upb_baredecoder_run, + }; upb_baredecoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); d->input = upb_string_getref(str); d->offset = 0; upb_dispatcher_init(&d->dispatcher); - //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 66bfec2..cf01a5f 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -136,8 +136,8 @@ struct _upb_dispatcher; typedef struct _upb_dispatcher upb_dispatcher; INLINE void upb_dispatcher_init(upb_dispatcher *d); INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE void upb_dispatch_startmsg(upb_dispatcher *d); -INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, @@ -151,8 +151,21 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); -void upb_src_run(upb_src *src, upb_status *status); +// upb_src_sethandlers() must be called once and only once before upb_src_run() +// is called. This sets up the callbacks that will handle the parse. A +// upb_src that is fully initialized except for the call to +// upb_src_sethandlers() is called "prepared" -- this is useful for library +// functions that want to consume the output of a generic upb_src. +// Calling sethandlers() multiple times is an error and will trigger an abort(). 
+INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); + +// Runs the src, calling the callbacks that were registered with +// upb_src_sethandlers(), and returning the status of the operation in +// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a +// non-blocking socket) or a resumable error; in both cases upb_src_run can be +// called again later. TRYAGAIN could come from either the src (input buffers +// are empty) or the handlers (output buffers are full). +INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index d017177..e462122 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -13,6 +13,7 @@ #include #include "upb_stream.h" +#include "upb_string.h" #ifdef __cplusplus extern "C" { @@ -21,10 +22,8 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. // upb_src -struct _upb_src { -}; -typedef struct { -} upb_src_vtbl; +typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers); +typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. typedef upb_strlen_t (*upb_bytesrc_read_fptr)( @@ -42,42 +41,65 @@ typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( typedef struct { upb_bytesrc_read_fptr read; upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtable; +} upb_bytesrc_vtbl; typedef struct { upb_bytesink_write_fptr write; upb_bytesink_putstr_fptr putstr; -} upb_bytesink_vtable; +} upb_bytesink_vtbl; + +typedef struct { + upb_src_sethandlers_fptr sethandlers; + upb_src_run_fptr run; +} upb_src_vtbl; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
struct _upb_bytesrc { - upb_bytesrc_vtable *vtbl; + upb_bytesrc_vtbl *vtbl; upb_status status; bool eof; }; struct _upb_bytesink { - upb_bytesink_vtable *vtbl; + upb_bytesink_vtbl *vtbl; upb_status status; bool eof; }; -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { +struct _upb_src { + upb_src_vtbl *vtbl; +}; + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } +INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { + s->vtbl = vtbl; +} + // Implementation of virtual function dispatch. +// upb_src +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) { + src->vtbl->sethandlers(src, handlers); +} + +INLINE void upb_src_run(upb_src *src, upb_status *status) { + src->vtbl->run(src, status); +} + // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count) { @@ -152,7 +174,41 @@ INLINE bool upb_handlers_isempty(upb_handlers *h) { return !h->set && !h->closure; } +INLINE upb_flow_t upb_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)closure; + (void)f; + (void)val; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)closure; + (void)f; + (void)delegate_to; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + if (!set->startmsg) set->startmsg = &upb_nop; + if 
(!set->endmsg) set->endmsg = &upb_nop; + if (!set->value) set->value = &upb_value_nop; + if (!set->startsubmsg) set->startsubmsg = &upb_startsubmsg_nop; + if (!set->endsubmsg) set->endsubmsg = &upb_nop; + if (!set->unknownval) set->unknownval = &upb_unknownval_nop; h->set = set; } @@ -182,16 +238,19 @@ INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { d->top->handlers = *h; } -INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->startmsg(d->top->handlers.closure); + return d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->endmsg(d->top->handlers.closure); + return d->top->handlers.set->endmsg(d->top->handlers.closure); } +// TODO: several edge cases to fix: +// - delegated start returns UPB_BREAK, should replay the start on resume. +// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg. 
INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f) { upb_handlers handlers; @@ -203,17 +262,18 @@ INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, ++d->top; d->top->handlers = handlers; d->top->depth = 0; - d->top->handlers.set->startmsg(d->top->handlers.closure); - ret = UPB_CONTINUE; + ret = d->top->handlers.set->startmsg(d->top->handlers.closure); } - ++d->top->depth; + if (ret == UPB_CONTINUE) ++d->top->depth; upb_handlers_uninit(&handlers); return ret; } INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + upb_flow_t ret; if (--d->top->depth == 0) { - d->top->handlers.set->endmsg(d->top->handlers.closure); + ret = d->top->handlers.set->endmsg(d->top->handlers.closure); + if (ret != UPB_CONTINUE) return ret; --d->top; } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); diff --git a/core/upb_string.c b/core/upb_string.c index b243dfd..e9ff0d9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,13 +61,13 @@ void _upb_string_free(upb_string *str) { free(str); } -upb_string *upb_string_tryrecycle(upb_string *str) { +void upb_string_recycle(upb_string **_str) { + upb_string *str = *_str; if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); - return str; } else { - return upb_string_new(); + *_str = upb_string_new(); } } @@ -111,7 +111,7 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { // We don't care about the terminating NULL, but snprintf might // bail out of printing even other characters if it doesn't have // enough space to write the NULL also. 
- str = upb_string_tryrecycle(str); + upb_string_recycle(&str); buf = upb_string_getrwbuf(str, true_size + 1); vsnprintf(buf, true_size + 1, format, args); } diff --git a/core/upb_string.h b/core/upb_string.h index f82603b..1f4b20c 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -133,7 +133,7 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // upb_src_getstr(str); // } // } -upb_string *upb_string_recycle(upb_string **str); +void upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been diff --git a/tests/test_def.c b/tests/test_def.c index 732835d..5be0672 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,6 +10,7 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { + printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); diff --git a/tests/test_string.c b/tests/test_string.c index 7c9ed02..6446806 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -23,7 +23,8 @@ static void test_static() { upb_string_unref(&static_upbstr); // Recycling a static string returns a new string (that can be modified). - upb_string *str = upb_string_tryrecycle(&static_upbstr); + upb_string *str = &static_upbstr; + upb_string_recycle(&str); assert(str != &static_upbstr); upb_string_unref(str); @@ -34,8 +35,9 @@ static void test_dynamic() { assert(str != NULL); upb_string_unref(str); - // Can also create a string by tryrecycle(NULL). - str = upb_string_tryrecycle(NULL); + // Can also create a string by recycle(NULL). 
+ str = NULL; + upb_string_recycle(&str); assert(str != NULL); upb_strcpyc(str, static_str); @@ -45,7 +47,8 @@ static void test_dynamic() { assert(upb_streqlc(str, static_str)); upb_string_endread(str); - upb_string *str2 = upb_string_tryrecycle(str); + upb_string *str2 = str; + upb_string_recycle(&str2); // No other referents, so should return the same string. assert(str2 == str); @@ -58,7 +61,7 @@ static void test_dynamic() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -70,7 +73,7 @@ static void test_dynamic() { assert(upb_atomic_read(&str2->refcount) == 2); // Recycling str should eliminate the extra ref. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); assert(upb_atomic_read(&str2->refcount) == 1); // Resetting str should reuse its old data. @@ -80,7 +83,7 @@ static void test_dynamic() { // Resetting str to something very long should require new data to be // allocated. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; upb_strcpyc(str, longstring); const char *robuf6 = upb_string_getrobuf(str); @@ -88,7 +91,7 @@ static void test_dynamic() { assert(upb_streqlc(str, longstring)); // Test printf. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); assert(upb_streqlc(str, "Number: 5, String: YO!")); -- cgit v1.2.3 From 58a70b55c62cfefcbe7a55a2fd41ee6b87c7256f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 16:29:10 -0800 Subject: Decoder code structure is mostly in-place. 
--- core/upb_stream.h | 20 ++- core/upb_string.h | 57 ++++++-- stream/upb_decoder.c | 363 ++++++++++++++++++++++----------------------------- 3 files changed, 212 insertions(+), 228 deletions(-) (limited to 'core/upb_string.h') diff --git a/core/upb_stream.h b/core/upb_stream.h index cf01a5f..54fd930 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -171,14 +171,18 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If <0, indicates error (check upb_bytesrc_status for details). +// read. If 0, indicates error and puts details in "status". INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count); + upb_strlen_t count, upb_status *status); // Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). +// existing string data (which avoids a copy). On the other hand, if +// the data was *not* already in an existing string, this copies it into +// a upb_string, and if the data needs to be put in a specific range of +// memory (because eg. you need to put it into a different kind of string +// object) then upb_bytesrc_get() could be better. INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count); + upb_status *status); // A convenience function for getting all the remaining data in a upb_bytesrc // as a upb_string. Returns false and sets "status" if the operation fails. @@ -189,14 +193,6 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); } -// Returns the current error status for the stream. -// Note! The "eof" flag works like feof() in C; it cannot report end-of-file -// until a read has failed due to eof. 
It cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); -INLINE bool upb_bytesrc_eof(upb_bytesrc *src); - /* upb_bytesink ***************************************************************/ diff --git a/core/upb_string.h b/core/upb_string.h index 1f4b20c..04c0ae9 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -3,26 +3,39 @@ * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. * - * This file defines a simple string type. The overriding goal of upb_string - * is to avoid memcpy(), malloc(), and free() wheverever possible, while - * keeping both CPU and memory overhead low. Throughout upb there are - * situations where one wants to reference all or part of another string - * without copying. upb_string provides APIs for doing this. + * This file defines a simple string type which is length-delimited instead + * of NULL-terminated, and which has useful sharing semantics. + * + * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() + * wheverever possible, while keeping both CPU and memory overhead low. + * Throughout upb there are situations where one wants to reference all or part + * of another string without copying. upb_string provides APIs for doing this. * * Characteristics of upb_string: * - strings are reference-counted. - * - strings are logically immutable. + * - strings are immutable (can be mutated only when first created or recycled). * - if a string has no other referents, it can be "recycled" into a new string * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings are not thread-safe by default, but can be made so by calling a - * function. This is not the default because it causes extra CPU overhead. 
* * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations * unless the string has been marked thread-safe. + * + * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched + * idiom that we go with it instead of making our pointers uint8_t*. + * + * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE + * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if + * you are logically passing a reference to a upb_string to another thread + * (which implies that the other thread must eventually call unref or recycle), + * you have two options: + * + * - create a copy of the string that will be used in the other thread only. + * - call upb_string_get_synchronized_ref(), which will make getref, unref, and + * recycle thread-safe for this upb_string. */ #ifndef UPB_STRING_H @@ -83,10 +96,12 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); +// Internal-only; clients should call upb_string_unref(). void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. +// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE +// UNLESS THE STRING IS SYNCHRONIZED. INLINE void upb_string_unref(upb_string *str) { if (str && upb_atomic_read(&str->refcount) > 0 && upb_atomic_unref(&str->refcount)) { @@ -98,6 +113,7 @@ upb_string *upb_strdup(upb_string *s); // Forward-declare. // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. +// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED!
INLINE upb_string *upb_string_getref(upb_string *str) { int refcount = upb_atomic_read(&str->refcount); if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); @@ -163,8 +179,11 @@ void upb_string_substr(upb_string *str, upb_string *target_str, // data. Waiting for a clear use case before actually implementing it. // // Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until -// a matching call to upb_string_detach(). +// guarantees that the given string data will not change or be deleted until a +// matching call to upb_string_detach(), which may block until any concurrent +// readers have finished reading. upb_string_detach() preserves the contents +// of the string by copying the referenced data if there are any other +// referents. // void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); // void upb_string_detach(upb_string *str); @@ -207,6 +226,22 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) #define UPB_STACK_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) + +// A convenient way of specifying upb_strings as literals, like: +// +// upb_streql(UPB_STRLIT("expected"), other_str); +// +// However, this requires either C99 compound initializers or C++. +// Must ONLY be called with a string literal as its argument! +//#ifdef __cplusplus +//namespace upb { +//class String : public upb_string { +// // This constructor must ONLY be called with a string literal. 
+// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {} +//}; +//} +//#define UPB_STRLIT(str) upb::String(str) +//#endif #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b820b08..fbd7eba 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -11,127 +11,39 @@ #include #include "upb_def.h" -/* Functions to read wire values. *********************************************/ - -// These functions are internal to the decode, but might be moved into an -// internal header file if we at some point in the future opt to do code -// generation, because the generated code would want to inline these functions. -// The same applies to the functions to read .proto values below. - -const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). -INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); - } +/* Pure Decoding **************************************************************/ + +// The key fast-path varint-decoding routine. There are a lot of possibilities +// for optimization/experimentation here. 
+INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, + upb_status *status) { + *high = 0; + uint32_t b; + uint8_t *ptr = p->ptr; + b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + upb_seterr(status, UPB_ERROR, "Unterminated varint"); + return false; +done: + return true; } -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - uint64_t val64; - const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint32_t)); - return uint32_end; -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). 
-INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint64_t)); - return uint64_end; -} - -INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) - last = *buf; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } - return buf; -} - -INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - -/* Functions to read .proto values. *******************************************/ +/* Decoding/Buffering of individual values ************************************/ // Performs zig-zag decoding, which is used by sint32 and sint64. INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Parses a tag, places the result in *tag. 
-INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - // The decoder keeps a stack with one entry per level of recursion. // upb_decoder_frame is one frame of that stack. typedef struct { @@ -144,6 +56,7 @@ struct upb_decoder { // Immutable state of the decoder. upb_src src; upb_dispatcher dispatcher; + upb_bytesrc *bytesrc; upb_msgdef *toplevel_msgdef; upb_decoder_frame stack[UPB_MAX_NESTING]; @@ -158,66 +71,108 @@ struct upb_decoder { // Current input buffer. upb_string *buf; + // Our current offset *within* buf. + upb_strlen_t buf_offset; + // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; +}; - // Our current offset *within* buf. Will be negative if we are buffering - // from previous buffers in tmpbuf. - upb_strlen_t buf_offset; +// Called only from the slow path, this function copies the next "len" bytes +// from the stream to "data", adjusting "buf" and "end" appropriately. +INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, + uint8_t **buf, uint8_t **end) { + while (len > 0) { + memcpy(data, *buf, *end-*buf); + len -= (*end-*buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *buf = upb_string_getrobuf(d->buf); + *end = *buf + upb_string_len(d->buf); + } +} - // Holds any bytes we have from previous buffers. The number of bytes we - // have encoded here is -buf_offset, if buf_offset<0, 0 otherwise. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; -}; +// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes +// available in our current buffer. We don't inline this because we accept +// that it will be slow and we don't want to pay for two copies of it. 
+static bool upb_decode_varint_slow(upb_decoder *d) { + uint8_t buf[UPB_MAX_ENCODED_SIZE]; + uint8_t *p = buf, *end = buf + sizeof(buf); + for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) + *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; + + if(d->status->code == UPB_EOF && (last & 0x80)) { + upb_seterr(status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + } else if(buf == maxend) { + upb_seterr(status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); + } else { + // Success. + return; + } +} -upb_flow_t upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->end - p->ptr > UPB_MAX_ENCODED_SIZE) { - // Fast path; we know we have a complete varint in our existing buffer. - *high = 0; - uint32_t b; - uint8_t *ptr = p->ptr; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - return false; - } +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, + const uint8_t **end, upb_tag *tag) { + const uint8_t *buf = *_buf, *end = *_end; + uint32_t tag_int; + // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). + if (end - buf < 2) goto slow; // unlikely. 
+ tag_int = *buf & 0x7f; + if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*buf & 0x7f) << 7; + if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. +slow: + if (!upb_decode_varint_slow(d, _buf, _end)) return false; + buf = *_buf; // Trick the next line into not overwriting us. +done: + *_buf = buf; + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return true; +} + +INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, + uint32_t *low, uint32_t *high) { + if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + return upb_decode_varint_fast(d); + else + return upb_decode_varint_slow(d); +} - done: - p->ptr = ptr; +INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, + uint8_t **buf, uint8_t **end, upb_value *val) { + static const char table = {0, 8, 0, 0, 0, 4}; + size_t bytes = table[wt]; + if (*end - *buf >= bytes) { + // Common (fast) case. + memcpy(&val, *buf, bytes); + *buf += bytes; } else { - // Slow path: we may have to combine one or more buffers to get a whole - // varint worth of data. - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(ing bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, - "Varint was unterminated after 10 bytes.\n"); - } else { - // Success. - return; - } - ungetbytes(d, buf, p - buf); + if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + } + return true; +} + +// "val" initially holds the length of the string, this is replaced by the +// contents of the string. 
+INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { + upb_string_recycle(str); + upb_strlen_t len = upb_valu_getint32(*val); + if (*end - *buf >= len) { + // Common (fast) case. + upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); + *buf += len; + } else { + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + return false; } + return true; } + +/* The main decoding loop *****************************************************/ + static const void *get_msgend(upb_decoder *d) { if(d->top->end_offset > 0) @@ -238,36 +193,29 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } - -// Pushes a new stack frame for a submessage with the given len (which will -// be zero if the submessage is a group). -static const uint8_t *push(upb_decoder *d, const uint8_t *start, +static bool upb_push(upb_decoder *d, const uint8_t *start, uint32_t submsg_len, upb_fielddef *f, upb_status *status) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, - "Nesting exceeded maximum (%d levels)\n", - UPB_MAX_NESTING); - return NULL; + upb_seterr(status, UPB_ERROR, "Nesting too deep."); + return false; } - upb_decoder_frame *frame = d->top; - frame->end_offset = d->completed_offset + submsg_len; - frame->msgdef = upb_downcast_msgdef(f->def); - - upb_dispatch_startsubmsg(&d->dispatcher, f); - return get_msgend(d); + d->top->end_offset = d->completed_offset + submsg_len; + d->top->msgdef = upb_downcast_msgdef(f->def); + *submsg_end = get_msgend(d); + if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; + return true; } -// Pops a stack frame, returning a pointer for where the next submsg should -// end (or a pointer that is out of range for a group). 
-static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) +static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) { d->top--; upb_dispatch_endsubmsg(&d->dispatcher); - return get_msgend(d); + *submsg_end = get_msgend(d); + return true; } void upb_decoder_run(upb_src *src, upb_status *status) { @@ -278,11 +226,13 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_msgdef *msgdef = d->top->msgdef; upb_string *str = NULL; + upb_dispatch_startmsg(&d->dispatcher); + // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &buf, &end, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -290,24 +240,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: if(!isgroup(submsg_end)) { - upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - d->completed_offset + (completed - start)); + upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - submsg_end = pop(d, start, status, &msgdef); - completed = buf; - goto check_msgend; + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); + goto check_msgend; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. CHECK(upb_decode_varint(d, &buf, &end, &val)); break; case UPB_WIRE_TYPE_32BIT: - CHECK(upb_decode_32bit(d, &buf, &end, &val)); - break; case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_64bit(d, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); break; } @@ -315,24 +260,31 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); if (!f) { - // Unknown field. 
+ if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) + CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { - // Field has incorrect type. + // TODO: put more details in this error msg. + upb_seterr(status, UPB_ERROR, "Field had incorrect type."); + goto err; } // Perform any further massaging of the data now that we have the fielddef. // Now we can distinguish strings from submessages, and we know about // zig-zag-encoded types. // TODO: handle packed encoding. + // TODO: if we were being paranoid, we could check for 32-bit-varint types + // that the top 32 bits all match the highest bit of the low 32 bits. + // If this is not true we are losing data. But the main protobuf library + // doesn't check this, and it would slow us down, so pass for now. switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; + CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + goto check_msgend; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, str, upb_value_getint32(val))); - upb_value_setstr(&val, str); + CHECK(upb_decode_string(d, &val, &str)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -341,26 +293,27 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); break; default: - // Other types need no further processing at this point. + break; // Other types need no further processing at this point. 
} CHECK(upb_dispatch_value(d->sink, f, val, status)); check_msgend: while(buf >= submsg_end) { if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Expected submsg end offset " - "did not lie on a tag/value boundary."); + upb_seterr(status, UPB_ERROR, "Bad submessage end.") goto err; } - submsg_end = pop(d, start, status, &msgdef); + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); } - completed = buf; } + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + return; + err: - read = (char*)completed - (char*)start; - d->completed_offset += read; - return read; + if (upb_ok(status)) { + upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); + } } void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { -- cgit v1.2.3 From fe659c8c93c464fcbcfb5739935a2e4341d01fd4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 18:59:31 -0800 Subject: Getting closer to a decoder that could actually compile and work. --- core/upb_stream.h | 7 +- core/upb_string.h | 6 ++ stream/upb_decoder.c | 207 +++++++++++++++++++++++++++------------------------ 3 files changed, 119 insertions(+), 101 deletions(-) (limited to 'core/upb_string.h') diff --git a/core/upb_stream.h b/core/upb_stream.h index 54fd930..bf312a8 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -40,8 +40,11 @@ typedef enum { UPB_CONTINUE, // Stop processing for now; check status for details. If no status was set, - // a generic error will be returned. If the error is resumable, processing - // will resume by delivering this callback again. + // a generic error will be returned. If the error is resumable, it is not + // (yet) defined where processing will resume -- waiting for real-world + // examples of resumable decoders and resume-requiring clients. upb_src + // implementations that are not capable of resuming will override the return + // status to be non-resumable if a resumable status was set by the handlers. 
UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top diff --git a/core/upb_string.h b/core/upb_string.h index 04c0ae9..1a7e06b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -134,6 +134,12 @@ INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } +// Convenience method for getting the end of the string. Calls +// upb_string_getrobuf() so inherits the caveats of calling that function. +INLINE const char *upb_string_getbufend(upb_string *str) { + return upb_string_getrobuf(str) + upb_string_len(str); +} + // Attempts to recycle the string "str" so it may be reused and have different // data written to it. After the function returns, "str" points to a writable // string, which is either the original string if it had no other references diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index fbd7eba..9a17451 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -13,23 +13,24 @@ /* Pure Decoding **************************************************************/ -// The key fast-path varint-decoding routine. There are a lot of possibilities -// for optimization/experimentation here. -INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, +// The key fast-path varint-decoding routine. Here we can assume we have at +// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of +// possibilities for optimization/experimentation here. 
+INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, upb_status *status) { *high = 0; uint32_t b; uint8_t *ptr = p->ptr; - b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 28; + b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 28; *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; @@ -71,23 +72,51 @@ struct upb_decoder { // Current input buffer. upb_string *buf; - // Our current offset *within* buf. - upb_strlen_t buf_offset; - // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; }; // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "end" appropriately. 
-INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, - uint8_t **buf, uint8_t **end) { - while (len > 0) { - memcpy(data, *buf, *end-*buf); - len -= (*end-*buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - *buf = upb_string_getrobuf(d->buf); - *end = *buf + upb_string_len(d->buf); +// from the stream to "data", adjusting "buf" and "len" appropriately. +static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, + uint8_t **ptr, size_t *len) { + while (1) { + memcpy(data, *ptr, *len); + bytes_wanted -= *len; + *ptr += *len; + if (bytes_wanted == 0) return true; + + // Did "len" indicate end-of-submessage or end-of-buffer? + size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; + if (d->top->end_offset > 0 && + d->top->end_offset == d->buf_stream_offset + buf_offset) { + // End-of-submessage. + if (bytes_wanted > 0) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") + return false; + } + if (upb_pop(d) != UPB_CONTINUE) return false; + } else { + // End-of-buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *ptr = upb_string_getrobuf(d->buf); + } + + // Wait for end-of-submessage or end-of-buffer, whichever comes first. + size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; + size_t submsg_remaining = + d->top->end_offset - d->buf_stream_offset - offset_in_buf; + if (d->top->end_offset == UPB_GROUP_END_OFFSET || + buf_remaining > submsg_remaining) { + *len = buf_remaining; + } else { + // Check that non of our subtraction overflowed. 
+ assert(d->top->end_offset > d->buf_stream_offset); + assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); + *len = submsg_remaining; + } } } @@ -112,21 +141,21 @@ static bool upb_decode_varint_slow(upb_decoder *d) { } } -INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, - const uint8_t **end, upb_tag *tag) { - const uint8_t *buf = *_buf, *end = *_end; +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, + const uint8_t **len, upb_tag *tag) { + const uint8_t *ptr = *_ptr, *len = *_end; uint32_t tag_int; // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (end - buf < 2) goto slow; // unlikely. - tag_int = *buf & 0x7f; - if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*buf & 0x7f) << 7; - if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. + if (len - ptr < 2) goto slow; // unlikely. + tag_int = *ptr & 0x7f; + if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*ptr & 0x7f) << 7; + if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely. slow: - if (!upb_decode_varint_slow(d, _buf, _end)) return false; - buf = *_buf; // Trick the next line into not overwriting us. + if (!upb_decode_varint_slow(d, _ptr, _end)) return false; + ptr = *_ptr; // Trick the next line into not overwriting us. 
done: - *_buf = buf; + *_ptr = ptr; tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return true; @@ -134,22 +163,22 @@ done: INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, uint32_t *low, uint32_t *high) { - if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) return upb_decode_varint_fast(d); else return upb_decode_varint_slow(d); } INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - uint8_t **buf, uint8_t **end, upb_value *val) { + uint8_t **ptr, uint8_t **len, upb_value *val) { static const char table = {0, 8, 0, 0, 0, 4}; size_t bytes = table[wt]; - if (*end - *buf >= bytes) { + if (*len - *ptr >= bytes) { // Common (fast) case. - memcpy(&val, *buf, bytes); - *buf += bytes; + memcpy(&val, *ptr, bytes); + *ptr += bytes; } else { - if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + if (!upb_getptr(d, &val, bytes, ptr, len)) return false; } return true; } @@ -159,12 +188,12 @@ INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { upb_string_recycle(str); upb_strlen_t len = upb_valu_getint32(*val); - if (*end - *buf >= len) { + if (*len - *ptr >= len) { // Common (fast) case. 
- upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); - *buf += len; + upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); + *ptr += len; } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) return false; } return true; @@ -173,19 +202,6 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) /* The main decoding loop *****************************************************/ -static const void *get_msgend(upb_decoder *d) -{ - if(d->top->end_offset > 0) - return upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); - else - return (void*)UINTPTR_MAX; // group. -} - -static bool isgroup(const void *submsg_end) -{ - return submsg_end == (void*)UINTPTR_MAX; -} - extern upb_wire_type_t upb_expected_wire_types[]; // Returns true if wt is the correct on-the-wire type for ft. INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { @@ -193,76 +209,78 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } -static bool upb_push(upb_decoder *d, const uint8_t *start, - uint32_t submsg_len, upb_fielddef *f, - upb_status *status) -{ +static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, + upb_strlen_t submsg_len, upb_field_type_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(status, UPB_ERROR, "Nesting too deep."); - return false; + return UPB_ERROR; } - d->top->end_offset = d->completed_offset + submsg_len; + d->top->end_offset = type == UPB_TYPE(GROUP) ? 
+ UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; d->top->msgdef = upb_downcast_msgdef(f->def); - *submsg_end = get_msgend(d); - if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; - return true; + return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) -{ +static upb_flow_t upb_pop(upb_decoder *d) { d->top--; - upb_dispatch_endsubmsg(&d->dispatcher); - *submsg_end = get_msgend(d); - return true; + return upb_dispatch_endsubmsg(&d->dispatcher); } void upb_decoder_run(upb_src *src, upb_status *status) { - // buf is our current offset, moves from start to end. - const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str) + d->buf_offset; - const uint8_t *end = (uint8_t*)upb_string_getrobuf(str) + upb_string_len(str); - const uint8_t *submsg_end = get_msgend(d, start); - upb_msgdef *msgdef = d->top->msgdef; + // We use stack variables for our frequently used vars so the compiler knows + // they can't be changed by external code (like when we dispatch a callback). + + // Our current position in the data buffer. + uint8_t *ptr = NULL; + // Number of bytes available at ptr, until either end-of-buf or + // end-of-submessage (whichever is smaller). + size_t len = 0; + upb_string *str = NULL; - upb_dispatch_startmsg(&d->dispatcher); +// TODO: handle UPB_SKIPSUBMSG +#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err +#define CHECK(expr) if (!expr) goto err; + + CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &ptr, &len, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. 
upb_value val; switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: - if(!isgroup(submsg_end)) { + if(d->top->end_offset != UPB_GROUP_END_OFFSET) upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_pop(d)); + continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &buf, &end, &val)); + CHECK(upb_decode_varint(d, &ptr, &len, &val)); break; case UPB_WIRE_TYPE_32BIT: case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); break; } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) CHECK(upb_decode_string(d, &val, &str)); - CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); + CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { // TODO: put more details in this error msg. upb_seterr(status, UPB_ERROR, "Field had incorrect type."); @@ -280,8 +298,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): CHECK(upb_decode_string(d, &val, &str)); @@ -295,19 +313,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) { default: break; // Other types need no further processing at this point. 
} - CHECK(upb_dispatch_value(d->sink, f, val, status)); - -check_msgend: - while(buf >= submsg_end) { - if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Bad submessage end.") - goto err; - } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - } + CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); } - CHECK(upb_dispatch_endmsg(&d->dispatcher)); + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; err: -- cgit v1.2.3 From fbb9fd35e05b88908beeca2c2b88b15aec1fca01 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 28 Jan 2011 10:11:48 -0800 Subject: Improve comments in headers, to better explain core interfaces. --- core/upb_def.h | 9 ++-- core/upb_stream.h | 123 ++++++++++++++++++++++++++++++++----------------- core/upb_stream_vtbl.h | 2 +- core/upb_string.h | 7 ++- 4 files changed, 91 insertions(+), 50 deletions(-) (limited to 'core/upb_string.h') diff --git a/core/upb_def.h b/core/upb_def.h index d9bab97..e95aec3 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -1,17 +1,18 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details. * - * Provides definitions of .proto constructs: + * Provides a mechanism for loading proto definitions from descriptors, and + * data structures to represent those definitions. These form the protobuf + * schema, and are used extensively throughout upb: * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. * (TODO: definitions of extensions and services). * * Defs are obtained from a upb_symtab object. A upb_symtab is empty when - * constructed, and definitions can be added by supplying serialized - * descriptors. + * constructed, and definitions can be added by supplying descriptors. * * Defs are immutable and reference-counted. 
Symbol tables reference any defs * that are the "current" definitions. If an extension is loaded that adds a diff --git a/core/upb_stream.h b/core/upb_stream.h index d0045cc..09e4025 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -1,23 +1,46 @@ /* * upb - a minimalist implementation of protocol buffers. * - * This file defines four general-purpose streaming interfaces for protobuf - * data or bytes: + * This file defines four general-purpose streaming data interfaces. * - * - upb_src: pull interface for protobuf data. - * - upb_sink: push interface for protobuf data. - * - upb_bytesrc: pull interface for bytes. - * - upb_bytesink: push interface for bytes. + * - upb_handlers: represents a set of callbacks, very much like in XML's SAX + * API, that a client can register to do a streaming tree traversal over a + * stream of structured protobuf data, without knowing where that data is + * coming from. There is only one upb_handlers type (it is not a virtual + * base class), but the object lets you register any set of handlers. * - * These interfaces are used as general-purpose glue in upb. For example, the - * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * The upb_handlers interface supports delegation: when entering a submessage, + * you can delegate to another set of upb_handlers instead of handling the + * submessage yourself. This allows upb_handlers objects to *compose* -- you + * can implement a set of upb_handlers without knowing or caring whether this + * is the top-level message or not. * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * The other interfaces are the C equivalent of "virtual base classes" that + * anyone can implement: + * + * - upb_src: an interface that represents a source of streaming protobuf data. + * It lets you register a set of upb_handlers, and then call upb_src_run(), + * which pulls the protobuf data from somewhere and then calls the handlers. 
+ * + * - upb_bytesrc: a pull interface for streams of bytes, basically an + * abstraction of read()/fread(), but it avoids copies where possible. + * + * - upb_bytesink: a push interface for streams of bytes, basically an + * abstraction of write()/fwrite(), but it avoids copies where possible. + * + * All of the encoders and decoders are based on these generic interfaces, + * which lets you write streaming algorithms that do not depend on a specific + * serialization format; for example, you can write a pretty printer that works + * with input that came from protobuf binary format, protobuf text format, or + * even an in-memory upb_msg -- the pretty printer will not know the + * difference. + * + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. + * */ -#ifndef UPB_SRCSINK_H -#define UPB_SRCSINK_H +#ifndef UPB_STREAM_H +#define UPB_STREAM_H #include "upb.h" @@ -53,8 +76,10 @@ typedef enum { // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been - // registered on the provided upb_handlers object. May not be returned - // from any other callback. + // registered on the provided upb_handlers object. This allows upb_handlers + // objects to compose; a set of upb_handlers need not know whether it is the + // top-level message or a sub-message. May not be returned from any other + // callback. UPB_DELEGATE, } upb_flow_t; @@ -105,9 +130,19 @@ typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, // // static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, // upb_value val) { -// Called with an unknown value is encountered. +// // Called when an unknown value is encountered. // return UPB_CONTINUE; // } +// +// // Any handlers you don't need can be set to NULL. 
+// static upb_handlerset handlers = { +// startmsg, +// endmsg, +// value, +// startsubmsg, +// endsubmsg, +// unknownval, +// }; typedef struct { upb_startmsg_handler_t startmsg; upb_endmsg_handler_t endmsg; @@ -128,26 +163,12 @@ INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); // from automatically being converted to strings in the value callback. // INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); -// The closure will be passed to every handler. The status will be used -// only immediately after a handler has returned UPB_STOP. +// The closure will be passed to every handler. The status will be read by the +// upb_src immediately after a handler has returned UPB_BREAK and used as the +// overall upb_src status; it will not be referenced at any other time. INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, upb_status *status); -// An object that transparently handles delegation so that the caller needs -// only follow the protocol as if delegation did not exist. 
-struct _upb_dispatcher; -typedef struct _upb_dispatcher upb_dispatcher; -INLINE void upb_dispatcher_init(upb_dispatcher *d); -INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); -INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, - upb_value val); -INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, - upb_field_number_t fieldnum, upb_value val); - /* upb_src ********************************************************************/ @@ -171,6 +192,24 @@ INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); INLINE void upb_src_run(upb_src *src, upb_status *status); +// A convenience object that a upb_src can use to invoke handlers. It +// transparently handles delegation so that the upb_src needs only follow the +// protocol as if delegation did not exist. 
+struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val); + /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes @@ -178,16 +217,16 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -// Like upb_bytesrc_read(), but modifies "str" in-place. "str" MUST be newly -// created or just recycled. Returns "false" if no data was returned, either -// due to error or EOF (check status for details). +// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure +// that "str" is newly created or just recycled. Returns "false" if no data was +// returned, either due to error or EOF (check status for details). // // In comparison to upb_bytesrc_read(), this call can possibly alias existing // string data (which avoids a copy). On the other hand, if the data was *not* // already in an existing string, this copies it into a upb_string, and if the // data needs to be put in a specific range of memory (because eg. you need to // put it into a different kind of string object) then upb_bytesrc_get() could -// be better. +// save you a copy. 
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, upb_status *status); @@ -206,15 +245,13 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Writes up to "count" bytes from "buf", returning the total number of bytes -// written. If <0, indicates error (check upb_bytesink_status() for details). -INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, - upb_strlen_t count); +INLINE bool upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...); -// Puts the given string, which may alias the string data (which avoids a -// copy). Returns the number of bytes that were actually, consumed, which may -// be fewer than were in the string, or <0 on error. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); +// Puts the given string, returning true if the operation was successful; otherwise +// check "status" for details. Ownership of the string is *not* passed; if +// the callee wants a reference it must call upb_string_getref() on it. +INLINE bool upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); // Returns the current error status for the stream. 
INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ddefba9..ef655fd 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -139,7 +139,7 @@ INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, return sink->vtbl->write(sink, buf, count); } -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { return sink->vtbl->putstr(sink, str); } diff --git a/core/upb_string.h b/core/upb_string.h index 1a7e06b..7d0ae87 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -9,7 +9,9 @@ * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() * wheverever possible, while keeping both CPU and memory overhead low. * Throughout upb there are situations where one wants to reference all or part - * of another string without copying. upb_string provides APIs for doing this. + * of another string without copying. upb_string provides APIs for doing this, + * and allows the referenced string to be kept alive for as long as anyone is + * referencing it. * * Characteristics of upb_string: * - strings are reference-counted. @@ -22,7 +24,8 @@ * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations - * unless the string has been marked thread-safe. + * unless the string has been marked thread-safe. Time will tell whether this + * scheme is easy and convenient enough to be practical. * * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched * idiom that we go with it instead of making our pointers uint8_t*. -- cgit v1.2.3