From 4667ed4be921b2142321e47c8ccc6a35a9189277 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 6 Feb 2011 08:21:47 -0800 Subject: All tests pass again, valgrind-clean! Next up: benchmarks. --- Makefile | 19 +++++++------ core/upb_def.c | 40 +++++++++++++++++++++------- core/upb_def.h | 24 ++++++++++++----- core/upb_glue.c | 16 +++++++++++ core/upb_glue.h | 6 ++++- core/upb_msg.h | 4 +-- stream/upb_decoder.c | 11 ++++---- tests/test_vs_proto2.cc | 1 + tests/tests.c | 71 +++++++++++++++++++------------------------------ 9 files changed, 117 insertions(+), 75 deletions(-) diff --git a/Makefile b/Makefile index a96c3f7..18c33d4 100644 --- a/Makefile +++ b/Makefile @@ -118,17 +118,20 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -TESTS=tests/test_string \ - tests/test_table \ - tests/test_def \ - tests/test_stream \ - tests/t.test_vs_proto2.googlemessage1 \ - tests/t.test_vs_proto2.googlemessage2 \ +TESTS= \ + tests/test_string \ + tests/test_table \ + tests/test_def \ + tests/test_stream \ + tests/t.test_vs_proto2.googlemessage1 \ + tests/t.test_vs_proto2.googlemessage2 \ + tests/test.proto.pb \ + tests/tests + # tests/test_decoder \ -# tests/test.proto.pb + tests: $(LIBUPB) $(TESTS) -OTHER_TESTS=tests/tests \ $(TESTS): $(LIBUPB) VALGRIND=valgrind --leak-check=full --error-exitcode=1 diff --git a/core/upb_def.c b/core/upb_def.c index 298ede1..8abb8bc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -5,10 +5,13 @@ */ #include +#include #include "descriptor_const.h" #include "descriptor.h" #include "upb_def.h" +#define alignof(t) offsetof(struct { char c; t x; }, x) + /* Rounds p up to the next multiple of t. */ static size_t upb_align_up(size_t val, size_t align) { return val % align == 0 ? val : val + align - (val % align); @@ -24,7 +27,7 @@ static int upb_div_round_up(int numerator, int denominator) { * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ static upb_string *upb_join(upb_string *base, upb_string *name) { - if (upb_string_len(base) == 0) { + if (!base || upb_string_len(base) == 0) { return upb_string_getref(name); } else { return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, @@ -725,14 +728,22 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { // together. f->field_index = i; + size_t size, align; + if (upb_isarray(f)) { + size = sizeof(void*); + align = alignof(void*); + } else { + size = type_info->size; + align = type_info->align; + } // General alignment rules are: each member must be at an address that is a // multiple of that type's alignment. Also, the size of the structure as a // whole must be a multiple of the greatest alignment of any member. - size_t offset = upb_align_up(m->size, type_info->align); + size_t offset = upb_align_up(m->size, align); // Offsets are relative to the end of the refcount. f->byte_offset = offset - sizeof(upb_atomic_refcount_t); - m->size = offset + type_info->size; - max_align = UPB_MAX(max_align, type_info->align); + m->size = offset + size; + max_align = UPB_MAX(max_align, align); } free(sorted_fields); @@ -1054,6 +1065,7 @@ upb_symtab *upb_symtab_new() upb_atomic_refcount_init(&s->refcount, 1); upb_rwlock_init(&s->lock); upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); + s->fds_msgdef = NULL; return s; } @@ -1304,12 +1316,7 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // For the moment we silently decline to perform the operation if the symbols // already exist in the symtab. Revisit this when we have a better story // about whether syms in a table can be replaced. - upb_def *def = upb_symtab_lookup( - symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); - if(def) { - upb_def_unref(def); - return; - } + if(symtab->fds_msgdef) upb_def_unref(UPB_UPCAST(symtab->fds_msgdef)); upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); upb_status status = UPB_STATUS_INIT; @@ -1323,5 +1330,18 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) { upb_symtab_unref(symtab); abort(); } + upb_def *def = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + if (!def || (symtab->fds_msgdef = upb_dyncast_msgdef(def)) == NULL) { + // upb itself is corrupt. + abort(); + } + upb_def_unref(def); // The symtab already holds a ref on it. upb_status_uninit(&status); } + +upb_msgdef *upb_symtab_fds_def(upb_symtab *s) { + assert(s->fds_msgdef != NULL); + upb_def_ref(UPB_UPCAST(s->fds_msgdef)); + return s->fds_msgdef; +} diff --git a/core/upb_def.h b/core/upb_def.h index e95aec3..34f31ec 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -254,11 +254,13 @@ INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { // A SymbolTable is where upb_defs live. It is empty when first constructed. // Clients add definitions to the symtab by supplying unserialized or // serialized descriptors (as defined in descriptor.proto). -typedef struct { +struct _upb_symtab { upb_atomic_refcount_t refcount; upb_rwlock_t lock; // Protects all members except the refcount. upb_strtable symtab; // The symbol table. -} upb_symtab; + upb_msgdef *fds_msgdef; // Msgdef for google.protobuf.FileDescriptorSet. +}; +typedef struct _upb_symtab upb_symtab; // Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd // when the caller is done with them. @@ -293,10 +295,13 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); // "fds" is a upb_src that will yield data from the -// google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds -// all the definitions from the given FileDescriptorSet and adds them to the -// symtab. status indicates whether the operation was successful or not, and -// the error message (if any). +// google.protobuf.FileDescriptorSet message type. It is not necessary that +// the upb_def for FileDescriptorSet came from this symtab, but it must be +// compatible with the official descriptor.proto, as published by Google. +// +// upb_symtab_addfds() adds all the definitions from the given +// FileDescriptorSet and adds them to the symtab. status indicates whether the +// operation was successful or not, and the error message (if any). // // TODO: should this allow redefinition? Either is possible, but which is // more useful? Maybe it should be an option. @@ -307,6 +312,13 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); // specify other defs and allow them to be loaded. void upb_symtab_add_descriptorproto(upb_symtab *s); +// Returns the upb_msgdef for google.protobuf.FileDescriptorSet, which the +// caller owns a ref on. This is a convenience method that is equivalent to +// looking up the symbol called "google.protobuf.FileDescriptorSet" yourself, +// except that it only will return a def that was added by +// upb_symtab_add_descriptorproto(). +upb_msgdef *upb_symtab_fds_def(upb_symtab *s); + /* upb_def casts **************************************************************/ diff --git a/core/upb_glue.c b/core/upb_glue.c index 7540a3a..541827e 100644 --- a/core/upb_glue.c +++ b/core/upb_glue.c @@ -36,3 +36,19 @@ void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, upb_msgpopulator_uninit(&p); upb_handlers_uninit(&h); } + +void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { + upb_stringsrc strsrc; + upb_stringsrc_init(&strsrc); + upb_stringsrc_reset(&strsrc, str); + + upb_decoder d; + upb_msgdef *fds_msgdef = upb_symtab_fds_def(symtab); + upb_decoder_init(&d, fds_msgdef); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); + + upb_symtab_addfds(symtab, upb_decoder_src(&d), status); + upb_stringsrc_uninit(&strsrc); + upb_decoder_uninit(&d); + upb_def_unref(UPB_UPCAST(fds_msgdef)); +} diff --git a/core/upb_glue.h b/core/upb_glue.h index 61111f5..ca32436 100644 --- a/core/upb_glue.h +++ b/core/upb_glue.h @@ -25,16 +25,20 @@ extern "C" { // Forward-declares so we don't have to include everything in this .h file. // Clients should use the regular, typedef'd names (eg. upb_string). -struct _upb_string; struct _upb_msg; struct _upb_msgdef; struct _upb_status; +struct _upb_string; +struct _upb_symtab; // Decodes the given string, which must be in protobuf binary format, to the // given upb_msg with msgdef "md", storing the status of the operation in "s". void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg, struct _upb_msgdef *md, struct _upb_status *s); +void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str, + struct _upb_status *status); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/core/upb_msg.h b/core/upb_msg.h index 475b346..1318e08 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -88,7 +88,7 @@ INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { val.type = UPB_VALUETYPE_ARRAY; #endif break; - default: printf("type: %d\n", ft); assert(false); + default: assert(false); } return val; @@ -202,7 +202,7 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { } INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - return upb_value_read(_upb_msg_getptr(msg, f), f->type); + return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); } // Unsets all field values back to their defaults. diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b85abb9..588d553 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -26,11 +26,12 @@ INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; b = *(p++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 9633420..749eedf 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -27,6 +27,7 @@ void compare_arrays(const google::protobuf::Reflection *r, upb_msg *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); + ASSERT(upb_isarray(upb_f)); upb_array *arr = upb_value_getarr(upb_msg_get(upb_msg, upb_f)); ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { diff --git a/tests/tests.c b/tests/tests.c index f96b5a4..17e00f3 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -5,6 +5,7 @@ #include #include "upb_decoder.c" #include "upb_def.h" +#include "upb_glue.h" int num_assertions = 0; #define ASSERT(expr) do { \ @@ -16,18 +17,13 @@ static void test_get_v_uint64_t() { #define TEST(name, bytes, val) {\ upb_status status = UPB_STATUS_INIT; \ - const uint8_t name[] = bytes; \ - const uint8_t *name ## _buf = name; \ + const char name[] = bytes; \ + const char *name ## _buf = name; \ uint64_t name ## _val = 0; \ - name ## _buf = upb_get_v_uint64_t(name, name + sizeof(name) - 1, &name ## _val, &status); \ + upb_decode_varint_fast(&name ## _buf, &name ## _val, &status); \ ASSERT(upb_ok(&status)); \ ASSERT(name ## _val == val); \ ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \ - /* Test NEED_MORE_DATA. */ \ - if(sizeof(name) > 2) { \ - name ## _buf = upb_get_v_uint64_t(name, name + sizeof(name) - 2, &name ## _val, &status); \ - ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \ - } \ } TEST(zero, "\x00", 0ULL); @@ -43,39 +39,18 @@ static void test_get_v_uint64_t() TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL); #undef TEST - uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; + char twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; + const char *twelvebyte_buf = twelvebyte; uint64_t twelvebyte_val = 0; upb_status status = UPB_STATUS_INIT; /* A varint that terminates before hitting the end of the provided buffer, * but in too many bytes (11 instead of 10). */ - upb_get_v_uint64_t(twelvebyte, twelvebyte + 12, &twelvebyte_val, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT); - - /* A varint that terminates simultaneously with the end of the provided - * buffer, but in too many bytes (11 instead of 10). */ - upb_reset(&status); - upb_get_v_uint64_t(twelvebyte, twelvebyte + 11, &twelvebyte_val, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT); - - /* A varint whose buffer ends on exactly the byte where the varint must - * terminate, but the final byte does not terminate. The absolutely most - * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know - * by this point that the varint does not properly terminate. But we also - * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not - * compromise overall correctness -- clients who supply more data later will - * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no - * more data to supply will (rightly) conclude that their protobuf is corrupt. - */ - upb_reset(&status); - upb_get_v_uint64_t(twelvebyte, twelvebyte + 10, &twelvebyte_val, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT || - status.code == UPB_STATUS_NEED_MORE_DATA); - - upb_reset(&status); - upb_get_v_uint64_t(twelvebyte, twelvebyte + 9, &twelvebyte_val, &status); - ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); + upb_decode_varint_fast(&twelvebyte_buf, &twelvebyte_val, &status); + ASSERT(status.code == UPB_ERROR); + upb_status_uninit(&status); } +#if 0 static void test_get_v_uint32_t() { #define TEST(name, bytes, val) {\ @@ -226,35 +201,45 @@ static void test_get_f_uint32_t() #undef TEST } +#endif static void test_upb_symtab() { upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); ASSERT(s); - upb_strptr descriptor = upb_strreadfile("tests/test.proto.pb"); - if(upb_string_isnull(descriptor)) { + upb_string *descriptor = upb_strreadfile("tests/test.proto.pb"); + if(!descriptor) { fprintf(stderr, "Couldn't read input file tests/test.proto.pb\n"); exit(1); } upb_status status = UPB_STATUS_INIT; - upb_symtab_add_desc(s, descriptor, &status); + upb_parsedesc(s, descriptor, &status); ASSERT(upb_ok(&status)); + upb_status_uninit(&status); upb_string_unref(descriptor); // Test cycle detection by making a cyclic def's main refcount go to zero // and then be incremented to one again. - upb_strptr symname = upb_strdupc("A"); + upb_string *symname = upb_strdupc("A"); upb_def *def = upb_symtab_lookup(s, symname); upb_string_unref(symname); ASSERT(def); upb_symtab_unref(s); upb_msgdef *m = upb_downcast_msgdef(def); - upb_fielddef *f = &m->fields[0]; + upb_msg_iter i = upb_msg_begin(m); + upb_fielddef *f = upb_msg_iter_field(i); ASSERT(upb_hasdef(f)); upb_def *def2 = f->def; + + i = upb_msg_next(m, i); + ASSERT(upb_msg_done(i)); // "A" should only have one field. + ASSERT(upb_downcast_msgdef(def2)); upb_def_ref(def2); upb_def_unref(def); upb_def_unref(def2); + + } @@ -268,9 +253,9 @@ int main() } while (0) TEST(test_get_v_uint64_t); - TEST(test_get_v_uint32_t); - TEST(test_skip_v_uint64_t); - TEST(test_get_f_uint32_t); + //TEST(test_get_v_uint32_t); + //TEST(test_skip_v_uint64_t); + //TEST(test_get_f_uint32_t); TEST(test_upb_symtab); printf("All tests passed (%d assertions).\n", num_assertions); return 0; -- cgit v1.2.3