From fd184f0df2e5e428873eadfaf1ae829d2e4d8e51 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 22 Feb 2011 01:54:31 -0800 Subject: Major work on Lua extension and default values. Default values are now supported, and the Lua extension can now create and modify individual protobuf objects. --- Makefile | 8 +- lang_ext/lua/test.lua | 17 +++ lang_ext/lua/upb.c | 330 ++++++++++++++++++++++++++++++++++++++++-------- src/descriptor.h | 26 ---- src/upb_decoder.c | 126 +++--------------- src/upb_decoder_x64.asm | 4 +- src/upb_def.c | 166 +++++++++++++++++++++++- src/upb_def.h | 16 ++- src/upb_msg.c | 16 +++ src/upb_msg.h | 47 ++++++- src/upb_string.c | 13 +- src/upb_string.h | 10 ++ src/upbc.c | 1 - tests/test_vs_proto2.cc | 18 +-- tests/tests.c | 208 +----------------------------- 15 files changed, 592 insertions(+), 414 deletions(-) delete mode 100644 src/descriptor.h diff --git a/Makefile b/Makefile index d9f8008..13ce46a 100644 --- a/Makefile +++ b/Makefile @@ -94,6 +94,7 @@ TESTS_SRC= \ tests/test_stream.c \ tests/test_string.c \ tests/tests.c \ + tests/tests_varint.c \ tests/test_vs_proto2.cc ALLSRC=$(CORE) $(STREAM) $(BENCHMARKS_SRC) $(TESTS_SRC) @@ -138,11 +139,11 @@ $(LIBUPB_PIC): $(PICOBJ) # critical path but gets very large when -O3 is used. 
src/upb_def.o: src/upb_def.c $(E) CC $< - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< src/upb_def.lo: src/upb_def.c $(E) 'CC -fPIC' $< - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC src/upb_decoder_x64.o: src/upb_decoder_x64.asm $(E) NASM $< @@ -183,6 +184,7 @@ SIMPLE_TESTS= \ tests/test_string \ tests/test_def \ tests/test_stream \ + tests/test_varint \ tests/tests # tests/test_decoder \ @@ -202,7 +204,7 @@ tests/tests: tests/test.proto.pb $(SIMPLE_TESTS): % : %.c $(E) CC $< - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB) VALGRIND=valgrind --leak-check=full --error-exitcode=1 test: tests diff --git a/lang_ext/lua/test.lua b/lang_ext/lua/test.lua index a49cebc..978fb11 100644 --- a/lang_ext/lua/test.lua +++ b/lang_ext/lua/test.lua @@ -18,3 +18,20 @@ symtab:parsedesc(f:read("*all")) for _, def in ipairs(symtab:getdefs(-1)) do print(def:name()) end + +SpeedMessage1 = symtab:lookup("benchmarks.SpeedMessage1") +print(SpeedMessage1:name()) + +msg = SpeedMessage1() +-- print(msg.field1) +-- print(msg.field129) +-- print(msg.field271) +-- print(msg.field15.field15) +-- print(msg.field1) +-- print(msg.field1) +-- msg.field1 = "YEAH BABY!" +-- print(msg.field1) +print(msg.field129) +msg.field129 = 5 +print(msg.field129) + diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index bf1eb02..460ac86 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -7,9 +7,20 @@ */ #include +#include +#include #include "lauxlib.h" #include "upb_def.h" #include "upb_glue.h" +#include "upb_msg.h" + +static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md); + +// All the def types share the same C layout, even though they are different Lua +// types with different metatables. 
+typedef struct { + upb_def *def; +} lupb_def; void lupb_pushstring(lua_State *L, upb_string *str) { lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str)); @@ -30,21 +41,17 @@ void lupb_checkstatus(lua_State *L, upb_status *s) { upb_status_uninit(s); } + /* object cache ***************************************************************/ // We cache all the lua objects (userdata) we vend in a weak table, indexed by // the C pointer of the object they are caching. -typedef void (*lupb_cb)(void *cobj); - -static void lupb_nop(void *foo) { - (void)foo; -} - -static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, - lupb_cb ref, lupb_cb unref) { +static void *lupb_cache_getorcreate_size( + lua_State *L, void *cobj, const char *type, size_t size) { // Lookup our cache in the registry (we don't put our objects in the registry // directly because we need our cache to be a weak table). + void **obj = NULL; lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache"); assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. lua_pushlightuserdata(L, cobj); @@ -55,7 +62,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, lua_pop(L, 1); // We take advantage of the fact that all of our objects are currently a // single pointer, and thus have the same layout. - void **obj = lua_newuserdata(L, sizeof(void*)); + obj = lua_newuserdata(L, size); *obj = cobj; luaL_getmetatable(L, type); assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. @@ -65,44 +72,235 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, lua_pushlightuserdata(L, cobj); lua_pushvalue(L, -2); lua_rawset(L, -4); - ref(cobj); - } else { - unref(cobj); } lua_insert(L, -2); lua_pop(L, 1); + return obj; } +// Most types are just 1 pointer and can use this helper. 
+static bool lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type) { + return lupb_cache_getorcreate_size(L, cobj, type, sizeof(void*)) != NULL; +} -/* lupb_def *******************************************************************/ -// All the def types share the same C layout, even though they are different Lua -// types with different metatables. +/* lupb_msg********************************************************************/ + +// We prefer field access syntax (foo.bar, foo.bar = 5) over method syntax +// (foo:bar(), foo:set_bar(5)) to make messages behave more like regular tables. +// However, there are methods also, like foo:CopyFrom(other_foo) or foo:Clear(). + typedef struct { - upb_def *def; -} lupb_def; + upb_msg *msg; + upb_msgdef *msgdef; +} lupb_msg; -static void lupb_def_unref(void *cobj) { - upb_def_unref((upb_def*)cobj); +static lupb_msg *lupb_msg_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.msg"); } -static void lupb_def_getorcreate(lua_State *L, upb_def *def) { - const char *type_name; - switch(def->type) { - case UPB_DEF_MSG: - type_name = "upb.msgdef"; +static void lupb_msg_pushnew(lua_State *L, upb_msgdef *md) { + upb_msg *msg = upb_msg_new(md); + lupb_msg *m = lupb_cache_getorcreate_size(L, msg, "upb.msg", sizeof(lupb_msg)); + assert(m); + m->msgdef = md; + // We need to ensure that the msgdef outlives the msg. This performs an + // atomic ref, if this turns out to be too expensive there are other + // possible approaches, like creating a separate metatable for every + // msgdef that references the msgdef. + upb_msgdef_ref(md); +} + +// Caller does *not* pass a ref. +static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md) { + lupb_msg *m = lupb_cache_getorcreate_size(L, msg, "upb.msg", sizeof(lupb_msg)); + if (m) { + // New Lua object, we need to ref the message. + m->msg = upb_msg_getref(msg); + m->msgdef = md; + // See comment above. 
+ upb_msgdef_ref(md); + } +} + +static int lupb_msg_gc(lua_State *L) { + lupb_msg *m = lupb_msg_check(L, 1); + upb_msg_unref(m->msg, m->msgdef); + upb_msgdef_unref(m->msgdef); + return 0; +} + +static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) { + switch (f->type) { + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(ENUM): + lua_pushnumber(L, upb_value_getint32(val)); break; + case UPB_TYPE(INT64): + case UPB_TYPE(SINT64): + case UPB_TYPE(SFIXED64): + lua_pushnumber(L, upb_value_getint64(val)); break; + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + lua_pushnumber(L, upb_value_getuint32(val)); break; + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + lua_pushnumber(L, upb_value_getuint64(val)); break; + case UPB_TYPE(DOUBLE): + lua_pushnumber(L, upb_value_getdouble(val)); break; + case UPB_TYPE(FLOAT): + lua_pushnumber(L, upb_value_getfloat(val)); break; + case UPB_TYPE(BOOL): + lua_pushboolean(L, upb_value_getbool(val)); break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): { + upb_string *str = upb_value_getstr(val); + assert(str); + lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str)); break; + } + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): { + upb_msg *msg = upb_value_getmsg(val); + assert(msg); + lupb_msg_getorcreate(L, msg, upb_downcast_msgdef(f->def)); + } + } +} + +static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f) { + upb_value val; + lua_Number num; + if (!upb_issubmsg(f) && !upb_isstring(f) && f->type != UPB_TYPE(BOOL)) { + num = luaL_checknumber(L, narg); + if (f->type != UPB_TYPE(DOUBLE) && f->type != UPB_TYPE(FLOAT) && + num != rint(num)) { + luaL_error(L, "Cannot assign non-integer number %f to integer field", num); + } + } + switch (f->type) { + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(ENUM): + if (num > INT32_MAX || num < INT32_MIN) + luaL_error(L, "Number %f is out-of-range for 32-bit 
integer field.", num); + upb_value_setint32(&val, num); break; - case UPB_DEF_ENUM: - type_name = "upb.enumdef"; + case UPB_TYPE(INT64): + case UPB_TYPE(SINT64): + case UPB_TYPE(SFIXED64): + if (num > INT64_MAX || num < INT64_MIN) + luaL_error(L, "Number %f is out-of-range for 64-bit integer field.", num); + upb_value_setint64(&val, num); break; - default: - luaL_error(L, "unknown deftype %d", def->type); - type_name = NULL; // Placate the compiler. + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + if (num > UINT32_MAX || num < 0) + luaL_error(L, "Number %f is out-of-range for unsigned 32-bit integer field.", num); + upb_value_setuint32(&val, num); + break; + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + if (num > UINT64_MAX || num < 0) + luaL_error(L, "Number %f is out-of-range for unsigned 64-bit integer field.", num); + upb_value_setuint64(&val, num); + break; + case UPB_TYPE(DOUBLE): + if (num > DBL_MAX || num < -DBL_MAX) { + // This could happen if lua_Number was long double. + luaL_error(L, "Number %f is out-of-range for double field.", num); + } + upb_value_setdouble(&val, num); + break; + case UPB_TYPE(FLOAT): + if (num > FLT_MAX || num < -FLT_MAX) + luaL_error(L, "Number %f is out-of-range for float field.", num); + upb_value_setfloat(&val, num); + break; + case UPB_TYPE(BOOL): + if (!lua_isboolean(L, narg)) + luaL_error(L, "Must explicitly pass true or false for boolean fields"); + upb_value_setbool(&val, lua_toboolean(L, narg)); + break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): { + // TODO: is there any reasonable way to avoid a copy here? 
+ size_t len; + const char *str = luaL_checklstring(L, narg, &len); + upb_value_setstr(&val, upb_strduplen(str, len)); + break; + } + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): { + lupb_msg *m = lupb_msg_check(L, narg); + if (m->msgdef != upb_downcast_msgdef(f->def)) + luaL_error(L, "Tried to assign a message of the wrong type."); + upb_value_setmsg(&val, m->msg); + break; + } + } + return val; +} + + +static int lupb_msg_index(lua_State *L) { + assert(lua_gettop(L) == 2); // __index should always be called with 2 args. + lupb_msg *m = lupb_msg_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_fielddef *f = upb_msgdef_ntof(m->msgdef, &namestr); + if (f) { + lupb_pushvalue(L, upb_msg_get(m->msg, f), f); + } else { + // It wasn't a field, perhaps it's a method? + lua_getmetatable(L, 1); + lua_pushvalue(L, 2); + lua_rawget(L, -2); + if (lua_isnil(L, -1)) { + luaL_error(L, "%s is not a field name or a method name", name); + } + } + return 1; +} + +static int lupb_msg_newindex(lua_State *L) { + assert(lua_gettop(L) == 3); // __newindex should always be called with 3 args. 
+ lupb_msg *m = lupb_msg_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_fielddef *f = upb_msgdef_ntof(m->msgdef, &namestr); + if (f) { + upb_value val = lupb_getvalue(L, 3, f); + upb_msg_set(m->msg, f, val); + if (upb_isstring(f)) { + upb_string_unref(upb_value_getstr(val)); + } + } else { + luaL_error(L, "%s is not a field name", name); } - lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref); + return 0; +} + +static int lupb_msg_clear(lua_State *L) { + lupb_msg *m = lupb_msg_check(L, 1); + upb_msg_clear(m->msg, m->msgdef); + return 0; } -// msgdef +static const struct luaL_Reg lupb_msg_mm[] = { + {"__gc", lupb_msg_gc}, + {"__index", lupb_msg_index}, + {"__newindex", lupb_msg_newindex}, + // Our __index mm will look up methods if the index isn't a field name. + {"Clear", lupb_msg_clear}, + {NULL, NULL} +}; + + +/* lupb_msgdef ****************************************************************/ static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) { lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef"); @@ -115,6 +313,12 @@ static int lupb_msgdef_gc(lua_State *L) { return 0; } +static int lupb_msgdef_call(lua_State *L) { + upb_msgdef *md = lupb_msgdef_check(L, 1); + lupb_msg_pushnew(L, md); + return 1; +} + static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f); static int lupb_msgdef_name(lua_State *L) { @@ -150,6 +354,7 @@ static int lupb_msgdef_fieldbynum(lua_State *L) { } static const struct luaL_Reg lupb_msgdef_mm[] = { + {"__call", lupb_msgdef_call}, {"__gc", lupb_msgdef_gc}, {NULL, NULL} }; @@ -161,7 +366,8 @@ static const struct luaL_Reg lupb_msgdef_m[] = { {NULL, NULL} }; -// enumdef + +/* lupb_enumdef ***************************************************************/ static upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) { lupb_def *ldef = luaL_checkudata(L, narg, "upb.enumdef"); @@ -191,18 +397,41 @@ static const struct 
luaL_Reg lupb_enumdef_m[] = { }; +/* lupb_def *******************************************************************/ + +static void lupb_def_getorcreate(lua_State *L, upb_def *def, int owned) { + bool created = false; + switch(def->type) { + case UPB_DEF_MSG: + created = lupb_cache_getorcreate(L, def, "upb.msgdef"); + break; + case UPB_DEF_ENUM: + created = lupb_cache_getorcreate(L, def, "upb.enumdef"); + break; + default: + luaL_error(L, "unknown deftype %d", def->type); + } + if (!owned && created) { + upb_def_ref(def); + } else if (owned && !created) { + upb_def_unref(def); + } +} + + /* lupb_fielddef **************************************************************/ typedef struct { upb_fielddef *field; } lupb_fielddef; -static void lupb_fielddef_ref(void *cobj) { - upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef)); -} - static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) { - lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop); + bool created = lupb_cache_getorcreate(L, f, "upb.fielddef"); + if (created) { + // Need to obtain a ref on this field's msgdef (fielddefs themselves aren't + // refcounted, but they're kept alive by their owning msgdef). 
+ upb_def_ref(UPB_UPCAST(f->msgdef)); + } } static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { @@ -221,11 +450,9 @@ static int lupb_fielddef_index(lua_State *L) { } else if (strcmp(str, "label") == 0) { lua_pushinteger(L, f->field->label); } else if (strcmp(str, "def") == 0) { - upb_def_ref(f->field->def); - lupb_def_getorcreate(L, f->field->def); + lupb_def_getorcreate(L, f->field->def, false); } else if (strcmp(str, "msgdef") == 0) { - upb_def_ref(UPB_UPCAST(f->field->msgdef)); - lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef)); + lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef), false); } else { lua_pushnil(L); } @@ -264,10 +491,6 @@ static int lupb_symtab_gc(lua_State *L) { return 0; } -static void lupb_symtab_unref(void *cobj) { - upb_symtab_unref((upb_symtab*)cobj); -} - static int lupb_symtab_lookup(lua_State *L) { lupb_symtab *s = lupb_symtab_check(L, 1); size_t len; @@ -275,7 +498,7 @@ static int lupb_symtab_lookup(lua_State *L) { upb_string namestr = UPB_STACK_STRING_LEN(name, len); upb_def *def = upb_symtab_lookup(s->symtab, &namestr); if (def) { - lupb_def_getorcreate(L, def); + lupb_def_getorcreate(L, def, true); } else { lua_pushnil(L); } @@ -293,7 +516,7 @@ static int lupb_symtab_getdefs(lua_State *L) { for (int i = 0; i < count; i++) { upb_def *def = defs[i]; lua_pushnumber(L, i + 1); // 1-based array. - lupb_def_getorcreate(L, def); + lupb_def_getorcreate(L, def, true); // Add it to our return table. lua_settable(L, -3); } @@ -331,13 +554,15 @@ static const struct luaL_Reg lupb_symtab_mm[] = { static int lupb_symtab_new(lua_State *L) { upb_symtab *s = upb_symtab_new(); - lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref); + bool created = lupb_cache_getorcreate(L, s, "upb.symtab"); + (void)created; // For NDEBUG + assert(created); // It's new, there shouldn't be an obj for it already. 
return 1; } static int lupb_getfdsdef(lua_State *L) { - lupb_cache_getorcreate( - L, upb_getfdsdef(), "upb.msgdef", lupb_nop, lupb_def_unref); + upb_msgdef *fdsdef = upb_getfdsdef(); // Gets a ref on fdsdef. + lupb_def_getorcreate(L, UPB_UPCAST(fdsdef), true); return 1; } @@ -357,7 +582,7 @@ static void lupb_register_type(lua_State *L, const char *name, // Methods go in the mt's __index method. This implies that you can't // implement __index and also set methods yourself. luaL_register(L, NULL, m); - lua_setfield(L, -2, "__index"); + lua_setfield(L, -2, "__index"); } lua_pop(L, 1); // The mt. } @@ -367,8 +592,9 @@ int luaopen_upb(lua_State *L) { lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm); lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm); lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm); + lupb_register_type(L, "upb.msg", NULL, lupb_msg_mm); - // Create our object cache. TODO: need to make this table weak! + // Create our object cache. lua_createtable(L, 0, 0); lua_createtable(L, 0, 1); // Cache metatable. lua_pushstring(L, "v"); // Values are weak. diff --git a/src/descriptor.h b/src/descriptor.h deleted file mode 100644 index f6d3ca3..0000000 --- a/src/descriptor.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file contains declarations for an array that contains the contents - * of descriptor.proto, serialized as a protobuf. xxd is used to create - * the actual definition. 
- */ - -#ifndef UPB_DESCRIPTOR_H_ -#define UPB_DESCRIPTOR_H_ - -#include "upb_string.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern upb_string descriptor_str; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DESCRIPTOR_H_ */ diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 8b10522..78fc8b1 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -1,10 +1,11 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details. */ #include "upb_decoder.h" +#include "upb_varint_decoder.h" #include #include @@ -21,105 +22,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end, upb_value_handler_t value_cb, void *closure, void *table, int table_size); -/* Pure Decoding **************************************************************/ - -// The key fast-path varint-decoding routine. Here we can assume we have at -// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of -// possibilities for optimization/experimentation here. - -#ifdef USE_SSE_VARINT_DECODING -#include - -// This works, but is empirically slower than the branchy version below. Why? -// Most varints are very short. Next step: use branches for 1/2-byte varints, -// but use the SSE version for 3-10 byte varints. 
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { - const char *p = *ptr; - __m128i val128 = _mm_loadu_si128((void*)p); - unsigned int continuation_bits = _mm_movemask_epi8(val128); - unsigned int bsr_val = ~continuation_bits; - int varint_length = __builtin_ffs(bsr_val); - if (varint_length > 10) { - upb_seterr(s, UPB_ERROR, "Unterminated varint"); - return false; - } - - uint16_t twob; - memcpy(&twob, p, 2); - twob &= 0x7f7f; - twob = ((twob & 0xff00) >> 1) | (twob & 0xff); - - uint64_t eightb; - memcpy(&eightb, p + 2, 8); - eightb &= 0x7f7f7f7f7f7f7f7f; - eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff); - eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff); - eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff); - - uint64_t all_bits = twob | (eightb << 14); - int varint_bits = varint_length * 7; - uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1; - *val = all_bits & mask; - *ptr = p + varint_length; - return true; -} - -#else - -INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) { - const char *p = *ptr; - uint32_t low, high = 0; - uint32_t b; - b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done; - - upb_seterr(s, UPB_ERROR, "Unterminated varint"); - return false; - 
-done: - *val = ((uint64_t)high << 32) | low; - *ptr = p; - return true; -} - -typedef struct { - const char *newbuf; - uint64_t val; -} retval; - -retval upb_decode_varint_fast64(const char *p) { - uint64_t ret; - uint64_t b; - retval r = {(void*)0, 0}; - b = *(p++); ret = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done; - b = *(p++); ret |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done; - return r; - -done: - r.val = ret; - r.newbuf = p; - return r; -} - -#endif - /* Decoding/Buffering of individual values ************************************/ @@ -233,11 +135,13 @@ done: INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) { if (upb_decoder_bufleft(d) >= 16) { // Common (fast) case. 
- uint64_t val64; - const char *p = d->ptr; - if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; - upb_decoder_advance(d, p - d->ptr); - upb_value_setraw(val, val64); + upb_decoderet r = upb_decode_varint_fast(d->ptr); + if (r.p == NULL) { + upb_seterr(d->status, UPB_ERROR, "Unterminated varint"); + return false; + } + upb_value_setraw(val, r.val); + upb_decoder_advance(d, r.p - d->ptr); return true; } else { return upb_decode_varint_slow(d, val); @@ -352,11 +256,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) { d->dispatcher.top->handlers.set->value, d->dispatcher.top->handlers.closure, d->top->msgdef->itof.array, - d->top->msgdef->itof.array_size); + d->top->msgdef->itof.array_size, + d->tmp); CHECK_FLOW(ret.flow); + if (ret.ptr - d->ptr > 0) { + DEBUGPRINTF("Fast path parsed %d bytes of data!\n", (int)(ret.ptr - d->ptr)); + } d->ptr = ret.ptr; if (end - d->ptr < 12) { - DEBUGPRINTF("Off the fast path because <12 bytes of data\n"); + if (end == d->submsg_end && end != d->end) { + DEBUGPRINTF("Off the fast path because <12 bytes of data, but ONLY because of submsg end.\n"); + } else { + DEBUGPRINTF("Off the fast path because <12 bytes of data, NOT because of submsg end.\n"); + } } else { DEBUGPRINTF("Off the fast path for some other reason.\n"); } diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm index c59d131..032ea86 100644 --- a/src/upb_decoder_x64.asm +++ b/src/upb_decoder_x64.asm @@ -33,7 +33,7 @@ SECTION .text ; Register allocation. %define BUF rbx ; const char *p, current buf position. %define END rbp ; const char *end, where the buf ends (either submsg end or buf end) -%define FREE r12 ; unused +%define STRING r12 ; unused %define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call. %define CALLBACK r14 %define CLOSURE r15 @@ -143,6 +143,7 @@ _upb_fastdecode: ; Parse arguments into reg vals and stack. 
mov BUF, rdi + mov COMMITTED_BUF_SPILL, rdi mov END, rsi mov CALLBACK, rdx mov CLOSURE, rcx @@ -210,7 +211,6 @@ align 16 align 16 .string: - .cant_fast_path: mov rax, 0 ; UPB_CONTINUE -- continue as before. .done: diff --git a/src/upb_def.c b/src/upb_def.c index 0382610..d77e29a 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -6,9 +6,11 @@ #include #include +#include #include "descriptor.c" #include "descriptor_const.h" #include "upb_def.h" +#include "upb_msg.h" #define alignof(t) offsetof(struct { char c; t x; }, x) @@ -261,6 +263,8 @@ struct _upb_defbuilder { bool saw_number; bool saw_name; + upb_string *default_string; + upb_fielddef *f; }; typedef struct _upb_defbuilder upb_defbuilder; @@ -276,12 +280,18 @@ static void upb_defbuilder_init(upb_defbuilder *b) { upb_status_init(&b->status); b->stack_len = 0; b->name = NULL; + b->default_string = NULL; } static void upb_defbuilder_uninit(upb_defbuilder *b) { upb_string_unref(b->name); upb_status_uninit(&b->status); upb_deflist_uninit(&b->defs); + upb_string_unref(b->default_string); + while (b->stack_len > 0) { + upb_defbuilder_frame *f = &b->stack[--b->stack_len]; + upb_string_unref(f->name); + } } static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { @@ -587,6 +597,19 @@ upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { /* upb_fielddef ***************************************************************/ static void upb_fielddef_free(upb_fielddef *f) { + if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) { + upb_string_unref(upb_value_getstr(f->default_value)); + } else if (upb_issubmsg(f)) { + upb_msg *m = upb_value_getmsg(f->default_value); + assert(m); + // We cheat a bit here. We need to unref msg, but we don't have a reliable + // way of accessing the msgdef (which is required by upb_msg_unref()), + // because f->def may have already been collected as part of a cycle if + // this is an unowned ref. 
But we know that default messages never contain + // references to other messages, and their only string references are to + // the singleton empty string, so we can safely unref+free msg directly. + if (upb_atomic_unref(&m->refcount)) free(m); + } upb_string_unref(f->name); if(f->owned) { upb_def_unref(f->def); @@ -606,6 +629,109 @@ static upb_flow_t upb_fielddef_startmsg(void *_b) { return UPB_CONTINUE; } +// Converts the default value in string "dstr" into "d". Passes a ref on dstr. +// Returns true on success. +static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) { + bool success = true; + if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) { + // We'll keep the ref we had on it. We include enums in this case because + // we need the enumdef to resolve the name, but we may not have it yet. + // We'll resolve it later. + if (dstr) { + upb_value_setstr(d, dstr); + } else { + upb_value_setstr(d, upb_emptystring()); + } + } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) { + // We don't expect to get a default value. + upb_string_unref(dstr); + if (dstr != NULL) { + printf("Returning false because I got a default string for a message!\n"); + success = false; + } + } else { + // The strto* functions need the string to be NULL-terminated. + char *strz = upb_string_isempty(dstr) ? 
NULL : upb_string_newcstr(dstr); + char *end; errno = 0; // strto*() never clear errno; reset it so a stale ERANGE cannot fail a valid conversion. + upb_string_unref(dstr); + switch (type) { + case UPB_TYPE(INT32): + case UPB_TYPE(SINT32): + case UPB_TYPE(SFIXED32): + if (strz) { + long val = strtol(strz, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) + success = false; + else + upb_value_setint32(d, val); + } else { + upb_value_setint32(d, 0); + } + break; + case UPB_TYPE(INT64): + case UPB_TYPE(SINT64): + case UPB_TYPE(SFIXED64): + if (strz) { + upb_value_setint64(d, strtoll(strz, &end, 0)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setint64(d, 0); + } + break; + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + if (strz) { + unsigned long val = strtoul(strz, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) + success = false; + else + upb_value_setuint32(d, val); + } else { + upb_value_setuint32(d, 0); + } + break; + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + if (strz) { + upb_value_setuint64(d, strtoull(strz, &end, 0)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setuint64(d, 0); + } + break; + case UPB_TYPE(DOUBLE): + if (strz) { + upb_value_setdouble(d, strtod(strz, &end)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setdouble(d, 0.0); + } + break; + case UPB_TYPE(FLOAT): + if (strz) { + upb_value_setfloat(d, strtof(strz, &end)); + if (errno == ERANGE || *end) success = false; + } else { + upb_value_setfloat(d, 0.0); + } + break; + case UPB_TYPE(BOOL): + if (!strz || strcmp(strz, "false") == 0) + upb_value_setbool(d, false); + else if (strcmp(strz, "true") == 0) + upb_value_setbool(d, true); + else + success = false; + break; + } + if (!success) { + printf("Returning false on the int conversion path, was trying to convert: %s, type=%d\n", strz, type); + } + free(strz); + } + return success; +} + static upb_flow_t upb_fielddef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = b->f; @@ -619,6 +745,15 @@ static upb_flow_t 
upb_fielddef_endmsg(void *_b) { upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, f->number, &itof_ent); upb_strtable_insert(&m->ntof, &ntof_ent.e); + + upb_string *dstr = b->default_string; + b->default_string = NULL; + if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) { + // We don't worry too much about giving a great error message since the + // compiler should have ensured this was correct. + upb_seterr(&b->status, UPB_ERROR, "Error converting default value."); + return UPB_BREAK; + } return UPB_CONTINUE; } @@ -644,6 +779,12 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->owned = true; break; } + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM: + // Have to convert from string to the correct type, but we might not know + // the type yet. + upb_string_unref(b->default_string); + b->default_string = upb_string_getref(upb_value_getstr(val)); + break; } return UPB_CONTINUE; } @@ -683,6 +824,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_b) { upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + m->default_message = NULL; upb_deflist_push(&b->defs, UPB_UPCAST(m)); upb_defbuilder_startcontainer(b); return UPB_CONTINUE; @@ -703,7 +845,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_field_count_t field = 0; upb_msg_iter i; for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - sorted_fields[field++]= upb_msg_iter_field(i); + sorted_fields[field++] = upb_msg_iter_field(i); } qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); @@ -745,6 +887,18 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); + // Create default message instance, an immutable message with all default + // values set (except submessages, which are simply marked as unset). 
We + // could alternatively leave all set bits unset, but this would make + // upb_msg_get() take its unexpected branch more often for no good reason. + m->default_message = upb_msg_new(m); + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + if (!upb_issubmsg(f) && !upb_isarray(f) && f->type != UPB_TYPE(ENUM)) { + upb_msg_set(m->default_message, f, f->default_value); + } + } + upb_defbuilder_endcontainer(b); return UPB_CONTINUE; } @@ -802,6 +956,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, static void upb_msgdef_free(upb_msgdef *m) { + upb_msg_unref(m->default_message, m); upb_msg_iter i; for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) upb_fielddef_free(upb_msg_iter_field(i)); @@ -818,6 +973,10 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { // We will later make the ref unowned if it is a part of a cycle. f->owned = true; upb_def_ref(def); + if (upb_issubmsg(f)) { + upb_msgdef *md = upb_downcast_msgdef(def); + upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message)); + } } upb_msg_iter upb_msg_begin(upb_msgdef *m) { @@ -937,7 +1096,8 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } // Given a table of pending defs "tmptab" and a table of existing defs "symtab", -// resolves all of the unresolved refs for the defs in tmptab. +// resolves all of the unresolved refs for the defs in tmptab. Also resolves +// default values for enumerations and submessages. bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_status *status) { @@ -1352,7 +1512,7 @@ upb_def *upb_getdescriptordef(upb_string *str) { // upb itself is corrupt. abort(); } - upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it. + upb_msgdef_unref(def); // The symtab already holds a ref on it. 
atexit(upb_free_descriptor_symtab); } return upb_symtab_resolve( diff --git a/src/upb_def.h b/src/upb_def.h index 121d5bc..3f79895 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -81,6 +81,9 @@ INLINE void upb_def_unref(upb_def *def) { if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); } +#define UPB_UPCAST(ptr) (&(ptr)->base) + + /* upb_fielddef ***************************************************************/ // A upb_fielddef describes a single field in a message. It isn't a full def @@ -158,6 +161,10 @@ typedef struct _upb_msgdef { // Tables for looking up fields by number and name. upb_inttable itof; // int to field upb_strtable ntof; // name to field + + // Immutable msg instance that has all default values set. + // TODO: need a way of making this immutable! + struct _upb_msg *default_message; } upb_msgdef; // Hash table entries for looking up fields by name or number. @@ -172,6 +179,13 @@ typedef struct { upb_fielddef *f; } upb_ntof_ent; +INLINE void upb_msgdef_unref(upb_msgdef *md) { + upb_def_unref(UPB_UPCAST(md)); +} +INLINE void upb_msgdef_ref(upb_msgdef *md) { + upb_def_ref(UPB_UPCAST(md)); +} + // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. 
@@ -361,8 +375,6 @@ UPB_DOWNCAST_DEF(extdef, EXT); UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); #undef UPB_DOWNCAST_DEF -#define UPB_UPCAST(ptr) (&(ptr)->base) - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_msg.c b/src/upb_msg.c index 9dfbea4..211004c 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -145,6 +145,22 @@ INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { msg->data[f->set_bit_offset] |= f->set_bit_mask; } +void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + assert(val.type == upb_field_valuetype(f)); + upb_valueptr ptr = _upb_msg_getptr(msg, f); + if (upb_field_ismm(f)) { + // Unref any previous value we may have had there. + upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f)); + upb_field_unref(oldval, f); + + // Ref the new value. + upb_atomic_refcount_t *refcount = upb_value_getrefcount(val); + if (refcount) upb_atomic_ref(refcount); + } + upb_msg_sethas(msg, f); + return upb_value_write(ptr, val, upb_field_valuetype(f)); +} + static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) { upb_valueptr p = _upb_msg_getptr(msg, f); if (upb_isarray(f)) { diff --git a/src/upb_msg.h b/src/upb_msg.h index 3246971..ff8489c 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -135,6 +135,7 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, #undef CASE } + /* upb_array ******************************************************************/ typedef uint32_t upb_arraylen_t; @@ -172,8 +173,17 @@ INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f, return upb_value_read(_upb_array_getptr(arr, f, i), f->type); } + /* upb_msg ********************************************************************/ +// upb_msg is not self-describing; the upb_msg does not contain a pointer to the +// upb_msgdef. While this makes the API a bit more cumbersome to use, this +// choice was made for a few important reasons: +// +// 1. it would make every message 8 bytes larger on 64-bit platforms. 
This is +// a high overhead for small messages. +// 2. you would want the msg to own a ref on its msgdef, but this would require +// an atomic operation for every message create or destroy! struct _upb_msg { upb_atomic_refcount_t refcount; uint8_t data[4]; // We allocate the appropriate amount per message. @@ -194,6 +204,11 @@ upb_msg *upb_msg_new(upb_msgdef *md); INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); } +INLINE upb_msg *upb_msg_getref(upb_msg *msg) { + assert(msg); + upb_atomic_ref(&msg->refcount); + return msg; +} void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef); @@ -203,10 +218,40 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0; } +// We have several options for handling default values: +// 1. inside upb_msg_clear(), overwrite all values to be their defaults, +// overwriting submessage pointers to point to the default instance again. +// 2. inside upb_msg_get(), test upb_msg_has() and return md->default_value +// if it is not set. upb_msg_clear() only clears the set bits. +// We lazily clear objects if/when we reuse them. +// 3. inside upb_msg_clear(), overwrite all values to be their default, +// and recurse into submessages to set all their values to defaults also. +// 4. as a hybrid of (1) and (3), make each "set bit" tri-state, where it +// can have a value of "unset, but cached sub-message needs to be cleared." +// Like (2) we can cache sub-messages and lazily clear, but primitive values +// can always be returned straight from the message. +// +// (1) is undesirable, because it prevents us from caching sub-objects. +// (2) makes clear() cheaper, but makes get() branchier. +// (3) makes get() less branchy, but makes clear() have worse cache behavior. +// (4) makes get() differently branchy (only returns default from msgdef if +// NON-primitive value is unset), but uses more set bits. 
It's questionable +// whether it would be a performance improvement. +// +// For the moment we go with (2). Google's protobuf does (3), which is likely +// part of the reason we beat it in some benchmarks. + +// For submessages and strings, the returned value is not owned. INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); + if (upb_msg_has(msg, f)) { + return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f)); + } else { + return f->default_value; + } } +void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); + // Unsets all field values back to their defaults. INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); diff --git a/src/upb_string.c b/src/upb_string.c index 81b152d..29ce7d4 100644 --- a/src/upb_string.c +++ b/src/upb_string.c @@ -147,4 +147,15 @@ error: return NULL; } -void upb_string_noninlinerecycle(upb_string **_str) { return upb_string_recycle(_str); } +upb_string *upb_emptystring() { + static upb_string empty = UPB_STATIC_STRING(""); + return &empty; +} + +char *upb_string_newcstr(upb_string *str) { + upb_strlen_t len = upb_string_len(str); + char *ret = malloc(len+1); + memcpy(ret, upb_string_getrobuf(str), len); + ret[len] = '\0'; + return ret; +} diff --git a/src/upb_string.h b/src/upb_string.h index 3799c5e..efafa44 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -134,6 +134,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) { // Returns the length of the string. INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } +INLINE bool upb_string_isempty(upb_string *str) { + return !str || upb_string_len(str) == 0; +} // Use to read the bytes of the string. The caller *must* call // upb_string_endread() after the data has been read.
The window between @@ -273,6 +276,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str, //#endif #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) +// Returns a singleton empty string. +upb_string *upb_emptystring(); + + /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer @@ -339,6 +346,9 @@ INLINE upb_string *upb_strdupc(const char *src) { return upb_strduplen(src, strlen(src)); } +// Returns a newly-allocated NULL-terminated copy of str. +char *upb_string_newcstr(upb_string *str); + // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); diff --git a/src/upbc.c b/src/upbc.c index 428ec41..4fa8a71 100644 --- a/src/upbc.c +++ b/src/upbc.c @@ -12,7 +12,6 @@ #include #include #include -#include "descriptor.h" #include "upb_def.h" #include "upb_msg.h" #include "upb_glue.h" diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 1839123..f3c54b4 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -1,20 +1,20 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * A test that verifies that our results are identical to proto2 for a + * given proto type and input protobuf. + * + * Copyright (c) 2011 Joshua Haberman. See LICENSE for details. + */ -#undef NDEBUG /* ensure tests always assert. */ #include #include #include #include -#include "upb_decoder.h" +#include "upb_test.h" #include "upb_def.h" #include "upb_glue.h" #include "upb_msg.h" -#include "upb_strstream.h" - -int num_assertions = 0; -#define ASSERT(expr) do { \ - ++num_assertions; \ - assert(expr); \ - } while(0) #include MESSAGE_HFILE diff --git a/tests/tests.c b/tests/tests.c index c691b18..a04b1da 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -1,212 +1,13 @@ -#undef NDEBUG /* ensure tests always assert. 
*/ #include #include #include -#include "upb_decoder.c" #include "upb_def.h" #include "upb_glue.h" - -int num_assertions = 0; -#define ASSERT(expr) do { \ - ++num_assertions; \ - assert(expr); \ - } while(0) - -static void test_get_v_uint64_t() -{ -#define TEST(name, bytes, val) {\ - upb_status status = UPB_STATUS_INIT; \ - const char name[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \ - const char *name ## _buf = name; \ - uint64_t name ## _val = 0; \ - upb_decode_varint_fast(&name ## _buf, &name ## _val, &status); \ - ASSERT(upb_ok(&status)); \ - ASSERT(name ## _val == val); \ - ASSERT(name ## _buf == name + sizeof(name) - 16); /* - 1 for NULL */ \ - } - - TEST(zero, "\x00", 0ULL); - TEST(one, "\x01", 1ULL); - TEST(twob, "\x81\x14", 0xa01ULL); - TEST(twob, "\x81\x03", 0x181ULL); - TEST(threeb, "\x81\x83\x07", 0x1c181ULL); - TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181ULL); - TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0x1f1e1c181ULL); - TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0x1f9f1e1c181ULL); - TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0x1fdf9f1e1c181ULL); - TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0x3fdf9f1e1c181ULL); - TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0x303fdf9f1e1c181ULL); - TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL); -#undef TEST - - char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; - const char *twelvebyte_buf = twelvebyte; - uint64_t twelvebyte_val = 0; - upb_status status = UPB_STATUS_INIT; - /* A varint that terminates before hitting the end of the provided buffer, - * but in too many bytes (11 instead of 10). 
*/ - upb_decode_varint_fast(&twelvebyte_buf, &twelvebyte_val, &status); - ASSERT(status.code == UPB_ERROR); - upb_status_uninit(&status); -} - -#if 0 -static void test_get_v_uint32_t() -{ -#define TEST(name, bytes, val) {\ - upb_status status = UPB_STATUS_INIT; \ - const uint8_t name[] = bytes; \ - const uint8_t *name ## _buf = name; \ - uint32_t name ## _val = 0; \ - name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name), &name ## _val, &status); \ - ASSERT(upb_ok(&status)); \ - ASSERT(name ## _val == val); \ - ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \ - /* Test NEED_MORE_DATA. */ \ - if(sizeof(name) > 2) { \ - name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name) - 2, &name ## _val, &status); \ - ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \ - } \ - } - - TEST(zero, "\x00", 0UL); - TEST(one, "\x01", 1UL); - TEST(twob, "\x81\x03", 0x181UL); - TEST(threeb, "\x81\x83\x07", 0x1c181UL); - TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181UL); - /* get_v_uint32_t truncates, so all the rest return the same thing. */ - TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0xf1e1c181UL); - TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0xf1e1c181UL); - TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0xf1e1c181UL); - TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0xf1e1c181UL); - TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0xf1e1c181UL); - TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0xf1e1c181UL); -#undef TEST - - uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; - uint32_t twelvebyte_val = 0; - upb_status status = UPB_STATUS_INIT; - /* A varint that terminates before hitting the end of the provided buffer, - * but in too many bytes (11 instead of 10). 
*/ - upb_get_v_uint32_t(twelvebyte, twelvebyte + 12, &twelvebyte_val, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT); - - /* A varint that terminates simultaneously with the end of the provided - * buffer, but in too many bytes (11 instead of 10). */ - upb_reset(&status); - upb_get_v_uint32_t(twelvebyte, twelvebyte + 11, &twelvebyte_val, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT); - - /* A varint whose buffer ends on exactly the byte where the varint must - * terminate, but the final byte does not terminate. The absolutely most - * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know - * by this point that the varint does not properly terminate. But we also - * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not - * compromise overall correctness -- clients who supply more data later will - * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no - * more data to supply will (rightly) conclude that their protobuf is corrupt. - */ - upb_reset(&status); - upb_get_v_uint32_t(twelvebyte, twelvebyte + 10, &twelvebyte_val, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT || - status.code == UPB_STATUS_NEED_MORE_DATA); - - upb_reset(&status); - upb_get_v_uint32_t(twelvebyte, twelvebyte + 9, &twelvebyte_val, &status); - ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); -} - -static void test_skip_v_uint64_t() -{ -#define TEST(name, bytes) {\ - upb_status status = UPB_STATUS_INIT; \ - const uint8_t name[] = bytes; \ - const uint8_t *name ## _buf = name; \ - name ## _buf = upb_skip_v_uint64_t(name ## _buf, name + sizeof(name), &status); \ - ASSERT(upb_ok(&status)); \ - ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \ - /* Test NEED_MORE_DATA. 
*/ \ - if(sizeof(name) > 2) { \ - name ## _buf = upb_skip_v_uint64_t(name, name + sizeof(name) - 2, &status); \ - ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \ - } \ - } - - TEST(zero, "\x00"); - TEST(one, "\x01"); - TEST(twob, "\x81\x03"); - TEST(threeb, "\x81\x83\x07"); - TEST(fourb, "\x81\x83\x87\x0f"); - TEST(fiveb, "\x81\x83\x87\x8f\x1f"); - TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f"); - TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f"); - TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01"); - TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03"); - TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07"); -#undef TEST - - uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01}; - upb_status status = UPB_STATUS_INIT; - /* A varint that terminates before hitting the end of the provided buffer, - * but in too many bytes (11 instead of 10). */ - upb_skip_v_uint64_t(twelvebyte, twelvebyte + 12, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT); - - /* A varint that terminates simultaneously with the end of the provided - * buffer, but in too many bytes (11 instead of 10). */ - upb_reset(&status); - upb_skip_v_uint64_t(twelvebyte, twelvebyte + 11, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT); - - /* A varint whose buffer ends on exactly the byte where the varint must - * terminate, but the final byte does not terminate. The absolutely most - * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know - * by this point that the varint does not properly terminate. But we also - * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not - * compromise overall correctness -- clients who supply more data later will - * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no - * more data to supply will (rightly) conclude that their protobuf is corrupt. 
- */ - upb_reset(&status); - upb_skip_v_uint64_t(twelvebyte, twelvebyte + 10, &status); - ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT || - status.code == UPB_STATUS_NEED_MORE_DATA); - - upb_reset(&status); - upb_skip_v_uint64_t(twelvebyte, twelvebyte + 9, &status); - ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); -} - -static void test_get_f_uint32_t() -{ -#define TEST(name, bytes, val) {\ - upb_status status = UPB_STATUS_INIT; \ - const uint8_t name[] = bytes; \ - const uint8_t *name ## _buf = name; \ - uint32_t name ## _val = 0; \ - name ## _buf = upb_get_f_uint32_t(name ## _buf, name + sizeof(name), &name ## _val, &status); \ - ASSERT(upb_ok(&status)); \ - ASSERT(name ## _val == val); \ - ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \ - } - - TEST(zero, "\x00\x00\x00\x00", 0x0UL); - TEST(one, "\x01\x00\x00\x00", 0x1UL); - - uint8_t threeb[] = {0x00, 0x00, 0x00}; - uint32_t threeb_val; - upb_status status = UPB_STATUS_INIT; - upb_get_f_uint32_t(threeb, threeb + sizeof(threeb), &threeb_val, &status); - ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); - -#undef TEST -} -#endif +#include "upb_test.h" static void test_upb_symtab() { upb_symtab *s = upb_symtab_new(); - upb_symtab_add_descriptorproto(s); ASSERT(s); upb_string *descriptor = upb_strreadfile("tests/test.proto.pb"); if(!descriptor) { @@ -240,11 +41,8 @@ static void test_upb_symtab() { upb_def_ref(def2); upb_def_unref(def); upb_def_unref(def2); - - } - int main() { #define TEST(func) do { \ @@ -254,10 +52,6 @@ int main() printf("ok (%d assertions).\n", num_assertions - assertions_before); \ } while (0) - TEST(test_get_v_uint64_t); - //TEST(test_get_v_uint32_t); - //TEST(test_skip_v_uint64_t); - //TEST(test_get_f_uint32_t); TEST(test_upb_symtab); printf("All tests passed (%d assertions).\n", num_assertions); return 0; -- cgit v1.2.3