summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile8
-rw-r--r--lang_ext/lua/test.lua17
-rw-r--r--lang_ext/lua/upb.c330
-rw-r--r--src/descriptor.h26
-rw-r--r--src/upb_decoder.c126
-rw-r--r--src/upb_decoder_x64.asm4
-rw-r--r--src/upb_def.c166
-rw-r--r--src/upb_def.h16
-rw-r--r--src/upb_msg.c16
-rw-r--r--src/upb_msg.h47
-rw-r--r--src/upb_string.c13
-rw-r--r--src/upb_string.h10
-rw-r--r--src/upbc.c1
-rw-r--r--tests/test_vs_proto2.cc18
-rw-r--r--tests/tests.c208
15 files changed, 592 insertions, 414 deletions
diff --git a/Makefile b/Makefile
index d9f8008..13ce46a 100644
--- a/Makefile
+++ b/Makefile
@@ -94,6 +94,7 @@ TESTS_SRC= \
tests/test_stream.c \
tests/test_string.c \
tests/tests.c \
+ tests/tests_varint.c \
tests/test_vs_proto2.cc
ALLSRC=$(CORE) $(STREAM) $(BENCHMARKS_SRC) $(TESTS_SRC)
@@ -138,11 +139,11 @@ $(LIBUPB_PIC): $(PICOBJ)
# critical path but gets very large when -O3 is used.
src/upb_def.o: src/upb_def.c
$(E) CC $<
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $<
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $<
src/upb_def.lo: src/upb_def.c
$(E) 'CC -fPIC' $<
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC
src/upb_decoder_x64.o: src/upb_decoder_x64.asm
$(E) NASM $<
@@ -183,6 +184,7 @@ SIMPLE_TESTS= \
tests/test_string \
tests/test_def \
tests/test_stream \
+ tests/test_varint \
tests/tests
# tests/test_decoder \
@@ -202,7 +204,7 @@ tests/tests: tests/test.proto.pb
$(SIMPLE_TESTS): % : %.c
$(E) CC $<
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB)
VALGRIND=valgrind --leak-check=full --error-exitcode=1
test: tests
diff --git a/lang_ext/lua/test.lua b/lang_ext/lua/test.lua
index a49cebc..978fb11 100644
--- a/lang_ext/lua/test.lua
+++ b/lang_ext/lua/test.lua
@@ -18,3 +18,20 @@ symtab:parsedesc(f:read("*all"))
for _, def in ipairs(symtab:getdefs(-1)) do
print(def:name())
end
+
+SpeedMessage1 = symtab:lookup("benchmarks.SpeedMessage1")
+print(SpeedMessage1:name())
+
+msg = SpeedMessage1()  -- Calling a msgdef (its __call metamethod) builds a new message.
+-- print(msg.field1)
+-- print(msg.field129)
+-- print(msg.field271)
+-- print(msg.field15.field15)
+-- print(msg.field1)
+-- print(msg.field1)
+-- msg.field1 = "YEAH BABY!"
+-- print(msg.field1)
+print(msg.field129)  -- Read the field before anything has been assigned to it.
+msg.field129 = 5  -- Assignment is routed through the message's __newindex metamethod.
+print(msg.field129)  -- Read the field back after the assignment.
+
diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c
index bf1eb02..460ac86 100644
--- a/lang_ext/lua/upb.c
+++ b/lang_ext/lua/upb.c
@@ -7,9 +7,20 @@
*/
#include <stdlib.h>
+#include <math.h>
+#include <float.h>
#include "lauxlib.h"
#include "upb_def.h"
#include "upb_glue.h"
+#include "upb_msg.h"
+
+static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md);
+
+// All the def types share the same C layout, even though they are different Lua
+// types with different metatables.
+typedef struct {
+ upb_def *def;  // The wrapped def; the only member of the userdata.
+} lupb_def;
void lupb_pushstring(lua_State *L, upb_string *str) {
lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str));
@@ -30,21 +41,17 @@ void lupb_checkstatus(lua_State *L, upb_status *s) {
upb_status_uninit(s);
}
+
/* object cache ***************************************************************/
// We cache all the lua objects (userdata) we vend in a weak table, indexed by
// the C pointer of the object they are caching.
-typedef void (*lupb_cb)(void *cobj);
-
-static void lupb_nop(void *foo) {
- (void)foo;
-}
-
-static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
- lupb_cb ref, lupb_cb unref) {
+static void *lupb_cache_getorcreate_size(
+ lua_State *L, void *cobj, const char *type, size_t size) {
// Lookup our cache in the registry (we don't put our objects in the registry
// directly because we need our cache to be a weak table).
+ void **obj = NULL;
lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache");
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
lua_pushlightuserdata(L, cobj);
@@ -55,7 +62,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lua_pop(L, 1);
// We take advantage of the fact that all of our objects are currently a
// single pointer, and thus have the same layout.
- void **obj = lua_newuserdata(L, sizeof(void*));
+ obj = lua_newuserdata(L, size);
*obj = cobj;
luaL_getmetatable(L, type);
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
@@ -65,44 +72,235 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lua_pushlightuserdata(L, cobj);
lua_pushvalue(L, -2);
lua_rawset(L, -4);
- ref(cobj);
- } else {
- unref(cobj);
}
lua_insert(L, -2);
lua_pop(L, 1);
+ return obj;
}
+// Convenience wrapper for the common case where the Lua userdata holds exactly
+// one C pointer.  Returns true only when a fresh userdata had to be created
+// for cobj (i.e. lupb_cache_getorcreate_size() handed back a non-NULL pointer).
+static bool lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type) {
+  void *fresh = lupb_cache_getorcreate_size(L, cobj, type, sizeof(void*));
+  return fresh != NULL;
+}
-/* lupb_def *******************************************************************/
-// All the def types share the same C layout, even though they are different Lua
-// types with different metatables.
+/* lupb_msg********************************************************************/
+
+// We prefer field access syntax (foo.bar, foo.bar = 5) over method syntax
+// (foo:bar(), foo:set_bar(5)) to make messages behave more like regular tables.
+// However, there are methods also, like foo:CopyFrom(other_foo) or foo:Clear().
+
typedef struct {
- upb_def *def;
-} lupb_def;
+ upb_msg *msg;  // Must stay the first member: the object cache stores the C pointer at the start of the userdata (*obj = cobj).
+ upb_msgdef *msgdef;  // Type of msg; a ref is taken when the wrapper is created and dropped in lupb_msg_gc().
+} lupb_msg;
-static void lupb_def_unref(void *cobj) {
- upb_def_unref((upb_def*)cobj);
+static lupb_msg *lupb_msg_check(lua_State *L, int narg) {  // Fetches argument narg as a "upb.msg" userdata.
+ return luaL_checkudata(L, narg, "upb.msg");  // luaL_checkudata raises a Lua error if the argument is of another type.
}
-static void lupb_def_getorcreate(lua_State *L, upb_def *def) {
- const char *type_name;
- switch(def->type) {
- case UPB_DEF_MSG:
- type_name = "upb.msgdef";
+// Creates a brand-new message of type md and builds its Lua wrapper through
+// the object cache.  No additional ref is taken on the message here (contrast
+// lupb_msg_getorcreate()); the wrapper's msg pointer is filled in by the cache
+// helper, which stores the C pointer at the start of the userdata.
+static void lupb_msg_pushnew(lua_State *L, upb_msgdef *md) {
+  upb_msg *fresh = upb_msg_new(md);
+  lupb_msg *wrapper =
+      lupb_cache_getorcreate_size(L, fresh, "upb.msg", sizeof(lupb_msg));
+  assert(wrapper);
+  // The msgdef has to outlive the message.  This costs an atomic ref; if that
+  // proves too expensive there are other options, such as a separate
+  // metatable per msgdef that itself references the msgdef.
+  upb_msgdef_ref(md);
+  wrapper->msgdef = md;
+}
+
+// Produces the Lua wrapper for an existing message, creating one on a cache
+// miss.  The caller does *not* pass a ref: a newly created wrapper takes its
+// own ref on the message.
+static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md) {
+  lupb_msg *wrapper =
+      lupb_cache_getorcreate_size(L, msg, "upb.msg", sizeof(lupb_msg));
+  if (!wrapper) return;  // Found in the cache; nothing new to ref.
+  // New Lua object: ref the message, and ref the msgdef as well so that the
+  // msgdef is guaranteed to outlive the message.
+  wrapper->msg = upb_msg_getref(msg);
+  wrapper->msgdef = md;
+  upb_msgdef_ref(md);
+}
+
+// __gc metamethod for "upb.msg": drops the wrapper's ref on the message and
+// then its ref on the msgdef.  The message goes first because
+// upb_msg_unref() is handed the msgdef.
+static int lupb_msg_gc(lua_State *L) {
+  lupb_msg *wrapper = lupb_msg_check(L, 1);
+  upb_msgdef *md = wrapper->msgdef;
+  upb_msg_unref(wrapper->msg, md);
+  upb_msgdef_unref(md);
+  return 0;
+}
+
+// Pushes val onto the Lua stack, interpreting it according to the type of
+// field f: numeric types become Lua numbers, bools become Lua booleans,
+// strings/bytes become Lua strings, and submessages/groups become "upb.msg"
+// wrappers obtained through lupb_msg_getorcreate().
+static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
+  switch (f->type) {
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SINT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(ENUM):
+      lua_pushnumber(L, upb_value_getint32(val));
+      break;
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SINT64):
+    case UPB_TYPE(SFIXED64):
+      lua_pushnumber(L, upb_value_getint64(val));
+      break;
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      lua_pushnumber(L, upb_value_getuint32(val));
+      break;
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      lua_pushnumber(L, upb_value_getuint64(val));
+      break;
+    case UPB_TYPE(DOUBLE):
+      lua_pushnumber(L, upb_value_getdouble(val));
+      break;
+    case UPB_TYPE(FLOAT):
+      lua_pushnumber(L, upb_value_getfloat(val));
+      break;
+    case UPB_TYPE(BOOL):
+      lua_pushboolean(L, upb_value_getbool(val));
+      break;
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES): {
+      upb_string *s = upb_value_getstr(val);
+      assert(s);
+      lua_pushlstring(L, upb_string_getrobuf(s), upb_string_len(s));
+      break;
+    }
+    case UPB_TYPE(MESSAGE):
+    case UPB_TYPE(GROUP): {
+      upb_msg *sub = upb_value_getmsg(val);
+      assert(sub);
+      lupb_msg_getorcreate(L, sub, upb_downcast_msgdef(f->def));
+      break;
+    }
+  }
+}
+
+// Converts the Lua value at stack index narg into a upb_value for field f.
+// Raises a Lua error (luaL_error does not return) if the value has the wrong
+// Lua type, is a non-integral number for an integer field, or does not fit the
+// field's range.  For string/bytes fields the result holds a upb_string made
+// by upb_strduplen(), which lupb_msg_newindex() unrefs after storing it; for
+// message fields the wrapper's upb_msg pointer is returned as-is.
+static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f) {
+  upb_value val;
+  lua_Number num = 0;  // Only read for the numeric field types below.
+  if (!upb_issubmsg(f) && !upb_isstring(f) && f->type != UPB_TYPE(BOOL)) {
+    num = luaL_checknumber(L, narg);
+    if (f->type != UPB_TYPE(DOUBLE) && f->type != UPB_TYPE(FLOAT) &&
+        num != rint(num)) {
+      luaL_error(L, "Cannot assign non-integer number %f to integer field", num);
+    }
+  }
+  switch (f->type) {
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SINT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(ENUM):
+      if (num > INT32_MAX || num < INT32_MIN)
+        luaL_error(L, "Number %f is out-of-range for 32-bit integer field.", num);
+      upb_value_setint32(&val, num);
break;
- case UPB_DEF_ENUM:
- type_name = "upb.enumdef";
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SINT64):
+    case UPB_TYPE(SFIXED64):
+      // INT64_MAX is not representable as a double and rounds up to 2^63 when
+      // converted, so test against 2^63 itself with >=.  Otherwise 2^63 would
+      // pass the check and the conversion below would be undefined.
+      if (num >= 9223372036854775808.0 || num < -9223372036854775808.0)
+        luaL_error(L, "Number %f is out-of-range for 64-bit integer field.", num);
+      upb_value_setint64(&val, num);
break;
- default:
- luaL_error(L, "unknown deftype %d", def->type);
- type_name = NULL; // Placate the compiler.
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      if (num > UINT32_MAX || num < 0)
+        luaL_error(L, "Number %f is out-of-range for unsigned 32-bit integer field.", num);
+      upb_value_setuint32(&val, num);
+      break;
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      // Same rounding problem as above: UINT64_MAX becomes 2^64 as a double.
+      if (num >= 18446744073709551616.0 || num < 0)
+        luaL_error(L, "Number %f is out-of-range for unsigned 64-bit integer field.", num);
+      upb_value_setuint64(&val, num);
+      break;
+    case UPB_TYPE(DOUBLE):
+      if (num > DBL_MAX || num < -DBL_MAX) {
+        // This could happen if lua_Number was long double.
+        luaL_error(L, "Number %f is out-of-range for double field.", num);
+      }
+      upb_value_setdouble(&val, num);
+      break;
+    case UPB_TYPE(FLOAT):
+      if (num > FLT_MAX || num < -FLT_MAX)
+        luaL_error(L, "Number %f is out-of-range for float field.", num);
+      upb_value_setfloat(&val, num);
+      break;
+    case UPB_TYPE(BOOL):
+      if (!lua_isboolean(L, narg))
+        luaL_error(L, "Must explicitly pass true or false for boolean fields");
+      upb_value_setbool(&val, lua_toboolean(L, narg));
+      break;
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES): {
+      // TODO: is there any reasonable way to avoid a copy here?
+      size_t len;
+      const char *str = luaL_checklstring(L, narg, &len);
+      upb_value_setstr(&val, upb_strduplen(str, len));
+      break;
+    }
+    case UPB_TYPE(MESSAGE):
+    case UPB_TYPE(GROUP): {
+      lupb_msg *m = lupb_msg_check(L, narg);
+      if (m->msgdef != upb_downcast_msgdef(f->def))
+        luaL_error(L, "Tried to assign a message of the wrong type.");
+      upb_value_setmsg(&val, m->msg);
+      break;
+    }
+  }
+  return val;
+}
+
+
+// __index metamethod for "upb.msg": msg.name yields the value of the field
+// called name if the msgdef has one; otherwise the key is looked up in the
+// metatable so that methods can be found.  Raises a Lua error if the key is
+// neither.
+static int lupb_msg_index(lua_State *L) {
+  assert(lua_gettop(L) == 2);  // __index should always be called with 2 args.
+  lupb_msg *self = lupb_msg_check(L, 1);
+  size_t keylen;
+  const char *key = luaL_checklstring(L, 2, &keylen);
+  upb_string keystr = UPB_STACK_STRING_LEN(key, keylen);
+  upb_fielddef *field = upb_msgdef_ntof(self->msgdef, &keystr);
+  if (field) {
+    lupb_pushvalue(L, upb_msg_get(self->msg, field), field);
+    return 1;
+  }
+  // It wasn't a field, perhaps it's a method?
+  lua_getmetatable(L, 1);
+  lua_pushvalue(L, 2);
+  lua_rawget(L, -2);
+  if (lua_isnil(L, -1)) {
+    luaL_error(L, "%s is not a field name or a method name", key);
+  }
+  return 1;
+}
+
+static int lupb_msg_newindex(lua_State *L) {  // __newindex metamethod: implements "msg.field = value".
+ assert(lua_gettop(L) == 3); // __newindex should always be called with 3 args.
+ lupb_msg *m = lupb_msg_check(L, 1);
+ size_t len;
+ const char *name = luaL_checklstring(L, 2, &len);
+ upb_string namestr = UPB_STACK_STRING_LEN(name, len);
+ upb_fielddef *f = upb_msgdef_ntof(m->msgdef, &namestr);  // Look the key up as a field name; NULL means no such field.
+ if (f) {
+ upb_value val = lupb_getvalue(L, 3, f);  // Converts the Lua value for f; raises a Lua error on a type or range mismatch.
+ upb_msg_set(m->msg, f, val);
+ if (upb_isstring(f)) {
+ upb_string_unref(upb_value_getstr(val));  // Drop the string made by lupb_getvalue(); presumably upb_msg_set() took its own ref -- confirm.
+ }
+ } else {
+ luaL_error(L, "%s is not a field name", name);
}
- lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref);
+ return 0;
+}
+
+static int lupb_msg_clear(lua_State *L) {  // Lua method msg:Clear(); returns no values to Lua.
+ lupb_msg *m = lupb_msg_check(L, 1);
+ upb_msg_clear(m->msg, m->msgdef);
+ return 0;
}
-// msgdef
+static const struct luaL_Reg lupb_msg_mm[] = {  // Metatable entries registered for "upb.msg" in luaopen_upb().
+ {"__gc", lupb_msg_gc},
+ {"__index", lupb_msg_index},
+ {"__newindex", lupb_msg_newindex},
+ // Our __index mm will look up methods if the index isn't a field name.
+ {"Clear", lupb_msg_clear},
+ {NULL, NULL}  // Sentinel entry that terminates the list.
+};
+
+
+/* lupb_msgdef ****************************************************************/
static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) {
lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef");
@@ -115,6 +313,12 @@ static int lupb_msgdef_gc(lua_State *L) {
return 0;
}
+// __call metamethod for "upb.msgdef": calling a msgdef object creates a new
+// message of that type and returns its Lua wrapper.
+static int lupb_msgdef_call(lua_State *L) {
+  lupb_msg_pushnew(L, lupb_msgdef_check(L, 1));
+  return 1;
+}
+
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f);
static int lupb_msgdef_name(lua_State *L) {
@@ -150,6 +354,7 @@ static int lupb_msgdef_fieldbynum(lua_State *L) {
}
static const struct luaL_Reg lupb_msgdef_mm[] = {
+ {"__call", lupb_msgdef_call},  // Makes msgdef objects callable as message constructors.
{"__gc", lupb_msgdef_gc},
{NULL, NULL}
};
@@ -161,7 +366,8 @@ static const struct luaL_Reg lupb_msgdef_m[] = {
{NULL, NULL}
};
-// enumdef
+
+/* lupb_enumdef ***************************************************************/
static upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) {
lupb_def *ldef = luaL_checkudata(L, narg, "upb.enumdef");
@@ -191,18 +397,41 @@ static const struct luaL_Reg lupb_enumdef_m[] = {
};
+/* lupb_def *******************************************************************/
+
+// Produces the Lua wrapper for def ("upb.msgdef" or "upb.enumdef"), creating
+// it on a cache miss.  "owned" says whether the caller is handing over a ref
+// on def.  A new wrapper made from an unowned def takes its own ref; an owned
+// ref is released when an existing wrapper was found; in the remaining two
+// cases the refcount is left untouched.
+static void lupb_def_getorcreate(lua_State *L, upb_def *def, int owned) {
+  bool is_new = false;
+  switch (def->type) {
+    case UPB_DEF_MSG:
+      is_new = lupb_cache_getorcreate(L, def, "upb.msgdef");
+      break;
+    case UPB_DEF_ENUM:
+      is_new = lupb_cache_getorcreate(L, def, "upb.enumdef");
+      break;
+    default:
+      luaL_error(L, "unknown deftype %d", def->type);
+  }
+  if (is_new) {
+    if (!owned) upb_def_ref(def);
+  } else {
+    if (owned) upb_def_unref(def);
+  }
+}
+
+
/* lupb_fielddef **************************************************************/
typedef struct {
upb_fielddef *field;
} lupb_fielddef;
-static void lupb_fielddef_ref(void *cobj) {
- upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef));
-}
-
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) {
- lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop);
+ bool created = lupb_cache_getorcreate(L, f, "upb.fielddef");  // True when a new wrapper was made rather than found in the cache.
+ if (created) {
+ // Need to obtain a ref on this field's msgdef (fielddefs themselves aren't
+ // refcounted, but they're kept alive by their owning msgdef).
+ upb_def_ref(UPB_UPCAST(f->msgdef));
+ }
}
static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) {
@@ -221,11 +450,9 @@ static int lupb_fielddef_index(lua_State *L) {
} else if (strcmp(str, "label") == 0) {
lua_pushinteger(L, f->field->label);
} else if (strcmp(str, "def") == 0) {
- upb_def_ref(f->field->def);
- lupb_def_getorcreate(L, f->field->def);
+ lupb_def_getorcreate(L, f->field->def, false);
} else if (strcmp(str, "msgdef") == 0) {
- upb_def_ref(UPB_UPCAST(f->field->msgdef));
- lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef));
+ lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef), false);
} else {
lua_pushnil(L);
}
@@ -264,10 +491,6 @@ static int lupb_symtab_gc(lua_State *L) {
return 0;
}
-static void lupb_symtab_unref(void *cobj) {
- upb_symtab_unref((upb_symtab*)cobj);
-}
-
static int lupb_symtab_lookup(lua_State *L) {
lupb_symtab *s = lupb_symtab_check(L, 1);
size_t len;
@@ -275,7 +498,7 @@ static int lupb_symtab_lookup(lua_State *L) {
upb_string namestr = UPB_STACK_STRING_LEN(name, len);
upb_def *def = upb_symtab_lookup(s->symtab, &namestr);
if (def) {
- lupb_def_getorcreate(L, def);
+ lupb_def_getorcreate(L, def, true);
} else {
lua_pushnil(L);
}
@@ -293,7 +516,7 @@ static int lupb_symtab_getdefs(lua_State *L) {
for (int i = 0; i < count; i++) {
upb_def *def = defs[i];
lua_pushnumber(L, i + 1); // 1-based array.
- lupb_def_getorcreate(L, def);
+ lupb_def_getorcreate(L, def, true);
// Add it to our return table.
lua_settable(L, -3);
}
@@ -331,13 +554,15 @@ static const struct luaL_Reg lupb_symtab_mm[] = {
static int lupb_symtab_new(lua_State *L) {
upb_symtab *s = upb_symtab_new();
- lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref);
+ bool created = lupb_cache_getorcreate(L, s, "upb.symtab");  // No extra ref is taken; presumably the wrapper adopts the one from upb_symtab_new() -- confirm.
+ (void)created; // For NDEBUG
+ assert(created); // It's new, there shouldn't be an obj for it already.
return 1;
}
static int lupb_getfdsdef(lua_State *L) {
- lupb_cache_getorcreate(
- L, upb_getfdsdef(), "upb.msgdef", lupb_nop, lupb_def_unref);
+ upb_msgdef *fdsdef = upb_getfdsdef(); // Gets a ref on fdsdef.
+ lupb_def_getorcreate(L, UPB_UPCAST(fdsdef), true);  // owned=true: that ref stays with a new wrapper, or is released if a wrapper already existed.
return 1;
}
@@ -357,7 +582,7 @@ static void lupb_register_type(lua_State *L, const char *name,
// Methods go in the mt's __index method. This implies that you can't
// implement __index and also set methods yourself.
luaL_register(L, NULL, m);
- lua_setfield(L, -2, "__index");
+ lua_setfield(L, -2, "__index");
}
lua_pop(L, 1); // The mt.
}
@@ -367,8 +592,9 @@ int luaopen_upb(lua_State *L) {
lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm);
lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm);
lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm);
+ lupb_register_type(L, "upb.msg", NULL, lupb_msg_mm);
- // Create our object cache. TODO: need to make this table weak!
+ // Create our object cache.
lua_createtable(L, 0, 0);
lua_createtable(L, 0, 1); // Cache metatable.
lua_pushstring(L, "v"); // Values are weak.
diff --git a/src/descriptor.h b/src/descriptor.h
deleted file mode 100644
index f6d3ca3..0000000
--- a/src/descriptor.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * This file contains declarations for an array that contains the contents
- * of descriptor.proto, serialized as a protobuf. xxd is used to create
- * the actual definition.
- */
-
-#ifndef UPB_DESCRIPTOR_H_
-#define UPB_DESCRIPTOR_H_
-
-#include "upb_string.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern upb_string descriptor_str;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_DESCRIPTOR_H_ */
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 8b10522..78fc8b1 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -1,10 +1,11 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
+ * Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details.
*/
#include "upb_decoder.h"
+#include "upb_varint_decoder.h"
#include <inttypes.h>
#include <stddef.h>
@@ -21,105 +22,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
-/* Pure Decoding **************************************************************/
-
-// The key fast-path varint-decoding routine. Here we can assume we have at
-// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of
-// possibilities for optimization/experimentation here.
-
-#ifdef USE_SSE_VARINT_DECODING
-#include <emmintrin.h>
-
-// This works, but is empirically slower than the branchy version below. Why?
-// Most varints are very short. Next step: use branches for 1/2-byte varints,
-// but use the SSE version for 3-10 byte varints.
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- __m128i val128 = _mm_loadu_si128((void*)p);
- unsigned int continuation_bits = _mm_movemask_epi8(val128);
- unsigned int bsr_val = ~continuation_bits;
- int varint_length = __builtin_ffs(bsr_val);
- if (varint_length > 10) {
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
- }
-
- uint16_t twob;
- memcpy(&twob, p, 2);
- twob &= 0x7f7f;
- twob = ((twob & 0xff00) >> 1) | (twob & 0xff);
-
- uint64_t eightb;
- memcpy(&eightb, p + 2, 8);
- eightb &= 0x7f7f7f7f7f7f7f7f;
- eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff);
- eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff);
- eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff);
-
- uint64_t all_bits = twob | (eightb << 14);
- int varint_bits = varint_length * 7;
- uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
- *val = all_bits & mask;
- *ptr = p + varint_length;
- return true;
-}
-
-#else
-
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- uint32_t low, high = 0;
- uint32_t b;
- b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 28;
- high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
-
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
-
-done:
- *val = ((uint64_t)high << 32) | low;
- *ptr = p;
- return true;
-}
-
-typedef struct {
- const char *newbuf;
- uint64_t val;
-} retval;
-
-retval upb_decode_varint_fast64(const char *p) {
- uint64_t ret;
- uint64_t b;
- retval r = {(void*)0, 0};
- b = *(p++); ret = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
- return r;
-
-done:
- r.val = ret;
- r.newbuf = p;
- return r;
-}
-
-#endif
-
/* Decoding/Buffering of individual values ************************************/
@@ -233,11 +135,13 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
- uint64_t val64;
- const char *p = d->ptr;
- if (!upb_decode_varint_fast(&p, &val64, d->status)) return false;
- upb_decoder_advance(d, p - d->ptr);
- upb_value_setraw(val, val64);
+ upb_decoderet r = upb_decode_varint_fast(d->ptr);
+ if (r.p == NULL) {
+ upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
+ return false;
+ }
+ upb_value_setraw(val, r.val);
+ upb_decoder_advance(d, r.p - d->ptr);
return true;
} else {
return upb_decode_varint_slow(d, val);
@@ -352,11 +256,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
d->dispatcher.top->handlers.set->value,
d->dispatcher.top->handlers.closure,
d->top->msgdef->itof.array,
- d->top->msgdef->itof.array_size);
+ d->top->msgdef->itof.array_size,
+ d->tmp);
CHECK_FLOW(ret.flow);
+ if (ret.ptr - d->ptr > 0) {
+ DEBUGPRINTF("Fast path parsed %d bytes of data!\n", ret.ptr - d->ptr);
+ }
d->ptr = ret.ptr;
if (end - d->ptr < 12) {
- DEBUGPRINTF("Off the fast path because <12 bytes of data\n");
+ if (end == d->submsg_end && end != d->end) {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, but ONLY because of submsg end.\n");
+ } else {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, NOT because of submsg end.\n");
+ }
} else {
DEBUGPRINTF("Off the fast path for some other reason.\n");
}
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
index c59d131..032ea86 100644
--- a/src/upb_decoder_x64.asm
+++ b/src/upb_decoder_x64.asm
@@ -33,7 +33,7 @@ SECTION .text
; Register allocation.
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
-%define FREE r12 ; unused
+%define STRING r12 ; unused
%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
%define CALLBACK r14
%define CLOSURE r15
@@ -143,6 +143,7 @@ _upb_fastdecode:
; Parse arguments into reg vals and stack.
mov BUF, rdi
+ mov COMMITTED_BUF_SPILL, rdi
mov END, rsi
mov CALLBACK, rdx
mov CLOSURE, rcx
@@ -210,7 +211,6 @@ align 16
align 16
.string:
-
.cant_fast_path:
mov rax, 0 ; UPB_CONTINUE -- continue as before.
.done:
diff --git a/src/upb_def.c b/src/upb_def.c
index 0382610..d77e29a 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -6,9 +6,11 @@
#include <stdlib.h>
#include <stddef.h>
+#include <errno.h>
#include "descriptor.c"
#include "descriptor_const.h"
#include "upb_def.h"
+#include "upb_msg.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
@@ -261,6 +263,8 @@ struct _upb_defbuilder {
bool saw_number;
bool saw_name;
+ upb_string *default_string;
+
upb_fielddef *f;
};
typedef struct _upb_defbuilder upb_defbuilder;
@@ -276,12 +280,18 @@ static void upb_defbuilder_init(upb_defbuilder *b) {
upb_status_init(&b->status);
b->stack_len = 0;
b->name = NULL;
+ b->default_string = NULL;
}
static void upb_defbuilder_uninit(upb_defbuilder *b) {
upb_string_unref(b->name);
upb_status_uninit(&b->status);
upb_deflist_uninit(&b->defs);
+ upb_string_unref(b->default_string);  // Default-value string that was seen but not yet consumed by upb_fielddef_endmsg(), if any.
+ while (b->stack_len > 0) {  // Release the name held by every frame still on the stack.
+ upb_defbuilder_frame *f = &b->stack[--b->stack_len];
+ upb_string_unref(f->name);
+ }
}
static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
@@ -587,6 +597,19 @@ upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
/* upb_fielddef ***************************************************************/
static void upb_fielddef_free(upb_fielddef *f) {
+ if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) {
+ upb_string_unref(upb_value_getstr(f->default_value));
+ } else if (upb_issubmsg(f)) {
+ upb_msg *m = upb_value_getmsg(f->default_value);
+ assert(m);
+ // We cheat a bit here. We need to unref msg, but we don't have a reliable
+ // way of accessing the msgdef (which is required by upb_msg_unref()),
+ // because f->def may have already been collected as part of a cycle if
+ // this is an unowned ref. But we know that default messages never contain
+ // references to other messages, and their only string references are to
+ // the singleton empty string, so we can safely unref+free msg directly.
+ if (upb_atomic_unref(&m->refcount)) free(m);
+ }
upb_string_unref(f->name);
if(f->owned) {
upb_def_unref(f->def);
@@ -606,6 +629,109 @@ static upb_flow_t upb_fielddef_startmsg(void *_b) {
return UPB_CONTINUE;
}
+// Converts the textual default value "dstr" (NULL if none was given) into the
+// representation used for fields of the given type and stores it in "d".
+// The caller passes its ref on dstr: for string, bytes and enum fields that
+// ref is kept inside "d" (the empty string is stored when dstr is NULL); for
+// every other type the ref is released here.  Returns true on success, false
+// if the text cannot be converted or if a message/group field was given a
+// default at all.
+static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) {
+  bool success = true;
+  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
+    // We'll keep the ref we had on it.  We include enums in this case because
+    // we need the enumdef to resolve the name, but we may not have it yet.
+    // We'll resolve it later.
+    if (dstr) {
+      upb_value_setstr(d, dstr);
+    } else {
+      upb_value_setstr(d, upb_emptystring());
+    }
+  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
+    // We don't expect to get a default value.  Inspect the pointer before
+    // giving up our ref on it.
+    if (dstr != NULL) {
+      printf("Returning false because I got a default string for a message!\n");
+      success = false;
+    }
+    upb_string_unref(dstr);
+  } else {
+    // The strto* functions need the string to be NULL-terminated.
+    char *strz = (dstr == NULL || upb_string_isempty(dstr))
+                     ? NULL : upb_string_newcstr(dstr);
+    char *end;
+    upb_string_unref(dstr);
+    // The strto* functions only ever set errno on failure, so clear any stale
+    // value first; otherwise an earlier ERANGE would make a perfectly valid
+    // default look like a conversion error.
+    errno = 0;
+    switch (type) {
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(SFIXED32):
+        if (strz) {
+          long val = strtol(strz, &end, 0);
+          if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
+            success = false;
+          else
+            upb_value_setint32(d, val);
+        } else {
+          upb_value_setint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SINT64):
+      case UPB_TYPE(SFIXED64):
+        if (strz) {
+          upb_value_setint64(d, strtoll(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32):
+        if (strz) {
+          // strtoul() returns unsigned long; keeping the result unsigned stops
+          // large values from wrapping negative and slipping past the range
+          // check.
+          unsigned long val = strtoul(strz, &end, 0);
+          if (val > UINT32_MAX || errno == ERANGE || *end)
+            success = false;
+          else
+            upb_value_setuint32(d, val);
+        } else {
+          upb_value_setuint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64):
+        if (strz) {
+          upb_value_setuint64(d, strtoull(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setuint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(DOUBLE):
+        if (strz) {
+          upb_value_setdouble(d, strtod(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setdouble(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(FLOAT):
+        if (strz) {
+          upb_value_setfloat(d, strtof(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setfloat(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(BOOL):
+        if (!strz || strcmp(strz, "false") == 0)
+          upb_value_setbool(d, false);
+        else if (strcmp(strz, "true") == 0)
+          upb_value_setbool(d, true);
+        else
+          success = false;
+        break;
+    }
+    if (!success) {
+      printf("Returning false on the int conversion path, was trying to convert: %s, type=%d\n", strz, type);
+    }
+    free(strz);
+  }
+  return success;
+}
+
static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_defbuilder *b = _b;
upb_fielddef *f = b->f;
@@ -619,6 +745,15 @@ static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);
+
+ upb_string *dstr = b->default_string;
+ b->default_string = NULL;
+ if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) {
+ // We don't worry too much about giving a great error message since the
+ // compiler should have ensured this was correct.
+ upb_seterr(&b->status, UPB_ERROR, "Error converting default value.");
+ return UPB_BREAK;
+ }
return UPB_CONTINUE;
}
@@ -644,6 +779,12 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) {
b->f->owned = true;
break;
}
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM:
+ // Have to convert from string to the correct type, but we might not know
+ // the type yet.
+ upb_string_unref(b->default_string);
+ b->default_string = upb_string_getref(upb_value_getstr(val));
+ break;
}
return UPB_CONTINUE;
}
@@ -683,6 +824,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_b) {
upb_atomic_refcount_init(&m->cycle_refcount, 0);
upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+ m->default_message = NULL;
upb_deflist_push(&b->defs, UPB_UPCAST(m));
upb_defbuilder_startcontainer(b);
return UPB_CONTINUE;
@@ -703,7 +845,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
upb_field_count_t field = 0;
upb_msg_iter i;
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- sorted_fields[field++]= upb_msg_iter_field(i);
+ sorted_fields[field++] = upb_msg_iter_field(i);
}
qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
@@ -745,6 +887,18 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
if (max_align > 0) m->size = upb_align_up(m->size, max_align);
+  // Create default message instance, an immutable message with all default
+  // values set (except submessages, which are simply marked as unset). We
+  // could alternatively leave all set bits unset, but this would make
+  // upb_msg_get() take its unexpected branch more often for no good reason.
+  m->default_message = upb_msg_new(m);
+  for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+    upb_fielddef *f = upb_msg_iter_field(i);
+    // Submessage and enum fields are skipped here.  The enum test has to be
+    // spelled with "!=": "!f->type == X" parses as "(!f->type) == X", which
+    // compares 0 or 1 against X rather than testing "is not an enum".
+    if (!upb_issubmsg(f) && f->type != UPB_TYPE(ENUM)) {
+      upb_msg_set(m->default_message, f, f->default_value);
+    }
+  }
+
upb_defbuilder_endcontainer(b);
return UPB_CONTINUE;
}
@@ -802,6 +956,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b,
static void upb_msgdef_free(upb_msgdef *m)
{
+ upb_msg_unref(m->default_message, m);
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
upb_fielddef_free(upb_msg_iter_field(i));
@@ -818,6 +973,10 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
// We will later make the ref unowned if it is a part of a cycle.
f->owned = true;
upb_def_ref(def);
+ if (upb_issubmsg(f)) {
+ upb_msgdef *md = upb_downcast_msgdef(def);
+ upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message));
+ }
}
upb_msg_iter upb_msg_begin(upb_msgdef *m) {
@@ -937,7 +1096,8 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
}
// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab.
+// resolves all of the unresolved refs for the defs in tmptab. Also resolves
+// default values for enumerations and submessages.
bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
upb_status *status)
{
@@ -1352,7 +1512,7 @@ upb_def *upb_getdescriptordef(upb_string *str) {
// upb itself is corrupt.
abort();
}
- upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it.
+ upb_msgdef_unref(def); // The symtab already holds a ref on it.
atexit(upb_free_descriptor_symtab);
}
return upb_symtab_resolve(
diff --git a/src/upb_def.h b/src/upb_def.h
index 121d5bc..3f79895 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -81,6 +81,9 @@ INLINE void upb_def_unref(upb_def *def) {
if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
+
/* upb_fielddef ***************************************************************/
// A upb_fielddef describes a single field in a message. It isn't a full def
@@ -158,6 +161,10 @@ typedef struct _upb_msgdef {
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
+
+ // Immutable msg instance that has all default values set.
+ // TODO: need a way of making this immutable!
+ struct _upb_msg *default_message;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
@@ -172,6 +179,13 @@ typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
+// Releases one ref on "md" by forwarding to upb_def_unref() on its base def.
+INLINE void upb_msgdef_unref(upb_msgdef *md) {
+  upb_def *base_def = UPB_UPCAST(md);
+  upb_def_unref(base_def);
+}
+// Takes one ref on "md" by forwarding to upb_def_ref() on its base def.
+INLINE void upb_msgdef_ref(upb_msgdef *md) {
+  upb_def *base_def = UPB_UPCAST(md);
+  upb_def_ref(base_def);
+}
+
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
@@ -361,8 +375,6 @@ UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
-#define UPB_UPCAST(ptr) (&(ptr)->base)
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 9dfbea4..211004c 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -145,6 +145,22 @@ INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->set_bit_offset] |= f->set_bit_mask;
}
+// Stores "val" into field "f" of "msg" and marks the field as set.
+//
+// "val" must have the value type of "f" (checked with assert).  For
+// memory-managed fields (upb_field_ismm(f)) a ref is taken on the new value
+// and the value previously held in the slot is unref'd.
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
+  assert(val.type == upb_field_valuetype(f));
+  upb_valueptr ptr = _upb_msg_getptr(msg, f);
+  if (upb_field_ismm(f)) {
+    // Ref the new value first, so that re-storing the value the slot already
+    // holds cannot drop its last ref before we take ours.
+    upb_atomic_refcount_t *refcount = upb_value_getrefcount(val);
+    if (refcount) upb_atomic_ref(refcount);
+
+    // Unref any previous value we may have had there.
+    upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f));
+    upb_field_unref(oldval, f);
+  }
+  upb_msg_sethas(msg, f);
+  // upb_value_write() returns void, so call it as a plain statement rather
+  // than "return"ing its (non-existent) value from this void function.
+  upb_value_write(ptr, val, upb_field_valuetype(f));
+}
+
static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p = _upb_msg_getptr(msg, f);
if (upb_isarray(f)) {
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 3246971..ff8489c 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -135,6 +135,7 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
#undef CASE
}
+
/* upb_array ******************************************************************/
typedef uint32_t upb_arraylen_t;
@@ -172,8 +173,17 @@ INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
}
+
/* upb_msg ********************************************************************/
+// upb_msg is not self-describing; the upb_msg does not contain a pointer to the
+// upb_msgdef. While this makes the API a bit more cumbersome to use, this
+// choice was made for a few important reasons:
+//
+// 1. it would make every message 8 bytes larger on 64-bit platforms. This is
+// a high overhead for small messages.
+// 2. you would want the msg to own a ref on its msgdef, but this would require
+// an atomic operation for every message create or destroy!
struct _upb_msg {
upb_atomic_refcount_t refcount;
uint8_t data[4]; // We allocate the appropriate amount per message.
@@ -194,6 +204,11 @@ upb_msg *upb_msg_new(upb_msgdef *md);
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
}
+// Takes a new ref on "msg" (which must be non-NULL) and returns the same
+// pointer, so the call can be used inline as an expression.
+INLINE upb_msg *upb_msg_getref(upb_msg *msg) {
+  assert(msg);
+  upb_atomic_refcount_t *count = &msg->refcount;
+  upb_atomic_ref(count);
+  return msg;
+}
void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
@@ -203,10 +218,40 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
}
+// We have several options for handling default values:
+// 1. inside upb_msg_clear(), overwrite all values to be their defaults,
+// overwriting submessage pointers to point to the default instance again.
+// 2. inside upb_msg_get(), test upb_msg_has() and return f->default_value
+// if it is not set. upb_msg_clear() only clears the set bits.
+// We lazily clear objects if/when we reuse them.
+// 3. inside upb_msg_clear(), overwrite all values to be their default,
+// and recurse into submessages to set all their values to defaults also.
+// 4. as a hybrid of (1) and (3), make each "set bit" tri-state, where it
+// can have a value of "unset, but cached sub-message needs to be cleared."
+// Like (2) we can cache sub-messages and lazily clear, but primitive values
+// can always be returned straight from the message.
+//
+// (1) is undesirable, because it prevents us from caching sub-objects.
+// (2) makes clear() cheaper, but makes get() branchier.
+// (3) makes get() less branchy, but makes clear() have worse cache behavior.
+// (4) makes get() differently branchy (only returns default from msgdef if
+// NON-primitive value is unset), but uses more set bits. It's questionable
+// whether it would be a performance improvement.
+//
+// For the moment we go with (2). Google's protobuf does (3), which is likely
+// part of the reason we beat it in some benchmarks.
+
+// For submessages and strings, the returned value is not owned.
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
- return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ if (upb_msg_has(msg, f)) {
+ return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ } else {
+ return f->default_value;
+ }
}
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
+
// Unsets all field values back to their defaults.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);
diff --git a/src/upb_string.c b/src/upb_string.c
index 81b152d..29ce7d4 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -147,4 +147,15 @@ error:
return NULL;
}
-void upb_string_noninlinerecycle(upb_string **_str) { return upb_string_recycle(_str); }
+// Returns a pointer to a single function-local static empty string; every
+// call returns the same object.
+upb_string *upb_emptystring(void) {
+  static upb_string empty = UPB_STATIC_STRING("");
+  return &empty;
+}
+
+// Returns a malloc()'d, NUL-terminated copy of the bytes of "str", or NULL
+// if the allocation fails.  The caller owns the result and releases it with
+// free().
+char *upb_string_newcstr(upb_string *str) {
+  upb_strlen_t len = upb_string_len(str);
+  // Widen before adding the terminator byte so the size is computed in
+  // size_t rather than in the (narrower) string-length type.
+  char *ret = malloc((size_t)len + 1);
+  if (!ret) return NULL;
+  // NOTE(review): upb_string.h says readers must call upb_string_endread()
+  // after reading; confirm whether that applies to this getrobuf() use.
+  memcpy(ret, upb_string_getrobuf(str), len);
+  ret[len] = '\0';
+  return ret;
+}
diff --git a/src/upb_string.h b/src/upb_string.h
index 3799c5e..efafa44 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -134,6 +134,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) {
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
+// True when "str" is NULL or has length zero.
+INLINE bool upb_string_isempty(upb_string *str) {
+  if (str == NULL) return true;
+  return upb_string_len(str) == 0;
+}
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
@@ -273,6 +276,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str,
//#endif
#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
+// Returns a singleton empty string.
+upb_string *upb_emptystring();
+
+
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
@@ -339,6 +346,9 @@ INLINE upb_string *upb_strdupc(const char *src) {
return upb_strduplen(src, strlen(src));
}
+// Returns a newly-allocated NUL-terminated copy of str. The caller must
+// free() the result.
+char *upb_string_newcstr(upb_string *str);
+
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
diff --git a/src/upbc.c b/src/upbc.c
index 428ec41..4fa8a71 100644
--- a/src/upbc.c
+++ b/src/upbc.c
@@ -12,7 +12,6 @@
#include <inttypes.h>
#include <stdarg.h>
#include <stdlib.h>
-#include "descriptor.h"
#include "upb_def.h"
#include "upb_msg.h"
#include "upb_glue.h"
diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc
index 1839123..f3c54b4 100644
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@@ -1,20 +1,20 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * A test that verifies that our results are identical to proto2 for a
+ * given proto type and input protobuf.
+ *
+ * Copyright (c) 2011 Joshua Haberman. See LICENSE for details.
+ */
-#undef NDEBUG /* ensure tests always assert. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <google/protobuf/descriptor.h>
-#include "upb_decoder.h"
+#include "upb_test.h"
#include "upb_def.h"
#include "upb_glue.h"
#include "upb_msg.h"
-#include "upb_strstream.h"
-
-int num_assertions = 0;
-#define ASSERT(expr) do { \
- ++num_assertions; \
- assert(expr); \
- } while(0)
#include MESSAGE_HFILE
diff --git a/tests/tests.c b/tests/tests.c
index c691b18..a04b1da 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -1,212 +1,13 @@
-#undef NDEBUG /* ensure tests always assert. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
-#include "upb_decoder.c"
#include "upb_def.h"
#include "upb_glue.h"
-
-int num_assertions = 0;
-#define ASSERT(expr) do { \
- ++num_assertions; \
- assert(expr); \
- } while(0)
-
-static void test_get_v_uint64_t()
-{
-#define TEST(name, bytes, val) {\
- upb_status status = UPB_STATUS_INIT; \
- const char name[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \
- const char *name ## _buf = name; \
- uint64_t name ## _val = 0; \
- upb_decode_varint_fast(&name ## _buf, &name ## _val, &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _val == val); \
- ASSERT(name ## _buf == name + sizeof(name) - 16); /* - 1 for NULL */ \
- }
-
- TEST(zero, "\x00", 0ULL);
- TEST(one, "\x01", 1ULL);
- TEST(twob, "\x81\x14", 0xa01ULL);
- TEST(twob, "\x81\x03", 0x181ULL);
- TEST(threeb, "\x81\x83\x07", 0x1c181ULL);
- TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181ULL);
- TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0x1f1e1c181ULL);
- TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0x1f9f1e1c181ULL);
- TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0x1fdf9f1e1c181ULL);
- TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0x3fdf9f1e1c181ULL);
- TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0x303fdf9f1e1c181ULL);
- TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
-#undef TEST
-
- char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
- const char *twelvebyte_buf = twelvebyte;
- uint64_t twelvebyte_val = 0;
- upb_status status = UPB_STATUS_INIT;
- /* A varint that terminates before hitting the end of the provided buffer,
- * but in too many bytes (11 instead of 10). */
- upb_decode_varint_fast(&twelvebyte_buf, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR);
- upb_status_uninit(&status);
-}
-
-#if 0
-static void test_get_v_uint32_t()
-{
-#define TEST(name, bytes, val) {\
- upb_status status = UPB_STATUS_INIT; \
- const uint8_t name[] = bytes; \
- const uint8_t *name ## _buf = name; \
- uint32_t name ## _val = 0; \
- name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name), &name ## _val, &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _val == val); \
- ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
- /* Test NEED_MORE_DATA. */ \
- if(sizeof(name) > 2) { \
- name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name) - 2, &name ## _val, &status); \
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \
- } \
- }
-
- TEST(zero, "\x00", 0UL);
- TEST(one, "\x01", 1UL);
- TEST(twob, "\x81\x03", 0x181UL);
- TEST(threeb, "\x81\x83\x07", 0x1c181UL);
- TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181UL);
- /* get_v_uint32_t truncates, so all the rest return the same thing. */
- TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0xf1e1c181UL);
- TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0xf1e1c181UL);
- TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0xf1e1c181UL);
- TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0xf1e1c181UL);
- TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0xf1e1c181UL);
- TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0xf1e1c181UL);
-#undef TEST
-
- uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
- uint32_t twelvebyte_val = 0;
- upb_status status = UPB_STATUS_INIT;
- /* A varint that terminates before hitting the end of the provided buffer,
- * but in too many bytes (11 instead of 10). */
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 12, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint that terminates simultaneously with the end of the provided
- * buffer, but in too many bytes (11 instead of 10). */
- upb_reset(&status);
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 11, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint whose buffer ends on exactly the byte where the varint must
- * terminate, but the final byte does not terminate. The absolutely most
- * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
- * by this point that the varint does not properly terminate. But we also
- * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
- * compromise overall correctness -- clients who supply more data later will
- * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
- * more data to supply will (rightly) conclude that their protobuf is corrupt.
- */
- upb_reset(&status);
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 10, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT ||
- status.code == UPB_STATUS_NEED_MORE_DATA);
-
- upb_reset(&status);
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 9, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
-}
-
-static void test_skip_v_uint64_t()
-{
-#define TEST(name, bytes) {\
- upb_status status = UPB_STATUS_INIT; \
- const uint8_t name[] = bytes; \
- const uint8_t *name ## _buf = name; \
- name ## _buf = upb_skip_v_uint64_t(name ## _buf, name + sizeof(name), &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
- /* Test NEED_MORE_DATA. */ \
- if(sizeof(name) > 2) { \
- name ## _buf = upb_skip_v_uint64_t(name, name + sizeof(name) - 2, &status); \
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \
- } \
- }
-
- TEST(zero, "\x00");
- TEST(one, "\x01");
- TEST(twob, "\x81\x03");
- TEST(threeb, "\x81\x83\x07");
- TEST(fourb, "\x81\x83\x87\x0f");
- TEST(fiveb, "\x81\x83\x87\x8f\x1f");
- TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f");
- TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f");
- TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01");
- TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03");
- TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07");
-#undef TEST
-
- uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
- upb_status status = UPB_STATUS_INIT;
- /* A varint that terminates before hitting the end of the provided buffer,
- * but in too many bytes (11 instead of 10). */
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 12, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint that terminates simultaneously with the end of the provided
- * buffer, but in too many bytes (11 instead of 10). */
- upb_reset(&status);
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 11, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint whose buffer ends on exactly the byte where the varint must
- * terminate, but the final byte does not terminate. The absolutely most
- * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
- * by this point that the varint does not properly terminate. But we also
- * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
- * compromise overall correctness -- clients who supply more data later will
- * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
- * more data to supply will (rightly) conclude that their protobuf is corrupt.
- */
- upb_reset(&status);
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 10, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT ||
- status.code == UPB_STATUS_NEED_MORE_DATA);
-
- upb_reset(&status);
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 9, &status);
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
-}
-
-static void test_get_f_uint32_t()
-{
-#define TEST(name, bytes, val) {\
- upb_status status = UPB_STATUS_INIT; \
- const uint8_t name[] = bytes; \
- const uint8_t *name ## _buf = name; \
- uint32_t name ## _val = 0; \
- name ## _buf = upb_get_f_uint32_t(name ## _buf, name + sizeof(name), &name ## _val, &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _val == val); \
- ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
- }
-
- TEST(zero, "\x00\x00\x00\x00", 0x0UL);
- TEST(one, "\x01\x00\x00\x00", 0x1UL);
-
- uint8_t threeb[] = {0x00, 0x00, 0x00};
- uint32_t threeb_val;
- upb_status status = UPB_STATUS_INIT;
- upb_get_f_uint32_t(threeb, threeb + sizeof(threeb), &threeb_val, &status);
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
-
-#undef TEST
-}
-#endif
+#include "upb_test.h"
static void test_upb_symtab() {
upb_symtab *s = upb_symtab_new();
- upb_symtab_add_descriptorproto(s);
ASSERT(s);
upb_string *descriptor = upb_strreadfile("tests/test.proto.pb");
if(!descriptor) {
@@ -240,11 +41,8 @@ static void test_upb_symtab() {
upb_def_ref(def2);
upb_def_unref(def);
upb_def_unref(def2);
-
-
}
-
int main()
{
#define TEST(func) do { \
@@ -254,10 +52,6 @@ int main()
printf("ok (%d assertions).\n", num_assertions - assertions_before); \
} while (0)
- TEST(test_get_v_uint64_t);
- //TEST(test_get_v_uint32_t);
- //TEST(test_skip_v_uint64_t);
- //TEST(test_get_f_uint32_t);
TEST(test_upb_symtab);
printf("All tests passed (%d assertions).\n", num_assertions);
return 0;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback