summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2011-02-22 01:54:31 -0800
committerJoshua Haberman <joshua@reverberate.org>2011-02-22 01:54:31 -0800
commitfd184f0df2e5e428873eadfaf1ae829d2e4d8e51 (patch)
tree19c4a1d9099f04c74de60eb4d8149ea1b5d930a0
parent0c6786c6fad563f181e66c90df2a74597ce6d18b (diff)
Major work on Lua extension and default values.
Default values are now supported, and the Lua extension can now create and modify individual protobuf objects.
-rw-r--r--Makefile8
-rw-r--r--lang_ext/lua/test.lua17
-rw-r--r--lang_ext/lua/upb.c330
-rw-r--r--src/descriptor.h26
-rw-r--r--src/upb_decoder.c126
-rw-r--r--src/upb_decoder_x64.asm4
-rw-r--r--src/upb_def.c166
-rw-r--r--src/upb_def.h16
-rw-r--r--src/upb_msg.c16
-rw-r--r--src/upb_msg.h47
-rw-r--r--src/upb_string.c13
-rw-r--r--src/upb_string.h10
-rw-r--r--src/upbc.c1
-rw-r--r--tests/test_vs_proto2.cc18
-rw-r--r--tests/tests.c208
15 files changed, 592 insertions, 414 deletions
diff --git a/Makefile b/Makefile
index d9f8008..13ce46a 100644
--- a/Makefile
+++ b/Makefile
@@ -94,6 +94,7 @@ TESTS_SRC= \
tests/test_stream.c \
tests/test_string.c \
tests/tests.c \
+ tests/tests_varint.c \
tests/test_vs_proto2.cc
ALLSRC=$(CORE) $(STREAM) $(BENCHMARKS_SRC) $(TESTS_SRC)
@@ -138,11 +139,11 @@ $(LIBUPB_PIC): $(PICOBJ)
# critical path but gets very large when -O3 is used.
src/upb_def.o: src/upb_def.c
$(E) CC $<
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $<
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $<
src/upb_def.lo: src/upb_def.c
$(E) 'CC -fPIC' $<
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC
src/upb_decoder_x64.o: src/upb_decoder_x64.asm
$(E) NASM $<
@@ -183,6 +184,7 @@ SIMPLE_TESTS= \
tests/test_string \
tests/test_def \
tests/test_stream \
+ tests/test_varint \
tests/tests
# tests/test_decoder \
@@ -202,7 +204,7 @@ tests/tests: tests/test.proto.pb
$(SIMPLE_TESTS): % : %.c
$(E) CC $<
- $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+ $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB)
VALGRIND=valgrind --leak-check=full --error-exitcode=1
test: tests
diff --git a/lang_ext/lua/test.lua b/lang_ext/lua/test.lua
index a49cebc..978fb11 100644
--- a/lang_ext/lua/test.lua
+++ b/lang_ext/lua/test.lua
@@ -18,3 +18,20 @@ symtab:parsedesc(f:read("*all"))
for _, def in ipairs(symtab:getdefs(-1)) do
print(def:name())
end
+
+SpeedMessage1 = symtab:lookup("benchmarks.SpeedMessage1")
+print(SpeedMessage1:name())
+
+msg = SpeedMessage1()
+-- print(msg.field1)
+-- print(msg.field129)
+-- print(msg.field271)
+-- print(msg.field15.field15)
+-- print(msg.field1)
+-- print(msg.field1)
+-- msg.field1 = "YEAH BABY!"
+-- print(msg.field1)
+print(msg.field129)
+msg.field129 = 5
+print(msg.field129)
+
diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c
index bf1eb02..460ac86 100644
--- a/lang_ext/lua/upb.c
+++ b/lang_ext/lua/upb.c
@@ -7,9 +7,20 @@
*/
#include <stdlib.h>
+#include <math.h>
+#include <float.h>
#include "lauxlib.h"
#include "upb_def.h"
#include "upb_glue.h"
+#include "upb_msg.h"
+
+static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md);
+
+// All the def types share the same C layout, even though they are different Lua
+// types with different metatables.
+typedef struct {
+ upb_def *def;
+} lupb_def;
void lupb_pushstring(lua_State *L, upb_string *str) {
lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str));
@@ -30,21 +41,17 @@ void lupb_checkstatus(lua_State *L, upb_status *s) {
upb_status_uninit(s);
}
+
/* object cache ***************************************************************/
// We cache all the lua objects (userdata) we vend in a weak table, indexed by
// the C pointer of the object they are caching.
-typedef void (*lupb_cb)(void *cobj);
-
-static void lupb_nop(void *foo) {
- (void)foo;
-}
-
-static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
- lupb_cb ref, lupb_cb unref) {
+static void *lupb_cache_getorcreate_size(
+ lua_State *L, void *cobj, const char *type, size_t size) {
// Lookup our cache in the registry (we don't put our objects in the registry
// directly because we need our cache to be a weak table).
+ void **obj = NULL;
lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache");
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
lua_pushlightuserdata(L, cobj);
@@ -55,7 +62,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lua_pop(L, 1);
// We take advantage of the fact that all of our objects are currently a
// single pointer, and thus have the same layout.
- void **obj = lua_newuserdata(L, sizeof(void*));
+ obj = lua_newuserdata(L, size);
*obj = cobj;
luaL_getmetatable(L, type);
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
@@ -65,44 +72,235 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lua_pushlightuserdata(L, cobj);
lua_pushvalue(L, -2);
lua_rawset(L, -4);
- ref(cobj);
- } else {
- unref(cobj);
}
lua_insert(L, -2);
lua_pop(L, 1);
+ return obj;
}
+// Convenience wrapper for the common case of a userdata that holds exactly one
+// pointer.  Returns true only when a new userdata had to be created (as opposed
+// to being found in the object cache).
+static bool lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type) {
+  void *fresh = lupb_cache_getorcreate_size(L, cobj, type, sizeof(void*));
+  return fresh != NULL;
+}
-/* lupb_def *******************************************************************/
-// All the def types share the same C layout, even though they are different Lua
-// types with different metatables.
+/* lupb_msg********************************************************************/
+
+// We prefer field access syntax (foo.bar, foo.bar = 5) over method syntax
+// (foo:bar(), foo:set_bar(5)) to make messages behave more like regular tables.
+// However, there are methods also, like foo:CopyFrom(other_foo) or foo:Clear().
+
typedef struct {
- upb_def *def;
-} lupb_def;
+ upb_msg *msg;
+ upb_msgdef *msgdef;
+} lupb_msg;
-static void lupb_def_unref(void *cobj) {
- upb_def_unref((upb_def*)cobj);
+static lupb_msg *lupb_msg_check(lua_State *L, int narg) {
+ return luaL_checkudata(L, narg, "upb.msg");
}
-static void lupb_def_getorcreate(lua_State *L, upb_def *def) {
- const char *type_name;
- switch(def->type) {
- case UPB_DEF_MSG:
- type_name = "upb.msgdef";
+// Creates a new message of type "md" and pushes a Lua wrapper for it.
+static void lupb_msg_pushnew(lua_State *L, upb_msgdef *md) {
+  upb_msg *newmsg = upb_msg_new(md);
+  lupb_msg *wrapper =
+      lupb_cache_getorcreate_size(L, newmsg, "upb.msg", sizeof(lupb_msg));
+  assert(wrapper);
+  // The msgdef must outlive the msg, so the wrapper takes a ref on it.  This
+  // is an atomic ref; if that proves too expensive there are alternatives,
+  // such as a separate metatable per msgdef that itself references the msgdef.
+  upb_msgdef_ref(md);
+  wrapper->msgdef = md;
+}
+
+// Pushes the Lua wrapper for an existing "msg", creating the wrapper if the
+// object cache does not have one yet.  Caller does *not* pass a ref.
+static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md) {
+  lupb_msg *wrapper =
+      lupb_cache_getorcreate_size(L, msg, "upb.msg", sizeof(lupb_msg));
+  if (wrapper == NULL) return;  // Found in the cache; nothing to initialize.
+  // Freshly created Lua object: it needs its own ref on the message, plus a
+  // ref on the msgdef so that the msgdef outlives the message.
+  wrapper->msgdef = md;
+  wrapper->msg = upb_msg_getref(msg);
+  upb_msgdef_ref(md);
+}
+
+// __gc metamethod for messages: releases the wrapper's refs on the message and
+// on its msgdef.
+static int lupb_msg_gc(lua_State *L) {
+  lupb_msg *wrapper = lupb_msg_check(L, 1);
+  upb_msgdef *md = wrapper->msgdef;
+  upb_msg_unref(wrapper->msg, md);
+  upb_msgdef_unref(md);
+  return 0;
+}
+
+// Pushes "val", the value of field "f", onto the Lua stack: a number for all
+// numeric and enum fields, a boolean for bool fields, a Lua string for
+// string/bytes fields and a message wrapper for submessage/group fields.
+// Raises a Lua error for an unrecognized field type, so that a value has
+// always been pushed whenever this function returns.
+static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
+  switch (f->type) {
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SINT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(ENUM):
+      lua_pushnumber(L, upb_value_getint32(val));
+      break;
+    // NOTE(review): if lua_Number is a double, 64-bit values beyond 2^53
+    // cannot all be represented exactly once pushed.
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SINT64):
+    case UPB_TYPE(SFIXED64):
+      lua_pushnumber(L, upb_value_getint64(val));
+      break;
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      lua_pushnumber(L, upb_value_getuint32(val));
+      break;
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      lua_pushnumber(L, upb_value_getuint64(val));
+      break;
+    case UPB_TYPE(DOUBLE):
+      lua_pushnumber(L, upb_value_getdouble(val));
+      break;
+    case UPB_TYPE(FLOAT):
+      lua_pushnumber(L, upb_value_getfloat(val));
+      break;
+    case UPB_TYPE(BOOL):
+      lua_pushboolean(L, upb_value_getbool(val));
+      break;
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES): {
+      upb_string *str = upb_value_getstr(val);
+      assert(str);
+      lupb_pushstring(L, str);  // Shared helper; copies the bytes into Lua.
+      break;
+    }
+    case UPB_TYPE(MESSAGE):
+    case UPB_TYPE(GROUP): {
+      upb_msg *msg = upb_value_getmsg(val);
+      assert(msg);
+      lupb_msg_getorcreate(L, msg, upb_downcast_msgdef(f->def));
+      break;
+    }
+    default:
+      // Without this, nothing would be pushed and callers that return 1 would
+      // hand back whatever happened to be on top of the stack.
+      luaL_error(L, "unknown field type %d", (int)f->type);
+  }
+}
+
+// Converts the Lua value at stack index "narg" into a upb_value for field "f".
+// Raises a Lua error if the value has the wrong Lua type, is a non-integral
+// number destined for an integer field, is out of range for the field, or is
+// a message whose msgdef does not match the field.  For string/bytes fields
+// the returned value holds a upb_string newly created by upb_strduplen().
+static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f) {
+  upb_value val;
+  lua_Number num = 0;  // Only read for numeric (non-bool) field types.
+  if (!upb_issubmsg(f) && !upb_isstring(f) && f->type != UPB_TYPE(BOOL)) {
+    num = luaL_checknumber(L, narg);
+    if (f->type != UPB_TYPE(DOUBLE) && f->type != UPB_TYPE(FLOAT) &&
+        num != rint(num)) {
+      luaL_error(L, "Cannot assign non-integer number %f to integer field", num);
+    }
+  }
+  switch (f->type) {
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SINT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(ENUM):
+      if (num > INT32_MAX || num < INT32_MIN)
+        luaL_error(L, "Number %f is out-of-range for 32-bit integer field.", num);
+      upb_value_setint32(&val, num);
break;
- case UPB_DEF_ENUM:
- type_name = "upb.enumdef";
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SINT64):
+    case UPB_TYPE(SFIXED64):
+      // INT64_MAX is not exactly representable as a double; it rounds up to
+      // 2^63.  Comparing with "> INT64_MAX" would therefore accept 2^63 and
+      // the conversion below would overflow, so compare against 2^63 itself.
+      if (num >= 9223372036854775808.0 || num < INT64_MIN)
+        luaL_error(L, "Number %f is out-of-range for 64-bit integer field.", num);
+      upb_value_setint64(&val, num);
break;
- default:
- luaL_error(L, "unknown deftype %d", def->type);
- type_name = NULL; // Placate the compiler.
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      if (num > UINT32_MAX || num < 0)
+        luaL_error(L, "Number %f is out-of-range for unsigned 32-bit integer field.", num);
+      upb_value_setuint32(&val, num);
+      break;
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      // Same rounding issue as above: UINT64_MAX becomes 2^64 as a double.
+      if (num >= 18446744073709551616.0 || num < 0)
+        luaL_error(L, "Number %f is out-of-range for unsigned 64-bit integer field.", num);
+      upb_value_setuint64(&val, num);
+      break;
+    case UPB_TYPE(DOUBLE):
+      if (num > DBL_MAX || num < -DBL_MAX) {
+        // This could happen if lua_Number was long double.
+        luaL_error(L, "Number %f is out-of-range for double field.", num);
+      }
+      upb_value_setdouble(&val, num);
+      break;
+    case UPB_TYPE(FLOAT):
+      if (num > FLT_MAX || num < -FLT_MAX)
+        luaL_error(L, "Number %f is out-of-range for float field.", num);
+      upb_value_setfloat(&val, num);
+      break;
+    case UPB_TYPE(BOOL):
+      if (!lua_isboolean(L, narg))
+        luaL_error(L, "Must explicitly pass true or false for boolean fields");
+      upb_value_setbool(&val, lua_toboolean(L, narg));
+      break;
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES): {
+      // TODO: is there any reasonable way to avoid a copy here?
+      size_t len;
+      const char *str = luaL_checklstring(L, narg, &len);
+      upb_value_setstr(&val, upb_strduplen(str, len));
+      break;
+    }
+    case UPB_TYPE(MESSAGE):
+    case UPB_TYPE(GROUP): {
+      lupb_msg *m = lupb_msg_check(L, narg);
+      if (m->msgdef != upb_downcast_msgdef(f->def))
+        luaL_error(L, "Tried to assign a message of the wrong type.");
+      upb_value_setmsg(&val, m->msg);
+      break;
+    }
+    default:
+      // Never return an uninitialized upb_value.
+      luaL_error(L, "unknown field type %d", (int)f->type);
+  }
+  return val;
+}
+
+
+// __index metamethod for messages.  msg.key yields the value of the field
+// named "key" if the msgdef has one; otherwise the key is looked up in the
+// metatable so that methods can be found.  Raises a Lua error if the key is
+// neither a field nor a metatable entry.
+static int lupb_msg_index(lua_State *L) {
+  assert(lua_gettop(L) == 2);  // __index should always be called with 2 args.
+  lupb_msg *m = lupb_msg_check(L, 1);
+  size_t len;
+  const char *name = luaL_checklstring(L, 2, &len);
+  upb_string namestr = UPB_STACK_STRING_LEN(name, len);
+  upb_fielddef *field = upb_msgdef_ntof(m->msgdef, &namestr);
+  if (field != NULL) {
+    lupb_pushvalue(L, upb_msg_get(m->msg, field), field);
+    return 1;
+  }
+  // It wasn't a field, perhaps it's a method?
+  lua_getmetatable(L, 1);
+  lua_pushvalue(L, 2);
+  lua_rawget(L, -2);
+  if (lua_isnil(L, -1)) {
+    luaL_error(L, "%s is not a field name or a method name", name);
+  }
+  return 1;
+}
+
+// __newindex metamethod for messages.  msg.key = value converts the Lua value
+// with lupb_getvalue() and stores it in the field named "key".  Raises a Lua
+// error if the msgdef has no such field.
+static int lupb_msg_newindex(lua_State *L) {
+  assert(lua_gettop(L) == 3);  // __newindex should always be called with 3 args.
+  lupb_msg *m = lupb_msg_check(L, 1);
+  size_t len;
+  const char *name = luaL_checklstring(L, 2, &len);
+  upb_string namestr = UPB_STACK_STRING_LEN(name, len);
+  upb_fielddef *field = upb_msgdef_ntof(m->msgdef, &namestr);
+  if (field == NULL) {
+    return luaL_error(L, "%s is not a field name", name);
+  }
+  upb_value newval = lupb_getvalue(L, 3, field);
+  upb_msg_set(m->msg, field, newval);
+  if (upb_isstring(field)) {
+    // lupb_getvalue() created this string for us; release our ref on it now
+    // that it has been handed to upb_msg_set().
+    upb_string_unref(upb_value_getstr(newval));
+  }
- lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref);
+  return 0;
+}
+
+static int lupb_msg_clear(lua_State *L) {  // "Clear" method: argument 1 must be a upb.msg userdata.
+ lupb_msg *m = lupb_msg_check(L, 1);
+ upb_msg_clear(m->msg, m->msgdef);
+ return 0;  // No values are returned to Lua.
}
-// msgdef
+static const struct luaL_Reg lupb_msg_mm[] = {  // Metatable entries for "upb.msg" userdata.
+ {"__gc", lupb_msg_gc},
+ {"__index", lupb_msg_index},
+ {"__newindex", lupb_msg_newindex},
+ // Our __index mm will look up methods if the index isn't a field name.
+ {"Clear", lupb_msg_clear},
+ {NULL, NULL}  // Sentinel terminating the list.
+};
+
+
+/* lupb_msgdef ****************************************************************/
static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) {
lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef");
@@ -115,6 +313,12 @@ static int lupb_msgdef_gc(lua_State *L) {
return 0;
}
+// __call metamethod for msgdefs: calling a msgdef from Lua creates a new
+// message of that type and returns its wrapper.
+static int lupb_msgdef_call(lua_State *L) {
+  lupb_msg_pushnew(L, lupb_msgdef_check(L, 1));
+  return 1;
+}
+
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f);
static int lupb_msgdef_name(lua_State *L) {
@@ -150,6 +354,7 @@ static int lupb_msgdef_fieldbynum(lua_State *L) {
}
static const struct luaL_Reg lupb_msgdef_mm[] = {
+ {"__call", lupb_msgdef_call},
{"__gc", lupb_msgdef_gc},
{NULL, NULL}
};
@@ -161,7 +366,8 @@ static const struct luaL_Reg lupb_msgdef_m[] = {
{NULL, NULL}
};
-// enumdef
+
+/* lupb_enumdef ***************************************************************/
static upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) {
lupb_def *ldef = luaL_checkudata(L, narg, "upb.enumdef");
@@ -191,18 +397,41 @@ static const struct luaL_Reg lupb_enumdef_m[] = {
};
+/* lupb_def *******************************************************************/
+
+// Pushes the Lua wrapper for "def", creating it if it is not cached yet.
+// "owned" is nonzero when the caller is handing over a ref on def.  Either
+// way the refcount is adjusted so that a newly created wrapper ends up with
+// one ref and a caller-supplied ref is not left over when a wrapper already
+// existed.  Raises a Lua error for an unknown def type.
+static void lupb_def_getorcreate(lua_State *L, upb_def *def, int owned) {
+  const char *type_name = NULL;
+  switch (def->type) {
+    case UPB_DEF_MSG:
+      type_name = "upb.msgdef";
+      break;
+    case UPB_DEF_ENUM:
+      type_name = "upb.enumdef";
+      break;
+    default:
+      luaL_error(L, "unknown deftype %d", def->type);
+  }
+  bool created = lupb_cache_getorcreate(L, def, type_name);
+  if (created) {
+    // The new wrapper needs a ref; take one unless the caller donated theirs.
+    if (!owned) upb_def_ref(def);
+  } else if (owned) {
+    // A wrapper already existed, so the caller's donated ref is surplus.
+    upb_def_unref(def);
+  }
+}
+
+
/* lupb_fielddef **************************************************************/
typedef struct {
upb_fielddef *field;
} lupb_fielddef;
-static void lupb_fielddef_ref(void *cobj) {
- upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef));
-}
-
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) {
- lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop);
+  if (!lupb_cache_getorcreate(L, f, "upb.fielddef")) return;
+  // A new wrapper was created.  fielddefs themselves aren't refcounted; they
+  // are kept alive by their owning msgdef, so that is what we take a ref on.
+  upb_def_ref(UPB_UPCAST(f->msgdef));
}
static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) {
@@ -221,11 +450,9 @@ static int lupb_fielddef_index(lua_State *L) {
} else if (strcmp(str, "label") == 0) {
lua_pushinteger(L, f->field->label);
} else if (strcmp(str, "def") == 0) {
- upb_def_ref(f->field->def);
- lupb_def_getorcreate(L, f->field->def);
+ lupb_def_getorcreate(L, f->field->def, false);
} else if (strcmp(str, "msgdef") == 0) {
- upb_def_ref(UPB_UPCAST(f->field->msgdef));
- lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef));
+ lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef), false);
} else {
lua_pushnil(L);
}
@@ -264,10 +491,6 @@ static int lupb_symtab_gc(lua_State *L) {
return 0;
}
-static void lupb_symtab_unref(void *cobj) {
- upb_symtab_unref((upb_symtab*)cobj);
-}
-
static int lupb_symtab_lookup(lua_State *L) {
lupb_symtab *s = lupb_symtab_check(L, 1);
size_t len;
@@ -275,7 +498,7 @@ static int lupb_symtab_lookup(lua_State *L) {
upb_string namestr = UPB_STACK_STRING_LEN(name, len);
upb_def *def = upb_symtab_lookup(s->symtab, &namestr);
if (def) {
- lupb_def_getorcreate(L, def);
+ lupb_def_getorcreate(L, def, true);
} else {
lua_pushnil(L);
}
@@ -293,7 +516,7 @@ static int lupb_symtab_getdefs(lua_State *L) {
for (int i = 0; i < count; i++) {
upb_def *def = defs[i];
lua_pushnumber(L, i + 1); // 1-based array.
- lupb_def_getorcreate(L, def);
+ lupb_def_getorcreate(L, def, true);
// Add it to our return table.
lua_settable(L, -3);
}
@@ -331,13 +554,15 @@ static const struct luaL_Reg lupb_symtab_mm[] = {
static int lupb_symtab_new(lua_State *L) {  // Creates a new symtab and returns its Lua wrapper.
upb_symtab *s = upb_symtab_new();
- lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref);
+ bool created = lupb_cache_getorcreate(L, s, "upb.symtab");
+ (void)created; // For NDEBUG
+ assert(created); // It's new, there shouldn't be an obj for it already.
return 1;
}
static int lupb_getfdsdef(lua_State *L) {  // Returns the Lua wrapper for the msgdef from upb_getfdsdef().
- lupb_cache_getorcreate(
- L, upb_getfdsdef(), "upb.msgdef", lupb_nop, lupb_def_unref);
+ upb_msgdef *fdsdef = upb_getfdsdef(); // Gets a ref on fdsdef.
+ lupb_def_getorcreate(L, UPB_UPCAST(fdsdef), true);  // owned=true: our ref is handed over (or dropped if a wrapper already exists).
return 1;
}
@@ -357,7 +582,7 @@ static void lupb_register_type(lua_State *L, const char *name,
// Methods go in the mt's __index method. This implies that you can't
// implement __index and also set methods yourself.
luaL_register(L, NULL, m);
- lua_setfield(L, -2, "__index");
+ lua_setfield(L, -2, "__index");
}
lua_pop(L, 1); // The mt.
}
@@ -367,8 +592,9 @@ int luaopen_upb(lua_State *L) {
lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm);
lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm);
lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm);
+ lupb_register_type(L, "upb.msg", NULL, lupb_msg_mm);
- // Create our object cache. TODO: need to make this table weak!
+ // Create our object cache.
lua_createtable(L, 0, 0);
lua_createtable(L, 0, 1); // Cache metatable.
lua_pushstring(L, "v"); // Values are weak.
diff --git a/src/descriptor.h b/src/descriptor.h
deleted file mode 100644
index f6d3ca3..0000000
--- a/src/descriptor.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * This file contains declarations for an array that contains the contents
- * of descriptor.proto, serialized as a protobuf. xxd is used to create
- * the actual definition.
- */
-
-#ifndef UPB_DESCRIPTOR_H_
-#define UPB_DESCRIPTOR_H_
-
-#include "upb_string.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern upb_string descriptor_str;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_DESCRIPTOR_H_ */
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 8b10522..78fc8b1 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -1,10 +1,11 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
+ * Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details.
*/
#include "upb_decoder.h"
+#include "upb_varint_decoder.h"
#include <inttypes.h>
#include <stddef.h>
@@ -21,105 +22,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
-/* Pure Decoding **************************************************************/
-
-// The key fast-path varint-decoding routine. Here we can assume we have at
-// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of
-// possibilities for optimization/experimentation here.
-
-#ifdef USE_SSE_VARINT_DECODING
-#include <emmintrin.h>
-
-// This works, but is empirically slower than the branchy version below. Why?
-// Most varints are very short. Next step: use branches for 1/2-byte varints,
-// but use the SSE version for 3-10 byte varints.
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- __m128i val128 = _mm_loadu_si128((void*)p);
- unsigned int continuation_bits = _mm_movemask_epi8(val128);
- unsigned int bsr_val = ~continuation_bits;
- int varint_length = __builtin_ffs(bsr_val);
- if (varint_length > 10) {
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
- }
-
- uint16_t twob;
- memcpy(&twob, p, 2);
- twob &= 0x7f7f;
- twob = ((twob & 0xff00) >> 1) | (twob & 0xff);
-
- uint64_t eightb;
- memcpy(&eightb, p + 2, 8);
- eightb &= 0x7f7f7f7f7f7f7f7f;
- eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff);
- eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff);
- eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff);
-
- uint64_t all_bits = twob | (eightb << 14);
- int varint_bits = varint_length * 7;
- uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
- *val = all_bits & mask;
- *ptr = p + varint_length;
- return true;
-}
-
-#else
-
-INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
- const char *p = *ptr;
- uint32_t low, high = 0;
- uint32_t b;
- b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); low |= (b & 0x7f) << 28;
- high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
- b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
-
- upb_seterr(s, UPB_ERROR, "Unterminated varint");
- return false;
-
-done:
- *val = ((uint64_t)high << 32) | low;
- *ptr = p;
- return true;
-}
-
-typedef struct {
- const char *newbuf;
- uint64_t val;
-} retval;
-
-retval upb_decode_varint_fast64(const char *p) {
- uint64_t ret;
- uint64_t b;
- retval r = {(void*)0, 0};
- b = *(p++); ret = (b & 0x7f) ; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
- b = *(p++); ret |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
- return r;
-
-done:
- r.val = ret;
- r.newbuf = p;
- return r;
-}
-
-#endif
-
/* Decoding/Buffering of individual values ************************************/
@@ -233,11 +135,13 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
- uint64_t val64;
- const char *p = d->ptr;
- if (!upb_decode_varint_fast(&p, &val64, d->status)) return false;
- upb_decoder_advance(d, p - d->ptr);
- upb_value_setraw(val, val64);
+ upb_decoderet r = upb_decode_varint_fast(d->ptr);
+ if (r.p == NULL) {
+ upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
+ return false;
+ }
+ upb_value_setraw(val, r.val);
+ upb_decoder_advance(d, r.p - d->ptr);
return true;
} else {
return upb_decode_varint_slow(d, val);
@@ -352,11 +256,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
d->dispatcher.top->handlers.set->value,
d->dispatcher.top->handlers.closure,
d->top->msgdef->itof.array,
- d->top->msgdef->itof.array_size);
+ d->top->msgdef->itof.array_size,
+ d->tmp);
CHECK_FLOW(ret.flow);
+ if (ret.ptr - d->ptr > 0) {
+ DEBUGPRINTF("Fast path parsed %d bytes of data!\n", ret.ptr - d->ptr);
+ }
d->ptr = ret.ptr;
if (end - d->ptr < 12) {
- DEBUGPRINTF("Off the fast path because <12 bytes of data\n");
+ if (end == d->submsg_end && end != d->end) {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, but ONLY because of submsg end.\n");
+ } else {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data, NOT because of submsg end.\n");
+ }
} else {
DEBUGPRINTF("Off the fast path for some other reason.\n");
}
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
index c59d131..032ea86 100644
--- a/src/upb_decoder_x64.asm
+++ b/src/upb_decoder_x64.asm
@@ -33,7 +33,7 @@ SECTION .text
; Register allocation.
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
-%define FREE r12 ; unused
+%define STRING r12 ; unused
%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
%define CALLBACK r14
%define CLOSURE r15
@@ -143,6 +143,7 @@ _upb_fastdecode:
; Parse arguments into reg vals and stack.
mov BUF, rdi
+ mov COMMITTED_BUF_SPILL, rdi
mov END, rsi
mov CALLBACK, rdx
mov CLOSURE, rcx
@@ -210,7 +211,6 @@ align 16
align 16
.string:
-
.cant_fast_path:
mov rax, 0 ; UPB_CONTINUE -- continue as before.
.done:
diff --git a/src/upb_def.c b/src/upb_def.c
index 0382610..d77e29a 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -6,9 +6,11 @@
#include <stdlib.h>
#include <stddef.h>
+#include <errno.h>
#include "descriptor.c"
#include "descriptor_const.h"
#include "upb_def.h"
+#include "upb_msg.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
@@ -261,6 +263,8 @@ struct _upb_defbuilder {
bool saw_number;
bool saw_name;
+ upb_string *default_string;
+
upb_fielddef *f;
};
typedef struct _upb_defbuilder upb_defbuilder;
@@ -276,12 +280,18 @@ static void upb_defbuilder_init(upb_defbuilder *b) {
upb_status_init(&b->status);
b->stack_len = 0;
b->name = NULL;
+ b->default_string = NULL;
}
static void upb_defbuilder_uninit(upb_defbuilder *b) {
upb_string_unref(b->name);
upb_status_uninit(&b->status);
upb_deflist_uninit(&b->defs);
+  upb_string_unref(b->default_string);
+  // Pop any frames still on the stack, topmost first, releasing each name.
+  while (b->stack_len > 0) {
+    b->stack_len--;
+    upb_string_unref(b->stack[b->stack_len].name);
+  }
}
static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
@@ -587,6 +597,19 @@ upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
/* upb_fielddef ***************************************************************/
static void upb_fielddef_free(upb_fielddef *f) {
+ if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) {
+ upb_string_unref(upb_value_getstr(f->default_value));
+ } else if (upb_issubmsg(f)) {
+ upb_msg *m = upb_value_getmsg(f->default_value);
+ assert(m);
+ // We cheat a bit here. We need to unref msg, but we don't have a reliable
+ // way of accessing the msgdef (which is required by upb_msg_unref()),
+ // because f->def may have already been collected as part of a cycle if
+ // this is an unowned ref. But we know that default messages never contain
+ // references to other messages, and their only string references are to
+ // the singleton empty string, so we can safely unref+free msg directly.
+ if (upb_atomic_unref(&m->refcount)) free(m);
+ }
upb_string_unref(f->name);
if(f->owned) {
upb_def_unref(f->def);
@@ -606,6 +629,109 @@ static upb_flow_t upb_fielddef_startmsg(void *_b) {
return UPB_CONTINUE;
}
+// Converts the default value in string "dstr" into "d", interpreting it
+// according to the field type "type".  Passes a ref on dstr; dstr may be NULL
+// when the field declared no default, in which case the type's zero value
+// (0, 0.0, false or the empty string) is stored.  Returns true on success and
+// false if the string cannot be parsed as (or is out of range for) the type,
+// or if a default was supplied for a message/group field.
+static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) {
+  bool success = true;
+  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
+    // We'll keep the ref we had on it.  We include enums in this case because
+    // we need the enumdef to resolve the name, but we may not have it yet.
+    // We'll resolve it later.
+    if (dstr) {
+      upb_value_setstr(d, dstr);
+    } else {
+      upb_value_setstr(d, upb_emptystring());
+    }
+  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
+    // We don't expect to get a default value.  Test the pointer before
+    // releasing our ref rather than after.
+    if (dstr != NULL) {
+      printf("Returning false because I got a default string for a message!\n");
+      success = false;
+    }
+    upb_string_unref(dstr);
+  } else {
+    // The strto* functions need the string to be NULL-terminated.
+    char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
+    char *end;
+    upb_string_unref(dstr);
+    // The strto* functions set errno on failure but never clear it, so a
+    // stale ERANGE from earlier code would otherwise look like a failure here.
+    errno = 0;
+    switch (type) {
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(SFIXED32):
+        if (strz) {
+          long val = strtol(strz, &end, 0);
+          if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
+            success = false;
+          else
+            upb_value_setint32(d, val);
+        } else {
+          upb_value_setint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SINT64):
+      case UPB_TYPE(SFIXED64):
+        if (strz) {
+          upb_value_setint64(d, strtoll(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32):
+        if (strz) {
+          // Keep the result unsigned: storing it in a signed long would turn
+          // large (or wrapped-negative) values negative and let them slip
+          // past the range check.
+          unsigned long val = strtoul(strz, &end, 0);
+          if (val > UINT32_MAX || errno == ERANGE || *end)
+            success = false;
+          else
+            upb_value_setuint32(d, val);
+        } else {
+          upb_value_setuint32(d, 0);
+        }
+        break;
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64):
+        if (strz) {
+          upb_value_setuint64(d, strtoull(strz, &end, 0));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setuint64(d, 0);
+        }
+        break;
+      case UPB_TYPE(DOUBLE):
+        if (strz) {
+          upb_value_setdouble(d, strtod(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setdouble(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(FLOAT):
+        if (strz) {
+          upb_value_setfloat(d, strtof(strz, &end));
+          if (errno == ERANGE || *end) success = false;
+        } else {
+          upb_value_setfloat(d, 0.0);
+        }
+        break;
+      case UPB_TYPE(BOOL):
+        if (!strz || strcmp(strz, "false") == 0)
+          upb_value_setbool(d, false);
+        else if (strcmp(strz, "true") == 0)
+          upb_value_setbool(d, true);
+        else
+          success = false;
+        break;
+    }
+    if (!success) {
+      printf("Returning false on the int conversion path, was trying to convert: %s, type=%d\n", strz, type);
+    }
+    free(strz);
+  }
+  return success;
+}
+
static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_defbuilder *b = _b;
upb_fielddef *f = b->f;
@@ -619,6 +745,15 @@ static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);
+
+ upb_string *dstr = b->default_string;
+ b->default_string = NULL;
+ if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) {
+ // We don't worry too much about giving a great error message since the
+ // compiler should have ensured this was correct.
+ upb_seterr(&b->status, UPB_ERROR, "Error converting default value.");
+ return UPB_BREAK;
+ }
return UPB_CONTINUE;
}
@@ -644,6 +779,12 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) {
b->f->owned = true;
break;
}
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM:
+ // Have to convert from string to the correct type, but we might not know
+ // the type yet.
+ upb_string_unref(b->default_string);
+ b->default_string = upb_string_getref(upb_value_getstr(val));
+ break;
}
return UPB_CONTINUE;
}
@@ -683,6 +824,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_b) {
upb_atomic_refcount_init(&m->cycle_refcount, 0);
upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+ m->default_message = NULL;
upb_deflist_push(&b->defs, UPB_UPCAST(m));
upb_defbuilder_startcontainer(b);
return UPB_CONTINUE;
@@ -703,7 +845,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
upb_field_count_t field = 0;
upb_msg_iter i;
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- sorted_fields[field++]= upb_msg_iter_field(i);
+ sorted_fields[field++] = upb_msg_iter_field(i);
}
qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
@@ -745,6 +887,18 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
if (max_align > 0) m->size = upb_align_up(m->size, max_align);
+ // Create default message instance, an immutable message with all default
+ // values set (except submessages, which are simply marked as unset). We
+ // could alternatively leave all set bits unset, but this would make
+ // upb_msg_get() take its unexpected branch more often for no good reason.
+ m->default_message = upb_msg_new(m);
+ for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+ upb_fielddef *f = upb_msg_iter_field(i);
+ // NOTE(review): unary '!' binds tighter than '==', so the second operand
+ // below parses as (!f->type) == UPB_TYPE(ENUM), not as
+ // f->type != UPB_TYPE(ENUM). (!f->type) is 0 or 1, so the test can only
+ // pass if UPB_TYPE(ENUM) is 0 or 1 -- presumably it is not, in which case
+ // upb_msg_set() is never reached and no defaults are stored here. The
+ // intent was presumably "skip enums". Before correcting the operator,
+ // confirm upb_msg_set() is valid for every field that would then reach it
+ // (e.g. repeated fields) -- TODO confirm.
+ if (!upb_issubmsg(f) && !f->type == UPB_TYPE(ENUM)) {
+ upb_msg_set(m->default_message, f, f->default_value);
+ }
+ }
+
upb_defbuilder_endcontainer(b);
return UPB_CONTINUE;
}
@@ -802,6 +956,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b,
static void upb_msgdef_free(upb_msgdef *m)
{
+ upb_msg_unref(m->default_message, m);
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
upb_fielddef_free(upb_msg_iter_field(i));
@@ -818,6 +973,10 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
// We will later make the ref unowned if it is a part of a cycle.
f->owned = true;
upb_def_ref(def);
+ if (upb_issubmsg(f)) {
+ // The default for a submessage field is the target msgdef's default
+ // message instance; take a new ref on it for this fielddef.
+ // NOTE(review): upb_msg_getref() asserts a non-NULL message, so this
+ // assumes md->default_message has already been created -- confirm ordering.
+ upb_msgdef *md = upb_downcast_msgdef(def);
+ upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message));
+ }
}
upb_msg_iter upb_msg_begin(upb_msgdef *m) {
@@ -937,7 +1096,8 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
}
// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab.
+// resolves all of the unresolved refs for the defs in tmptab. Also resolves
+// default values for enumerations and submessages.
bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
upb_status *status)
{
@@ -1352,7 +1512,7 @@ upb_def *upb_getdescriptordef(upb_string *str) {
// upb itself is corrupt.
abort();
}
- upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it.
+ upb_msgdef_unref(def); // The symtab already holds a ref on it.
atexit(upb_free_descriptor_symtab);
}
return upb_symtab_resolve(
diff --git a/src/upb_def.h b/src/upb_def.h
index 121d5bc..3f79895 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -81,6 +81,9 @@ INLINE void upb_def_unref(upb_def *def) {
if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
+
/* upb_fielddef ***************************************************************/
// A upb_fielddef describes a single field in a message. It isn't a full def
@@ -158,6 +161,10 @@ typedef struct _upb_msgdef {
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
+
+ // Immutable msg instance that has all default values set.
+ // TODO: need a way of making this immutable!
+ struct _upb_msg *default_message;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
@@ -172,6 +179,13 @@ typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
+// Convenience wrappers that drop/take a ref on a msgdef through its upb_def
+// base, sparing callers the explicit UPB_UPCAST().
+INLINE void upb_msgdef_unref(upb_msgdef *md) {
+ upb_def_unref(UPB_UPCAST(md));
+}
+INLINE void upb_msgdef_ref(upb_msgdef *md) {
+ upb_def_ref(UPB_UPCAST(md));
+}
+
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
@@ -361,8 +375,6 @@ UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
-#define UPB_UPCAST(ptr) (&(ptr)->base)
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 9dfbea4..211004c 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -145,6 +145,22 @@ INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->set_bit_offset] |= f->set_bit_mask;
}
+// Stores val into field f of msg and marks the field as set.
+//
+// For memory-managed fields (upb_field_ismm) the message takes its own ref on
+// the new value and releases the value previously stored in the slot. val
+// must have the value type that upb_field_valuetype(f) reports.
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
+  assert(val.type == upb_field_valuetype(f));
+  upb_valueptr ptr = _upb_msg_getptr(msg, f);
+  if (upb_field_ismm(f)) {
+    // Ref the new value before releasing the old one: if both are the same
+    // object and the message holds the only ref, unreffing first could free
+    // it before we get a chance to ref it.
+    upb_atomic_refcount_t *refcount = upb_value_getrefcount(val);
+    if (refcount) upb_atomic_ref(refcount);
+
+    // Unref any previous value we may have had there.
+    upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f));
+    upb_field_unref(oldval, f);
+  }
+  upb_msg_sethas(msg, f);
+  // upb_value_write() returns void, so call it as a plain statement; C does
+  // not permit "return <expr>;" in a function returning void.
+  upb_value_write(ptr, val, upb_field_valuetype(f));
+}
+
static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p = _upb_msg_getptr(msg, f);
if (upb_isarray(f)) {
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 3246971..ff8489c 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -135,6 +135,7 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
#undef CASE
}
+
/* upb_array ******************************************************************/
typedef uint32_t upb_arraylen_t;
@@ -172,8 +173,17 @@ INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
}
+
/* upb_msg ********************************************************************/
+// upb_msg is not self-describing; the upb_msg does not contain a pointer to the
+// upb_msgdef. While this makes the API a bit more cumbersome to use, this
+// choice was made for a few important reasons:
+//
+// 1. it would make every message 8 bytes larger on 64-bit platforms. This is
+// a high overhead for small messages.
+// 2. you would want the msg to own a ref on its msgdef, but this would require
+// an atomic operation for every message create or destroy!
struct _upb_msg {
upb_atomic_refcount_t refcount;
uint8_t data[4]; // We allocate the appropriate amount per message.
@@ -194,6 +204,11 @@ upb_msg *upb_msg_new(upb_msgdef *md);
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
}
+// Takes a new ref on msg and returns msg, so the call can be used inline as
+// an expression. msg must not be NULL.
+INLINE upb_msg *upb_msg_getref(upb_msg *msg) {
+ assert(msg);
+ upb_atomic_ref(&msg->refcount);
+ return msg;
+}
void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
@@ -203,10 +218,40 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
}
+// We have several options for handling default values:
+// 1. inside upb_msg_clear(), overwrite all values to be their defaults,
+// overwriting submessage pointers to point to the default instance again.
+// 2. inside upb_msg_get(), test upb_msg_has() and return the fielddef's
+// default_value if it is not set. upb_msg_clear() only clears the set bits.
+// We lazily clear objects if/when we reuse them.
+// 3. inside upb_msg_clear(), overwrite all values to be their default,
+// and recurse into submessages to set all their values to defaults also.
+// 4. as a hybrid of (1) and (3), make each "set bit" tri-state, where it
+// can have a value of "unset, but cached sub-message needs to be cleared."
+// Like (2) we can cache sub-messages and lazily clear, but primitive values
+// can always be returned straight from the message.
+//
+// (1) is undesirable, because it prevents us from caching sub-objects.
+// (2) makes clear() cheaper, but makes get() branchier.
+// (3) makes get() less branchy, but makes clear() have worse cache behavior.
+// (4) makes get() differently branchy (only returns default from msgdef if
+// NON-primitive value is unset), but uses more set bits. It's questionable
+// whether it would be a performance improvement.
+//
+// For the moment we go with (2). Google's protobuf does (3), which is likely
+// part of the reason we beat it in some benchmarks.
+
+// For submessages and strings, the returned value is not owned.
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
- return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ if (upb_msg_has(msg, f)) {
+ // Field is explicitly set: read the stored value out of the message.
+ return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
+ } else {
+ // Field is unset: fall back to the default held by the fielddef.
+ return f->default_value;
+ }
}
+void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
+
// Unsets all field values back to their defaults.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);
diff --git a/src/upb_string.c b/src/upb_string.c
index 81b152d..29ce7d4 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -147,4 +147,15 @@ error:
return NULL;
}
-void upb_string_noninlinerecycle(upb_string **_str) { return upb_string_recycle(_str); }
+// Returns a pointer to a single statically-allocated empty string; every call
+// returns the same object.
+upb_string *upb_emptystring(void) {
+  static upb_string empty = UPB_STATIC_STRING("");
+  return &empty;
+}
+
+// Returns a malloc()ed, NUL-terminated copy of str's bytes, or NULL if the
+// allocation fails. The caller owns the result and must free() it.
+char *upb_string_newcstr(upb_string *str) {
+  upb_strlen_t len = upb_string_len(str);
+  char *ret = malloc(len+1);
+  // Bail out rather than memcpy() into a NULL pointer on allocation failure.
+  if (!ret) return NULL;
+  memcpy(ret, upb_string_getrobuf(str), len);
+  ret[len] = '\0';
+  return ret;
+}
diff --git a/src/upb_string.h b/src/upb_string.h
index 3799c5e..efafa44 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -134,6 +134,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) {
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
+// Returns true if str is NULL or has zero length.
+INLINE bool upb_string_isempty(upb_string *str) {
+ return !str || upb_string_len(str) == 0;
+}
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
@@ -273,6 +276,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str,
//#endif
#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
+// Returns a singleton empty string. Declared with (void) so that this is a
+// real prototype and calls passing arguments are rejected by the compiler.
+upb_string *upb_emptystring(void);
+
+
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
@@ -339,6 +346,9 @@ INLINE upb_string *upb_strdupc(const char *src) {
return upb_strduplen(src, strlen(src));
}
+// Returns a newly-allocated NUL-terminated copy of str, which the caller must
+// free().
+char *upb_string_newcstr(upb_string *str);
+
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
diff --git a/src/upbc.c b/src/upbc.c
index 428ec41..4fa8a71 100644
--- a/src/upbc.c
+++ b/src/upbc.c
@@ -12,7 +12,6 @@
#include <inttypes.h>
#include <stdarg.h>
#include <stdlib.h>
-#include "descriptor.h"
#include "upb_def.h"
#include "upb_msg.h"
#include "upb_glue.h"
diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc
index 1839123..f3c54b4 100644
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@@ -1,20 +1,20 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * A test that verifies that our results are identical to proto2 for a
+ * given proto type and input protobuf.
+ *
+ * Copyright (c) 2011 Joshua Haberman. See LICENSE for details.
+ */
-#undef NDEBUG /* ensure tests always assert. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <google/protobuf/descriptor.h>
-#include "upb_decoder.h"
+#include "upb_test.h"
#include "upb_def.h"
#include "upb_glue.h"
#include "upb_msg.h"
-#include "upb_strstream.h"
-
-int num_assertions = 0;
-#define ASSERT(expr) do { \
- ++num_assertions; \
- assert(expr); \
- } while(0)
#include MESSAGE_HFILE
diff --git a/tests/tests.c b/tests/tests.c
index c691b18..a04b1da 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -1,212 +1,13 @@
-#undef NDEBUG /* ensure tests always assert. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
-#include "upb_decoder.c"
#include "upb_def.h"
#include "upb_glue.h"
-
-int num_assertions = 0;
-#define ASSERT(expr) do { \
- ++num_assertions; \
- assert(expr); \
- } while(0)
-
-static void test_get_v_uint64_t()
-{
-#define TEST(name, bytes, val) {\
- upb_status status = UPB_STATUS_INIT; \
- const char name[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \
- const char *name ## _buf = name; \
- uint64_t name ## _val = 0; \
- upb_decode_varint_fast(&name ## _buf, &name ## _val, &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _val == val); \
- ASSERT(name ## _buf == name + sizeof(name) - 16); /* - 1 for NULL */ \
- }
-
- TEST(zero, "\x00", 0ULL);
- TEST(one, "\x01", 1ULL);
- TEST(twob, "\x81\x14", 0xa01ULL);
- TEST(twob, "\x81\x03", 0x181ULL);
- TEST(threeb, "\x81\x83\x07", 0x1c181ULL);
- TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181ULL);
- TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0x1f1e1c181ULL);
- TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0x1f9f1e1c181ULL);
- TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0x1fdf9f1e1c181ULL);
- TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0x3fdf9f1e1c181ULL);
- TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0x303fdf9f1e1c181ULL);
- TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
-#undef TEST
-
- char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
- const char *twelvebyte_buf = twelvebyte;
- uint64_t twelvebyte_val = 0;
- upb_status status = UPB_STATUS_INIT;
- /* A varint that terminates before hitting the end of the provided buffer,
- * but in too many bytes (11 instead of 10). */
- upb_decode_varint_fast(&twelvebyte_buf, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR);
- upb_status_uninit(&status);
-}
-
-#if 0
-static void test_get_v_uint32_t()
-{
-#define TEST(name, bytes, val) {\
- upb_status status = UPB_STATUS_INIT; \
- const uint8_t name[] = bytes; \
- const uint8_t *name ## _buf = name; \
- uint32_t name ## _val = 0; \
- name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name), &name ## _val, &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _val == val); \
- ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
- /* Test NEED_MORE_DATA. */ \
- if(sizeof(name) > 2) { \
- name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name) - 2, &name ## _val, &status); \
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \
- } \
- }
-
- TEST(zero, "\x00", 0UL);
- TEST(one, "\x01", 1UL);
- TEST(twob, "\x81\x03", 0x181UL);
- TEST(threeb, "\x81\x83\x07", 0x1c181UL);
- TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181UL);
- /* get_v_uint32_t truncates, so all the rest return the same thing. */
- TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0xf1e1c181UL);
- TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0xf1e1c181UL);
- TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0xf1e1c181UL);
- TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0xf1e1c181UL);
- TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0xf1e1c181UL);
- TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0xf1e1c181UL);
-#undef TEST
-
- uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
- uint32_t twelvebyte_val = 0;
- upb_status status = UPB_STATUS_INIT;
- /* A varint that terminates before hitting the end of the provided buffer,
- * but in too many bytes (11 instead of 10). */
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 12, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint that terminates simultaneously with the end of the provided
- * buffer, but in too many bytes (11 instead of 10). */
- upb_reset(&status);
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 11, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint whose buffer ends on exactly the byte where the varint must
- * terminate, but the final byte does not terminate. The absolutely most
- * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
- * by this point that the varint does not properly terminate. But we also
- * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
- * compromise overall correctness -- clients who supply more data later will
- * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
- * more data to supply will (rightly) conclude that their protobuf is corrupt.
- */
- upb_reset(&status);
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 10, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT ||
- status.code == UPB_STATUS_NEED_MORE_DATA);
-
- upb_reset(&status);
- upb_get_v_uint32_t(twelvebyte, twelvebyte + 9, &twelvebyte_val, &status);
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
-}
-
-static void test_skip_v_uint64_t()
-{
-#define TEST(name, bytes) {\
- upb_status status = UPB_STATUS_INIT; \
- const uint8_t name[] = bytes; \
- const uint8_t *name ## _buf = name; \
- name ## _buf = upb_skip_v_uint64_t(name ## _buf, name + sizeof(name), &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
- /* Test NEED_MORE_DATA. */ \
- if(sizeof(name) > 2) { \
- name ## _buf = upb_skip_v_uint64_t(name, name + sizeof(name) - 2, &status); \
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \
- } \
- }
-
- TEST(zero, "\x00");
- TEST(one, "\x01");
- TEST(twob, "\x81\x03");
- TEST(threeb, "\x81\x83\x07");
- TEST(fourb, "\x81\x83\x87\x0f");
- TEST(fiveb, "\x81\x83\x87\x8f\x1f");
- TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f");
- TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f");
- TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01");
- TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03");
- TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07");
-#undef TEST
-
- uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
- upb_status status = UPB_STATUS_INIT;
- /* A varint that terminates before hitting the end of the provided buffer,
- * but in too many bytes (11 instead of 10). */
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 12, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint that terminates simultaneously with the end of the provided
- * buffer, but in too many bytes (11 instead of 10). */
- upb_reset(&status);
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 11, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
-
- /* A varint whose buffer ends on exactly the byte where the varint must
- * terminate, but the final byte does not terminate. The absolutely most
- * correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
- * by this point that the varint does not properly terminate. But we also
- * allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
- * compromise overall correctness -- clients who supply more data later will
- * then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
- * more data to supply will (rightly) conclude that their protobuf is corrupt.
- */
- upb_reset(&status);
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 10, &status);
- ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT ||
- status.code == UPB_STATUS_NEED_MORE_DATA);
-
- upb_reset(&status);
- upb_skip_v_uint64_t(twelvebyte, twelvebyte + 9, &status);
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
-}
-
-static void test_get_f_uint32_t()
-{
-#define TEST(name, bytes, val) {\
- upb_status status = UPB_STATUS_INIT; \
- const uint8_t name[] = bytes; \
- const uint8_t *name ## _buf = name; \
- uint32_t name ## _val = 0; \
- name ## _buf = upb_get_f_uint32_t(name ## _buf, name + sizeof(name), &name ## _val, &status); \
- ASSERT(upb_ok(&status)); \
- ASSERT(name ## _val == val); \
- ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
- }
-
- TEST(zero, "\x00\x00\x00\x00", 0x0UL);
- TEST(one, "\x01\x00\x00\x00", 0x1UL);
-
- uint8_t threeb[] = {0x00, 0x00, 0x00};
- uint32_t threeb_val;
- upb_status status = UPB_STATUS_INIT;
- upb_get_f_uint32_t(threeb, threeb + sizeof(threeb), &threeb_val, &status);
- ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
-
-#undef TEST
-}
-#endif
+#include "upb_test.h"
static void test_upb_symtab() {
upb_symtab *s = upb_symtab_new();
- upb_symtab_add_descriptorproto(s);
ASSERT(s);
upb_string *descriptor = upb_strreadfile("tests/test.proto.pb");
if(!descriptor) {
@@ -240,11 +41,8 @@ static void test_upb_symtab() {
upb_def_ref(def2);
upb_def_unref(def);
upb_def_unref(def2);
-
-
}
-
int main()
{
#define TEST(func) do { \
@@ -254,10 +52,6 @@ int main()
printf("ok (%d assertions).\n", num_assertions - assertions_before); \
} while (0)
- TEST(test_get_v_uint64_t);
- //TEST(test_get_v_uint32_t);
- //TEST(test_skip_v_uint64_t);
- //TEST(test_get_f_uint32_t);
TEST(test_upb_symtab);
printf("All tests passed (%d assertions).\n", num_assertions);
return 0;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback