From c8f6a27e6b27ed5d51cf6c8da5ec080b9952fa99 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 12 Aug 2018 19:23:26 -0700 Subject: Enforced that upb_msg lives in an Arena only, and other simplifying. upb_msg was trying to be general enough that it could either live in an arena or be allocated with malloc()/free(). This was too much complexity for too little benefit. We should commit to just saying that upb_msg is arena-only. I also ripped out the code to glue upb_msg to the existing handlers-based encoder/decoder. upb_msg has its own, small, simple encoder/decoder. I'm trying to whittle down upb_msg to a small and simple core. I updated the Lua extension for these changes. Lua needs some more work to properly create arenas per message. For now I just created a single global arena. --- upb/bindings/lua/msg.c | 128 ++++--------- upb/bindings/lua/upb.h | 3 +- upb/bindings/lua/upb/pb.c | 117 ++---------- upb/decode.c | 16 +- upb/msg.c | 467 +++++----------------------------------------- upb/msg.h | 96 ++-------- upb/structs.int.h | 2 +- 7 files changed, 122 insertions(+), 707 deletions(-) (limited to 'upb') diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index e468ace..e983f46 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -94,56 +94,6 @@ static void *lupb_newuserdata(lua_State *L, size_t size, const char *type) { } -/* lupb_alloc *****************************************************************/ - -typedef struct { - upb_alloc alloc; - lua_State *L; -} lupb_alloc; - -char lupb_alloc_cache_key; - -static void *lupb_alloc_func(upb_alloc *alloc, void *ptr, size_t oldsize, - size_t size) { - lupb_alloc *lalloc = (lupb_alloc*)alloc; - void *ud; - - /* We read this every time in case the user changes the Lua alloc function. */ - lua_Alloc func = lua_getallocf(lalloc->L, &ud); - - return func(ud, ptr, oldsize, size); -} - -static void lupb_alloc_pushnew(lua_State *L) { - lupb_alloc *lalloc = lua_newuserdata(L, sizeof(lupb_alloc)); - - lalloc->alloc.func = &lupb_alloc_func; - lalloc->L = L; -} - -/* Returns the global lupb_alloc func that was created in our luaopen(). - * Callers can be guaranteed that it will be alive as long as |L| is. */ -static upb_alloc *lupb_alloc_get(lua_State *L) { - lupb_alloc *lalloc; - - lua_pushlightuserdata(L, &lupb_alloc_cache_key); - lua_gettable(L, LUA_REGISTRYINDEX); - lalloc = lua_touserdata(L, -1); - UPB_ASSERT(lalloc); - lua_pop(L, 1); - - return &lalloc->alloc; -} - -static void lupb_alloc_initsingleton(lua_State *L) { - lua_pushlightuserdata(L, &lupb_alloc_cache_key); - lupb_alloc_pushnew(L); - lua_settable(L, LUA_REGISTRYINDEX); -} - -#define ADD_BYTES(ptr, bytes) ((void*)((char*)ptr + bytes)) - - /* lupb_arena *****************************************************************/ /* lupb_arena only exists to wrap a upb_arena. It is never exposed to users; @@ -164,6 +114,30 @@ int lupb_arena_new(lua_State *L) { return 1; } +char lupb_arena_cache_key; + +/* Returns the global lupb_arena func that was created in our luaopen(). + * Callers can be guaranteed that it will be alive as long as |L| is. + * TODO(haberman): we shouldn't use a global arena! We should have + * one arena for a parse, or per independently-created message. */ +static upb_arena *lupb_arena_get(lua_State *L) { + upb_arena *arena; + + lua_pushlightuserdata(L, &lupb_arena_cache_key); + lua_gettable(L, LUA_REGISTRYINDEX); + arena = lua_touserdata(L, -1); + UPB_ASSERT(arena); + lua_pop(L, 1); + + return arena; +} + +static void lupb_arena_initsingleton(lua_State *L) { + lua_pushlightuserdata(L, &lupb_arena_cache_key); + lupb_arena_new(L); + lua_settable(L, LUA_REGISTRYINDEX); +} + static int lupb_arena_gc(lua_State *L) { upb_arena *a = lupb_arena_check(L, 1); upb_arena_uninit(a); @@ -314,12 +288,6 @@ const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg) { return lupb_msgclass_check(L, narg)->layout; } -const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg) { - const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, narg); - return upb_msgfactory_getmergehandlers( - lmsgclass->lfactory->factory, lmsgclass->msgdef); -} - const upb_msgdef *lupb_msgclass_getmsgdef(const lupb_msgclass *lmsgclass) { return lmsgclass->msgdef; } @@ -547,12 +515,6 @@ static int lupb_array_checkindex(lua_State *L, int narg, uint32_t max) { return n - 1; /* Lua uses 1-based indexing. :( */ } -static int lupb_array_gc(lua_State *L) { - lupb_array *larray = lupb_array_check(L, 1); - upb_array_uninit(larray->arr); - return 0; -} - /* lupb_array Public API */ static int lupb_array_new(lua_State *L) { @@ -568,11 +530,9 @@ static int lupb_array_new(lua_State *L) { lupb_uservalseti(L, -1, ARRAY_MSGCLASS_INDEX, 1); /* GC-root lmsgclass. */ } - larray = - lupb_newuserdata(L, sizeof(*larray) + upb_array_sizeof(type), LUPB_ARRAY); + larray = lupb_newuserdata(L, sizeof(*larray), LUPB_ARRAY); larray->lmsgclass = lmsgclass; - larray->arr = ADD_BYTES(larray, sizeof(*larray)); - upb_array_init(larray->arr, type, lupb_alloc_get(L)); + larray->arr = upb_array_new(type, lupb_arena_get(L)); return 1; } @@ -614,7 +574,6 @@ static int lupb_array_len(lua_State *L) { } static const struct luaL_Reg lupb_array_mm[] = { - {"__gc", lupb_array_gc}, {"__index", lupb_array_index}, {"__len", lupb_array_len}, {"__newindex", lupb_array_newindex}, @@ -681,12 +640,6 @@ static upb_msgval lupb_map_typecheck(lua_State *L, int narg, int msg, return upb_msgval_map(map); } -static int lupb_map_gc(lua_State *L) { - lupb_map *lmap = lupb_map_check(L, 1); - upb_map_uninit(lmap->map); - return 0; -} - /* lupb_map Public API */ /** @@ -707,9 +660,7 @@ static int lupb_map_new(lua_State *L) { value_type = UPB_TYPE_MESSAGE; } - lmap = lupb_newuserdata( - L, sizeof(*lmap) + upb_map_sizeof(key_type, value_type), LUPB_MAP); - lmap->map = ADD_BYTES(lmap, sizeof(*lmap)); + lmap = lupb_newuserdata(L, sizeof(*lmap), LUPB_MAP); if (value_type == UPB_TYPE_MESSAGE) { value_lmsgclass = lupb_msgclass_check(L, 2); @@ -717,7 +668,7 @@ static int lupb_map_new(lua_State *L) { } lmap->value_lmsgclass = value_lmsgclass; - upb_map_init(lmap->map, key_type, value_type, lupb_alloc_get(L)); + lmap->map = upb_map_new(key_type, value_type, lupb_arena_get(L)); return 1; } @@ -858,7 +809,6 @@ static int lupb_map_pairs(lua_State *L) { /* upb_mapiter ]]] */ static const struct luaL_Reg lupb_map_mm[] = { - {"__gc", lupb_map_gc}, {"__index", lupb_map_index}, {"__len", lupb_map_len}, {"__newindex", lupb_map_newindex}, @@ -912,6 +862,13 @@ const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, return lmsg->msg; } +upb_msg *lupb_msg_checkmsg2(lua_State *L, int narg, + const upb_msglayout **layout) { + lupb_msg *lmsg = lupb_msg_check(L, narg); + *layout = lmsg->lmsgclass->layout; + return lmsg->msg; +} + const upb_msgdef *lupb_msg_checkdef(lua_State *L, int narg) { return lupb_msg_check(L, narg)->lmsgclass->msgdef; } @@ -958,12 +915,6 @@ int lupb_msg_pushref(lua_State *L, int msgclass, upb_msg *msg) { return 1; } -static int lupb_msg_gc(lua_State *L) { - lupb_msg *lmsg = lupb_msg_check(L, 1); - upb_msg_uninit(lmsg->msg, lmsg->lmsgclass->layout); - return 0; -} - /* lupb_msg Public API */ /** @@ -974,12 +925,10 @@ static int lupb_msg_gc(lua_State *L) { */ static int lupb_msg_pushnew(lua_State *L, int narg) { const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, narg); - size_t size = sizeof(lupb_msg) + upb_msg_sizeof(lmsgclass->layout); - lupb_msg *lmsg = lupb_newuserdata(L, size, LUPB_MSG); + lupb_msg *lmsg = lupb_newuserdata(L, sizeof(lupb_msg), LUPB_MSG); lmsg->lmsgclass = lmsgclass; - lmsg->msg = upb_msg_init( - ADD_BYTES(lmsg, sizeof(*lmsg)), lmsgclass->layout, lupb_alloc_get(L)); + lmsg->msg = upb_msg_new(lmsgclass->layout, lupb_arena_get(L)); lupb_uservalseti(L, -1, LUPB_MSG_MSGCLASSINDEX, narg); @@ -1070,7 +1019,6 @@ static int lupb_msg_newindex(lua_State *L) { } static const struct luaL_Reg lupb_msg_mm[] = { - {"__gc", lupb_msg_gc}, {"__index", lupb_msg_index}, {"__newindex", lupb_msg_newindex}, {NULL, NULL} @@ -1096,5 +1044,5 @@ void lupb_msg_registertypes(lua_State *L) { lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm); lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm); - lupb_alloc_initsingleton(L); + lupb_arena_initsingleton(L); } diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h index ea2910a..52bc5a2 100644 --- a/upb/bindings/lua/upb.h +++ b/upb/bindings/lua/upb.h @@ -131,11 +131,12 @@ int lupb_arena_new(lua_State *L); int lupb_msg_pushref(lua_State *L, int msgclass, void *msg); const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, const lupb_msgclass *lmsgclass); +upb_msg *lupb_msg_checkmsg2(lua_State *L, int narg, + const upb_msglayout **layout); const lupb_msgclass *lupb_msgclass_check(lua_State *L, int narg); const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg); const upb_msgdef *lupb_msgclass_getmsgdef(const lupb_msgclass *lmsgclass); -const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg); upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass); void lupb_msg_registertypes(lua_State *L); diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index 1d56066..466e8f7 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -6,135 +6,56 @@ */ #include "upb/bindings/lua/upb.h" -#include "upb/pb/decoder.h" -#include "upb/pb/encoder.h" +#include "upb/decode.h" +#include "upb/encode.h" #define LUPB_PBDECODERMETHOD "lupb.pb.decodermethod" -static int lupb_pb_strtomessage(lua_State *L) { +static int lupb_pb_decode(lua_State *L) { size_t len; upb_status status = UPB_STATUS_INIT; - const char *pb = lua_tolstring(L, 1, &len); - const upb_msglayout *layout = lua_touserdata(L, lua_upvalueindex(1)); - const upb_pbdecodermethod *method = lua_touserdata(L, lua_upvalueindex(2)); - const upb_handlers *handlers = upb_pbdecodermethod_desthandlers(method); - - upb_arena *msg_arena; + const upb_msglayout *layout; + upb_msg *msg = lupb_msg_checkmsg2(L, 1, &layout); + const char *pb = lua_tolstring(L, 2, &len); + upb_stringview buf = upb_stringview_make(pb, len); upb_env env; - upb_sink sink; - upb_pbdecoder *decoder; - void *msg; - - lupb_arena_new(L); - msg_arena = lupb_arena_check(L, -1); - msg = upb_msg_new(layout, upb_arena_alloc(msg_arena)); upb_env_init(&env); upb_env_reporterrorsto(&env, &status); - upb_sink_reset(&sink, handlers, msg); - decoder = upb_pbdecoder_create(&env, method, &sink); - upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(decoder)); + upb_decode(buf, msg, (const void*)layout, &env); /* Free resources before we potentially bail on error. */ upb_env_uninit(&env); lupb_checkstatus(L, &status); - /* References the arena at the top of the stack. */ - lupb_msg_pushref(L, lua_upvalueindex(3), msg); - return 1; + return 0; } -static int lupb_pb_messagetostr(lua_State *L) { - const lupb_msgclass *lmsgclass = lua_touserdata(L, lua_upvalueindex(1)); - const upb_msg *msg = lupb_msg_checkmsg(L, 1, lmsgclass); - const upb_visitorplan *vp = lua_touserdata(L, lua_upvalueindex(2)); - const upb_handlers *encode_handlers = lua_touserdata(L, lua_upvalueindex(3)); - +static int lupb_pb_encode(lua_State *L) { + const upb_msglayout *layout; + const upb_msg *msg = lupb_msg_checkmsg2(L, 1, &layout); upb_env env; - upb_bufsink *bufsink; - upb_bytessink *bytessink; - upb_pb_encoder *encoder; - upb_visitor *visitor; - const char *buf; - size_t len; + size_t size; upb_status status = UPB_STATUS_INIT; + char *result; upb_env_init(&env); upb_env_reporterrorsto(&env, &status); - bufsink = upb_bufsink_new(&env); - bytessink = upb_bufsink_sink(bufsink); - encoder = upb_pb_encoder_create(&env, encode_handlers, bytessink); - visitor = upb_visitor_create(&env, vp, upb_pb_encoder_input(encoder)); - upb_visitor_visitmsg(visitor, msg); - buf = upb_bufsink_getdata(bufsink, &len); - lua_pushlstring(L, buf, len); + result = upb_encode(msg, (const void*)layout, &env, &size); /* Free resources before we potentially bail on error. */ upb_env_uninit(&env); lupb_checkstatus(L, &status); + lua_pushlstring(L, result, size); return 1; } -static int lupb_pb_makestrtomsgdecoder(lua_State *L) { - const upb_msglayout *layout = lupb_msgclass_getlayout(L, 1); - const upb_handlers *handlers = lupb_msgclass_getmergehandlers(L, 1); - const upb_pbdecodermethod *m; - - upb_pbdecodermethodopts opts; - upb_pbdecodermethodopts_init(&opts, handlers); - - m = upb_pbdecodermethod_new(&opts, &m); - - /* Push upvalues for the closure. */ - lua_pushlightuserdata(L, (void*)layout); - lua_pushlightuserdata(L, (void*)m); - lua_pushvalue(L, 1); - - /* Upvalue for the closure, only to keep the decodermethod alive. */ - lupb_refcounted_pushnewrapper( - L, upb_pbdecodermethod_upcast(m), LUPB_PBDECODERMETHOD, &m); - - lua_pushcclosure(L, &lupb_pb_strtomessage, 4); - - return 1; /* The decoder closure. */ -} - -static int lupb_pb_makemsgtostrencoder(lua_State *L) { - const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, 1); - const upb_msgdef *md = lupb_msgclass_getmsgdef(lmsgclass); - upb_msgfactory *factory = lupb_msgclass_getfactory(lmsgclass); - const upb_handlers *encode_handlers; - const upb_visitorplan *vp; - - encode_handlers = upb_pb_encoder_newhandlers(md, &encode_handlers); - vp = upb_msgfactory_getvisitorplan(factory, encode_handlers); - - /* Push upvalues for the closure. */ - lua_pushlightuserdata(L, (void*)lmsgclass); - lua_pushlightuserdata(L, (void*)vp); - lua_pushlightuserdata(L, (void*)encode_handlers); - - /* Upvalues for the closure, only to keep the other upvalues alive. */ - lua_pushvalue(L, 1); - lupb_refcounted_pushnewrapper( - L, upb_handlers_upcast(encode_handlers), LUPB_PBDECODERMETHOD, &encode_handlers); - - lua_pushcclosure(L, &lupb_pb_messagetostr, 5); - - return 1; /* The decoder closure. */ -} - -static const struct luaL_Reg decodermethod_mm[] = { - {"__gc", lupb_refcounted_gc}, - {NULL, NULL} -}; - static const struct luaL_Reg toplevel_m[] = { - {"MakeStringToMessageDecoder", lupb_pb_makestrtomsgdecoder}, - {"MakeMessageToStringEncoder", lupb_pb_makemsgtostrencoder}, + {"decode", lupb_pb_decode}, + {"encode", lupb_pb_encode}, {NULL, NULL} }; @@ -144,7 +65,5 @@ int luaopen_upb_pb_c(lua_State *L) { return 1; } - lupb_register_type(L, LUPB_PBDECODERMETHOD, NULL, decodermethod_mm); - return 1; } diff --git a/upb/decode.c b/upb/decode.c index b5033f0..ca18e95 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -172,6 +172,7 @@ static bool upb_array_grow(upb_array *arr, size_t elements) { size_t new_bytes; size_t old_bytes; void *new_data; + upb_alloc *alloc = upb_arena_alloc(arr->arena); while (new_size < needed) { new_size *= 2; @@ -179,7 +180,7 @@ static bool upb_array_grow(upb_array *arr, size_t elements) { old_bytes = arr->len * arr->element_size; new_bytes = new_size * arr->element_size; - new_data = upb_realloc(arr->alloc, arr->data, old_bytes, new_bytes); + new_data = upb_realloc(alloc, arr->data, old_bytes, new_bytes); CHK(new_data); arr->data = new_data; @@ -212,12 +213,11 @@ static upb_array *upb_getorcreatearr(upb_decstate *d, upb_array *arr = upb_getarr(frame, field); if (!arr) { - arr = upb_env_malloc(d->env, sizeof(*arr)); + upb_fieldtype_t type = upb_desctype_to_fieldtype[field->descriptortype]; + arr = upb_array_new(type, upb_env_arena(d->env)); if (!arr) { return NULL; } - upb_array_init(arr, upb_desctype_to_fieldtype[field->descriptortype], - upb_arena_alloc(upb_env_arena(d->env))); *(upb_array**)&frame->msg[field->offset] = arr; } @@ -278,10 +278,8 @@ static bool upb_decode_submsg(upb_decstate *d, UPB_ASSERT(subm); if (!submsg) { - submsg = upb_env_malloc(d->env, upb_msg_sizeof((upb_msglayout *)subm)); + submsg = upb_msg_new((upb_msglayout *)subm, upb_env_arena(d->env)); CHK(submsg); - submsg = upb_msg_init( - submsg, (upb_msglayout*)subm, upb_arena_alloc(upb_env_arena(d->env))); *(void**)submsg_slot = submsg; } @@ -460,10 +458,8 @@ static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, subm = frame->m->submsgs[field->submsg_index]; UPB_ASSERT(subm); - submsg = upb_env_malloc(d->env, upb_msg_sizeof((upb_msglayout *)subm)); + submsg = upb_msg_new((upb_msglayout *)subm, upb_env_arena(d->env)); CHK(submsg); - submsg = upb_msg_init(submsg, (upb_msglayout*)subm, - upb_arena_alloc(upb_env_arena(d->env))); field_mem = upb_array_add(arr, 1); CHK(field_mem); diff --git a/upb/msg.c b/upb/msg.c index 527587f..501e4c3 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -22,9 +22,6 @@ bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) { type == UPB_TYPE_UINT64 || type == UPB_TYPE_STRING; } -void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size); -void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size); - #define PTR_AT(msg, ofs, type) (type*)((char*)msg + ofs) #define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void) #define ENCODE_MAX_NESTING 64 @@ -176,16 +173,6 @@ static size_t upb_msglayout_place(upb_msglayout *l, size_t size) { return ret; } -static uint32_t upb_msglayout_offset(const upb_msglayout *l, - const upb_fielddef *f) { - return l->data.fields[upb_fielddef_index(f)].offset; -} - -static uint32_t upb_msglayout_hasbit(const upb_msglayout *l, - const upb_fielddef *f) { - return l->data.fields[upb_fielddef_index(f)].hasbit; -} - static bool upb_msglayout_initdefault(upb_msglayout *l, const upb_msgdef *m) { upb_msg_field_iter it; @@ -420,229 +407,6 @@ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, } } -/* Our handlers that we don't expose externally. */ - -void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) { - uint32_t ofs = (uintptr_t)hd; - upb_alloc *alloc = upb_msg_alloc(msg); - upb_msgval val; - UPB_UNUSED(size_hint); - - val = upb_msgval_read(msg, ofs, upb_msgval_sizeof(UPB_TYPE_STRING)); - - upb_free(alloc, (void*)val.str.data); - val.str.data = NULL; - val.str.size = 0; - - upb_msgval_write(msg, ofs, val, upb_msgval_sizeof(UPB_TYPE_STRING)); - return msg; -} - -size_t upb_msg_str(void *msg, const void *hd, const char *ptr, size_t size, - const upb_bufhandle *handle) { - uint32_t ofs = (uintptr_t)hd; - upb_alloc *alloc = upb_msg_alloc(msg); - upb_msgval val; - size_t newsize; - UPB_UNUSED(handle); - - val = upb_msgval_read(msg, ofs, upb_msgval_sizeof(UPB_TYPE_STRING)); - - newsize = val.str.size + size; - val.str.data = upb_realloc(alloc, (void*)val.str.data, val.str.size, newsize); - - if (!val.str.data) { - return false; - } - - memcpy((char*)val.str.data + val.str.size, ptr, size); - val.str.size = newsize; - upb_msgval_write(msg, ofs, val, upb_msgval_sizeof(UPB_TYPE_STRING)); - return size; -} - -static void callback(const void *closure, upb_handlers *h) { - upb_msgfactory *factory = (upb_msgfactory*)closure; - const upb_msgdef *md = upb_handlers_msgdef(h); - const upb_msglayout* layout = upb_msgfactory_getlayout(factory, md); - upb_msg_field_iter i; - UPB_UNUSED(factory); - - for(upb_msg_field_begin(&i, md); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - size_t offset = upb_msglayout_offset(layout, f); - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&attr, (void*)offset); - - if (upb_fielddef_isseq(f)) { - } else if (upb_fielddef_isstring(f)) { - upb_handlers_setstartstr(h, f, upb_msg_startstr, &attr); - upb_handlers_setstring(h, f, upb_msg_str, &attr); - } else { - upb_msg_setscalarhandler( - h, f, offset, upb_msglayout_hasbit(layout, f)); - } - } -} - -const upb_handlers *upb_msgfactory_getmergehandlers(upb_msgfactory *f, - const upb_msgdef *m) { - upb_msgfactory *mutable_f = (void*)f; - - /* TODO(haberman): properly cache these. */ - const upb_handlers *ret = upb_handlers_newfrozen(m, f, callback, f); - upb_inttable_push(&mutable_f->mergehandlers, upb_value_constptr(ret)); - - return ret; -} - -const upb_visitorplan *upb_msgfactory_getvisitorplan(upb_msgfactory *f, - const upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - return (const upb_visitorplan*)upb_msgfactory_getlayout(f, md); -} - - -/** upb_visitor ***************************************************************/ - -struct upb_visitor { - const upb_msglayout *layout; - upb_sink *sink; -}; - -static upb_selector_t getsel2(const upb_fielddef *f, upb_handlertype_t type) { - upb_selector_t ret; - bool ok = upb_handlers_getselector(f, type, &ret); - UPB_ASSERT(ok); - return ret; -} - -static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f, - const upb_msglayout *layout) { - int field_index = upb_fielddef_index(f); - if (upb_fielddef_isseq(f)) { - return upb_msgval_getarr(upb_msg_get(msg, field_index, layout)) != NULL; - } else if (upb_msgdef_syntax(upb_fielddef_containingtype(f)) == - UPB_SYNTAX_PROTO2) { - return upb_msg_has(msg, field_index, layout); - } else { - upb_msgval val = upb_msg_get(msg, field_index, layout); - switch (upb_fielddef_type(f)) { - case UPB_TYPE_FLOAT: - return upb_msgval_getfloat(val) != 0; - case UPB_TYPE_DOUBLE: - return upb_msgval_getdouble(val) != 0; - case UPB_TYPE_BOOL: - return upb_msgval_getbool(val); - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: - return upb_msgval_getint32(val) != 0; - case UPB_TYPE_UINT32: - return upb_msgval_getuint32(val) != 0; - case UPB_TYPE_INT64: - return upb_msgval_getint64(val) != 0; - case UPB_TYPE_UINT64: - return upb_msgval_getuint64(val) != 0; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - return upb_msgval_getstr(val).size > 0; - case UPB_TYPE_MESSAGE: - return upb_msgval_getmsg(val) != NULL; - } - UPB_UNREACHABLE(); - } -} - -static bool upb_visitor_visitmsg2(const upb_msg *msg, - const upb_msglayout *layout, upb_sink *sink, - int depth) { - const upb_msgdef *md = upb_handlers_msgdef(sink->handlers); - upb_msg_field_iter i; - upb_status status; - - upb_sink_startmsg(sink); - - /* Protect against cycles (possible because users may freely reassign message - * and repeated fields) by imposing a maximum recursion depth. */ - if (depth > ENCODE_MAX_NESTING) { - return false; - } - - for (upb_msg_field_begin(&i, md); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - upb_msgval val; - - if (!upb_visitor_hasfield(msg, f, layout)) { - continue; - } - - val = upb_msg_get(msg, upb_fielddef_index(f), layout); - - if (upb_fielddef_isseq(f)) { - const upb_array *arr = upb_msgval_getarr(val); - UPB_ASSERT(arr); - /* TODO: putary(ary, f, sink, depth);*/ - } else if (upb_fielddef_issubmsg(f)) { - const upb_map *map = upb_msgval_getmap(val); - UPB_ASSERT(map); - /* TODO: putmap(map, f, sink, depth);*/ - } else if (upb_fielddef_isstring(f)) { - /* TODO putstr(); */ - } else { - upb_selector_t sel = getsel2(f, upb_handlers_getprimitivehandlertype(f)); - UPB_ASSERT(upb_fielddef_isprimitive(f)); - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_FLOAT: - CHECK_TRUE(upb_sink_putfloat(sink, sel, upb_msgval_getfloat(val))); - break; - case UPB_TYPE_DOUBLE: - CHECK_TRUE(upb_sink_putdouble(sink, sel, upb_msgval_getdouble(val))); - break; - case UPB_TYPE_BOOL: - CHECK_TRUE(upb_sink_putbool(sink, sel, upb_msgval_getbool(val))); - break; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: - CHECK_TRUE(upb_sink_putint32(sink, sel, upb_msgval_getint32(val))); - break; - case UPB_TYPE_UINT32: - CHECK_TRUE(upb_sink_putuint32(sink, sel, upb_msgval_getuint32(val))); - break; - case UPB_TYPE_INT64: - CHECK_TRUE(upb_sink_putint64(sink, sel, upb_msgval_getint64(val))); - break; - case UPB_TYPE_UINT64: - CHECK_TRUE(upb_sink_putuint64(sink, sel, upb_msgval_getuint64(val))); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: - UPB_UNREACHABLE(); - } - } - } - - upb_sink_endmsg(sink, &status); - return true; -} - -upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp, - upb_sink *output) { - upb_visitor *visitor = upb_env_malloc(e, sizeof(*visitor)); - visitor->layout = (const upb_msglayout*)vp; - visitor->sink = output; - return visitor; -} - -bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) { - return upb_visitor_visitmsg2(msg, visitor->layout, visitor->sink, 0); -} - /** upb_msg *******************************************************************/ @@ -658,7 +422,7 @@ bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) { /* Used when a message is not extendable. */ typedef struct { /* TODO(haberman): add unknown fields. */ - upb_alloc *alloc; + upb_arena *arena; } upb_msg_internal; /* Used when a message is extendable. */ @@ -702,12 +466,20 @@ static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index, return PTR_AT(msg, l->data.oneofs[field->oneof_index].case_offset, uint32_t); } -size_t upb_msg_sizeof(const upb_msglayout *l) { +static size_t upb_msg_sizeof(const upb_msglayout *l) { return l->data.size + upb_msg_internalsize(l); } -upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) { - upb_msg *msg = VOIDPTR_AT(mem, upb_msg_internalsize(l)); +upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) { + upb_alloc *alloc = upb_arena_alloc(a); + void *mem = upb_malloc(alloc, upb_msg_sizeof(l)); + upb_msg *msg; + + if (!mem) { + return NULL; + } + + msg = VOIDPTR_AT(mem, upb_msg_internalsize(l)); /* Initialize normal members. */ if (l->data.default_msg) { @@ -717,7 +489,7 @@ upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) { } /* Initialize internal members. */ - upb_msg_getinternal(msg)->alloc = a; + upb_msg_getinternal(msg)->arena = a; if (l->data.extendable) { upb_msg_getinternalwithext(msg, l)->extdict = NULL; @@ -726,29 +498,8 @@ upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) { return msg; } -void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) { - if (l->data.extendable) { - upb_inttable *ext_dict = upb_msg_getinternalwithext(msg, l)->extdict; - if (ext_dict) { - upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg)); - upb_free(upb_msg_alloc(msg), ext_dict); - } - } - - return VOIDPTR_AT(msg, -upb_msg_internalsize(l)); -} - -upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) { - void *mem = upb_malloc(a, upb_msg_sizeof(l)); - return mem ? upb_msg_init(mem, l, a) : NULL; -} - -void upb_msg_free(upb_msg *msg, const upb_msglayout *l) { - upb_free(upb_msg_alloc(msg), upb_msg_uninit(msg, l)); -} - -upb_alloc *upb_msg_alloc(const upb_msg *msg) { - return upb_msg_getinternal_const(msg)->alloc; +upb_arena *upb_msg_arena(const upb_msg *msg) { + return upb_msg_getinternal_const(msg)->arena; } bool upb_msg_has(const upb_msg *msg, @@ -805,37 +556,22 @@ void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val, #define DEREF_ARR(arr, i, type) ((type*)arr->data)[i] -size_t upb_array_sizeof(upb_fieldtype_t type) { - UPB_UNUSED(type); - return sizeof(upb_array); -} - -void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *alloc) { - arr->type = type; - arr->data = NULL; - arr->len = 0; - arr->size = 0; - arr->element_size = upb_msgval_sizeof(type); - arr->alloc = alloc; -} - -void upb_array_uninit(upb_array *arr) { - upb_free(arr->alloc, arr->data); -} - -upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a) { - upb_array *ret = upb_malloc(a, upb_array_sizeof(type)); +upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a) { + upb_alloc *alloc = upb_arena_alloc(a); + upb_array *ret = upb_malloc(alloc, sizeof(upb_array)); - if (ret) { - upb_array_init(ret, type, a); + if (!ret) { + return NULL; } - return ret; -} + ret->type = type; + ret->data = NULL; + ret->len = 0; + ret->size = 0; + ret->element_size = upb_msgval_sizeof(type); + ret->arena = a; -void upb_array_free(upb_array *arr) { - upb_array_uninit(arr); - upb_free(arr->alloc, arr); + return ret; } size_t upb_array_size(const upb_array *arr) { @@ -862,8 +598,9 @@ bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) { size_t new_size = UPB_MAX(arr->size * 2, 8); size_t new_bytes = new_size * arr->element_size; size_t old_bytes = arr->size * arr->element_size; + upb_alloc *alloc = upb_arena_alloc(arr->arena); upb_msgval *new_data = - upb_realloc(arr->alloc, arr->data, old_bytes, new_bytes); + upb_realloc(alloc, arr->data, old_bytes, new_bytes); if (!new_data) { return false; @@ -889,7 +626,7 @@ struct upb_map { /* We may want to optimize this to use inttable where possible, for greater * efficiency and lower memory footprint. */ upb_strtable strtab; - upb_alloc *alloc; + upb_arena *arena; }; static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key, @@ -940,52 +677,28 @@ static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key, UPB_UNREACHABLE(); } -size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype) { - /* Size does not currently depend on key/value type. */ - UPB_UNUSED(ktype); - UPB_UNUSED(vtype); - return sizeof(upb_map); -} - -bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype, - upb_alloc *a) { - upb_ctype_t vtabtype = upb_fieldtotabtype(vtype); - UPB_ASSERT(upb_fieldtype_mapkeyok(ktype)); - map->key_type = ktype; - map->val_type = vtype; - map->alloc = a; - - if (!upb_strtable_init2(&map->strtab, vtabtype, a)) { - return false; - } - - return true; -} - -void upb_map_uninit(upb_map *map) { - upb_strtable_uninit2(&map->strtab, map->alloc); -} - upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, - upb_alloc *a) { - upb_map *map = upb_malloc(a, upb_map_sizeof(ktype, vtype)); + upb_arena *a) { + upb_ctype_t vtabtype = upb_fieldtotabtype(vtype); + upb_alloc *alloc = upb_arena_alloc(a); + upb_map *map = upb_malloc(alloc, sizeof(upb_map)); if (!map) { return NULL; } - if (!upb_map_init(map, ktype, vtype, a)) { + UPB_ASSERT(upb_fieldtype_mapkeyok(ktype)); + map->key_type = ktype; + map->val_type = vtype; + map->arena = a; + + if (!upb_strtable_init2(&map->strtab, vtabtype, alloc)) { return NULL; } return map; } -void upb_map_free(upb_map *map) { - upb_map_uninit(map); - upb_free(map->alloc, map); -} - size_t upb_map_size(const upb_map *map) { return upb_strtable_count(&map->strtab); } @@ -1019,7 +732,7 @@ bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val, size_t key_len; upb_value tabval = upb_toval(val); upb_value removedtabval; - upb_alloc *a = map->alloc; + upb_alloc *a = upb_arena_alloc(map->arena); upb_map_tokey(map->key_type, &key, &key_str, &key_len); @@ -1035,7 +748,7 @@ bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val, bool upb_map_del(upb_map *map, upb_msgval key) { const char *key_str; size_t key_len; - upb_alloc *a = map->alloc; + upb_alloc *a = upb_arena_alloc(map->arena); upb_map_tokey(map->key_type, &key, &key_str, &key_len); return upb_strtable_remove3(&map->strtab, key_str, key_len, NULL, a); @@ -1097,99 +810,3 @@ void upb_mapiter_setdone(upb_mapiter *i) { bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) { return upb_strtable_iter_isequal(&i1->iter, &i2->iter); } - - -/** Handlers for upb_msg ******************************************************/ - -typedef struct { - size_t offset; - int32_t hasbit; -} upb_msg_handlerdata; - -/* Fallback implementation if the handler is not specialized by the producer. */ -#define MSG_WRITER(type, ctype) \ - bool upb_msg_set ## type (void *c, const void *hd, ctype val) { \ - uint8_t *m = c; \ - const upb_msg_handlerdata *d = hd; \ - if (d->hasbit > 0) \ - *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \ - *(ctype*)&m[d->offset] = val; \ - return true; \ - } \ - -MSG_WRITER(double, double) -MSG_WRITER(float, float) -MSG_WRITER(int32, int32_t) -MSG_WRITER(int64, int64_t) -MSG_WRITER(uint32, uint32_t) -MSG_WRITER(uint64, uint64_t) -MSG_WRITER(bool, bool) - -bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f, - size_t offset, int32_t hasbit) { - upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - bool ok; - - upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d)); - if (!d) return false; - d->offset = offset; - d->hasbit = hasbit; - - upb_handlerattr_sethandlerdata(&attr, d); - upb_handlerattr_setalwaysok(&attr, true); - upb_handlers_addcleanup(h, d, upb_gfree); - -#define TYPE(u, l) \ - case UPB_TYPE_##u: \ - ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break; - - ok = false; - - switch (upb_fielddef_type(f)) { - TYPE(INT64, int64); - TYPE(INT32, int32); - TYPE(ENUM, int32); - TYPE(UINT64, uint64); - TYPE(UINT32, uint32); - TYPE(DOUBLE, double); - TYPE(FLOAT, float); - TYPE(BOOL, bool); - default: UPB_ASSERT(false); break; - } -#undef TYPE - - upb_handlerattr_uninit(&attr); - return ok; -} - -bool upb_msg_getscalarhandlerdata(const upb_handlers *h, - upb_selector_t s, - upb_fieldtype_t *type, - size_t *offset, - int32_t *hasbit) { - const upb_msg_handlerdata *d; - upb_func *f = upb_handlers_gethandler(h, s); - - if ((upb_int64_handlerfunc*)f == upb_msg_setint64) { - *type = UPB_TYPE_INT64; - } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) { - *type = UPB_TYPE_INT32; - } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) { - *type = UPB_TYPE_UINT64; - } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) { - *type = UPB_TYPE_UINT32; - } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) { - *type = UPB_TYPE_DOUBLE; - } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) { - *type = UPB_TYPE_FLOAT; - } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) { - *type = UPB_TYPE_BOOL; - } else { - return false; - } - - d = upb_handlers_gethandlerdata(h, s); - *offset = d->offset; - *hasbit = d->hasbit; - return true; -} diff --git a/upb/msg.h b/upb/msg.h index e875f6e..17c3e65 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -4,22 +4,17 @@ ** However it differs from other common representations like ** google::protobuf::Message in one key way: it does not prescribe any ** ownership between messages and submessages, and it relies on the -** client to delete each message/submessage/array/map at the appropriate -** time. +** client to ensure that each submessage/array/map outlives its parent. +** +** All messages, arrays, and maps live in an Arena. If the entire message +** tree is in the same arena, ensuring proper lifetimes is simple. However +** the client can mix arenas as long as they ensure that there are no +** dangling pointers. ** ** A client can access a upb::Message without knowing anything about ** ownership semantics, but to create or mutate a message a user needs ** to implement the memory management themselves. ** -** Currently all messages, arrays, and maps store a upb_alloc* internally. -** Mutating operations use this when they require dynamically-allocated -** memory. We could potentially eliminate this size overhead later by -** letting the user flip a bit on the factory that prevents this from -** being stored. The user would then need to use separate functions where -** the upb_alloc* is passed explicitly. However for handlers to populate -** such structures, they would need a place to store this upb_alloc* during -** parsing; upb_handlers don't currently have a good way to accommodate this. -** ** TODO: UTF-8 checking? **/ @@ -66,16 +61,6 @@ typedef void upb_msg; * msglayout. */ -/** upb_visitor ***************************************************************/ - -/* upb_visitor will visit all the fields of a message and its submessages. It - * uses a upb_visitorplan which you can obtain from a upb_msgfactory. */ - -upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp, - upb_sink *output); -bool upb_visitor_visitmsg(upb_visitor *v, const upb_msg *msg); - - /** upb_msgfactory ************************************************************/ /* A upb_msgfactory contains a cache of upb_msglayout, upb_handlers, and @@ -104,10 +89,6 @@ const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f); * upb_msgfactory. */ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, const upb_msgdef *m); -const upb_handlers *upb_msgfactory_getmergehandlers(upb_msgfactory *f, - const upb_msgdef *m); -const upb_visitorplan *upb_msgfactory_getvisitorplan(upb_msgfactory *f, - const upb_handlers *h); /** upb_stringview ************************************************************/ @@ -183,52 +164,13 @@ UPB_INLINE upb_msgval upb_msgval_makestr(const char *data, size_t size) { /** upb_msg *******************************************************************/ /* A upb_msg represents a protobuf message. It always corresponds to a specific - * upb_msglayout, which describes how it is laid out in memory. - * - * The message will have a fixed size, as returned by upb_msg_sizeof(), which - * will be used to store fixed-length fields. The upb_msg may also allocate - * dynamic memory internally to store data such as: - * - * - extensions - * - unknown fields - */ + * upb_msglayout, which describes how it is laid out in memory. */ -/* Returns the size of a message given this layout. */ -size_t upb_msg_sizeof(const upb_msglayout *l); +/* Creates a new message of the given type/layout in this arena. */ +upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); -/* upb_msg_init() / upb_msg_uninit() allow the user to use a pre-allocated - * block of memory as a message. The block's size should be upb_msg_sizeof(). - * upb_msg_uninit() must be called to release internally-allocated memory - * unless the allocator is an arena that does not require freeing. - * - * Please note that upb_msg_init() may return a value that is different than - * |msg|, so you must assign the return value and not cast your memory block - * to upb_msg* directly! - * - * Please note that upb_msg_uninit() does *not* free any submessages, maps, - * or arrays referred to by this message's fields. You must free them manually - * yourself. - * - * upb_msg_uninit returns the original memory block, which may be useful if - * you dynamically allocated it (though upb_msg_new() would normally be more - * appropriate in this case). */ -upb_msg *upb_msg_init(void *msg, const upb_msglayout *l, upb_alloc *a); -void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); - -/* Like upb_msg_init() / upb_msg_uninit(), except the message's memory is - * allocated / freed from the given upb_alloc. */ -upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a); -void upb_msg_free(upb_msg *msg, const upb_msglayout *l); - -/* Returns the upb_alloc for the given message. - * TODO(haberman): get rid of this? Not sure we want to be storing this - * for every message. */ -upb_alloc *upb_msg_alloc(const upb_msg *msg); - -/* Packs the tree of messages rooted at "msg" into a single hunk of memory, - * allocated from the given allocator. */ -void *upb_msg_pack(const upb_msg *msg, const upb_msglayout *l, - void *p, size_t *ofs, size_t size); +/* Returns the arena for the given message. */ +upb_arena *upb_msg_arena(const upb_msg *msg); /* Read-only message API. Can be safely called by anyone. */ @@ -282,16 +224,12 @@ bool upb_msg_clearfield(upb_msg *msg, * semantics are the same as upb_msg. A upb_array allocates dynamic * memory internally for the array elements. */ -size_t upb_array_sizeof(upb_fieldtype_t type); -void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *a); -void upb_array_uninit(upb_array *arr); -upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a); -void upb_array_free(upb_array *arr); +upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a); +upb_fieldtype_t upb_array_type(const upb_array *arr); /* Read-only interface. Safe for anyone to call. */ size_t upb_array_size(const upb_array *arr); -upb_fieldtype_t upb_array_type(const upb_array *arr); upb_msgval upb_array_get(const upb_array *arr, size_t i); /* Write interface. May only be called by the message's owner who can enforce @@ -308,12 +246,8 @@ bool upb_array_set(upb_array *arr, size_t i, upb_msgval val); * So you must ensure that any string or message values outlive the map, and you * must delete them manually when they are no longer required. */ -size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype); -bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype, - upb_alloc *a); -void upb_map_uninit(upb_map *map); -upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, upb_alloc *a); -void upb_map_free(upb_map *map); +upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, + upb_arena *a); /* Read-only interface. Safe for anyone to call. */ diff --git a/upb/structs.int.h b/upb/structs.int.h index 242155b..6bc502e 100644 --- a/upb/structs.int.h +++ b/upb/structs.int.h @@ -11,7 +11,7 @@ struct upb_array { void *data; /* Each element is element_size. */ size_t len; /* Measured in elements. */ size_t size; /* Measured in elements. */ - upb_alloc *alloc; + upb_arena *arena; }; #endif /* UPB_STRUCTS_H_ */ -- cgit v1.2.3