From 1b9d37a00ebae8b59773c8501d8712e1c3335302 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 1 Jul 2017 15:15:52 -0700 Subject: Start migrating upb_msglayout to be suitable for generated code. This involves: - remove upb_msglayout -> upb_msgfactory dependency. - remove upb_msglayout -> upb_msgdef dependency (in progress). - make upb_msglayout use a representation that can be statically initialized by generated code. The goal here is that upb_msglayout becomes a kind of "descriptor lite": it contains enough data to parser and serialize protobufs and manipulate a upb_msg in memory, while being far smaller and simpler than a full descriptor. It also does not include field names, which can be a benefit for applications that do not want to leak field names. Generated code can then create a upb_msglayout, and do most things without ever needing to construct full descriptors/defs if they don't want to. --- Makefile | 2 +- tests/bindings/lua/test_upb.lua | 35 ++++--- upb/bindings/lua/msg.c | 24 ++--- upb/bindings/lua/upb.h | 1 + upb/bindings/lua/upb/pb.c | 2 +- upb/msg.c | 210 ++++++++++++++++++++++++---------------- upb/msg.h | 69 +++++++++---- 7 files changed, 217 insertions(+), 126 deletions(-) diff --git a/Makefile b/Makefile index 09b71c4..581993b 100644 --- a/Makefile +++ b/Makefile @@ -463,7 +463,7 @@ testlua: lua echo LUA $$test; \ LUA_PATH="third_party/lunit/?.lua;upb/bindings/lua/?.lua" \ LUA_CPATH=upb/bindings/lua/?.so \ - lua $$test; \ + $(RUN_UNDER) lua $$test; \ done clean: clean_lua diff --git a/tests/bindings/lua/test_upb.lua b/tests/bindings/lua/test_upb.lua index 1dc0717..07d794c 100644 --- a/tests/bindings/lua/test_upb.lua +++ b/tests/bindings/lua/test_upb.lua @@ -565,13 +565,20 @@ function test_msg_primitives() local symtab = upb.SymbolTable{ upb.MessageDef{full_name = "TestMessage", fields = { - upb.FieldDef{name = "i32", number = 1, type = upb.TYPE_INT32}, - upb.FieldDef{name = "u32", number = 2, type = upb.TYPE_UINT32}, - upb.FieldDef{name = "i64", number = 3, type = upb.TYPE_INT64}, - upb.FieldDef{name = "u64", number = 4, type = upb.TYPE_UINT64}, - upb.FieldDef{name = "dbl", number = 5, type = upb.TYPE_DOUBLE}, - upb.FieldDef{name = "flt", number = 6, type = upb.TYPE_FLOAT}, - upb.FieldDef{name = "bool", number = 7, type = upb.TYPE_BOOL}, + upb.FieldDef{ + name = "i32", number = 1, type = upb.TYPE_INT32, default = 1}, + upb.FieldDef{ + name = "u32", number = 2, type = upb.TYPE_UINT32, default = 2}, + upb.FieldDef{ + name = "i64", number = 3, type = upb.TYPE_INT64, default = 3}, + upb.FieldDef{ + name = "u64", number = 4, type = upb.TYPE_UINT64, default = 4}, + upb.FieldDef{ + name = "dbl", number = 5, type = upb.TYPE_DOUBLE, default = 5}, + upb.FieldDef{ + name = "flt", number = 6, type = upb.TYPE_FLOAT, default = 6}, + upb.FieldDef{ + name = "bool", number = 7, type = upb.TYPE_BOOL, default = true}, } } } @@ -581,13 +588,13 @@ function test_msg_primitives() msg = TestMessage() -- Unset member returns default value. - assert_equal(0, msg.i32) - assert_equal(0, msg.u32) - assert_equal(0, msg.i64) - assert_equal(0, msg.u64) - assert_equal(0, msg.dbl) - assert_equal(0, msg.flt) - assert_equal(false, msg.bool) + assert_equal(1, msg.i32) + assert_equal(2, msg.u32) + assert_equal(3, msg.i64) + assert_equal(4, msg.u64) + assert_equal(5, msg.dbl) + assert_equal(6, msg.flt) + assert_equal(true, msg.bool) -- Attempts to access non-existent fields fail. assert_error_match("no such field", function() msg.no_such = 1 end) diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 64c8e7c..c222ca5 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -189,7 +189,8 @@ typedef struct lupb_msgfactory { upb_msgfactory *factory; } lupb_msgfactory; -static int lupb_msgclass_pushnew(lua_State *L, int factory, const upb_msglayout *l); +static int lupb_msgclass_pushnew(lua_State *L, int factory, + const upb_msgdef *md); /* lupb_msgfactory helpers. */ @@ -199,8 +200,6 @@ static lupb_msgfactory *lupb_msgfactory_check(lua_State *L, int narg) { static void lupb_msgfactory_pushmsgclass(lua_State *L, int narg, const upb_msgdef *md) { - const lupb_msgfactory *lfactory = lupb_msgfactory_check(L, narg); - lupb_getuservalue(L, narg); lua_pushlightuserdata(L, (void*)md); lua_rawget(L, -2); @@ -208,8 +207,7 @@ static void lupb_msgfactory_pushmsgclass(lua_State *L, int narg, if (lua_isnil(L, -1)) { lua_pop(L, 1); /* TODO: verify md is in symtab? */ - lupb_msgclass_pushnew(L, narg, - upb_msgfactory_getlayout(lfactory->factory, md)); + lupb_msgclass_pushnew(L, narg, md); /* Set in userval. */ lua_pushlightuserdata(L, (void*)md); @@ -317,6 +315,10 @@ const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg) { lmsgclass->lfactory->factory, upb_msglayout_msgdef(lmsgclass->layout)); } +upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass) { + return lmsgclass->lfactory->factory; +} + /** * lupb_msgclass_typecheck() * @@ -360,13 +362,13 @@ static const lupb_msgclass *lupb_msgclass_getsubmsgclass(lua_State *L, int narg, return lupb_msgclass_msgclassfor(L, narg, upb_fielddef_msgsubdef(f)); } -static int lupb_msgclass_pushnew(lua_State *L, int factory, const upb_msglayout *l) { +static int lupb_msgclass_pushnew(lua_State *L, int factory, + const upb_msgdef *md) { const lupb_msgfactory *lfactory = lupb_msgfactory_check(L, factory); lupb_msgclass *lmc = lupb_newuserdata(L, sizeof(*lmc), LUPB_MSGCLASS); - UPB_ASSERT(l); lupb_uservalseti(L, -1, LUPB_MSGCLASS_FACTORY, factory); - lmc->layout = l; + lmc->layout = upb_msgfactory_getlayout(lfactory->factory, md); lmc->lfactory = lfactory; return 1; @@ -933,7 +935,7 @@ static const lupb_msgclass *lupb_msg_getsubmsgclass(lua_State *L, int narg, return lupb_msgclass_getsubmsgclass(L, -1, f); } -int lupb_msg_pushref(lua_State *L, int msgclass, void *msg) { +int lupb_msg_pushref(lua_State *L, int msgclass, upb_msg *msg) { const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, msgclass); lupb_msg *lmsg = lupb_newuserdata(L, sizeof(lupb_msg), LUPB_MSG); @@ -966,8 +968,8 @@ static int lupb_msg_pushnew(lua_State *L, int narg) { lupb_msg *lmsg = lupb_newuserdata(L, size, LUPB_MSG); lmsg->lmsgclass = lmsgclass; - lmsg->msg = ADD_BYTES(lmsg, sizeof(*lmsg)); - upb_msg_init(lmsg->msg, lmsgclass->layout, lupb_alloc_get(L)); + lmsg->msg = upb_msg_init( + ADD_BYTES(lmsg, sizeof(*lmsg)), lmsgclass->layout, lupb_alloc_get(L)); lupb_uservalseti(L, -1, LUPB_MSG_MSGCLASSINDEX, narg); diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h index 88a201c..982ae57 100644 --- a/upb/bindings/lua/upb.h +++ b/upb/bindings/lua/upb.h @@ -135,6 +135,7 @@ const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, const lupb_msgclass *lupb_msgclass_check(lua_State *L, int narg); const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg); const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg); +upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass); void lupb_msg_registertypes(lua_State *L); #endif /* UPB_LUA_UPB_H_ */ diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index a25d1ac..731430d 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -106,7 +106,7 @@ static int lupb_pb_makemsgtostrencoder(lua_State *L) { const upb_msglayout *layout = lupb_msgclass_getlayout(L, 1); const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, 1); const upb_msgdef *md = upb_msglayout_msgdef(layout); - upb_msgfactory *factory = upb_msglayout_factory(layout); + upb_msgfactory *factory = lupb_msgclass_getfactory(lmsgclass); const upb_handlers *encode_handlers; const upb_visitorplan *vp; diff --git a/upb/msg.c b/upb/msg.c index 39e3035..26d0e98 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -24,7 +24,7 @@ bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) { void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size); void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size); -#define CHARPTR_AT(msg, ofs) ((char*)msg + ofs) +#define VOIDPTR_AT(msg, ofs) (void*)((char*)msg + ofs) #define ENCODE_MAX_NESTING 64 #define CHECK_TRUE(x) if (!(x)) { return false; } @@ -150,16 +150,8 @@ static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) { /** upb_msglayout *************************************************************/ struct upb_msglayout { - upb_msgfactory *factory; - const upb_msgdef *msgdef; - size_t size; - size_t extdict_offset; - void *default_msg; - uint32_t *field_offsets; - uint32_t *case_offsets; - uint32_t *hasbits; - bool has_extdict; - uint8_t align; + const upb_msgdef *msgdef; /* TODO(haberman): remove. */ + struct upb_msglayout_msginit_v1 data; }; static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) { @@ -167,7 +159,7 @@ static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) { } static void upb_msglayout_free(upb_msglayout *l) { - upb_gfree(l->default_msg); + upb_gfree(l->data.default_msg); upb_gfree(l); } @@ -178,35 +170,35 @@ const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l) { static size_t upb_msglayout_place(upb_msglayout *l, size_t size) { size_t ret; - l->size = align_up(l->size, size); - l->align = align_up(l->align, size); - ret = l->size; - l->size += size; + l->data.size = align_up(l->data.size, size); + l->data.align = align_up(l->data.align, size); + ret = l->data.size; + l->data.size += size; return ret; } static uint32_t upb_msglayout_offset(const upb_msglayout *l, const upb_fielddef *f) { - return l->field_offsets[upb_fielddef_index(f)]; + return l->data.fields[upb_fielddef_index(f)].offset; } static uint32_t upb_msglayout_hasbit(const upb_msglayout *l, const upb_fielddef *f) { - return l->hasbits[upb_fielddef_index(f)]; + return l->data.fields[upb_fielddef_index(f)].hasbit; } static bool upb_msglayout_initdefault(upb_msglayout *l) { const upb_msgdef *m = l->msgdef; upb_msg_field_iter it; - if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->size) { + if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->data.size) { /* Allocate default message and set default values in it. */ - l->default_msg = upb_gmalloc(l->size); - if (!l->default_msg) { + l->data.default_msg = upb_gmalloc(l->data.size); + if (!l->data.default_msg) { return false; } - memset(l->default_msg, 0, l->size); + memset(l->data.default_msg, 0, l->data.size); for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); upb_msg_field_next(&it)) { @@ -219,7 +211,7 @@ static bool upb_msglayout_initdefault(upb_msglayout *l) { if (!upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f) && !upb_fielddef_isseq(f)) { - upb_msg_set(l->default_msg, f, upb_msgval_fromdefault(f), l); + upb_msg_set(l->data.default_msg, f, upb_msgval_fromdefault(f), l); } } } @@ -232,22 +224,29 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { upb_msg_oneof_iter oit; upb_msglayout *l; size_t hasbit; - size_t array_size = upb_msgdef_numfields(m) + upb_msgdef_numoneofs(m); + size_t submsg_count = 0; - if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2) { - array_size += upb_msgdef_numfields(m); /* hasbits. */ + for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8; + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + const upb_fielddef* f = upb_msg_iter_field(&it); + if (upb_fielddef_issubmsg(f)) { + submsg_count++; + } } - l = upb_gmalloc(sizeof(*l) + (sizeof(uint32_t) * array_size)); + l = upb_gmalloc(sizeof(*l)); if (!l) return NULL; memset(l, 0, sizeof(*l)); - l->msgdef = m; - l->align = 1; - l->field_offsets = (uint32_t*)CHARPTR_AT(l, sizeof(*l)); - l->case_offsets = l->field_offsets + upb_msgdef_numfields(m); - l->hasbits = l->case_offsets + upb_msgdef_numoneofs(m); + + /* TODO(haberman): check OOM. */ + l->data.fields = upb_gmalloc(upb_msgdef_numfields(m) * + sizeof(struct upb_msglayout_fieldinit_v1)); + l->data.submsgs = upb_gmalloc(submsg_count * sizeof(void*)); + l->data.case_offsets = upb_gmalloc(upb_msgdef_numoneofs(m) * + sizeof(*l->data.case_offsets)); /* Allocate data offsets in three stages: * @@ -265,12 +264,13 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { const upb_fielddef* f = upb_msg_iter_field(&it); if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) { - l->hasbits[upb_fielddef_index(f)] = hasbit++; + l->data.fields[upb_fielddef_index(f)].hasbit = hasbit++; } } /* Account for space used by hasbits. */ - l->size = div_round_up(hasbit, 8); + l->data.size = div_round_up(hasbit, 8); + l->data.align = 1; /* Allocate non-oneof fields. */ for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); @@ -279,13 +279,12 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { size_t field_size = upb_msg_fieldsize(f); size_t index = upb_fielddef_index(f); - if (upb_fielddef_containingoneof(f)) { /* Oneofs are handled separately below. */ continue; } - l->field_offsets[index] = upb_msglayout_place(l, field_size); + l->data.fields[index].offset = upb_msglayout_place(l, field_size); } /* Allocate oneof fields. Each oneof field consists of a uint32 for the case @@ -311,19 +310,19 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { case_offset = upb_msglayout_place(l, case_size); val_offset = upb_msglayout_place(l, field_size); - l->case_offsets[upb_oneofdef_index(oneof)] = case_offset; + l->data.case_offsets[upb_oneofdef_index(oneof)] = case_offset; /* Assign all fields in the oneof this same offset. */ for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit); upb_oneof_next(&fit)) { const upb_fielddef* f = upb_oneof_iter_field(&fit); - l->field_offsets[upb_fielddef_index(f)] = val_offset; + l->data.fields[upb_fielddef_index(f)].offset = val_offset; } } /* Size of the entire structure should be a multiple of its greatest * alignment. */ - l->size = align_up(l->size, l->align); + l->data.size = align_up(l->data.size, l->data.align); if (upb_msglayout_initdefault(l)) { return l; @@ -333,8 +332,17 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { } } -upb_msgfactory *upb_msglayout_factory(const upb_msglayout *layout) { - return layout->factory; +upb_msglayout *upb_msglayout_frominit_v1( + const struct upb_msglayout_msginit_v1 *init, upb_alloc *a) { + UPB_UNUSED(a); + /* If upb upgrades to a v2, this would create a heap-allocated v2. */ + return (upb_msglayout*)init; +} + +void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a) { + UPB_UNUSED(layout); + UPB_UNUSED(a); + /* If upb upgrades to a v2, this would free the heap-allocated v2. */ } @@ -393,7 +401,6 @@ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, upb_msglayout *l = upb_msglayout_new(m); upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l)); UPB_ASSERT(l); - l->factory = f; return l; } } @@ -402,8 +409,7 @@ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) { uint32_t ofs = (uintptr_t)hd; - /* We pass NULL here because we know we can get away with it. */ - upb_alloc *alloc = upb_msg_alloc(msg, NULL); + upb_alloc *alloc = upb_msg_alloc(msg); upb_msgval val; UPB_UNUSED(size_hint); @@ -420,8 +426,7 @@ void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) { size_t upb_msg_str(void *msg, const void *hd, const char *ptr, size_t size, const upb_bufhandle *handle) { uint32_t ofs = (uintptr_t)hd; - /* We pass NULL here because we know we can get away with it. */ - upb_alloc *alloc = upb_msg_alloc(msg, NULL); + upb_alloc *alloc = upb_msg_alloc(msg); upb_msgval val; size_t newsize; UPB_UNUSED(handle); @@ -628,20 +633,58 @@ bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) { /* If we always read/write as a consistent type to each address, this shouldn't * violate aliasing. */ -#define DEREF(msg, ofs, type) *(type*)CHARPTR_AT(msg, ofs) +#define DEREF(msg, ofs, type) *(type*)VOIDPTR_AT(msg, ofs) + +/* Internal members of a upb_msg. We can change this without breaking binary + * compatibility. We put these before the user's data. The user's upb_msg* + * points after the upb_msg_internal. */ + +/* Used when a message is not extendable. */ +typedef struct { + /* TODO(haberman): add unknown fields. */ + upb_alloc *alloc; +} upb_msg_internal; + +/* Used when a message is extendable. */ +typedef struct { + upb_inttable *extdict; + upb_msg_internal base; +} upb_msg_internal_withext; + +#define INTERNAL_MEMBERS_SIZE(l) \ + sizeof(upb_msg_internal) - (l->data.extendable * sizeof(void*)) + +static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal)); +} + +static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal)); +} + +static upb_msg_internal_withext *upb_msg_getinternalwithext( + upb_msg *msg, const upb_msglayout *l) { + UPB_ASSERT(l->data.extendable); + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); +} + +static const upb_msg_internal_withext *upb_msg_getinternalwithext_const( + const upb_msg *msg, const upb_msglayout *l) { + UPB_ASSERT(l->data.extendable); + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); +} static upb_inttable *upb_msg_trygetextdict(const upb_msg *msg, const upb_msglayout *l) { - return l->has_extdict ? DEREF(msg, l->extdict_offset, upb_inttable*) : NULL; + return upb_msg_getinternalwithext_const(msg, l)->extdict; } static upb_inttable *upb_msg_getextdict(upb_msg *msg, const upb_msglayout *l, upb_alloc *a) { upb_inttable *ext_dict; - UPB_ASSERT(l->has_extdict); - ext_dict = upb_msg_trygetextdict(msg, l); + ext_dict = upb_msg_getinternalwithext(msg, l)->extdict; if (!ext_dict) { ext_dict = upb_malloc(a, sizeof(upb_inttable)); @@ -656,7 +699,7 @@ static upb_inttable *upb_msg_getextdict(upb_msg *msg, return NULL; } - DEREF(msg, l->extdict_offset, upb_inttable*) = ext_dict; + upb_msg_getinternalwithext(msg, l)->extdict = ext_dict; } return ext_dict; @@ -665,7 +708,7 @@ static upb_inttable *upb_msg_getextdict(upb_msg *msg, static uint32_t upb_msg_getoneofint(const upb_msg *msg, const upb_oneofdef *o, const upb_msglayout *l) { - size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)]; + size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; return DEREF(msg, oneof_ofs, uint8_t); } @@ -673,7 +716,7 @@ static void upb_msg_setoneofcase(const upb_msg *msg, const upb_oneofdef *o, const upb_msglayout *l, uint32_t val) { - size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)]; + size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; DEREF(msg, oneof_ofs, uint8_t) = val; } @@ -683,46 +726,49 @@ static bool upb_msg_oneofis(const upb_msg *msg, const upb_msglayout *l, return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); } -size_t upb_msg_sizeof(const upb_msglayout *l) { return l->size; } +size_t upb_msg_sizeof(const upb_msglayout *l) { + return l->data.size + INTERNAL_MEMBERS_SIZE(l); +} -void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a) { - if (l->default_msg) { - memcpy(msg, l->default_msg, l->size); +upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) { + upb_msg *msg = VOIDPTR_AT(mem, INTERNAL_MEMBERS_SIZE(l)); + if (l->data.default_msg) { + memcpy(msg, l->data.default_msg, l->data.size); } else { - memset(msg, 0, l->size); + memset(msg, 0, l->data.size); + } + + upb_msg_getinternal(msg)->alloc = a; + if (l->data.extendable) { + upb_msg_getinternalwithext(msg, l)->extdict = NULL; } - /* Set arena pointer. */ - memcpy(msg, &a, sizeof(a)); + return msg; } -void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) { - upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); - if (ext_dict) { - upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg, l)); +void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) { + if (l->data.extendable) { + upb_inttable *ext_dict = upb_msg_getinternalwithext(msg, l)->extdict; + if (ext_dict) { + upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg)); + upb_free(upb_msg_alloc(msg), ext_dict); + } } + + return VOIDPTR_AT(msg, -INTERNAL_MEMBERS_SIZE(l)); } upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) { - upb_msg *msg = upb_malloc(a, upb_msg_sizeof(l)); - - if (msg) { - upb_msg_init(msg, l, a); - } - - return msg; + void *mem = upb_malloc(a, upb_msg_sizeof(l)); + return mem ? upb_msg_init(mem, l, a) : NULL; } void upb_msg_free(upb_msg *msg, const upb_msglayout *l) { - upb_msg_uninit(msg, l); - upb_free(upb_msg_alloc(msg, l), msg); + upb_free(upb_msg_alloc(msg), upb_msg_uninit(msg, l)); } -upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l) { - upb_alloc *alloc; - UPB_UNUSED(l); - memcpy(&alloc, msg, sizeof(alloc)); - return alloc; +upb_alloc *upb_msg_alloc(const upb_msg *msg) { + return upb_msg_getinternal_const(msg)->alloc; } bool upb_msg_has(const upb_msg *msg, @@ -743,7 +789,7 @@ bool upb_msg_has(const upb_msg *msg, return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); } else { /* Other fields are set when their hasbit is set. */ - uint32_t hasbit = l->hasbits[upb_fielddef_index(f)]; + uint32_t hasbit = l->data.fields[upb_fielddef_index(f)].hasbit; return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8)); } } @@ -761,7 +807,7 @@ upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f, return upb_msgval_fromdefault(f); } } else { - size_t ofs = l->field_offsets[upb_fielddef_index(f)]; + size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; const upb_oneofdef *o = upb_fielddef_containingoneof(f); upb_msgval ret; @@ -780,10 +826,10 @@ bool upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, const upb_msglayout *l) { - upb_alloc *a = upb_msg_alloc(msg, l); upb_msg_checkfield(l, f); if (upb_fielddef_isextension(f)) { + upb_alloc *a = upb_msg_alloc(msg); /* TODO(haberman): introduce table API that can do this in one call. */ upb_inttable *ext = upb_msg_getextdict(msg, l, a); upb_value val2 = upb_toval(val); @@ -792,7 +838,7 @@ bool upb_msg_set(upb_msg *msg, return false; } } else { - size_t ofs = l->field_offsets[upb_fielddef_index(f)]; + size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; const upb_oneofdef *o = upb_fielddef_containingoneof(f); if (o) { diff --git a/upb/msg.h b/upb/msg.h index 36470ae..c2b5cf0 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -65,18 +65,6 @@ typedef void upb_msg; * instances of this from a upb_msgfactory, and the factory always owns the * msglayout. */ -/* Gets the factory for this layout */ -upb_msgfactory *upb_msglayout_factory(const upb_msglayout *l); - -/* Get the msglayout for a submessage. This requires that this field is a - * submessage, ie. upb_fielddef_issubmsg(upb_msglayout_msgdef(l)) == true. - * - * Since map entry messages don't have layouts, if upb_fielddef_ismap(f) == true - * then this function will return the layout for the map's value. It requires - * that the value type of the map field is a submessage. */ -const upb_msglayout *upb_msglayout_sublayout(const upb_msglayout *l, - const upb_fielddef *f); - /* Returns the msgdef for this msglayout. */ const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l); @@ -212,19 +200,29 @@ size_t upb_msg_sizeof(const upb_msglayout *l); * upb_msg_uninit() must be called to release internally-allocated memory * unless the allocator is an arena that does not require freeing. * + * Please note that upb_msg_init() may return a value that is different than + * |msg|, so you must assign the return value and not cast your memory block + * to upb_msg* directly! + * * Please note that upb_msg_uninit() does *not* free any submessages, maps, * or arrays referred to by this message's fields. You must free them manually - * yourself. */ -void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a); -void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); + * yourself. + * + * upb_msg_uninit returns the original memory block, which may be useful if + * you dynamically allocated it (though upb_msg_new() would normally be more + * appropriate in this case). */ +upb_msg *upb_msg_init(void *msg, const upb_msglayout *l, upb_alloc *a); +void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); /* Like upb_msg_init() / upb_msg_uninit(), except the message's memory is * allocated / freed from the given upb_alloc. */ upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a); void upb_msg_free(upb_msg *msg, const upb_msglayout *l); -/* Returns the upb_alloc for the given message. */ -upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l); +/* Returns the upb_alloc for the given message. + * TODO(haberman): get rid of this? Not sure we want to be storing this + * for every message. */ +upb_alloc *upb_msg_alloc(const upb_msg *msg); /* Packs the tree of messages rooted at "msg" into a single hunk of memory, * allocated from the given allocator. */ @@ -400,6 +398,43 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h, size_t *offset, int32_t *hasbit); + +/** Interfaces for generated code *********************************************/ + +struct upb_msglayout_strinit_v1 { + const char *ptr; + uint32_t length; +}; + +struct upb_msglayout_fieldinit_v1 { + uint32_t number; + uint32_t offset; + uint16_t hasbit; + uint16_t oneof_index; + uint16_t submsg_index; + uint8_t type; + uint8_t label; +}; + +struct upb_msglayout_msginit_v1 { + struct upb_msglayout_fieldinit_v1 *fields; + struct upb_msglayout_msginit_v1 **submsgs; + uint32_t *case_offsets; + void *default_msg; + /* Must be aligned to 8. Doesn't include internal members like unknown + * fields, extension dict, pointer to msglayout, etc. */ + uint32_t size; + bool extendable; + char align; +}; + +/* Initialize/uninitialize a msglayout from a msginit. If upb uses v1 + * internally, this will not allocate any memory. Should only be used by + * generated code. */ +upb_msglayout *upb_msglayout_frominit_v1( + const struct upb_msglayout_msginit_v1 *init, upb_alloc *a); +void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a); + UPB_END_EXTERN_C #endif /* UPB_MSG_H_ */ -- cgit v1.2.3 From 76fcdd2ee92e8f7852f96ccd49fe776236ae4e60 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 2 Jul 2017 09:53:14 -0700 Subject: Removed all upb_msgdef/upb_fielddef from upb_msg. --- upb/bindings/lua/msg.c | 33 ++++-- upb/bindings/lua/upb.h | 1 + upb/bindings/lua/upb/pb.c | 3 +- upb/msg.c | 256 ++++++++++++++++++---------------------------- upb/msg.h | 62 +++++------ 5 files changed, 150 insertions(+), 205 deletions(-) diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index c222ca5..41163bc 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -283,11 +283,16 @@ static const struct luaL_Reg lupb_msgfactory_mm[] = { /* lupb_msgclass **************************************************************/ +/* Userval contains a map of: + * [1] -> MessageFactory (to keep GC-reachable) + * [const upb_msgdef*] -> [lupb_msgclass userdata] + */ + #define LUPB_MSGCLASS_FACTORY 1 -#define LUPB_MSGCLASS_MSGDEF 2 struct lupb_msgclass { const upb_msglayout *layout; + const upb_msgdef *msgdef; const lupb_msgfactory *lfactory; }; @@ -312,7 +317,11 @@ const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg) { const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg) { const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, narg); return upb_msgfactory_getmergehandlers( - lmsgclass->lfactory->factory, upb_msglayout_msgdef(lmsgclass->layout)); + lmsgclass->lfactory->factory, lmsgclass->msgdef); +} + +const upb_msgdef *lupb_msgclass_getmsgdef(const lupb_msgclass *lmsgclass) { + return lmsgclass->msgdef; } upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass) { @@ -329,8 +338,8 @@ static void lupb_msgclass_typecheck(lua_State *L, const lupb_msgclass *expected, const lupb_msgclass *actual) { if (expected != actual) { luaL_error(L, "Message had incorrect type, expected '%s', got '%s'", - upb_msgdef_fullname(upb_msglayout_msgdef(expected->layout)), - upb_msgdef_fullname(upb_msglayout_msgdef(actual->layout))); + upb_msgdef_fullname(expected->msgdef), + upb_msgdef_fullname(actual->msgdef)); } } @@ -370,6 +379,7 @@ static int lupb_msgclass_pushnew(lua_State *L, int factory, lupb_uservalseti(L, -1, LUPB_MSGCLASS_FACTORY, factory); lmc->layout = upb_msgfactory_getlayout(lfactory->factory, md); lmc->lfactory = lfactory; + lmc->msgdef = md; return 1; } @@ -903,7 +913,7 @@ const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, } const upb_msgdef *lupb_msg_checkdef(lua_State *L, int narg) { - return upb_msglayout_msgdef(lupb_msg_check(L, narg)->lmsgclass->layout); + return lupb_msg_check(L, narg)->lmsgclass->msgdef; } static const upb_fielddef *lupb_msg_checkfield(lua_State *L, @@ -911,7 +921,7 @@ static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int fieldarg) { size_t len; const char *fieldname = luaL_checklstring(L, fieldarg, &len); - const upb_msgdef *msgdef = upb_msglayout_msgdef(msg->lmsgclass->layout); + const upb_msgdef *msgdef = msg->lmsgclass->msgdef; const upb_fielddef *f = upb_msgdef_ntof(msgdef, fieldname, len); if (!f) { @@ -988,6 +998,7 @@ static int lupb_msg_index(lua_State *L) { lupb_msg *lmsg = lupb_msg_check(L, 1); const upb_fielddef *f = lupb_msg_checkfield(L, lmsg, 2); const upb_msglayout *l = lmsg->lmsgclass->layout; + int field_index = upb_fielddef_index(f); if (in_userval(f)) { lupb_uservalgeti(L, 1, lupb_fieldindex(f)); @@ -1000,8 +1011,8 @@ static int lupb_msg_index(lua_State *L) { /* TODO(haberman) */ } else { UPB_ASSERT(upb_fielddef_isstring(f)); - if (upb_msg_has(lmsg->msg, f, l)) { - upb_msgval val = upb_msg_get(lmsg->msg, f, l); + if (upb_msg_has(lmsg->msg, field_index, l)) { + upb_msgval val = upb_msg_get(lmsg->msg, field_index, l); lua_pop(L, 1); lua_pushlstring(L, val.str.ptr, val.str.len); lupb_uservalseti(L, 1, lupb_fieldindex(f), -1); @@ -1009,7 +1020,8 @@ static int lupb_msg_index(lua_State *L) { } } } else { - lupb_pushmsgval(L, upb_fielddef_type(f), upb_msg_get(lmsg->msg, f, l)); + upb_msgval val = upb_msg_get(lmsg->msg, field_index, l); + lupb_pushmsgval(L, upb_fielddef_type(f), val); } return 1; @@ -1027,6 +1039,7 @@ static int lupb_msg_newindex(lua_State *L) { lupb_msg *lmsg = lupb_msg_check(L, 1); const upb_fielddef *f = lupb_msg_checkfield(L, lmsg, 2); upb_fieldtype_t type = upb_fielddef_type(f); + int field_index = upb_fielddef_index(f); upb_msgval msgval; /* Typecheck and get msgval. */ @@ -1047,7 +1060,7 @@ static int lupb_msg_newindex(lua_State *L) { /* Set in upb_msg and userval (if necessary). */ - upb_msg_set(lmsg->msg, f, msgval, lmsg->lmsgclass->layout); + upb_msg_set(lmsg->msg, field_index, msgval, lmsg->lmsgclass->layout); if (in_userval(f)) { lupb_uservalseti(L, 1, lupb_fieldindex(f), 3); diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h index 982ae57..ea2910a 100644 --- a/upb/bindings/lua/upb.h +++ b/upb/bindings/lua/upb.h @@ -134,6 +134,7 @@ const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, const lupb_msgclass *lupb_msgclass_check(lua_State *L, int narg); const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg); +const upb_msgdef *lupb_msgclass_getmsgdef(const lupb_msgclass *lmsgclass); const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg); upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass); void lupb_msg_registertypes(lua_State *L); diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index 731430d..1d56066 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -103,9 +103,8 @@ static int lupb_pb_makestrtomsgdecoder(lua_State *L) { } static int lupb_pb_makemsgtostrencoder(lua_State *L) { - const upb_msglayout *layout = lupb_msgclass_getlayout(L, 1); const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, 1); - const upb_msgdef *md = upb_msglayout_msgdef(layout); + const upb_msgdef *md = lupb_msgclass_getmsgdef(lmsgclass); upb_msgfactory *factory = lupb_msgclass_getfactory(lmsgclass); const upb_handlers *encode_handlers; const upb_visitorplan *vp; diff --git a/upb/msg.c b/upb/msg.c index 26d0e98..5574363 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -24,7 +24,8 @@ bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) { void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size); void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size); -#define VOIDPTR_AT(msg, ofs) (void*)((char*)msg + ofs) +#define PTR_AT(msg, ofs, type) (type*)((char*)msg + ofs) +#define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void) #define ENCODE_MAX_NESTING 64 #define CHECK_TRUE(x) if (!(x)) { return false; } @@ -71,7 +72,15 @@ static size_t upb_msgval_sizeof(upb_fieldtype_t type) { UPB_UNREACHABLE(); } -static uint8_t upb_msg_fieldsize(const upb_fielddef *f) { +static uint8_t upb_msg_fieldsize(const upb_msglayout_fieldinit_v1 *field) { + if (field->label == UPB_LABEL_REPEATED) { + return sizeof(void*); + } else { + return upb_msgval_sizeof(field->type); + } +} + +static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) { if (upb_fielddef_isseq(f)) { return sizeof(void*); } else { @@ -115,7 +124,6 @@ static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) { } static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) { - /* TODO(haberman): improve/optimize this (maybe use upb_msgval in fielddef) */ switch (upb_fielddef_type(f)) { case UPB_TYPE_FLOAT: return upb_msgval_float(upb_fielddef_defaultfloat(f)); @@ -150,23 +158,14 @@ static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) { /** upb_msglayout *************************************************************/ struct upb_msglayout { - const upb_msgdef *msgdef; /* TODO(haberman): remove. */ struct upb_msglayout_msginit_v1 data; }; -static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) { - UPB_ASSERT(l->msgdef == upb_fielddef_containingtype(f)); -} - static void upb_msglayout_free(upb_msglayout *l) { upb_gfree(l->data.default_msg); upb_gfree(l); } -const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l) { - return l->msgdef; -} - static size_t upb_msglayout_place(upb_msglayout *l, size_t size) { size_t ret; @@ -187,8 +186,7 @@ static uint32_t upb_msglayout_hasbit(const upb_msglayout *l, return l->data.fields[upb_fielddef_index(f)].hasbit; } -static bool upb_msglayout_initdefault(upb_msglayout *l) { - const upb_msgdef *m = l->msgdef; +static bool upb_msglayout_initdefault(upb_msglayout *l, const upb_msgdef *m) { upb_msg_field_iter it; if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->data.size) { @@ -211,7 +209,10 @@ static bool upb_msglayout_initdefault(upb_msglayout *l) { if (!upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f) && !upb_fielddef_isseq(f)) { - upb_msg_set(l->data.default_msg, f, upb_msgval_fromdefault(f), l); + upb_msg_set(l->data.default_msg, + upb_fielddef_index(f), + upb_msgval_fromdefault(f), + l); } } } @@ -225,6 +226,9 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { upb_msglayout *l; size_t hasbit; size_t submsg_count = 0; + const upb_msglayout_msginit_v1 **submsgs; + upb_msglayout_fieldinit_v1 *fields; + upb_msglayout_oneofinit_v1 *oneofs; for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8; !upb_msg_field_done(&it); @@ -239,14 +243,18 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { if (!l) return NULL; memset(l, 0, sizeof(*l)); - l->msgdef = m; /* TODO(haberman): check OOM. */ - l->data.fields = upb_gmalloc(upb_msgdef_numfields(m) * - sizeof(struct upb_msglayout_fieldinit_v1)); - l->data.submsgs = upb_gmalloc(submsg_count * sizeof(void*)); - l->data.case_offsets = upb_gmalloc(upb_msgdef_numoneofs(m) * - sizeof(*l->data.case_offsets)); + fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields)); + submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs)); + oneofs = upb_gmalloc(upb_msgdef_numoneofs(m) * sizeof(*oneofs)); + + l->data.field_count = upb_msgdef_numfields(m); + l->data.oneof_count = upb_msgdef_numoneofs(m); + l->data.fields = fields; + l->data.submsgs = submsgs; + l->data.oneofs = oneofs; + l->data.is_proto2 = (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2); /* Allocate data offsets in three stages: * @@ -257,14 +265,25 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { * OPT: There is a lot of room for optimization here to minimize the size. */ - /* Allocate hasbits. Start at sizeof(void*) for upb_alloc*. */ - for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8; + /* Allocate hasbits and set basic field attributes. */ + for (upb_msg_field_begin(&it, m), hasbit = 0; !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* f = upb_msg_iter_field(&it); + upb_msglayout_fieldinit_v1 *field = &fields[upb_fielddef_index(f)]; + + field->number = upb_fielddef_number(f); + field->type = upb_fielddef_type(f); + field->label = upb_fielddef_label(f); + + if (upb_fielddef_containingoneof(f)) { + field->oneof_index = upb_oneofdef_index(upb_fielddef_containingoneof(f)); + } else { + field->oneof_index = UPB_NOT_IN_ONEOF; + } if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) { - l->data.fields[upb_fielddef_index(f)].hasbit = hasbit++; + field->hasbit = hasbit++; } } @@ -276,7 +295,7 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* f = upb_msg_iter_field(&it); - size_t field_size = upb_msg_fieldsize(f); + size_t field_size = upb_msg_fielddefsize(f); size_t index = upb_fielddef_index(f); if (upb_fielddef_containingoneof(f)) { @@ -284,47 +303,38 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { continue; } - l->data.fields[index].offset = upb_msglayout_place(l, field_size); + fields[index].offset = upb_msglayout_place(l, field_size); } /* Allocate oneof fields. Each oneof field consists of a uint32 for the case * and space for the actual data. */ for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit); upb_msg_oneof_next(&oit)) { - const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); + const upb_oneofdef* o = upb_msg_iter_oneof(&oit); upb_oneof_iter fit; + size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */ + upb_msglayout_oneofinit_v1 *oneof = &oneofs[upb_oneofdef_index(o)]; size_t field_size = 0; - size_t case_offset; - size_t val_offset; /* Calculate field size: the max of all field sizes. */ - for (upb_oneof_begin(&fit, oneof); + for (upb_oneof_begin(&fit, o); !upb_oneof_done(&fit); upb_oneof_next(&fit)) { const upb_fielddef* f = upb_oneof_iter_field(&fit); - field_size = UPB_MAX(field_size, upb_msg_fieldsize(f)); + field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); } /* Align and allocate case offset. */ - case_offset = upb_msglayout_place(l, case_size); - val_offset = upb_msglayout_place(l, field_size); - - l->data.case_offsets[upb_oneofdef_index(oneof)] = case_offset; - - /* Assign all fields in the oneof this same offset. */ - for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit); - upb_oneof_next(&fit)) { - const upb_fielddef* f = upb_oneof_iter_field(&fit); - l->data.fields[upb_fielddef_index(f)].offset = val_offset; - } + oneof->case_offset = upb_msglayout_place(l, case_size); + oneof->data_offset = upb_msglayout_place(l, field_size); } /* Size of the entire structure should be a multiple of its greatest * alignment. */ l->data.size = align_up(l->data.size, l->data.align); - if (upb_msglayout_initdefault(l)) { + if (upb_msglayout_initdefault(l, m)) { return l; } else { upb_msglayout_free(l); @@ -506,13 +516,14 @@ static upb_selector_t getsel2(const upb_fielddef *f, upb_handlertype_t type) { static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f, const upb_msglayout *layout) { + int field_index = upb_fielddef_index(f); if (upb_fielddef_isseq(f)) { - return upb_msgval_getarr(upb_msg_get(msg, f, layout)) != NULL; + return upb_msgval_getarr(upb_msg_get(msg, field_index, layout)) != NULL; } else if (upb_msgdef_syntax(upb_fielddef_containingtype(f)) == UPB_SYNTAX_PROTO2) { - return upb_msg_has(msg, f, layout); + return upb_msg_has(msg, field_index, layout); } else { - upb_msgval val = upb_msg_get(msg, f, layout); + upb_msgval val = upb_msg_get(msg, field_index, layout); switch (upb_fielddef_type(f)) { case UPB_TYPE_FLOAT: return upb_msgval_getfloat(val) != 0; @@ -542,7 +553,7 @@ static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f, static bool upb_visitor_visitmsg2(const upb_msg *msg, const upb_msglayout *layout, upb_sink *sink, int depth) { - const upb_msgdef *md = upb_msglayout_msgdef(layout); + const upb_msgdef *md = upb_handlers_msgdef(sink->handlers); upb_msg_field_iter i; upb_status status; @@ -564,7 +575,7 @@ static bool upb_visitor_visitmsg2(const upb_msg *msg, continue; } - val = upb_msg_get(msg, f, layout); + val = upb_msg_get(msg, upb_fielddef_index(f), layout); if (upb_fielddef_isseq(f)) { const upb_array *arr = upb_msgval_getarr(val); @@ -633,7 +644,7 @@ bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) { /* If we always read/write as a consistent type to each address, this shouldn't * violate aliasing. */ -#define DEREF(msg, ofs, type) *(type*)VOIDPTR_AT(msg, ofs) +#define DEREF(msg, ofs, type) *PTR_AT(msg, ofs, type) /* Internal members of a upb_msg. We can change this without breaking binary * compatibility. We put these before the user's data. The user's upb_msg* @@ -668,62 +679,21 @@ static upb_msg_internal_withext *upb_msg_getinternalwithext( return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); } -static const upb_msg_internal_withext *upb_msg_getinternalwithext_const( - const upb_msg *msg, const upb_msglayout *l) { - UPB_ASSERT(l->data.extendable); - return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); +static const upb_msglayout_fieldinit_v1 *upb_msg_checkfield( + int field_index, const upb_msglayout *l) { + UPB_ASSERT(field_index >= 0 && field_index < l->data.field_count); + return &l->data.fields[field_index]; } -static upb_inttable *upb_msg_trygetextdict(const upb_msg *msg, - const upb_msglayout *l) { - return upb_msg_getinternalwithext_const(msg, l)->extdict; +static bool upb_msg_inoneof(const upb_msglayout_fieldinit_v1 *field) { + return field->oneof_index != UPB_NOT_IN_ONEOF; } -static upb_inttable *upb_msg_getextdict(upb_msg *msg, - const upb_msglayout *l, - upb_alloc *a) { - upb_inttable *ext_dict; - - ext_dict = upb_msg_getinternalwithext(msg, l)->extdict; - - if (!ext_dict) { - ext_dict = upb_malloc(a, sizeof(upb_inttable)); - - if (!ext_dict) { - return NULL; - } - - /* Use an 8-byte type to ensure all bytes are copied. */ - if (!upb_inttable_init2(ext_dict, UPB_CTYPE_INT64, a)) { - upb_free(a, ext_dict); - return NULL; - } - - upb_msg_getinternalwithext(msg, l)->extdict = ext_dict; - } - - return ext_dict; -} - -static uint32_t upb_msg_getoneofint(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l) { - size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; - return DEREF(msg, oneof_ofs, uint8_t); -} - -static void upb_msg_setoneofcase(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l, - uint32_t val) { - size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; - DEREF(msg, oneof_ofs, uint8_t) = val; -} - - -static bool upb_msg_oneofis(const upb_msg *msg, const upb_msglayout *l, - const upb_oneofdef *o, const upb_fielddef *f) { - return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); +static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index, + const upb_msglayout *l) { + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + UPB_ASSERT(upb_msg_inoneof(field)); + return PTR_AT(msg, l->data.oneofs[field->oneof_index].case_offset, uint32_t); } size_t upb_msg_sizeof(const upb_msglayout *l) { @@ -772,82 +742,52 @@ upb_alloc *upb_msg_alloc(const upb_msg *msg) { } bool upb_msg_has(const upb_msg *msg, - const upb_fielddef *f, + int field_index, const upb_msglayout *l) { - const upb_oneofdef *o; - upb_msg_checkfield(l, f); - UPB_ASSERT(upb_fielddef_haspresence(f)); - - if (upb_fielddef_isextension(f)) { - /* Extensions are set when they are present in the extension dict. */ - upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); - upb_value v; - return ext_dict != NULL && - upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &v); - } else if ((o = upb_fielddef_containingoneof(f)) != NULL) { + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + + UPB_ASSERT(l->data.is_proto2); + + if (upb_msg_inoneof(field)) { /* Oneofs are set when the oneof number is set to this field. */ - return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); + return *upb_msg_oneofcase(msg, field_index, l) == field->number; } else { /* Other fields are set when their hasbit is set. */ - uint32_t hasbit = l->data.fields[upb_fielddef_index(f)].hasbit; + uint32_t hasbit = l->data.fields[field_index].hasbit; return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8)); } } -upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f, +upb_msgval upb_msg_get(const upb_msg *msg, int field_index, const upb_msglayout *l) { - upb_msg_checkfield(l, f); + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + int size = upb_msg_fieldsize(field); - if (upb_fielddef_isextension(f)) { - upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); - upb_value val; - if (upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &val)) { - return upb_msgval_fromval(val); + if (upb_msg_inoneof(field)) { + if (*upb_msg_oneofcase(msg, field_index, l) == field->number) { + size_t ofs = l->data.oneofs[field->oneof_index].data_offset; + return upb_msgval_read(msg, ofs, size); } else { - return upb_msgval_fromdefault(f); + /* Return default. */ + return upb_msgval_read(l->data.default_msg, field->offset, size); } } else { - size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; - const upb_oneofdef *o = upb_fielddef_containingoneof(f); - upb_msgval ret; - - if (o && !upb_msg_oneofis(msg, l, o, f)) { - /* Oneof defaults can't come from the message because the memory is reused - * by all types in the oneof. */ - return upb_msgval_fromdefault(f); - } - - ret = upb_msgval_read(msg, ofs, upb_msg_fieldsize(f)); - return ret; + return upb_msgval_read(msg, field->offset, size); } } -bool upb_msg_set(upb_msg *msg, - const upb_fielddef *f, - upb_msgval val, +void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val, const upb_msglayout *l) { - upb_msg_checkfield(l, f); - - if (upb_fielddef_isextension(f)) { - upb_alloc *a = upb_msg_alloc(msg); - /* TODO(haberman): introduce table API that can do this in one call. */ - upb_inttable *ext = upb_msg_getextdict(msg, l, a); - upb_value val2 = upb_toval(val); - if (!upb_inttable_replace(ext, upb_fielddef_number(f), val2) && - !upb_inttable_insert2(ext, upb_fielddef_number(f), val2, a)) { - return false; - } - } else { - size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; - const upb_oneofdef *o = upb_fielddef_containingoneof(f); - - if (o) { - upb_msg_setoneofcase(msg, o, l, upb_fielddef_number(f)); - } + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + int size = upb_msg_fieldsize(field); - upb_msgval_write(msg, ofs, val, upb_msg_fieldsize(f)); + if (upb_msg_inoneof(field)) { + size_t ofs = l->data.oneofs[field->oneof_index].data_offset; + *upb_msg_oneofcase(msg, field_index, l) = field->number; + upb_msgval_write(msg, ofs, val, size); + } else { + upb_msgval_write(msg, field->offset, val, size); } - return true; } diff --git a/upb/msg.h b/upb/msg.h index c2b5cf0..1d888b8 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -65,9 +65,6 @@ typedef void upb_msg; * instances of this from a upb_msgfactory, and the factory always owns the * msglayout. */ -/* Returns the msgdef for this msglayout. */ -const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l); - /** upb_visitor ***************************************************************/ @@ -242,25 +239,14 @@ void *upb_msg_pack(const upb_msg *msg, const upb_msglayout *l, * arenas). */ upb_msgval upb_msg_get(const upb_msg *msg, - const upb_fielddef *f, + int field_index, const upb_msglayout *l); /* May only be called for fields where upb_fielddef_haspresence(f) == true. */ bool upb_msg_has(const upb_msg *msg, - const upb_fielddef *f, + int field_index, const upb_msglayout *l); -/* Returns NULL if no field in the oneof is set. */ -const upb_fielddef *upb_msg_getoneofcase(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l); - -/* Returns true if any field in the oneof is set. */ -bool upb_msg_hasoneof(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l); - - /* Mutable message API. May only be called by the owner of the message who * knows its ownership scheme and how to keep it consistent. */ @@ -268,8 +254,8 @@ bool upb_msg_hasoneof(const upb_msg *msg, * management: if you overwrite a pointer to a msg/array/map/string without * cleaning it up (or using an arena) it will leak. */ -bool upb_msg_set(upb_msg *msg, - const upb_fielddef *f, +void upb_msg_set(upb_msg *msg, + int field_index, upb_msgval val, const upb_msglayout *l); @@ -280,12 +266,7 @@ bool upb_msg_set(upb_msg *msg, * arrays/maps/strings/msgs that this field may have pointed to. */ bool upb_msg_clearfield(upb_msg *msg, - const upb_fielddef *f, - const upb_msglayout *l); - -/* Clears all fields in the oneof such that none of them are set. */ -bool upb_msg_clearoneof(upb_msg *msg, - const upb_oneofdef *o, + int field_index, const upb_msglayout *l); /* TODO(haberman): copyfrom()/mergefrom()? */ @@ -401,38 +382,49 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h, /** Interfaces for generated code *********************************************/ -struct upb_msglayout_strinit_v1 { +#define UPB_NOT_IN_ONEOF UINT16_MAX + +typedef struct { const char *ptr; uint32_t length; -}; +} upb_msglayout_strinit_v1; -struct upb_msglayout_fieldinit_v1 { +typedef struct { uint32_t number; - uint32_t offset; + uint32_t offset; /* If in a oneof, offset of default in default_msg below. */ uint16_t hasbit; - uint16_t oneof_index; + uint16_t oneof_index; /* UPB_NOT_IN_ONEOF if not in a oneof. */ uint16_t submsg_index; uint8_t type; uint8_t label; -}; +} upb_msglayout_fieldinit_v1; + +typedef struct { + uint32_t data_offset; + uint32_t case_offset; +} upb_msglayout_oneofinit_v1; -struct upb_msglayout_msginit_v1 { - struct upb_msglayout_fieldinit_v1 *fields; - struct upb_msglayout_msginit_v1 **submsgs; +typedef struct upb_msglayout_msginit_v1 { + const struct upb_msglayout_msginit_v1 **submsgs; + const upb_msglayout_fieldinit_v1 *fields; + const upb_msglayout_oneofinit_v1 *oneofs; uint32_t *case_offsets; void *default_msg; /* Must be aligned to 8. Doesn't include internal members like unknown * fields, extension dict, pointer to msglayout, etc. */ uint32_t size; + uint16_t field_count; + uint16_t oneof_count; bool extendable; + bool is_proto2; char align; -}; +} upb_msglayout_msginit_v1; /* Initialize/uninitialize a msglayout from a msginit. If upb uses v1 * internally, this will not allocate any memory. Should only be used by * generated code. */ upb_msglayout *upb_msglayout_frominit_v1( - const struct upb_msglayout_msginit_v1 *init, upb_alloc *a); + const upb_msglayout_msginit_v1 *init, upb_alloc *a); void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a); UPB_END_EXTERN_C -- cgit v1.2.3 From 28268113678e869ffa42eb20cab0c338f55142dc Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jul 2017 16:43:55 -0500 Subject: Responded to PR comments. --- upb/msg.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/upb/msg.c b/upb/msg.c index 5574363..5729408 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -206,6 +206,7 @@ static bool upb_msglayout_initdefault(upb_msglayout *l, const upb_msgdef *m) { continue; } + /* TODO(haberman): handle strings. */ if (!upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f) && !upb_fielddef_isseq(f)) { @@ -244,11 +245,21 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { memset(l, 0, sizeof(*l)); - /* TODO(haberman): check OOM. */ fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields)); submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs)); oneofs = upb_gmalloc(upb_msgdef_numoneofs(m) * sizeof(*oneofs)); + if ((!fields && upb_msgdef_numfields(m)) || + (!submsgs && submsg_count) || + (!oneofs && upb_msgdef_numoneofs(m))) { + /* OOM. */ + upb_gfree(l); + upb_gfree(fields); + upb_gfree(submsgs); + upb_gfree(oneofs); + return NULL; + } + l->data.field_count = upb_msgdef_numfields(m); l->data.oneof_count = upb_msgdef_numoneofs(m); l->data.fields = fields; @@ -342,19 +353,6 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { } } -upb_msglayout *upb_msglayout_frominit_v1( - const struct upb_msglayout_msginit_v1 *init, upb_alloc *a) { - UPB_UNUSED(a); - /* If upb upgrades to a v2, this would create a heap-allocated v2. */ - return (upb_msglayout*)init; -} - -void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a) { - UPB_UNUSED(layout); - UPB_UNUSED(a); - /* If upb upgrades to a v2, this would free the heap-allocated v2. */ -} - /** upb_msgfactory ************************************************************/ @@ -662,8 +660,9 @@ typedef struct { upb_msg_internal base; } upb_msg_internal_withext; -#define INTERNAL_MEMBERS_SIZE(l) \ - sizeof(upb_msg_internal) - (l->data.extendable * sizeof(void*)) +static int upb_msg_internalsize(const upb_msglayout *l) { + return sizeof(upb_msg_internal) - l->data.extendable * sizeof(void*); +} static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { return VOIDPTR_AT(msg, -sizeof(upb_msg_internal)); @@ -697,18 +696,20 @@ static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index, } size_t upb_msg_sizeof(const upb_msglayout *l) { - return l->data.size + INTERNAL_MEMBERS_SIZE(l); + return l->data.size + upb_msg_internalsize(l); } upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) { - upb_msg *msg = VOIDPTR_AT(mem, INTERNAL_MEMBERS_SIZE(l)); + upb_msg *msg = VOIDPTR_AT(mem, upb_msg_internalsize(l)); if (l->data.default_msg) { memcpy(msg, l->data.default_msg, l->data.size); } else { memset(msg, 0, l->data.size); } + UPB_ASSERT(!upb_msg_getinternal(msg)->alloc); upb_msg_getinternal(msg)->alloc = a; + if (l->data.extendable) { upb_msg_getinternalwithext(msg, l)->extdict = NULL; } @@ -725,7 +726,7 @@ void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) { } } - return VOIDPTR_AT(msg, -INTERNAL_MEMBERS_SIZE(l)); + return VOIDPTR_AT(msg, -upb_msg_internalsize(l)); } upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) { -- cgit v1.2.3 From 3e8acc3f4e4a2045b04dd8794c87b2a198542382 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jul 2017 21:52:27 -0500 Subject: Removed incorrect assert and added comments. --- upb/msg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/upb/msg.c b/upb/msg.c index 5729408..850e86e 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -701,13 +701,15 @@ size_t upb_msg_sizeof(const upb_msglayout *l) { upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) { upb_msg *msg = VOIDPTR_AT(mem, upb_msg_internalsize(l)); + + /* Initialize normal members. */ if (l->data.default_msg) { memcpy(msg, l->data.default_msg, l->data.size); } else { memset(msg, 0, l->data.size); } - UPB_ASSERT(!upb_msg_getinternal(msg)->alloc); + /* Initialize internal members. */ upb_msg_getinternal(msg)->alloc = a; if (l->data.extendable) { -- cgit v1.2.3