From 1b9d37a00ebae8b59773c8501d8712e1c3335302 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 1 Jul 2017 15:15:52 -0700 Subject: Start migrating upb_msglayout to be suitable for generated code. This involves: - remove upb_msglayout -> upb_msgfactory dependency. - remove upb_msglayout -> upb_msgdef dependency (in progress). - make upb_msglayout use a representation that can be statically initialized by generated code. The goal here is that upb_msglayout becomes a kind of "descriptor lite": it contains enough data to parser and serialize protobufs and manipulate a upb_msg in memory, while being far smaller and simpler than a full descriptor. It also does not include field names, which can be a benefit for applications that do not want to leak field names. Generated code can then create a upb_msglayout, and do most things without ever needing to construct full descriptors/defs if they don't want to. --- upb/msg.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 17 deletions(-) (limited to 'upb/msg.h') diff --git a/upb/msg.h b/upb/msg.h index 36470ae..c2b5cf0 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -65,18 +65,6 @@ typedef void upb_msg; * instances of this from a upb_msgfactory, and the factory always owns the * msglayout. */ -/* Gets the factory for this layout */ -upb_msgfactory *upb_msglayout_factory(const upb_msglayout *l); - -/* Get the msglayout for a submessage. This requires that this field is a - * submessage, ie. upb_fielddef_issubmsg(upb_msglayout_msgdef(l)) == true. - * - * Since map entry messages don't have layouts, if upb_fielddef_ismap(f) == true - * then this function will return the layout for the map's value. It requires - * that the value type of the map field is a submessage. */ -const upb_msglayout *upb_msglayout_sublayout(const upb_msglayout *l, - const upb_fielddef *f); - /* Returns the msgdef for this msglayout. */ const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l); @@ -212,19 +200,29 @@ size_t upb_msg_sizeof(const upb_msglayout *l); * upb_msg_uninit() must be called to release internally-allocated memory * unless the allocator is an arena that does not require freeing. * + * Please note that upb_msg_init() may return a value that is different than + * |msg|, so you must assign the return value and not cast your memory block + * to upb_msg* directly! + * * Please note that upb_msg_uninit() does *not* free any submessages, maps, * or arrays referred to by this message's fields. You must free them manually - * yourself. */ -void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a); -void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); + * yourself. + * + * upb_msg_uninit returns the original memory block, which may be useful if + * you dynamically allocated it (though upb_msg_new() would normally be more + * appropriate in this case). */ +upb_msg *upb_msg_init(void *msg, const upb_msglayout *l, upb_alloc *a); +void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); /* Like upb_msg_init() / upb_msg_uninit(), except the message's memory is * allocated / freed from the given upb_alloc. */ upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a); void upb_msg_free(upb_msg *msg, const upb_msglayout *l); -/* Returns the upb_alloc for the given message. */ -upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l); +/* Returns the upb_alloc for the given message. + * TODO(haberman): get rid of this? Not sure we want to be storing this + * for every message. */ +upb_alloc *upb_msg_alloc(const upb_msg *msg); /* Packs the tree of messages rooted at "msg" into a single hunk of memory, * allocated from the given allocator. */ @@ -400,6 +398,43 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h, size_t *offset, int32_t *hasbit); + +/** Interfaces for generated code *********************************************/ + +struct upb_msglayout_strinit_v1 { + const char *ptr; + uint32_t length; +}; + +struct upb_msglayout_fieldinit_v1 { + uint32_t number; + uint32_t offset; + uint16_t hasbit; + uint16_t oneof_index; + uint16_t submsg_index; + uint8_t type; + uint8_t label; +}; + +struct upb_msglayout_msginit_v1 { + struct upb_msglayout_fieldinit_v1 *fields; + struct upb_msglayout_msginit_v1 **submsgs; + uint32_t *case_offsets; + void *default_msg; + /* Must be aligned to 8. Doesn't include internal members like unknown + * fields, extension dict, pointer to msglayout, etc. */ + uint32_t size; + bool extendable; + char align; +}; + +/* Initialize/uninitialize a msglayout from a msginit. If upb uses v1 + * internally, this will not allocate any memory. Should only be used by + * generated code. */ +upb_msglayout *upb_msglayout_frominit_v1( + const struct upb_msglayout_msginit_v1 *init, upb_alloc *a); +void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a); + UPB_END_EXTERN_C #endif /* UPB_MSG_H_ */ -- cgit v1.2.3 From 76fcdd2ee92e8f7852f96ccd49fe776236ae4e60 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 2 Jul 2017 09:53:14 -0700 Subject: Removed all upb_msgdef/upb_fielddef from upb_msg. --- upb/bindings/lua/msg.c | 33 ++++-- upb/bindings/lua/upb.h | 1 + upb/bindings/lua/upb/pb.c | 3 +- upb/msg.c | 256 ++++++++++++++++++---------------------------- upb/msg.h | 62 +++++------ 5 files changed, 150 insertions(+), 205 deletions(-) (limited to 'upb/msg.h') diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index c222ca5..41163bc 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -283,11 +283,16 @@ static const struct luaL_Reg lupb_msgfactory_mm[] = { /* lupb_msgclass **************************************************************/ +/* Userval contains a map of: + * [1] -> MessageFactory (to keep GC-reachable) + * [const upb_msgdef*] -> [lupb_msgclass userdata] + */ + #define LUPB_MSGCLASS_FACTORY 1 -#define LUPB_MSGCLASS_MSGDEF 2 struct lupb_msgclass { const upb_msglayout *layout; + const upb_msgdef *msgdef; const lupb_msgfactory *lfactory; }; @@ -312,7 +317,11 @@ const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg) { const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg) { const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, narg); return upb_msgfactory_getmergehandlers( - lmsgclass->lfactory->factory, upb_msglayout_msgdef(lmsgclass->layout)); + lmsgclass->lfactory->factory, lmsgclass->msgdef); +} + +const upb_msgdef *lupb_msgclass_getmsgdef(const lupb_msgclass *lmsgclass) { + return lmsgclass->msgdef; } upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass) { @@ -329,8 +338,8 @@ static void lupb_msgclass_typecheck(lua_State *L, const lupb_msgclass *expected, const lupb_msgclass *actual) { if (expected != actual) { luaL_error(L, "Message had incorrect type, expected '%s', got '%s'", - upb_msgdef_fullname(upb_msglayout_msgdef(expected->layout)), - upb_msgdef_fullname(upb_msglayout_msgdef(actual->layout))); + upb_msgdef_fullname(expected->msgdef), + upb_msgdef_fullname(actual->msgdef)); } } @@ -370,6 +379,7 @@ static int lupb_msgclass_pushnew(lua_State *L, int factory, lupb_uservalseti(L, -1, LUPB_MSGCLASS_FACTORY, factory); lmc->layout = upb_msgfactory_getlayout(lfactory->factory, md); lmc->lfactory = lfactory; + lmc->msgdef = md; return 1; } @@ -903,7 +913,7 @@ const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, } const upb_msgdef *lupb_msg_checkdef(lua_State *L, int narg) { - return upb_msglayout_msgdef(lupb_msg_check(L, narg)->lmsgclass->layout); + return lupb_msg_check(L, narg)->lmsgclass->msgdef; } static const upb_fielddef *lupb_msg_checkfield(lua_State *L, @@ -911,7 +921,7 @@ static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int fieldarg) { size_t len; const char *fieldname = luaL_checklstring(L, fieldarg, &len); - const upb_msgdef *msgdef = upb_msglayout_msgdef(msg->lmsgclass->layout); + const upb_msgdef *msgdef = msg->lmsgclass->msgdef; const upb_fielddef *f = upb_msgdef_ntof(msgdef, fieldname, len); if (!f) { @@ -988,6 +998,7 @@ static int lupb_msg_index(lua_State *L) { lupb_msg *lmsg = lupb_msg_check(L, 1); const upb_fielddef *f = lupb_msg_checkfield(L, lmsg, 2); const upb_msglayout *l = lmsg->lmsgclass->layout; + int field_index = upb_fielddef_index(f); if (in_userval(f)) { lupb_uservalgeti(L, 1, lupb_fieldindex(f)); @@ -1000,8 +1011,8 @@ static int lupb_msg_index(lua_State *L) { /* TODO(haberman) */ } else { UPB_ASSERT(upb_fielddef_isstring(f)); - if (upb_msg_has(lmsg->msg, f, l)) { - upb_msgval val = upb_msg_get(lmsg->msg, f, l); + if (upb_msg_has(lmsg->msg, field_index, l)) { + upb_msgval val = upb_msg_get(lmsg->msg, field_index, l); lua_pop(L, 1); lua_pushlstring(L, val.str.ptr, val.str.len); lupb_uservalseti(L, 1, lupb_fieldindex(f), -1); @@ -1009,7 +1020,8 @@ static int lupb_msg_index(lua_State *L) { } } } else { - lupb_pushmsgval(L, upb_fielddef_type(f), upb_msg_get(lmsg->msg, f, l)); + upb_msgval val = upb_msg_get(lmsg->msg, field_index, l); + lupb_pushmsgval(L, upb_fielddef_type(f), val); } return 1; @@ -1027,6 +1039,7 @@ static int lupb_msg_newindex(lua_State *L) { lupb_msg *lmsg = lupb_msg_check(L, 1); const upb_fielddef *f = lupb_msg_checkfield(L, lmsg, 2); upb_fieldtype_t type = upb_fielddef_type(f); + int field_index = upb_fielddef_index(f); upb_msgval msgval; /* Typecheck and get msgval. */ @@ -1047,7 +1060,7 @@ static int lupb_msg_newindex(lua_State *L) { /* Set in upb_msg and userval (if necessary). */ - upb_msg_set(lmsg->msg, f, msgval, lmsg->lmsgclass->layout); + upb_msg_set(lmsg->msg, field_index, msgval, lmsg->lmsgclass->layout); if (in_userval(f)) { lupb_uservalseti(L, 1, lupb_fieldindex(f), 3); diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h index 982ae57..ea2910a 100644 --- a/upb/bindings/lua/upb.h +++ b/upb/bindings/lua/upb.h @@ -134,6 +134,7 @@ const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, const lupb_msgclass *lupb_msgclass_check(lua_State *L, int narg); const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg); +const upb_msgdef *lupb_msgclass_getmsgdef(const lupb_msgclass *lmsgclass); const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg); upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass); void lupb_msg_registertypes(lua_State *L); diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index 731430d..1d56066 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -103,9 +103,8 @@ static int lupb_pb_makestrtomsgdecoder(lua_State *L) { } static int lupb_pb_makemsgtostrencoder(lua_State *L) { - const upb_msglayout *layout = lupb_msgclass_getlayout(L, 1); const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, 1); - const upb_msgdef *md = upb_msglayout_msgdef(layout); + const upb_msgdef *md = lupb_msgclass_getmsgdef(lmsgclass); upb_msgfactory *factory = lupb_msgclass_getfactory(lmsgclass); const upb_handlers *encode_handlers; const upb_visitorplan *vp; diff --git a/upb/msg.c b/upb/msg.c index 26d0e98..5574363 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -24,7 +24,8 @@ bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) { void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size); void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size); -#define VOIDPTR_AT(msg, ofs) (void*)((char*)msg + ofs) +#define PTR_AT(msg, ofs, type) (type*)((char*)msg + ofs) +#define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void) #define ENCODE_MAX_NESTING 64 #define CHECK_TRUE(x) if (!(x)) { return false; } @@ -71,7 +72,15 @@ static size_t upb_msgval_sizeof(upb_fieldtype_t type) { UPB_UNREACHABLE(); } -static uint8_t upb_msg_fieldsize(const upb_fielddef *f) { +static uint8_t upb_msg_fieldsize(const upb_msglayout_fieldinit_v1 *field) { + if (field->label == UPB_LABEL_REPEATED) { + return sizeof(void*); + } else { + return upb_msgval_sizeof(field->type); + } +} + +static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) { if (upb_fielddef_isseq(f)) { return sizeof(void*); } else { @@ -115,7 +124,6 @@ static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) { } static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) { - /* TODO(haberman): improve/optimize this (maybe use upb_msgval in fielddef) */ switch (upb_fielddef_type(f)) { case UPB_TYPE_FLOAT: return upb_msgval_float(upb_fielddef_defaultfloat(f)); @@ -150,23 +158,14 @@ static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) { /** upb_msglayout *************************************************************/ struct upb_msglayout { - const upb_msgdef *msgdef; /* TODO(haberman): remove. */ struct upb_msglayout_msginit_v1 data; }; -static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) { - UPB_ASSERT(l->msgdef == upb_fielddef_containingtype(f)); -} - static void upb_msglayout_free(upb_msglayout *l) { upb_gfree(l->data.default_msg); upb_gfree(l); } -const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l) { - return l->msgdef; -} - static size_t upb_msglayout_place(upb_msglayout *l, size_t size) { size_t ret; @@ -187,8 +186,7 @@ static uint32_t upb_msglayout_hasbit(const upb_msglayout *l, return l->data.fields[upb_fielddef_index(f)].hasbit; } -static bool upb_msglayout_initdefault(upb_msglayout *l) { - const upb_msgdef *m = l->msgdef; +static bool upb_msglayout_initdefault(upb_msglayout *l, const upb_msgdef *m) { upb_msg_field_iter it; if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->data.size) { @@ -211,7 +209,10 @@ static bool upb_msglayout_initdefault(upb_msglayout *l) { if (!upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f) && !upb_fielddef_isseq(f)) { - upb_msg_set(l->data.default_msg, f, upb_msgval_fromdefault(f), l); + upb_msg_set(l->data.default_msg, + upb_fielddef_index(f), + upb_msgval_fromdefault(f), + l); } } } @@ -225,6 +226,9 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { upb_msglayout *l; size_t hasbit; size_t submsg_count = 0; + const upb_msglayout_msginit_v1 **submsgs; + upb_msglayout_fieldinit_v1 *fields; + upb_msglayout_oneofinit_v1 *oneofs; for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8; !upb_msg_field_done(&it); @@ -239,14 +243,18 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { if (!l) return NULL; memset(l, 0, sizeof(*l)); - l->msgdef = m; /* TODO(haberman): check OOM. */ - l->data.fields = upb_gmalloc(upb_msgdef_numfields(m) * - sizeof(struct upb_msglayout_fieldinit_v1)); - l->data.submsgs = upb_gmalloc(submsg_count * sizeof(void*)); - l->data.case_offsets = upb_gmalloc(upb_msgdef_numoneofs(m) * - sizeof(*l->data.case_offsets)); + fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields)); + submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs)); + oneofs = upb_gmalloc(upb_msgdef_numoneofs(m) * sizeof(*oneofs)); + + l->data.field_count = upb_msgdef_numfields(m); + l->data.oneof_count = upb_msgdef_numoneofs(m); + l->data.fields = fields; + l->data.submsgs = submsgs; + l->data.oneofs = oneofs; + l->data.is_proto2 = (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2); /* Allocate data offsets in three stages: * @@ -257,14 +265,25 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { * OPT: There is a lot of room for optimization here to minimize the size. */ - /* Allocate hasbits. Start at sizeof(void*) for upb_alloc*. */ - for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8; + /* Allocate hasbits and set basic field attributes. */ + for (upb_msg_field_begin(&it, m), hasbit = 0; !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* f = upb_msg_iter_field(&it); + upb_msglayout_fieldinit_v1 *field = &fields[upb_fielddef_index(f)]; + + field->number = upb_fielddef_number(f); + field->type = upb_fielddef_type(f); + field->label = upb_fielddef_label(f); + + if (upb_fielddef_containingoneof(f)) { + field->oneof_index = upb_oneofdef_index(upb_fielddef_containingoneof(f)); + } else { + field->oneof_index = UPB_NOT_IN_ONEOF; + } if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) { - l->data.fields[upb_fielddef_index(f)].hasbit = hasbit++; + field->hasbit = hasbit++; } } @@ -276,7 +295,7 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* f = upb_msg_iter_field(&it); - size_t field_size = upb_msg_fieldsize(f); + size_t field_size = upb_msg_fielddefsize(f); size_t index = upb_fielddef_index(f); if (upb_fielddef_containingoneof(f)) { @@ -284,47 +303,38 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { continue; } - l->data.fields[index].offset = upb_msglayout_place(l, field_size); + fields[index].offset = upb_msglayout_place(l, field_size); } /* Allocate oneof fields. Each oneof field consists of a uint32 for the case * and space for the actual data. */ for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit); upb_msg_oneof_next(&oit)) { - const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); + const upb_oneofdef* o = upb_msg_iter_oneof(&oit); upb_oneof_iter fit; + size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */ + upb_msglayout_oneofinit_v1 *oneof = &oneofs[upb_oneofdef_index(o)]; size_t field_size = 0; - size_t case_offset; - size_t val_offset; /* Calculate field size: the max of all field sizes. */ - for (upb_oneof_begin(&fit, oneof); + for (upb_oneof_begin(&fit, o); !upb_oneof_done(&fit); upb_oneof_next(&fit)) { const upb_fielddef* f = upb_oneof_iter_field(&fit); - field_size = UPB_MAX(field_size, upb_msg_fieldsize(f)); + field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); } /* Align and allocate case offset. */ - case_offset = upb_msglayout_place(l, case_size); - val_offset = upb_msglayout_place(l, field_size); - - l->data.case_offsets[upb_oneofdef_index(oneof)] = case_offset; - - /* Assign all fields in the oneof this same offset. */ - for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit); - upb_oneof_next(&fit)) { - const upb_fielddef* f = upb_oneof_iter_field(&fit); - l->data.fields[upb_fielddef_index(f)].offset = val_offset; - } + oneof->case_offset = upb_msglayout_place(l, case_size); + oneof->data_offset = upb_msglayout_place(l, field_size); } /* Size of the entire structure should be a multiple of its greatest * alignment. */ l->data.size = align_up(l->data.size, l->data.align); - if (upb_msglayout_initdefault(l)) { + if (upb_msglayout_initdefault(l, m)) { return l; } else { upb_msglayout_free(l); @@ -506,13 +516,14 @@ static upb_selector_t getsel2(const upb_fielddef *f, upb_handlertype_t type) { static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f, const upb_msglayout *layout) { + int field_index = upb_fielddef_index(f); if (upb_fielddef_isseq(f)) { - return upb_msgval_getarr(upb_msg_get(msg, f, layout)) != NULL; + return upb_msgval_getarr(upb_msg_get(msg, field_index, layout)) != NULL; } else if (upb_msgdef_syntax(upb_fielddef_containingtype(f)) == UPB_SYNTAX_PROTO2) { - return upb_msg_has(msg, f, layout); + return upb_msg_has(msg, field_index, layout); } else { - upb_msgval val = upb_msg_get(msg, f, layout); + upb_msgval val = upb_msg_get(msg, field_index, layout); switch (upb_fielddef_type(f)) { case UPB_TYPE_FLOAT: return upb_msgval_getfloat(val) != 0; @@ -542,7 +553,7 @@ static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f, static bool upb_visitor_visitmsg2(const upb_msg *msg, const upb_msglayout *layout, upb_sink *sink, int depth) { - const upb_msgdef *md = upb_msglayout_msgdef(layout); + const upb_msgdef *md = upb_handlers_msgdef(sink->handlers); upb_msg_field_iter i; upb_status status; @@ -564,7 +575,7 @@ static bool upb_visitor_visitmsg2(const upb_msg *msg, continue; } - val = upb_msg_get(msg, f, layout); + val = upb_msg_get(msg, upb_fielddef_index(f), layout); if (upb_fielddef_isseq(f)) { const upb_array *arr = upb_msgval_getarr(val); @@ -633,7 +644,7 @@ bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) { /* If we always read/write as a consistent type to each address, this shouldn't * violate aliasing. */ -#define DEREF(msg, ofs, type) *(type*)VOIDPTR_AT(msg, ofs) +#define DEREF(msg, ofs, type) *PTR_AT(msg, ofs, type) /* Internal members of a upb_msg. We can change this without breaking binary * compatibility. We put these before the user's data. The user's upb_msg* @@ -668,62 +679,21 @@ static upb_msg_internal_withext *upb_msg_getinternalwithext( return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); } -static const upb_msg_internal_withext *upb_msg_getinternalwithext_const( - const upb_msg *msg, const upb_msglayout *l) { - UPB_ASSERT(l->data.extendable); - return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); +static const upb_msglayout_fieldinit_v1 *upb_msg_checkfield( + int field_index, const upb_msglayout *l) { + UPB_ASSERT(field_index >= 0 && field_index < l->data.field_count); + return &l->data.fields[field_index]; } -static upb_inttable *upb_msg_trygetextdict(const upb_msg *msg, - const upb_msglayout *l) { - return upb_msg_getinternalwithext_const(msg, l)->extdict; +static bool upb_msg_inoneof(const upb_msglayout_fieldinit_v1 *field) { + return field->oneof_index != UPB_NOT_IN_ONEOF; } -static upb_inttable *upb_msg_getextdict(upb_msg *msg, - const upb_msglayout *l, - upb_alloc *a) { - upb_inttable *ext_dict; - - ext_dict = upb_msg_getinternalwithext(msg, l)->extdict; - - if (!ext_dict) { - ext_dict = upb_malloc(a, sizeof(upb_inttable)); - - if (!ext_dict) { - return NULL; - } - - /* Use an 8-byte type to ensure all bytes are copied. */ - if (!upb_inttable_init2(ext_dict, UPB_CTYPE_INT64, a)) { - upb_free(a, ext_dict); - return NULL; - } - - upb_msg_getinternalwithext(msg, l)->extdict = ext_dict; - } - - return ext_dict; -} - -static uint32_t upb_msg_getoneofint(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l) { - size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; - return DEREF(msg, oneof_ofs, uint8_t); -} - -static void upb_msg_setoneofcase(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l, - uint32_t val) { - size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; - DEREF(msg, oneof_ofs, uint8_t) = val; -} - - -static bool upb_msg_oneofis(const upb_msg *msg, const upb_msglayout *l, - const upb_oneofdef *o, const upb_fielddef *f) { - return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); +static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index, + const upb_msglayout *l) { + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + UPB_ASSERT(upb_msg_inoneof(field)); + return PTR_AT(msg, l->data.oneofs[field->oneof_index].case_offset, uint32_t); } size_t upb_msg_sizeof(const upb_msglayout *l) { @@ -772,82 +742,52 @@ upb_alloc *upb_msg_alloc(const upb_msg *msg) { } bool upb_msg_has(const upb_msg *msg, - const upb_fielddef *f, + int field_index, const upb_msglayout *l) { - const upb_oneofdef *o; - upb_msg_checkfield(l, f); - UPB_ASSERT(upb_fielddef_haspresence(f)); - - if (upb_fielddef_isextension(f)) { - /* Extensions are set when they are present in the extension dict. */ - upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); - upb_value v; - return ext_dict != NULL && - upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &v); - } else if ((o = upb_fielddef_containingoneof(f)) != NULL) { + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + + UPB_ASSERT(l->data.is_proto2); + + if (upb_msg_inoneof(field)) { /* Oneofs are set when the oneof number is set to this field. */ - return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); + return *upb_msg_oneofcase(msg, field_index, l) == field->number; } else { /* Other fields are set when their hasbit is set. */ - uint32_t hasbit = l->data.fields[upb_fielddef_index(f)].hasbit; + uint32_t hasbit = l->data.fields[field_index].hasbit; return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8)); } } -upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f, +upb_msgval upb_msg_get(const upb_msg *msg, int field_index, const upb_msglayout *l) { - upb_msg_checkfield(l, f); + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + int size = upb_msg_fieldsize(field); - if (upb_fielddef_isextension(f)) { - upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); - upb_value val; - if (upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &val)) { - return upb_msgval_fromval(val); + if (upb_msg_inoneof(field)) { + if (*upb_msg_oneofcase(msg, field_index, l) == field->number) { + size_t ofs = l->data.oneofs[field->oneof_index].data_offset; + return upb_msgval_read(msg, ofs, size); } else { - return upb_msgval_fromdefault(f); + /* Return default. */ + return upb_msgval_read(l->data.default_msg, field->offset, size); } } else { - size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; - const upb_oneofdef *o = upb_fielddef_containingoneof(f); - upb_msgval ret; - - if (o && !upb_msg_oneofis(msg, l, o, f)) { - /* Oneof defaults can't come from the message because the memory is reused - * by all types in the oneof. */ - return upb_msgval_fromdefault(f); - } - - ret = upb_msgval_read(msg, ofs, upb_msg_fieldsize(f)); - return ret; + return upb_msgval_read(msg, field->offset, size); } } -bool upb_msg_set(upb_msg *msg, - const upb_fielddef *f, - upb_msgval val, +void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val, const upb_msglayout *l) { - upb_msg_checkfield(l, f); - - if (upb_fielddef_isextension(f)) { - upb_alloc *a = upb_msg_alloc(msg); - /* TODO(haberman): introduce table API that can do this in one call. */ - upb_inttable *ext = upb_msg_getextdict(msg, l, a); - upb_value val2 = upb_toval(val); - if (!upb_inttable_replace(ext, upb_fielddef_number(f), val2) && - !upb_inttable_insert2(ext, upb_fielddef_number(f), val2, a)) { - return false; - } - } else { - size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; - const upb_oneofdef *o = upb_fielddef_containingoneof(f); - - if (o) { - upb_msg_setoneofcase(msg, o, l, upb_fielddef_number(f)); - } + const upb_msglayout_fieldinit_v1 *field = upb_msg_checkfield(field_index, l); + int size = upb_msg_fieldsize(field); - upb_msgval_write(msg, ofs, val, upb_msg_fieldsize(f)); + if (upb_msg_inoneof(field)) { + size_t ofs = l->data.oneofs[field->oneof_index].data_offset; + *upb_msg_oneofcase(msg, field_index, l) = field->number; + upb_msgval_write(msg, ofs, val, size); + } else { + upb_msgval_write(msg, field->offset, val, size); } - return true; } diff --git a/upb/msg.h b/upb/msg.h index c2b5cf0..1d888b8 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -65,9 +65,6 @@ typedef void upb_msg; * instances of this from a upb_msgfactory, and the factory always owns the * msglayout. */ -/* Returns the msgdef for this msglayout. */ -const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l); - /** upb_visitor ***************************************************************/ @@ -242,25 +239,14 @@ void *upb_msg_pack(const upb_msg *msg, const upb_msglayout *l, * arenas). */ upb_msgval upb_msg_get(const upb_msg *msg, - const upb_fielddef *f, + int field_index, const upb_msglayout *l); /* May only be called for fields where upb_fielddef_haspresence(f) == true. */ bool upb_msg_has(const upb_msg *msg, - const upb_fielddef *f, + int field_index, const upb_msglayout *l); -/* Returns NULL if no field in the oneof is set. */ -const upb_fielddef *upb_msg_getoneofcase(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l); - -/* Returns true if any field in the oneof is set. */ -bool upb_msg_hasoneof(const upb_msg *msg, - const upb_oneofdef *o, - const upb_msglayout *l); - - /* Mutable message API. May only be called by the owner of the message who * knows its ownership scheme and how to keep it consistent. */ @@ -268,8 +254,8 @@ bool upb_msg_hasoneof(const upb_msg *msg, * management: if you overwrite a pointer to a msg/array/map/string without * cleaning it up (or using an arena) it will leak. */ -bool upb_msg_set(upb_msg *msg, - const upb_fielddef *f, +void upb_msg_set(upb_msg *msg, + int field_index, upb_msgval val, const upb_msglayout *l); @@ -280,12 +266,7 @@ bool upb_msg_set(upb_msg *msg, * arrays/maps/strings/msgs that this field may have pointed to. */ bool upb_msg_clearfield(upb_msg *msg, - const upb_fielddef *f, - const upb_msglayout *l); - -/* Clears all fields in the oneof such that none of them are set. */ -bool upb_msg_clearoneof(upb_msg *msg, - const upb_oneofdef *o, + int field_index, const upb_msglayout *l); /* TODO(haberman): copyfrom()/mergefrom()? */ @@ -401,38 +382,49 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h, /** Interfaces for generated code *********************************************/ -struct upb_msglayout_strinit_v1 { +#define UPB_NOT_IN_ONEOF UINT16_MAX + +typedef struct { const char *ptr; uint32_t length; -}; +} upb_msglayout_strinit_v1; -struct upb_msglayout_fieldinit_v1 { +typedef struct { uint32_t number; - uint32_t offset; + uint32_t offset; /* If in a oneof, offset of default in default_msg below. */ uint16_t hasbit; - uint16_t oneof_index; + uint16_t oneof_index; /* UPB_NOT_IN_ONEOF if not in a oneof. */ uint16_t submsg_index; uint8_t type; uint8_t label; -}; +} upb_msglayout_fieldinit_v1; + +typedef struct { + uint32_t data_offset; + uint32_t case_offset; +} upb_msglayout_oneofinit_v1; -struct upb_msglayout_msginit_v1 { - struct upb_msglayout_fieldinit_v1 *fields; - struct upb_msglayout_msginit_v1 **submsgs; +typedef struct upb_msglayout_msginit_v1 { + const struct upb_msglayout_msginit_v1 **submsgs; + const upb_msglayout_fieldinit_v1 *fields; + const upb_msglayout_oneofinit_v1 *oneofs; uint32_t *case_offsets; void *default_msg; /* Must be aligned to 8. Doesn't include internal members like unknown * fields, extension dict, pointer to msglayout, etc. */ uint32_t size; + uint16_t field_count; + uint16_t oneof_count; bool extendable; + bool is_proto2; char align; -}; +} upb_msglayout_msginit_v1; /* Initialize/uninitialize a msglayout from a msginit. If upb uses v1 * internally, this will not allocate any memory. Should only be used by * generated code. */ upb_msglayout *upb_msglayout_frominit_v1( - const struct upb_msglayout_msginit_v1 *init, upb_alloc *a); + const upb_msglayout_msginit_v1 *init, upb_alloc *a); void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a); UPB_END_EXTERN_C -- cgit v1.2.3