summaryrefslogtreecommitdiff
path: root/upb/msg.c
diff options
context:
space:
mode:
author: Joshua Haberman <jhaberman@gmail.com> 2017-07-01 15:15:52 -0700
committer: Josh Haberman <jhaberman@gmail.com> 2017-07-02 00:28:25 +0100
commit: 1b9d37a00ebae8b59773c8501d8712e1c3335302 (patch)
tree: 1295ee5099609f5a6c05689adfa7962ad4cdc5ab /upb/msg.c
parent: e38098cbfc58c0e2911b2c2b2abff043de42a85b (diff)
Start migrating upb_msglayout to be suitable for generated code.
This involves: - remove upb_msglayout -> upb_msgfactory dependency. - remove upb_msglayout -> upb_msgdef dependency (in progress). - make upb_msglayout use a representation that can be statically initialized by generated code. The goal here is that upb_msglayout becomes a kind of "descriptor lite": it contains enough data to parse and serialize protobufs and manipulate a upb_msg in memory, while being far smaller and simpler than a full descriptor. It also does not include field names, which can be a benefit for applications that do not want to leak field names. Generated code can then create a upb_msglayout, and do most things without ever needing to construct full descriptors/defs if they don't want to.
Diffstat (limited to 'upb/msg.c')
-rw-r--r--upb/msg.c210
1 files changed, 128 insertions, 82 deletions
diff --git a/upb/msg.c b/upb/msg.c
index 39e3035..26d0e98 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -24,7 +24,7 @@ bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) {
void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size);
void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size);
-#define CHARPTR_AT(msg, ofs) ((char*)msg + ofs)
+#define VOIDPTR_AT(msg, ofs) (void*)((char*)msg + ofs)
#define ENCODE_MAX_NESTING 64
#define CHECK_TRUE(x) if (!(x)) { return false; }
@@ -150,16 +150,8 @@ static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) {
/** upb_msglayout *************************************************************/
struct upb_msglayout {
- upb_msgfactory *factory;
- const upb_msgdef *msgdef;
- size_t size;
- size_t extdict_offset;
- void *default_msg;
- uint32_t *field_offsets;
- uint32_t *case_offsets;
- uint32_t *hasbits;
- bool has_extdict;
- uint8_t align;
+ const upb_msgdef *msgdef; /* TODO(haberman): remove. */
+ struct upb_msglayout_msginit_v1 data;
};
static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) {
@@ -167,7 +159,7 @@ static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) {
}
static void upb_msglayout_free(upb_msglayout *l) {
- upb_gfree(l->default_msg);
+ upb_gfree(l->data.default_msg);
upb_gfree(l);
}
@@ -178,35 +170,35 @@ const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l) {
static size_t upb_msglayout_place(upb_msglayout *l, size_t size) {
size_t ret;
- l->size = align_up(l->size, size);
- l->align = align_up(l->align, size);
- ret = l->size;
- l->size += size;
+ l->data.size = align_up(l->data.size, size);
+ l->data.align = align_up(l->data.align, size);
+ ret = l->data.size;
+ l->data.size += size;
return ret;
}
static uint32_t upb_msglayout_offset(const upb_msglayout *l,
const upb_fielddef *f) {
- return l->field_offsets[upb_fielddef_index(f)];
+ return l->data.fields[upb_fielddef_index(f)].offset;
}
static uint32_t upb_msglayout_hasbit(const upb_msglayout *l,
const upb_fielddef *f) {
- return l->hasbits[upb_fielddef_index(f)];
+ return l->data.fields[upb_fielddef_index(f)].hasbit;
}
static bool upb_msglayout_initdefault(upb_msglayout *l) {
const upb_msgdef *m = l->msgdef;
upb_msg_field_iter it;
- if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->size) {
+ if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->data.size) {
/* Allocate default message and set default values in it. */
- l->default_msg = upb_gmalloc(l->size);
- if (!l->default_msg) {
+ l->data.default_msg = upb_gmalloc(l->data.size);
+ if (!l->data.default_msg) {
return false;
}
- memset(l->default_msg, 0, l->size);
+ memset(l->data.default_msg, 0, l->data.size);
for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
upb_msg_field_next(&it)) {
@@ -219,7 +211,7 @@ static bool upb_msglayout_initdefault(upb_msglayout *l) {
if (!upb_fielddef_isstring(f) &&
!upb_fielddef_issubmsg(f) &&
!upb_fielddef_isseq(f)) {
- upb_msg_set(l->default_msg, f, upb_msgval_fromdefault(f), l);
+ upb_msg_set(l->data.default_msg, f, upb_msgval_fromdefault(f), l);
}
}
}
@@ -232,22 +224,29 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) {
upb_msg_oneof_iter oit;
upb_msglayout *l;
size_t hasbit;
- size_t array_size = upb_msgdef_numfields(m) + upb_msgdef_numoneofs(m);
+ size_t submsg_count = 0;
- if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2) {
- array_size += upb_msgdef_numfields(m); /* hasbits. */
+ for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8;
+ !upb_msg_field_done(&it);
+ upb_msg_field_next(&it)) {
+ const upb_fielddef* f = upb_msg_iter_field(&it);
+ if (upb_fielddef_issubmsg(f)) {
+ submsg_count++;
+ }
}
- l = upb_gmalloc(sizeof(*l) + (sizeof(uint32_t) * array_size));
+ l = upb_gmalloc(sizeof(*l));
if (!l) return NULL;
memset(l, 0, sizeof(*l));
-
l->msgdef = m;
- l->align = 1;
- l->field_offsets = (uint32_t*)CHARPTR_AT(l, sizeof(*l));
- l->case_offsets = l->field_offsets + upb_msgdef_numfields(m);
- l->hasbits = l->case_offsets + upb_msgdef_numoneofs(m);
+
+ /* TODO(haberman): check OOM. */
+ l->data.fields = upb_gmalloc(upb_msgdef_numfields(m) *
+ sizeof(struct upb_msglayout_fieldinit_v1));
+ l->data.submsgs = upb_gmalloc(submsg_count * sizeof(void*));
+ l->data.case_offsets = upb_gmalloc(upb_msgdef_numoneofs(m) *
+ sizeof(*l->data.case_offsets));
/* Allocate data offsets in three stages:
*
@@ -265,12 +264,13 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) {
const upb_fielddef* f = upb_msg_iter_field(&it);
if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) {
- l->hasbits[upb_fielddef_index(f)] = hasbit++;
+ l->data.fields[upb_fielddef_index(f)].hasbit = hasbit++;
}
}
/* Account for space used by hasbits. */
- l->size = div_round_up(hasbit, 8);
+ l->data.size = div_round_up(hasbit, 8);
+ l->data.align = 1;
/* Allocate non-oneof fields. */
for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
@@ -279,13 +279,12 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) {
size_t field_size = upb_msg_fieldsize(f);
size_t index = upb_fielddef_index(f);
-
if (upb_fielddef_containingoneof(f)) {
/* Oneofs are handled separately below. */
continue;
}
- l->field_offsets[index] = upb_msglayout_place(l, field_size);
+ l->data.fields[index].offset = upb_msglayout_place(l, field_size);
}
/* Allocate oneof fields. Each oneof field consists of a uint32 for the case
@@ -311,19 +310,19 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) {
case_offset = upb_msglayout_place(l, case_size);
val_offset = upb_msglayout_place(l, field_size);
- l->case_offsets[upb_oneofdef_index(oneof)] = case_offset;
+ l->data.case_offsets[upb_oneofdef_index(oneof)] = case_offset;
/* Assign all fields in the oneof this same offset. */
for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit);
upb_oneof_next(&fit)) {
const upb_fielddef* f = upb_oneof_iter_field(&fit);
- l->field_offsets[upb_fielddef_index(f)] = val_offset;
+ l->data.fields[upb_fielddef_index(f)].offset = val_offset;
}
}
/* Size of the entire structure should be a multiple of its greatest
* alignment. */
- l->size = align_up(l->size, l->align);
+ l->data.size = align_up(l->data.size, l->data.align);
if (upb_msglayout_initdefault(l)) {
return l;
@@ -333,8 +332,17 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) {
}
}
-upb_msgfactory *upb_msglayout_factory(const upb_msglayout *layout) {
- return layout->factory;
+upb_msglayout *upb_msglayout_frominit_v1(
+ const struct upb_msglayout_msginit_v1 *init, upb_alloc *a) {
+ UPB_UNUSED(a);
+ /* If upb upgrades to a v2, this would create a heap-allocated v2. */
+ return (upb_msglayout*)init;
+}
+
+void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a) {
+ UPB_UNUSED(layout);
+ UPB_UNUSED(a);
+ /* If upb upgrades to a v2, this would free the heap-allocated v2. */
}
@@ -393,7 +401,6 @@ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
upb_msglayout *l = upb_msglayout_new(m);
upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l));
UPB_ASSERT(l);
- l->factory = f;
return l;
}
}
@@ -402,8 +409,7 @@ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) {
uint32_t ofs = (uintptr_t)hd;
- /* We pass NULL here because we know we can get away with it. */
- upb_alloc *alloc = upb_msg_alloc(msg, NULL);
+ upb_alloc *alloc = upb_msg_alloc(msg);
upb_msgval val;
UPB_UNUSED(size_hint);
@@ -420,8 +426,7 @@ void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) {
size_t upb_msg_str(void *msg, const void *hd, const char *ptr, size_t size,
const upb_bufhandle *handle) {
uint32_t ofs = (uintptr_t)hd;
- /* We pass NULL here because we know we can get away with it. */
- upb_alloc *alloc = upb_msg_alloc(msg, NULL);
+ upb_alloc *alloc = upb_msg_alloc(msg);
upb_msgval val;
size_t newsize;
UPB_UNUSED(handle);
@@ -628,20 +633,58 @@ bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) {
/* If we always read/write as a consistent type to each address, this shouldn't
* violate aliasing.
*/
-#define DEREF(msg, ofs, type) *(type*)CHARPTR_AT(msg, ofs)
+#define DEREF(msg, ofs, type) *(type*)VOIDPTR_AT(msg, ofs)
+
+/* Internal members of a upb_msg. We can change this without breaking binary
+ * compatibility. We put these before the user's data. The user's upb_msg*
+ * points after the upb_msg_internal. */
+
+/* Used when a message is not extendable. */
+typedef struct {
+ /* TODO(haberman): add unknown fields. */
+ upb_alloc *alloc;
+} upb_msg_internal;
+
+/* Used when a message is extendable. */
+typedef struct {
+ upb_inttable *extdict;
+ upb_msg_internal base;
+} upb_msg_internal_withext;
+
+#define INTERNAL_MEMBERS_SIZE(l) \
+ sizeof(upb_msg_internal) - (l->data.extendable * sizeof(void*))
+
+static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
+ return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
+}
+
+static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
+ return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
+}
+
+static upb_msg_internal_withext *upb_msg_getinternalwithext(
+ upb_msg *msg, const upb_msglayout *l) {
+ UPB_ASSERT(l->data.extendable);
+ return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext));
+}
+
+static const upb_msg_internal_withext *upb_msg_getinternalwithext_const(
+ const upb_msg *msg, const upb_msglayout *l) {
+ UPB_ASSERT(l->data.extendable);
+ return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext));
+}
static upb_inttable *upb_msg_trygetextdict(const upb_msg *msg,
const upb_msglayout *l) {
- return l->has_extdict ? DEREF(msg, l->extdict_offset, upb_inttable*) : NULL;
+ return upb_msg_getinternalwithext_const(msg, l)->extdict;
}
static upb_inttable *upb_msg_getextdict(upb_msg *msg,
const upb_msglayout *l,
upb_alloc *a) {
upb_inttable *ext_dict;
- UPB_ASSERT(l->has_extdict);
- ext_dict = upb_msg_trygetextdict(msg, l);
+ ext_dict = upb_msg_getinternalwithext(msg, l)->extdict;
if (!ext_dict) {
ext_dict = upb_malloc(a, sizeof(upb_inttable));
@@ -656,7 +699,7 @@ static upb_inttable *upb_msg_getextdict(upb_msg *msg,
return NULL;
}
- DEREF(msg, l->extdict_offset, upb_inttable*) = ext_dict;
+ upb_msg_getinternalwithext(msg, l)->extdict = ext_dict;
}
return ext_dict;
@@ -665,7 +708,7 @@ static upb_inttable *upb_msg_getextdict(upb_msg *msg,
static uint32_t upb_msg_getoneofint(const upb_msg *msg,
const upb_oneofdef *o,
const upb_msglayout *l) {
- size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)];
+ size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)];
return DEREF(msg, oneof_ofs, uint8_t);
}
@@ -673,7 +716,7 @@ static void upb_msg_setoneofcase(const upb_msg *msg,
const upb_oneofdef *o,
const upb_msglayout *l,
uint32_t val) {
- size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)];
+ size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)];
DEREF(msg, oneof_ofs, uint8_t) = val;
}
@@ -683,46 +726,49 @@ static bool upb_msg_oneofis(const upb_msg *msg, const upb_msglayout *l,
return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f);
}
-size_t upb_msg_sizeof(const upb_msglayout *l) { return l->size; }
+size_t upb_msg_sizeof(const upb_msglayout *l) {
+ return l->data.size + INTERNAL_MEMBERS_SIZE(l);
+}
-void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a) {
- if (l->default_msg) {
- memcpy(msg, l->default_msg, l->size);
+upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) {
+ upb_msg *msg = VOIDPTR_AT(mem, INTERNAL_MEMBERS_SIZE(l));
+ if (l->data.default_msg) {
+ memcpy(msg, l->data.default_msg, l->data.size);
} else {
- memset(msg, 0, l->size);
+ memset(msg, 0, l->data.size);
+ }
+
+ upb_msg_getinternal(msg)->alloc = a;
+ if (l->data.extendable) {
+ upb_msg_getinternalwithext(msg, l)->extdict = NULL;
}
- /* Set arena pointer. */
- memcpy(msg, &a, sizeof(a));
+ return msg;
}
-void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) {
- upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l);
- if (ext_dict) {
- upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg, l));
+void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) {
+ if (l->data.extendable) {
+ upb_inttable *ext_dict = upb_msg_getinternalwithext(msg, l)->extdict;
+ if (ext_dict) {
+ upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg));
+ upb_free(upb_msg_alloc(msg), ext_dict);
+ }
}
+
+ return VOIDPTR_AT(msg, -INTERNAL_MEMBERS_SIZE(l));
}
upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) {
- upb_msg *msg = upb_malloc(a, upb_msg_sizeof(l));
-
- if (msg) {
- upb_msg_init(msg, l, a);
- }
-
- return msg;
+ void *mem = upb_malloc(a, upb_msg_sizeof(l));
+ return mem ? upb_msg_init(mem, l, a) : NULL;
}
void upb_msg_free(upb_msg *msg, const upb_msglayout *l) {
- upb_msg_uninit(msg, l);
- upb_free(upb_msg_alloc(msg, l), msg);
+ upb_free(upb_msg_alloc(msg), upb_msg_uninit(msg, l));
}
-upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l) {
- upb_alloc *alloc;
- UPB_UNUSED(l);
- memcpy(&alloc, msg, sizeof(alloc));
- return alloc;
+upb_alloc *upb_msg_alloc(const upb_msg *msg) {
+ return upb_msg_getinternal_const(msg)->alloc;
}
bool upb_msg_has(const upb_msg *msg,
@@ -743,7 +789,7 @@ bool upb_msg_has(const upb_msg *msg,
return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f);
} else {
/* Other fields are set when their hasbit is set. */
- uint32_t hasbit = l->hasbits[upb_fielddef_index(f)];
+ uint32_t hasbit = l->data.fields[upb_fielddef_index(f)].hasbit;
return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8));
}
}
@@ -761,7 +807,7 @@ upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f,
return upb_msgval_fromdefault(f);
}
} else {
- size_t ofs = l->field_offsets[upb_fielddef_index(f)];
+ size_t ofs = l->data.fields[upb_fielddef_index(f)].offset;
const upb_oneofdef *o = upb_fielddef_containingoneof(f);
upb_msgval ret;
@@ -780,10 +826,10 @@ bool upb_msg_set(upb_msg *msg,
const upb_fielddef *f,
upb_msgval val,
const upb_msglayout *l) {
- upb_alloc *a = upb_msg_alloc(msg, l);
upb_msg_checkfield(l, f);
if (upb_fielddef_isextension(f)) {
+ upb_alloc *a = upb_msg_alloc(msg);
/* TODO(haberman): introduce table API that can do this in one call. */
upb_inttable *ext = upb_msg_getextdict(msg, l, a);
upb_value val2 = upb_toval(val);
@@ -792,7 +838,7 @@ bool upb_msg_set(upb_msg *msg,
return false;
}
} else {
- size_t ofs = l->field_offsets[upb_fielddef_index(f)];
+ size_t ofs = l->data.fields[upb_fielddef_index(f)].offset;
const upb_oneofdef *o = upb_fielddef_containingoneof(f);
if (o) {
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback