From a105c015b106b6be31fc45fdd4c568684815801c Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Thu, 6 Sep 2018 23:16:58 -0700 Subject: Added support for unknown fields to upb_msg. After this CL, upb passes all existing proto3 conformance tests. However the conformance suite is missing a lot of cases and should be fleshed out. --- upb/decode.c | 8 ++++---- upb/encode.c | 8 ++++++++ upb/msg.c | 32 ++++++++++++++++++++++++++++++-- upb/msg.h | 3 +++ 4 files changed, 45 insertions(+), 6 deletions(-) (limited to 'upb') diff --git a/upb/decode.c b/upb/decode.c index cd13125..fc0e644 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -130,9 +130,7 @@ static void upb_set32(void *msg, size_t ofs, uint32_t val) { static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame, const char *start) { - UPB_UNUSED(d); - UPB_UNUSED(frame); - UPB_UNUSED(start); + upb_msg_addunknown(frame->msg, start, d->ptr - start); return true; } @@ -545,7 +543,9 @@ static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) { } } else { CHK(field_number != 0); - return upb_skip_unknownfielddata(d, frame, field_number, wire_type); + CHK(upb_skip_unknownfielddata(d, frame, field_number, wire_type)); + CHK(upb_append_unknown(d, frame, field_start)); + return true; } } diff --git a/upb/encode.c b/upb/encode.c index a9f2c0d..bff8262 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -331,6 +331,8 @@ bool upb_encode_message(upb_encstate *e, const char *msg, const upb_msglayout *m, size_t *size) { int i; size_t pre_len = e->limit - e->ptr; + const char *unknown; + size_t unknown_size; for (i = m->field_count - 1; i >= 0; i--) { const upb_msglayout_field *f = &m->fields[i]; @@ -357,6 +359,12 @@ bool upb_encode_message(upb_encstate *e, const char *msg, } } + unknown = upb_msg_getunknown(msg, &unknown_size); + + if (unknown) { + upb_put_bytes(e, unknown, unknown_size); + } + *size = (e->limit - e->ptr) - pre_len; return true; } diff --git a/upb/msg.c b/upb/msg.c index b70fe40..4125d4c 100644 --- 
a/upb/msg.c +++ b/upb/msg.c @@ -113,8 +113,12 @@ static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) { /* Used when a message is not extendable. */ typedef struct { - /* TODO(haberman): add unknown fields. */ + /* TODO(haberman): use pointer tagging so we are slim when known unknown + * fields are not present. */ upb_arena *arena; + char *unknown; + size_t unknown_len; + size_t unknown_size; } upb_msg_internal; /* Used when a message is extendable. */ @@ -141,6 +145,25 @@ static upb_msg_internal_withext *upb_msg_getinternalwithext( return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); } +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len) { + upb_msg_internal* in = upb_msg_getinternal(msg); + if (len > in->unknown_size - in->unknown_len) { + upb_alloc *alloc = upb_arena_alloc(in->arena); + size_t need = in->unknown_size + len; + size_t newsize = UPB_MAX(in->unknown_size * 2, need); + in->unknown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); + in->unknown_size = newsize; + } + memcpy(in->unknown + in->unknown_len, data, len); + in->unknown_len += len; +} + +const char *upb_msg_getunknown(upb_msg *msg, size_t *len) { + upb_msg_internal* in = upb_msg_getinternal(msg); + *len = in->unknown_len; + return in->unknown; +} + static const upb_msglayout_field *upb_msg_checkfield(int field_index, const upb_msglayout *l) { UPB_ASSERT(field_index >= 0 && field_index < l->field_count); @@ -165,6 +188,7 @@ static size_t upb_msg_sizeof(const upb_msglayout *l) { upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) { upb_alloc *alloc = upb_arena_alloc(a); void *mem = upb_malloc(alloc, upb_msg_sizeof(l)); + upb_msg_internal *in; upb_msg *msg; if (!mem) { @@ -177,7 +201,11 @@ upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) { memset(msg, 0, l->size); /* Initialize internal members. 
*/ - upb_msg_getinternal(msg)->arena = a; + in = upb_msg_getinternal(msg); + in->arena = a; + in->unknown = NULL; + in->unknown_len = 0; + in->unknown_size = 0; if (l->extendable) { upb_msg_getinternalwithext(msg, l)->extdict = NULL; diff --git a/upb/msg.h b/upb/msg.h index 8236799..3de5bab 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -154,6 +154,9 @@ upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); /* Returns the arena for the given message. */ upb_arena *upb_msg_arena(const upb_msg *msg); +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len); +const char *upb_msg_getunknown(upb_msg *msg, size_t *len); + /* Read-only message API. Can be safely called by anyone. */ /* Returns the value associated with this field: -- cgit v1.2.3 From 950d7a9530701c52edd93569364934b7944a197c Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Thu, 6 Sep 2018 23:32:08 -0700 Subject: Fixed warnings. --- upb/msg.c | 4 ++-- upb/msg.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'upb') diff --git a/upb/msg.c b/upb/msg.c index 4125d4c..29ba2e1 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -158,8 +158,8 @@ void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len) { in->unknown_len += len; } -const char *upb_msg_getunknown(upb_msg *msg, size_t *len) { - upb_msg_internal* in = upb_msg_getinternal(msg); +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { + const upb_msg_internal* in = upb_msg_getinternal_const(msg); *len = in->unknown_len; return in->unknown; } diff --git a/upb/msg.h b/upb/msg.h index 3de5bab..c72e9f0 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -155,7 +155,7 @@ upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); upb_arena *upb_msg_arena(const upb_msg *msg); void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len); -const char *upb_msg_getunknown(upb_msg *msg, size_t *len); +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); /* Read-only message API. Can be safely called by anyone. 
*/ -- cgit v1.2.3