From 1aafd4111b9b6d08d2d0937b0f396a4caa9ea04d Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Sat, 8 Jul 2017 00:00:05 -0700 Subject: A good start on upb_encode and upb_decode. --- tests/conformance_upb.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'tests') diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c index 9f83e80..e1221b2 100644 --- a/tests/conformance_upb.c +++ b/tests/conformance_upb.c @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -52,7 +53,7 @@ void DoTest( if (!test_message) { /* TODO(haberman): return details. */ - static char msg[] = "Parse error (no more details available)."; + static const char msg[] = "Parse error (no more details available)."; conformance_ConformanceResponse_set_parse_error( response, upb_stringview_make(msg, sizeof(msg))); return; @@ -60,20 +61,20 @@ void DoTest( break; case conformance_ConformanceRequest_payload_json_payload: { - static char msg[] = "JSON support not yet implemented."; + static const char msg[] = "JSON support not yet implemented."; conformance_ConformanceResponse_set_skipped( response, upb_stringview_make(msg, sizeof(msg))); return; } case conformance_ConformanceRequest_payload_NOT_SET: - fprintf(stderr, "conformance_upb: Request didn't have payload."); - exit(1); + fprintf(stderr, "conformance_upb: Request didn't have payload.\n"); + return; } switch (conformance_ConformanceRequest_requested_output_format(request)) { case conformance_UNSPECIFIED: - fprintf(stderr, "conformance_upb: Unspecified output format."); + fprintf(stderr, "conformance_upb: Unspecified output format.\n"); exit(1); case conformance_PROTOBUF: { @@ -81,8 +82,10 @@ void DoTest( char *serialized = protobuf_test_messages_proto3_TestAllTypes_serialize( test_message, env, &serialized_len); if (!serialized) { - fprintf(stderr, "conformance_upb: Error serialiing."); - exit(1); + static const char msg[] = "Error serializing."; + conformance_ConformanceResponse_set_serialize_error( + response, upb_stringview_make(msg, sizeof(msg))); + return; } conformance_ConformanceResponse_set_protobuf_payload( response, upb_stringview_make(serialized, serialized_len)); @@ -90,14 +93,14 @@ void DoTest( } case conformance_JSON: { - static char msg[] = "JSON support not yet implemented."; + static const char msg[] = "JSON support not yet implemented."; conformance_ConformanceResponse_set_skipped( response, upb_stringview_make(msg, sizeof(msg))); break; } default: - fprintf(stderr, "conformance_upb: Unknown output format: %d", + fprintf(stderr, "conformance_upb: Unknown output format: %d\n", conformance_ConformanceRequest_requested_output_format(request)); exit(1); } @@ -111,7 +114,7 @@ bool DoTestIo() { char *serialized_input; char *serialized_output; uint32_t input_size; - size_t output_size; + size_t output_size = 0; conformance_ConformanceRequest *request; conformance_ConformanceResponse *response; -- cgit v1.2.3 From be9094d91a2da777002a0f713306ac1bb74a6ac5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 17 Jul 2017 21:54:38 +0200 Subject: New encode/decode: most (171 / 192) conformance tests pass. --- tests/conformance_upb_failures.txt | 21 ++ tools/make_c_api.lua | 6 +- upb/decode.c | 727 ++++++++++++++++++++++--------------- upb/encode.c | 88 ++--- upb/msg.c | 2 +- upb/msg.h | 7 +- upb/pb/varint.int.h | 14 +- upb/upb.h | 12 +- 8 files changed, 525 insertions(+), 352 deletions(-) create mode 100644 tests/conformance_upb_failures.txt (limited to 'tests') diff --git a/tests/conformance_upb_failures.txt b/tests/conformance_upb_failures.txt new file mode 100644 index 0000000..7ff834a --- /dev/null +++ b/tests/conformance_upb_failures.txt @@ -0,0 +1,21 @@ +Recommended.ProtobufInput.OneofZeroBool.ProtobufOutput +Recommended.ProtobufInput.OneofZeroBytes.ProtobufOutput +Recommended.ProtobufInput.OneofZeroDouble.ProtobufOutput +Recommended.ProtobufInput.OneofZeroEnum.ProtobufOutput +Recommended.ProtobufInput.OneofZeroFloat.ProtobufOutput +Recommended.ProtobufInput.OneofZeroString.ProtobufOutput +Recommended.ProtobufInput.OneofZeroUint32.ProtobufOutput +Recommended.ProtobufInput.OneofZeroUint64.ProtobufOutput +Required.ProtobufInput.PrematureEofInSubmessageValue.MESSAGE +Required.ProtobufInput.RepeatedScalarSelectsLast.BOOL.ProtobufOutput +Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED32.ProtobufOutput +Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED64.ProtobufOutput +Required.ProtobufInput.RepeatedScalarSelectsLast.UINT64.ProtobufOutput +Required.ProtobufInput.ValidDataRepeated.BOOL.ProtobufOutput +Required.ProtobufInput.ValidDataRepeated.INT32.ProtobufOutput +Required.ProtobufInput.ValidDataRepeated.INT64.ProtobufOutput +Required.ProtobufInput.ValidDataRepeated.SINT32.ProtobufOutput +Required.ProtobufInput.ValidDataRepeated.SINT64.ProtobufOutput +Required.ProtobufInput.ValidDataRepeated.UINT32.ProtobufOutput +Required.ProtobufInput.ValidDataRepeated.UINT64.ProtobufOutput +Required.ProtobufInput.ValidDataScalar.BOOL.ProtobufOutput diff --git a/tools/make_c_api.lua b/tools/make_c_api.lua index 9e6d734..cfcf38d 100644 --- a/tools/make_c_api.lua +++ b/tools/make_c_api.lua @@ -417,7 +417,7 @@ local function write_c_file(filedef, hfilename, append) append('static const upb_msglayout_fieldinit_v1 %s[%s] = {\n', fields_array_name, field_count) for _, field in ipairs(fields_number_order) do - local submsg_index = "-1" + local submsg_index = "UPB_NO_SUBMSG" local oneof_index = "UPB_NOT_IN_ONEOF" if field:type() == upb.TYPE_MESSAGE then submsg_index = submsg_indexes[field:subdef()] @@ -430,7 +430,7 @@ local function write_c_file(filedef, hfilename, append) field:number(), msgname, (field:containing_oneof() and field:containing_oneof():name()) or field:name(), - hasbit_indexes[field] or "-1", + hasbit_indexes[field] or "UPB_NO_HASBIT", oneof_index, submsg_index, field:descriptor_type(), @@ -448,7 +448,7 @@ local function write_c_file(filedef, hfilename, append) msgname, field_count, 0, -- TODO: oneof_count 'false', -- TODO: extendable - 'true' -- TODO: is_proto2 + msg:file():syntax() == upb.SYNTAX_PROTO2 ) append('};\n\n') diff --git a/upb/decode.c b/upb/decode.c index 7cffbe4..a1910a1 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -1,16 +1,32 @@ +#include "upb/upb.h" #include "upb/decode.h" #include "upb/structs.int.h" -typedef enum { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5 -} upb_wiretype_t; - +/* Maps descriptor type -> upb field type. */ +static const uint8_t upb_desctype_to_fieldtype[] = { + UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ + UPB_TYPE_DOUBLE, /* DOUBLE */ + UPB_TYPE_FLOAT, /* FLOAT */ + UPB_TYPE_INT64, /* INT64 */ + UPB_TYPE_UINT64, /* UINT64 */ + UPB_TYPE_INT32, /* INT32 */ + UPB_TYPE_UINT64, /* FIXED64 */ + UPB_TYPE_UINT32, /* FIXED32 */ + UPB_TYPE_BOOL, /* BOOL */ + UPB_TYPE_STRING, /* STRING */ + UPB_TYPE_MESSAGE, /* GROUP */ + UPB_TYPE_MESSAGE, /* MESSAGE */ + UPB_TYPE_BYTES, /* BYTES */ + UPB_TYPE_UINT32, /* UINT32 */ + UPB_TYPE_ENUM, /* ENUM */ + UPB_TYPE_INT32, /* SFIXED32 */ + UPB_TYPE_INT64, /* SFIXED64 */ + UPB_TYPE_INT32, /* SINT32 */ + UPB_TYPE_INT64, /* SINT64 */ +}; + +/* Data pertaining to the parse. */ typedef struct { upb_env *env; /* Current decoding pointer. Points to the beginning of a field until we @@ -18,20 +34,23 @@ typedef struct { const char *ptr; } upb_decstate; +/* Data pertaining to a single message frame. */ typedef struct { + const char *limit; int32_t group_number; /* 0 if we are not parsing a group. */ + + /* These members are unset for an unknown group frame. */ char *msg; const upb_msglayout_msginit_v1 *m; - const char *limit; } upb_decframe; #define CHK(x) if (!(x)) { return false; } -static void upb_decode_seterr(upb_env *env, const char *msg) { - upb_status status = UPB_STATUS_INIT; - upb_status_seterrmsg(&status, msg); - upb_env_reporterror(env, &status); -} +static bool upb_skip_unknowngroup(upb_decstate *d, int field_number, + const char *limit); +static bool upb_decode_message(upb_decstate *d, const char *limit, + int group_number, char *msg, + const upb_msglayout_msginit_v1 *l); static bool upb_decode_varint(const char **ptr, const char *limit, uint64_t *val) { @@ -41,10 +60,7 @@ static bool upb_decode_varint(const char **ptr, const char *limit, *val = 0; while (byte & 0x80) { - if (bitpos == 70 || p == limit) { - return false; - } - + CHK(bitpos < 70 && p < limit); byte = *p; *val |= (uint64_t)(byte & 0x7F) << bitpos; p++; @@ -58,47 +74,34 @@ static bool upb_decode_varint(const char **ptr, const char *limit, static bool upb_decode_varint32(const char **ptr, const char *limit, uint32_t *val) { uint64_t u64; - if (!upb_decode_varint(ptr, limit, &u64) || u64 > UINT32_MAX) { - return false; - } else { - *val = u64; - return true; - } -} - -static const upb_msglayout_fieldinit_v1 *upb_find_field( - const upb_msglayout_msginit_v1 *l, uint32_t field_number) { - /* Lots of optimization opportunities here. */ - int i; - for (i = 0; i < l->field_count; i++) { - if (l->fields[i].number == field_number) { - return &l->fields[i]; - } - } - - return NULL; /* Unknown field. */ + CHK(upb_decode_varint(ptr, limit, &u64) && u64 <= UINT32_MAX); + *val = u64; + return true; } static bool upb_decode_64bit(const char **ptr, const char *limit, uint64_t *val) { - if (limit - *ptr < 8) { - return false; - } else { - memcpy(val, *ptr, 8); - *ptr += 8; - return true; - } + CHK(limit - *ptr >= 8); + memcpy(val, *ptr, 8); + *ptr += 8; + return true; } static bool upb_decode_32bit(const char **ptr, const char *limit, uint32_t *val) { - if (limit - *ptr < 4) { - return false; - } else { - memcpy(val, *ptr, 4); - *ptr += 4; - return true; - } + CHK(limit - *ptr >= 4); + memcpy(val, *ptr, 4); + *ptr += 4; + return true; +} + +static bool upb_decode_tag(const char **ptr, const char *limit, + int *field_number, int *wire_type) { + uint32_t tag; + CHK(upb_decode_varint32(ptr, limit, &tag)); + *field_number = tag >> 3; + *wire_type = tag & 7; + return true; } static int32_t upb_zzdec_32(uint32_t n) { @@ -113,10 +116,7 @@ static bool upb_decode_string(const char **ptr, const char *limit, upb_stringview *val) { uint32_t len; - if (!upb_decode_varint32(ptr, limit, &len) || - limit - *ptr < len) { - return false; - } + CHK(upb_decode_varint32(ptr, limit, &len) && limit - *ptr >= len); *val = upb_stringview_make(*ptr, len); *ptr += len; @@ -127,277 +127,436 @@ static void upb_set32(void *msg, size_t ofs, uint32_t val) { memcpy((char*)msg + ofs, &val, sizeof(val)); } -static bool upb_append_unknownfield(const char **ptr, const char *start, - const char *limit, char *msg) { - UPB_UNUSED(limit); - UPB_UNUSED(msg); - *ptr = limit; +static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame, + const char *start) { + UPB_UNUSED(d); + UPB_UNUSED(frame); + UPB_UNUSED(start); return true; } -static bool upb_decode_unknownfielddata(upb_decstate *d, const char *ptr, - const char *limit, char *msg, - const upb_msglayout_msginit_v1 *l) { - do { - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: - CHK(upb_decode_varint(&ptr, limit, &val)); - break; - case UPB_WIRE_TYPE_32BIT: - CHK(upb_decode_32bit(&ptr, limit, &val)); - break; - case UPB_WIRE_TYPE_64BIT: - CHK(upb_decode_64bit(&ptr, limit, &val)); - break; - case UPB_WIRE_TYPE_DELIMITED: { - upb_stringview val; - CHK(upb_decode_string(&ptr, limit, &val)); - } - case UPB_WIRE_TYPE_START_GROUP: - depth++; - continue; - case UPB_WIRE_TYPE_END_GROUP: - depth--; - continue; +static bool upb_skip_unknownfielddata(upb_decstate *d, upb_decframe *frame, + int field_number, int wire_type) { + switch (wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint64_t val; + return upb_decode_varint(&d->ptr, frame->limit, &val); + } + case UPB_WIRE_TYPE_32BIT: { + uint32_t val; + return upb_decode_32bit(&d->ptr, frame->limit, &val); + } + case UPB_WIRE_TYPE_64BIT: { + uint64_t val; + return upb_decode_64bit(&d->ptr, frame->limit, &val); } + case UPB_WIRE_TYPE_DELIMITED: { + upb_stringview val; + return upb_decode_string(&d->ptr, frame->limit, &val); + } + case UPB_WIRE_TYPE_START_GROUP: + return upb_skip_unknowngroup(d, field_number, frame->limit); + case UPB_WIRE_TYPE_END_GROUP: + CHK(field_number == frame->group_number); + frame->limit = d->ptr; + return true; + } + return false; +} - UPB_ASSERT(depth == 0); - upb_append_unknown(msg, l, d->ptr, ptr); - d->ptr = ptr; - return true; - } while (true); +static bool upb_array_grow(upb_array *arr, size_t elements) { + size_t needed = arr->len + elements; + size_t new_size = UPB_MAX(arr->size, 8); + size_t new_bytes; + size_t old_bytes; + void *new_data; + + while (new_size < needed) { + new_size *= 2; + } + + old_bytes = arr->len * arr->element_size; + new_bytes = new_size * arr->element_size; + new_data = upb_realloc(arr->alloc, arr->data, old_bytes, new_bytes); + CHK(new_data); + + arr->data = new_data; + arr->size = new_size; + return true; } -static bool upb_decode_knownfield(upb_decstate *d, const char *ptr, - const char *limit, char *msg, - const upb_msglayout_msginit_v1 *l, - const upb_msglayout_fieldinit_v1 *l) { - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: - return upb_decode_varintfield(d, ptr, limit, msg, l, f); - case UPB_WIRE_TYPE_32BIT: - return upb_decode_32bitfield(d, ptr, limit, msg, l, f); - case UPB_WIRE_TYPE_64BIT: - return upb_decode_64bitfield(d, ptr, limit, msg, l, f); - case UPB_WIRE_TYPE_DELIMITED: - return upb_decode_delimitedfield(d, ptr, limit, msg, l, f); - case UPB_WIRE_TYPE_START_GROUP: - case UPB_WIRE_TYPE_END_GROUP: +static void *upb_array_reserve(upb_array *arr, size_t elements) { + if (arr->size - arr->len < elements) { + CHK(upb_array_grow(arr, elements)); } + return (char*)arr->data + (arr->len * arr->element_size); } +static void *upb_array_add(upb_array *arr, size_t elements) { + void *ret = upb_array_reserve(arr, elements); + arr->len += elements; + return ret; +} -static bool upb_decode_field(upb_decstate *d, const char *limit, char *msg, - const upb_msglayout_msginit_v1 *l) { - uint32_t tag; - uint32_t wire_type; - uint32_t field_number; - const char *ptr = d->ptr; - const upb_msglayout_fieldinit_v1 *f; - - if (!upb_decode_varint32(&ptr, limit, &tag)) { - upb_decode_seterr(env, "Error decoding tag.\n"); - return false; +static upb_array *upb_getarr(upb_decframe *frame, + const upb_msglayout_fieldinit_v1 *field) { + UPB_ASSERT(field->label == UPB_LABEL_REPEATED); + return *(upb_array**)&frame->msg[field->offset]; +} + +static upb_array *upb_getorcreatearr(upb_decstate *d, + upb_decframe *frame, + const upb_msglayout_fieldinit_v1 *field) { + upb_array *arr = upb_getarr(frame, field); + + if (!arr) { + arr = upb_env_malloc(d->env, sizeof(*arr)); + if (!arr) { + return NULL; + } + upb_array_init(arr, upb_desctype_to_fieldtype[field->type], + upb_arena_alloc(upb_env_arena(d->env))); + *(upb_array**)&frame->msg[field->offset] = arr; } - wire_type = tag & 0x7; - field_number = tag >> 3; + return arr; +} + +static void upb_sethasbit(upb_decframe *frame, + const upb_msglayout_fieldinit_v1 *field) { + UPB_ASSERT(field->hasbit != UPB_NO_HASBIT); + frame->msg[field->hasbit / 8] |= (1 << (field->hasbit % 8)); +} + +static void upb_setoneofcase(upb_decframe *frame, + const upb_msglayout_fieldinit_v1 *field) { + UPB_ASSERT(field->oneof_index != UPB_NOT_IN_ONEOF); + upb_set32(frame->msg, frame->m->oneofs[field->oneof_index].case_offset, + field->number); +} + +static char *upb_decode_prepareslot(upb_decstate *d, + upb_decframe *frame, + const upb_msglayout_fieldinit_v1 *field) { + char *field_mem = frame->msg + field->offset; + upb_array *arr; - if (field_number == 0) { - return false; + if (field->label == UPB_LABEL_REPEATED) { + arr = upb_getorcreatearr(d, frame, field); + field_mem = upb_array_reserve(arr, 1); } - f = upb_find_field(l, field_number); + return field_mem; +} - if (f) { - return upb_decode_knownfield(d, ptr, limit, msg, l, f); - } else { - return upb_decode_unknownfield(d, ptr, limit, msg, l); +static void upb_decode_setpresent(upb_decframe *frame, + const upb_msglayout_fieldinit_v1 *field) { + if (field->label == UPB_LABEL_REPEATED) { + upb_array *arr = upb_getarr(frame, field); + UPB_ASSERT(arr->len < arr->size); + arr->len++; + } else if (field->oneof_index != UPB_NOT_IN_ONEOF) { + upb_setoneofcase(frame, field); + } else if (field->hasbit != UPB_NO_HASBIT) { + upb_sethasbit(frame, field); } +} - if (f->label == UPB_LABEL_REPEATED) { - arr = upb_getarray(msg, f, env); +static bool upb_decode_submsg(upb_decstate *d, + upb_decframe *frame, + const char *limit, + const upb_msglayout_fieldinit_v1 *field, + int group_number) { + char *submsg = *(void**)&frame->msg[field->offset]; + const upb_msglayout_msginit_v1 *subm; + + UPB_ASSERT(field->submsg_index != UPB_NO_SUBMSG); + subm = frame->m->submsgs[field->submsg_index]; + UPB_ASSERT(subm); + + if (!submsg) { + submsg = upb_env_malloc(d->env, upb_msg_sizeof((upb_msglayout *)subm)); + CHK(submsg); + submsg = upb_msg_init( + submsg, (upb_msglayout*)subm, upb_arena_alloc(upb_env_arena(d->env))); + *(void**)&frame->msg[field->offset] = submsg; } - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint64_t val; - if (!upb_decode_varint(&ptr, limit, &val)) { - upb_decode_seterr(env, "Error decoding varint value.\n"); - return false; - } + upb_decode_message(d, limit, group_number, submsg, subm); - if (!f) { - return upb_append_unknown(ptr, field_start, ptr, msg); - } + return true; +} - if (f->label == UPB_LABEL_REPEATED) { - upb_array *arr = upb_getarray(msg, f, env); - switch (f->type) { - case UPB_DESCRIPTOR_TYPE_INT64: - case UPB_DESCRIPTOR_TYPE_UINT64: - memcpy(arr->data, &val, sizeof(val)); - arr->len++; - break; - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_UINT32: - case UPB_DESCRIPTOR_TYPE_ENUM: { - uint32_t val32 = val; - memcpy(arr->data, &val32, sizeof(val32)); - arr->len++; - break; - } - case UPB_DESCRIPTOR_TYPE_SINT32: { - int32_t decoded = upb_zzdec_32(val); - memcpy(arr->data, &decoded, sizeof(decoded)); - arr->len++; - break; - } - case UPB_DESCRIPTOR_TYPE_SINT64: { - int64_t decoded = upb_zzdec_64(val); - memcpy(arr->data, &decoded, sizeof(decoded)); - arr->len++; - break; - } - default: - return upb_append_unknown(ptr, field_start, ptr, msg); - } - } else { - switch (f->type) { - case UPB_DESCRIPTOR_TYPE_INT64: - case UPB_DESCRIPTOR_TYPE_UINT64: - memcpy(msg + f->offset, &val, sizeof(val)); - break; - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_UINT32: - case UPB_DESCRIPTOR_TYPE_ENUM: { - uint32_t val32 = val; - memcpy(msg + f->offset, &val32, sizeof(val32)); - break; - } - case UPB_DESCRIPTOR_TYPE_SINT32: { - int32_t decoded = upb_zzdec_32(val); - memcpy(msg + f->offset, &decoded, sizeof(decoded)); - break; - } - case UPB_DESCRIPTOR_TYPE_SINT64: { - int64_t decoded = upb_zzdec_64(val); - memcpy(msg + f->offset, &decoded, sizeof(decoded)); - break; - } - default: - return upb_append_unknown(ptr, field_start, ptr, msg); - } - } +static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame, + const char *field_start, + const upb_msglayout_fieldinit_v1 *field) { + uint64_t val; + void *field_mem; + field_mem = upb_decode_prepareslot(d, frame, field); + CHK(field_mem); + CHK(upb_decode_varint(&d->ptr, frame->limit, &val)); + + switch ((upb_descriptortype_t)field->type) { + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + memcpy(field_mem, &val, sizeof(val)); + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_ENUM: { + uint32_t val32 = val; + memcpy(field_mem, &val32, sizeof(val32)); break; } - case UPB_WIRE_TYPE_64BIT: { - uint64_t val; - if (!upb_decode_64bit(&ptr, limit, &val)) { - upb_decode_seterr(env, "Error decoding 64bit value.\n"); - return false; - } + case UPB_DESCRIPTOR_TYPE_SINT32: { + int32_t decoded = upb_zzdec_32(val); + memcpy(field_mem, &decoded, sizeof(decoded)); + break; + } + case UPB_DESCRIPTOR_TYPE_SINT64: { + int64_t decoded = upb_zzdec_64(val); + memcpy(field_mem, &decoded, sizeof(decoded)); + break; + } + default: + return upb_append_unknown(d, frame, field_start); + } - if (!f) { - return upb_append_unknown(ptr, field_start, ptr, msg); - } + upb_decode_setpresent(frame, field); + return true; +} - switch (f->type) { - case UPB_DESCRIPTOR_TYPE_DOUBLE: - case UPB_DESCRIPTOR_TYPE_FIXED64: - case UPB_DESCRIPTOR_TYPE_SFIXED64: - memcpy(msg + f->offset, &val, sizeof(val)); - default: - return upb_append_unknown(ptr, field_start, ptr, msg); - } +static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame, + const char *field_start, + const upb_msglayout_fieldinit_v1 *field) { + void *field_mem; + uint64_t val; + + field_mem = upb_decode_prepareslot(d, frame, field); + CHK(field_mem); + CHK(upb_decode_64bit(&d->ptr, frame->limit, &val)); + + switch ((upb_descriptortype_t)field->type) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + case UPB_DESCRIPTOR_TYPE_FIXED64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + memcpy(field_mem, &val, sizeof(val)); + break; + default: + return upb_append_unknown(d, frame, field_start); + } + upb_decode_setpresent(frame, field); + return true; +} + +static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame, + const char *field_start, + const upb_msglayout_fieldinit_v1 *field) { + void *field_mem; + uint32_t val; + + field_mem = upb_decode_prepareslot(d, frame, field); + CHK(field_mem); + CHK(upb_decode_32bit(&d->ptr, frame->limit, &val)); + + switch ((upb_descriptortype_t)field->type) { + case UPB_DESCRIPTOR_TYPE_FLOAT: + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + memcpy(field_mem, &val, sizeof(val)); break; + default: + return upb_append_unknown(d, frame, field_start); + } + + upb_decode_setpresent(frame, field); + return true; +} + +static bool upb_decode_fixedpacked(upb_array *arr, upb_stringview data, + int elem_size) { + int elements = data.size / elem_size; + void *field_mem; + + CHK((data.size % elem_size) == 0); + field_mem = upb_array_add(arr, elements); + CHK(field_mem); + memcpy(field_mem, data.data, data.size); + return true; +} + +static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, + const char *field_start, + const upb_msglayout_fieldinit_v1 *field, + upb_stringview val) { + upb_array *arr = upb_getorcreatearr(d, frame, field); + +#define VARINT_CASE(ctype, decode) { \ + const char *ptr = val.data; \ + const char *limit = ptr + val.size; \ + while (ptr < limit) { \ + uint64_t val; \ + void *field_mem; \ + ctype decoded; \ + CHK(upb_decode_varint(&ptr, limit, &val)); \ + decoded = (decode)(val); \ + field_mem = upb_array_add(arr, 1); \ + CHK(field_mem); \ + memcpy(field_mem, &decoded, sizeof(ctype)); \ + } \ + return true; \ +} + + switch ((upb_descriptortype_t)field->type) { + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + void *field_mem = upb_array_add(arr, 1); + CHK(field_mem); + memcpy(field_mem, &val, sizeof(val)); + return true; } - case UPB_WIRE_TYPE_32BIT: { - uint32_t val; - if (!upb_decode_32bit(&ptr, limit, &val)) { - upb_decode_seterr(env, "Error decoding 32bit value.\n"); - return false; - } + case UPB_DESCRIPTOR_TYPE_FLOAT: + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + return upb_decode_fixedpacked(arr, val, sizeof(int32_t)); + case UPB_DESCRIPTOR_TYPE_DOUBLE: + case UPB_DESCRIPTOR_TYPE_FIXED64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + return upb_decode_fixedpacked(arr, val, sizeof(int64_t)); + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_ENUM: + /* TODO: proto2 enum field that isn't in the enum. */ + VARINT_CASE(uint32_t, uint32_t); + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + VARINT_CASE(uint64_t, uint64_t); + case UPB_DESCRIPTOR_TYPE_BOOL: + VARINT_CASE(bool, bool); + case UPB_DESCRIPTOR_TYPE_SINT32: + VARINT_CASE(int32_t, upb_zzdec_32); + case UPB_DESCRIPTOR_TYPE_SINT64: + VARINT_CASE(int64_t, upb_zzdec_64); + case UPB_DESCRIPTOR_TYPE_MESSAGE: + CHK(val.size <= (size_t)(frame->limit - val.data)); + return upb_decode_submsg(d, frame, val.data + val.size, field, 0); + case UPB_DESCRIPTOR_TYPE_GROUP: + return upb_append_unknown(d, frame, field_start); + } +#undef VARINT_CASE +} - if (!f) { - return upb_append_unknown(ptr, field_start, ptr, msg); - } +static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame, + const char *field_start, + const upb_msglayout_fieldinit_v1 *field) { + upb_stringview val; + + CHK(upb_decode_string(&d->ptr, frame->limit, &val)); - switch (f->type) { - case UPB_DESCRIPTOR_TYPE_FLOAT: - case UPB_DESCRIPTOR_TYPE_FIXED32: - case UPB_DESCRIPTOR_TYPE_SFIXED32: - memcpy(msg + f->offset, &val, sizeof(val)); - default: - return upb_append_unknown(ptr, field_start, ptr, msg); + if (field->label == UPB_LABEL_REPEATED) { + return upb_decode_toarray(d, frame, field_start, field, val); + } else { + switch ((upb_descriptortype_t)field->type) { + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + void *field_mem = upb_decode_prepareslot(d, frame, field); + CHK(field_mem); + memcpy(field_mem, &val, sizeof(val)); + break; } + case UPB_DESCRIPTOR_TYPE_MESSAGE: + CHK(val.size <= (size_t)(frame->limit - val.data)); + CHK(upb_decode_submsg(d, frame, val.data + val.size, field, 0)); + break; + default: + /* TODO(haberman): should we accept the last element of a packed? */ + return upb_append_unknown(d, frame, field_start); + } + upb_decode_setpresent(frame, field); + return true; + } +} - break; +static const upb_msglayout_fieldinit_v1 *upb_find_field( + const upb_msglayout_msginit_v1 *l, uint32_t field_number) { + /* Lots of optimization opportunities here. */ + int i; + for (i = 0; i < l->field_count; i++) { + if (l->fields[i].number == field_number) { + return &l->fields[i]; } - case UPB_WIRE_TYPE_DELIMITED: { - upb_stringview val; - if (!upb_decode_string(&ptr, limit, &val)) { - upb_decode_seterr(env, "Error decoding delimited value.\n"); - return false; - } + } - if (!f) { - return upb_append_unknown(ptr, field_start, ptr, msg); - } + return NULL; /* Unknown field. */ +} - switch (f->type) { - case UPB_DESCRIPTOR_TYPE_STRING: - case UPB_DESCRIPTOR_TYPE_BYTES: - memcpy(msg + f->offset, &val, sizeof(val)); - break; - case UPB_DESCRIPTOR_TYPE_INT64: - case UPB_DESCRIPTOR_TYPE_UINT64: { - memcpy(msg + f->offset, &val, sizeof(val)); - break; - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_UINT32: - case UPB_DESCRIPTOR_TYPE_ENUM: { - uint32_t val32 = val; - memcpy(msg + f->offset, &val32, sizeof(val32)); - break; - } - case UPB_DESCRIPTOR_TYPE_SINT32: { - int32_t decoded = upb_zzdec_32(val); - memcpy(msg + f->offset, &decoded, sizeof(decoded)); - break; - } - case UPB_DESCRIPTOR_TYPE_SINT64: - case UPB_DESCRIPTOR_TYPE_FLOAT: - case UPB_DESCRIPTOR_TYPE_FIXED32: - case UPB_DESCRIPTOR_TYPE_SFIXED32: - /* - case UPB_DESCRIPTOR_TYPE_MESSAGE: { - upb_decode_message(val, - } - */ - default: - return upb_append_unknown(ptr, field_start, ptr, msg); - } +static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) { + int field_number; + int wire_type; + const char *field_start = d->ptr; + const upb_msglayout_fieldinit_v1 *field; - break; + CHK(upb_decode_tag(&d->ptr, frame->limit, &field_number, &wire_type)); + field = upb_find_field(frame->m, field_number); + + if (field) { + switch (wire_type) { + case UPB_WIRE_TYPE_VARINT: + return upb_decode_varintfield(d, frame, field_start, field); + case UPB_WIRE_TYPE_32BIT: + return upb_decode_32bitfield(d, frame, field_start, field); + case UPB_WIRE_TYPE_64BIT: + return upb_decode_64bitfield(d, frame, field_start, field); + case UPB_WIRE_TYPE_DELIMITED: + return upb_decode_delimitedfield(d, frame, field_start, field); + case UPB_WIRE_TYPE_START_GROUP: + CHK(field->type == UPB_DESCRIPTOR_TYPE_GROUP); + return upb_decode_submsg(d, frame, frame->limit, field, field_number); + case UPB_WIRE_TYPE_END_GROUP: + CHK(frame->group_number == field_number) + frame->limit = d->ptr; + return true; + default: + return false; } + } else { + CHK(field_number != 0); + return upb_skip_unknownfielddata(d, frame, field_number, wire_type); } +} + +static bool upb_skip_unknowngroup(upb_decstate *d, int field_number, + const char *limit) { + upb_decframe frame; + frame.msg = NULL; + frame.m = NULL; + frame.group_number = field_number; + frame.limit = limit; - if (f->oneof_index != UPB_NOT_IN_ONEOF) { - upb_set32(msg, l->oneofs[f->oneof_index].case_offset, f->number); + while (d->ptr < frame.limit) { + int wire_type; + int field_number; + + CHK(upb_decode_tag(&d->ptr, frame.limit, &field_number, &wire_type)); + CHK(upb_skip_unknownfielddata(d, &frame, field_number, wire_type)); } - d->ptr = ptr; return true; } -static bool upb_decode_message(upb_decstate *d, upb_decframe *frame) { - while (d->ptr < frame->limit) { - if (!upb_decode_field(d, frame)) { - return false; - } +static bool upb_decode_message(upb_decstate *d, const char *limit, + int group_number, char *msg, + const upb_msglayout_msginit_v1 *l) { + upb_decframe frame; + frame.group_number = group_number; + frame.limit = limit; + frame.msg = msg; + frame.m = l; + + while (d->ptr < frame.limit) { + CHK(upb_decode_field(d, &frame)); } return true; @@ -409,11 +568,5 @@ bool upb_decode(upb_stringview buf, void *msg, state.ptr = buf.data; state.env = env; - upb_decframe frame; - frame.msg = msg; - frame.l = l; - frame.group_number = 0; - frame.limit = buf.data + buf.size - - return upb_decode_message(&state, &frame); + return upb_decode_message(&state, buf.data + buf.size, 0, msg, l); } diff --git a/upb/encode.c b/upb/encode.c index 2fe1cc3..ced971e 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -1,10 +1,35 @@ +/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */ +#include "upb/upb.h" #include "upb/encode.h" #include "upb/structs.int.h" #define UPB_PB_VARINT_MAX_LEN 10 #define CHK(x) do { if (!(x)) { return false; } } while(0) +/* Maps descriptor type -> upb field type. */ +static const uint8_t upb_desctype_to_fieldtype[] = { + UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ + UPB_TYPE_DOUBLE, /* DOUBLE */ + UPB_TYPE_FLOAT, /* FLOAT */ + UPB_TYPE_INT64, /* INT64 */ + UPB_TYPE_UINT64, /* UINT64 */ + UPB_TYPE_INT32, /* INT32 */ + UPB_TYPE_UINT64, /* FIXED64 */ + UPB_TYPE_UINT32, /* FIXED32 */ + UPB_TYPE_BOOL, /* BOOL */ + UPB_TYPE_STRING, /* STRING */ + UPB_TYPE_MESSAGE, /* GROUP */ + UPB_TYPE_MESSAGE, /* MESSAGE */ + UPB_TYPE_BYTES, /* BYTES */ + UPB_TYPE_UINT32, /* UINT32 */ + UPB_TYPE_ENUM, /* ENUM */ + UPB_TYPE_INT32, /* SFIXED32 */ + UPB_TYPE_INT64, /* SFIXED64 */ + UPB_TYPE_INT32, /* SINT32 */ + UPB_TYPE_INT64, /* SINT64 */ +}; + static size_t upb_encode_varint(uint64_t val, char *buf) { size_t i; if (val == 0) { buf[0] = 0; return 1; } @@ -21,38 +46,6 @@ static size_t upb_encode_varint(uint64_t val, char *buf) { static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } -typedef enum { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5 -} upb_wiretype_t; - -/* Index is descriptor type. */ -const uint8_t upb_native_wiretypes[] = { - UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ - UPB_WIRE_TYPE_64BIT, /* DOUBLE */ - UPB_WIRE_TYPE_32BIT, /* FLOAT */ - UPB_WIRE_TYPE_VARINT, /* INT64 */ - UPB_WIRE_TYPE_VARINT, /* UINT64 */ - UPB_WIRE_TYPE_VARINT, /* INT32 */ - UPB_WIRE_TYPE_64BIT, /* FIXED64 */ - UPB_WIRE_TYPE_32BIT, /* FIXED32 */ - UPB_WIRE_TYPE_VARINT, /* BOOL */ - UPB_WIRE_TYPE_DELIMITED, /* STRING */ - UPB_WIRE_TYPE_START_GROUP, /* GROUP */ - UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */ - UPB_WIRE_TYPE_DELIMITED, /* BYTES */ - UPB_WIRE_TYPE_VARINT, /* UINT32 */ - UPB_WIRE_TYPE_VARINT, /* ENUM */ - UPB_WIRE_TYPE_32BIT, /* SFIXED32 */ - UPB_WIRE_TYPE_64BIT, /* SFIXED64 */ - UPB_WIRE_TYPE_VARINT, /* SINT32 */ - UPB_WIRE_TYPE_VARINT, /* SINT64 */ -}; - typedef struct { upb_env *env; char *buf, *ptr, *limit; @@ -165,21 +158,24 @@ static bool upb_encode_array(upb_encstate *e, const char *field_mem, const upb_msglayout_fieldinit_v1 *f) { const upb_array *arr = *(const upb_array**)field_mem; - if (arr->len == 0) { + if (arr == NULL || arr->len == 0) { return true; } -#define VARINT_CASE(ctype, encode) do { \ - uint64_t *start = arr->data; \ - uint64_t *ptr = start + arr->len; \ + UPB_ASSERT(arr->type == upb_desctype_to_fieldtype[f->type]); + +#define VARINT_CASE(ctype, encode) { \ + ctype *start = arr->data; \ + ctype *ptr = start + arr->len; \ char *buf_ptr = e->ptr; \ do { \ ptr--; \ CHK(upb_put_varint(e, encode)); \ } while (ptr != start); \ CHK(upb_put_varint(e, buf_ptr - e->ptr)); \ - break; \ -} while(0) +} \ +break; \ +do { ; } while(0) switch (f->type) { case UPB_DESCRIPTOR_TYPE_DOUBLE: @@ -352,7 +348,7 @@ bool upb_encode_message(upb_encstate* e, const char *msg, const upb_msglayout_fieldinit_v1 *f = &m->fields[i]; if (f->label == UPB_LABEL_REPEATED) { - CHK(upb_encode_array(e, msg, m, f)); + CHK(upb_encode_array(e, msg + f->offset, m, f)); } else { if (upb_encode_hasscalarfield(msg, m, f)) { CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, !m->is_proto2)); @@ -372,10 +368,14 @@ char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m, e.limit = NULL; e.ptr = NULL; - if (!upb_encode_message(&e, msg, m, size)) { - return false; - } - + CHK(upb_encode_message(&e, msg, m, size)); *size = e.limit - e.ptr; - return e.ptr; + + if (*size == 0) { + static char ch; + return &ch; + } else { + UPB_ASSERT(e.ptr); + return e.ptr; + } } diff --git a/upb/msg.c b/upb/msg.c index f0a4201..b3b0420 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -68,7 +68,7 @@ static size_t upb_msgval_sizeof(upb_fieldtype_t type) { case UPB_TYPE_MESSAGE: return sizeof(void*); case UPB_TYPE_STRING: - return sizeof(char*) + sizeof(size_t); + return sizeof(upb_stringview); } UPB_UNREACHABLE(); } diff --git a/upb/msg.h b/upb/msg.h index 71df7f1..3579a3f 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -388,13 +388,14 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h, #define UPB_NOT_IN_ONEOF UINT16_MAX #define UPB_NO_HASBIT UINT16_MAX +#define UPB_NO_SUBMSG UINT16_MAX typedef struct { uint32_t number; uint32_t offset; /* If in a oneof, offset of default in default_msg below. */ - uint16_t hasbit; - uint16_t oneof_index; /* UPB_NOT_IN_ONEOF if not in a oneof. */ - uint16_t submsg_index; + uint16_t hasbit; /* UPB_NO_HASBIT if no hasbit. */ + uint16_t oneof_index; /* UPB_NOT_IN_ONEOF if not in a oneof. */ + uint16_t submsg_index; /* UPB_NO_SUBMSG if no submsg. */ uint8_t type; uint8_t label; } upb_msglayout_fieldinit_v1; diff --git a/upb/pb/varint.int.h b/upb/pb/varint.int.h index 3ef84fb..9c54311 100644 --- a/upb/pb/varint.int.h +++ b/upb/pb/varint.int.h @@ -15,21 +15,9 @@ extern "C" { #endif -/* A list of types as they are encoded on-the-wire. */ -typedef enum { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5 -} upb_wiretype_t; - #define UPB_MAX_WIRE_TYPE 5 -/* The maximum number of bytes that it takes to encode a 64-bit varint. - * Note that with a better encoding this could be 9 (TODO: write up a - * wiki document about this). */ +/* The maximum number of bytes that it takes to encode a 64-bit varint. */ #define UPB_PB_VARINT_MAX_LEN 10 /* Array of the "native" (ie. non-packed-repeated) wire type for the given a diff --git a/upb/upb.h b/upb/upb.h index 19cd02c..2d6db8e 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -298,6 +298,16 @@ class PointerBase2 : public PointerBase { #endif +/* A list of types as they are encoded on-the-wire. */ +typedef enum { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5 +} upb_wiretype_t; + /* upb::ErrorSpace ************************************************************/ @@ -626,7 +636,7 @@ void upb_env_uninit(upb_env *e); void upb_env_initonly(upb_env *e); -upb_arena *upb_env_arena(upb_env *e); +UPB_INLINE upb_arena *upb_env_arena(upb_env *e) { return (upb_arena*)e; } bool upb_env_ok(const upb_env *e); void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud); -- cgit v1.2.3 From b697882fb272c2f3408f5c821a88aaf3e3db6f52 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 18 Jul 2017 01:04:48 +0200 Subject: Fixed varint length when buffer is reallocated. --- tests/conformance_upb_failures.txt | 6 ------ upb/encode.c | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'tests') diff --git a/tests/conformance_upb_failures.txt b/tests/conformance_upb_failures.txt index 7ff834a..33c013d 100644 --- a/tests/conformance_upb_failures.txt +++ b/tests/conformance_upb_failures.txt @@ -12,10 +12,4 @@ Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED32.ProtobufOutput Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED64.ProtobufOutput Required.ProtobufInput.RepeatedScalarSelectsLast.UINT64.ProtobufOutput Required.ProtobufInput.ValidDataRepeated.BOOL.ProtobufOutput -Required.ProtobufInput.ValidDataRepeated.INT32.ProtobufOutput -Required.ProtobufInput.ValidDataRepeated.INT64.ProtobufOutput -Required.ProtobufInput.ValidDataRepeated.SINT32.ProtobufOutput -Required.ProtobufInput.ValidDataRepeated.SINT64.ProtobufOutput -Required.ProtobufInput.ValidDataRepeated.UINT32.ProtobufOutput -Required.ProtobufInput.ValidDataRepeated.UINT64.ProtobufOutput Required.ProtobufInput.ValidDataScalar.BOOL.ProtobufOutput diff --git a/upb/encode.c b/upb/encode.c index ced971e..8a220ef 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -167,12 +167,12 @@ static bool upb_encode_array(upb_encstate *e, const char *field_mem, #define VARINT_CASE(ctype, encode) { \ ctype *start = arr->data; \ ctype *ptr = start + arr->len; \ - char *buf_ptr = e->ptr; \ + size_t pre_len = e->limit - e->ptr; \ do { \ ptr--; \ CHK(upb_put_varint(e, encode)); \ } while (ptr != start); \ - CHK(upb_put_varint(e, buf_ptr - e->ptr)); \ + CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \ } \ break; \ do { ; } while(0) -- cgit v1.2.3 From 15308afff2d0d288b73c1b4278bd28f926ce02b8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 18 Jul 2017 01:58:03 +0200 Subject: Fixes for oneof conformance tests. --- tests/conformance_upb_failures.txt | 14 -------------- upb/decode.c | 5 +++++ upb/encode.c | 13 +++++++++---- 3 files changed, 14 insertions(+), 18 deletions(-) (limited to 'tests') diff --git a/tests/conformance_upb_failures.txt b/tests/conformance_upb_failures.txt index 33c013d..05aab58 100644 --- a/tests/conformance_upb_failures.txt +++ b/tests/conformance_upb_failures.txt @@ -1,15 +1 @@ -Recommended.ProtobufInput.OneofZeroBool.ProtobufOutput -Recommended.ProtobufInput.OneofZeroBytes.ProtobufOutput -Recommended.ProtobufInput.OneofZeroDouble.ProtobufOutput -Recommended.ProtobufInput.OneofZeroEnum.ProtobufOutput -Recommended.ProtobufInput.OneofZeroFloat.ProtobufOutput -Recommended.ProtobufInput.OneofZeroString.ProtobufOutput -Recommended.ProtobufInput.OneofZeroUint32.ProtobufOutput -Recommended.ProtobufInput.OneofZeroUint64.ProtobufOutput Required.ProtobufInput.PrematureEofInSubmessageValue.MESSAGE -Required.ProtobufInput.RepeatedScalarSelectsLast.BOOL.ProtobufOutput -Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED32.ProtobufOutput -Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED64.ProtobufOutput -Required.ProtobufInput.RepeatedScalarSelectsLast.UINT64.ProtobufOutput -Required.ProtobufInput.ValidDataRepeated.BOOL.ProtobufOutput -Required.ProtobufInput.ValidDataScalar.BOOL.ProtobufOutput diff --git a/upb/decode.c b/upb/decode.c index a1910a1..3a44021 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -309,6 +309,11 @@ static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame, memcpy(field_mem, &val32, sizeof(val32)); break; } + case UPB_DESCRIPTOR_TYPE_BOOL: { + bool valbool = val != 0; + memcpy(field_mem, &valbool, sizeof(valbool)); + break; + } case UPB_DESCRIPTOR_TYPE_SINT32: { int32_t decoded = upb_zzdec_32(val); memcpy(field_mem, &decoded, sizeof(decoded)); diff --git a/upb/encode.c b/upb/encode.c index 8a220ef..4aafb9c 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -258,7 +258,7 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, bool is_proto3) { #define CASE(ctype, type, wire_type, encodeval) do { \ ctype val = *(ctype*)field_mem; \ - if (is_proto3 && val == 0) { \ + if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && val == 0) { \ return true; \ } \ return upb_put_ ## type(e, encodeval) && \ @@ -292,7 +292,7 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, case UPB_DESCRIPTOR_TYPE_STRING: case UPB_DESCRIPTOR_TYPE_BYTES: { upb_stringview view = *(upb_stringview*)field_mem; - if (is_proto3 && view.size == 0) { + if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && view.size == 0) { return true; } return upb_put_bytes(e, view.data, view.size) && @@ -303,7 +303,7 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, size_t size; void *submsg = *(void**)field_mem; const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; - if (is_proto3 && submsg == NULL) { + if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && submsg == NULL) { return true; } return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && @@ -314,7 +314,7 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, size_t size; void *submsg = *(void**)field_mem; const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; - if (is_proto3 && submsg == NULL) { + if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && submsg == NULL) { return true; } return upb_encode_message(e, submsg, subm, &size) && @@ -344,6 +344,11 @@ bool upb_encode_message(upb_encstate* e, const char *msg, size_t *size) { int i; char *buf_end = e->ptr; + + if (msg == NULL) { + return true; + } + for (i = m->field_count - 1; i >= 0; i--) { const upb_msglayout_fieldinit_v1 *f = &m->fields[i]; -- cgit v1.2.3 From 806ffc1d2053f1c02167c7965b39abc997d12ad6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 18 Jul 2017 11:48:29 +0200 Subject: Responded to PR comments. --- tests/conformance_upb.c | 2 +- tools/make_c_api.lua | 8 +++----- upb/decode.c | 9 +++++---- upb/encode.c | 8 ++++++-- 4 files changed, 15 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c index e1221b2..2ef8c6d 100644 --- a/tests/conformance_upb.c +++ b/tests/conformance_upb.c @@ -114,7 +114,7 @@ bool DoTestIo() { char *serialized_input; char *serialized_output; uint32_t input_size; - size_t output_size = 0; + size_t output_size; conformance_ConformanceRequest *request; conformance_ConformanceResponse *response; diff --git a/tools/make_c_api.lua b/tools/make_c_api.lua index cfcf38d..552e861 100644 --- a/tools/make_c_api.lua +++ b/tools/make_c_api.lua @@ -155,6 +155,8 @@ local function field_layout_rank(field) -- 1. padding alignment is (nearly) minimized. -- 2. fields that might have defaults (1-4) are segregated -- from fields that are always zero-initialized (5-7). + -- + -- We skip oneof fields, because they are emitted in a separate pass. local rank if field:containing_oneof() then rank = 100 -- These go last (actually we skip them). @@ -347,7 +349,7 @@ local function write_c_file(filedef, hfilename, append) end if field:containing_oneof() then - -- Do nothing now + -- Handled below. else if has_hasbit(field) then hasbit_indexes[field] = hasbit_count @@ -358,14 +360,11 @@ local function write_c_file(filedef, hfilename, append) end end - local oneof_last_fields = {} -- Oneof fields. for oneof in msg:oneofs() do local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name()) append(' union {\n') - oneof_last_fields[oneof] = "" for field in oneof:fields() do - oneof_last_fields[oneof] = field:name() append(' %s %s;\n', ctype(field), field:name()) end append(' } %s;\n', oneof:name()) @@ -425,7 +424,6 @@ local function write_c_file(filedef, hfilename, append) if field:containing_oneof() then oneof_index = oneof_indexes[field:containing_oneof()] end - -- TODO(haberman): oneofs. append(' {%s, offsetof(%s, %s), %s, %s, %s, %s, %s},\n', field:number(), msgname, diff --git a/upb/decode.c b/upb/decode.c index 3a44021..b09da42 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -54,18 +54,18 @@ static bool upb_decode_message(upb_decstate *d, const char *limit, static bool upb_decode_varint(const char **ptr, const char *limit, uint64_t *val) { - uint8_t byte = 0x80; + uint8_t byte; int bitpos = 0; const char *p = *ptr; *val = 0; - while (byte & 0x80) { + do { CHK(bitpos < 70 && p < limit); byte = *p; *val |= (uint64_t)(byte & 0x7F) << bitpos; p++; bitpos += 7; - } + } while (byte & 0x80); *ptr = p; return true; @@ -385,7 +385,7 @@ static bool upb_decode_fixedpacked(upb_array *arr, upb_stringview data, int elements = data.size / elem_size; void *field_mem; - CHK((data.size % elem_size) == 0); + CHK(elements * elem_size == data.size); field_mem = upb_array_add(arr, elements); CHK(field_mem); memcpy(field_mem, data.data, data.size); @@ -451,6 +451,7 @@ static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, return upb_append_unknown(d, frame, field_start); } #undef VARINT_CASE + UPB_UNREACHABLE(); } static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame, diff --git a/upb/encode.c b/upb/encode.c index 4aafb9c..e7585eb 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -32,7 +32,7 @@ static const uint8_t upb_desctype_to_fieldtype[] = { static size_t upb_encode_varint(uint64_t val, char *buf) { size_t i; - if (val == 0) { buf[0] = 0; return 1; } + if (val < 128) { buf[0] = val; return 1; } i = 0; while (val) { uint8_t byte = val & 0x7fU; @@ -373,7 +373,11 @@ char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m, e.limit = NULL; e.ptr = NULL; - CHK(upb_encode_message(&e, msg, m, size)); + if (!upb_encode_message(&e, msg, m, size)) { + *size = 0; + return NULL; + } + *size = e.limit - e.ptr; if (*size == 0) { -- cgit v1.2.3 From 6b8767422154008eed98d0df42e36758d38877a4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 19 Jul 2017 00:33:30 +0200 Subject: Address review comments and fix compile warnings. --- tests/conformance_upb.c | 2 +- upb/decode.c | 2 +- upb/encode.c | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c index 2ef8c6d..70ea300 100644 --- a/tests/conformance_upb.c +++ b/tests/conformance_upb.c @@ -33,7 +33,7 @@ bool CheckedRead(int fd, void *buf, size_t len) { } void CheckedWrite(int fd, const void *buf, size_t len) { - if (write(fd, buf, len) != len) { + if ((size_t)write(fd, buf, len) != len) { perror("writing to test runner"); exit(1); } diff --git a/upb/decode.c b/upb/decode.c index b09da42..cba9f61 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -385,7 +385,7 @@ static bool upb_decode_fixedpacked(upb_array *arr, upb_stringview data, int elements = data.size / elem_size; void *field_mem; - CHK(elements * elem_size == data.size); + CHK((size_t)(elements * elem_size) == data.size); field_mem = upb_array_add(arr, elements); CHK(field_mem); memcpy(field_mem, data.data, data.size); diff --git a/upb/encode.c b/upb/encode.c index e7585eb..cb72bf9 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -256,9 +256,11 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, const upb_msglayout_msginit_v1 *m, const upb_msglayout_fieldinit_v1 *f, bool is_proto3) { + bool skip_zero_value = is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF; + #define CASE(ctype, type, wire_type, encodeval) do { \ ctype val = *(ctype*)field_mem; \ - if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && val == 0) { \ + if (skip_zero_value && val == 0) { \ return true; \ } \ return upb_put_ ## type(e, encodeval) && \ @@ -292,7 +294,7 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, case UPB_DESCRIPTOR_TYPE_STRING: case UPB_DESCRIPTOR_TYPE_BYTES: { upb_stringview view = *(upb_stringview*)field_mem; - if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && view.size == 0) { + if (skip_zero_value && view.size == 0) { return true; } return upb_put_bytes(e, view.data, view.size) && @@ -303,7 +305,7 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, size_t size; void *submsg = *(void**)field_mem; const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; - if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && submsg == NULL) { + if (skip_zero_value && submsg == NULL) { return true; } return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && @@ -314,7 +316,7 @@ static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, size_t size; void *submsg = *(void**)field_mem; const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; - if (is_proto3 && f->oneof_index == UPB_NOT_IN_ONEOF && submsg == NULL) { + if (skip_zero_value && submsg == NULL) { return true; } return upb_encode_message(e, submsg, subm, &size) && -- cgit v1.2.3