diff options
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | Makefile | 19 | ||||
-rw-r--r-- | tests/conformance_upb.c | 23 | ||||
m--------- | third_party/protobuf | 0 | ||||
-rw-r--r-- | tools/make_c_api.lua | 143 | ||||
-rw-r--r-- | tools/upbc.lua | 9 | ||||
-rw-r--r-- | upb/bindings/lua/msg.c | 2 | ||||
-rw-r--r-- | upb/decode.c | 385 | ||||
-rw-r--r-- | upb/decode.h | 17 | ||||
-rw-r--r-- | upb/encode.c | 381 | ||||
-rw-r--r-- | upb/encode.h | 17 | ||||
-rw-r--r-- | upb/msg.c | 10 | ||||
-rw-r--r-- | upb/msg.h | 2 | ||||
-rw-r--r-- | upb/structs.int.h | 18 | ||||
-rw-r--r-- | upb/upb.h | 3 |
15 files changed, 979 insertions, 53 deletions
diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8b52c1d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "third_party/protobuf"] + path = third_party/protobuf + url = https://github.com/google/protobuf.git @@ -110,7 +110,7 @@ clean_leave_profile: @rm -rf obj lib @rm -f tests/google_message?.h @rm -f tests/json/test.upbdefs.o - @rm -f $(TESTS) tests/testmain.o tests/t.* + @rm -f $(TESTS) tests/testmain.o tests/t.* tests/conformance_upb @rm -rf tools/upbc deps @rm -rf upb/bindings/python/build @rm -f upb/bindings/ruby/Makefile @@ -148,7 +148,9 @@ make_objs_cc = $$(patsubst upb/$$(pc).cc,obj/upb/$$(pc).$(1),$$($$(call to_srcs, # Core libraries (ie. not bindings). ############################################################### upb_SRCS = \ + upb/decode.c \ upb/def.c \ + upb/encode.c \ upb/handlers.c \ upb/msg.c \ upb/refcounted.c \ @@ -361,6 +363,21 @@ test: done; @echo "All tests passed!" +obj/conformance_protos: obj/conformance_protos.pb tools/upbc + cd obj && ../tools/upbc conformance_protos.pb && touch conformance_protos + +obj/conformance_protos.pb: third_party/protobuf/autogen.sh + protoc -Ithird_party/protobuf/conformance -Ithird_party/protobuf/src --include_imports \ + third_party/protobuf/conformance/conformance.proto \ + third_party/protobuf/src/google/protobuf/test_messages_proto3.proto \ + -o obj/conformance_protos.pb + +third_party/protouf/autogen.sh: .gitmodules + git submodule init && git submodule update + +tests/conformance_upb: tests/conformance_upb.c lib/libupb.a obj/conformance_protos + $(CC) -o tests/conformance_upb tests/conformance_upb.c -Iobj -I. $(CPPFLAGS) $(CFLAGS) obj/conformance.upb.c obj/google/protobuf/*.upb.c lib/libupb.a + # Google protobuf binding ###################################################### diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c index 1671409..e1221b2 100644 --- a/tests/conformance_upb.c +++ b/tests/conformance_upb.c @@ -4,6 +4,7 @@ #include <errno.h> #include <stdarg.h> +#include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -52,7 +53,7 @@ void DoTest( if (!test_message) { /* TODO(haberman): return details. */ - static char msg[] = "Parse error (no more details available)."; + static const char msg[] = "Parse error (no more details available)."; conformance_ConformanceResponse_set_parse_error( response, upb_stringview_make(msg, sizeof(msg))); return; @@ -60,20 +61,20 @@ void DoTest( break; case conformance_ConformanceRequest_payload_json_payload: { - static char msg[] = "JSON support not yet implemented."; + static const char msg[] = "JSON support not yet implemented."; conformance_ConformanceResponse_set_skipped( response, upb_stringview_make(msg, sizeof(msg))); return; } case conformance_ConformanceRequest_payload_NOT_SET: - fprintf(stderr, "conformance_upb: Request didn't have payload."); - exit(1); + fprintf(stderr, "conformance_upb: Request didn't have payload.\n"); + return; } switch (conformance_ConformanceRequest_requested_output_format(request)) { case conformance_UNSPECIFIED: - fprintf(stderr, "conformance_upb: Unspecified output format."); + fprintf(stderr, "conformance_upb: Unspecified output format.\n"); exit(1); case conformance_PROTOBUF: { @@ -81,8 +82,10 @@ void DoTest( char *serialized = protobuf_test_messages_proto3_TestAllTypes_serialize( test_message, env, &serialized_len); if (!serialized) { - fprintf(stderr, "conformance_upb: Error serializing."); - exit(1); + static const char msg[] = "Error serializing."; + conformance_ConformanceResponse_set_serialize_error( + response, upb_stringview_make(msg, sizeof(msg))); + return; } conformance_ConformanceResponse_set_protobuf_payload( response, upb_stringview_make(serialized, serialized_len)); @@ -90,14 +93,14 @@ void DoTest( } case conformance_JSON: { - static char msg[] = "JSON support not yet implemented."; + static const char msg[] = "JSON support not yet implemented."; conformance_ConformanceResponse_set_skipped( response, upb_stringview_make(msg, sizeof(msg))); break; } default: - fprintf(stderr, "conformance_upb: Unknown output format: %d", + fprintf(stderr, "conformance_upb: Unknown output format: %d\n", conformance_ConformanceRequest_requested_output_format(request)); exit(1); } @@ -111,7 +114,7 @@ bool DoTestIo() { char *serialized_input; char *serialized_output; uint32_t input_size; - size_t output_size; + size_t output_size = 0; conformance_ConformanceRequest *request; conformance_ConformanceResponse *response; diff --git a/third_party/protobuf b/third_party/protobuf new file mode 160000 +Subproject 6bd51a59df41b99058ec8c2b03a177a218267ce diff --git a/tools/make_c_api.lua b/tools/make_c_api.lua index c13f37c..9e6d734 100644 --- a/tools/make_c_api.lua +++ b/tools/make_c_api.lua @@ -42,7 +42,6 @@ local function to_preproc(...) return string.upper(to_cident(...)) end - -- Strips away last path element, ie: -- foo.Bar.Baz -> foo.Bar local function remove_name(name) @@ -55,6 +54,10 @@ local function remove_name(name) return string.sub(name, 1, package_end) end +local function enum_value_symbol(enumdef, name) + return to_cident(remove_name(enumdef:full_name())) .. "_" .. name +end + local function dump_enum_vals(enumdef, append) local enum_vals = {} @@ -91,7 +94,7 @@ local function dump_enum_vals(enumdef, append) local cident = to_cident(remove_name(enumdef:full_name())) for i, pair in ipairs(enum_vals) do k, v = pair[1], pair[2] - append(' %s = %d', cident .. "_" .. k, v) + append(' %s = %d', enum_value_symbol(enumdef, k), v) if i == #enum_vals then append('\n') else @@ -100,6 +103,20 @@ local function dump_enum_vals(enumdef, append) end end +local function field_default(field) + if field:type() == upb.TYPE_MESSAGE then + return "NULL" + elseif field:type() == upb.TYPE_STRING or + field:type() == upb.TYPE_BYTES then + local default = field:default() or "" + return string.format('upb_stringview_make("%s", strlen("%s"))', field:default(), field:default()) + elseif field:type() == upb.TYPE_ENUM then + return enum_value_symbol(field:subdef(), field:default()) + else + return field:default(); + end +end + local function ctype(field) if field:label() == upb.LABEL_REPEATED then return "upb_array*" @@ -129,25 +146,24 @@ end local function field_layout_rank(field) -- Order: -- 1, 2, 3. primitive fields (8, 4, 1 byte) - -- 4. oneof fields - -- 5. string fields - -- 6. submessage fields - -- 7. repeated fields + -- 4. string fields + -- 5. submessage fields + -- 6. repeated fields -- -- This has the following nice properties: -- -- 1. padding alignment is (nearly) minimized. - -- 2. fields that might have defaults (1-5) are segregated - -- from fields that are always zero-initialized (6-7). + -- 2. fields that might have defaults (1-4) are segregated + -- from fields that are always zero-initialized (5-7). local rank if field:containing_oneof() then - rank = 4 + rank = 100 -- These go last (actually we skip them). elseif field:label() == upb.LABEL_REPEATED then - rank = 7 - elseif field:type() == upb.TYPE_MESSAGE then rank = 6 - elseif field:type() == upb.TYPE_STRING or field:type() == upb.TYPE_BYTES then + elseif field:type() == upb.TYPE_MESSAGE then rank = 5 + elseif field:type() == upb.TYPE_STRING or field:type() == upb.TYPE_BYTES then + rank = 4 elseif field:type() == upb.TYPE_BOOL then rank = 3 elseif field:type() == upb.TYPE_FLOAT or @@ -258,6 +274,8 @@ local function write_c_file(filedef, hfilename, append) emit_file_warning(filedef, append) append('#include <stddef.h>\n') + append('#include "upb/decode.h"\n\n') + append('#include "upb/encode.h"\n\n') append('#include "upb/msg.h"\n') append('#include "upb/upb.h"\n') append('#include "%s"\n\n', hfilename) @@ -274,13 +292,29 @@ local function write_c_file(filedef, hfilename, append) local fields_array_ref = "NULL" local submsgs_array_ref = "NULL" + local oneofs_array_ref = "NULL" local field_count = 0 local submsg_count = 0 local submsg_set = {} local submsg_indexes = {} local hasbit_count = 0 local hasbit_indexes = {} - -- TODO(haberman): oneofs + local oneof_count = 0 + local oneof_indexes = {} + + -- Create a layout order for oneofs. + local oneofs_layout_order = {} + for oneof in msg:oneofs() do + table.insert(oneofs_layout_order, oneof) + end + table.sort(oneofs_layout_order, function(a, b) + return a:name() < b:name() + end) + + for _, oneof in ipairs(oneofs_layout_order) do + oneof_indexes[oneof] = oneof_count + oneof_count = oneof_count + 1 + end -- Create a layout order for fields. We use this order for the struct and -- for offsets, but our list of fields we keep in field number order. @@ -302,6 +336,8 @@ local function write_c_file(filedef, hfilename, append) end) append('struct %s {\n', msgname) + + -- Non-oneof fields. for _, field in ipairs(fields_layout_order) do field_count = field_count + 1 @@ -310,15 +346,46 @@ local function write_c_file(filedef, hfilename, append) submsg_set[field:subdef()] = true end - if has_hasbit(field) then - hasbit_indexes[field] = hasbit_count - hasbit_count = hasbit_count + 1 + if field:containing_oneof() then + -- Do nothing now + else + if has_hasbit(field) then + hasbit_indexes[field] = hasbit_count + hasbit_count = hasbit_count + 1 + end + + append(' %s %s;\n', ctype(field), field:name()) end + end - append(' %s %s;\n', ctype(field), field:name()) + local oneof_last_fields = {} + -- Oneof fields. + for oneof in msg:oneofs() do + local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name()) + append(' union {\n') + oneof_last_fields[oneof] = "" + for field in oneof:fields() do + oneof_last_fields[oneof] = field:name() + append(' %s %s;\n', ctype(field), field:name()) + end + append(' } %s;\n', oneof:name()) + append(' %s_oneofcases %s_case;\n', fullname, oneof:name()) end + append('};\n\n') + if oneof_count > 0 then + local oneofs_array_name = msgname .. "_oneofs" + oneofs_array_ref = "&" .. oneofs_array_name .. "[0]" + append('static const upb_msglayout_oneofinit_v1 %s[%s] = {\n', + oneofs_array_name, oneof_count) + for _, oneof in ipairs(oneofs_layout_order) do + append(' {offsetof(%s, %s), offsetof(%s, %s_case)},\n', + msgname, oneof:name(), msgname, oneof:name()) + end + append('};\n\n') + end + if submsg_count > 0 then -- TODO(haberman): could save a little bit of space by only generating a -- "submsgs" array for every strongly-connected component. @@ -355,11 +422,14 @@ local function write_c_file(filedef, hfilename, append) if field:type() == upb.TYPE_MESSAGE then submsg_index = submsg_indexes[field:subdef()] end + if field:containing_oneof() then + oneof_index = oneof_indexes[field:containing_oneof()] + end -- TODO(haberman): oneofs. append(' {%s, offsetof(%s, %s), %s, %s, %s, %s, %s},\n', field:number(), msgname, - field:name(), + (field:containing_oneof() and field:containing_oneof():name()) or field:name(), hasbit_indexes[field] or "-1", oneof_index, submsg_index, @@ -372,7 +442,7 @@ local function write_c_file(filedef, hfilename, append) append('const upb_msglayout_msginit_v1 %s_msginit = {\n', msgname) append(' %s,\n', submsgs_array_ref) append(' %s,\n', fields_array_ref) - append(' NULL, /* TODO. oneofs */\n') + append(' %s,\n', oneofs_array_ref) append(' NULL, /* TODO. default_msg */\n') append(' UPB_ALIGNED_SIZEOF(%s), %s, %s, %s, %s\n', msgname, field_count, @@ -391,36 +461,49 @@ local function write_c_file(filedef, hfilename, append) append('%s *%s_parsenew(upb_stringview buf, upb_env *env) {\n', msgname, msgname) - append(' UPB_UNUSED(buf);\n') - append(' UPB_UNUSED(env);\n') - append(' return NULL;\n') + append(' %s *msg = %s_new(env);\n', msgname, msgname) + append(' if (upb_decode(buf, msg, &%s_msginit, env)) {\n', msgname) + append(' return msg;\n') + append(' } else {\n') + append(' return NULL;\n') + append(' }\n') append('}\n') append('char *%s_serialize(%s *msg, upb_env *env, size_t *size) {\n', msgname, msgname) - append(' UPB_UNUSED(msg);\n') - append(' UPB_UNUSED(env);\n') - append(' UPB_UNUSED(size);\n') - append(' return NULL; /* TODO. */\n') + append(' return upb_encode(msg, &%s_msginit, env, size);\n', msgname) append('}\n') for field in msg:fields() do local typename = ctype(field) append('%s %s_%s(const %s *msg) {\n', typename, msgname, field:name(), msgname); - append(' return msg->%s;\n', field:name()) + if field:containing_oneof() then + local oneof = field:containing_oneof() + append(' return msg->%s_case == %s ? msg->%s.%s : %s;\n', + oneof:name(), field:number(), oneof:name(), field:name(), + field_default(field)) + else + append(' return msg->%s;\n', field:name()) + end append('}\n') append('void %s_set_%s(%s *msg, %s value) {\n', msgname, field:name(), msgname, typename); - append(' msg->%s = value;\n', field:name()) + if field:containing_oneof() then + local oneof = field:containing_oneof() + append(' msg->%s.%s = value;\n', oneof:name(), field:name()) + append(' msg->%s_case = %s;\n', oneof:name(), field:number()) + else + append(' msg->%s = value;\n', field:name()) + end append('}\n') end for oneof in msg:oneofs() do local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name()) append('%s_oneofcases %s_case(const %s *msg) {\n', fullname, fullname, msgname) - append(' return 0; /* TODO. */') - append('}') + append(' return msg->%s_case;\n', oneof:name()) + append('}\n') end end end diff --git a/tools/upbc.lua b/tools/upbc.lua index bfd93f8..a538b1c 100644 --- a/tools/upbc.lua +++ b/tools/upbc.lua @@ -15,8 +15,13 @@ local upb = require "upb" local generate_upbdefs = false for _, argument in ipairs(arg) do - if argument == "--generate-upbdefs" then - generate_upbdefs = true + if argument.sub(argument, 1, 2) == "--" then + if argument == "--generate-upbdefs" then + generate_upbdefs = true + else + print("Unknown flag: " .. argument) + return 1 + end else if src then print("upbc can only handle one input file at a time.") diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index dc8420f..e468ace 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -434,7 +434,7 @@ static upb_msgval lupb_tomsgval(lua_State *L, upb_fieldtype_t type, int narg, case UPB_TYPE_BYTES: { size_t len; const char *ptr = lupb_checkstring(L, narg, &len); - return upb_msgval_str(upb_stringview_make(ptr, len)); + return upb_msgval_makestr(ptr, len); } case UPB_TYPE_MESSAGE: UPB_ASSERT(lmsgclass); diff --git a/upb/decode.c b/upb/decode.c new file mode 100644 index 0000000..f28642b --- /dev/null +++ b/upb/decode.c @@ -0,0 +1,385 @@ + +#include "upb/decode.h" + +typedef enum { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5 +} upb_wiretype_t; + +typedef struct { + upb_env *env; + /* Current decoding pointer. Points to the beginning of a field until we + * have finished decoding the whole field. */ + const char *ptr; +} upb_decstate; + +#define CHK(x) if (!(x)) { return false; } + +static void upb_decode_seterr(upb_env *env, const char *msg) { + upb_status status = UPB_STATUS_INIT; + upb_status_seterrmsg(&status, msg); + upb_env_reporterror(env, &status); +} + +static bool upb_decode_varint(const char **ptr, const char *limit, + uint64_t *val) { + uint8_t byte = 0x80; + int bitpos = 0; + const char *p = *ptr; + *val = 0; + + while (byte & 0x80) { + if (bitpos == 70 || p == limit) { + return false; + } + + byte = *p; + *val |= (uint64_t)(byte & 0x7F) << bitpos; + p++; + bitpos += 7; + } + + *ptr = p; + return true; +} + +static bool upb_decode_varint32(const char **ptr, const char *limit, + uint32_t *val) { + uint64_t u64; + if (!upb_decode_varint(ptr, limit, &u64) || u64 > UINT32_MAX) { + return false; + } else { + *val = u64; + return true; + } +} + +static const upb_msglayout_fieldinit_v1 *upb_find_field( + const upb_msglayout_msginit_v1 *l, uint32_t field_number) { + /* Lots of optimization opportunities here. */ + int i; + for (i = 0; i < l->field_count; i++) { + if (l->fields[i].number == field_number) { + return &l->fields[i]; + } + } + + return NULL; /* Unknown field. */ +} + +static bool upb_decode_64bit(const char **ptr, const char *limit, + uint64_t *val) { + if (limit - *ptr < 8) { + return false; + } else { + memcpy(val, *ptr, 8); + *ptr += 8; + return true; + } +} + +static bool upb_decode_32bit(const char **ptr, const char *limit, + uint32_t *val) { + if (limit - *ptr < 4) { + return false; + } else { + memcpy(val, *ptr, 4); + *ptr += 4; + return true; + } +} + +static int32_t upb_zzdec_32(uint32_t n) { + return (n >> 1) ^ -(int32_t)(n & 1); +} + +static int64_t upb_zzdec_64(uint64_t n) { + return (n >> 1) ^ -(int64_t)(n & 1); +} + +static bool upb_decode_string(const char **ptr, const char *limit, + upb_stringview *val) { + uint32_t len; + + if (!upb_decode_varint32(ptr, limit, &len) || + limit - *ptr < len) { + return false; + } + + *val = upb_stringview_make(*ptr, len); + *ptr += len; + return true; +} + +static void upb_set32(void *msg, size_t ofs, uint32_t val) { + memcpy((char*)msg + ofs, &val, sizeof(val)); +} + +static bool upb_append_unknownfield(const char **ptr, const char *start, + const char *limit, char *msg) { + UPB_UNUSED(limit); + UPB_UNUSED(msg); + *ptr = limit; + return true; +} + +static bool upb_decode_unknownfielddata(upb_decstate *d, const char *ptr, + const char *limit, char *msg, + const upb_msglayout_msginit_v1 *l) { + do { + switch (wire_type) { + case UPB_WIRE_TYPE_VARINT: + CHK(upb_decode_varint(&ptr, limit, &val)); + break; + case UPB_WIRE_TYPE_32BIT: + CHK(upb_decode_32bit(&ptr, limit, &val)); + break; + case UPB_WIRE_TYPE_64BIT: + CHK(upb_decode_64bit(&ptr, limit, &val)); + break; + case UPB_WIRE_TYPE_DELIMITED: { + upb_stringview val; + CHK(upb_decode_string(&ptr, limit, &val)); + } + case UPB_WIRE_TYPE_START_GROUP: + depth++; + continue; + case UPB_WIRE_TYPE_END_GROUP: + depth--; + continue; + } + + UPB_ASSERT(depth == 0); + upb_append_unknown(msg, l, d->ptr, ptr); + d->ptr = ptr; + return true; + } while (true); +} + +static bool upb_decode_field(upb_decstate *d, const char *limit, char *msg, + const upb_msglayout_msginit_v1 *l) { + uint32_t tag; + uint32_t wire_type; + uint32_t field_number; + const char *ptr = d->ptr; + const upb_msglayout_fieldinit_v1 *f; + + if (!upb_decode_varint32(&ptr, limit, &tag)) { + upb_decode_seterr(env, "Error decoding tag.\n"); + return false; + } + + wire_type = tag & 0x7; + field_number = tag >> 3; + + if (field_number == 0) { + return false; + } + + f = upb_find_field(l, field_number); + + if (f) { + return upb_decode_knownfield(d, ptr, limit, msg, l, f); + } else { + return upb_decode_unknownfield(d, ptr, limit, msg, l); + } + + if (f->label == UPB_LABEL_REPEATED) { + arr = upb_getarray(msg, f, env); + } + + switch (wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint64_t val; + if (!upb_decode_varint(&ptr, limit, &val)) { + upb_decode_seterr(env, "Error decoding varint value.\n"); + return false; + } + + if (!f) { + return upb_append_unknown(ptr, field_start, ptr, msg); + } + + if (f->label == UPB_LABEL_REPEATED) { + upb_array *arr = upb_getarray(msg, f, env); + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + memcpy(arr->data, &val, sizeof(val)); + arr->len++; + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_ENUM: { + uint32_t val32 = val; + memcpy(arr->data, &val32, sizeof(val32)); + arr->len++; + break; + } + case UPB_DESCRIPTOR_TYPE_SINT32: { + int32_t decoded = upb_zzdec_32(val); + memcpy(arr->data, &decoded, sizeof(decoded)); + arr->len++; + break; + } + case UPB_DESCRIPTOR_TYPE_SINT64: { + int64_t decoded = upb_zzdec_64(val); + memcpy(arr->data, &decoded, sizeof(decoded)); + arr->len++; + break; + } + default: + return upb_append_unknown(ptr, field_start, ptr, msg); + } + } else { + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + memcpy(msg + f->offset, &val, sizeof(val)); + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_ENUM: { + uint32_t val32 = val; + memcpy(msg + f->offset, &val32, sizeof(val32)); + break; + } + case UPB_DESCRIPTOR_TYPE_SINT32: { + int32_t decoded = upb_zzdec_32(val); + memcpy(msg + f->offset, &decoded, sizeof(decoded)); + break; + } + case UPB_DESCRIPTOR_TYPE_SINT64: { + int64_t decoded = upb_zzdec_64(val); + memcpy(msg + f->offset, &decoded, sizeof(decoded)); + break; + } + default: + return upb_append_unknown(ptr, field_start, ptr, msg); + } + } + + break; + } + case UPB_WIRE_TYPE_64BIT: { + uint64_t val; + if (!upb_decode_64bit(&ptr, limit, &val)) { + upb_decode_seterr(env, "Error decoding 64bit value.\n"); + return false; + } + + if (!f) { + return upb_append_unknown(ptr, field_start, ptr, msg); + } + + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + case UPB_DESCRIPTOR_TYPE_FIXED64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + memcpy(msg + f->offset, &val, sizeof(val)); + default: + return upb_append_unknown(ptr, field_start, ptr, msg); + } + + break; + } + case UPB_WIRE_TYPE_32BIT: { + uint32_t val; + if (!upb_decode_32bit(&ptr, limit, &val)) { + upb_decode_seterr(env, "Error decoding 32bit value.\n"); + return false; + } + + if (!f) { + return upb_append_unknown(ptr, field_start, ptr, msg); + } + + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_FLOAT: + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + memcpy(msg + f->offset, &val, sizeof(val)); + default: + return upb_append_unknown(ptr, field_start, ptr, msg); + } + + break; + } + case UPB_WIRE_TYPE_DELIMITED: { + upb_stringview val; + if (!upb_decode_string(&ptr, limit, &val)) { + upb_decode_seterr(env, "Error decoding delimited value.\n"); + return false; + } + + if (!f) { + return upb_append_unknown(ptr, field_start, ptr, msg); + } + + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: + memcpy(msg + f->offset, &val, sizeof(val)); + break; + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: { + memcpy(msg + f->offset, &val, sizeof(val)); + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_ENUM: { + uint32_t val32 = val; + memcpy(msg + f->offset, &val32, sizeof(val32)); + break; + } + case UPB_DESCRIPTOR_TYPE_SINT32: { + int32_t decoded = upb_zzdec_32(val); + memcpy(msg + f->offset, &decoded, sizeof(decoded)); + break; + } + case UPB_DESCRIPTOR_TYPE_SINT64: + case UPB_DESCRIPTOR_TYPE_FLOAT: + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + /* + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + upb_decode_message(val, + } + */ + default: + return upb_append_unknown(ptr, field_start, ptr, msg); + } + + break; + } + } + + if (f->oneof_index != UPB_NOT_IN_ONEOF) { + upb_set32(msg, l->oneofs[f->oneof_index].case_offset, f->number); + } + + d->ptr = ptr; + return true; +} + +static bool upb_decode_message(upb_decstate *d, upb_stringview buf, + char *msg, const upb_msglayout_msginit_v1 *l) { + const char *limit = ptr + buf.size; + + while (d->ptr < limit) { + if (!upb_decode_field(&ptr, limit, msg, l, env)) { + return false; + } + } + + return true; +} + +bool upb_decode(upb_stringview buf, void *msg, + const upb_msglayout_msginit_v1 *l, upb_env *env) { + return upb_decode_message(buf, msg, l, env); +} diff --git a/upb/decode.h b/upb/decode.h new file mode 100644 index 0000000..2a9e39e --- /dev/null +++ b/upb/decode.h @@ -0,0 +1,17 @@ +/* +** upb_decode: parsing into a upb_msg using a upb_msglayout. +*/ + +#ifndef UPB_DECODE_H_ +#define UPB_DECODE_H_ + +#include "upb/msg.h" + +UPB_BEGIN_EXTERN_C + +bool upb_decode(upb_stringview buf, void *msg, + const upb_msglayout_msginit_v1 *l, upb_env *env); + +UPB_END_EXTERN_C + +#endif /* UPB_DECODE_H_ */ diff --git a/upb/encode.c b/upb/encode.c new file mode 100644 index 0000000..2fe1cc3 --- /dev/null +++ b/upb/encode.c @@ -0,0 +1,381 @@ + +#include "upb/encode.h" +#include "upb/structs.int.h" + +#define UPB_PB_VARINT_MAX_LEN 10 +#define CHK(x) do { if (!(x)) { return false; } } while(0) + +static size_t upb_encode_varint(uint64_t val, char *buf) { + size_t i; + if (val == 0) { buf[0] = 0; return 1; } + i = 0; + while (val) { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } + return i; +} + +static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } +static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } + +typedef enum { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5 +} upb_wiretype_t; + +/* Index is descriptor type. */ +const uint8_t upb_native_wiretypes[] = { + UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ + UPB_WIRE_TYPE_64BIT, /* DOUBLE */ + UPB_WIRE_TYPE_32BIT, /* FLOAT */ + UPB_WIRE_TYPE_VARINT, /* INT64 */ + UPB_WIRE_TYPE_VARINT, /* UINT64 */ + UPB_WIRE_TYPE_VARINT, /* INT32 */ + UPB_WIRE_TYPE_64BIT, /* FIXED64 */ + UPB_WIRE_TYPE_32BIT, /* FIXED32 */ + UPB_WIRE_TYPE_VARINT, /* BOOL */ + UPB_WIRE_TYPE_DELIMITED, /* STRING */ + UPB_WIRE_TYPE_START_GROUP, /* GROUP */ + UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */ + UPB_WIRE_TYPE_DELIMITED, /* BYTES */ + UPB_WIRE_TYPE_VARINT, /* UINT32 */ + UPB_WIRE_TYPE_VARINT, /* ENUM */ + UPB_WIRE_TYPE_32BIT, /* SFIXED32 */ + UPB_WIRE_TYPE_64BIT, /* SFIXED64 */ + UPB_WIRE_TYPE_VARINT, /* SINT32 */ + UPB_WIRE_TYPE_VARINT, /* SINT64 */ +}; + +typedef struct { + upb_env *env; + char *buf, *ptr, *limit; +} upb_encstate; + +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; +} + +static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size); + CHK(new_buf); + + /* We want previous data at the end, realloc() put it at the beginning. */ + memmove(e->limit - old_size, e->buf, old_size); + + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; + return true; +} + +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ +static bool upb_encode_reserve(upb_encstate *e, size_t bytes) { + CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) || + upb_encode_growbuffer(e, bytes)); + + e->ptr -= bytes; + return true; +} + +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) { + CHK(upb_encode_reserve(e, len)); + memcpy(e->ptr, data, len); + return true; +} + +static bool upb_put_fixed64(upb_encstate *e, uint64_t val) { + /* TODO(haberman): byte-swap for big endian. */ + return upb_put_bytes(e, &val, sizeof(uint64_t)); +} + +static bool upb_put_fixed32(upb_encstate *e, uint32_t val) { + /* TODO(haberman): byte-swap for big endian. */ + return upb_put_bytes(e, &val, sizeof(uint32_t)); +} + +static bool upb_put_varint(upb_encstate *e, uint64_t val) { + size_t len; + char *start; + CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)); + len = upb_encode_varint(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; + return true; +} + +static bool upb_put_double(upb_encstate *e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + return upb_put_fixed64(e, u64); +} + +static bool upb_put_float(upb_encstate *e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + return upb_put_fixed32(e, u32); +} + +static uint32_t upb_readcase(const char *msg, const upb_msglayout_msginit_v1 *m, + int oneof_index) { + uint32_t ret; + memcpy(&ret, msg + m->oneofs[oneof_index].case_offset, sizeof(ret)); + return ret; +} + +static bool upb_readhasbit(const char *msg, + const upb_msglayout_fieldinit_v1 *f) { + UPB_ASSERT(f->hasbit != UPB_NO_HASBIT); + return msg[f->hasbit / 8] & (1 << (f->hasbit % 8)); +} + +static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) { + return upb_put_varint(e, (field_number << 3) | wire_type); +} + +static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr, + size_t size) { + size_t bytes = arr->len * size; + return upb_put_bytes(e, arr->data, bytes) && upb_put_varint(e, bytes); +} + +bool upb_encode_message(upb_encstate *e, const char *msg, + const upb_msglayout_msginit_v1 *m, + size_t *size); + +static bool upb_encode_array(upb_encstate *e, const char *field_mem, + const upb_msglayout_msginit_v1 *m, + const upb_msglayout_fieldinit_v1 *f) { + const upb_array *arr = *(const upb_array**)field_mem; + + if (arr->len == 0) { + return true; + } + +#define VARINT_CASE(ctype, encode) do { \ + uint64_t *start = arr->data; \ + uint64_t *ptr = start + arr->len; \ + char *buf_ptr = e->ptr; \ + do { \ + ptr--; \ + CHK(upb_put_varint(e, encode)); \ + } while (ptr != start); \ + CHK(upb_put_varint(e, buf_ptr - e->ptr)); \ + break; \ +} while(0) + + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + CHK(upb_put_fixedarray(e, arr, sizeof(double))); + break; + case UPB_DESCRIPTOR_TYPE_FLOAT: + CHK(upb_put_fixedarray(e, arr, sizeof(float))); + break; + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t))); + break; + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t))); + break; + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + VARINT_CASE(uint64_t, *ptr); + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_ENUM: + VARINT_CASE(uint32_t, *ptr); + case UPB_DESCRIPTOR_TYPE_BOOL: + VARINT_CASE(bool, *ptr); + case UPB_DESCRIPTOR_TYPE_SINT32: + VARINT_CASE(int32_t, upb_zzenc_32(*ptr)); + case UPB_DESCRIPTOR_TYPE_SINT64: + VARINT_CASE(int64_t, upb_zzenc_64(*ptr)); + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + upb_stringview *start = arr->data; + upb_stringview *ptr = start + arr->len; + do { + ptr--; + CHK(upb_put_bytes(e, ptr->data, ptr->size) && + upb_put_varint(e, ptr->size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); + } while (ptr != start); + return true; + } + case UPB_DESCRIPTOR_TYPE_GROUP: { + void **start = arr->data; + void **ptr = start + arr->len; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + do { + size_t size; + ptr--; + CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && + upb_encode_message(e, *ptr, subm, &size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP)); + } while (ptr != start); + return true; + } + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + void **start = arr->data; + void **ptr = start + arr->len; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + do { + size_t size; + ptr--; + CHK(upb_encode_message(e, *ptr, subm, &size) && + upb_put_varint(e, size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); + } while (ptr != start); + return true; + } + } +#undef VARINT_CASE + + /* We encode all primitive arrays as packed, regardless of what was specified + * in the .proto file. Could special case 1-sized arrays. */ + CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); + return true; +} + +static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem, + const upb_msglayout_msginit_v1 *m, + const upb_msglayout_fieldinit_v1 *f, + bool is_proto3) { +#define CASE(ctype, type, wire_type, encodeval) do { \ + ctype val = *(ctype*)field_mem; \ + if (is_proto3 && val == 0) { \ + return true; \ + } \ + return upb_put_ ## type(e, encodeval) && \ + upb_put_tag(e, f->number, wire_type); \ +} while(0) + + switch (f->type) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + CASE(double, double, UPB_WIRE_TYPE_64BIT, val); + case UPB_DESCRIPTOR_TYPE_FLOAT: + CASE(float, float, UPB_WIRE_TYPE_32BIT, val); + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_ENUM: + CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val); + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val); + case UPB_DESCRIPTOR_TYPE_BOOL: + CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_SINT32: + CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_32(val)); + case UPB_DESCRIPTOR_TYPE_SINT64: + CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_64(val)); + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + upb_stringview view = *(upb_stringview*)field_mem; + if (is_proto3 && view.size == 0) { + return true; + } + return upb_put_bytes(e, view.data, view.size) && + upb_put_varint(e, view.size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + } + case UPB_DESCRIPTOR_TYPE_GROUP: { + size_t size; + void *submsg = *(void**)field_mem; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + if (is_proto3 && submsg == NULL) { + return true; + } + return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && + upb_encode_message(e, submsg, subm, &size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP); + } + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + size_t size; + void *submsg = *(void**)field_mem; + const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index]; + if (is_proto3 && submsg == NULL) { + return true; + } + return upb_encode_message(e, submsg, subm, &size) && + upb_put_varint(e, size) && + upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + } + } +#undef CASE + UPB_UNREACHABLE(); +} + +bool upb_encode_hasscalarfield(const char *msg, + const upb_msglayout_msginit_v1 *m, + const upb_msglayout_fieldinit_v1 *f) { + if (f->oneof_index != UPB_NOT_IN_ONEOF) { + return upb_readcase(msg, m, f->oneof_index) == f->number; + } else if (m->is_proto2) { + return upb_readhasbit(msg, f); + } else { + /* For proto3, we'll test for the field being empty later. */ + return true; + } +} + +bool upb_encode_message(upb_encstate* e, const char *msg, + const upb_msglayout_msginit_v1 *m, + size_t *size) { + int i; + char *buf_end = e->ptr; + for (i = m->field_count - 1; i >= 0; i--) { + const upb_msglayout_fieldinit_v1 *f = &m->fields[i]; + + if (f->label == UPB_LABEL_REPEATED) { + CHK(upb_encode_array(e, msg, m, f)); + } else { + if (upb_encode_hasscalarfield(msg, m, f)) { + CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, !m->is_proto2)); + } + } + } + + *size = buf_end - e->ptr; + return true; +} + +char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m, + upb_env *env, size_t *size) { + upb_encstate e; + e.env = env; + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + + if (!upb_encode_message(&e, msg, m, size)) { + return false; + } + + *size = e.limit - e.ptr; + return e.ptr; +} diff --git a/upb/encode.h b/upb/encode.h new file mode 100644 index 0000000..83908d4 --- /dev/null +++ b/upb/encode.h @@ -0,0 +1,17 @@ +/* +** upb_encode: parsing into a upb_msg using a upb_msglayout. +*/ + +#ifndef UPB_ENCODE_H_ +#define UPB_ENCODE_H_ + +#include "upb/msg.h" + +UPB_BEGIN_EXTERN_C + +char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *l, + upb_env *env, size_t *size); + +UPB_END_EXTERN_C + +#endif /* UPB_ENCODE_H_ */ @@ -1,5 +1,6 @@ #include "upb/msg.h" +#include "upb/structs.int.h" static bool is_power_of_two(size_t val) { return (val & (val - 1)) == 0; @@ -794,15 +795,6 @@ void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val, /** upb_array *****************************************************************/ -struct upb_array { - upb_fieldtype_t type; - uint8_t element_size; - void *data; /* Each element is element_size. */ - size_t len; /* Measured in elements. */ - size_t size; /* Measured in elements. */ - upb_alloc *alloc; -}; - #define DEREF_ARR(arr, i, type) ((type*)arr->data)[i] size_t upb_array_sizeof(upb_fieldtype_t type) { @@ -126,6 +126,7 @@ UPB_INLINE upb_stringview upb_stringview_make(const char *data, size_t size) { #define UPB_STRINGVIEW_INIT(ptr, len) {ptr, len} + /** upb_msgval ****************************************************************/ /* A union representing all possible protobuf values. Used for generic get/set @@ -386,6 +387,7 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h, /** Interfaces for generated code *********************************************/ #define UPB_NOT_IN_ONEOF UINT16_MAX +#define UPB_NO_HASBIT UINT16_MAX typedef struct { uint32_t number; diff --git a/upb/structs.int.h b/upb/structs.int.h new file mode 100644 index 0000000..242155b --- /dev/null +++ b/upb/structs.int.h @@ -0,0 +1,18 @@ +/* +** structs.int.h: structures definitions that are internal to upb. +*/ + +#ifndef UPB_STRUCTS_H_ +#define UPB_STRUCTS_H_ + +struct upb_array { + upb_fieldtype_t type; + uint8_t element_size; + void *data; /* Each element is element_size. */ + size_t len; /* Measured in elements. */ + size_t size; /* Measured in elements. */ + upb_alloc *alloc; +}; + +#endif /* UPB_STRUCTS_H_ */ + @@ -34,6 +34,9 @@ template <int N> class InlinedEnvironment; #define UPB_INLINE static #endif +/* Hints to the compiler about likely/unlikely branches. */ +#define UPB_LIKELY(x) __builtin_expect((x),1) + /* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler * doesn't provide these preprocessor symbols. */ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) |