-- tools/make_c_api.lua
--
-- Reconstructed from a whitespace-collapsed patch dump of two commits by
-- Joshua Haberman: 9cb10577 ("First version of a real C codegen for upb.",
-- 2017-07-04) with the make_c_api.lua hunks of 1278ff89 ("Responded to PR
-- comments.", 2017-07-11) applied.  The hunks of the second commit that touch
-- other files (tests/conformance_upb.c, tools/upbc.lua, upb/msg.c, upb/msg.h)
-- do not belong to this module and are not reproduced here.

--[[

  Code to generate a C API in:
    foo.proto -> foo.upb.h
                 foo.upb.c

  This code is evolving very quickly and so there are lots of little things
  that aren't perfect right now.  As it settles a little more, the code
  quality should improve.

  There is still a lot to do, and things that are broken (oneofs, repeated
  fields, etc).

--]]

local upb = require "upb"
local dump_cinit = require "dump_cinit"
local export = {}

-- C type used for each scalar/string field type.  Message and enum fields are
-- handled separately in ctype().
local typemap = {
  [upb.TYPE_BOOL]    = "bool",
  [upb.TYPE_FLOAT]   = "float",
  [upb.TYPE_INT32]   = "int32_t",
  [upb.TYPE_UINT32]  = "uint32_t",
  [upb.TYPE_DOUBLE]  = "double",
  [upb.TYPE_INT64]   = "int64_t",
  [upb.TYPE_UINT64]  = "uint64_t",
  [upb.TYPE_STRING]  = "upb_stringview",
  [upb.TYPE_BYTES]   = "upb_stringview",
}

-- Removes a trailing ".proto" extension, ie:
--   foo/bar.proto -> foo/bar
--
-- The parentheses drop the substitution count that string.gsub() returns as a
-- second value, so callers get exactly one result.
--
-- NOTE(review): this function is deliberately left global, as in the original
-- commits, because other scripts may rely on it -- confirm before localizing.
function strip_proto(filename)
  return (string.gsub(filename, '%.proto$', ''))
end

-- Joins all arguments with "." into a single dotted name.
local function join(...)
  return table.concat({...}, ".")
end

-- Joins the arguments with "." and turns the result into a C identifier by
-- replacing every "." and "/" with "_".  Returns exactly one value (the gsub
-- substitution count is dropped) so the result can safely be passed as the
-- last argument of a varargs call.
local function to_cident(...)
  return (string.gsub(join(...), "[%./]", "_"))
end

-- Like to_cident(), but upper-cased for use in preprocessor symbols.
local function to_preproc(...)
  return string.upper(to_cident(...))
end

-- Strips away last path element, ie:
--   foo.Bar.Baz -> foo.Bar
-- A name without any "." yields the empty string.
local function remove_name(name)
  -- The greedy ".*" makes the capture end right before the *last* dot.
  return string.match(name, "^(.*)%.[^.]*$") or ""
end

-- Collects the defs produced by an iterator (as returned by filedef:defs())
-- into an array sorted by full name, so that output order is deterministic.
--
-- NOTE(review): the original code called sorted_defs() without defining it in
-- this file; presumably it was expected to exist as a global.  It is defined
-- locally here so the module is self-contained -- verify that sorting by full
-- name matches the intended order.
local function sorted_defs(iter, state, init)
  local sorted = {}
  for def in iter, state, init do
    sorted[#sorted + 1] = def
  end
  table.sort(sorted, function(a, b)
    return a:full_name() < b:full_name()
  end)
  return sorted
end

-- Appends the body of a C enum (one "NAME = number" line per value) for
-- enumdef, ordered by number.
local function dump_enum_vals(enumdef, append)
  local enum_vals = {}

  for name, number in enumdef:values() do
    enum_vals[#enum_vals + 1] = {name, number}
  end

  -- Sort by number; aliases (equal numbers) are ordered by name so that the
  -- output does not depend on iteration order.
  table.sort(enum_vals, function(a, b)
    if a[2] ~= b[2] then
      return a[2] < b[2]
    end
    return a[1] < b[1]
  end)

  -- protobuf convention is that enum values are scoped at the level of the
  -- enum itself, to follow C++.  Ie, if you have the enum:
  -- message Foo {
  --   enum E {
  --     VAL1 = 1;
  --     VAL2 = 2;
  --   }
  -- }
  --
  -- The name of VAL1 is Foo.VAL1, not Foo.E.VAL1.
  --
  -- This seems a bit sketchy, but people often name their enum values
  -- accordingly, ie:
  --
  -- enum Foo {
  --   FOO_VAL1 = 1;
  --   FOO_VAL2 = 2;
  -- }
  --
  -- So if we don't respect this also, we end up with constants that look like:
  --
  --   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1
  --
  -- (notice the duplicated "TYPE").
  local scope = to_cident(remove_name(enumdef:full_name()))

  -- An enum with no enclosing scope (no package, not nested) has an empty
  -- scope; emit the bare value name rather than "_NAME", since identifiers
  -- that start with an underscore and an upper-case letter are reserved in C.
  local prefix = (scope ~= "") and (scope .. "_") or ""

  for i, pair in ipairs(enum_vals) do
    local name, number = pair[1], pair[2]
    append(' %s = %d', prefix .. name, number)
    -- No trailing comma after the last enumerator.
    if i == #enum_vals then
      append('\n')
    else
      append(',\n')
    end
  end
end

-- Returns the C type used to store and pass the given field.
local function ctype(field)
  if field:label() == upb.LABEL_REPEATED then
    return "upb_array*"
  end

  local fieldtype = field:type()
  if fieldtype == upb.TYPE_MESSAGE then
    local subname = to_cident(field:subdef():full_name())
    if field:containing_type():file() == field:subdef():file() then
      -- Same file: the typedef emitted by write_h_file() is available.
      return subname .. "*"
    else
      -- Other file: only a "struct X;" forward declaration is emitted, so the
      -- struct tag has to be used.
      return "struct " .. subname .. "*"
    end
  elseif fieldtype == upb.TYPE_ENUM then
    return to_cident(field:subdef():full_name())
  else
    return typemap[fieldtype] or "void*"
  end
end

-- Appends the "generated file, do not edit" banner comment.
local function emit_file_warning(filedef, append)
  append('/* This file was generated by upbc (the upb compiler) from the input\n')
  append(' * file:\n')
  append(' *\n')
  append(' * %s\n', filedef:name())
  append(' *\n')
  append(' * Do not edit -- your changes will be discarded when the file is\n')
  append(' * regenerated. */\n\n')
end

-- Returns a sort key that determines where a field is placed in the generated
-- struct.  Lower keys come first.
local function field_layout_rank(field)
  -- Order:
  --   1, 2, 3. primitive fields (8, 4, 1 byte)
  --   4. oneof fields
  --   5. string fields
  --   6. submessage fields
  --   7. repeated fields
  --
  -- This has the following nice properties:
  --
  --  1. padding alignment is (nearly) minimized.
  --  2. fields that might have defaults (1-5) are segregated
  --     from fields that are always zero-initialized (6-7).
  --
  -- NOTE(review): enum fields are not listed below and therefore fall through
  -- to rank 1 together with the 8-byte types.
  local rank
  if field:containing_oneof() then
    rank = 4
  elseif field:label() == upb.LABEL_REPEATED then
    rank = 7
  elseif field:type() == upb.TYPE_MESSAGE then
    rank = 6
  elseif field:type() == upb.TYPE_STRING or field:type() == upb.TYPE_BYTES then
    rank = 5
  elseif field:type() == upb.TYPE_BOOL then
    rank = 3
  elseif field:type() == upb.TYPE_FLOAT or
         field:type() == upb.TYPE_INT32 or
         field:type() == upb.TYPE_UINT32 then
    rank = 2
  else
    rank = 1
  end

  -- Break ties with field number.
  return (rank * 2^29) + field:number()
end

-- Returns true if the field gets a hasbit: only non-repeated, non-oneof
-- fields of proto2 files do.
local function has_hasbit(field)
  if field:containing_type():file():syntax() == upb.SYNTAX_PROTO2 then
    return field:label() ~= upb.LABEL_REPEATED and not field:containing_oneof()
  else
    return false
  end
end

-- Writes the generated header (foo.upb.h) for filedef through append, a
-- printf-style function: append(fmt, ...).
local function write_h_file(filedef, append)
  emit_file_warning(filedef, append)
  local basename_preproc = to_preproc(filedef:name())
  append('#ifndef %s_UPB_H_\n', basename_preproc)
  append('#define %s_UPB_H_\n\n', basename_preproc)

  append('#include "upb/msg.h"\n\n')

  append('UPB_BEGIN_EXTERN_C\n\n')

  for msg in filedef:defs(upb.DEF_MSG) do
    -- TODO(haberman): forward declare C++ type names so we can use
    -- UPB_DECLARE_TYPE().
    local msgname = to_cident(msg:full_name())
    append('struct %s;\n', msgname)
    append('typedef struct %s %s;\n', msgname, msgname)
  end

  append("/* Enums */\n\n")
  for _, def in ipairs(sorted_defs(filedef:defs(upb.DEF_ENUM))) do
    local cident = to_cident(def:full_name())
    append('typedef enum {\n')
    dump_enum_vals(def, append)
    append('} %s;\n\n', cident)
  end

  for msg in filedef:defs(upb.DEF_MSG) do
    local msgname = to_cident(msg:full_name())
    append('/* %s message definition. */\n', msgname)
    append('extern const upb_msglayout_msginit_v1 %s_msginit;\n', msgname)
    append('%s *%s_new(upb_env *env);\n', msgname, msgname)
    append('%s *%s_parsenew(upb_stringview buf, upb_env *env);\n',
           msgname, msgname)
    append('char *%s_serialize(%s *msg, upb_env *env, size_t *len);\n',
           msgname, msgname)
    -- NOTE(review): %s_free is declared here, but write_c_file() below does
    -- not emit a definition for it.
    append('void %s_free(%s *msg, upb_env *env);\n', msgname, msgname)
    append('\n')

    append('/* %s getters. */\n', msgname)
    -- Getters are written immediately; setters are buffered so they can be
    -- emitted as one group after the getters and oneof declarations.
    local setters, get_setters = dump_cinit.str_appender()
    for field in msg:fields() do
      local fieldname = to_cident(field:name())
      if field:type() == upb.TYPE_MESSAGE and
         field:subdef():file() ~= filedef then
        -- Forward declaration for message type declared in another file.
        append('struct %s;\n', to_cident(field:subdef():full_name()))
      end
      -- Repeated fields get no accessors in the header yet.
      if field:label() ~= upb.LABEL_REPEATED then
        local typename = ctype(field)
        append('%s %s_%s(const %s *msg);\n',
               typename, msgname, fieldname, msgname)
        setters('void %s_set_%s(%s *msg, %s value);\n',
                msgname, fieldname, msgname, typename)
      end
    end

    for oneof in msg:oneofs() do
      local fullname = to_cident(oneof:containing_type():full_name() ..
                                 "." .. oneof:name())
      append('typedef enum {\n')
      for field in oneof:fields() do
        append(' %s = %d,\n', fullname .. "_" .. field:name(), field:number())
      end
      append(' %s_NOT_SET = 0,\n', fullname)
      append('} %s_oneofcases;\n', fullname)
      append('%s_oneofcases %s_case(const %s *msg);\n',
             fullname, fullname, msgname)
    end

    append('\n')
    append('/* %s setters. */\n', msgname)
    -- Pass the buffered text as an argument rather than as the format string,
    -- so that it is never interpreted as a format.
    append('%s', get_setters())

    append('\n')
    append('\n')
  end

  append('UPB_END_EXTERN_C')

  append('\n')
  append('\n')

  append('#endif /* %s_UPB_H_ */\n', basename_preproc)
end

-- Writes the generated source (foo.upb.c) for filedef through append.
-- hfilename is the name of the matching generated header to #include.
local function write_c_file(filedef, hfilename, append)
  emit_file_warning(filedef, append)

  -- The generated code below uses offsetof() and memset().
  append('#include <stddef.h>\n')
  append('#include <string.h>\n')
  append('#include "upb/msg.h"\n')
  append('#include "upb/upb.h"\n')
  append('#include "%s"\n\n', hfilename)

  for dep in filedef:dependencies() do
    local outbase = strip_proto(dep:name())
    append('#include "%s.upb.h"\n', outbase)
  end

  append('\n')

  for msg in filedef:defs(upb.DEF_MSG) do
    local msgname = to_cident(msg:full_name())

    local fields_array_ref = "NULL"
    local submsgs_array_ref = "NULL"
    local field_count = 0
    -- Unique submessage type names, plus name -> index in the submsgs array.
    -- Keyed by full name (not by def object) so lookups do not depend on the
    -- binding returning the identical Lua object for the same def.
    local submsg_names = {}
    local submsg_seen = {}
    local submsg_indexes = {}
    -- Field number -> hasbit index.
    local hasbit_count = 0
    local hasbit_indexes = {}
    -- TODO(haberman): oneofs

    -- Create a layout order for fields.  We use this order for the struct and
    -- for offsets, but our list of fields we keep in field number order.
    local fields_layout_order = {}
    -- Another sorted array in field number order.
    local fields_number_order = {}
    for field in msg:fields() do
      fields_layout_order[#fields_layout_order + 1] = field
      fields_number_order[#fields_number_order + 1] = field
    end
    table.sort(fields_layout_order, function(a, b)
      return field_layout_rank(a) < field_layout_rank(b)
    end)
    table.sort(fields_number_order, function(a, b)
      return a:number() < b:number()
    end)

    append('struct %s {\n', msgname)
    for _, field in ipairs(fields_layout_order) do
      field_count = field_count + 1

      if field:type() == upb.TYPE_MESSAGE then
        local subname = field:subdef():full_name()
        if not submsg_seen[subname] then
          submsg_seen[subname] = true
          submsg_names[#submsg_names + 1] = subname
        end
      end

      if has_hasbit(field) then
        hasbit_indexes[field:number()] = hasbit_count
        hasbit_count = hasbit_count + 1
      end

      append(' %s %s;\n', ctype(field), field:name())
    end
    append('};\n\n')

    if #submsg_names > 0 then
      -- TODO(haberman): could save a little bit of space by only generating a
      -- "submsgs" array for every strongly-connected component.
      local submsgs_array_name = msgname .. "_submsgs"
      submsgs_array_ref = "&" .. submsgs_array_name .. "[0]"

      -- Deterministic order.  The array is sized by the number of *unique*
      -- submessage types, which is how many entries are emitted below.
      table.sort(submsg_names)
      append('static const upb_msglayout_msginit_v1 *const %s[%s] = {\n',
             submsgs_array_name, #submsg_names)

      for i, subname in ipairs(submsg_names) do
        append(' &%s_msginit,\n', to_cident(subname))
        submsg_indexes[subname] = i - 1  -- C arrays are 0-based.
      end

      append('};\n\n')
    end

    if field_count > 0 then
      local fields_array_name = msgname .. "__fields"
      fields_array_ref = "&" .. fields_array_name .. "[0]"
      append('static const upb_msglayout_fieldinit_v1 %s[%s] = {\n',
             fields_array_name, field_count)
      for _, field in ipairs(fields_number_order) do
        local submsg_index = "-1"
        local oneof_index = "UPB_NOT_IN_ONEOF"
        if field:type() == upb.TYPE_MESSAGE then
          submsg_index = submsg_indexes[field:subdef():full_name()]
        end
        -- TODO(haberman): oneofs.
        append(' {%s, offsetof(%s, %s), %s, %s, %s, %s, %s},\n',
               field:number(),
               msgname,
               field:name(),
               hasbit_indexes[field:number()] or "-1",
               oneof_index,
               submsg_index,
               field:descriptor_type(),
               field:label())
      end
      append('};\n\n')
    end

    append('const upb_msglayout_msginit_v1 %s_msginit = {\n', msgname)
    append(' %s,\n', submsgs_array_ref)
    append(' %s,\n', fields_array_ref)
    append(' NULL, /* TODO. oneofs */\n')
    append(' NULL, /* TODO. default_msg */\n')
    append(' UPB_ALIGNED_SIZEOF(%s), %s, %s, %s, %s\n',
           msgname, field_count,
           0,        -- TODO: oneof_count
           'false',  -- TODO: extendable
           'true'    -- TODO: is_proto2
           )
    append('};\n\n')

    append('%s *%s_new(upb_env *env) {\n', msgname, msgname)
    append(' %s *msg = upb_env_malloc(env, sizeof(*msg));\n',
           msgname)
    -- Do not memset() through a failed allocation.
    append(' if (!msg) return NULL;\n')
    append(' memset(msg, 0, sizeof(*msg)); /* TODO: defaults */\n')
    append(' return msg;\n')
    append('}\n')

    append('%s *%s_parsenew(upb_stringview buf, upb_env *env) {\n',
           msgname, msgname)
    append(' UPB_UNUSED(buf);\n')
    append(' UPB_UNUSED(env);\n')
    append(' return NULL;\n')
    append('}\n')

    append('char *%s_serialize(%s *msg, upb_env *env, size_t *size) {\n',
           msgname, msgname)
    append(' UPB_UNUSED(msg);\n')
    append(' UPB_UNUSED(env);\n')
    append(' UPB_UNUSED(size);\n')
    append(' return NULL; /* TODO. */\n')
    append('}\n')

    -- NOTE(review): accessors are defined for every field, including repeated
    -- ones, although write_h_file() only declares them for non-repeated
    -- fields.
    for field in msg:fields() do
      local typename = ctype(field)
      append('%s %s_%s(const %s *msg) {\n',
             typename, msgname, field:name(), msgname)
      append(' return msg->%s;\n', field:name())
      append('}\n')
      append('void %s_set_%s(%s *msg, %s value) {\n',
             msgname, field:name(), msgname, typename)
      append(' msg->%s = value;\n', field:name())
      append('}\n')
    end

    for oneof in msg:oneofs() do
      local fullname = to_cident(oneof:containing_type():full_name() ..
                                 "." .. oneof:name())
      append('%s_oneofcases %s_case(const %s *msg) {\n',
             fullname, fullname, msgname)
      -- Each line ends with a newline so the stub does not run into whatever
      -- is generated next.
      append(' return 0; /* TODO. */\n')
      append('}\n')
    end
  end
end

-- Generates both output files for filedef.
--   hfilename: name of the generated header, as #included by the .c file.
--   append_h:  printf-style appender receiving the header text.
--   append_c:  printf-style appender receiving the source text.
function export.write_gencode(filedef, hfilename, append_h, append_c)
  write_h_file(filedef, append_h)
  write_c_file(filedef, hfilename, append_c)
end

return export