summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
author Joshua Haberman <jhaberman@gmail.com> 2017-07-04 17:02:48 -0700
committer Joshua Haberman <jhaberman@gmail.com> 2017-07-04 17:02:48 -0700
commit 9cb10577fcefa3ed004e0bbdc61e6238e8137e3c (patch)
tree e270ff7b0f782dadf2942f6816b071aa2a134e21 /tools
parent 76fcdd2ee92e8f7852f96ccd49fe776236ae4e60 (diff)
First version of a real C codegen for upb.
Also includes an implementation of the conformance tests to display what the API usage will be like. There is still a lot to do, and things that are broken (oneofs, repeated fields, etc), but it's a good start.
Diffstat (limited to 'tools')
-rw-r--r-- tools/make_c_api.lua 432
-rw-r--r-- tools/upbc.lua 55
2 files changed, 478 insertions, 9 deletions
diff --git a/tools/make_c_api.lua b/tools/make_c_api.lua
new file mode 100644
index 0000000..aaf5d1e
--- /dev/null
+++ b/tools/make_c_api.lua
@@ -0,0 +1,432 @@
+--[[
+
+ Code to generate a C API in:
+ foo.proto -> foo.upb.h
+ foo.upb.c
+
+ This code is evolving very quickly and so there are lots of little things
+ that aren't perfect right now. As it settles a little more, the code
+ quality should improve.
+
+--]]
+
+local upb = require "upb"
+local dump_cinit = require "dump_cinit"
+local export = {}
+
+-- Maps upb scalar field types to the name of the C type emitted for them.
+-- Message and enum types are not listed here; ctype() handles those itself
+-- and falls back to "void*" for any type missing from this table.
+local typemap = {
+ [upb.TYPE_BOOL] = "bool",
+ [upb.TYPE_FLOAT] = "float",
+ [upb.TYPE_INT32] = "int32_t",
+ [upb.TYPE_UINT32] = "uint32_t",
+ [upb.TYPE_DOUBLE] = "double",
+ [upb.TYPE_INT64] = "int64_t",
+ [upb.TYPE_UINT64] = "uint64_t",
+ [upb.TYPE_STRING] = "upb_stringview",
+ [upb.TYPE_BYTES] = "upb_stringview",
+}
+
+-- Removes a trailing ".proto" extension from a file name, e.g.
+--   "foo/bar.proto" -> "foo/bar"
+-- Names that do not end in ".proto" are returned unchanged.
+--
+-- string.gsub returns (result, match_count); the extra parentheses truncate
+-- that to the string alone so the count can never leak into a caller's
+-- argument list or table constructor.
+--
+-- NOTE(review): deliberately left global.  tools/upbc.lua calls a function
+-- with this name; confirm whether it depends on this definition before
+-- making it local.
+function strip_proto(filename)
+  return (string.gsub(filename, '%.proto$', ''))
+end
+
+--[[
+ [upb.TYPE_ENUM] = 5,
+ [upb.TYPE_MESSAGE] = 8,
+--]]
+
+-- Joins all arguments into a single dotted name: join("a", "b") -> "a.b".
+local function join(...)
+  local parts = {...}
+  return table.concat(parts, ".")
+end
+
+-- Builds a C identifier from one or more name parts.  The parts are joined
+-- with "." and then every "." and "/" is replaced by "_", e.g.
+--   to_cident("google.protobuf.Any")  -> "google_protobuf_Any"
+--   to_cident("foo/bar.proto")        -> "foo_bar_proto"
+--
+-- Returns exactly one value.  string.gsub also returns the number of
+-- substitutions; the parentheses drop it so that calls in tail position or
+-- as the last argument of append(...) do not pass a stray number along.
+local function to_cident(...)
+  return (string.gsub(join(...), "[%./]", "_"))
+end
+
+-- Like to_cident(), but upper-cased so the result can be used as a
+-- preprocessor symbol (for example in an include guard).
+local function to_preproc(...)
+  local ident = to_cident(...)
+  return ident:upper()
+end
+
+
+-- Drops the final dot-separated component of a name, i.e.:
+--   foo.Bar.Baz -> foo.Bar
+-- A name that contains no "." yields the empty string.
+local function remove_name(name)
+  -- The greedy ".*" extends as far as possible, so the "%." that follows it
+  -- matches the last dot in the name.
+  local prefix = string.match(name, "^(.*)%.")
+  return prefix or ""
+end
+
+-- Appends the enumerator list (the lines between "{" and "}") of a C enum
+-- for enumdef.
+--
+--   enumdef: enum definition.  Its values() iterator yields (name, number)
+--            pairs and full_name() returns its dotted name.
+--   append:  printf-style output function (format string plus arguments).
+--
+-- Enumerators are written in ascending numeric order.  Values that share a
+-- number are ordered by name so the output does not depend on iteration
+-- order.  Entries are comma-separated with no comma after the last one.
+local function dump_enum_vals(enumdef, append)
+  local enum_vals = {}
+
+  for name, number in enumdef:values() do
+    enum_vals[#enum_vals + 1] = {name, number}
+  end
+
+  table.sort(enum_vals, function(a, b)
+    if a[2] ~= b[2] then
+      return a[2] < b[2]
+    end
+    -- Same number (aliased values): fall back to the name.
+    return a[1] < b[1]
+  end)
+
+  -- protobuf convention is that enum values are scoped at the level of the
+  -- enum itself, to follow C++.  I.e., if you have the enum:
+  --   message Foo {
+  --     enum E {
+  --       VAL1 = 1;
+  --       VAL2 = 2;
+  --     }
+  --   }
+  --
+  -- The name of VAL1 is Foo.VAL1, not Foo.E.VAL1.
+  --
+  -- This seems a bit sketchy, but people often name their enum values
+  -- accordingly, i.e.:
+  --
+  --   enum Foo {
+  --     FOO_VAL1 = 1;
+  --     FOO_VAL2 = 2;
+  --   }
+  --
+  -- So if we don't respect this also, we end up with constants that look like:
+  --
+  --   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1
+  --
+  -- (notice the duplicated "TYPE").
+  local scope = to_cident(remove_name(enumdef:full_name()))
+
+  -- An enum with no enclosing package or message has an empty scope; emit the
+  -- bare value name then instead of an identifier starting with "_".
+  local prefix = ""
+  if scope ~= "" then
+    prefix = scope .. "_"
+  end
+
+  for i, pair in ipairs(enum_vals) do
+    local name, number = pair[1], pair[2]
+    append(' %s = %d', prefix .. name, number)
+    if i == #enum_vals then
+      append('\n')
+    else
+      append(',\n')
+    end
+  end
+end
+
+-- Returns the C type string used for `field` in generated structs and
+-- accessor signatures:
+--   * repeated fields           -> "upb_array*"
+--   * message fields            -> "<Msg>*", spelled "struct <Msg>*" when the
+--                                  submessage is defined in a different file
+--   * enum fields               -> the enum's C identifier
+--   * everything else           -> the typemap entry, or "void*" if none
+local function ctype(field)
+  if field:label() == upb.LABEL_REPEATED then
+    return "upb_array*"
+  end
+
+  local kind = field:type()
+
+  if kind == upb.TYPE_MESSAGE then
+    local subdef = field:subdef()
+    local pointer = to_cident(subdef:full_name()) .. "*"
+    if field:containing_type():file() == subdef:file() then
+      return pointer
+    end
+    return "struct " .. pointer
+  end
+
+  if kind == upb.TYPE_ENUM then
+    return to_cident(field:subdef():full_name())
+  end
+
+  return typemap[kind] or "void*"
+end
+
+-- Writes the "generated file, do not edit" banner comment through append,
+-- naming the input file via filedef:name().
+local function emit_file_warning(filedef, append)
+  local before_name = {
+    '/* This file was generated by upbc (the upb compiler) from the input\n',
+    ' * file:\n',
+    ' *\n',
+  }
+  local after_name = {
+    ' *\n',
+    ' * Do not edit -- your changes will be discarded when the file is\n',
+    ' * regenerated. */\n\n',
+  }
+
+  for _, line in ipairs(before_name) do
+    append(line)
+  end
+  append(' * %s\n', filedef:name())
+  for _, line in ipairs(after_name) do
+    append(line)
+  end
+end
+
+-- Computes the sort key that decides where `field` is placed in the
+-- generated struct.  Smaller keys come first.
+--
+-- Order:
+--   1, 2, 3. primitive fields (8, 4, 1 byte)
+--   4. oneof fields
+--   5. string fields
+--   6. submessage fields
+--   7. repeated fields
+--
+-- Any type not singled out below (including enums) lands in rank 1.
+local function field_layout_rank(field)
+  local function rank_of(f)
+    if f:containing_oneof() then
+      return 4
+    end
+    if f:label() == upb.LABEL_REPEATED then
+      return 7
+    end
+
+    local kind = f:type()
+    if kind == upb.TYPE_MESSAGE then
+      return 6
+    end
+    if kind == upb.TYPE_STRING or kind == upb.TYPE_BYTES then
+      return 5
+    end
+    if kind == upb.TYPE_BOOL then
+      return 3
+    end
+    if kind == upb.TYPE_FLOAT or
+       kind == upb.TYPE_INT32 or
+       kind == upb.TYPE_UINT32 then
+      return 2
+    end
+    return 1
+  end
+
+  -- The rank occupies the high part of the key; the field number breaks ties.
+  return (rank_of(field) * 2^29) + field:number()
+end
+
+-- Tells whether `field` gets a hasbit.  Only fields in proto2-syntax files
+-- qualify, and among those only fields that are neither repeated nor members
+-- of a oneof.
+local function has_hasbit(field)
+  local syntax = field:containing_type():file():syntax()
+  if syntax ~= upb.SYNTAX_PROTO2 then
+    return false
+  end
+
+  if field:label() == upb.LABEL_REPEATED then
+    return false
+  end
+  return not field:containing_oneof()
+end
+
+-- Writes the generated header (foo.upb.h) for filedef through append.
+--
+--   filedef: file definition being compiled; supplies name() and defs().
+--   append:  printf-style output function (format string plus arguments).
+--
+-- Output, in order: the generated-file banner, an include guard, a forward
+-- declaration and typedef for every message, one typedef'd enum per enum
+-- def, and then for each message its msginit/new/parsenew/serialize/free
+-- declarations, getter declarations, oneof case enums and setter
+-- declarations.  Repeated fields get no accessor declarations yet.
+--
+-- NOTE(review): sorted_defs is not defined in this file; presumably it is a
+-- global supplied by another module -- confirm.
+local function write_h_file(filedef, append)
+  emit_file_warning(filedef, append)
+  local basename_preproc = to_preproc(filedef:name())
+  append('#ifndef %s_UPB_H_\n', basename_preproc)
+  append('#define %s_UPB_H_\n\n', basename_preproc)
+
+  append('#include "upb/msg.h"\n\n')
+
+  append('UPB_BEGIN_EXTERN_C\n\n')
+
+  for msg in filedef:defs(upb.DEF_MSG) do
+    -- TODO(haberman): forward declare C++ type names so we can use
+    -- UPB_DECLARE_TYPE().
+    local msgname = to_cident(msg:full_name())
+    append('struct %s;\n', msgname)
+    append('typedef struct %s %s;\n', msgname, msgname)
+  end
+
+  append("/* Enums */\n\n")
+  for _, def in ipairs(sorted_defs(filedef:defs(upb.DEF_ENUM))) do
+    local cident = to_cident(def:full_name())
+    append('typedef enum {\n')
+    dump_enum_vals(def, append)
+    append('} %s;\n\n', cident)
+  end
+
+  for msg in filedef:defs(upb.DEF_MSG) do
+    local msgname = to_cident(msg:full_name())
+    append('/* %s message definition. */\n', msgname)
+    append('extern const upb_msglayout_msginit_v1 %s_msginit;\n', msgname)
+    append('%s *%s_new(upb_env *env);\n', msgname, msgname)
+    append('%s *%s_parsenew(upb_stringview buf, upb_env *env);\n',
+           msgname, msgname)
+    append('char *%s_serialize(%s *msg, upb_env *env, size_t *len);\n',
+           msgname, msgname)
+    append('void %s_free(%s *msg, upb_env *env);\n', msgname, msgname)
+    append('\n')
+
+    append('/* %s getters. */\n', msgname)
+    -- Setter declarations are buffered here and flushed after the getters
+    -- and oneof enums, so each group appears under its own heading.
+    local setters, get_setters = dump_cinit.str_appender()
+    for field in msg:fields() do
+      local fieldname = to_cident(field:name())
+      if field:type() == upb.TYPE_MESSAGE and
+         field:subdef():file() ~= filedef then
+        -- Forward declaration for message type declared in another file.
+        local subname = to_cident(field:subdef():full_name())
+        append('struct %s;\n', subname)
+      end
+      -- Repeated fields have no accessors yet.
+      if field:label() ~= upb.LABEL_REPEATED then
+        local typename = ctype(field)
+        append('%s %s_%s(const %s *msg);\n',
+               typename, msgname, fieldname, msgname)
+        setters('void %s_set_%s(%s *msg, %s value);\n',
+                msgname, fieldname, msgname, typename)
+      end
+    end
+
+    for oneof in msg:oneofs() do
+      local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name())
+      append('typedef enum {\n')
+      for field in oneof:fields() do
+        append(' %s = %d,\n', fullname .. "_" .. field:name(), field:number())
+      end
+      -- NOT_SET is the final enumerator, so it carries no trailing comma:
+      -- C89 does not allow one, and dump_enum_vals() avoids it as well.
+      append(' %s_NOT_SET = 0\n', fullname)
+      append('} %s_oneofcases;\n', fullname)
+      append('%s_oneofcases %s_case(const %s *msg);\n', fullname, fullname, msgname)
+    end
+
+    append('\n')
+    append('/* %s setters. */\n', msgname)
+    append(get_setters())
+
+    append('\n')
+    append('\n')
+  end
+
+  append('UPB_END_EXTERN_C')
+
+  append('\n')
+  append('\n')
+
+  append('#endif /* %s_UPB_H_ */\n', basename_preproc)
+end
+
+-- Writes the generated C source (foo.upb.c) for filedef through append.
+--
+--   filedef:   file definition being compiled.
+--   hfilename: name of the matching generated header; emitted in an #include.
+--   append:    printf-style output function (format string plus arguments).
+--
+-- For every message in the file this emits: the struct definition (fields in
+-- field_layout_rank() order), an optional array of submessage msginit
+-- pointers, an optional field table (in field-number order), the msginit
+-- object, stub new/parsenew/serialize functions, a getter and setter per
+-- field, and a stub case function per oneof.
+--
+-- NOTE(review): the header declares <msg>_free() but no definition is
+-- generated here, and accessors are generated for repeated fields although
+-- the header declares none for them -- confirm whether that is intended.
+local function write_c_file(filedef, hfilename, append)
+  emit_file_warning(filedef, append)
+
+  append('#include <stddef.h>\n')
+  -- The generated <msg>_new() functions call memset().
+  append('#include <string.h>\n')
+  append('#include "upb/msg.h"\n')
+  append('#include "upb/upb.h"\n')
+  append('#include "%s"\n\n', hfilename)
+
+  for dep in filedef:dependencies() do
+    local outbase = strip_proto(dep:name())
+    append('#include "%s.upb.h"\n', outbase)
+  end
+
+  append('\n')
+
+  for msg in filedef:defs(upb.DEF_MSG) do
+    local msgname = to_cident(msg:full_name())
+
+    local fields_array_ref = "NULL"
+    local submsgs_array_ref = "NULL"
+    local field_count = 0
+    local submsg_set = {}
+    local submsg_indexes = {}
+    local hasbit_count = 0
+    local hasbit_indexes = {}
+    -- TODO(haberman): oneofs
+
+    -- Create a layout order for fields.  We use this order for the struct and
+    -- for offsets, but our list of fields we keep in field number order.
+    local fields_layout_order = {}
+    for field in msg:fields() do
+      fields_layout_order[#fields_layout_order + 1] = field
+    end
+    table.sort(fields_layout_order, function(a, b)
+      return field_layout_rank(a) < field_layout_rank(b)
+    end)
+
+    -- Another sorted array in field number order.
+    local fields_number_order = {}
+    for field in msg:fields() do
+      fields_number_order[#fields_number_order + 1] = field
+    end
+    table.sort(fields_number_order, function(a, b)
+      return a:number() < b:number()
+    end)
+
+    append('struct %s {\n', msgname)
+    for _, field in ipairs(fields_layout_order) do
+      field_count = field_count + 1
+
+      if field:type() == upb.TYPE_MESSAGE then
+        -- Keyed by subdef so each submessage type is recorded only once.
+        submsg_set[field:subdef()] = true
+      end
+
+      if has_hasbit(field) then
+        hasbit_indexes[field] = hasbit_count
+        hasbit_count = hasbit_count + 1
+      end
+
+      append(' %s %s;\n', ctype(field), field:name())
+    end
+    append('};\n\n')
+
+    -- Create a deterministically-sorted array of the distinct submessage
+    -- types referenced by this message.
+    local submsg_array = {}
+    for submsg in pairs(submsg_set) do
+      submsg_array[#submsg_array + 1] = submsg
+    end
+    table.sort(submsg_array, function(a, b)
+      return a:full_name() < b:full_name()
+    end)
+
+    if #submsg_array > 0 then
+      -- TODO(haberman): could save a little bit of space by only generating a
+      -- "submsgs" array for every strongly-connected component.
+      local submsgs_array_name = msgname .. "_submsgs"
+      submsgs_array_ref = "&" .. submsgs_array_name .. "[0]"
+      -- Size the array by the number of entries actually written, not by the
+      -- number of message-typed fields (several fields may share one type).
+      append('static const upb_msglayout_msginit_v1 *const %s[%s] = {\n',
+             submsgs_array_name, #submsg_array)
+
+      for i, submsg in ipairs(submsg_array) do
+        local subname = to_cident(submsg:full_name())
+        append(' &%s_msginit,\n', subname)
+        -- C arrays are zero-based.
+        submsg_indexes[submsg] = i - 1
+      end
+
+      append('};\n\n')
+    end
+
+    if field_count > 0 then
+      local fields_array_name = msgname .. "__fields"
+      fields_array_ref = "&" .. fields_array_name .. "[0]"
+      append('static const upb_msglayout_fieldinit_v1 %s[%s] = {\n',
+             fields_array_name, field_count)
+      for _, field in ipairs(fields_number_order) do
+        local submsg_index = "-1"
+        local oneof_index = "UPB_NOT_IN_ONEOF"
+        if field:type() == upb.TYPE_MESSAGE then
+          submsg_index = submsg_indexes[field:subdef()]
+        end
+        -- TODO(haberman): oneofs.
+        append(' {%s, offsetof(%s, %s), %s, %s, %s, %s, %s},\n',
+               field:number(),
+               msgname,
+               field:name(),
+               hasbit_indexes[field] or "-1",
+               oneof_index,
+               submsg_index,
+               field:descriptor_type(),
+               field:label())
+      end
+      append('};\n\n')
+    end
+
+    append('const upb_msglayout_msginit_v1 %s_msginit = {\n', msgname)
+    append(' %s,\n', submsgs_array_ref)
+    append(' %s,\n', fields_array_ref)
+    append(' NULL, /* TODO. oneofs */\n')
+    append(' NULL, /* TODO. default_msg */\n')
+    append(' UPB_ALIGNED_SIZEOF(%s), %s, %s, %s, %s\n',
+           msgname, field_count,
+           0,        -- TODO: oneof_count
+           'false',  -- TODO: extendable
+           'true'    -- TODO: is_proto2
+           )
+    append('};\n\n')
+
+    append('%s *%s_new(upb_env *env) {\n', msgname, msgname)
+    append(' %s *msg = upb_env_malloc(env, sizeof(*msg));\n',
+           msgname)
+    -- Do not hand a failed allocation to memset().
+    append(' if (!msg) return NULL;\n')
+    append(' memset(msg, 0, sizeof(*msg)); /* TODO: defaults */\n')
+    append(' return msg;\n')
+    append('}\n')
+
+    append('%s *%s_parsenew(upb_stringview buf, upb_env *env) {\n',
+           msgname, msgname)
+    append(' UPB_UNUSED(buf);\n')
+    append(' UPB_UNUSED(env);\n')
+    append(' return NULL;\n')
+    append('}\n')
+
+    append('char *%s_serialize(%s *msg, upb_env *env, size_t *size) {\n',
+           msgname, msgname)
+    append(' UPB_UNUSED(msg);\n')
+    append(' UPB_UNUSED(env);\n')
+    append(' UPB_UNUSED(size);\n')
+    append(' return NULL; /* TODO. */\n')
+    append('}\n')
+
+    for field in msg:fields() do
+      local typename = ctype(field)
+      append('%s %s_%s(const %s *msg) {\n',
+             typename, msgname, field:name(), msgname)
+      append(' return msg->%s;\n', field:name())
+      append('}\n')
+      append('void %s_set_%s(%s *msg, %s value) {\n',
+             msgname, field:name(), msgname, typename)
+      append(' msg->%s = value;\n', field:name())
+      append('}\n')
+    end
+
+    for oneof in msg:oneofs() do
+      local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name())
+      append('%s_oneofcases %s_case(const %s *msg) {\n', fullname, fullname, msgname)
+      append(' UPB_UNUSED(msg);\n')
+      -- Each line is newline-terminated so the closing brace does not run
+      -- into whatever is generated next.
+      append(' return 0; /* TODO. */\n')
+      append('}\n')
+    end
+  end
+end
+
+-- Module entry point: generates both output files for filedef.  The header
+-- is written through append_h and the C source through append_c; hfilename
+-- is passed on to the C writer, which emits it in an #include line.
+function export.write_gencode(filedef, hfilename, append_h, append_c)
+ write_h_file(filedef, append_h)
+ write_c_file(filedef, hfilename, append_c)
+end
+
+return export
diff --git a/tools/upbc.lua b/tools/upbc.lua
index 5db5fba..bf9a68d 100644
--- a/tools/upbc.lua
+++ b/tools/upbc.lua
@@ -1,20 +1,29 @@
--[[
- The upb compiler. Unlike the proto2 compiler, this does
- not output any parsing code or generated classes or anything
- specific to the protobuf binary format at all. At the moment
- it only dumps C initializers for upb_defs, so that a .proto
- file can be represented in a .o file.
+ The upb compiler. It can write two different kinds of output
+ files:
+
+ - generated code for a C API (foo.upb.h, foo.upb.c)
+ - (obsolete): definitions of upb defs. (foo.upbdefs.h, foo.upbdefs.c)
--]]
local dump_cinit = require "dump_cinit"
+local make_c_api = require "make_c_api"
local upb = require "upb"
-local src = arg[1]
+-- Set by --generate-upbdefs: also write the legacy *.upbdefs.{h,c} files.
+local generate_upbdefs = false
+-- Input file named on the command line (the usage text calls it the binary
+-- descriptor).  Declared local so that parsing the arguments does not create
+-- a global variable.
+local src
+
+-- Every argument other than the flag is taken as the input file; if several
+-- are given, the last one wins.
+for _, argument in ipairs(arg) do
+  if argument == "--generate-upbdefs" then
+    generate_upbdefs = true
+  else
+    src = argument
+  end
+end
if not src then
- print("Usage: upbc <binary descriptor>")
+ print("Usage: upbc [--generate-upbdefs] <binary descriptor>")
return 1
end
@@ -32,6 +41,8 @@ for _, file in ipairs(files) do
symtab:add_file(file)
local outbase = strip_proto(file:name())
+ -- Write upbdefs.
+
local hfilename = outbase .. ".upbdefs.h"
local cfilename = outbase .. ".upbdefs.c"
@@ -44,14 +55,40 @@ for _, file in ipairs(files) do
end
os.execute(string.format("mkdir -p `dirname %s`", outbase))
+
+ if generate_upbdefs then
+ -- Legacy generated defs.
+ local hfile = assert(io.open(hfilename, "w"), "couldn't open " .. hfilename)
+ local cfile = assert(io.open(cfilename, "w"), "couldn't open " .. cfilename)
+
+ local happend = dump_cinit.file_appender(hfile)
+ local cappend = dump_cinit.file_appender(cfile)
+
+ dump_cinit.dump_defs(file, happend, cappend)
+
+ hfile:close()
+ cfile:close()
+ end
+
+ -- Write C API.
+ hfilename = outbase .. ".upb.h"
+ cfilename = outbase .. ".upb.c"
+
+ if os.getenv("UPBC_VERBOSE") then
+ print("upbc:")
+ print(string.format(" source file=%s", src))
+ print(string.format(" output file base=%s", outbase))
+ print(string.format(" hfilename=%s", hfilename))
+ print(string.format(" cfilename=%s", cfilename))
+ end
+
local hfile = assert(io.open(hfilename, "w"), "couldn't open " .. hfilename)
local cfile = assert(io.open(cfilename, "w"), "couldn't open " .. cfilename)
local happend = dump_cinit.file_appender(hfile)
local cappend = dump_cinit.file_appender(cfile)
- -- Dump defs
- dump_cinit.dump_defs(file, happend, cappend)
+ make_c_api.write_gencode(file, hfilename, happend, cappend)
hfile:close()
cfile:close()
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback