summary | refs | log | tree | commit | diff
path: root/upbc
diff options
context:
space:
mode:
author: Josh Haberman <jhaberman@gmail.com>, 2018-11-12 21:21:20 -0800
committer: Josh Haberman <jhaberman@gmail.com>, 2018-11-12 21:21:20 -0800
commit: 5f575995b56f2ae97d3b9f5dc1efef233def7419 (patch)
tree: 6d3322991947b4847503435a7867fac13b59d5b4 /upbc
parent: d4e78f9fdff9ad14a0e8e3ad5d7d8379ee2124ef (diff)
Added upb compiler written in C++ as a normal protoc plugin.
Diffstat (limited to 'upbc')
-rw-r--r--  upbc/generator.cc       540
-rw-r--r--  upbc/generator.h         12
-rw-r--r--  upbc/main.cc              9
-rw-r--r--  upbc/message_layout.cc  173
-rw-r--r--  upbc/message_layout.h   104
5 files changed, 838 insertions, 0 deletions
diff --git a/upbc/generator.cc b/upbc/generator.cc
new file mode 100644
index 0000000..56e6276
--- /dev/null
+++ b/upbc/generator.cc
@@ -0,0 +1,540 @@
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "absl/strings/ascii.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_replace.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+
+#include "upbc/generator.h"
+#include "upbc/message_layout.h"
+
+namespace protoc = ::google::protobuf::compiler;
+namespace protobuf = ::google::protobuf;
+
+// Returns `fname` with everything from its last '.' onward removed.  A name
+// that contains no '.' is returned unchanged.
+static std::string StripExtension(absl::string_view fname) {
+  const size_t dot_pos = fname.find_last_of(".");
+  const absl::string_view stem =
+      (dot_pos == std::string::npos) ? fname : fname.substr(0, dot_pos);
+  return std::string(stem);
+}
+
+// Name of the generated header for `proto_filename`: the name with its
+// extension stripped, followed by ".upb.h".
+static std::string HeaderFilename(std::string proto_filename) {
+  std::string result = StripExtension(proto_filename);
+  result += ".upb.h";
+  return result;
+}
+
+// Name of the generated C source for `proto_filename`: the name with its
+// extension stripped, followed by ".upb.c".
+static std::string SourceFilename(std::string proto_filename) {
+  std::string result = StripExtension(proto_filename);
+  result += ".upb.c";
+  return result;
+}
+
+// Formatting writer on top of a protobuf ZeroCopyOutputStream.
+//
+// Calling the object formats its arguments with absl::Substitute ($0, $1, ...
+// placeholders) and copies the result into buffers obtained from the stream
+// with Next().  The destructor returns the unused tail of the last buffer with
+// BackUp(), so an Output must be destroyed before the stream it writes to.
+// The stream is not owned.
+class Output {
+ public:
+  explicit Output(protobuf::io::ZeroCopyOutputStream* stream)
+      : stream_(stream) {}
+
+  // Not copyable: two copies would both hand back the same buffer tail.
+  Output(const Output&) = delete;
+  Output& operator=(const Output&) = delete;
+
+  ~Output() {
+    // size_ is non-zero only after a successful Next() whose buffer has not
+    // been completely filled.  Skip BackUp() otherwise: there is nothing to
+    // return, and Next() may never have been called at all.
+    if (size_ > 0) {
+      stream_->BackUp(static_cast<int>(size_));
+    }
+  }
+
+  // Substitutes `arg...` into `format` and appends the result to the stream.
+  template <class... Arg>
+  void operator()(absl::string_view format, const Arg&... arg) {
+    Write(absl::Substitute(format, arg...));
+  }
+
+ private:
+  // Copies `data` into the stream, fetching new buffers as they fill up.
+  void Write(absl::string_view data) {
+    while (!data.empty()) {
+      RefreshOutput();
+      const size_t to_write = std::min(data.size(), size_);
+      memcpy(ptr_, data.data(), to_write);
+      data.remove_prefix(to_write);
+      ptr_ += to_write;
+      size_ -= to_write;
+    }
+  }
+
+  // Ensures at least one writable byte is available at ptr_.  Aborts the
+  // process if the stream reports failure.
+  void RefreshOutput() {
+    while (size_ == 0) {
+      void* ptr;
+      int size;
+      if (!stream_->Next(&ptr, &size)) {
+        fprintf(stderr, "upbc: Failed to write to output\n");
+        abort();
+      }
+      ptr_ = static_cast<char*>(ptr);
+      size_ = static_cast<size_t>(size);
+    }
+  }
+
+  protobuf::io::ZeroCopyOutputStream* stream_;
+  char* ptr_ = nullptr;  // Next free byte in the current buffer.
+  size_t size_ = 0;      // Bytes still free at ptr_.
+};
+
+namespace upbc {
+
+// protoc CodeGenerator implementation for upb.  Generate() is defined later
+// in this file.
+class Generator : public protoc::CodeGenerator {
+ public:
+  // The overrides were implicitly private (class default access); they are
+  // part of the CodeGenerator interface, so expose them as public.
+  ~Generator() override {}
+  bool Generate(const protobuf::FileDescriptor* file,
+                const std::string& parameter, protoc::GeneratorContext* context,
+                std::string* error) const override;
+};
+
+// Appends `message` to `messages`, then recursively appends each of its
+// nested message types (so a parent always precedes its nested types).
+void AddMessages(const protobuf::Descriptor* message,
+                 std::vector<const protobuf::Descriptor*>* messages) {
+  messages->push_back(message);
+  const int nested_count = message->nested_type_count();
+  for (int n = 0; n < nested_count; ++n) {
+    AddMessages(message->nested_type(n), messages);
+  }
+}
+
+// Appends the enums declared directly in `message` to `enums`, then recurses
+// into each nested message type to collect theirs.
+void AddEnums(const protobuf::Descriptor* message,
+              std::vector<const protobuf::EnumDescriptor*>* enums) {
+  const int enum_count = message->enum_type_count();
+  for (int e = 0; e < enum_count; ++e) {
+    enums->push_back(message->enum_type(e));
+  }
+  const int nested_count = message->nested_type_count();
+  for (int n = 0; n < nested_count; ++n) {
+    AddEnums(message->nested_type(n), enums);
+  }
+}
+
+// Sorts a vector of descriptor pointers in place by ascending full_name().
+template <class T>
+void SortDefs(std::vector<T>* defs) {
+  const auto by_full_name = [](T lhs, T rhs) {
+    return lhs->full_name() < rhs->full_name();
+  };
+  std::sort(defs->begin(), defs->end(), by_full_name);
+}
+
+// Returns every message declared in `file`, nested messages included.
+//
+// NOTE(review): despite the name, the result is left in the order produced by
+// AddMessages() because the SortDefs() call below is disabled.
+std::vector<const protobuf::Descriptor*> SortedMessages(
+    const protobuf::FileDescriptor* file) {
+  std::vector<const protobuf::Descriptor*> result;
+  const int top_level_count = file->message_type_count();
+  for (int m = 0; m < top_level_count; ++m) {
+    AddMessages(file->message_type(m), &result);
+  }
+  //SortDefs(&result);
+  return result;
+}
+
+// Returns every enum declared in `file` -- top-level enums plus those nested
+// inside messages at any depth -- sorted by full name.
+std::vector<const protobuf::EnumDescriptor*> SortedEnums(
+    const protobuf::FileDescriptor* file) {
+  std::vector<const protobuf::EnumDescriptor*> result;
+  const int top_level_enums = file->enum_type_count();
+  for (int e = 0; e < top_level_enums; ++e) {
+    result.push_back(file->enum_type(e));
+  }
+  const int top_level_messages = file->message_type_count();
+  for (int m = 0; m < top_level_messages; ++m) {
+    AddEnums(file->message_type(m), &result);
+  }
+  SortDefs(&result);
+  return result;
+}
+
+// Returns the fields of `message` sorted by ascending field number.
+std::vector<const protobuf::FieldDescriptor*> FieldNumberOrder(
+    const protobuf::Descriptor* message) {
+  std::vector<const protobuf::FieldDescriptor*> fields;
+  const int field_count = message->field_count();
+  for (int f = 0; f < field_count; ++f) {
+    fields.push_back(message->field(f));
+  }
+  const auto by_number = [](const protobuf::FieldDescriptor* lhs,
+                            const protobuf::FieldDescriptor* rhs) {
+    return lhs->number() < rhs->number();
+  };
+  std::sort(fields.begin(), fields.end(), by_number);
+  return fields;
+}
+
+// Returns the message-typed fields of `message`, sorted by the full name of
+// the message type each field refers to.  Several fields may share a type, so
+// the same type can appear more than once.
+std::vector<const protobuf::FieldDescriptor*> SortedSubmessages(
+    const protobuf::Descriptor* message) {
+  std::vector<const protobuf::FieldDescriptor*> submsg_fields;
+  const int field_count = message->field_count();
+  for (int f = 0; f < field_count; ++f) {
+    const protobuf::FieldDescriptor* candidate = message->field(f);
+    if (candidate->cpp_type() != protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+      continue;
+    }
+    submsg_fields.push_back(candidate);
+  }
+  const auto by_type_name = [](const protobuf::FieldDescriptor* lhs,
+                               const protobuf::FieldDescriptor* rhs) {
+    return lhs->message_type()->full_name() <
+           rhs->message_type()->full_name();
+  };
+  std::sort(submsg_fields.begin(), submsg_fields.end(), by_type_name);
+  return submsg_fields;
+}
+
+// Converts a dotted or slashed name into a C identifier by replacing every
+// '.' and '/' with '_'.  No other characters are altered.
+std::string ToCIdent(absl::string_view str) {
+ return absl::StrReplaceAll(str, {{".", "_"}, {"/", "_"}});
+}
+
+// Like ToCIdent(), but with ASCII letters upper-cased; used for include-guard
+// macro names.
+std::string ToPreproc(absl::string_view str) {
+ return absl::AsciiStrToUpper(ToCIdent(str));
+}
+
+// C identifier used for an enum value: its full name passed through
+// ToCIdent().
+std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) {
+ return ToCIdent(value->full_name());
+}
+
+// Renders a layout Size as the text "UPB_SIZE(<size32>, <size64>)" for
+// insertion into generated C code.
+std::string GetSizeInit(const MessageLayout::Size& size) {
+ return absl::Substitute("UPB_SIZE($0, $1)", size.size32, size.size64);
+}
+
+// Returns the C type used in generated accessors for `field`.
+//
+// Repeated fields map to "upb_array*" and message fields to a pointer to the
+// message's C struct; `is_const` prefixes only these pointer types with
+// "const ".  All other fields map to a value type, for which `is_const` has no
+// effect.  Aborts on a cpp_type() not handled below.
+std::string CTypeInternal(const protobuf::FieldDescriptor* field,
+                          bool is_const) {
+  using FD = protobuf::FieldDescriptor;
+  const std::string const_prefix = is_const ? "const " : "";
+
+  if (field->label() == FD::LABEL_REPEATED) {
+    return const_prefix + "upb_array*";
+  }
+
+  switch (field->cpp_type()) {
+    case FD::CPPTYPE_MESSAGE: {
+      const protobuf::Descriptor* submsg = field->message_type();
+      std::string result = const_prefix;
+      // Types from another file are spelled with the "struct" keyword.
+      if (field->file() != submsg->file()) {
+        result += "struct ";
+      }
+      result += ToCIdent(submsg->full_name());
+      result += "*";
+      return result;
+    }
+    case FD::CPPTYPE_ENUM:
+      return ToCIdent(field->enum_type()->full_name());
+    case FD::CPPTYPE_BOOL:
+      return "bool";
+    case FD::CPPTYPE_FLOAT:
+      return "float";
+    case FD::CPPTYPE_DOUBLE:
+      return "double";
+    case FD::CPPTYPE_INT32:
+      return "int32_t";
+    case FD::CPPTYPE_UINT32:
+      return "uint32_t";
+    case FD::CPPTYPE_INT64:
+      return "int64_t";
+    case FD::CPPTYPE_UINT64:
+      return "uint64_t";
+    case FD::CPPTYPE_STRING:
+      return "upb_stringview";
+    default:
+      fprintf(stderr, "Unexpected type");
+      abort();
+  }
+}
+
+// Returns a C expression for the default value of `field`, used as the
+// fallback argument of oneof getters.
+//
+// Message fields default to NULL, enum fields to the symbol of their default
+// value, and scalars to the textual form of their default.  String fields
+// produce a upb_stringview_make() call over a C-escaped literal.
+std::string FieldDefault(const protobuf::FieldDescriptor* field) {
+  switch (field->cpp_type()) {
+    case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
+      return "NULL";
+    case protobuf::FieldDescriptor::CPPTYPE_STRING: {
+      // Emit the real byte count rather than strlen("..."): strlen() stops at
+      // the first NUL, which would truncate a default that contains one.
+      const std::string& value = field->default_value_string();
+      return absl::Substitute("upb_stringview_make(\"$0\", $1)",
+                              absl::CEscape(value), value.size());
+    }
+    case protobuf::FieldDescriptor::CPPTYPE_INT32:
+      return absl::StrCat(field->default_value_int32());
+    case protobuf::FieldDescriptor::CPPTYPE_INT64:
+      return absl::StrCat(field->default_value_int64());
+    case protobuf::FieldDescriptor::CPPTYPE_UINT32:
+      return absl::StrCat(field->default_value_uint32());
+    case protobuf::FieldDescriptor::CPPTYPE_UINT64:
+      return absl::StrCat(field->default_value_uint64());
+    case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
+      return absl::StrCat(field->default_value_float());
+    case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
+      return absl::StrCat(field->default_value_double());
+    case protobuf::FieldDescriptor::CPPTYPE_BOOL:
+      return field->default_value_bool() ? "true" : "false";
+    case protobuf::FieldDescriptor::CPPTYPE_ENUM:
+      return EnumValueSymbol(field->default_value_enum());
+  }
+  // Not reached for any cpp_type() handled above.
+  ABSL_ASSERT(false);
+  return "XXX";
+}
+
+// C type of `field` as used for setter parameters (no const qualification).
+std::string CType(const protobuf::FieldDescriptor* field) {
+ return CTypeInternal(field, false);
+}
+
+// C type of `field` as used for getter results: CTypeInternal() with
+// is_const set.
+std::string CTypeConst(const protobuf::FieldDescriptor* field) {
+ return CTypeInternal(field, true);
+}
+
+// Writes the enumerator list of `desc` (the lines between the braces of a C
+// enum), one "NAME = number" entry per line in ascending numeric order.  Every
+// entry except the last is followed by a comma.
+void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) {
+  std::vector<const protobuf::EnumValueDescriptor*> ordered;
+  const int value_count = desc->value_count();
+  for (int v = 0; v < value_count; ++v) {
+    ordered.push_back(desc->value(v));
+  }
+  const auto by_number = [](const protobuf::EnumValueDescriptor* lhs,
+                            const protobuf::EnumValueDescriptor* rhs) {
+    return lhs->number() < rhs->number();
+  };
+  std::sort(ordered.begin(), ordered.end(), by_number);
+
+  const size_t total = ordered.size();
+  for (size_t pos = 0; pos < total; ++pos) {
+    const protobuf::EnumValueDescriptor* entry = ordered[pos];
+    output(" $0 = $1", EnumValueSymbol(entry), entry->number());
+    const bool is_last = (pos + 1 == total);
+    if (!is_last) {
+      output(",");
+    }
+    output("\n");
+  }
+}
+
+// Writes the "generated file, do not edit" banner comment naming the input
+// .proto file.  Emitted at the top of both generated files.
+void EmitFileWarning(const protobuf::FileDescriptor* file, Output& output) {
+ output(
+ "/* This file was generated by upbc (the upb compiler) from the input\n"
+ " * file:\n"
+ " *\n"
+ " * $0\n"
+ " *\n"
+ " * Do not edit -- your changes will be discarded when the file is\n"
+ " * regenerated. */\n\n",
+ file->name());
+}
+
+// Writes the header-file API for one message:
+//   * the extern msginit declaration and inline new/parsenew/serialize helpers,
+//   * for each oneof, a "cases" enum and an inline case accessor,
+//   * an inline getter and an inline setter per field, in field-number order.
+// Field and oneof-case offsets come from a MessageLayout computed here.
+void GenerateMessageInHeader(const protobuf::Descriptor* message, Output& output) {
+  MessageLayout layout(message);
+
+  output("/* $0 */\n\n", message->full_name());
+  std::string msgname = ToCIdent(message->full_name());
+  output(
+      "extern const upb_msglayout $0_msginit;\n"
+      "UPB_INLINE $0 *$0_new(upb_arena *arena) {\n"
+      " return upb_msg_new(&$0_msginit, arena);\n"
+      "}\n"
+      "UPB_INLINE $0 *$0_parsenew(upb_stringview buf, upb_arena *arena) {\n"
+      " $0 *ret = $0_new(arena);\n"
+      " return (ret && upb_decode(buf, ret, &$0_msginit)) ? ret : NULL;\n"
+      "}\n"
+      "UPB_INLINE char *$0_serialize(const $0 *msg, upb_arena *arena, size_t "
+      "*len) {\n"
+      " return upb_encode(msg, &$0_msginit, arena, len);\n"
+      "}\n"
+      "\n",
+      msgname);
+
+  for (int i = 0; i < message->oneof_decl_count(); i++) {
+    const protobuf::OneofDescriptor* oneof = message->oneof_decl(i);
+    std::string fullname = ToCIdent(oneof->full_name());
+    output("typedef enum {\n");
+    // One enumerator per member field, valued with the field number.  The
+    // index is named `j` so it no longer shadows the outer loop's `i`.
+    for (int j = 0; j < oneof->field_count(); j++) {
+      const protobuf::FieldDescriptor* field = oneof->field(j);
+      output(" $0_$1 = $2,\n", fullname, field->name(), field->number());
+    }
+    output(
+        " $0_NOT_SET = 0,\n"
+        "} $0_oneofcases;\n",
+        fullname);
+    output(
+        "UPB_INLINE $0_oneofcases $1_$2_case(const $1* msg) { "
+        "return UPB_FIELD_AT(msg, int, $3); }\n"
+        "\n",
+        fullname, msgname, oneof->name(),
+        GetSizeInit(layout.GetOneofCaseOffset(oneof)));
+  }
+
+  // Computed once and shared by the getter and setter passes.
+  const std::vector<const protobuf::FieldDescriptor*> fields =
+      FieldNumberOrder(message);
+
+  // Getters.  Oneof members read through UPB_READ_ONEOF, which is also given
+  // the case offset, the field number and a default value.
+  for (auto field : fields) {
+    output("UPB_INLINE $0 $1_$2(const $1 *msg) {", CTypeConst(field), msgname,
+           field->name());
+    if (field->containing_oneof()) {
+      output(" return UPB_READ_ONEOF(msg, $0, $1, $2, $3, $4); }\n",
+             CTypeConst(field), GetSizeInit(layout.GetFieldOffset(field)),
+             GetSizeInit(layout.GetOneofCaseOffset(field->containing_oneof())),
+             field->number(), FieldDefault(field));
+    } else {
+      output(" return UPB_FIELD_AT(msg, $0, $1); }\n", CTypeConst(field),
+             GetSizeInit(layout.GetFieldOffset(field)));
+    }
+  }
+
+  output("\n");
+
+  // Setters.  Oneof members write through UPB_WRITE_ONEOF, which is also
+  // given the case offset and the field number.
+  for (auto field : fields) {
+    output("UPB_INLINE void $0_set_$1($0 *msg, $2 value) { ", msgname,
+           field->name(), CType(field));
+    if (field->containing_oneof()) {
+      output("UPB_WRITE_ONEOF(msg, $0, $1, value, $2, $3); }\n", CType(field),
+             GetSizeInit(layout.GetFieldOffset(field)),
+             GetSizeInit(layout.GetOneofCaseOffset(field->containing_oneof())),
+             field->number());
+    } else {
+      output("UPB_FIELD_AT(msg, $0, $1) = value; }\n", CType(field),
+             GetSizeInit(layout.GetFieldOffset(field)));
+    }
+  }
+
+  output("\n\n");
+}
+
+// Writes the complete generated header for `file`: banner, include guard and
+// includes, forward declarations, enum definitions, the per-message inline
+// API, and the closing guard.
+void WriteHeader(const protobuf::FileDescriptor* file, Output& output) {
+  EmitFileWarning(file, output);
+  output(
+      "#ifndef $0_UPB_H_\n"
+      "#define $0_UPB_H_\n\n"
+      "#include \"upb/msg.h\"\n\n"
+      "#include \"upb/decode.h\"\n"
+      "#include \"upb/encode.h\"\n"
+      "#include \"upb/port_def.inc\"\n"
+      "UPB_BEGIN_EXTERN_C\n\n",
+      ToPreproc(file->name()));
+
+  // Collected once; every pass below walks the same list.
+  const std::vector<const protobuf::Descriptor*> messages =
+      SortedMessages(file);
+
+  // Forward-declare types defined in this file.
+  for (auto message : messages) {
+    output("struct $0;\n", ToCIdent(message->full_name()));
+  }
+  for (auto message : messages) {
+    output("typedef struct $0 $0;\n", ToCIdent(message->full_name()));
+  }
+
+  // Forward-declare types not in this file, but used as submessages.
+  //
+  // The test compares the file of the *referenced message type* with this
+  // file.  Comparing the field's own file with its containing message's file
+  // (as was done previously) is never true for a message's own fields, so no
+  // forward declaration was ever emitted.
+  std::set<std::string> forward_names;
+  for (auto message : messages) {
+    for (int i = 0; i < message->field_count(); i++) {
+      const protobuf::FieldDescriptor* field = message->field(i);
+      if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
+          field->message_type()->file() != file) {
+        forward_names.insert(ToCIdent(field->message_type()->full_name()));
+      }
+    }
+  }
+  for (const auto& name : forward_names) {
+    output("struct $0;\n", name);
+  }
+
+  output(
+      "\n"
+      "/* Enums */\n\n");
+  for (auto enumdesc : SortedEnums(file)) {
+    output("typedef enum {\n");
+    DumpEnumValues(enumdesc, output);
+    output("} $0;\n\n", ToCIdent(enumdesc->full_name()));
+  }
+
+  for (auto message : messages) {
+    GenerateMessageInHeader(message, output);
+  }
+
+  output(
+      "UPB_END_EXTERN_C\n"
+      "\n"
+      "#include \"upb/port_undef.inc\"\n"
+      "\n"
+      "#endif /* $0_UPB_H_ */\n",
+      ToPreproc(file->name()));
+}
+
+// Writes the complete generated .c file for `file`: banner, includes (its own
+// header plus the header of every dependency), and for each message a
+// submessage-layout array, a field-layout array, and the upb_msglayout
+// "<msg>_msginit" definition that refers to them.
+void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
+ EmitFileWarning(file, output);
+
+ output(
+ "#include <stddef.h>\n"
+ "#include \"upb/msg.h\"\n"
+ "#include \"$0\"\n",
+ HeaderFilename(file->name()));
+
+ for (int i = 0; i < file->dependency_count(); i++) {
+ output("#include \"$0\"\n", HeaderFilename(file->dependency(i)->name()));
+ }
+
+ output(
+ "\n"
+ "#include \"upb/port_def.inc\"\n"
+ "\n");
+
+
+ for (auto message : SortedMessages(file)) {
+ std::string msgname = ToCIdent(message->full_name());
+ // Each *_ref stays "NULL" unless the corresponding array is emitted below.
+ std::string fields_array_ref = "NULL";
+ std::string submsgs_array_ref = "NULL";
+ std::string oneofs_array_ref = "NULL";
+ // Maps a submessage type to its index in the emitted submsgs array.
+ std::unordered_map<const protobuf::Descriptor*, int> submsg_indexes;
+ MessageLayout layout(message);
+ std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
+ SortedSubmessages(message);
+
+ if (!sorted_submsgs.empty()) {
+ // TODO(haberman): could save a little bit of space by only generating a
+ // "submsgs" array for every strongly-connected component.
+ std::string submsgs_array_name = msgname + "_submsgs";
+ submsgs_array_ref = "&" + submsgs_array_name + "[0]";
+ // NOTE(review): the array is declared with one slot per message-typed
+ // field, but duplicate types are skipped below, so fewer initializers than
+ // slots may be written.
+ output("static const upb_msglayout *const $0[$1] = {\n",
+ submsgs_array_name, sorted_submsgs.size());
+
+ int i = 0;
+ for (auto submsg : sorted_submsgs) {
+ // Emit each distinct submessage type only once.
+ if (submsg_indexes.find(submsg->message_type()) !=
+ submsg_indexes.end()) {
+ continue;
+ }
+ output(" &$0_msginit,\n",
+ ToCIdent(submsg->message_type()->full_name()));
+ submsg_indexes[submsg->message_type()] = i++;
+ }
+
+ output("};\n\n");
+ }
+
+ std::vector<const protobuf::FieldDescriptor*> field_number_order =
+ FieldNumberOrder(message);
+ if (!field_number_order.empty()) {
+ std::string fields_array_name = msgname + "__fields";
+ fields_array_ref = "&" + fields_array_name + "[0]";
+ output("static const upb_msglayout_field $0[$1] = {\n",
+ fields_array_name, field_number_order.size());
+ for (auto field : field_number_order) {
+ // Stays 0 for fields that are not message-typed.
+ int submsg_index = 0;
+ // "0" unless the field has a hasbit or belongs to a oneof (see below).
+ std::string presence = "0";
+
+ if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ submsg_index = submsg_indexes[field->message_type()];
+ }
+
+ if (MessageLayout::HasHasbit(field)) {
+ // Hasbit index shifted by one so that it is never 0.
+ presence = absl::StrCat(layout.GetHasbitIndex(field) + 1);
+ } else if (field->containing_oneof()) {
+ MessageLayout::Size case_offset =
+ layout.GetOneofCaseOffset(field->containing_oneof());
+
+ // Our encoding that distinguishes oneofs from presence-having fields.
+ // The case offset is stored as -(offset) - 1, i.e. always negative.
+ case_offset.size32 = -case_offset.size32 - 1;
+ case_offset.size64 = -case_offset.size64 - 1;
+ presence = GetSizeInit(case_offset);
+ }
+
+ // Columns: field number, offset, presence, submsg index, type, label.
+ output(" {$0, $1, $2, $3, $4, $5},\n",
+ field->number(),
+ GetSizeInit(layout.GetFieldOffset(field)),
+ presence,
+ submsg_index,
+ field->type(),
+ field->label());
+ }
+ output("};\n\n");
+ }
+
+ output("const upb_msglayout $0_msginit = {\n", msgname);
+ output(" $0,\n", submsgs_array_ref);
+ output(" $0,\n", fields_array_ref);
+ output(" $0, $1, $2,\n", GetSizeInit(layout.message_size()),
+ field_number_order.size(),
+ "false" // TODO: extendable
+ );
+
+ output("};\n\n");
+ }
+
+ output("#include \"upb/port_undef.inc\"\n");
+ output("\n");
+}
+
+// Generates "<name>.upb.h" and "<name>.upb.c" for `file` through `context`.
+// `parameter` and `error` are unused; the function always returns true
+// (write failures abort inside Output).
+bool Generator::Generate(const protobuf::FileDescriptor* file,
+                         const std::string& parameter,
+                         protoc::GeneratorContext* context,
+                         std::string* error) const {
+  // Open() hands ownership of the returned stream to the caller, so hold each
+  // one in a unique_ptr instead of leaking it.  Each stream is declared before
+  // the Output wrapping it, so the Output (whose destructor calls BackUp() on
+  // the stream) is destroyed first.
+  std::unique_ptr<protobuf::io::ZeroCopyOutputStream> h_stream(
+      context->Open(HeaderFilename(file->name())));
+  Output h_output(h_stream.get());
+  WriteHeader(file, h_output);
+
+  std::unique_ptr<protobuf::io::ZeroCopyOutputStream> c_stream(
+      context->Open(SourceFilename(file->name())));
+  Output c_output(c_stream.get());
+  WriteSource(file, c_output);
+
+  return true;
+}
+
+// Creates the upb code generator and returns it as an owning pointer to the
+// CodeGenerator interface.
+std::unique_ptr<google::protobuf::compiler::CodeGenerator> GetGenerator() {
+  std::unique_ptr<google::protobuf::compiler::CodeGenerator> generator(
+      new Generator());
+  return generator;
+}
+
+} // namespace upbc
diff --git a/upbc/generator.h b/upbc/generator.h
new file mode 100644
index 0000000..ed6cedc
--- /dev/null
+++ b/upbc/generator.h
@@ -0,0 +1,12 @@
+
+#ifndef UPBC_GENERATOR_H_
+#define UPBC_GENERATOR_H_
+
+#include <memory>
+#include <google/protobuf/compiler/code_generator.h>
+
+namespace upbc {
+// Returns a newly created upb code generator; the caller owns it through the
+// returned unique_ptr.
+std::unique_ptr<google::protobuf::compiler::CodeGenerator> GetGenerator();
+}
+
+#endif // UPBC_GENERATOR_H_
diff --git a/upbc/main.cc b/upbc/main.cc
new file mode 100644
index 0000000..a9682a9
--- /dev/null
+++ b/upbc/main.cc
@@ -0,0 +1,9 @@
+
+#include <google/protobuf/compiler/plugin.h>
+
+#include "upbc/generator.h"
+
+// Entry point of the protoc plugin: runs the upb generator under PluginMain.
+int main(int argc, char** argv) {
+  // The generator must stay alive for the whole PluginMain() call.
+  const std::unique_ptr<google::protobuf::compiler::CodeGenerator> generator =
+      upbc::GetGenerator();
+  return google::protobuf::compiler::PluginMain(argc, argv, generator.get());
+}
diff --git a/upbc/message_layout.cc b/upbc/message_layout.cc
new file mode 100644
index 0000000..b6614f0
--- /dev/null
+++ b/upbc/message_layout.cc
@@ -0,0 +1,173 @@
+
+#include "upbc/message_layout.h"
+
+namespace upbc {
+
+namespace protobuf = ::google::protobuf;
+
+// Integer division of `a` by `b`, rounded up.  Requires a >= 0 and b > 0
+// (checked with ABSL_ASSERT).
+static int64_t DivRoundUp(int64_t a, int64_t b) {
+  ABSL_ASSERT(a >= 0);
+  ABSL_ASSERT(b > 0);
+  const int64_t biased = a + b - 1;
+  return biased / b;
+}
+
+// Reserves space for one item at the end of the message: aligns the current
+// end of the message up to the item's alignment, advances the message size
+// past the item, and returns the offset at which the item was placed.
+MessageLayout::Size MessageLayout::Place(
+ MessageLayout::SizeAndAlign size_and_align) {
+ Size offset = size_;
+ offset.AlignUp(size_and_align.align);
+ size_ = offset;
+ size_.Add(size_and_align.size);
+ // NOTE(review): maxalign_ is fed the item's *size*, not its alignment (the
+ // alignment-based line is kept below, commented out).  maxalign_ is later
+ // used as an alignment in ComputeLayout(); an item whose size is not a power
+ // of two (e.g. 3 bytes of hasbits) could make it a non-power-of-two value --
+ // confirm this is intended before changing it.
+ //maxalign_.MaxFrom(size_and_align.align);
+ maxalign_.MaxFrom(size_and_align.size);
+ return offset;
+}
+
+// A field gets a hasbit only when it is declared in a proto2 file, is not
+// repeated, and is not a member of a oneof.
+bool MessageLayout::HasHasbit(const protobuf::FieldDescriptor* field) {
+  if (field->file()->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) {
+    return false;
+  }
+  if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED) {
+    return false;
+  }
+  return !field->containing_oneof();
+}
+
+// Returns the storage size and alignment of `field`, each as a
+// {32-bit, 64-bit} pair.
+//
+//   repeated or message field      : {4, 8}  (size == alignment)
+//   string (upb_stringview)        : {8, 16}
+//   bool                           : {1, 1}
+//   float, int32, uint32           : {4, 4}
+//   every other type               : {8, 8}
+MessageLayout::SizeAndAlign MessageLayout::SizeOf(
+    const protobuf::FieldDescriptor* field) {
+  using FD = protobuf::FieldDescriptor;
+  const FD::CppType type = field->cpp_type();
+
+  if (field->label() == FD::LABEL_REPEATED || type == FD::CPPTYPE_MESSAGE) {
+    return {{4, 8}, {4, 8}};
+  }
+
+  switch (type) {
+    case FD::CPPTYPE_STRING:
+      // upb_stringview.  An earlier variant used alignment {4, 8}:
+      // return {{8, 16}, {4, 8}};
+      return {{8, 16}, {8, 16}};
+    case FD::CPPTYPE_BOOL:
+      return {{1, 1}, {1, 1}};
+    case FD::CPPTYPE_FLOAT:
+    case FD::CPPTYPE_INT32:
+    case FD::CPPTYPE_UINT32:
+      return {{4, 4}, {4, 4}};
+    default:
+      return {{8, 8}, {8, 8}};
+  }
+}
+
+// Returns the sort key that determines where a non-oneof field is placed in
+// the message: a rank (1-6, see below) in the high bits and the field number
+// in the low bits.  Aborts if called on a oneof member.
+int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) {
+ // Order:
+ // 1, 2, 3. primitive fields (8, 4, 1 byte)
+ // 4. string fields
+ // 5. submessage fields
+ // 6. repeated fields
+ //
+ // This has the following nice properties:
+ //
+ // 1. padding alignment is (nearly) minimized.
+ // 2. fields that might have defaults (1-4) are segregated
+ // from fields that are always zero-initialized (5-6).
+ //
+ // We skip oneof fields, because they are emitted in a separate pass.
+ int64_t rank;
+ if (field->containing_oneof()) {
+ fprintf(stderr, "shouldn't have oneofs here.\n");
+ abort();
+ } else if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED) {
+ rank = 6;
+ } else {
+ switch (field->cpp_type()) {
+ case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
+ rank = 5;
+ break;
+ case protobuf::FieldDescriptor::CPPTYPE_STRING:
+ rank = 4;
+ break;
+ case protobuf::FieldDescriptor::CPPTYPE_BOOL:
+ rank = 3;
+ break;
+ case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
+ case protobuf::FieldDescriptor::CPPTYPE_INT32:
+ case protobuf::FieldDescriptor::CPPTYPE_UINT32:
+ rank = 2;
+ break;
+ default:
+ // Remaining scalar types (the 8-byte ones per SizeOf()).
+ rank = 1;
+ break;
+ }
+ }
+
+ // Break ties with field number.
+ // (presumably relies on field numbers fitting in the low 29 bits -- confirm)
+ return (rank << 29) | field->number();
+}
+
+// Computes the whole layout for `descriptor`: resets the running size and
+// maximum, places hasbits and non-oneof fields, then oneofs, and finally
+// rounds the total size up to maxalign_.
+void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) {
+ size_ = Size{0, 0};
+ maxalign_ = Size{0, 0};
+ // Order matters: oneofs are placed after all non-oneof fields.
+ PlaceNonOneofFields(descriptor);
+ PlaceOneofFields(descriptor);
+
+ // Align overall size up to max size.
+ size_.AlignUp(maxalign_);
+}
+
+// Places the hasbits and every field of `descriptor` that is not a oneof
+// member.  Fields are laid out in FieldLayoutRank() order; hasbit indexes are
+// assigned in that same order, and the hasbit bytes are placed first.
+void MessageLayout::PlaceNonOneofFields(
+    const protobuf::Descriptor* descriptor) {
+  // Gather the non-oneof fields and order them for layout.
+  std::vector<const protobuf::FieldDescriptor*> ordered;
+  const int field_count = descriptor->field_count();
+  for (int idx = 0; idx < field_count; ++idx) {
+    const protobuf::FieldDescriptor* candidate = descriptor->field(idx);
+    if (candidate->containing_oneof()) {
+      continue;
+    }
+    ordered.push_back(candidate);
+  }
+  const auto by_layout_rank = [](const protobuf::FieldDescriptor* lhs,
+                                 const protobuf::FieldDescriptor* rhs) {
+    return FieldLayoutRank(lhs) < FieldLayoutRank(rhs);
+  };
+  std::sort(ordered.begin(), ordered.end(), by_layout_rank);
+
+  // Number the hasbits.
+  int hasbit_count = 0;
+  for (const protobuf::FieldDescriptor* f : ordered) {
+    if (!HasHasbit(f)) {
+      continue;
+    }
+    hasbit_indexes_[f] = hasbit_count;
+    ++hasbit_count;
+  }
+
+  // The hasbit bytes come first in the message, byte-aligned.
+  const int64_t hasbit_bytes = DivRoundUp(hasbit_count, 8);
+  const SizeAndAlign hasbit_block{{hasbit_bytes, hasbit_bytes}, {1, 1}};
+  Place(hasbit_block);
+
+  // Then the fields themselves.
+  for (const protobuf::FieldDescriptor* f : ordered) {
+    field_offsets_[f] = Place(SizeOf(f));
+  }
+}
+
+// Places every oneof of `descriptor`, in order of the oneofs' full names.
+// Each oneof gets one data slot, large enough and aligned enough for any of
+// its members, followed by a 4-byte case discriminator.  All members of a
+// oneof share the data slot's offset.
+void MessageLayout::PlaceOneofFields(const protobuf::Descriptor* descriptor) {
+  std::vector<const protobuf::OneofDescriptor*> ordered;
+  const int oneof_count = descriptor->oneof_decl_count();
+  for (int idx = 0; idx < oneof_count; ++idx) {
+    ordered.push_back(descriptor->oneof_decl(idx));
+  }
+  const auto by_full_name = [](const protobuf::OneofDescriptor* lhs,
+                               const protobuf::OneofDescriptor* rhs) {
+    return lhs->full_name() < rhs->full_name();
+  };
+  std::sort(ordered.begin(), ordered.end(), by_full_name);
+
+  for (const protobuf::OneofDescriptor* oneof : ordered) {
+    const int member_count = oneof->field_count();
+
+    // The slot must fit the largest member and satisfy the strictest
+    // alignment among the members.
+    SizeAndAlign slot{{0, 0}, {0, 0}};
+    for (int m = 0; m < member_count; ++m) {
+      slot.MaxFrom(SizeOf(oneof->field(m)));
+    }
+
+    // Data first, then the discriminator.
+    const Size data_offset = Place(slot);
+    const Size case_offset = Place(SizeAndAlign{{4, 4}, {4, 4}});
+    oneof_case_offsets_[oneof] = case_offset;
+
+    for (int m = 0; m < member_count; ++m) {
+      field_offsets_[oneof->field(m)] = data_offset;
+    }
+  }
+}
+
+} // namespace upbc
diff --git a/upbc/message_layout.h b/upbc/message_layout.h
new file mode 100644
index 0000000..bdcc336
--- /dev/null
+++ b/upbc/message_layout.h
@@ -0,0 +1,104 @@
+
+#ifndef UPBC_MESSAGE_LAYOUT_H
+#define UPBC_MESSAGE_LAYOUT_H
+
+#include <unordered_map>
+#include "absl/base/macros.h"
+#include "google/protobuf/descriptor.h"
+
+namespace upbc {
+
+// Computes where each field, hasbit and oneof-case discriminator of a message
+// lives inside the message's memory block.  Every quantity is tracked as a
+// pair of values (size32 / size64).  The layout is computed once in the
+// constructor and queried through the Get*() accessors; querying something
+// that was not placed aborts the process.
+class MessageLayout {
+ public:
+  // A pair of byte quantities (a size, an offset, or an alignment): one value
+  // named for 32-bit and one for 64-bit.
+  struct Size {
+    // Adds `other` component-wise.
+    void Add(const Size& other) {
+      size32 += other.size32;
+      size64 += other.size64;
+    }
+
+    // Raises each component to at least the matching component of `other`.
+    void MaxFrom(const Size& other) {
+      size32 = std::max(size32, other.size32);
+      size64 = std::max(size64, other.size64);
+    }
+
+    // Rounds each component up to a multiple of the matching `align` value.
+    void AlignUp(const Size& align) {
+      size32 = Align(size32, align.size32);
+      size64 = Align(size64, align.size64);
+    }
+
+    int64_t size32;
+    int64_t size64;
+  };
+
+  // The size of an item together with its required alignment.
+  struct SizeAndAlign {
+    Size size;
+    Size align;
+
+    // Component-wise maximum of both size and alignment.
+    void MaxFrom(const SizeAndAlign& other) {
+      size.MaxFrom(other.size);
+      align.MaxFrom(other.align);
+    }
+  };
+
+  // Computes the layout of `descriptor`.  Explicit so that a descriptor
+  // pointer is never silently converted into a layout.
+  explicit MessageLayout(const google::protobuf::Descriptor* descriptor) {
+    ComputeLayout(descriptor);
+  }
+
+  // Offset of `field` within the message.
+  Size GetFieldOffset(const google::protobuf::FieldDescriptor* field) const {
+    return GetMapValue(field_offsets_, field);
+  }
+
+  // Offset of the case discriminator of `oneof`.
+  Size GetOneofCaseOffset(
+      const google::protobuf::OneofDescriptor* oneof) const {
+    return GetMapValue(oneof_case_offsets_, oneof);
+  }
+
+  // Index of the hasbit assigned to `field`; only fields for which
+  // HasHasbit() is true have one.
+  int GetHasbitIndex(const google::protobuf::FieldDescriptor* field) const {
+    return GetMapValue(hasbit_indexes_, field);
+  }
+
+  // Total size of the message.
+  Size message_size() const { return size_; }
+
+  static bool HasHasbit(const google::protobuf::FieldDescriptor* field);
+
+ private:
+  void ComputeLayout(const google::protobuf::Descriptor* descriptor);
+  void PlaceNonOneofFields(const google::protobuf::Descriptor* descriptor);
+  void PlaceOneofFields(const google::protobuf::Descriptor* descriptor);
+  Size Place(SizeAndAlign size_and_align);
+
+  // Looks up `key` in `map`; prints a message and aborts if it is missing.
+  template <class K, class V>
+  static V GetMapValue(const std::unordered_map<K, V>& map, K key) {
+    auto iter = map.find(key);
+    if (iter == map.end()) {
+      fprintf(stderr, "No value for field.\n");
+      abort();
+    }
+    return iter->second;
+  }
+
+  // True when `val` has at most one bit set.  Note that 0 is accepted.
+  // Takes int64_t, the type Size stores, to avoid sign/width conversions.
+  static bool IsPowerOfTwo(int64_t val) {
+    return (val & (val - 1)) == 0;
+  }
+
+  // Rounds `val` up to a multiple of `align`, which must satisfy
+  // IsPowerOfTwo().  With align == 0 the result is 0.
+  static int64_t Align(int64_t val, int64_t align) {
+    ABSL_ASSERT(IsPowerOfTwo(align));
+    return (val + align - 1) & ~(align - 1);
+  }
+
+  static SizeAndAlign SizeOf(const google::protobuf::FieldDescriptor* field);
+  static int64_t FieldLayoutRank(const google::protobuf::FieldDescriptor* field);
+
+  std::unordered_map<const google::protobuf::FieldDescriptor*, Size>
+      field_offsets_;
+  std::unordered_map<const google::protobuf::FieldDescriptor*, int>
+      hasbit_indexes_;
+  std::unordered_map<const google::protobuf::OneofDescriptor*, Size>
+      oneof_case_offsets_;
+  Size maxalign_;  // Largest value passed to maxalign_.MaxFrom() by Place().
+  Size size_;      // Running (and finally total) size of the message.
+};
+
+} // namespace upbc
+
+#endif // UPBC_MESSAGE_LAYOUT_H
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback