From 5f575995b56f2ae97d3b9f5dc1efef233def7419 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Mon, 12 Nov 2018 21:21:20 -0800 Subject: Added upb compiler written in C++ as a normal protoc plugin. --- upbc/generator.cc | 540 +++++++++++++++++++++++++++++++++++++++++++++++++ upbc/generator.h | 12 ++ upbc/main.cc | 9 + upbc/message_layout.cc | 173 ++++++++++++++++ upbc/message_layout.h | 104 ++++++++++ 5 files changed, 838 insertions(+) create mode 100644 upbc/generator.cc create mode 100644 upbc/generator.h create mode 100644 upbc/main.cc create mode 100644 upbc/message_layout.cc create mode 100644 upbc/message_layout.h (limited to 'upbc') diff --git a/upbc/generator.cc b/upbc/generator.cc new file mode 100644 index 0000000..56e6276 --- /dev/null +++ b/upbc/generator.cc @@ -0,0 +1,540 @@ + +#include +#include +#include + +#include "absl/strings/ascii.h" +#include "absl/strings/str_replace.h" +#include "absl/strings/substitute.h" +#include "google/protobuf/compiler/code_generator.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/io/zero_copy_stream.h" + +#include "upbc/generator.h" +#include "upbc/message_layout.h" + +namespace protoc = ::google::protobuf::compiler; +namespace protobuf = ::google::protobuf; + +static std::string StripExtension(absl::string_view fname) { + size_t lastdot = fname.find_last_of("."); + if (lastdot == std::string::npos) { + return std::string(fname); + } + return std::string(fname.substr(0, lastdot)); +} + +static std::string HeaderFilename(std::string proto_filename) { + return StripExtension(proto_filename) + ".upb.h"; +} + +static std::string SourceFilename(std::string proto_filename) { + return StripExtension(proto_filename) + ".upb.c"; +} + +class Output { + public: + Output(protobuf::io::ZeroCopyOutputStream* stream) : stream_(stream) {} + ~Output() { stream_->BackUp(size_); } + + template + void operator()(absl::string_view format, const Arg&... arg) { + Write(absl::Substitute(format, arg...)); + } + + private: + void Write(absl::string_view data) { + while (!data.empty()) { + RefreshOutput(); + size_t to_write = std::min(data.size(), size_); + memcpy(ptr_, data.data(), to_write); + data.remove_prefix(to_write); + ptr_ += to_write; + size_ -= to_write; + } + } + + void RefreshOutput() { + while (size_ == 0) { + void *ptr; + int size; + if (!stream_->Next(&ptr, &size)) { + fprintf(stderr, "upbc: Failed to write to to output\n"); + abort(); + } + ptr_ = static_cast(ptr); + size_ = size; + } + } + + protobuf::io::ZeroCopyOutputStream* stream_; + char *ptr_ = nullptr; + size_t size_ = 0; +}; + +namespace upbc { + +class Generator : public protoc::CodeGenerator { + ~Generator() override {} + bool Generate(const protobuf::FileDescriptor* file, + const std::string& parameter, protoc::GeneratorContext* context, + std::string* error) const override; + +}; + +void AddMessages(const protobuf::Descriptor* message, + std::vector* messages) { + messages->push_back(message); + for (int i = 0; i < message->nested_type_count(); i++) { + AddMessages(message->nested_type(i), messages); + } +} + +void AddEnums(const protobuf::Descriptor* message, + std::vector* enums) { + for (int i = 0; i < message->enum_type_count(); i++) { + enums->push_back(message->enum_type(i)); + } + for (int i = 0; i < message->nested_type_count(); i++) { + AddEnums(message->nested_type(i), enums); + } +} + +template +void SortDefs(std::vector* defs) { + std::sort(defs->begin(), defs->end(), + [](T a, T b) { return a->full_name() < b->full_name(); }); +} + +std::vector SortedMessages( + const protobuf::FileDescriptor* file) { + std::vector messages; + for (int i = 0; i < file->message_type_count(); i++) { + AddMessages(file->message_type(i), &messages); + } + //SortDefs(&messages); + return messages; +} + +std::vector SortedEnums( + const protobuf::FileDescriptor* file) { + std::vector enums; + for (int i = 0; i < file->enum_type_count(); i++) { + enums.push_back(file->enum_type(i)); + } + for (int i = 0; i < file->message_type_count(); i++) { + AddEnums(file->message_type(i), &enums); + } + SortDefs(&enums); + return enums; +} + +std::vector FieldNumberOrder( + const protobuf::Descriptor* message) { + std::vector messages; + for (int i = 0; i < message->field_count(); i++) { + messages.push_back(message->field(i)); + } + std::sort(messages.begin(), messages.end(), + [](const protobuf::FieldDescriptor* a, + const protobuf::FieldDescriptor* b) { + return a->number() < b->number(); + }); + return messages; +} + +std::vector SortedSubmessages( + const protobuf::Descriptor* message) { + std::vector ret; + for (int i = 0; i < message->field_count(); i++) { + if (message->field(i)->cpp_type() == + protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { + ret.push_back(message->field(i)); + } + } + std::sort(ret.begin(), ret.end(), + [](const protobuf::FieldDescriptor* a, + const protobuf::FieldDescriptor* b) { + return a->message_type()->full_name() < + b->message_type()->full_name(); + }); + return ret; +} + +std::string ToCIdent(absl::string_view str) { + return absl::StrReplaceAll(str, {{".", "_"}, {"/", "_"}}); +} + +std::string ToPreproc(absl::string_view str) { + return absl::AsciiStrToUpper(ToCIdent(str)); +} + +std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) { + return ToCIdent(value->full_name()); +} + +std::string GetSizeInit(const MessageLayout::Size& size) { + return absl::Substitute("UPB_SIZE($0, $1)", size.size32, size.size64); +} + +std::string CTypeInternal(const protobuf::FieldDescriptor* field, + bool is_const) { + std::string maybe_const = is_const ? "const " : ""; + if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED) { + return maybe_const + "upb_array*"; + } + + switch (field->cpp_type()) { + case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { + std::string maybe_struct = + field->file() != field->message_type()->file() ? "struct " : ""; + return maybe_const + maybe_struct + + ToCIdent(field->message_type()->full_name()) + "*"; + } + case protobuf::FieldDescriptor::CPPTYPE_ENUM: + return ToCIdent(field->enum_type()->full_name()); + case protobuf::FieldDescriptor::CPPTYPE_BOOL: + return "bool"; + case protobuf::FieldDescriptor::CPPTYPE_FLOAT: + return "float"; + case protobuf::FieldDescriptor::CPPTYPE_INT32: + return "int32_t"; + case protobuf::FieldDescriptor::CPPTYPE_UINT32: + return "uint32_t"; + case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: + return "double"; + case protobuf::FieldDescriptor::CPPTYPE_INT64: + return "int64_t"; + case protobuf::FieldDescriptor::CPPTYPE_UINT64: + return "uint64_t"; + case protobuf::FieldDescriptor::CPPTYPE_STRING: + return "upb_stringview"; + default: + fprintf(stderr, "Unexpected type"); + abort(); + } +} + +std::string FieldDefault(const protobuf::FieldDescriptor* field) { + switch (field->cpp_type()) { + case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: + return "NULL"; + case protobuf::FieldDescriptor::CPPTYPE_STRING: + return absl::Substitute("upb_stringview_make(\"$0\", strlen(\"$0\"))", + absl::CEscape(field->default_value_string())); + case protobuf::FieldDescriptor::CPPTYPE_INT32: + return absl::StrCat(field->default_value_int32()); + case protobuf::FieldDescriptor::CPPTYPE_INT64: + return absl::StrCat(field->default_value_int64()); + case protobuf::FieldDescriptor::CPPTYPE_UINT32: + return absl::StrCat(field->default_value_uint32()); + case protobuf::FieldDescriptor::CPPTYPE_UINT64: + return absl::StrCat(field->default_value_uint64()); + case protobuf::FieldDescriptor::CPPTYPE_FLOAT: + return absl::StrCat(field->default_value_float()); + case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: + return absl::StrCat(field->default_value_double()); + case protobuf::FieldDescriptor::CPPTYPE_BOOL: + return field->default_value_bool() ? "true" : "false"; + case protobuf::FieldDescriptor::CPPTYPE_ENUM: + return EnumValueSymbol(field->default_value_enum()); + } + ABSL_ASSERT(false); + return "XXX"; +} + +std::string CType(const protobuf::FieldDescriptor* field) { + return CTypeInternal(field, false); +} + +std::string CTypeConst(const protobuf::FieldDescriptor* field) { + return CTypeInternal(field, true); +} + +void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) { + std::vector values; + for (int i = 0; i < desc->value_count(); i++) { + values.push_back(desc->value(i)); + } + std::sort(values.begin(), values.end(), + [](const protobuf::EnumValueDescriptor* a, + const protobuf::EnumValueDescriptor* b) { + return a->number() < b->number(); + }); + + for (size_t i = 0; i < values.size(); i++) { + auto value = values[i]; + output(" $0 = $1", EnumValueSymbol(value), value->number()); + if (i != values.size() - 1) { + output(","); + } + output("\n"); + } +} + +void EmitFileWarning(const protobuf::FileDescriptor* file, Output& output) { + output( + "/* This file was generated by upbc (the upb compiler) from the input\n" + " * file:\n" + " *\n" + " * $0\n" + " *\n" + " * Do not edit -- your changes will be discarded when the file is\n" + " * regenerated. */\n\n", + file->name()); +} + +void GenerateMessageInHeader(const protobuf::Descriptor* message, Output& output) { + MessageLayout layout(message); + + output("/* $0 */\n\n", message->full_name()); + std::string msgname = ToCIdent(message->full_name()); + output( + "extern const upb_msglayout $0_msginit;\n" + "UPB_INLINE $0 *$0_new(upb_arena *arena) {\n" + " return upb_msg_new(&$0_msginit, arena);\n" + "}\n" + "UPB_INLINE $0 *$0_parsenew(upb_stringview buf, upb_arena *arena) {\n" + " $0 *ret = $0_new(arena);\n" + " return (ret && upb_decode(buf, ret, &$0_msginit)) ? ret : NULL;\n" + "}\n" + "UPB_INLINE char *$0_serialize(const $0 *msg, upb_arena *arena, size_t " + "*len) {\n" + " return upb_encode(msg, &$0_msginit, arena, len);\n" + "}\n" + "\n", + msgname); + + for (int i = 0; i < message->oneof_decl_count(); i++) { + const protobuf::OneofDescriptor* oneof = message->oneof_decl(i); + std::string fullname = ToCIdent(oneof->full_name()); + output("typedef enum {\n"); + for (int i = 0; i < oneof->field_count(); i++) { + const protobuf::FieldDescriptor* field = oneof->field(i); + output(" $0_$1 = $2,\n", fullname, field->name(), field->number()); + } + output( + " $0_NOT_SET = 0,\n" + "} $0_oneofcases;\n", + fullname); + output( + "UPB_INLINE $0_oneofcases $1_$2_case(const $1* msg) { " + "return UPB_FIELD_AT(msg, int, $3); }\n" + "\n", + fullname, msgname, oneof->name(), + GetSizeInit(layout.GetOneofCaseOffset(oneof))); + } + + for (auto field : FieldNumberOrder(message)) { + output("UPB_INLINE $0 $1_$2(const $1 *msg) {", CTypeConst(field), msgname, + field->name()); + if (field->containing_oneof()) { + output(" return UPB_READ_ONEOF(msg, $0, $1, $2, $3, $4); }\n", + CTypeConst(field), GetSizeInit(layout.GetFieldOffset(field)), + GetSizeInit(layout.GetOneofCaseOffset(field->containing_oneof())), + field->number(), FieldDefault(field)); + } else { + output(" return UPB_FIELD_AT(msg, $0, $1); }\n", CTypeConst(field), + GetSizeInit(layout.GetFieldOffset(field))); + } + } + + output("\n"); + + for (auto field : FieldNumberOrder(message)) { + output("UPB_INLINE void $0_set_$1($0 *msg, $2 value) { ", msgname, + field->name(), CType(field)); + if (field->containing_oneof()) { + output("UPB_WRITE_ONEOF(msg, $0, $1, value, $2, $3); }\n", CType(field), + GetSizeInit(layout.GetFieldOffset(field)), + GetSizeInit(layout.GetOneofCaseOffset(field->containing_oneof())), + field->number()); + } else { + output("UPB_FIELD_AT(msg, $0, $1) = value; }\n", CType(field), + GetSizeInit(layout.GetFieldOffset(field))); + } + } + + output("\n\n"); +} + +void WriteHeader(const protobuf::FileDescriptor* file, Output& output) { + EmitFileWarning(file, output); + output( + "#ifndef $0_UPB_H_\n" + "#define $0_UPB_H_\n\n" + "#include \"upb/msg.h\"\n\n" + "#include \"upb/decode.h\"\n" + "#include \"upb/encode.h\"\n" + "#include \"upb/port_def.inc\"\n" + "UPB_BEGIN_EXTERN_C\n\n", + ToPreproc(file->name())); + + // Forward-declare types defined in this file. + for (auto message : SortedMessages(file)) { + output("struct $0;\n", ToCIdent(message->full_name())); + } + for (auto message : SortedMessages(file)) { + output("typedef struct $0 $0;\n", ToCIdent(message->full_name())); + }; + + // Forward-declare types not in this file, but used as submessages. + std::set forward_names; + for (auto message : SortedMessages(file)) { + for (int i = 0; i < message->field_count(); i++) { + const protobuf::FieldDescriptor* field = message->field(i); + if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE && + field->file() != message->file()) { + forward_names.insert(ToCIdent(field->message_type()->full_name())); + } + } + } + for (const auto& name : forward_names) { + output("struct $0;\n", name); + } + + output( + "\n" + "/* Enums */\n\n"); + for (auto enumdesc : SortedEnums(file)) { + output("typedef enum {\n"); + DumpEnumValues(enumdesc, output); + output("} $0;\n\n", ToCIdent(enumdesc->full_name())); + } + + for (auto message : SortedMessages(file)) { + GenerateMessageInHeader(message, output); + } + + output( + "UPB_END_EXTERN_C\n" + "\n" + "#include \"upb/port_undef.inc\"\n" + "\n" + "#endif /* $0_UPB_H_ */\n", + ToPreproc(file->name())); +} + +void WriteSource(const protobuf::FileDescriptor* file, Output& output) { + EmitFileWarning(file, output); + + output( + "#include \n" + "#include \"upb/msg.h\"\n" + "#include \"$0\"\n", + HeaderFilename(file->name())); + + for (int i = 0; i < file->dependency_count(); i++) { + output("#include \"$0\"\n", HeaderFilename(file->dependency(i)->name())); + } + + output( + "\n" + "#include \"upb/port_def.inc\"\n" + "\n"); + + + for (auto message : SortedMessages(file)) { + std::string msgname = ToCIdent(message->full_name()); + std::string fields_array_ref = "NULL"; + std::string submsgs_array_ref = "NULL"; + std::string oneofs_array_ref = "NULL"; + std::unordered_map submsg_indexes; + MessageLayout layout(message); + std::vector sorted_submsgs = + SortedSubmessages(message); + + if (!sorted_submsgs.empty()) { + // TODO(haberman): could save a little bit of space by only generating a + // "submsgs" array for every strongly-connected component. + std::string submsgs_array_name = msgname + "_submsgs"; + submsgs_array_ref = "&" + submsgs_array_name + "[0]"; + output("static const upb_msglayout *const $0[$1] = {\n", + submsgs_array_name, sorted_submsgs.size()); + + int i = 0; + for (auto submsg : sorted_submsgs) { + if (submsg_indexes.find(submsg->message_type()) != + submsg_indexes.end()) { + continue; + } + output(" &$0_msginit,\n", + ToCIdent(submsg->message_type()->full_name())); + submsg_indexes[submsg->message_type()] = i++; + } + + output("};\n\n"); + } + + std::vector field_number_order = + FieldNumberOrder(message); + if (!field_number_order.empty()) { + std::string fields_array_name = msgname + "__fields"; + fields_array_ref = "&" + fields_array_name + "[0]"; + output("static const upb_msglayout_field $0[$1] = {\n", + fields_array_name, field_number_order.size()); + for (auto field : field_number_order) { + int submsg_index = 0; + std::string presence = "0"; + + if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { + submsg_index = submsg_indexes[field->message_type()]; + } + + if (MessageLayout::HasHasbit(field)) { + presence = absl::StrCat(layout.GetHasbitIndex(field) + 1); + } else if (field->containing_oneof()) { + MessageLayout::Size case_offset = + layout.GetOneofCaseOffset(field->containing_oneof()); + + // Our encoding that distinguishes oneofs from presence-having fields. + case_offset.size32 = -case_offset.size32 - 1; + case_offset.size64 = -case_offset.size64 - 1; + presence = GetSizeInit(case_offset); + } + + output(" {$0, $1, $2, $3, $4, $5},\n", + field->number(), + GetSizeInit(layout.GetFieldOffset(field)), + presence, + submsg_index, + field->type(), + field->label()); + } + output("};\n\n"); + } + + output("const upb_msglayout $0_msginit = {\n", msgname); + output(" $0,\n", submsgs_array_ref); + output(" $0,\n", fields_array_ref); + output(" $0, $1, $2,\n", GetSizeInit(layout.message_size()), + field_number_order.size(), + "false" // TODO: extendable + ); + + output("};\n\n"); + } + + output("#include \"upb/port_undef.inc\"\n"); + output("\n"); +} + +bool Generator::Generate(const protobuf::FileDescriptor* file, + const std::string& parameter, + protoc::GeneratorContext* context, + std::string* error) const { + Output h_output(context->Open(HeaderFilename(file->name()))); + WriteHeader(file, h_output); + + Output c_output(context->Open(SourceFilename(file->name()))); + WriteSource(file, c_output); + + return true; +} + +std::unique_ptr GetGenerator() { + return std::unique_ptr( + new Generator()); +} + +} // namespace upbc diff --git a/upbc/generator.h b/upbc/generator.h new file mode 100644 index 0000000..ed6cedc --- /dev/null +++ b/upbc/generator.h @@ -0,0 +1,12 @@ + +#ifndef UPBC_GENERATOR_H_ +#define UPBC_GENERATOR_H_ + +#include +#include + +namespace upbc { +std::unique_ptr GetGenerator(); +} + +#endif // UPBC_GENERATOR_H_ diff --git a/upbc/main.cc b/upbc/main.cc new file mode 100644 index 0000000..a9682a9 --- /dev/null +++ b/upbc/main.cc @@ -0,0 +1,9 @@ + +#include + +#include "upbc/generator.h" + +int main(int argc, char** argv) { + return google::protobuf::compiler::PluginMain(argc, argv, + upbc::GetGenerator().get()); +} diff --git a/upbc/message_layout.cc b/upbc/message_layout.cc new file mode 100644 index 0000000..b6614f0 --- /dev/null +++ b/upbc/message_layout.cc @@ -0,0 +1,173 @@ + +#include "upbc/message_layout.h" + +namespace upbc { + +namespace protobuf = ::google::protobuf; + +static int64_t DivRoundUp(int64_t a, int64_t b) { + ABSL_ASSERT(a >= 0); + ABSL_ASSERT(b > 0); + return (a + b - 1) / b; +} + +MessageLayout::Size MessageLayout::Place( + MessageLayout::SizeAndAlign size_and_align) { + Size offset = size_; + offset.AlignUp(size_and_align.align); + size_ = offset; + size_.Add(size_and_align.size); + //maxalign_.MaxFrom(size_and_align.align); + maxalign_.MaxFrom(size_and_align.size); + return offset; +} + +bool MessageLayout::HasHasbit(const protobuf::FieldDescriptor* field) { + return field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 && + field->label() != protobuf::FieldDescriptor::LABEL_REPEATED && + !field->containing_oneof(); +} + +MessageLayout::SizeAndAlign MessageLayout::SizeOf( + const protobuf::FieldDescriptor* field) { + if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED || + field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { + return {{4, 8}, {4, 8}}; + } + + switch (field->cpp_type()) { + case protobuf::FieldDescriptor::CPPTYPE_STRING: + // upb_stringview + // return {{8, 16}, {4, 8}}; + return {{8, 16}, {8, 16}}; + case protobuf::FieldDescriptor::CPPTYPE_BOOL: + return {{1, 1}, {1, 1}}; + case protobuf::FieldDescriptor::CPPTYPE_FLOAT: + case protobuf::FieldDescriptor::CPPTYPE_INT32: + case protobuf::FieldDescriptor::CPPTYPE_UINT32: + return {{4, 4}, {4, 4}}; + default: + return {{8, 8}, {8, 8}}; + } +} + +int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) { + // Order: + // 1, 2, 3. primitive fields (8, 4, 1 byte) + // 4. string fields + // 5. submessage fields + // 6. repeated fields + // + // This has the following nice properties: + // + // 1. padding alignment is (nearly) minimized. + // 2. fields that might have defaults (1-4) are segregated + // from fields that are always zero-initialized (5-7). + // + // We skip oneof fields, because they are emitted in a separate pass. + int64_t rank; + if (field->containing_oneof()) { + fprintf(stderr, "shouldn't have oneofs here.\n"); + abort(); + } else if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED) { + rank = 6; + } else { + switch (field->cpp_type()) { + case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: + rank = 5; + break; + case protobuf::FieldDescriptor::CPPTYPE_STRING: + rank = 4; + break; + case protobuf::FieldDescriptor::CPPTYPE_BOOL: + rank = 3; + break; + case protobuf::FieldDescriptor::CPPTYPE_FLOAT: + case protobuf::FieldDescriptor::CPPTYPE_INT32: + case protobuf::FieldDescriptor::CPPTYPE_UINT32: + rank = 2; + break; + default: + rank = 1; + break; + } + } + + // Break ties with field number. + return (rank << 29) | field->number(); +} + +void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) { + size_ = Size{0, 0}; + maxalign_ = Size{0, 0}; + PlaceNonOneofFields(descriptor); + PlaceOneofFields(descriptor); + + // Align overall size up to max size. + size_.AlignUp(maxalign_); +} + +void MessageLayout::PlaceNonOneofFields( + const protobuf::Descriptor* descriptor) { + std::vector field_order; + for (int i = 0; i < descriptor->field_count(); i++) { + const protobuf::FieldDescriptor* field = descriptor->field(i); + if (!field->containing_oneof()) { + field_order.push_back(descriptor->field(i)); + } + } + std::sort(field_order.begin(), field_order.end(), + [](const protobuf::FieldDescriptor* a, + const protobuf::FieldDescriptor* b) { + return FieldLayoutRank(a) < FieldLayoutRank(b); + }); + + // Place/count hasbits. + int hasbit_count = 0; + for (auto field : field_order) { + if (HasHasbit(field)) { + hasbit_indexes_[field] = hasbit_count++; + } + } + + // Place hasbits at the beginning. + int64_t hasbit_bytes = DivRoundUp(hasbit_count, 8); + Place(SizeAndAlign{{hasbit_bytes, hasbit_bytes}, {1, 1}}); + + // Place non-oneof fields. + for (auto field : field_order) { + field_offsets_[field] = Place(SizeOf(field)); + } +} + +void MessageLayout::PlaceOneofFields(const protobuf::Descriptor* descriptor) { + std::vector oneof_order; + for (int i = 0; i < descriptor->oneof_decl_count(); i++) { + oneof_order.push_back(descriptor->oneof_decl(i)); + } + std::sort(oneof_order.begin(), oneof_order.end(), + [](const protobuf::OneofDescriptor* a, + const protobuf::OneofDescriptor* b) { + return a->full_name() < b->full_name(); + }); + + for (auto oneof : oneof_order) { + SizeAndAlign oneof_maxsize{{0, 0}, {0, 0}}; + // Calculate max size. + for (int i = 0; i < oneof->field_count(); i++) { + oneof_maxsize.MaxFrom(SizeOf(oneof->field(i))); + } + + // Place discriminator enum and data. + Size data = Place(oneof_maxsize); + Size discriminator = Place(SizeAndAlign{{4, 4}, {4, 4}}); + + oneof_case_offsets_[oneof] = discriminator; + + for (int i = 0; i < oneof->field_count(); i++) { + field_offsets_[oneof->field(i)] = data; + } + } +} + +} // namespace upbc diff --git a/upbc/message_layout.h b/upbc/message_layout.h new file mode 100644 index 0000000..bdcc336 --- /dev/null +++ b/upbc/message_layout.h @@ -0,0 +1,104 @@ + +#ifndef UPBC_MESSAGE_LAYOUT_H +#define UPBC_MESSAGE_LAYOUT_H + +#include +#include "absl/base/macros.h" +#include "google/protobuf/descriptor.h" + +namespace upbc { + +class MessageLayout { + public: + struct Size { + void Add(const Size& other) { + size32 += other.size32; + size64 += other.size64; + } + + void MaxFrom(const Size& other) { + size32 = std::max(size32, other.size32); + size64 = std::max(size64, other.size64); + } + + void AlignUp(const Size& align) { + size32 = Align(size32, align.size32); + size64 = Align(size64, align.size64); + } + + int64_t size32; + int64_t size64; + }; + + struct SizeAndAlign { + Size size; + Size align; + + void MaxFrom(const SizeAndAlign& other) { + size.MaxFrom(other.size); + align.MaxFrom(other.align); + } + }; + + MessageLayout(const google::protobuf::Descriptor* descriptor) { + ComputeLayout(descriptor); + } + + Size GetFieldOffset(const google::protobuf::FieldDescriptor* field) const { + return GetMapValue(field_offsets_, field); + } + + Size GetOneofCaseOffset( + const google::protobuf::OneofDescriptor* oneof) const { + return GetMapValue(oneof_case_offsets_, oneof); + } + + int GetHasbitIndex(const google::protobuf::FieldDescriptor* field) const { + return GetMapValue(hasbit_indexes_, field); + } + + Size message_size() const { return size_; } + + static bool HasHasbit(const google::protobuf::FieldDescriptor* field); + + private: + void ComputeLayout(const google::protobuf::Descriptor* descriptor); + void PlaceNonOneofFields(const google::protobuf::Descriptor* descriptor); + void PlaceOneofFields(const google::protobuf::Descriptor* descriptor); + Size Place(SizeAndAlign size_and_align); + + template + static V GetMapValue(const std::unordered_map& map, K key) { + auto iter = map.find(key); + if (iter == map.end()) { + fprintf(stderr, "No value for field.\n"); + abort(); + } + return iter->second; + } + + static bool IsPowerOfTwo(size_t val) { + return (val & (val - 1)) == 0; + } + + static size_t Align(size_t val, size_t align) { + ABSL_ASSERT(IsPowerOfTwo(align)); + return (val + align - 1) & ~(align - 1); + } + + static SizeAndAlign SizeOf(const google::protobuf::FieldDescriptor* field); + static int64_t FieldLayoutRank(const google::protobuf::FieldDescriptor* field); + + std::unordered_map + field_offsets_; + std::unordered_map + hasbit_indexes_; + std::unordered_map + oneof_case_offsets_; + Size maxalign_; + Size size_; +}; + +} // namespace upbc + +#endif // UPBC_MESSAGE_LAYOUT_H -- cgit v1.2.3