From 5f575995b56f2ae97d3b9f5dc1efef233def7419 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Mon, 12 Nov 2018 21:21:20 -0800 Subject: Added upb compiler written in C++ as a normal protoc plugin. --- upbc/message_layout.cc | 173 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 upbc/message_layout.cc (limited to 'upbc/message_layout.cc') diff --git a/upbc/message_layout.cc b/upbc/message_layout.cc new file mode 100644 index 0000000..b6614f0 --- /dev/null +++ b/upbc/message_layout.cc @@ -0,0 +1,173 @@ + +#include "upbc/message_layout.h" + +namespace upbc { + +namespace protobuf = ::google::protobuf; + +static int64_t DivRoundUp(int64_t a, int64_t b) { + ABSL_ASSERT(a >= 0); + ABSL_ASSERT(b > 0); + return (a + b - 1) / b; +} + +MessageLayout::Size MessageLayout::Place( + MessageLayout::SizeAndAlign size_and_align) { + Size offset = size_; + offset.AlignUp(size_and_align.align); + size_ = offset; + size_.Add(size_and_align.size); + //maxalign_.MaxFrom(size_and_align.align); + maxalign_.MaxFrom(size_and_align.size); + return offset; +} + +bool MessageLayout::HasHasbit(const protobuf::FieldDescriptor* field) { + return field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 && + field->label() != protobuf::FieldDescriptor::LABEL_REPEATED && + !field->containing_oneof(); +} + +MessageLayout::SizeAndAlign MessageLayout::SizeOf( + const protobuf::FieldDescriptor* field) { + if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED || + field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { + return {{4, 8}, {4, 8}}; + } + + switch (field->cpp_type()) { + case protobuf::FieldDescriptor::CPPTYPE_STRING: + // upb_stringview + // return {{8, 16}, {4, 8}}; + return {{8, 16}, {8, 16}}; + case protobuf::FieldDescriptor::CPPTYPE_BOOL: + return {{1, 1}, {1, 1}}; + case protobuf::FieldDescriptor::CPPTYPE_FLOAT: + case protobuf::FieldDescriptor::CPPTYPE_INT32: + case protobuf::FieldDescriptor::CPPTYPE_UINT32: + return {{4, 4}, {4, 4}}; + default: + return {{8, 8}, {8, 8}}; + } +} + +int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) { + // Order: + // 1, 2, 3. primitive fields (8, 4, 1 byte) + // 4. string fields + // 5. submessage fields + // 6. repeated fields + // + // This has the following nice properties: + // + // 1. padding alignment is (nearly) minimized. + // 2. fields that might have defaults (1-4) are segregated + // from fields that are always zero-initialized (5-7). + // + // We skip oneof fields, because they are emitted in a separate pass. + int64_t rank; + if (field->containing_oneof()) { + fprintf(stderr, "shouldn't have oneofs here.\n"); + abort(); + } else if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED) { + rank = 6; + } else { + switch (field->cpp_type()) { + case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: + rank = 5; + break; + case protobuf::FieldDescriptor::CPPTYPE_STRING: + rank = 4; + break; + case protobuf::FieldDescriptor::CPPTYPE_BOOL: + rank = 3; + break; + case protobuf::FieldDescriptor::CPPTYPE_FLOAT: + case protobuf::FieldDescriptor::CPPTYPE_INT32: + case protobuf::FieldDescriptor::CPPTYPE_UINT32: + rank = 2; + break; + default: + rank = 1; + break; + } + } + + // Break ties with field number. + return (rank << 29) | field->number(); +} + +void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) { + size_ = Size{0, 0}; + maxalign_ = Size{0, 0}; + PlaceNonOneofFields(descriptor); + PlaceOneofFields(descriptor); + + // Align overall size up to max size. + size_.AlignUp(maxalign_); +} + +void MessageLayout::PlaceNonOneofFields( + const protobuf::Descriptor* descriptor) { + std::vector field_order; + for (int i = 0; i < descriptor->field_count(); i++) { + const protobuf::FieldDescriptor* field = descriptor->field(i); + if (!field->containing_oneof()) { + field_order.push_back(descriptor->field(i)); + } + } + std::sort(field_order.begin(), field_order.end(), + [](const protobuf::FieldDescriptor* a, + const protobuf::FieldDescriptor* b) { + return FieldLayoutRank(a) < FieldLayoutRank(b); + }); + + // Place/count hasbits. + int hasbit_count = 0; + for (auto field : field_order) { + if (HasHasbit(field)) { + hasbit_indexes_[field] = hasbit_count++; + } + } + + // Place hasbits at the beginning. + int64_t hasbit_bytes = DivRoundUp(hasbit_count, 8); + Place(SizeAndAlign{{hasbit_bytes, hasbit_bytes}, {1, 1}}); + + // Place non-oneof fields. + for (auto field : field_order) { + field_offsets_[field] = Place(SizeOf(field)); + } +} + +void MessageLayout::PlaceOneofFields(const protobuf::Descriptor* descriptor) { + std::vector oneof_order; + for (int i = 0; i < descriptor->oneof_decl_count(); i++) { + oneof_order.push_back(descriptor->oneof_decl(i)); + } + std::sort(oneof_order.begin(), oneof_order.end(), + [](const protobuf::OneofDescriptor* a, + const protobuf::OneofDescriptor* b) { + return a->full_name() < b->full_name(); + }); + + for (auto oneof : oneof_order) { + SizeAndAlign oneof_maxsize{{0, 0}, {0, 0}}; + // Calculate max size. + for (int i = 0; i < oneof->field_count(); i++) { + oneof_maxsize.MaxFrom(SizeOf(oneof->field(i))); + } + + // Place discriminator enum and data. + Size data = Place(oneof_maxsize); + Size discriminator = Place(SizeAndAlign{{4, 4}, {4, 4}}); + + oneof_case_offsets_[oneof] = discriminator; + + for (int i = 0; i < oneof->field_count(); i++) { + field_offsets_[oneof->field(i)] = data; + } + } +} + +} // namespace upbc -- cgit v1.2.3