From 3bd691a4975b2267ff04611507e766a7f9f87e83 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 8 May 2015 16:56:29 -0700 Subject: Google-internal development. --- tests/bindings/googlepb/test_vs_proto2.cc | 13 +- tests/json/test_json.cc | 110 ++++- tests/pb/test_decoder.cc | 51 ++- tests/test_cpp.cc | 103 ++++- tests/test_def.c | 91 ++++- upb/bindings/googlepb/bridge.cc | 3 +- upb/bindings/googlepb/proto2.cc | 9 +- upb/bindings/lua/upb.c | 29 +- upb/bindings/lua/upb/pb.c | 30 +- upb/bindings/python/upb.c | 41 +- upb/bindings/ruby/upb.c | 39 +- upb/def.c | 429 ++++++++++++++++++-- upb/def.h | 639 +++++++++++++++++++++++++++--- upb/descriptor/reader.c | 117 ++++-- upb/descriptor/reader.h | 78 +--- upb/env.c | 259 ++++++++++++ upb/env.h | 256 ++++++++++++ upb/handlers-inl.h | 36 +- upb/handlers.c | 27 +- upb/handlers.h | 2 - upb/json/parser.c | 466 +++++++++++++++++----- upb/json/parser.h | 82 +--- upb/json/parser.rl | 394 ++++++++++++++---- upb/json/printer.c | 340 +++++++++++++--- upb/json/printer.h | 70 ++-- upb/pb/compile_decoder.c | 15 +- upb/pb/compile_decoder_x64.dasc | 10 +- upb/pb/compile_decoder_x64.h | 24 +- upb/pb/decoder.c | 176 +++++--- upb/pb/decoder.h | 173 +++----- upb/pb/decoder.int.h | 92 ++++- upb/pb/encoder.c | 150 +++++-- upb/pb/encoder.h | 116 ++---- upb/pb/glue.c | 18 +- upb/pb/textprinter.c | 57 +-- upb/pb/textprinter.h | 38 +- upb/sink.h | 21 - upb/symtab.c | 12 +- upb/table.c | 8 +- upb/table.int.h | 3 + upb/upb.h | 9 + 41 files changed, 3592 insertions(+), 1044 deletions(-) create mode 100644 upb/env.c create mode 100644 upb/env.h diff --git a/tests/bindings/googlepb/test_vs_proto2.cc b/tests/bindings/googlepb/test_vs_proto2.cc index c89339a..8e68791 100644 --- a/tests/bindings/googlepb/test_vs_proto2.cc +++ b/tests/bindings/googlepb/test_vs_proto2.cc @@ -40,8 +40,8 @@ const unsigned char message2_data[] = { void compare_metadata(const google::protobuf::Descriptor* d, const upb::MessageDef *upb_md) { ASSERT(d->field_count() == upb_md->field_count()); - for (upb::MessageDef::const_iterator i = upb_md->begin(); i != upb_md->end(); - ++i) { + for (upb::MessageDef::const_field_iterator i = upb_md->field_begin(); + i != upb_md->field_end(); ++i) { const upb::FieldDef* upb_f = *i; const google::protobuf::FieldDescriptor *proto2_f = d->FindFieldByNumber(upb_f->number()); @@ -77,13 +77,14 @@ void parse_and_compare(google::protobuf::Message *msg1, cache.GetDecoderMethod(upb::pb::DecoderMethodOptions(protomsg_handlers))); upb::Status status; - upb::pb::Decoder decoder(decoder_method.get(), &status); + upb::Environment env; + env.ReportErrorsTo(&status); upb::Sink protomsg_sink(protomsg_handlers, msg2); - - decoder.ResetOutput(&protomsg_sink); + upb::pb::Decoder* decoder = + upb::pb::Decoder::Create(&env, decoder_method.get(), &protomsg_sink); msg2->Clear(); - bool ok = upb::BufferSource::PutBuffer(str, len, decoder.input()); + bool ok = upb::BufferSource::PutBuffer(str, len, decoder->input()); if (!ok) { fprintf(stderr, "error parsing: %s\n", status.error_message()); print_diff(*msg1, *msg2); diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc index f1e2304..f2e26b8 100644 --- a/tests/json/test_json.cc +++ b/tests/json/test_json.cc @@ -85,6 +85,33 @@ static TestCase kTestRoundtripMessages[] = { TEST("{\"optional_string\":\"\\uFFFF\"}"), EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}") }, + // map-field tests + { + TEST("{\"map_string_string\":{\"a\":\"value1\",\"b\":\"value2\"," + "\"c\":\"value3\"}}"), + EXPECT_SAME + }, + { + TEST("{\"map_int32_string\":{\"1\":\"value1\",\"-1\":\"value2\"," + "\"1234\":\"value3\"}}"), + EXPECT_SAME + }, + { + TEST("{\"map_bool_string\":{\"false\":\"value1\",\"true\":\"value2\"}}"), + EXPECT_SAME + }, + { + TEST("{\"map_string_int32\":{\"asdf\":1234,\"jkl;\":-1}}"), + EXPECT_SAME + }, + { + TEST("{\"map_string_bool\":{\"asdf\":true,\"jkl;\":false}}"), + EXPECT_SAME + }, + { + TEST("{\"map_string_msg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"), + EXPECT_SAME + }, TEST_SENTINEL }; @@ -115,6 +142,53 @@ static const upb::MessageDef* BuildTestMessage( submsg->set_full_name("SubMessage", &st); AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false); + // Create MapEntryStringString. + upb::reffed_ptr mapentry_string_string( + upb::MessageDef::New()); + mapentry_string_string->set_full_name("MapEntry_String_String", &st); + mapentry_string_string->setmapentry(true); + AddField(mapentry_string_string.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_string.get(), 2, "value", UPB_TYPE_STRING, false); + + // Create MapEntryInt32String. + upb::reffed_ptr mapentry_int32_string( + upb::MessageDef::New()); + mapentry_int32_string->set_full_name("MapEntry_Int32_String", &st); + mapentry_int32_string->setmapentry(true); + AddField(mapentry_int32_string.get(), 1, "key", UPB_TYPE_INT32, false); + AddField(mapentry_int32_string.get(), 2, "value", UPB_TYPE_STRING, false); + + // Create MapEntryBoolString. + upb::reffed_ptr mapentry_bool_string( + upb::MessageDef::New()); + mapentry_bool_string->set_full_name("MapEntry_Bool_String", &st); + mapentry_bool_string->setmapentry(true); + AddField(mapentry_bool_string.get(), 1, "key", UPB_TYPE_BOOL, false); + AddField(mapentry_bool_string.get(), 2, "value", UPB_TYPE_STRING, false); + + // Create MapEntryStringInt32. + upb::reffed_ptr mapentry_string_int32( + upb::MessageDef::New()); + mapentry_string_int32->set_full_name("MapEntry_String_Int32", &st); + mapentry_string_int32->setmapentry(true); + AddField(mapentry_string_int32.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_int32.get(), 2, "value", UPB_TYPE_INT32, false); + + // Create MapEntryStringBool. + upb::reffed_ptr mapentry_string_bool(upb::MessageDef::New()); + mapentry_string_bool->set_full_name("MapEntry_String_Bool", &st); + mapentry_string_bool->setmapentry(true); + AddField(mapentry_string_bool.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_bool.get(), 2, "value", UPB_TYPE_BOOL, false); + + // Create MapEntryStringMessage. + upb::reffed_ptr mapentry_string_msg(upb::MessageDef::New()); + mapentry_string_msg->set_full_name("MapEntry_String_Message", &st); + mapentry_string_msg->setmapentry(true); + AddField(mapentry_string_msg.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_msg.get(), 2, "value", UPB_TYPE_MESSAGE, false, + upb::upcast(submsg.get())); + // Create MyEnum. upb::reffed_ptr myenum(upb::EnumDef::New()); myenum->set_full_name("MyEnum", &st); @@ -150,13 +224,33 @@ static const upb::MessageDef* BuildTestMessage( AddField(md.get(), 19, "optional_enum", UPB_TYPE_ENUM, true, upb::upcast(myenum.get())); + AddField(md.get(), 20, "map_string_string", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_string.get())); + AddField(md.get(), 21, "map_int32_string", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_int32_string.get())); + AddField(md.get(), 22, "map_bool_string", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_bool_string.get())); + AddField(md.get(), 23, "map_string_int32", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_int32.get())); + AddField(md.get(), 24, "map_string_bool", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_bool.get())); + AddField(md.get(), 25, "map_string_msg", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_msg.get())); + // Add both to our symtab. - upb::Def* defs[3] = { + upb::Def* defs[9] = { upb::upcast(submsg.ReleaseTo(&defs)), upb::upcast(myenum.ReleaseTo(&defs)), upb::upcast(md.ReleaseTo(&defs)), + upb::upcast(mapentry_string_string.ReleaseTo(&defs)), + upb::upcast(mapentry_int32_string.ReleaseTo(&defs)), + upb::upcast(mapentry_bool_string.ReleaseTo(&defs)), + upb::upcast(mapentry_string_int32.ReleaseTo(&defs)), + upb::upcast(mapentry_string_bool.ReleaseTo(&defs)), + upb::upcast(mapentry_string_msg.ReleaseTo(&defs)), }; - symtab->Add(defs, 3, &defs, &st); + symtab->Add(defs, 9, &defs, &st); + ASSERT(st.ok()); // Return TestMessage. return symtab->LookupMessage("TestMessage"); @@ -198,14 +292,14 @@ void test_json_roundtrip_message(const char* json_src, const upb::Handlers* serialize_handlers, int seam) { upb::Status st; - upb::json::Parser parser(&st); - upb::json::Printer printer(serialize_handlers); + upb::Environment env; + env.ReportErrorsTo(&st); StringSink data_sink; + upb::json::Printer* printer = + upb::json::Printer::Create(&env, serialize_handlers, data_sink.Sink()); + upb::json::Parser* parser = upb::json::Parser::Create(&env, printer->input()); - parser.ResetOutput(printer.input()); - printer.ResetOutput(data_sink.Sink()); - - upb::BytesSink* input = parser.input(); + upb::BytesSink* input = parser->input(); void *sub; size_t len = strlen(json_src); size_t ofs = 0; diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc index 3aea777..98926a6 100644 --- a/tests/pb/test_decoder.cc +++ b/tests/pb/test_decoder.cc @@ -64,6 +64,8 @@ (float)completed * 100 / total); \ } +#define MAX_NESTING 64 + uint32_t filter_hash = 0; double completed; double total; @@ -210,7 +212,7 @@ string submsg(uint32_t fn, const string& buf) { // using the closure depth to test that the stack of closures is properly // handled. -int closures[UPB_DECODER_MAX_NESTING]; +int closures[MAX_NESTING]; string output; void indentbuf(string *buf, int depth) { @@ -508,6 +510,15 @@ upb::reffed_ptr NewHandlers(TestMode mode) { const upb::Handlers *global_handlers; const upb::pb::DecoderMethod *global_method; +upb::pb::Decoder* CreateDecoder(upb::Environment* env, + const upb::pb::DecoderMethod* method, + upb::Sink* sink) { + upb::pb::Decoder *ret = upb::pb::Decoder::Create(env, method, sink); + ASSERT(ret != NULL); + ret->set_max_nesting(MAX_NESTING); + return ret; +} + uint32_t Hash(const string& proto, const string* expected_output, size_t seam1, size_t seam2) { uint32_t hash = MurmurHash2(proto.c_str(), proto.size(), 0); @@ -545,19 +556,21 @@ static bool parse(upb::pb::Decoder* decoder, void* subc, const char* buf, #define LINE(x) x "\n" void run_decoder(const string& proto, const string* expected_output) { upb::Status status; - upb::pb::Decoder decoder(global_method, &status); upb::Sink sink(global_handlers, &closures[0]); - decoder.ResetOutput(&sink); for (size_t i = 0; i < proto.size(); i++) { for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) { + // TODO(haberman): hoist this again once the environment supports reset. + upb::Environment env; + env.ReportErrorsTo(&status); + upb::pb::Decoder *decoder = CreateDecoder(&env, global_method, &sink); + testhash = Hash(proto, expected_output, i, j); if (filter_hash && testhash != filter_hash) continue; if (test_mode != COUNT_ONLY) { - decoder.Reset(); output.clear(); status.Clear(); size_t ofs = 0; - upb::BytesSink* input = decoder.input(); + upb::BytesSink* input = decoder->input(); void *sub; if (filter_hash) { @@ -576,9 +589,9 @@ void run_decoder(const string& proto, const string* expected_output) { } bool ok = input->Start(proto.size(), &sub) && - parse(&decoder, sub, proto.c_str(), 0, i, &ofs, &status) && - parse(&decoder, sub, proto.c_str(), i, j, &ofs, &status) && - parse(&decoder, sub, proto.c_str(), j, proto.size(), &ofs, + parse(decoder, sub, proto.c_str(), 0, i, &ofs, &status) && + parse(decoder, sub, proto.c_str(), i, j, &ofs, &status) && + parse(decoder, sub, proto.c_str(), j, proto.size(), &ofs, &status) && ofs == proto.size(); @@ -852,7 +865,7 @@ void test_invalid() { // Test exceeding the resource limit of stack depth. string buf; - for (int i = 0; i <= UPB_DECODER_MAX_NESTING; i++) { + for (int i = 0; i <= MAX_NESTING; i++) { buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); } assert_does_not_parse(buf); @@ -871,11 +884,12 @@ void test_valid() { if (!filter_hash || filter_hash == testhash) { testhash = emptyhash; upb::Status status; - upb::pb::Decoder decoder(global_method, &status); + upb::Environment env; + env.ReportErrorsTo(&status); upb::Sink sink(global_handlers, &closures[0]); - decoder.ResetOutput(&sink); + upb::pb::Decoder* decoder = CreateDecoder(&env, global_method, &sink); output.clear(); - bool ok = upb::BufferSource::PutBuffer("", 0, decoder.input()); + bool ok = upb::BufferSource::PutBuffer("", 0, decoder->input()); ASSERT(ok); ASSERT(status.ok()); if (test_mode == ALL_HANDLERS) { @@ -1076,7 +1090,7 @@ void test_valid() { // Staying within the stack limit should work properly. string buf; string textbuf; - int total = UPB_DECODER_MAX_NESTING - 1; + int total = MAX_NESTING - 1; for (int i = 0; i < total; i++) { buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); indentbuf(&textbuf, i); @@ -1135,11 +1149,12 @@ upb::reffed_ptr method = { NULL, 0 }, }; for (int i = 0; testdata[i].data; i++) { + upb::Environment env; upb::Status status; - upb::pb::Decoder decoder(method.get(), &status); - upb::Sink sink(global_handlers, &closures[0]); - decoder.ResetOutput(&sink); - upb::BytesSink* input = decoder.input(); + env.ReportErrorsTo(&status); + upb::Sink sink(method->dest_handlers(), &closures[0]); + upb::pb::Decoder* decoder = CreateDecoder(&env, method.get(), &sink); + upb::BytesSink* input = decoder->input(); void* subc; ASSERT(input->Start(0, &subc)); size_t ofs = 0; @@ -1182,7 +1197,7 @@ extern "C" { int run_tests(int argc, char *argv[]) { if (argc > 1) filter_hash = strtol(argv[1], NULL, 16); - for (int i = 0; i < UPB_DECODER_MAX_NESTING; i++) { + for (int i = 0; i < MAX_NESTING; i++) { closures[i] = i; } diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc index 55bb4b3..c2de6c3 100644 --- a/tests/test_cpp.cc +++ b/tests/test_cpp.cc @@ -12,8 +12,10 @@ #include #include +#include #include "upb/def.h" +#include "upb/env.h" #include "upb/descriptor/reader.h" #include "upb/handlers.h" #include "upb/pb/decoder.h" @@ -164,7 +166,7 @@ static void TestSymbolTable(const char *descriptor_file) { #ifdef UPB_CXX11 // Test range-based for. std::set fielddefs; - for (const upb::FieldDef* f : *md.get()) { + for (const upb::FieldDef* f : md.get()->fields()) { AssertInsert(&fielddefs, f); ASSERT(f->containing_type() == md.get()); } @@ -1117,6 +1119,103 @@ void TestHandlerDataDestruction() { ASSERT(x == 0); } +void TestOneofs() { + upb::Status status; + upb::reffed_ptr md(upb::MessageDef::New()); + upb::reffed_ptr o(upb::OneofDef::New()); + + o->set_name("test_oneof", &status); + ASSERT(status.ok()); + + for (int i = 0; i < 5; i++) { + std::ostringstream fieldname; + fieldname << "field_" << i; + upb::reffed_ptr f(upb::FieldDef::New()); + f->set_name(fieldname.str(), &status); + ASSERT(status.ok()); + f->set_type(UPB_TYPE_INT32); + f->set_number(i + 1, &status); + ASSERT(status.ok()); + f->set_label(UPB_LABEL_OPTIONAL); + + o->AddField(f.get(), &status); + ASSERT(status.ok()); + } + + md->AddOneof(o.get(), &status); + ASSERT(status.ok()); + + int field_count = 0; + for (upb::OneofDef::iterator it = o->begin(); it != o->end(); ++it) { + upb::FieldDef* f = *it; + ASSERT(f->type() == UPB_TYPE_INT32); + field_count++; + } + ASSERT(field_count == 5); + + upb::MessageDef::oneof_iterator msg_it = md->oneof_begin(); + ASSERT(msg_it != md->oneof_end()); + ASSERT((*msg_it) == o.get()); + +#ifdef UPB_CXX11 + // Test range-based for on both fields and oneofs (with the iterator adaptor). + field_count = 0; + for (auto* field : md->fields()) { + UPB_UNUSED(field); + field_count++; + } + ASSERT(field_count == 5); + + int oneof_count = 0; + for (auto* oneof : md->oneofs()) { + UPB_UNUSED(oneof); + oneof_count++; + } + ASSERT(oneof_count == 1); +#endif // UPB_CXX11 + + // Test that we can add a new field to the oneof and that it becomes a member + // of the msgdef as well. + upb::reffed_ptr newf(upb::FieldDef::New()); + newf->set_name("new_field_10", &status); + ASSERT(status.ok()); + newf->set_number(10, &status); + ASSERT(status.ok()); + newf->set_label(UPB_LABEL_OPTIONAL); + newf->set_type(UPB_TYPE_INT32); + o->AddField(newf.get(), &status); + ASSERT(status.ok()); + ASSERT(newf->containing_type() == md.get()); + + // Test that we can add a new field to the msgdef first and then to the oneof. + upb::reffed_ptr newf2(upb::FieldDef::New()); + newf2->set_name("new_field_11", &status); + ASSERT(status.ok()); + newf2->set_number(11, &status); + ASSERT(status.ok()); + newf2->set_label(UPB_LABEL_OPTIONAL); + newf2->set_type(UPB_TYPE_INT32); + md->AddField(newf2.get(), &status); + ASSERT(status.ok()); + o->AddField(newf2.get(), &status); + ASSERT(status.ok()); + ASSERT(newf2->containing_oneof() == o.get()); + + // Test that we cannot add REQUIRED or REPEATED fields to the oneof. + upb::reffed_ptr newf3(upb::FieldDef::New()); + newf3->set_name("new_field_12", &status); + ASSERT(status.ok()); + newf3->set_number(12, &status); + ASSERT(status.ok()); + newf3->set_label(UPB_LABEL_REQUIRED); + newf3->set_type(UPB_TYPE_INT32); + o->AddField(newf3.get(), &status); + ASSERT(!status.ok()); + newf->set_label(UPB_LABEL_REPEATED); + o->AddField(newf3.get(), &status); + ASSERT(!status.ok()); +} + extern "C" { int run_tests(int argc, char *argv[]) { @@ -1173,6 +1272,8 @@ int run_tests(int argc, char *argv[]) { TestHandlerDataDestruction(); + TestOneofs(); + return 0; } diff --git a/tests/test_def.c b/tests/test_def.c index dcf80c0..800d685 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -189,7 +189,9 @@ static upb_fielddef *newfield( ASSERT(upb_fielddef_setnumber(f, num, NULL)); upb_fielddef_settype(f, type); upb_fielddef_setlabel(f, label); - ASSERT(upb_fielddef_setsubdefname(f, type_name, NULL)); + if (type_name) { + ASSERT(upb_fielddef_setsubdefname(f, type_name, NULL)); + } return f; } @@ -342,6 +344,91 @@ static void test_descriptor_flags() { upb_msgdef_unref(m2, &m2); } +static void test_mapentry_check() { + upb_status s = UPB_STATUS_INIT; + + upb_msgdef *m = upb_msgdef_new(&m); + upb_msgdef_setfullname(m, "TestMessage", &s); + upb_fielddef *f = upb_fielddef_new(&f); + upb_fielddef_setname(f, "field1", &s); + upb_fielddef_setnumber(f, 1, &s); + upb_fielddef_setlabel(f, UPB_LABEL_OPTIONAL); + upb_fielddef_settype(f, UPB_TYPE_MESSAGE); + upb_fielddef_setsubdefname(f, ".MapEntry", &s); + upb_msgdef_addfield(m, f, &f, &s); + ASSERT(upb_ok(&s)); + + upb_msgdef *subm = upb_msgdef_new(&subm); + upb_msgdef_setfullname(subm, "MapEntry", &s); + upb_msgdef_setmapentry(subm, true); + + upb_symtab *symtab = upb_symtab_new(&symtab); + upb_def *defs[] = {UPB_UPCAST(m), UPB_UPCAST(subm)}; + upb_symtab_add(symtab, defs, 2, NULL, &s); + // Should not have succeeded: non-repeated field pointing to a MapEntry. + ASSERT(!upb_ok(&s)); + + upb_fielddef_setlabel(f, UPB_LABEL_REPEATED); + upb_symtab_add(symtab, defs, 2, NULL, &s); + ASSERT(upb_ok(&s)); + + upb_symtab_unref(symtab, &symtab); + upb_msgdef_unref(subm, &subm); + upb_msgdef_unref(m, &m); +} + +static void test_oneofs() { + upb_status s = UPB_STATUS_INIT; + bool ok = true; + + upb_symtab *symtab = upb_symtab_new(&symtab); + ASSERT(symtab != NULL); + + // Create a test message for fields to refer to. + upb_msgdef *subm = upb_msgdef_newnamed("SubMessage", &symtab); + upb_msgdef_addfield(subm, newfield("field1", 1, UPB_TYPE_INT32, + UPB_LABEL_OPTIONAL, NULL, &symtab), + &symtab, NULL); + upb_def *subm_defs[] = {UPB_UPCAST(subm)}; + ASSERT_STATUS(upb_symtab_add(symtab, subm_defs, 1, &symtab, &s), &s); + + upb_msgdef *m = upb_msgdef_newnamed("TestMessage", &symtab); + ASSERT(upb_msgdef_numoneofs(m) == 0); + + upb_oneofdef *o = upb_oneofdef_new(&o); + ASSERT(upb_oneofdef_numfields(o) == 0); + ASSERT(upb_oneofdef_name(o) == NULL); + + ok = upb_oneofdef_setname(o, "test_oneof", &s); + ASSERT_STATUS(ok, &s); + + ok = upb_oneofdef_addfield(o, newfield("field1", 1, UPB_TYPE_INT32, + UPB_LABEL_OPTIONAL, NULL, &symtab), + &symtab, NULL); + ASSERT_STATUS(ok, &s); + ok = upb_oneofdef_addfield(o, newfield("field2", 2, UPB_TYPE_MESSAGE, + UPB_LABEL_OPTIONAL, ".SubMessage", + &symtab), + &symtab, NULL); + ASSERT_STATUS(ok, &s); + + ok = upb_msgdef_addoneof(m, o, NULL, &s); + ASSERT_STATUS(ok, &s); + + upb_def *defs[] = {UPB_UPCAST(m)}; + ASSERT_STATUS(upb_symtab_add(symtab, defs, 1, &symtab, &s), &s); + + ASSERT(upb_msgdef_numoneofs(m) == 1); + const upb_oneofdef *lookup_o = upb_msgdef_ntooz(m, "test_oneof"); + ASSERT(lookup_o == o); + + const upb_fielddef *lookup_field = upb_oneofdef_ntofz(o, "field1"); + ASSERT(lookup_field != NULL && upb_fielddef_number(lookup_field) == 1); + + upb_symtab_unref(symtab, &symtab); + upb_oneofdef_unref(o, &o); +} + int run_tests(int argc, char *argv[]) { if (argc < 2) { fprintf(stderr, "Usage: test_def \n"); @@ -358,5 +445,7 @@ int run_tests(int argc, char *argv[]) { test_partial_freeze(); test_noreftracking(); test_descriptor_flags(); + test_mapentry_check(); + test_oneofs(); return 0; } diff --git a/upb/bindings/googlepb/bridge.cc b/upb/bindings/googlepb/bridge.cc index 6ae8868..2a2bbbf 100644 --- a/upb/bindings/googlepb/bridge.cc +++ b/upb/bindings/googlepb/bridge.cc @@ -246,7 +246,8 @@ const Handlers* CodeCache::GetMaybeUnfrozenWriteHandlers( to_freeze_.push_back(h); const goog::Descriptor* d = m.GetDescriptor(); - for (upb::MessageDef::const_iterator i = md->begin(); i != md->end(); ++i) { + for (upb::MessageDef::const_field_iterator i = md->field_begin(); + i != md->field_end(); ++i) { const FieldDef* upb_f = *i; const goog::FieldDescriptor* proto2_f = diff --git a/upb/bindings/googlepb/proto2.cc b/upb/bindings/googlepb/proto2.cc index 3911172..87c13b6 100644 --- a/upb/bindings/googlepb/proto2.cc +++ b/upb/bindings/googlepb/proto2.cc @@ -946,14 +946,14 @@ case goog::FieldDescriptor::cpptype: \ public: typedef goog::Message Type; #ifdef GOOGLE_PROTOBUF_HAS_ARENAS - static ::proto2::Arena* GetArena(Type* t) { + static goog::Arena* GetArena(Type* t) { return t->GetArena(); } static void* GetMaybeArenaPointer(Type* t) { return t->GetMaybeArenaPointer(); } static inline Type* NewFromPrototype( - const Type* prototype, ::proto2::Arena* arena = NULL) { + const Type* prototype, goog::Arena* arena = NULL) { return prototype->New(arena); } static void Delete(Type* t, goog::Arena* arena = NULL) { @@ -1277,6 +1277,11 @@ case goog::FieldDescriptor::cpptype: \ return lazy_field_.SetAllocated(static_cast(message)); } + virtual void UnsafeArenaSetAllocatedMessage(proto2::MessageLite* message) { + return lazy_field_.UnsafeArenaSetAllocated( + static_cast(message)); + } + virtual proto2::MessageLite* ReleaseMessage( const proto2::MessageLite& prototype) { return lazy_field_.ReleaseByPrototype( diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index 17fc0a8..5ad0235 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -139,7 +139,8 @@ bool lupb_openlib(lua_State *L, void *ptr, const char *name, // Pushes a new userdata with the given metatable and ensures that it has a // uservalue. -static void *newudata_with_userval(lua_State *L, size_t size, const char *type) { +static void *newudata_with_userval(lua_State *L, size_t size, + const char *type) { void *ret = lua_newuserdata(L, size); // Set metatable. @@ -952,17 +953,17 @@ static int lupb_msgdef_field(lua_State *L) { } static int lupb_msgiter_next(lua_State *L) { - upb_msg_iter *i = lua_touserdata(L, lua_upvalueindex(1)); - if (upb_msg_done(i)) return 0; + upb_msg_field_iter *i = lua_touserdata(L, lua_upvalueindex(1)); + if (upb_msg_field_done(i)) return 0; lupb_def_pushwrapper(L, UPB_UPCAST(upb_msg_iter_field(i)), NULL); - upb_msg_next(i); + upb_msg_field_next(i); return 1; } static int lupb_msgdef_fields(lua_State *L) { const upb_msgdef *m = lupb_msgdef_check(L, 1); - upb_msg_iter *i = lua_newuserdata(L, sizeof(upb_msg_iter)); - upb_msg_begin(i, m); + upb_msg_field_iter *i = lua_newuserdata(L, sizeof(upb_msg_field_iter)); + upb_msg_field_begin(i, m); // Need to guarantee that the msgdef outlives the iter. lua_pushvalue(L, 1); lua_pushcclosure(L, &lupb_msgiter_next, 2); @@ -1416,8 +1417,10 @@ static lupb_msgdef *lupb_msg_assignoffsets(lua_State *L, int narg) { size_t userval_idx = 1; // Assign offsets. - upb_msg_iter i; - for (upb_msg_begin(&i, lmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for (upb_msg_field_begin(&i, lmd->md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (in_userval(f)) { offsets[upb_fielddef_index(f)] = userval_idx++; @@ -1442,7 +1445,9 @@ static lupb_msgdef *lupb_msg_assignoffsets(lua_State *L, int narg) { lua_newtable(L); // This will be our userval. int idx = 1; - for (upb_msg_begin(&i, lmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + for (upb_msg_field_begin(&i, lmd->md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) { bool created = lupb_def_pushwrapper(L, upb_fielddef_subdef(f), NULL); @@ -1660,9 +1665,9 @@ void callback(const void *closure, upb_handlers *h) { lua_State *L = (lua_State*)closure; lupb_def_pushwrapper(L, UPB_UPCAST(upb_handlers_msgdef(h)), NULL); lupb_msgdef *lmd = lupb_msg_assignoffsets(L, -1); - upb_msg_iter i; - upb_msg_begin(&i, upb_handlers_msgdef(h)); - for (; !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + upb_msg_field_begin(&i, upb_handlers_msgdef(h)); + for (; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); int hasbit = upb_fielddef_index(f); uint16_t ofs = lmd->field_offsets[upb_fielddef_index(f)]; diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index c9f1f47..ea3d755 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -41,6 +41,13 @@ static int lupb_pbdecodermethod_new(lua_State *L) { return 1; // The DecoderMethod wrapper. } +// We implement upb's allocation function by allocating a Lua userdata. +// This is a raw hunk of memory that will be GC'd by Lua. +static void *lua_alloc(void *ud, size_t size) { + lua_State *L = ud; + return lua_newuserdata(L, size); +} + // Unlike most of our exposed Lua functions, this does not correspond to an // actual method on the underlying DecoderMethod. But it's convenient, and // important to implement in C because we can do stack allocation and @@ -61,19 +68,22 @@ static int lupb_pbdecodermethod_parse(lua_State *L) { // Handlers need this. lua_getuservalue(L, -1); - upb_pbdecoder decoder; upb_status status = UPB_STATUS_INIT; - upb_pbdecoder_init(&decoder, method, &status); + upb_env env; + upb_env_init(&env); + upb_env_reporterrorsto(&env, &status); upb_sink sink; upb_sink_reset(&sink, handlers, msg); - upb_pbdecoder_resetoutput(&decoder, &sink); - upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(&decoder)); - // TODO: Our need to call uninit isn't longjmp-safe; what if the decode - // triggers a Lua error? uninit is only needed if the decoder - // dynamically-allocated a growing stack -- ditch this feature and live with - // the compile-time limit? Or have a custom allocation function that - // allocates Lua GC-rooted memory? - upb_pbdecoder_uninit(&decoder); + upb_pbdecoder *decoder = upb_pbdecoder_create(&env, method, &sink); + upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(decoder)); + + // This won't get called in the error case, which longjmp's across us. But + // since we made our alloc function allocate only GC-able memory, that + // shouldn't matter. It *would* matter if the environment had references to + // any non-memory resources (ie. filehandles). As an alternative to this we + // could make the environment itself a userdata. + upb_env_uninit(&env); + lupb_checkstatus(L, &status); lua_pop(L, 1); // Uservalue. diff --git a/upb/bindings/python/upb.c b/upb/bindings/python/upb.c index 497074b..6cfc8e9 100644 --- a/upb/bindings/python/upb.c +++ b/upb/bindings/python/upb.c @@ -298,7 +298,8 @@ static PyObject *PyUpb_MessageDef_new(PyTypeObject *subtype, static PyObject *PyUpb_MessageDef_add_fields(PyObject *o, PyObject *args); -static int PyUpb_MessageDef_init(PyObject *self, PyObject *args, PyObject *kwds) { +static int PyUpb_MessageDef_init( + PyObject *self, PyObject *args, PyObject *kwds) { if (!kwds) return 0; PyObject *key, *value; Py_ssize_t pos = 0; @@ -323,7 +324,8 @@ static PyObject *PyUpb_MessageDef_getattro(PyObject *obj, PyObject *attr_name) { return PyObject_GenericGetAttr(obj, attr_name); } -static int PyUpb_MessageDef_setattro(PyObject *o, PyObject *key, PyObject *val) { +static int PyUpb_MessageDef_setattro( + PyObject *o, PyObject *key, PyObject *val) { upb_msgdef *m = Check_MessageDef(o, -1); if (!upb_def_ismutable(UPB_UPCAST(m))) { PyErr_SetString(PyExc_TypeError, "MessageDef is not mutable."); @@ -343,9 +345,11 @@ static int PyUpb_MessageDef_setattro(PyObject *o, PyObject *key, PyObject *val) static PyObject *PyUpb_MessageDef_fields(PyObject *obj, PyObject *args) { upb_msgdef *m = Check_MessageDef(obj, NULL); PyObject *ret = PyList_New(0); - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&ii)) { + upb_fielddef *f = upb_msg_iter_field(&i); PyList_Append(ret, PyUpb_FieldDef_GetOrCreate(f)); } return ret; @@ -374,9 +378,12 @@ static PyObject *PyUpb_MessageDef_add_field(PyObject *o, PyObject *field) { } static PyMethodDef PyUpb_MessageDef_methods[] = { - {"add_field", &PyUpb_MessageDef_add_field, METH_O, "Adds a list of fields."}, - {"add_fields", &PyUpb_MessageDef_add_fields, METH_O, "Adds a list of fields."}, - {"fields", &PyUpb_MessageDef_fields, METH_NOARGS, "Returns list of fields."}, + {"add_field", &PyUpb_MessageDef_add_field, METH_O, + "Adds a list of fields."}, + {"add_fields", &PyUpb_MessageDef_add_fields, METH_O, + "Adds a list of fields."}, + {"fields", &PyUpb_MessageDef_fields, METH_NOARGS, + "Returns list of fields."}, {NULL, NULL} }; @@ -448,7 +455,8 @@ static PyObject *PyUpb_SymbolTable_new(PyTypeObject *subtype, return PyUpb_ObjCacheGet(upb_symtab_new(), subtype); } -static int PyUpb_SymbolTable_init(PyObject *self, PyObject *args, PyObject *kwds) { +static int PyUpb_SymbolTable_init( + PyObject *self, PyObject *args, PyObject *kwds) { return 0; } @@ -475,8 +483,10 @@ static PyObject *PyUpb_SymbolTable_add_defs(PyObject *o, PyObject *defs) { cdefs[i++] = def; upb_msgdef *md = upb_dyncast_msgdef(def); if (!md) continue; - upb_msg_iter j; - for(j = upb_msg_begin(md); !upb_msg_done(j); j = upb_msg_next(md, j)) { + upb_msg_field_iter j; + for(upb_msg_field_begin(&j, md); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { upb_fielddef *f = upb_msg_iter_field(j); upb_fielddef_setaccessor(f, PyUpb_AccessorForField(f)); } @@ -601,7 +611,8 @@ static upb_sflow_t PyUpb_Message_StartSubmessage(void *m, upb_value fval) { return UPB_CONTINUE_WITH(*submsg); } -static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval) { +static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage( + void *a, upb_value fval) { (void)fval; PyObject **elem = upb_stdarray_append(a, sizeof(void*)); PyTypeObject *type = ((PyUpb_MessageType*)Py_TYPE(a))->alt_type; @@ -609,7 +620,8 @@ static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval return UPB_CONTINUE_WITH(*elem); } -static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value val) { +static upb_flow_t PyUpb_Message_StringValue( + void *m, upb_value fval, upb_value val) { PyObject **str = PyUpb_Accessor_GetPtr(m, fval); if (*str) { Py_DECREF(*str); } *str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len); @@ -618,7 +630,8 @@ static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value v return UPB_CONTINUE; } -static upb_flow_t PyUpb_Message_AppendStringValue(void *a, upb_value fval, upb_value val) { +static upb_flow_t PyUpb_Message_AppendStringValue( + void *a, upb_value fval, upb_value val) { (void)fval; PyObject **elem = upb_stdarray_append(a, sizeof(void*)); *elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len); diff --git a/upb/bindings/ruby/upb.c b/upb/bindings/ruby/upb.c index 2817a15..9618366 100644 --- a/upb/bindings/ruby/upb.c +++ b/upb/bindings/ruby/upb.c @@ -273,9 +273,11 @@ static size_t rupb_sizeof(const upb_fielddef *f) { static void assign_offsets(rb_msglayout *layout, const upb_msgdef *md) { layout->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md)); size_t ofs = 0; - upb_msg_iter i; + upb_msg_field_iter i; - for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + for (upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); size_t field_size = rupb_sizeof(f); @@ -301,8 +303,10 @@ static void make_prototype(rb_msglayout *layout, const upb_msgdef *md) { // more specific initialization. memset(prototype, 0, layout->size); - upb_msg_iter i; - for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for (upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); if (is_ruby_value(f)) { size_t ofs = layout->field_offsets[upb_fielddef_index(f)]; @@ -373,8 +377,10 @@ static void msgdef_mark(void *_rmd) { rb_gc_mark(rmd->klass); // Mark all submessage types. - upb_msg_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for (upb_msg_field_begin(&i, rmd->md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (upb_fielddef_issubmsg(f)) { // If we were trying to be more aggressively lazy, the submessage might @@ -495,8 +501,10 @@ static void msg_mark(void *p) { // We need to mark all references to other Ruby values: strings, arrays, and // submessages that we point to. - upb_msg_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for (upb_msg_field_begin(&i, rmd->md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (is_ruby_value(f)) { size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; @@ -903,7 +911,8 @@ static void *submsg_handler(void *closure, const void *hd) { const submsg_handlerdata_t *submsgdata = hd; if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) { - DEREF(msg, submsgdata->ofs, VALUE) = msg_new(msgdef_getwrapper(submsgdata->md)); + DEREF(msg, submsgdata->ofs, VALUE) = + msg_new(msgdef_getwrapper(submsgdata->md)); } VALUE submsg = DEREF(msg, submsgdata->ofs, VALUE); @@ -912,9 +921,11 @@ static void *submsg_handler(void *closure, const void *hd) { static void add_handlers_for_message(const void *closure, upb_handlers *h) { const rupb_MessageDef *rmd = get_rbmsgdef(upb_handlers_msgdef(h)); - upb_msg_iter i; + upb_msg_field_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + for (upb_msg_field_begin(&i, rmd->md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; @@ -1085,8 +1096,10 @@ static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd, upb_sink *sink) { upb_sink_startmsg(sink); - upb_msg_iter i; - for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for (upb_msg_field_begin(&i, rmd->md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); uint32_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)]; diff --git a/upb/def.c b/upb/def.c index 349dfd5..0963675 100644 --- a/upb/def.c +++ b/upb/def.c @@ -211,6 +211,21 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) { upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f)); } + // Ensure that MapEntry submessages only appear as repeated fields, not + // optional/required (singular) fields. + if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && + upb_fielddef_msgsubdef(f) != NULL) { + const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); + if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) { + upb_status_seterrf(s, + "Field %s refers to mapentry message but is not " + "a repeated field", + upb_fielddef_name(f) ? upb_fielddef_name(f) : + "(unnamed)"); + return false; + } + } + return true; } @@ -248,10 +263,12 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { upb_fielddef **fields = malloc(n * sizeof(*fields)); if (!fields) return false; - upb_msg_iter j; + upb_msg_field_iter j; int i; m->submsg_field_count = 0; - for(i = 0, upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j), i++) { + for(i = 0, upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j), i++) { upb_fielddef *f = upb_msg_iter_field(&j); assert(f->msg.def == m); if (!upb_validate_field(f, s)) { @@ -287,7 +304,9 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { upb_selector_t sel; upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v); upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v); - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + for(upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { upb_fielddef *f = upb_msg_iter_field(&j); // These calls will assert-fail in upb_table if the value already exists. TRY(UPB_HANDLER_INT32); @@ -545,6 +564,9 @@ static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit, if (upb_fielddef_containingtype(f)) { visit(r, UPB_UPCAST2(upb_fielddef_containingtype(f)), closure); } + if (upb_fielddef_containingoneof(f)) { + visit(r, UPB_UPCAST2(upb_fielddef_containingoneof(f)), closure); + } if (upb_fielddef_subdef(f)) { visit(r, UPB_UPCAST(upb_fielddef_subdef(f)), closure); } @@ -620,6 +642,7 @@ upb_fielddef *upb_fielddef_new(const void *owner) { } f->msg.def = NULL; f->sub.def = NULL; + f->oneof = NULL; f->subdef_is_symbolic = false; f->msg_is_symbolic = false; f->label_ = UPB_LABEL_OPTIONAL; @@ -749,6 +772,10 @@ const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { return f->msg_is_symbolic ? NULL : f->msg.def; } +const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) { + return f->oneof; +} + upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) { return (upb_msgdef*)upb_fielddef_containingtype(f); } @@ -777,6 +804,10 @@ bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name, } bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) { + if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) { + upb_status_seterrmsg(s, "Already added to message or oneof"); + return false; + } return upb_def_setfullname(UPB_UPCAST(f), name, s); } @@ -1227,6 +1258,11 @@ bool upb_fielddef_isprimitive(const upb_fielddef *f) { return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); } +bool upb_fielddef_ismap(const upb_fielddef *f) { + return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) && + upb_msgdef_mapentry(upb_fielddef_msgsubdef(f)); +} + bool upb_fielddef_hassubdef(const upb_fielddef *f) { return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM; } @@ -1248,15 +1284,25 @@ bool upb_fielddef_checkdescriptortype(int32_t type) { static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit, void *closure) { const upb_msgdef *m = (const upb_msgdef*)r; - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); visit(r, UPB_UPCAST2(f), closure); } + upb_msg_oneof_iter o; + for(upb_msg_oneof_begin(&o, m); + !upb_msg_oneof_done(&o); + upb_msg_oneof_next(&o)) { + upb_oneofdef *f = upb_msg_iter_oneof(&o); + visit(r, UPB_UPCAST2(f), closure); + } } static void freemsg(upb_refcounted *r) { upb_msgdef *m = (upb_msgdef*)r; + upb_strtable_uninit(&m->ntoo); upb_strtable_uninit(&m->ntof); upb_inttable_uninit(&m->itof); upb_def_uninit(UPB_UPCAST(m)); @@ -1268,14 +1314,17 @@ upb_msgdef *upb_msgdef_new(const void *owner) { upb_msgdef *m = malloc(sizeof(*m)); if (!m) return NULL; if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2; - if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2; - if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1; + if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3; + if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2; + if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1; m->map_entry = false; return m; err1: - upb_inttable_uninit(&m->itof); + upb_strtable_uninit(&m->ntof); err2: + upb_inttable_uninit(&m->itof); +err3: free(m); return NULL; } @@ -1287,14 +1336,28 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { upb_def_fullname(UPB_UPCAST(m)), NULL); newm->map_entry = m->map_entry; UPB_ASSERT_VAR(ok, ok); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); + // Fields in oneofs are dup'd below. + if (upb_fielddef_containingoneof(f)) continue; if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) { upb_msgdef_unref(newm, owner); return NULL; } } + upb_msg_oneof_iter o; + for(upb_msg_oneof_begin(&o, m); + !upb_msg_oneof_done(&o); + upb_msg_oneof_next(&o)) { + upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f); + if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) { + upb_msgdef_unref(newm, owner); + return NULL; + } + } return newm; } @@ -1333,6 +1396,35 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, return upb_def_setfullname(UPB_UPCAST(m), fullname, s); } +// Helper: check that the field |f| is safe to add to msgdef |m|. Set an error +// on status |s| and return false if not. +static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f, + upb_status *s) { + if (upb_fielddef_containingtype(f) != NULL) { + upb_status_seterrmsg(s, "fielddef already belongs to a message"); + return false; + } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { + upb_status_seterrmsg(s, "field name or number were not set"); + return false; + } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) || + upb_msgdef_itof(m, upb_fielddef_number(f))) { + upb_status_seterrmsg(s, "duplicate field name or number for field"); + return false; + } + return true; +} + +static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) { + release_containingtype(f); + f->msg.def = m; + f->msg_is_symbolic = false; + upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); + upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); + upb_ref2(f, m); + upb_ref2(m, f); + if (ref_donor) upb_fielddef_unref(f, ref_donor); +} + bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, upb_status *s) { // TODO: extensions need to have a separate namespace, because proto2 allows a @@ -1346,28 +1438,65 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, // We also need to validate that the field number is in an extension range iff // it is an extension. + // This method is idempotent. Check if |f| is already part of this msgdef and + // return immediately if so. + if (upb_fielddef_containingtype(f) == m) { + return true; + } + // Check constraints for all fields before performing any action. - if (upb_fielddef_containingtype(f) != NULL) { - upb_status_seterrmsg(s, "fielddef already belongs to a message"); + if (!check_field_add(m, f, s)) { return false; - } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { - upb_status_seterrmsg(s, "field name or number were not set"); - return false; - } else if(upb_msgdef_itof(m, upb_fielddef_number(f)) || - upb_msgdef_ntofz(m, upb_fielddef_name(f))) { - upb_status_seterrmsg(s, "duplicate field name or number"); + } else if (upb_fielddef_containingoneof(f) != NULL) { + // Fields in a oneof can only be added by adding the oneof to the msgdef. + upb_status_seterrmsg(s, "fielddef is part of a oneof"); return false; } // Constraint checks ok, perform the action. - release_containingtype(f); - f->msg.def = m; - f->msg_is_symbolic = false; - upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); - upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); - upb_ref2(f, m); - upb_ref2(m, f); - if (ref_donor) upb_fielddef_unref(f, ref_donor); + add_field(m, f, ref_donor); + return true; +} + +bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor, + upb_status *s) { + // Check various conditions that would prevent this oneof from being added. + if (upb_oneofdef_containingtype(o)) { + upb_status_seterrmsg(s, "oneofdef already belongs to a message"); + return false; + } else if (upb_oneofdef_name(o) == NULL) { + upb_status_seterrmsg(s, "oneofdef name was not set"); + return false; + } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) { + upb_status_seterrmsg(s, "duplicate oneof name"); + return false; + } + + // Check that all of the oneof's fields do not conflict with names or numbers + // of fields already in the message. + upb_oneof_iter it; + for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) { + const upb_fielddef *f = upb_oneof_iter_field(&it); + if (!check_field_add(m, f, s)) { + return false; + } + } + + // Everything checks out -- commit now. + + // Add oneof itself first. + o->parent = m; + upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o)); + upb_ref2(o, m); + upb_ref2(m, o); + + // Add each field of the oneof directly to the msgdef. + for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) { + upb_fielddef *f = upb_oneof_iter_field(&it); + add_field(m, f, NULL); + } + + if (ref_donor) upb_oneofdef_unref(o, ref_donor); return true; } @@ -1385,10 +1514,21 @@ const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, upb_value_getptr(val) : NULL; } +const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, + size_t len) { + upb_value val; + return upb_strtable_lookup2(&m->ntoo, name, len, &val) ? + upb_value_getptr(val) : NULL; +} + int upb_msgdef_numfields(const upb_msgdef *m) { return upb_strtable_count(&m->ntof); } +int upb_msgdef_numoneofs(const upb_msgdef *m) { + return upb_strtable_count(&m->ntoo); +} + void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) { assert(!upb_msgdef_isfrozen(m)); m->map_entry = map_entry; @@ -1398,18 +1538,245 @@ bool upb_msgdef_mapentry(const upb_msgdef *m) { return m->map_entry; } -void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { +void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) { upb_inttable_begin(iter, &m->itof); } -void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } +void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); } + +bool upb_msg_field_done(const upb_msg_field_iter *iter) { + return upb_inttable_done(iter); +} + +upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) { + return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); +} + +void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) { + upb_inttable_iter_setdone(iter); +} + +void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) { + upb_strtable_begin(iter, &m->ntoo); +} + +void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); } -bool upb_msg_done(const upb_msg_iter *iter) { return upb_inttable_done(iter); } +bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) { + return upb_strtable_done(iter); +} + +upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) { + return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter)); +} + +void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) { + upb_strtable_iter_setdone(iter); +} + +/* upb_oneofdef ***************************************************************/ + +static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_oneofdef *o = (const upb_oneofdef*)r; + upb_oneof_iter i; + for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) { + const upb_fielddef *f = upb_oneof_iter_field(&i); + visit(r, UPB_UPCAST2(f), closure); + } + if (o->parent) { + visit(r, UPB_UPCAST2(o->parent), closure); + } +} + +static void freeoneof(upb_refcounted *r) { + upb_oneofdef *o = (upb_oneofdef*)r; + upb_strtable_uninit(&o->ntof); + upb_inttable_uninit(&o->itof); + upb_def_uninit(UPB_UPCAST(o)); + free(o); +} + +upb_oneofdef *upb_oneofdef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof}; + upb_oneofdef *o = malloc(sizeof(*o)); + o->parent = NULL; + if (!o) return NULL; + if (!upb_def_init(UPB_UPCAST(o), UPB_DEF_ONEOF, &vtbl, owner)) goto err2; + if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2; + if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1; + return o; + +err1: + upb_inttable_uninit(&o->itof); +err2: + free(o); + return NULL; +} + +upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) { + upb_oneofdef *newo = upb_oneofdef_new(owner); + if (!newo) return NULL; + bool ok = upb_def_setfullname(UPB_UPCAST(newo), + upb_def_fullname(UPB_UPCAST(o)), NULL); + UPB_ASSERT_VAR(ok, ok); + upb_oneof_iter i; + for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) { + upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f); + if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) { + upb_oneofdef_unref(newo, owner); + return NULL; + } + } + return newo; +} + +bool upb_oneofdef_isfrozen(const upb_oneofdef *o) { + return upb_def_isfrozen(UPB_UPCAST(o)); +} + +void upb_oneofdef_ref(const upb_oneofdef *o, const void *owner) { + upb_def_ref(UPB_UPCAST(o), owner); +} + +void upb_oneofdef_unref(const upb_oneofdef *o, const void *owner) { + upb_def_unref(UPB_UPCAST(o), owner); +} + +void upb_oneofdef_donateref(const upb_oneofdef *o, const void *from, + const void *to) { + upb_def_donateref(UPB_UPCAST(o), from, to); +} + +void upb_oneofdef_checkref(const upb_oneofdef *o, const void *owner) { + upb_def_checkref(UPB_UPCAST(o), owner); +} + +const char *upb_oneofdef_name(const upb_oneofdef *o) { + return upb_def_fullname(UPB_UPCAST(o)); +} + +bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname, + upb_status *s) { + if (upb_oneofdef_containingtype(o)) { + upb_status_seterrmsg(s, "oneof already added to a message"); + return false; + } + return upb_def_setfullname(UPB_UPCAST(o), fullname, s); +} + +const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) { + return o->parent; +} + +int upb_oneofdef_numfields(const upb_oneofdef *o) { + return upb_strtable_count(&o->ntof); +} + +bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f, + const void *ref_donor, + upb_status *s) { + assert(!upb_oneofdef_isfrozen(o)); + assert(!o->parent || !upb_msgdef_isfrozen(o->parent)); + + // This method is idempotent. Check if |f| is already part of this oneofdef + // and return immediately if so. + if (upb_fielddef_containingoneof(f) == o) { + return true; + } + + // The field must have an OPTIONAL label. + if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { + upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label"); + return false; + } + + // Check that no field with this name or number exists already in the oneof. + // Also check that the field is not already part of a oneof. + if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { + upb_status_seterrmsg(s, "field name or number were not set"); + return false; + } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) || + upb_oneofdef_ntofz(o, upb_fielddef_name(f))) { + upb_status_seterrmsg(s, "duplicate field name or number"); + return false; + } else if (upb_fielddef_containingoneof(f) != NULL) { + upb_status_seterrmsg(s, "fielddef already belongs to a oneof"); + return false; + } + + // We allow adding a field to the oneof either if the field is not part of a + // msgdef, or if it is and we are also part of the same msgdef. + if (o->parent == NULL) { + // If we're not in a msgdef, the field cannot be either. Otherwise we would + // need to magically add this oneof to a msgdef to remain consistent, which + // is surprising behavior. + if (upb_fielddef_containingtype(f) != NULL) { + upb_status_seterrmsg(s, "fielddef already belongs to a message, but " + "oneof does not"); + return false; + } + } else { + // If we're in a msgdef, the user can add fields that either aren't in any + // msgdef (in which case they're added to our msgdef) or already a part of + // our msgdef. + if (upb_fielddef_containingtype(f) != NULL && + upb_fielddef_containingtype(f) != o->parent) { + upb_status_seterrmsg(s, "fielddef belongs to a different message " + "than oneof"); + return false; + } + } + + // Commit phase. First add the field to our parent msgdef, if any, because + // that may fail; then add the field to our own tables. + + if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) { + if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) { + return false; + } + } + + release_containingtype(f); + f->oneof = o; + upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f)); + upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f)); + upb_ref2(f, o); + upb_ref2(o, f); + if (ref_donor) upb_fielddef_unref(f, ref_donor); + + return true; +} + +const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, + const char *name, size_t length) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, length, &val) ? + upb_value_getptr(val) : NULL; +} + +const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { + upb_value val; + return upb_inttable_lookup32(&o->itof, num, &val) ? + upb_value_getptr(val) : NULL; +} + +void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { + upb_inttable_begin(iter, &o->itof); +} + +void upb_oneof_next(upb_oneof_iter *iter) { + upb_inttable_next(iter); +} + +bool upb_oneof_done(upb_oneof_iter *iter) { + return upb_inttable_done(iter); +} -upb_fielddef *upb_msg_iter_field(const upb_msg_iter *iter) { +upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) { return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); } -void upb_msg_iter_setdone(upb_msg_iter *iter) { +void upb_oneof_iter_setdone(upb_oneof_iter *iter) { upb_inttable_iter_setdone(iter); } diff --git a/upb/def.h b/upb/def.h index ee138a0..3281076 100644 --- a/upb/def.h +++ b/upb/def.h @@ -34,6 +34,7 @@ class Def; class EnumDef; class FieldDef; class MessageDef; +class OneofDef; } #endif @@ -41,6 +42,7 @@ UPB_DECLARE_TYPE(upb::Def, upb_def); UPB_DECLARE_TYPE(upb::EnumDef, upb_enumdef); UPB_DECLARE_TYPE(upb::FieldDef, upb_fielddef); UPB_DECLARE_TYPE(upb::MessageDef, upb_msgdef); +UPB_DECLARE_TYPE(upb::OneofDef, upb_oneofdef); // Maximum field number allowed for FieldDefs. This is an inherent limit of the // protobuf wire format. @@ -64,6 +66,7 @@ typedef enum { UPB_DEF_MSG, UPB_DEF_FIELD, UPB_DEF_ENUM, + UPB_DEF_ONEOF, UPB_DEF_SERVICE, // Not yet implemented. UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*() } upb_deftype_t; @@ -348,6 +351,10 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD, const MessageDef* containing_type() const; const char* containing_type_name(); + // The OneofDef to which this field belongs, or NULL if this field is not part + // of a oneof. + const OneofDef* containing_oneof() const; + // The field's type according to the enum in descriptor.proto. This is not // the same as UPB_TYPE_*, because it distinguishes between (for example) // INT32 and SINT32, whereas our "type" enum does not. This return of @@ -361,6 +368,7 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD, bool IsString() const; bool IsSequence() const; bool IsPrimitive() const; + bool IsMap() const; // How integers are encoded. Only meaningful for integer types. // Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes. @@ -521,6 +529,7 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def, } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f). bool subdef_is_symbolic; bool msg_is_symbolic; + const upb_oneofdef *oneof; bool default_is_string; bool type_is_set_; // False until type is explicitly set. bool is_extension_; @@ -536,11 +545,11 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def, )); #define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy, \ - packed, name, num, msgdef, subdef, selector_base, \ + packed, name, num, msgdef, subdef, selector_base, \ index, defaultval, refs, ref2s) \ { \ UPB_DEF_INIT(name, UPB_DEF_FIELD, refs, ref2s), defaultval, {msgdef}, \ - {subdef}, false, false, \ + {subdef}, NULL, false, false, \ type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, true, is_extension, \ lazy, packed, intfmt, tagdelim, type, label, num, selector_base, index \ } @@ -574,6 +583,7 @@ bool upb_fielddef_isextension(const upb_fielddef *f); bool upb_fielddef_lazy(const upb_fielddef *f); bool upb_fielddef_packed(const upb_fielddef *f); const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f); +const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f); upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f); const char *upb_fielddef_containingtypename(upb_fielddef *f); upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f); @@ -583,6 +593,7 @@ bool upb_fielddef_issubmsg(const upb_fielddef *f); bool upb_fielddef_isstring(const upb_fielddef *f); bool upb_fielddef_isseq(const upb_fielddef *f); bool upb_fielddef_isprimitive(const upb_fielddef *f); +bool upb_fielddef_ismap(const upb_fielddef *f); int64_t upb_fielddef_defaultint64(const upb_fielddef *f); int32_t upb_fielddef_defaultint32(const upb_fielddef *f); uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f); @@ -641,7 +652,8 @@ UPB_END_EXTERN_C // } /* upb::MessageDef ************************************************************/ -typedef upb_inttable_iter upb_msg_iter; +typedef upb_inttable_iter upb_msg_field_iter; +typedef upb_strtable_iter upb_msg_oneof_iter; // Structure that describes a single .proto message type. // @@ -671,14 +683,37 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( // The number of fields that belong to the MessageDef. int field_count() const; + // The number of oneofs that belong to the MessageDef. + int oneof_count() const; + // Adds a field (upb_fielddef object) to a msgdef. Requires that the msgdef // and the fielddefs are mutable. The fielddef's name and number must be // set, and the message may not already contain any field with this name or // number, and this fielddef may not be part of another message. In error // cases false is returned and the msgdef is unchanged. + // + // If the given field is part of a oneof, this call succeeds if and only if + // that oneof is already part of this msgdef. (Note that adding a oneof to a + // msgdef automatically adds all of its fields to the msgdef at the time that + // the oneof is added, so it is usually more idiomatic to add the oneof's + // fields first then add the oneof to the msgdef. This case is supported for + // convenience.) + // + // If |f| is already part of this MessageDef, this method performs no action + // and returns true (success). Thus, this method is idempotent. bool AddField(FieldDef* f, Status* s); bool AddField(const reffed_ptr& f, Status* s); + // Adds a oneof (upb_oneofdef object) to a msgdef. Requires that the msgdef, + // oneof, and any fielddefs are mutable, that the fielddefs contained in the + // oneof do not have any name or number conflicts with existing fields in the + // msgdef, and that the oneof's name is unique among all oneofs in the msgdef. + // If the oneof is added successfully, all of its fields will be added + // directly to the msgdef as well. In error cases, false is returned and the + // msgdef is unchanged. + bool AddOneof(OneofDef* o, Status* s); + bool AddOneof(const reffed_ptr& o, Status* s); + // These return NULL if the field is not found. FieldDef* FindFieldByNumber(uint32_t number); FieldDef* FindFieldByName(const char *name, size_t len); @@ -702,6 +737,25 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( return FindFieldByName(str.c_str(), str.size()); } + OneofDef* FindOneofByName(const char* name, size_t len); + const OneofDef* FindOneofByName(const char* name, size_t len) const; + + OneofDef* FindOneofByName(const char* name) { + return FindOneofByName(name, strlen(name)); + } + const OneofDef* FindOneofByName(const char* name) const { + return FindOneofByName(name, strlen(name)); + } + + template + OneofDef* FindOneofByName(const T& str) { + return FindOneofByName(str.c_str(), str.size()); + } + template + const OneofDef* FindOneofByName(const T& str) const { + return FindOneofByName(str.c_str(), str.size()); + } + // Returns a new msgdef that is a copy of the given msgdef (and a copy of all // the fields) but with any references to submessages broken and replaced // with just the name of the submessage. Returns NULL if memory allocation @@ -717,39 +771,117 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( bool mapentry() const; // Iteration over fields. The order is undefined. - class iterator : public std::iterator { + class field_iterator + : public std::iterator { public: - explicit iterator(MessageDef* md); - static iterator end(MessageDef* md); + explicit field_iterator(MessageDef* md); + static field_iterator end(MessageDef* md); void operator++(); FieldDef* operator*() const; - bool operator!=(const iterator& other) const; - bool operator==(const iterator& other) const; + bool operator!=(const field_iterator& other) const; + bool operator==(const field_iterator& other) const; private: - upb_msg_iter iter_; + upb_msg_field_iter iter_; }; - class const_iterator + class const_field_iterator : public std::iterator { public: - explicit const_iterator(const MessageDef* md); - static const_iterator end(const MessageDef* md); + explicit const_field_iterator(const MessageDef* md); + static const_field_iterator end(const MessageDef* md); void operator++(); const FieldDef* operator*() const; - bool operator!=(const const_iterator& other) const; - bool operator==(const const_iterator& other) const; + bool operator!=(const const_field_iterator& other) const; + bool operator==(const const_field_iterator& other) const; private: - upb_msg_iter iter_; + upb_msg_field_iter iter_; }; - iterator begin(); - iterator end(); - const_iterator begin() const; - const_iterator end() const; + // Iteration over oneofs. The order is undefined. + class oneof_iterator + : public std::iterator { + public: + explicit oneof_iterator(MessageDef* md); + static oneof_iterator end(MessageDef* md); + + void operator++(); + OneofDef* operator*() const; + bool operator!=(const oneof_iterator& other) const; + bool operator==(const oneof_iterator& other) const; + + private: + upb_msg_oneof_iter iter_; + }; + + class const_oneof_iterator + : public std::iterator { + public: + explicit const_oneof_iterator(const MessageDef* md); + static const_oneof_iterator end(const MessageDef* md); + + void operator++(); + const OneofDef* operator*() const; + bool operator!=(const const_oneof_iterator& other) const; + bool operator==(const const_oneof_iterator& other) const; + + private: + upb_msg_oneof_iter iter_; + }; + + class FieldAccessor { + public: + explicit FieldAccessor(MessageDef* msg) : msg_(msg) {} + field_iterator begin() { return msg_->field_begin(); } + field_iterator end() { return msg_->field_end(); } + private: + MessageDef* msg_; + }; + + class ConstFieldAccessor { + public: + explicit ConstFieldAccessor(const MessageDef* msg) : msg_(msg) {} + const_field_iterator begin() { return msg_->field_begin(); } + const_field_iterator end() { return msg_->field_end(); } + private: + const MessageDef* msg_; + }; + + class OneofAccessor { + public: + explicit OneofAccessor(MessageDef* msg) : msg_(msg) {} + oneof_iterator begin() { return msg_->oneof_begin(); } + oneof_iterator end() { return msg_->oneof_end(); } + private: + MessageDef* msg_; + }; + + class ConstOneofAccessor { + public: + explicit ConstOneofAccessor(const MessageDef* msg) : msg_(msg) {} + const_oneof_iterator begin() { return msg_->oneof_begin(); } + const_oneof_iterator end() { return msg_->oneof_end(); } + private: + const MessageDef* msg_; + }; + + field_iterator field_begin(); + field_iterator field_end(); + const_field_iterator field_begin() const; + const_field_iterator field_end() const; + + oneof_iterator oneof_begin(); + oneof_iterator oneof_end(); + const_oneof_iterator oneof_begin() const; + const_oneof_iterator oneof_end() const; + + FieldAccessor fields() { return FieldAccessor(this); } + ConstFieldAccessor fields() const { return ConstFieldAccessor(this); } + OneofAccessor oneofs() { return OneofAccessor(this); } + ConstOneofAccessor oneofs() const { return ConstOneofAccessor(this); } private: UPB_DISALLOW_POD_OPS(MessageDef, upb::MessageDef); @@ -762,6 +894,9 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def, upb_inttable itof; // int to field upb_strtable ntof; // name to field + // Tables for looking up oneofs by name. + upb_strtable ntoo; // name to oneof + // Is this a map-entry message? // TODO: set this flag properly for static descriptors; regenerate // descriptor.upb.c. @@ -770,11 +905,14 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def, // TODO(haberman): proper extension ranges (there can be multiple). )); +// TODO: also support static initialization of the oneofs table. This will be +// needed if we compile in descriptors that contain oneofs. #define UPB_MSGDEF_INIT(name, selector_count, submsg_field_count, itof, ntof, \ refs, ref2s) \ { \ UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \ - submsg_field_count, itof, ntof, false \ + submsg_field_count, itof, ntof, \ + UPB_EMPTY_STRTABLE_INIT(UPB_CTYPE_PTR), false \ } UPB_BEGIN_EXTERN_C // { @@ -798,6 +936,8 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, upb_status *s); upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, upb_status *s); +bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor, + upb_status *s); // Field lookup in a couple of different variations: // - itof = int to field @@ -822,11 +962,38 @@ UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, return (upb_fielddef *)upb_msgdef_ntof(m, name, len); } +// Oneof lookup: +// - ntoo = name to oneof +// - ntooz = name to oneof, null-terminated string. +const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, + size_t len); +int upb_msgdef_numoneofs(const upb_msgdef *m); + +UPB_INLINE const upb_oneofdef *upb_msgdef_ntooz(const upb_msgdef *m, + const char *name) { + return upb_msgdef_ntoo(m, name, strlen(name)); +} + +UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m, + const char *name, size_t len) { + return (upb_oneofdef *)upb_msgdef_ntoo(m, name, len); +} + void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry); bool upb_msgdef_mapentry(const upb_msgdef *m); -// upb_msg_iter i; -// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { +// Well-known field tag numbers for map-entry messages. +#define UPB_MAPENTRY_KEY 1 +#define UPB_MAPENTRY_VALUE 2 + +const upb_oneofdef *upb_msgdef_findoneof(const upb_msgdef *m, + const char *name); +int upb_msgdef_numoneofs(const upb_msgdef *m); + +// upb_msg_field_iter i; +// for(upb_msg_field_begin(&i, m); +// !upb_msg_field_done(&i); +// upb_msg_field_next(&i)) { // upb_fielddef *f = upb_msg_iter_field(&i); // // ... // } @@ -834,11 +1001,18 @@ bool upb_msgdef_mapentry(const upb_msgdef *m); // For C we don't have separate iterators for const and non-const. // It is the caller's responsibility to cast the upb_fielddef* to // const if the upb_msgdef* is const. -void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m); -void upb_msg_next(upb_msg_iter *iter); -bool upb_msg_done(const upb_msg_iter *iter); -upb_fielddef *upb_msg_iter_field(const upb_msg_iter *iter); -void upb_msg_iter_setdone(upb_msg_iter *iter); +void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m); +void upb_msg_field_next(upb_msg_field_iter *iter); +bool upb_msg_field_done(const upb_msg_field_iter *iter); +upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter); +void upb_msg_field_iter_setdone(upb_msg_field_iter *iter); + +// Similar to above, we also support iterating through the oneofs in a msgdef. +void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m); +void upb_msg_oneof_next(upb_msg_oneof_iter *iter); +bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter); +upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter); +void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter); UPB_END_EXTERN_C // } @@ -980,6 +1154,172 @@ int32_t upb_enum_iter_number(upb_enum_iter *iter); UPB_END_EXTERN_C // } +/* upb::OneofDef **************************************************************/ + +typedef upb_inttable_iter upb_oneof_iter; + +// Class that represents a oneof. Its base class is upb::Def (convert with +// upb::upcast()). +UPB_DEFINE_DEF(upb::OneofDef, oneofdef, ONEOF, UPB_QUOTE( + public: + // Returns NULL if memory allocation failed. + static reffed_ptr New(); + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void* from, const void* to) const; + void CheckRef(const void* owner) const; + + // Functionality from upb::Def. + const char* full_name() const; + + // Returns the MessageDef that owns this OneofDef. + const MessageDef* containing_type() const; + + // Returns the name of this oneof. This is the name used to look up the oneof + // by name once added to a message def. + const char* name() const; + bool set_name(const char* name, Status* s); + + // Returns the number of fields currently defined in the oneof. + int field_count() const; + + // Adds a field to the oneof. The field must not have been added to any other + // oneof or msgdef. If the oneof is not yet part of a msgdef, then when the + // oneof is eventually added to a msgdef, all fields added to the oneof will + // also be added to the msgdef at that time. If the oneof is already part of a + // msgdef, the field must either be a part of that msgdef already, or must not + // be a part of any msgdef; in the latter case, the field is added to the + // msgdef as a part of this operation. + // + // The field may only have an OPTIONAL label, never REQUIRED or REPEATED. + // + // If |f| is already part of this MessageDef, this method performs no action + // and returns true (success). Thus, this method is idempotent. + bool AddField(FieldDef* field, Status* s); + bool AddField(const reffed_ptr& field, Status* s); + + // Looks up by name. + const FieldDef* FindFieldByName(const char* name, size_t len) const; + FieldDef* FindFieldByName(const char* name, size_t len); + const FieldDef* FindFieldByName(const char* name) const { + return FindFieldByName(name, strlen(name)); + } + FieldDef* FindFieldByName(const char* name) { + return FindFieldByName(name, strlen(name)); + } + + template + FieldDef* FindFieldByName(const T& str) { + return FindFieldByName(str.c_str(), str.size()); + } + template + const FieldDef* FindFieldByName(const T& str) const { + return FindFieldByName(str.c_str(), str.size()); + } + + // Looks up by tag number. + const FieldDef* FindFieldByNumber(uint32_t num) const; + + // Returns a new OneofDef with all the same fields. The OneofDef will be owned + // by the given owner. + OneofDef* Dup(const void* owner) const; + + // Iteration over fields. The order is undefined. + class iterator : public std::iterator { + public: + explicit iterator(OneofDef* md); + static iterator end(OneofDef* md); + + void operator++(); + FieldDef* operator*() const; + bool operator!=(const iterator& other) const; + bool operator==(const iterator& other) const; + + private: + upb_oneof_iter iter_; + }; + + class const_iterator + : public std::iterator { + public: + explicit const_iterator(const OneofDef* md); + static const_iterator end(const OneofDef* md); + + void operator++(); + const FieldDef* operator*() const; + bool operator!=(const const_iterator& other) const; + bool operator==(const const_iterator& other) const; + + private: + upb_oneof_iter iter_; + }; + + iterator begin(); + iterator end(); + const_iterator begin() const; + const_iterator end() const; + + private: + UPB_DISALLOW_POD_OPS(OneofDef, upb::OneofDef); +), +UPB_DEFINE_STRUCT(upb_oneofdef, upb_def, + upb_strtable ntof; + upb_inttable itof; + const upb_msgdef *parent; +)); + +#define UPB_ONEOFDEF_INIT(name, ntof, itof, refs, ref2s) \ + { UPB_DEF_INIT(name, UPB_DEF_ENUM, refs, ref2s), ntof, itof } + +UPB_BEGIN_EXTERN_C // { + +// Native C API. +upb_oneofdef *upb_oneofdef_new(const void *owner); +upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner); + +// From upb_refcounted. +void upb_oneofdef_unref(const upb_oneofdef *o, const void *owner); +bool upb_oneofdef_isfrozen(const upb_oneofdef *e); +void upb_oneofdef_ref(const upb_oneofdef *o, const void *owner); +void upb_oneofdef_donateref(const upb_oneofdef *m, const void *from, + const void *to); +void upb_oneofdef_checkref(const upb_oneofdef *o, const void *owner); + +const char *upb_oneofdef_name(const upb_oneofdef *o); +bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s); + +const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o); +int upb_oneofdef_numfields(const upb_oneofdef *o); +bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f, + const void *ref_donor, + upb_status *s); + +// Oneof lookups: +// - ntof: look up a field by name. +// - ntofz: look up a field by name (as a null-terminated string). +// - itof: look up a field by number. +const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, + const char *name, size_t length); +UPB_INLINE const upb_fielddef *upb_oneofdef_ntofz(const upb_oneofdef *o, + const char *name) { + return upb_oneofdef_ntof(o, name, strlen(name)); +} +const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num); + +// upb_oneof_iter i; +// for(upb_oneof_begin(&i, e); !upb_oneof_done(&i); upb_oneof_next(&i)) { +// // ... +// } +void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o); +void upb_oneof_next(upb_oneof_iter *iter); +bool upb_oneof_done(upb_oneof_iter *iter); +upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter); +void upb_oneof_iter_setdone(upb_oneof_iter *iter); + +UPB_END_EXTERN_C // } #ifdef __cplusplus @@ -1106,6 +1446,9 @@ inline void FieldDef::set_packed(bool packed) { inline const MessageDef* FieldDef::containing_type() const { return upb_fielddef_containingtype(this); } +inline const OneofDef* FieldDef::containing_oneof() const { + return upb_fielddef_containingoneof(this); +} inline const char* FieldDef::containing_type_name() { return upb_fielddef_containingtypename(this); } @@ -1142,6 +1485,7 @@ inline bool FieldDef::IsSubMessage() const { } inline bool FieldDef::IsString() const { return upb_fielddef_isstring(this); } inline bool FieldDef::IsSequence() const { return upb_fielddef_isseq(this); } +inline bool FieldDef::IsMap() const { return upb_fielddef_ismap(this); } inline int64_t FieldDef::default_int64() const { return upb_fielddef_defaultint64(this); } @@ -1256,12 +1600,21 @@ inline bool MessageDef::Freeze(Status* status) { inline int MessageDef::field_count() const { return upb_msgdef_numfields(this); } +inline int MessageDef::oneof_count() const { + return upb_msgdef_numoneofs(this); +} inline bool MessageDef::AddField(upb_fielddef* f, Status* s) { return upb_msgdef_addfield(this, f, NULL, s); } inline bool MessageDef::AddField(const reffed_ptr& f, Status* s) { return upb_msgdef_addfield(this, f.get(), NULL, s); } +inline bool MessageDef::AddOneof(upb_oneofdef* o, Status* s) { + return upb_msgdef_addoneof(this, o, NULL, s); +} +inline bool MessageDef::AddOneof(const reffed_ptr& o, Status* s) { + return upb_msgdef_addoneof(this, o.get(), NULL, s); +} inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) { return upb_msgdef_itof_mutable(this, number); } @@ -1275,6 +1628,13 @@ inline const FieldDef *MessageDef::FindFieldByName(const char *name, size_t len) const { return upb_msgdef_ntof(this, name, len); } +inline OneofDef* MessageDef::FindOneofByName(const char* name, size_t len) { + return upb_msgdef_ntoo_mutable(this, name, len); +} +inline const OneofDef* MessageDef::FindOneofByName(const char* name, + size_t len) const { + return upb_msgdef_ntoo(this, name, len); +} inline MessageDef* MessageDef::Dup(const void *owner) const { return upb_msgdef_dup(this, owner); } @@ -1284,55 +1644,127 @@ inline void MessageDef::setmapentry(bool map_entry) { inline bool MessageDef::mapentry() const { return upb_msgdef_mapentry(this); } -inline MessageDef::iterator MessageDef::begin() { return iterator(this); } -inline MessageDef::iterator MessageDef::end() { return iterator::end(this); } -inline MessageDef::const_iterator MessageDef::begin() const { - return const_iterator(this); +inline MessageDef::field_iterator MessageDef::field_begin() { + return field_iterator(this); } -inline MessageDef::const_iterator MessageDef::end() const { - return const_iterator::end(this); +inline MessageDef::field_iterator MessageDef::field_end() { + return field_iterator::end(this); +} +inline MessageDef::const_field_iterator MessageDef::field_begin() const { + return const_field_iterator(this); +} +inline MessageDef::const_field_iterator MessageDef::field_end() const { + return const_field_iterator::end(this); +} + +inline MessageDef::oneof_iterator MessageDef::oneof_begin() { + return oneof_iterator(this); +} +inline MessageDef::oneof_iterator MessageDef::oneof_end() { + return oneof_iterator::end(this); +} +inline MessageDef::const_oneof_iterator MessageDef::oneof_begin() const { + return const_oneof_iterator(this); +} +inline MessageDef::const_oneof_iterator MessageDef::oneof_end() const { + return const_oneof_iterator::end(this); } -inline MessageDef::iterator::iterator(MessageDef* md) { - upb_msg_begin(&iter_, md); +inline MessageDef::field_iterator::field_iterator(MessageDef* md) { + upb_msg_field_begin(&iter_, md); } -inline MessageDef::iterator MessageDef::iterator::end(MessageDef* md) { - MessageDef::iterator iter(md); - upb_msg_iter_setdone(&iter.iter_); +inline MessageDef::field_iterator MessageDef::field_iterator::end( + MessageDef* md) { + MessageDef::field_iterator iter(md); + upb_msg_field_iter_setdone(&iter.iter_); return iter; } -inline FieldDef* MessageDef::iterator::operator*() const { +inline FieldDef* MessageDef::field_iterator::operator*() const { return upb_msg_iter_field(&iter_); } -inline void MessageDef::iterator::operator++() { return upb_msg_next(&iter_); } -inline bool MessageDef::iterator::operator==(const iterator &other) const { +inline void MessageDef::field_iterator::operator++() { + return upb_msg_field_next(&iter_); +} +inline bool MessageDef::field_iterator::operator==( + const field_iterator &other) const { return upb_inttable_iter_isequal(&iter_, &other.iter_); } -inline bool MessageDef::iterator::operator!=(const iterator &other) const { +inline bool MessageDef::field_iterator::operator!=( + const field_iterator &other) const { return !(*this == other); } -inline MessageDef::const_iterator::const_iterator(const MessageDef* md) { - upb_msg_begin(&iter_, md); +inline MessageDef::const_field_iterator::const_field_iterator( + const MessageDef* md) { + upb_msg_field_begin(&iter_, md); } -inline MessageDef::const_iterator MessageDef::const_iterator::end( +inline MessageDef::const_field_iterator MessageDef::const_field_iterator::end( const MessageDef *md) { - MessageDef::const_iterator iter(md); - upb_msg_iter_setdone(&iter.iter_); + MessageDef::const_field_iterator iter(md); + upb_msg_field_iter_setdone(&iter.iter_); return iter; } -inline const FieldDef* MessageDef::const_iterator::operator*() const { +inline const FieldDef* MessageDef::const_field_iterator::operator*() const { return upb_msg_iter_field(&iter_); } -inline void MessageDef::const_iterator::operator++() { - return upb_msg_next(&iter_); +inline void MessageDef::const_field_iterator::operator++() { + return upb_msg_field_next(&iter_); } -inline bool MessageDef::const_iterator::operator==( - const const_iterator &other) const { +inline bool MessageDef::const_field_iterator::operator==( + const const_field_iterator &other) const { return upb_inttable_iter_isequal(&iter_, &other.iter_); } -inline bool MessageDef::const_iterator::operator!=( - const const_iterator &other) const { +inline bool MessageDef::const_field_iterator::operator!=( + const const_field_iterator &other) const { + return !(*this == other); +} + +inline MessageDef::oneof_iterator::oneof_iterator(MessageDef* md) { + upb_msg_oneof_begin(&iter_, md); +} +inline MessageDef::oneof_iterator MessageDef::oneof_iterator::end( + MessageDef* md) { + MessageDef::oneof_iterator iter(md); + upb_msg_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline OneofDef* MessageDef::oneof_iterator::operator*() const { + return upb_msg_iter_oneof(&iter_); +} +inline void MessageDef::oneof_iterator::operator++() { + return upb_msg_oneof_next(&iter_); +} +inline bool MessageDef::oneof_iterator::operator==( + const oneof_iterator &other) const { + return upb_strtable_iter_isequal(&iter_, &other.iter_); +} +inline bool MessageDef::oneof_iterator::operator!=( + const oneof_iterator &other) const { + return !(*this == other); +} + +inline MessageDef::const_oneof_iterator::const_oneof_iterator( + const MessageDef* md) { + upb_msg_oneof_begin(&iter_, md); +} +inline MessageDef::const_oneof_iterator MessageDef::const_oneof_iterator::end( + const MessageDef *md) { + MessageDef::const_oneof_iterator iter(md); + upb_msg_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline const OneofDef* MessageDef::const_oneof_iterator::operator*() const { + return upb_msg_iter_oneof(&iter_); +} +inline void MessageDef::const_oneof_iterator::operator++() { + return upb_msg_oneof_next(&iter_); +} +inline bool MessageDef::const_oneof_iterator::operator==( + const const_oneof_iterator &other) const { + return upb_strtable_iter_isequal(&iter_, &other.iter_); +} +inline bool MessageDef::const_oneof_iterator::operator!=( + const const_oneof_iterator &other) const { return !(*this == other); } @@ -1400,6 +1832,105 @@ inline const char* EnumDef::Iterator::name() { } inline bool EnumDef::Iterator::Done() { return upb_enum_done(&iter_); } inline void EnumDef::Iterator::Next() { return upb_enum_next(&iter_); } + +inline reffed_ptr OneofDef::New() { + upb_oneofdef *o = upb_oneofdef_new(&o); + return reffed_ptr(o, &o); +} +inline bool OneofDef::IsFrozen() const { return upb_oneofdef_isfrozen(this); } +inline void OneofDef::Ref(const void* owner) const { + return upb_oneofdef_ref(this, owner); +} +inline void OneofDef::Unref(const void* owner) const { + return upb_oneofdef_unref(this, owner); +} +inline void OneofDef::DonateRef(const void* from, const void* to) const { + return upb_oneofdef_donateref(this, from, to); +} +inline void OneofDef::CheckRef(const void* owner) const { + return upb_oneofdef_checkref(this, owner); +} +inline const char* OneofDef::full_name() const { + return upb_oneofdef_name(this); +} + +inline const MessageDef* OneofDef::containing_type() const { + return upb_oneofdef_containingtype(this); +} +inline const char* OneofDef::name() const { + return upb_oneofdef_name(this); +} +inline bool OneofDef::set_name(const char* name, Status* s) { + return upb_oneofdef_setname(this, name, s); +} +inline int OneofDef::field_count() const { + return upb_oneofdef_numfields(this); +} +inline bool OneofDef::AddField(FieldDef* field, Status* s) { + return upb_oneofdef_addfield(this, field, NULL, s); +} +inline bool OneofDef::AddField(const reffed_ptr& field, Status* s) { + return upb_oneofdef_addfield(this, field.get(), NULL, s); +} +inline const FieldDef* OneofDef::FindFieldByName(const char* name, + size_t len) const { + return upb_oneofdef_ntof(this, name, len); +} +inline const FieldDef* OneofDef::FindFieldByNumber(uint32_t num) const { + return upb_oneofdef_itof(this, num); +} +inline OneofDef::iterator OneofDef::begin() { return iterator(this); } +inline OneofDef::iterator OneofDef::end() { return iterator::end(this); } +inline OneofDef::const_iterator OneofDef::begin() const { + return const_iterator(this); +} +inline OneofDef::const_iterator OneofDef::end() const { + return const_iterator::end(this); +} + +inline OneofDef::iterator::iterator(OneofDef* o) { + upb_oneof_begin(&iter_, o); +} +inline OneofDef::iterator OneofDef::iterator::end(OneofDef* o) { + OneofDef::iterator iter(o); + upb_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline FieldDef* OneofDef::iterator::operator*() const { + return upb_oneof_iter_field(&iter_); +} +inline void OneofDef::iterator::operator++() { return upb_oneof_next(&iter_); } +inline bool OneofDef::iterator::operator==(const iterator &other) const { + return upb_inttable_iter_isequal(&iter_, &other.iter_); +} +inline bool OneofDef::iterator::operator!=(const iterator &other) const { + return !(*this == other); +} + +inline OneofDef::const_iterator::const_iterator(const OneofDef* md) { + upb_oneof_begin(&iter_, md); +} +inline OneofDef::const_iterator OneofDef::const_iterator::end( + const OneofDef *md) { + OneofDef::const_iterator iter(md); + upb_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline const FieldDef* OneofDef::const_iterator::operator*() const { + return upb_msg_iter_field(&iter_); +} +inline void OneofDef::const_iterator::operator++() { + return upb_oneof_next(&iter_); +} +inline bool OneofDef::const_iterator::operator==( + const const_iterator &other) const { + return upb_inttable_iter_isequal(&iter_, &other.iter_); +} +inline bool OneofDef::const_iterator::operator!=( + const const_iterator &other) const { + return !(*this == other); +} + } // namespace upb #endif diff --git a/upb/descriptor/reader.c b/upb/descriptor/reader.c index 1baad81..0b289c0 100644 --- a/upb/descriptor/reader.c +++ b/upb/descriptor/reader.c @@ -20,6 +20,54 @@ #include "upb/sink.h" #include "upb/descriptor/descriptor.upb.h" +// upb_deflist is an internal-only dynamic array for storing a growing list of +// upb_defs. +typedef struct { + upb_def **defs; + size_t len; + size_t size; + bool owned; +} upb_deflist; + +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). +typedef struct { + char *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. + int start; +} upb_descreader_frame; + +// The maximum number of nested declarations that are allowed, ie. +// message Foo { +// message Bar { +// message Baz { +// } +// } +// } +// +// This is a resource limit that affects how big our runtime stack can grow. +// TODO: make this a runtime-settable property of the Reader instance. +#define UPB_MAX_MESSAGE_NESTING 64 + +struct upb_descreader { + upb_sink sink; + upb_deflist defs; + upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING]; + int stack_len; + + uint32_t number; + char *name; + bool saw_number; + bool saw_name; + + char *default_string; + + upb_fielddef *f; +}; + static char *upb_strndup(const char *buf, size_t n) { char *ret = malloc(n + 1); if (!ret) return NULL; @@ -99,36 +147,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { /* upb_descreader ************************************************************/ -void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers, - upb_status *status) { - UPB_UNUSED(status); - upb_deflist_init(&r->defs); - upb_sink_reset(upb_descreader_input(r), handlers, r); - r->stack_len = 0; - r->name = NULL; - r->default_string = NULL; -} - -void upb_descreader_uninit(upb_descreader *r) { - free(r->name); - upb_deflist_uninit(&r->defs); - free(r->default_string); - while (r->stack_len > 0) { - upb_descreader_frame *f = &r->stack[--r->stack_len]; - free(f->name); - } -} - -upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) { - *n = r->defs.len; - upb_deflist_donaterefs(&r->defs, owner); - return r->defs.defs; -} - -upb_sink *upb_descreader_input(upb_descreader *r) { - return &r->sink; -} - static upb_msgdef *upb_descreader_top(upb_descreader *r) { assert(r->stack_len > 1); int index = r->stack[r->stack_len-1].start - 1; @@ -568,6 +586,45 @@ static void reghandlers(const void *closure, upb_handlers *h) { #undef D +void descreader_cleanup(void *_r) { + upb_descreader *r = _r; + free(r->name); + upb_deflist_uninit(&r->defs); + free(r->default_string); + while (r->stack_len > 0) { + upb_descreader_frame *f = &r->stack[--r->stack_len]; + free(f->name); + } +} + + +/* Public API ****************************************************************/ + +upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) { + upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader)); + if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) { + return NULL; + } + + upb_deflist_init(&r->defs); + upb_sink_reset(upb_descreader_input(r), h, r); + r->stack_len = 0; + r->name = NULL; + r->default_string = NULL; + + return r; +} + +upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) { + *n = r->defs.len; + upb_deflist_donaterefs(&r->defs, owner); + return r->defs.defs; +} + +upb_sink *upb_descreader_input(upb_descreader *r) { + return &r->sink; +} + const upb_handlers *upb_descreader_newhandlers(const void *owner) { const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s); const upb_handlers *h = upb_handlers_newfrozen( diff --git a/upb/descriptor/reader.h b/upb/descriptor/reader.h index 700fd65..bcd4b06 100644 --- a/upb/descriptor/reader.h +++ b/upb/descriptor/reader.h @@ -11,6 +11,7 @@ #ifndef UPB_DESCRIPTOR_H #define UPB_DESCRIPTOR_H +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -23,45 +24,11 @@ class Reader; UPB_DECLARE_TYPE(upb::descriptor::Reader, upb_descreader); -// Internal-only structs used by Reader. - -// upb_deflist is an internal-only dynamic array for storing a growing list of -// upb_defs. -typedef struct { - UPB_PRIVATE_FOR_CPP - upb_def **defs; - size_t len; - size_t size; - bool owned; -} upb_deflist; - -// We keep a stack of all the messages scopes we are currently in, as well as -// the top-level file scope. This is necessary to correctly qualify the -// definitions that are contained inside. "name" tracks the name of the -// message or package (a bare name -- not qualified by any enclosing scopes). -typedef struct { - UPB_PRIVATE_FOR_CPP - char *name; - // Index of the first def that is under this scope. For msgdefs, the - // msgdef itself is at start-1. - int start; -} upb_descreader_frame; - -// The maximum number of nested declarations that are allowed, ie. -// message Foo { -// message Bar { -// message Baz { -// } -// } -// } -// -// This is a resource limit that affects how big our runtime stack can grow. -// TODO: make this a runtime-settable property of the Reader instance. -#define UPB_MAX_MESSAGE_NESTING 64 +#ifdef __cplusplus // Class that receives descriptor data according to the descriptor.proto schema // and use it to build upb::Defs corresponding to that schema. -UPB_DEFINE_CLASS0(upb::descriptor::Reader, +class upb::descriptor::Reader { public: // These handlers must have come from NewHandlers() and must outlive the // Reader. @@ -71,11 +38,7 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader, // to build/memory-manage the handlers at runtime at all). Unfortunately this // is a bit tricky to implement for Handlers, but necessary to simplify this // interface. - Reader(const Handlers* handlers, Status* status); - ~Reader(); - - // Resets the reader's state and discards any defs it may have built. - void Reset(); + static Reader* Create(Environment* env, const Handlers* handlers); // The reader's input; this is where descriptor.proto data should be sent. Sink* input(); @@ -91,45 +54,30 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader, // Builds and returns handlers for the reader, owned by "owner." static Handlers* NewHandlers(const void* owner); -, -UPB_DEFINE_STRUCT0(upb_descreader, - upb_sink sink; - upb_deflist defs; - upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING]; - int stack_len; - uint32_t number; - char *name; - bool saw_number; - bool saw_name; + private: + UPB_DISALLOW_POD_OPS(Reader, upb::descriptor::Reader); +}; - char *default_string; - - upb_fielddef *f; -)); +#endif -UPB_BEGIN_EXTERN_C // { +UPB_BEGIN_EXTERN_C // C API. -void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers, - upb_status *status); -void upb_descreader_uninit(upb_descreader *r); -void upb_descreader_reset(upb_descreader *r); +upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h); upb_sink *upb_descreader_input(upb_descreader *r); upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n); const upb_handlers *upb_descreader_newhandlers(const void *owner); -UPB_END_EXTERN_C // } +UPB_END_EXTERN_C #ifdef __cplusplus // C++ implementation details. ///////////////////////////////////////////////// namespace upb { namespace descriptor { -inline Reader::Reader(const Handlers *h, Status *s) { - upb_descreader_init(this, h, s); +inline Reader* Reader::Create(Environment* e, const Handlers *h) { + return upb_descreader_create(e, h); } -inline Reader::~Reader() { upb_descreader_uninit(this); } -inline void Reader::Reset() { upb_descreader_reset(this); } inline Sink* Reader::input() { return upb_descreader_input(this); } inline upb::Def** Reader::GetDefs(void* owner, int* n) { return upb_descreader_getdefs(this, owner, n); diff --git a/upb/env.c b/upb/env.c new file mode 100644 index 0000000..ae5fb85 --- /dev/null +++ b/upb/env.c @@ -0,0 +1,259 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + +#include "upb/env.h" + +#include +#include +#include + +typedef struct cleanup_ent { + upb_cleanup_func *cleanup; + void *ud; + struct cleanup_ent *next; +} cleanup_ent; + +static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size); + +/* Default allocator **********************************************************/ + +// Just use realloc, keeping all allocated blocks in a linked list to destroy at +// the end. + +typedef struct mem_block { + // List is doubly-linked, because in cases where realloc() moves an existing + // block, we need to be able to remove the old pointer from the list + // efficiently. + struct mem_block *prev, *next; +#ifndef NDEBUG + size_t size; // Doesn't include mem_block structure. +#endif + char data[]; +} mem_block; + +typedef struct { + mem_block *head; +} default_alloc_ud; + +static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) { + UPB_UNUSED(oldsize); + default_alloc_ud *ud = _ud; + + mem_block *from = ptr ? ptr - sizeof(mem_block) : NULL; + +#ifndef NDEBUG + if (from) { + assert(oldsize <= from->size); + } +#endif + + mem_block *block = realloc(from, size + sizeof(mem_block)); + if (!block) return NULL; + +#ifndef NDEBUG + block->size = size; +#endif + + if (from) { + if (block != from) { + // The block was moved, so pointers in next and prev blocks must be + // updated to its new location. + if (block->next) block->next->prev = block; + if (block->prev) block->prev->next = block; + } + } else { + // Insert at head of linked list. + block->prev = NULL; + block->next = ud->head; + if (block->next) block->next->prev = block; + ud->head = block; + } + + return &block->data; +} + +static void default_alloc_cleanup(void *_ud) { + default_alloc_ud *ud = _ud; + mem_block *block = ud->head; + + while (block) { + void *to_free = block; + block = block->next; + free(to_free); + } +} + + +/* Standard error functions ***************************************************/ + +static bool default_err(void *ud, const upb_status *status) { + UPB_UNUSED(ud); + fprintf(stderr, "upb error: %s\n", upb_status_errmsg(status)); + return false; +} + +static bool write_err_to(void *ud, const upb_status *status) { + upb_status *copy_to = ud; + upb_status_copy(copy_to, status); + return false; +} + + +/* upb_env ********************************************************************/ + +void upb_env_init(upb_env *e) { + e->ok_ = true; + e->bytes_allocated = 0; + e->cleanup_head = NULL; + + default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud; + ud->head = NULL; + + // Set default functions. + upb_env_setallocfunc(e, default_alloc, ud); + upb_env_seterrorfunc(e, default_err, NULL); +} + +void upb_env_uninit(upb_env *e) { + cleanup_ent *ent = e->cleanup_head; + + while (ent) { + ent->cleanup(ent->ud); + ent = ent->next; + } + + // Must do this after running cleanup functions, because this will delete + // the memory we store our cleanup entries in! + if (e->alloc == default_alloc) { + default_alloc_cleanup(e->alloc_ud); + } +} + +UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc, + void *ud) { + e->alloc = alloc; + e->alloc_ud = ud; +} + +UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, + void *ud) { + e->err = func; + e->err_ud = ud; +} + +void upb_env_reporterrorsto(upb_env *e, upb_status *status) { + e->err = write_err_to; + e->err_ud = status; +} + +bool upb_env_ok(const upb_env *e) { + return e->ok_; +} + +bool upb_env_reporterror(upb_env *e, const upb_status *status) { + e->ok_ = false; + return e->err(e->err_ud, status); +} + +bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) { + cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent)); + if (!ent) return false; + + ent->cleanup = func; + ent->ud = ud; + ent->next = e->cleanup_head; + e->cleanup_head = ent; + + return true; +} + +void *upb_env_malloc(upb_env *e, size_t size) { + e->bytes_allocated += size; + if (e->alloc == seeded_alloc) { + // This is equivalent to the next branch, but allows inlining for a + // measurable perf benefit. + return seeded_alloc(e->alloc_ud, NULL, 0, size); + } else { + return e->alloc(e->alloc_ud, NULL, 0, size); + } +} + +void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) { + assert(oldsize <= size); + char *ret = e->alloc(e->alloc_ud, ptr, oldsize, size); + +#ifndef NDEBUG + // Overwrite non-preserved memory to ensure callers are passing the oldsize + // that they truly require. + memset(ret + oldsize, 0xff, size - oldsize); +#endif + + return ret; +} + +size_t upb_env_bytesallocated(const upb_env *e) { + return e->bytes_allocated; +} + + +/* upb_seededalloc ************************************************************/ + +// Be conservative and choose 16 in case anyone is using SSE. +static const size_t maxalign = 16; + +static size_t align_up(size_t size) { + return ((size + maxalign - 1) / maxalign) * maxalign; +} + +UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, + size_t size) { + upb_seededalloc *a = ud; + size = align_up(size); + + if (oldsize == 0 && size <= a->mem_limit - a->mem_ptr) { + // Fast path: we can satisfy from the initial allocation. + void *ret = a->mem_ptr; + a->mem_ptr += size; + return ret; + } else { + // Slow path: fallback to other allocator. + a->need_cleanup = true; + return a->alloc(a->alloc_ud, ptr, oldsize, size); + } +} + +void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) { + a->mem_base = mem; + a->mem_ptr = mem; + a->mem_limit = mem + len; + a->need_cleanup = false; + a->returned_allocfunc = false; + + default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud; + ud->head = NULL; + + upb_seededalloc_setfallbackalloc(a, default_alloc, ud); +} + +void upb_seededalloc_uninit(upb_seededalloc *a) { + if (a->alloc == default_alloc && a->need_cleanup) { + default_alloc_cleanup(a->alloc_ud); + } +} + +UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, + upb_alloc_func *alloc, + void *ud) { + assert(!a->returned_allocfunc); + a->alloc = alloc; + a->alloc_ud = ud; +} + +upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) { + a->returned_allocfunc = true; + return seeded_alloc; +} diff --git a/upb/env.h b/upb/env.h new file mode 100644 index 0000000..500b0ac --- /dev/null +++ b/upb/env.h @@ -0,0 +1,256 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * A upb::Environment provides a means for injecting malloc and an + * error-reporting callback into encoders/decoders. This allows them to be + * independent of nearly all assumptions about their actual environment. + * + * It is also a container for allocating the encoders/decoders themselves that + * insulates clients from knowing their actual size. This provides ABI + * compatibility even if the size of the objects change. And this allows the + * structure definitions to be in the .c files instead of the .h files, making + * the .h files smaller and more readable. + */ + +#include "upb/upb.h" + +#ifndef UPB_ENV_H_ +#define UPB_ENV_H_ + +#ifdef __cplusplus +namespace upb { +class Environment; +class SeededAllocator; +} +#endif + +UPB_DECLARE_TYPE(upb::Environment, upb_env); +UPB_DECLARE_TYPE(upb::SeededAllocator, upb_seededalloc); + +typedef void *upb_alloc_func(void *ud, void *ptr, size_t oldsize, size_t size); +typedef void upb_cleanup_func(void *ud); +typedef bool upb_error_func(void *ud, const upb_status *status); + +// An environment is *not* thread-safe. +UPB_DEFINE_CLASS0(upb::Environment, + public: + Environment(); + ~Environment(); + + // Set a custom memory allocation function for the environment. May ONLY + // be called before any calls to Malloc()/Realloc()/AddCleanup() below. + // If this is not called, the system realloc() function will be used. + // The given user pointer "ud" will be passed to the allocation function. + // + // The allocation function will not receive corresponding "free" calls. it + // must ensure that the memory is valid for the lifetime of the Environment, + // but it may be reclaimed any time thereafter. The likely usage is that + // "ud" points to a stateful allocator, and that the allocator frees all + // memory, arena-style, when it is destroyed. In this case the allocator must + // outlive the Environment. Another possibility is that the allocation + // function returns GC-able memory that is guaranteed to be GC-rooted for the + // life of the Environment. + void SetAllocationFunction(upb_alloc_func* alloc, void* ud); + + template + void SetAllocator(T* allocator) { + SetAllocationFunction(allocator->GetAllocationFunction(), allocator); + } + + // Set a custom error reporting function. + void SetErrorFunction(upb_error_func* func, void* ud); + + // Set the error reporting function to simply copy the status to the given + // status and abort. + void ReportErrorsTo(Status* status); + + // Returns true if all allocations and AddCleanup() calls have succeeded, + // and no errors were reported with ReportError() (except ones that recovered + // successfully). + bool ok() const; + + ////////////////////////////////////////////////////////////////////////////// + // Functions for use by encoders/decoders. + + // Reports an error to this environment's callback, returning true if + // the caller should try to recover. + bool ReportError(const Status* status); + + // Allocate memory. Uses the environment's allocation function. + // + // There is no need to free(). All memory will be freed automatically, but is + // guaranteed to outlive the Environment. + void* Malloc(size_t size); + + // Reallocate memory. Preserves "oldsize" bytes from the existing buffer + // Requires: oldsize <= existing_size. + // + // TODO(haberman): should we also enforce that oldsize <= size? + void* Realloc(void* ptr, size_t oldsize, size_t size); + + // Add a cleanup function to run when the environment is destroyed. + // Returns false on out-of-memory. + // + // The first call to AddCleanup() after SetAllocationFunction() is guaranteed + // to return true -- this makes it possible to robustly set a cleanup handler + // for a custom allocation function. + bool AddCleanup(upb_cleanup_func* func, void* ud); + + // Total number of bytes that have been allocated. It is undefined what + // Realloc() does to this counter. + size_t BytesAllocated() const; + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(Environment); +, +UPB_DEFINE_STRUCT0(upb_env, + bool ok_; + size_t bytes_allocated; + + // Alloc function. + upb_alloc_func *alloc; + void *alloc_ud; + + // Error-reporting function. + upb_error_func *err; + void *err_ud; + + // Userdata for default alloc func. + void *default_alloc_ud; + + // Cleanup entries. Pointer to a cleanup_ent, defined in env.c + void *cleanup_head; + + // For future expansion, since the size of this struct is exposed to users. + void *future1; + void *future2; +)); + +UPB_BEGIN_EXTERN_C + +void upb_env_init(upb_env *e); +void upb_env_uninit(upb_env *e); +void upb_env_setallocfunc(upb_env *e, upb_alloc_func *func, void *ud); +void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud); +void upb_env_reporterrorsto(upb_env *e, upb_status *status); +bool upb_env_ok(const upb_env *e); +bool upb_env_reporterror(upb_env *e, const upb_status *status); +void *upb_env_malloc(upb_env *e, size_t size); +void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size); +bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud); +size_t upb_env_bytesallocated(const upb_env *e); + +UPB_END_EXTERN_C + +// An allocator that allocates from an initial memory region (likely the stack) +// before falling back to another allocator. +UPB_DEFINE_CLASS0(upb::SeededAllocator, + public: + SeededAllocator(void *mem, size_t len); + ~SeededAllocator(); + + // Set a custom fallback memory allocation function for the allocator, to use + // once the initial region runs out. + // + // May ONLY be called before GetAllocationFunction(). If this is not + // called, the system realloc() will be the fallback allocator. + void SetFallbackAllocator(upb_alloc_func *alloc, void *ud); + + // Gets the allocation function for this allocator. + upb_alloc_func* GetAllocationFunction(); + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator); +, +UPB_DEFINE_STRUCT0(upb_seededalloc, + // Fallback alloc function. + upb_alloc_func *alloc; + upb_cleanup_func *alloc_cleanup; + void *alloc_ud; + bool need_cleanup; + bool returned_allocfunc; + + // Userdata for default alloc func. + void *default_alloc_ud; + + // Pointers for the initial memory region. + void *mem_base; + void *mem_ptr; + void *mem_limit; + + // For future expansion, since the size of this struct is exposed to users. + void *future1; + void *future2; +)); + +UPB_BEGIN_EXTERN_C + +void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len); +void upb_seededalloc_uninit(upb_seededalloc *a); +void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, upb_alloc_func *func, + void *ud); +upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a); + +UPB_END_EXTERN_C + +#ifdef __cplusplus + +namespace upb { + +inline Environment::Environment() { + upb_env_init(this); +} +inline Environment::~Environment() { + upb_env_uninit(this); +} +inline void Environment::SetAllocationFunction(upb_alloc_func *alloc, + void *ud) { + upb_env_setallocfunc(this, alloc, ud); +} +inline void Environment::SetErrorFunction(upb_error_func *func, void *ud) { + upb_env_seterrorfunc(this, func, ud); +} +inline void Environment::ReportErrorsTo(Status* status) { + upb_env_reporterrorsto(this, status); +} +inline bool Environment::ok() const { + return upb_env_ok(this); +} +inline bool Environment::ReportError(const Status* status) { + return upb_env_reporterror(this, status); +} +inline void *Environment::Malloc(size_t size) { + return upb_env_malloc(this, size); +} +inline void *Environment::Realloc(void *ptr, size_t oldsize, size_t size) { + return upb_env_realloc(this, ptr, oldsize, size); +} +inline bool Environment::AddCleanup(upb_cleanup_func *func, void *ud) { + return upb_env_addcleanup(this, func, ud); +} +inline size_t Environment::BytesAllocated() const { + return upb_env_bytesallocated(this); +} + +inline SeededAllocator::SeededAllocator(void *mem, size_t len) { + upb_seededalloc_init(this, mem, len); +} +inline SeededAllocator::~SeededAllocator() { + upb_seededalloc_uninit(this); +} +inline void SeededAllocator::SetFallbackAllocator(upb_alloc_func *alloc, + void *ud) { + upb_seededalloc_setfallbackalloc(this, alloc, ud); +} +inline upb_alloc_func *SeededAllocator::GetAllocationFunction() { + return upb_seededalloc_getallocfunc(this); +} + +} // namespace upb + +#endif // __cplusplus + +#endif // UPB_ENV_H_ diff --git a/upb/handlers-inl.h b/upb/handlers-inl.h index 87a755f..70ddf91 100644 --- a/upb/handlers-inl.h +++ b/upb/handlers-inl.h @@ -147,12 +147,17 @@ template struct disable_if_same {}; template void DeletePointer(void *p) { delete static_cast(p); } template -struct FirstUnlessVoid { +struct FirstUnlessVoidOrBool { typedef T1 value; }; template -struct FirstUnlessVoid { +struct FirstUnlessVoidOrBool { + typedef T2 value; +}; + +template +struct FirstUnlessVoidOrBool { typedef T2 value; }; @@ -534,10 +539,14 @@ inline MethodSig4 MatchFunc(R (C::*f)(P1, P2, P3, P4)) { // // 1. If the function returns void, make it return the expected type and with // a value that always indicates success. -// 2. If the function is expected to return void* but doesn't, wrap it so it -// does (either by returning the closure param if the wrapped function -// returns void or by casting a different pointer type to void* for -// return). +// 2. If the function returns bool, make it return the expected type with a +// value that indicates success or failure. +// +// The "expected type" for return is: +// 1. void* for start handlers. If the closure parameter has a different type +// we will cast it to void* for the return in the success case. +// 2. size_t for string buffer handlers. +// 3. bool for everything else. // Template parameters are FuncN type and desired return type. template @@ -926,10 +935,13 @@ inline Handler::Handler(F func) attr_.SetClosureType(UniquePtrForType()); // We use the closure type (from the first parameter) if the return type is - // void. This is all nonsense for non START* handlers, but it doesn't matter - // because in that case the value will be ignored. - typedef typename FirstUnlessVoid::value + // void or bool, since these are the two cases we wrap to return the closure's + // type anyway. + // + // This is all nonsense for non START* handlers, but it doesn't matter because + // in that case the value will be ignored. + typedef typename FirstUnlessVoidOrBool::value EffectiveReturn; attr_.SetReturnClosureType(UniquePtrForType()); } @@ -1124,9 +1136,7 @@ inline BytesHandler::BytesHandler() { upb_byteshandler_init(this); } -inline BytesHandler::~BytesHandler() { - upb_byteshandler_uninit(this); -} +inline BytesHandler::~BytesHandler() {} } // namespace upb diff --git a/upb/handlers.c b/upb/handlers.c index c0fd271..93f22a1 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -40,8 +40,10 @@ static void freehandlers(upb_refcounted *r) { static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, void *closure) { const upb_handlers *h = (const upb_handlers*)r; - upb_msg_iter i; - for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, h->msg); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (!upb_fielddef_issubmsg(f)) continue; const upb_handlers *sub = upb_handlers_getsubhandlers(h, f); @@ -70,8 +72,10 @@ static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, // For each submessage field, get or create a handlers object and set it as // the subhandlers. - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (!upb_fielddef_issubmsg(f)) continue; @@ -172,7 +176,14 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, if (closure_type && *context_closure_type && closure_type != *context_closure_type) { // TODO(haberman): better message for debugging. - upb_status_seterrmsg(&h->status_, "closure type does not match"); + if (f) { + upb_status_seterrf(&h->status_, + "closure type does not match for field %s", + upb_fielddef_name(f)); + } else { + upb_status_seterrmsg( + &h->status_, "closure type does not match for message-level handler"); + } return false; } @@ -428,8 +439,10 @@ bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { // Check that there are no closure mismatches due to missing Start* handlers // or subhandlers with different type-level types. - upb_msg_iter j; - for(upb_msg_begin(&j, h->msg); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_msg_field_iter j; + for(upb_msg_field_begin(&j, h->msg); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { const upb_fielddef *f = upb_msg_iter_field(&j); if (upb_fielddef_isseq(f)) { diff --git a/upb/handlers.h b/upb/handlers.h index 2267d98..1b8864a 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -755,10 +755,8 @@ UPB_DEFINE_STRUCT0(upb_byteshandler, )); void upb_byteshandler_init(upb_byteshandler *h); -void upb_byteshandler_uninit(upb_byteshandler *h); // Caller must ensure that "d" outlives the handlers. -// TODO(haberman): support handlerfree function for the data. // TODO(haberman): should this have a "freeze" operation? It's not necessary // for memory management, but could be useful to force immutability and provide // a convenient moment to verify that all registration succeeded. diff --git a/upb/json/parser.c b/upb/json/parser.c index cfe1def..4f4a96e 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -33,6 +33,71 @@ #include "upb/json/parser.h" +#define UPB_JSON_MAX_DEPTH 64 + +typedef struct { + upb_sink sink; + + // The current message in which we're parsing, and the field whose value we're + // expecting next. + const upb_msgdef *m; + const upb_fielddef *f; + + // We are in a repeated-field context, ready to emit mapentries as + // submessages. This flag alters the start-of-object (open-brace) behavior to + // begin a sequence of mapentry messages rather than a single submessage. + bool is_map; + + // We are in a map-entry message context. This flag is set when parsing the + // value field of a single map entry and indicates to all value-field parsers + // (subobjects, strings, numbers, and bools) that the map-entry submessage + // should end as soon as the value is parsed. + bool is_mapentry; + + // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + // message's map field that we're currently parsing. This differs from |f| + // because |f| is the field in the *current* message (i.e., the map-entry + // message itself), not the parent's field that leads to this map. + const upb_fielddef *mapfield; +} upb_jsonparser_frame; + +struct upb_json_parser { + upb_env *env; + upb_byteshandler input_handler_; + upb_bytessink input_; + + // Stack to track the JSON scopes we are in. + upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; + upb_jsonparser_frame *top; + upb_jsonparser_frame *limit; + + upb_status *status; + + // Ragel's internal parsing stack for the parsing state machine. + int current_state; + int parser_stack[UPB_JSON_MAX_DEPTH]; + int parser_top; + + // The handle for the current buffer. + const upb_bufhandle *handle; + + // Accumulate buffer. See details in parser.rl. + const char *accumulated; + size_t accumulated_len; + char *accumulate_buf; + size_t accumulate_buf_size; + + // Multi-part text data. See details in parser.rl. + int multipart_state; + upb_selector_t string_selector; + + // Input capture. See details in parser.rl. + const char *capture; + + // Intermediate result of parsing a unicode escape sequence. + uint32_t digit; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false // Used to signal that a capture has been suspended. @@ -235,12 +300,13 @@ static void accumulate_clear(upb_json_parser *p) { // Used internally by accumulate_append(). static bool accumulate_realloc(upb_json_parser *p, size_t need) { - size_t new_size = UPB_MAX(p->accumulate_buf_size, 128); + size_t old_size = p->accumulate_buf_size; + size_t new_size = UPB_MAX(old_size, 128); while (new_size < need) { new_size = saturating_multiply(new_size, 2); } - void *mem = realloc(p->accumulate_buf, new_size); + void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); if (!mem) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); return false; @@ -262,16 +328,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, return true; } - if (p->accumulate_buf_size - p->accumulated_len < len) { - size_t need; - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } + size_t need; + if (!checked_add(p->accumulated_len, len, &need)) { + upb_status_seterrmsg(p->status, "Integer overflow."); + return false; + } - if (!accumulate_realloc(p, need)) { - return false; - } + if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { + return false; } if (p->accumulated != p->accumulate_buf) { @@ -510,16 +574,28 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } +static bool parse_number(upb_json_parser *p); + static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } + return parse_number(p); +} + +static bool parse_number(upb_json_parser *p) { + // strtol() and friends unfortunately do not support specifying the length of + // the input string, so we need to force a copy into a NULL-terminated buffer. + if (!multipart_text(p, "\0", 1, false)) { + return false; + } + size_t len; const char *buf = accumulate_getptr(p, &len); - const char *myend = buf + len; - char *end; + const char *myend = buf + len - 1; // One for NULL. + char *end; switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { @@ -575,10 +651,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) { } multipart_end(p); + return true; err: - upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len); + upb_status_seterrf(p->status, "error parsing number: %s", buf); multipart_end(p); return false; } @@ -593,6 +670,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) { bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); UPB_ASSERT_VAR(ok, ok); + return true; } @@ -609,6 +687,8 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -686,6 +766,7 @@ static bool end_stringval(upb_json_parser *p) { } multipart_end(p); + return ok; } @@ -694,54 +775,217 @@ static void start_member(upb_json_parser *p) { multipart_startaccum(p); } -static bool end_member(upb_json_parser *p) { - assert(!p->top->f); +// Helper: invoked during parse_mapentry() to emit the mapentry message's key +// field based on the current contents of the accumulate buffer. +static bool parse_mapentry_key(upb_json_parser *p) { + size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + // Emit the key field. We do a bit of ad-hoc parsing here because the + // parser state machine has already decided that this is a string field + // name, and we are reinterpreting it as some arbitrary key type. In + // particular, integer and bool keys are quoted, so we need to parse the + // quoted string contents here. - if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no key"); return false; } + switch (upb_fielddef_type(p->top->f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + // Invoke end_number. The accum buffer has the number's text already. + if (!parse_number(p)) { + return false; + } + break; + case UPB_TYPE_BOOL: + if (len == 4 && !strncmp(buf, "true", 4)) { + if (!parser_putbool(p, true)) { + return false; + } + } else if (len == 5 && !strncmp(buf, "false", 5)) { + if (!parser_putbool(p, false)) { + return false; + } + } else { + upb_status_seterrmsg(p->status, + "Map bool key not 'true' or 'false'"); + return false; + } + multipart_end(p); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_sink subsink; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, len, &subsink); + sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); + upb_sink_putstring(&subsink, sel, buf, len, NULL); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&subsink, sel); + multipart_end(p); + break; + } + default: + upb_status_seterrmsg(p->status, "Invalid field type for map key"); + return false; + } - p->top->f = f; - multipart_end(p); + return true; +} + +// Helper: emit one map entry (as a submessage in the map field sequence). This +// is invoked from end_membername(), at the end of the map entry's key string, +// with the map key in the accumulate buffer. It parses the key from that +// buffer, emits the handler calls to start the mapentry submessage (setting up +// its subframe in the process), and sets up state in the subframe so that the +// value parser (invoked next) will emit the mapentry's value field and then +// end the mapentry message. + +static bool handle_mapentry(upb_json_parser *p) { + // Map entry: p->top->sink is the seq frame, so we need to start a frame + // for the mapentry itself, and then set |f| in that frame so that the map + // value field is parsed, and also set a flag to end the frame after the + // map-entry value is parsed. + if (!check_stack(p)) return false; + + const upb_fielddef *mapfield = p->top->mapfield; + const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield); + + upb_jsonparser_frame *inner = p->top + 1; + p->top->f = mapfield; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = mapentrymsg; + inner->mapfield = mapfield; + inner->is_map = false; + + // Don't set this to true *yet* -- we reuse parsing handlers below to push + // the key field value to the sink, and these handlers will pop the frame + // if they see is_mapentry (when invoked by the parser state machine, they + // would have just seen the map-entry value, not key). + inner->is_mapentry = false; + p->top = inner; + + // send STARTMSG in submsg frame. + upb_sink_startmsg(&p->top->sink); + + parse_mapentry_key(p); + + // Set up the value field to receive the map-entry value. + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); + p->top->is_mapentry = true; // set up to pop frame after value is parsed. + p->top->mapfield = mapfield; + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no value"); + return false; + } return true; } -static void clear_member(upb_json_parser *p) { p->top->f = NULL; } +static bool end_membername(upb_json_parser *p) { + assert(!p->top->f); + + if (p->top->is_map) { + return handle_mapentry(p); + } else { + size_t len; + const char *buf = accumulate_getptr(p, &len); + const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + + if (!f) { + // TODO(haberman): Ignore unknown fields if requested/configured to do so. + upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + return false; + } + + p->top->f = f; + multipart_end(p); + + return true; + } +} + +static void end_member(upb_json_parser *p) { + // If we just parsed a map-entry value, end that frame too. + if (p->top->is_mapentry) { + assert(p->top > p->stack); + // send ENDMSG on submsg. + upb_status s = UPB_STATUS_INIT; + upb_sink_endmsg(&p->top->sink, &s); + const upb_fielddef* mapfield = p->top->mapfield; + + // send ENDSUBMSG in repeated-field-of-mapentries frame. + p->top--; + upb_selector_t sel; + bool ok = upb_handlers_getselector(mapfield, + UPB_HANDLER_ENDSUBMSG, &sel); + UPB_ASSERT_VAR(ok, ok); + upb_sink_endsubmsg(&p->top->sink, sel); + } + + p->top->f = NULL; +} static bool start_subobject(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_issubmsg(p->top->f)) { + if (upb_fielddef_ismap(p->top->f)) { + // Beginning of a map. Start a new parser frame in a repeated-field + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + upb_sink_startseq(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->mapfield = p->top->f; + inner->f = NULL; + inner->is_map = true; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else if (upb_fielddef_issubmsg(p->top->f)) { + // Beginning of a subobject. Start a new parser frame in the submsg + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else { upb_status_seterrf(p->status, "Object specified for non-message/group field: %s", upb_fielddef_name(p->top->f)); return false; } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->f = NULL; - p->top = inner; - - return true; } static void end_subobject(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (p->top->is_map) { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + upb_sink_endseq(&p->top->sink, sel); + } else { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } static bool start_array(upb_json_parser *p) { @@ -761,6 +1005,8 @@ static bool start_array(upb_json_parser *p) { upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; return true; @@ -775,12 +1021,16 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - upb_sink_startmsg(&p->top->sink); + if (!p->top->is_map) { + upb_sink_startmsg(&p->top->sink); + } } static void end_object(upb_json_parser *p) { - upb_status status; - upb_sink_endmsg(&p->top->sink, &status); + if (!p->top->is_map) { + upb_status status; + upb_sink_endmsg(&p->top->sink, &status); + } } @@ -805,11 +1055,11 @@ static void end_object(upb_json_parser *p) { // final state once, when the closing '"' is seen. -#line 901 "upb/json/parser.rl" +#line 1151 "upb/json/parser.rl" -#line 813 "upb/json/parser.c" +#line 1063 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -960,7 +1210,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 904 "upb/json/parser.rl" +#line 1154 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -980,7 +1230,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 984 "upb/json/parser.c" +#line 1234 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1055,118 +1305,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 816 "upb/json/parser.rl" +#line 1066 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 817 "upb/json/parser.rl" +#line 1067 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 821 "upb/json/parser.rl" +#line 1071 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 822 "upb/json/parser.rl" +#line 1072 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 828 "upb/json/parser.rl" +#line 1078 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 829 "upb/json/parser.rl" +#line 1079 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 830 "upb/json/parser.rl" +#line 1080 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 836 "upb/json/parser.rl" +#line 1086 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 842 "upb/json/parser.rl" +#line 1092 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 845 "upb/json/parser.rl" +#line 1095 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 847 "upb/json/parser.rl" +#line 1097 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 852 "upb/json/parser.rl" +#line 1102 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 853 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_member(parser)); } +#line 1103 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 856 "upb/json/parser.rl" - { clear_member(parser); } +#line 1106 "upb/json/parser.rl" + { end_member(parser); } break; case 14: -#line 862 "upb/json/parser.rl" +#line 1112 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 865 "upb/json/parser.rl" +#line 1115 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 871 "upb/json/parser.rl" +#line 1121 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 875 "upb/json/parser.rl" +#line 1125 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 880 "upb/json/parser.rl" +#line 1130 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 881 "upb/json/parser.rl" +#line 1131 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 883 "upb/json/parser.rl" +#line 1133 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 884 "upb/json/parser.rl" +#line 1134 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 886 "upb/json/parser.rl" +#line 1136 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 888 "upb/json/parser.rl" +#line 1138 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 890 "upb/json/parser.rl" +#line 1140 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 892 "upb/json/parser.rl" +#line 1142 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 893 "upb/json/parser.rl" +#line 1143 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 898 "upb/json/parser.rl" +#line 1148 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1170 "upb/json/parser.c" +#line 1420 "upb/json/parser.c" } } @@ -1179,7 +1429,7 @@ _again: _out: {} } -#line 923 "upb/json/parser.rl" +#line 1173 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -1201,52 +1451,58 @@ bool end(void *closure, const void *hd) { return true; } - -/* Public API *****************************************************************/ - -void upb_json_parser_init(upb_json_parser *p, upb_status *status) { - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); - p->status = status; -} - -void upb_json_parser_uninit(upb_json_parser *p) { - upb_byteshandler_uninit(&p->input_handler_); - free(p->accumulate_buf); -} - -void upb_json_parser_reset(upb_json_parser *p) { +static void json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; + p->top->is_map = false; + p->top->is_mapentry = false; int cs; int top; // Emit Ragel initialization of the parser. -#line 1232 "upb/json/parser.c" +#line 1465 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 971 "upb/json/parser.rl" +#line 1204 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); p->multipart_state = MULTIPART_INACTIVE; p->capture = NULL; + p->accumulated = NULL; } -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { - upb_json_parser_reset(p); - upb_sink_reset(&p->top->sink, sink->handlers, sink->closure); - p->top->m = upb_handlers_msgdef(sink->handlers); - p->accumulated = NULL; + +/* Public API *****************************************************************/ + +upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif + upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser)); + if (!p) return false; + + p->env = env; + p->limit = p->stack + UPB_JSON_MAX_DEPTH; + p->accumulate_buf = NULL; + p->accumulate_buf_size = 0; + upb_byteshandler_init(&p->input_handler_); + upb_byteshandler_setstring(&p->input_handler_, parse, NULL); + upb_byteshandler_setendstr(&p->input_handler_, end, NULL); + upb_bytessink_reset(&p->input_, &p->input_handler_, p); + + json_parser_reset(p); + upb_sink_reset(&p->top->sink, output->handlers, output->closure); + p->top->m = upb_handlers_msgdef(output->handlers); + + // If this fails, uncomment and increase the value in parser.h. + // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); + assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); + return p; } upb_bytessink *upb_json_parser_input(upb_json_parser *p) { diff --git a/upb/json/parser.h b/upb/json/parser.h index 51578f2..b932adf 100644 --- a/upb/json/parser.h +++ b/upb/json/parser.h @@ -11,6 +11,7 @@ #ifndef UPB_JSON_PARSER_H_ #define UPB_JSON_PARSER_H_ +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -23,78 +24,32 @@ class Parser; UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser); -// Internal-only struct used by the parser. -typedef struct { - UPB_PRIVATE_FOR_CPP - upb_sink sink; - const upb_msgdef *m; - const upb_fielddef *f; -} upb_jsonparser_frame; - - /* upb::json::Parser **********************************************************/ -#define UPB_JSON_MAX_DEPTH 64 +// Preallocation hint: parser won't allocate more bytes than this when first +// constructed. This hint may be an overestimate for some build configurations. +// But if the parser library is upgraded without recompiling the application, +// it may be an underestimate. +#define UPB_JSON_PARSER_SIZE 3568 + +#ifdef __cplusplus // Parses an incoming BytesStream, pushing the results to the destination sink. -UPB_DEFINE_CLASS0(upb::json::Parser, +class upb::json::Parser { public: - Parser(Status* status); - ~Parser(); + static Parser* Create(Environment* env, Sink* output); - // Resets the state of the printer, so that it will expect to begin a new - // document. - void Reset(); - - // Resets the output pointer which will serve as our closure. Implies - // Reset(). - void ResetOutput(Sink* output); - - // The input to the printer. BytesSink* input(); -, -UPB_DEFINE_STRUCT0(upb_json_parser, - upb_byteshandler input_handler_; - upb_bytessink input_; - - // Stack to track the JSON scopes we are in. - upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; - upb_jsonparser_frame *top; - upb_jsonparser_frame *limit; - upb_status *status; + private: + UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser); +}; - // Ragel's internal parsing stack for the parsing state machine. - int current_state; - int parser_stack[UPB_JSON_MAX_DEPTH]; - int parser_top; - - // The handle for the current buffer. - const upb_bufhandle *handle; - - // Accumulate buffer. See details in parser.rl. - const char *accumulated; - size_t accumulated_len; - char *accumulate_buf; - size_t accumulate_buf_size; - - // Multi-part text data. See details in parser.rl. - int multipart_state; - upb_selector_t string_selector; - - // Input capture. See details in parser.rl. - const char *capture; - - // Intermediate result of parsing a unicode escape sequence. - uint32_t digit; -)); +#endif UPB_BEGIN_EXTERN_C -void upb_json_parser_init(upb_json_parser *p, upb_status *status); -void upb_json_parser_uninit(upb_json_parser *p); -void upb_json_parser_reset(upb_json_parser *p); -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *output); +upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output); upb_bytessink *upb_json_parser_input(upb_json_parser *p); UPB_END_EXTERN_C @@ -103,11 +58,8 @@ UPB_END_EXTERN_C namespace upb { namespace json { -inline Parser::Parser(Status* status) { upb_json_parser_init(this, status); } -inline Parser::~Parser() { upb_json_parser_uninit(this); } -inline void Parser::Reset() { upb_json_parser_reset(this); } -inline void Parser::ResetOutput(Sink* output) { - upb_json_parser_resetoutput(this, output); +inline Parser* Parser::Create(Environment* env, Sink* output) { + return upb_json_parser_create(env, output); } inline BytesSink* Parser::input() { return upb_json_parser_input(this); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index b72bc10..3a400ea 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -31,6 +31,71 @@ #include "upb/json/parser.h" +#define UPB_JSON_MAX_DEPTH 64 + +typedef struct { + upb_sink sink; + + // The current message in which we're parsing, and the field whose value we're + // expecting next. + const upb_msgdef *m; + const upb_fielddef *f; + + // We are in a repeated-field context, ready to emit mapentries as + // submessages. This flag alters the start-of-object (open-brace) behavior to + // begin a sequence of mapentry messages rather than a single submessage. + bool is_map; + + // We are in a map-entry message context. This flag is set when parsing the + // value field of a single map entry and indicates to all value-field parsers + // (subobjects, strings, numbers, and bools) that the map-entry submessage + // should end as soon as the value is parsed. + bool is_mapentry; + + // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + // message's map field that we're currently parsing. This differs from |f| + // because |f| is the field in the *current* message (i.e., the map-entry + // message itself), not the parent's field that leads to this map. + const upb_fielddef *mapfield; +} upb_jsonparser_frame; + +struct upb_json_parser { + upb_env *env; + upb_byteshandler input_handler_; + upb_bytessink input_; + + // Stack to track the JSON scopes we are in. + upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; + upb_jsonparser_frame *top; + upb_jsonparser_frame *limit; + + upb_status *status; + + // Ragel's internal parsing stack for the parsing state machine. + int current_state; + int parser_stack[UPB_JSON_MAX_DEPTH]; + int parser_top; + + // The handle for the current buffer. + const upb_bufhandle *handle; + + // Accumulate buffer. See details in parser.rl. + const char *accumulated; + size_t accumulated_len; + char *accumulate_buf; + size_t accumulate_buf_size; + + // Multi-part text data. See details in parser.rl. + int multipart_state; + upb_selector_t string_selector; + + // Input capture. See details in parser.rl. + const char *capture; + + // Intermediate result of parsing a unicode escape sequence. + uint32_t digit; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false // Used to signal that a capture has been suspended. @@ -233,12 +298,13 @@ static void accumulate_clear(upb_json_parser *p) { // Used internally by accumulate_append(). static bool accumulate_realloc(upb_json_parser *p, size_t need) { - size_t new_size = UPB_MAX(p->accumulate_buf_size, 128); + size_t old_size = p->accumulate_buf_size; + size_t new_size = UPB_MAX(old_size, 128); while (new_size < need) { new_size = saturating_multiply(new_size, 2); } - void *mem = realloc(p->accumulate_buf, new_size); + void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); if (!mem) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); return false; @@ -260,16 +326,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, return true; } - if (p->accumulate_buf_size - p->accumulated_len < len) { - size_t need; - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } + size_t need; + if (!checked_add(p->accumulated_len, len, &need)) { + upb_status_seterrmsg(p->status, "Integer overflow."); + return false; + } - if (!accumulate_realloc(p, need)) { - return false; - } + if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { + return false; } if (p->accumulated != p->accumulate_buf) { @@ -508,16 +572,28 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } +static bool parse_number(upb_json_parser *p); + static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } + return parse_number(p); +} + +static bool parse_number(upb_json_parser *p) { + // strtol() and friends unfortunately do not support specifying the length of + // the input string, so we need to force a copy into a NULL-terminated buffer. + if (!multipart_text(p, "\0", 1, false)) { + return false; + } + size_t len; const char *buf = accumulate_getptr(p, &len); - const char *myend = buf + len; - char *end; + const char *myend = buf + len - 1; // One for NULL. + char *end; switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { @@ -573,10 +649,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) { } multipart_end(p); + return true; err: - upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len); + upb_status_seterrf(p->status, "error parsing number: %s", buf); multipart_end(p); return false; } @@ -591,6 +668,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) { bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); UPB_ASSERT_VAR(ok, ok); + return true; } @@ -607,6 +685,8 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -684,6 +764,7 @@ static bool end_stringval(upb_json_parser *p) { } multipart_end(p); + return ok; } @@ -692,54 +773,217 @@ static void start_member(upb_json_parser *p) { multipart_startaccum(p); } -static bool end_member(upb_json_parser *p) { - assert(!p->top->f); +// Helper: invoked during parse_mapentry() to emit the mapentry message's key +// field based on the current contents of the accumulate buffer. +static bool parse_mapentry_key(upb_json_parser *p) { + size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + // Emit the key field. We do a bit of ad-hoc parsing here because the + // parser state machine has already decided that this is a string field + // name, and we are reinterpreting it as some arbitrary key type. In + // particular, integer and bool keys are quoted, so we need to parse the + // quoted string contents here. - if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no key"); return false; } + switch (upb_fielddef_type(p->top->f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + // Invoke end_number. The accum buffer has the number's text already. + if (!parse_number(p)) { + return false; + } + break; + case UPB_TYPE_BOOL: + if (len == 4 && !strncmp(buf, "true", 4)) { + if (!parser_putbool(p, true)) { + return false; + } + } else if (len == 5 && !strncmp(buf, "false", 5)) { + if (!parser_putbool(p, false)) { + return false; + } + } else { + upb_status_seterrmsg(p->status, + "Map bool key not 'true' or 'false'"); + return false; + } + multipart_end(p); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_sink subsink; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, len, &subsink); + sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); + upb_sink_putstring(&subsink, sel, buf, len, NULL); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&subsink, sel); + multipart_end(p); + break; + } + default: + upb_status_seterrmsg(p->status, "Invalid field type for map key"); + return false; + } - p->top->f = f; - multipart_end(p); + return true; +} + +// Helper: emit one map entry (as a submessage in the map field sequence). This +// is invoked from end_membername(), at the end of the map entry's key string, +// with the map key in the accumulate buffer. It parses the key from that +// buffer, emits the handler calls to start the mapentry submessage (setting up +// its subframe in the process), and sets up state in the subframe so that the +// value parser (invoked next) will emit the mapentry's value field and then +// end the mapentry message. + +static bool handle_mapentry(upb_json_parser *p) { + // Map entry: p->top->sink is the seq frame, so we need to start a frame + // for the mapentry itself, and then set |f| in that frame so that the map + // value field is parsed, and also set a flag to end the frame after the + // map-entry value is parsed. + if (!check_stack(p)) return false; + + const upb_fielddef *mapfield = p->top->mapfield; + const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield); + + upb_jsonparser_frame *inner = p->top + 1; + p->top->f = mapfield; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = mapentrymsg; + inner->mapfield = mapfield; + inner->is_map = false; + + // Don't set this to true *yet* -- we reuse parsing handlers below to push + // the key field value to the sink, and these handlers will pop the frame + // if they see is_mapentry (when invoked by the parser state machine, they + // would have just seen the map-entry value, not key). + inner->is_mapentry = false; + p->top = inner; + + // send STARTMSG in submsg frame. + upb_sink_startmsg(&p->top->sink); + + parse_mapentry_key(p); + + // Set up the value field to receive the map-entry value. + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); + p->top->is_mapentry = true; // set up to pop frame after value is parsed. + p->top->mapfield = mapfield; + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no value"); + return false; + } return true; } -static void clear_member(upb_json_parser *p) { p->top->f = NULL; } +static bool end_membername(upb_json_parser *p) { + assert(!p->top->f); + + if (p->top->is_map) { + return handle_mapentry(p); + } else { + size_t len; + const char *buf = accumulate_getptr(p, &len); + const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + + if (!f) { + // TODO(haberman): Ignore unknown fields if requested/configured to do so. + upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + return false; + } + + p->top->f = f; + multipart_end(p); + + return true; + } +} + +static void end_member(upb_json_parser *p) { + // If we just parsed a map-entry value, end that frame too. + if (p->top->is_mapentry) { + assert(p->top > p->stack); + // send ENDMSG on submsg. + upb_status s = UPB_STATUS_INIT; + upb_sink_endmsg(&p->top->sink, &s); + const upb_fielddef* mapfield = p->top->mapfield; + + // send ENDSUBMSG in repeated-field-of-mapentries frame. + p->top--; + upb_selector_t sel; + bool ok = upb_handlers_getselector(mapfield, + UPB_HANDLER_ENDSUBMSG, &sel); + UPB_ASSERT_VAR(ok, ok); + upb_sink_endsubmsg(&p->top->sink, sel); + } + + p->top->f = NULL; +} static bool start_subobject(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_issubmsg(p->top->f)) { + if (upb_fielddef_ismap(p->top->f)) { + // Beginning of a map. Start a new parser frame in a repeated-field + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + upb_sink_startseq(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->mapfield = p->top->f; + inner->f = NULL; + inner->is_map = true; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else if (upb_fielddef_issubmsg(p->top->f)) { + // Beginning of a subobject. Start a new parser frame in the submsg + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else { upb_status_seterrf(p->status, "Object specified for non-message/group field: %s", upb_fielddef_name(p->top->f)); return false; } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->f = NULL; - p->top = inner; - - return true; } static void end_subobject(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (p->top->is_map) { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + upb_sink_endseq(&p->top->sink, sel); + } else { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } static bool start_array(upb_json_parser *p) { @@ -759,6 +1003,8 @@ static bool start_array(upb_json_parser *p) { upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; return true; @@ -773,12 +1019,16 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - upb_sink_startmsg(&p->top->sink); + if (!p->top->is_map) { + upb_sink_startmsg(&p->top->sink); + } } static void end_object(upb_json_parser *p) { - upb_status status; - upb_sink_endmsg(&p->top->sink, &status); + if (!p->top->is_map) { + upb_status status; + upb_sink_endmsg(&p->top->sink, &status); + } } @@ -850,10 +1100,10 @@ static void end_object(upb_json_parser *p) { ws string >{ start_member(parser); } - @{ CHECK_RETURN_TOP(end_member(parser)); } + @{ CHECK_RETURN_TOP(end_membername(parser)); } ws ":" ws value2 - %{ clear_member(parser); } + %{ end_member(parser); } ws; object = @@ -941,28 +1191,11 @@ bool end(void *closure, const void *hd) { return true; } - -/* Public API *****************************************************************/ - -void upb_json_parser_init(upb_json_parser *p, upb_status *status) { - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); - p->status = status; -} - -void upb_json_parser_uninit(upb_json_parser *p) { - upb_byteshandler_uninit(&p->input_handler_); - free(p->accumulate_buf); -} - -void upb_json_parser_reset(upb_json_parser *p) { +static void json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; + p->top->is_map = false; + p->top->is_mapentry = false; int cs; int top; @@ -973,13 +1206,36 @@ void upb_json_parser_reset(upb_json_parser *p) { accumulate_clear(p); p->multipart_state = MULTIPART_INACTIVE; p->capture = NULL; + p->accumulated = NULL; } -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { - upb_json_parser_reset(p); - upb_sink_reset(&p->top->sink, sink->handlers, sink->closure); - p->top->m = upb_handlers_msgdef(sink->handlers); - p->accumulated = NULL; + +/* Public API *****************************************************************/ + +upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif + upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser)); + if (!p) return false; + + p->env = env; + p->limit = p->stack + UPB_JSON_MAX_DEPTH; + p->accumulate_buf = NULL; + p->accumulate_buf_size = 0; + upb_byteshandler_init(&p->input_handler_); + upb_byteshandler_setstring(&p->input_handler_, parse, NULL); + upb_byteshandler_setendstr(&p->input_handler_, end, NULL); + upb_bytessink_reset(&p->input_, &p->input_handler_, p); + + json_parser_reset(p); + upb_sink_reset(&p->top->sink, output->handlers, output->closure); + p->top->m = upb_handlers_msgdef(output->handlers); + + // If this fails, uncomment and increase the value in parser.h. + // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); + assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); + return p; } upb_bytessink *upb_json_parser_input(upb_json_parser *p) { diff --git a/upb/json/printer.c b/upb/json/printer.c index b996ccf..132736c 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -15,6 +15,27 @@ #include #include +struct upb_json_printer { + upb_sink input_; + // BytesSink closure. + void *subc_; + upb_bytessink *output_; + + // We track the depth so that we know when to emit startstr/endstr on the + // output. + int depth_; + + // Have we emitted the first element? This state is necessary to emit commas + // without leaving a trailing comma in arrays/maps. We keep this state per + // frame depth. + // + // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. + // We count frames (contexts in which we separate elements by commas) as both + // repeated fields and messages (maps), and the worst case is a + // message->repeated field->submessage->repeated field->... nesting. + bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; +}; + // StringPiece; a pointer plus a length. typedef struct { const char *ptr; @@ -182,13 +203,23 @@ static bool putkey(void *closure, const void *handler_data) { return true; \ } \ static bool repeated_##type(void *closure, const void *handler_data, \ - type val) { \ + type val) { \ upb_json_printer *p = closure; \ print_comma(p); \ CHK(put##type(closure, handler_data, val)); \ return true; \ } +#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \ + static bool putmapkey_##type(void *closure, const void *handler_data, \ + type val) { \ + upb_json_printer *p = closure; \ + print_data(p, "\"", 1); \ + CHK(put##type(closure, handler_data, val)); \ + print_data(p, "\":", 2); \ + return true; \ + } + TYPE_HANDLERS(double, fmt_double); TYPE_HANDLERS(float, fmt_float); TYPE_HANDLERS(bool, fmt_bool); @@ -197,7 +228,15 @@ TYPE_HANDLERS(uint32_t, fmt_int64); TYPE_HANDLERS(int64_t, fmt_int64); TYPE_HANDLERS(uint64_t, fmt_uint64); +// double and float are not allowed to be map keys. +TYPE_HANDLERS_MAPKEY(bool, fmt_bool); +TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64); + #undef TYPE_HANDLERS +#undef TYPE_HANDLERS_MAPKEY typedef struct { void *keyname; @@ -222,20 +261,36 @@ static bool scalar_enum(void *closure, const void *handler_data, return true; } -static bool repeated_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - print_comma(p); - - const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); +static void print_enum_symbolic_name(upb_json_printer *p, + const upb_enumdef *def, + int32_t val) { + const char *symbolic_name = upb_enumdef_iton(def, val); if (symbolic_name) { print_data(p, "\"", 1); putstring(p, symbolic_name, strlen(symbolic_name)); print_data(p, "\"", 1); } else { - putint32_t(closure, NULL, val); + putint32_t(p, NULL, val); } +} + +static bool repeated_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + print_comma(p); + + print_enum_symbolic_name(p, hd->enumdef, val); + + return true; +} + +static bool mapvalue_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + + print_enum_symbolic_name(p, hd->enumdef, val); return true; } @@ -251,25 +306,35 @@ static void *repeated_startsubmsg(void *closure, const void *handler_data) { return closure; } -static bool startmap(void *closure, const void *handler_data) { +static void start_frame(upb_json_printer *p) { + p->depth_++; + p->first_elem_[p->depth_] = true; + print_data(p, "{", 1); +} + +static void end_frame(upb_json_printer *p) { + print_data(p, "}", 1); + p->depth_--; +} + +static bool printer_startmsg(void *closure, const void *handler_data) { UPB_UNUSED(handler_data); upb_json_printer *p = closure; - if (p->depth_++ == 0) { + if (p->depth_ == 0) { upb_bytessink_start(p->output_, 0, &p->subc_); } - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); + start_frame(p); return true; } -static bool endmap(void *closure, const void *handler_data, upb_status *s) { +static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) { UPB_UNUSED(handler_data); UPB_UNUSED(s); upb_json_printer *p = closure; - if (--p->depth_ == 0) { + end_frame(p); + if (p->depth_ == 0) { upb_bytessink_end(p->output_); } - print_data(p, "}", 1); return true; } @@ -290,6 +355,23 @@ static bool endseq(void *closure, const void *handler_data) { return true; } +static void *startmap(void *closure, const void *handler_data) { + upb_json_printer *p = closure; + CHK(putkey(closure, handler_data)); + p->depth_++; + p->first_elem_[p->depth_] = true; + print_data(p, "{", 1); + return closure; +} + +static bool endmap(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "}", 1); + p->depth_--; + return true; +} + static size_t putstr(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { UPB_UNUSED(handler_data); @@ -404,6 +486,36 @@ static bool repeated_endstr(void *closure, const void *handler_data) { return true; } +static void *mapkeyval_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return p; +} + +static size_t mapkey_str(void *closure, const void *handler_data, + const char *str, size_t len, + const upb_bufhandle *handle) { + CHK(putstr(closure, handler_data, str, len, handle)); + return len; +} + +static bool mapkey_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\":", 2); + return true; +} + +static bool mapvalue_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + static size_t scalar_bytes(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { @@ -421,31 +533,161 @@ static size_t repeated_bytes(void *closure, const void *handler_data, return len; } -void printer_sethandlers(const void *closure, upb_handlers *h) { +static size_t mapkey_bytes(void *closure, const void *handler_data, + const char *str, size_t len, + const upb_bufhandle *handle) { + upb_json_printer *p = closure; + CHK(putbytes(closure, handler_data, str, len, handle)); + print_data(p, ":", 1); + return len; +} + +static void set_enum_hd(upb_handlers *h, + const upb_fielddef *f, + upb_handlerattr *attr) { + EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); + hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); + hd->keyname = newstrpc(h, f); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr_sethandlerdata(attr, hd); +} + +// Set up handlers for a mapentry submessage (i.e., an individual key/value pair +// in a map). +// +// TODO: Handle missing key, missing value, out-of-order key/value, or repeated +// key or value cases properly. The right way to do this is to allocate a +// temporary structure at the start of a mapentry submessage, store key and +// value data in it as key and value handlers are called, and then print the +// key/value pair once at the end of the submessage. If we don't do this, we +// should at least detect the case and throw an error. However, so far all of +// our sources that emit mapentry messages do so canonically (with one key +// field, and then one value field), so this is not a pressing concern at the +// moment. +void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) { UPB_UNUSED(closure); + const upb_msgdef *md = upb_handlers_msgdef(h); + + // A mapentry message is printed simply as '"key": value'. Rather than + // special-case key and value for every type below, we just handle both + // fields explicitly here. + const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY); + const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE); + + upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; + + switch (upb_fielddef_type(key_field)) { + case UPB_TYPE_INT32: + upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr); + break; + case UPB_TYPE_INT64: + upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr); + break; + case UPB_TYPE_UINT32: + upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr); + break; + case UPB_TYPE_UINT64: + upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr); + break; + case UPB_TYPE_STRING: + upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr); + upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr); + upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr); + break; + case UPB_TYPE_BYTES: + upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr); + break; + default: + assert(false); + break; + } + switch (upb_fielddef_type(value_field)) { + case UPB_TYPE_INT32: + upb_handlers_setint32(h, value_field, putint32_t, &empty_attr); + break; + case UPB_TYPE_INT64: + upb_handlers_setint64(h, value_field, putint64_t, &empty_attr); + break; + case UPB_TYPE_UINT32: + upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr); + break; + case UPB_TYPE_UINT64: + upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, value_field, putbool, &empty_attr); + break; + case UPB_TYPE_FLOAT: + upb_handlers_setfloat(h, value_field, putfloat, &empty_attr); + break; + case UPB_TYPE_DOUBLE: + upb_handlers_setdouble(h, value_field, putdouble, &empty_attr); + break; + case UPB_TYPE_STRING: + upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr); + upb_handlers_setstring(h, value_field, putstr, &empty_attr); + upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr); + break; + case UPB_TYPE_BYTES: + upb_handlers_setstring(h, value_field, putbytes, &empty_attr); + break; + case UPB_TYPE_ENUM: { + upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; + set_enum_hd(h, value_field, &enum_attr); + upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr); + upb_handlerattr_uninit(&enum_attr); + break; + } + case UPB_TYPE_MESSAGE: + // No handler necessary -- the submsg handlers will print the message + // as appropriate. + break; + } + + upb_handlerattr_uninit(&empty_attr); +} + +void printer_sethandlers(const void *closure, upb_handlers *h) { + UPB_UNUSED(closure); + const upb_msgdef *md = upb_handlers_msgdef(h); + bool is_mapentry = upb_msgdef_mapentry(md); upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlers_setstartmsg(h, startmap, &empty_attr); - upb_handlers_setendmsg(h, endmap, &empty_attr); - -#define TYPE(type, name, ctype) \ - case type: \ - if (upb_fielddef_isseq(f)) { \ - upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ - } else { \ - upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ - } \ + + if (is_mapentry) { + // mapentry messages are sufficiently different that we handle them + // separately. + printer_sethandlers_mapentry(closure, h); + return; + } + + upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); + upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); + +#define TYPE(type, name, ctype) \ + case type: \ + if (upb_fielddef_isseq(f)) { \ + upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ + } else { \ + upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ + } \ break; - upb_msg_iter i; - upb_msg_begin(&i, upb_handlers_msgdef(h)); - for(; !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + upb_msg_field_begin(&i, md); + for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f)); - if (upb_fielddef_isseq(f)) { + if (upb_fielddef_ismap(f)) { + upb_handlers_setstartseq(h, f, startmap, &name_attr); + upb_handlers_setendseq(h, f, endmap, &name_attr); + } else if (upb_fielddef_isseq(f)) { upb_handlers_setstartseq(h, f, startseq, &name_attr); upb_handlers_setendseq(h, f, endseq, &empty_attr); } @@ -462,12 +704,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) { // For now, we always emit symbolic names for enums. We may want an // option later to control this behavior, but we will wait for a real // need first. - EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); - hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); - hd->keyname = newstrpc(h, f); - upb_handlers_addcleanup(h, hd, free); upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&enum_attr, hd); + set_enum_hd(h, f, &enum_attr); if (upb_fielddef_isseq(f)) { upb_handlers_setint32(h, f, repeated_enum, &enum_attr); @@ -514,25 +752,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) { #undef TYPE } -/* Public API *****************************************************************/ - -void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) { - p->output_ = NULL; +static void json_printer_reset(upb_json_printer *p) { p->depth_ = 0; - upb_sink_reset(&p->input_, h, p); } -void upb_json_printer_uninit(upb_json_printer *p) { - UPB_UNUSED(p); -} -void upb_json_printer_reset(upb_json_printer *p) { - p->depth_ = 0; -} +/* Public API *****************************************************************/ + +upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h, + upb_bytessink *output) { +#ifndef NDEBUG + size_t size_before = upb_env_bytesallocated(e); +#endif + + upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer)); + if (!p) return NULL; -void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) { - upb_json_printer_reset(p); p->output_ = output; + json_printer_reset(p); + upb_sink_reset(&p->input_, h, p); + + // If this fails, increase the value in printer.h. + assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE); + return p; } upb_sink *upb_json_printer_input(upb_json_printer *p) { diff --git a/upb/json/printer.h b/upb/json/printer.h index fbc206d..c73cb79 100644 --- a/upb/json/printer.h +++ b/upb/json/printer.h @@ -11,6 +11,7 @@ #ifndef UPB_JSON_TYPED_PRINTER_H_ #define UPB_JSON_TYPED_PRINTER_H_ +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -26,71 +27,48 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer); /* upb::json::Printer *********************************************************/ -// Prints an incoming stream of data to a BytesSink in JSON format. -UPB_DEFINE_CLASS0(upb::json::Printer, - public: - Printer(const upb::Handlers* handlers); - ~Printer(); +#define UPB_JSON_PRINTER_SIZE 168 - // Resets the state of the printer, so that it will expect to begin a new - // document. - void Reset(); +#ifdef __cplusplus - // Resets the output pointer which will serve as our closure. Implies - // Reset(). - void ResetOutput(BytesSink* output); +// Prints an incoming stream of data to a BytesSink in JSON format. +class upb::json::Printer { + public: + static Printer* Create(Environment* env, const upb::Handlers* handlers, + BytesSink* output); // The input to the printer. Sink* input(); // Returns handlers for printing according to the specified schema. static reffed_ptr NewHandlers(const upb::MessageDef* md); -, -UPB_DEFINE_STRUCT0(upb_json_printer, - upb_sink input_; - // BytesSink closure. - void *subc_; - upb_bytessink *output_; - - // We track the depth so that we know when to emit startstr/endstr on the - // output. - int depth_; - // Have we emitted the first element? This state is necessary to emit commas - // without leaving a trailing comma in arrays/maps. We keep this state per - // frame depth. - // - // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. - // We count frames (contexts in which we separate elements by commas) as both - // repeated fields and messages (maps), and the worst case is a - // message->repeated field->submessage->repeated field->... nesting. - bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; -)); - -UPB_BEGIN_EXTERN_C // { -// Native C API. + static const size_t kSize = UPB_JSON_PRINTER_SIZE; -void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h); -void upb_json_printer_uninit(upb_json_printer *p); -void upb_json_printer_reset(upb_json_printer *p); -void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output); + private: + UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer); +}; + +#endif + +UPB_BEGIN_EXTERN_C + +// Native C API. +upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h, + upb_bytessink *output); upb_sink *upb_json_printer_input(upb_json_printer *p); const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md, const void *owner); -UPB_END_EXTERN_C // } +UPB_END_EXTERN_C #ifdef __cplusplus namespace upb { namespace json { -inline Printer::Printer(const upb::Handlers* handlers) { - upb_json_printer_init(this, handlers); -} -inline Printer::~Printer() { upb_json_printer_uninit(this); } -inline void Printer::Reset() { upb_json_printer_reset(this); } -inline void Printer::ResetOutput(BytesSink* output) { - upb_json_printer_resetoutput(this, output); +inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers, + BytesSink* output) { + return upb_json_printer_create(env, handlers, output); } inline Sink* Printer::input() { return upb_json_printer_input(this); } inline reffed_ptr Printer::NewHandlers( diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c index 11aa4e9..a17332b 100644 --- a/upb/pb/compile_decoder.c +++ b/upb/pb/compile_decoder.c @@ -64,7 +64,6 @@ mgroup *newgroup(const void *owner) { static void freemethod(upb_refcounted *r) { upb_pbdecodermethod *method = (upb_pbdecodermethod*)r; - upb_byteshandler_uninit(&method->input_handler_); if (method->dest_handlers_) { upb_handlers_unref(method->dest_handlers_, method); @@ -762,8 +761,10 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); label(c, LABEL_FIELD); uint32_t* start_pc = c->pc; - upb_msg_iter i; - for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_fieldtype_t type = upb_fielddef_type(f); @@ -813,9 +814,11 @@ static void find_methods(compiler *c, const upb_handlers *h) { newmethod(h, c->group); // Find submethods. - upb_msg_iter i; + upb_msg_field_iter i; const upb_msgdef *md = upb_handlers_msgdef(h); - for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); const upb_handlers *sub_h; if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && @@ -857,7 +860,7 @@ static void set_bytecode_handlers(mgroup *g) { } -/* JIT setup. ******************************************************************/ +/* JIT setup. *****************************************************************/ #ifdef UPB_USE_JIT_X64 diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc index 3181cab..e72e4e3 100644 --- a/upb/pb/compile_decoder_x64.dasc +++ b/upb/pb/compile_decoder_x64.dasc @@ -242,7 +242,7 @@ static void emit_static_asm(jitcompiler *jc) { | |2: | // Resume decoder. - | lea ARG2_64, DECODER->callstack + | mov ARG2_64, DECODER->callstack | sub rsp, ARG3_64 | mov ARG1_64, rsp | callp memcpy // Restore stack. @@ -255,7 +255,7 @@ static void emit_static_asm(jitcompiler *jc) { asmlabel(jc, "exitjit"); |->exitjit: | // Save the stack into DECODER->callstack. - | lea ARG1_64, DECODER->callstack + | mov ARG1_64, DECODER->callstack | mov ARG2_64, rsp | mov ARG3_64, DECODER->saved_rsp | sub ARG3_64, rsp @@ -300,11 +300,11 @@ static void emit_static_asm(jitcompiler *jc) { | sub rcx, rdx | jb ->err // Len is greater than enclosing message. | mov FRAME->end_ofs, rcx + | cmp FRAME, DECODER->limit + | je >3 // Stack overflow | add FRAME, sizeof(upb_pbdecoder_frame) | mov DELIMEND, PTR | add DELIMEND, rdx - | cmp FRAME, DECODER->limit - | je >3 // Stack overflow | mov dword FRAME->groupnum, 0 | test rcx, rcx | jz >2 @@ -1071,9 +1071,9 @@ static void jitbytecode(jitcompiler *jc) { | // code with the packed code-path. If this is changed later, this | // store can be removed. | mov qword FRAME->end_ofs, 0 - | add FRAME, sizeof(upb_pbdecoder_frame) | cmp FRAME, DECODER->limit | je ->err + | add FRAME, sizeof(upb_pbdecoder_frame) | mov dword FRAME->groupnum, arg break; case OP_PUSHLENDELIM: diff --git a/upb/pb/compile_decoder_x64.h b/upb/pb/compile_decoder_x64.h index ef4459d..9527361 100644 --- a/upb/pb/compile_decoder_x64.h +++ b/upb/pb/compile_decoder_x64.h @@ -28,8 +28,8 @@ static const unsigned char upb_jit_actionlist[2162] = { 73,139,159,233,77,139,167,233,77,139,174,233,73,139,174,233,73,43,175,233, 73,3,175,233,73,139,151,233,72,133,210,15,133,244,248,252,255,208,73,139, 135,233,73,199,135,233,0,0,0,0,248,1,255,91,65,92,65,93,65,94,65,95,93,195, - 248,2,73,141,183,233,72,41,212,72,137,231,72,184,237,237,65,84,73,137,228, - 72,129,228,239,252,255,208,76,137,228,65,92,195,255,248,11,73,141,191,233, + 248,2,73,139,183,233,72,41,212,72,137,231,72,184,237,237,65,84,73,137,228, + 72,129,228,239,252,255,208,76,137,228,65,92,195,255,248,11,73,139,191,233, 72,137,230,73,139,151,233,72,41,226,73,137,151,233,137,195,72,184,237,237, 65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,137,216,73,139, 167,233,91,65,92,65,93,65,94,65,95,93,195,255,248,12,73,57,159,233,15,132, @@ -40,7 +40,7 @@ static const unsigned char upb_jit_actionlist[2162] = { 255,76,57,227,15,132,244,253,255,76,137,225,72,41,217,72,131,252,249,1,15, 130,244,253,255,15,182,19,132,210,15,137,244,254,248,7,232,244,14,248,8,72, 131,195,1,72,137,252,233,72,41,217,72,41,209,15,130,244,15,73,137,142,233, - 73,129,198,239,72,137,221,72,1,213,77,59,183,233,15,132,244,249,65,199,134, + 77,59,183,233,15,132,244,249,73,129,198,239,72,137,221,72,1,213,65,199,134, 233,0,0,0,0,72,133,201,15,132,244,248,77,139,167,233,72,57,252,235,15,135, 244,248,76,57,229,15,135,244,248,255,73,137,252,236,248,2,195,248,3,73,139, 159,233,76,137,252,255,255,72,190,237,237,255,190,237,255,49,252,246,255, @@ -122,8 +122,8 @@ static const unsigned char upb_jit_actionlist[2162] = { 1,248,2,255,72,137,218,76,137,225,72,41,217,77,139,135,233,72,184,237,237, 65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,72,1,195,255, 76,57,227,15,132,244,249,232,244,29,248,3,255,76,137,227,255,72,57,252,235, - 15,133,244,1,248,4,255,77,137,174,233,73,199,134,233,0,0,0,0,73,129,198,239, - 77,59,183,233,15,132,244,15,65,199,134,233,237,255,232,244,13,255,73,129, + 15,133,244,1,248,4,255,77,137,174,233,73,199,134,233,0,0,0,0,77,59,183,233, + 15,132,244,15,73,129,198,239,65,199,134,233,237,255,232,244,13,255,73,129, 252,238,239,77,139,174,233,255,77,139,167,233,73,3,174,233,73,59,175,233, 15,130,244,247,76,57,229,15,135,244,247,73,137,252,236,248,1,255,72,57,221, 15,132,245,255,232,245,255,248,9,72,131,196,8,195,255 @@ -419,7 +419,7 @@ static void emit_static_asm(jitcompiler *jc) { //| //|2: //| // Resume decoder. - //| lea ARG2_64, DECODER->callstack + //| mov ARG2_64, DECODER->callstack //| sub rsp, ARG3_64 //| mov ARG1_64, rsp //| callp memcpy // Restore stack. @@ -434,7 +434,7 @@ static void emit_static_asm(jitcompiler *jc) { asmlabel(jc, "exitjit"); //|->exitjit: //| // Save the stack into DECODER->callstack. - //| lea ARG1_64, DECODER->callstack + //| mov ARG1_64, DECODER->callstack //| mov ARG2_64, rsp //| mov ARG3_64, DECODER->saved_rsp //| sub ARG3_64, rsp @@ -490,11 +490,11 @@ static void emit_static_asm(jitcompiler *jc) { //| sub rcx, rdx //| jb ->err // Len is greater than enclosing message. //| mov FRAME->end_ofs, rcx + //| cmp FRAME, DECODER->limit + //| je >3 // Stack overflow //| add FRAME, sizeof(upb_pbdecoder_frame) //| mov DELIMEND, PTR //| add DELIMEND, rdx - //| cmp FRAME, DECODER->limit - //| je >3 // Stack overflow //| mov dword FRAME->groupnum, 0 //| test rcx, rcx //| jz >2 @@ -504,7 +504,7 @@ static void emit_static_asm(jitcompiler *jc) { //| cmp DELIMEND, DATAEND //| ja >2 //| mov DATAEND, DELIMEND // If DELIMEND >= PTR && DELIMEND < DATAEND - dasm_put(Dst, 337, Dt1(->end_ofs), sizeof(upb_pbdecoder_frame), Dt2(->limit), Dt1(->groupnum), Dt2(->end)); + dasm_put(Dst, 337, Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), Dt2(->end)); # 317 "upb/pb/compile_decoder_x64.dasc" //|2: //| ret @@ -1609,11 +1609,11 @@ static void jitbytecode(jitcompiler *jc) { //| // code with the packed code-path. If this is changed later, this //| // store can be removed. //| mov qword FRAME->end_ofs, 0 - //| add FRAME, sizeof(upb_pbdecoder_frame) //| cmp FRAME, DECODER->limit //| je ->err + //| add FRAME, sizeof(upb_pbdecoder_frame) //| mov dword FRAME->groupnum, arg - dasm_put(Dst, 2070, Dt1(->sink.closure), Dt1(->end_ofs), sizeof(upb_pbdecoder_frame), Dt2(->limit), Dt1(->groupnum), arg); + dasm_put(Dst, 2070, Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), arg); # 1078 "upb/pb/compile_decoder_x64.dasc" break; case OP_PUSHLENDELIM: diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 04ca413..f371bce 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -19,10 +19,7 @@ */ #include -#include -#include #include -#include #include "upb/pb/decoder.int.h" #include "upb/pb/varint.int.h" @@ -70,18 +67,17 @@ static bool consumes_input(opcode op) { static bool in_residual_buf(const upb_pbdecoder *d, const char *p); -// It's unfortunate that we have to micro-manage the compiler this way, -// especially since this tuning is necessarily specific to one hardware -// configuration. But emperically on a Core i7, performance increases 30-50% -// with these annotations. Every instance where these appear, gcc 4.2.1 made -// the wrong decision and degraded performance in benchmarks. -#define FORCEINLINE static inline __attribute__((always_inline)) -#define NOINLINE __attribute__((noinline)) +// It's unfortunate that we have to micro-manage the compiler with +// UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily +// specific to one hardware configuration. But empirically on a Core i7, +// performance increases 30-50% with these annotations. Every instance where +// these appear, gcc 4.2.1 made the wrong decision and degraded performance in +// benchmarks. static void seterr(upb_pbdecoder *d, const char *msg) { - // TODO(haberman): encapsulate this access to pipeline->status, but not sure - // exactly what that interface should look like. - upb_status_seterrmsg(d->status, msg); + upb_status status = UPB_STATUS_INIT; + upb_status_seterrmsg(&status, msg); + upb_env_reporterror(d->env, &status); } void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { @@ -249,7 +245,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) { // Copies the next "bytes" bytes into "buf" and advances the stream. // Requires that this many bytes are available in the current buffer. -FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { +UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf, + size_t bytes) { assert(bytes <= curbufleft(d)); memcpy(buf, d->ptr, bytes); advance(d, bytes); @@ -258,8 +255,8 @@ FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { // Slow path for getting the next "bytes" bytes, regardless of whether they are // available in the current buffer or not. Returns a status code as described // in decoder.int.h. -static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { +UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf, + size_t bytes) { const size_t avail = curbufleft(d); consumebytes(d, buf, avail); bytes -= avail; @@ -280,7 +277,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, // Gets the next "bytes" bytes, regardless of whether they are available in the // current buffer or not. Returns a status code as described in decoder.int.h. -FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { +UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf, + size_t bytes) { if (curbufleft(d) >= bytes) { // Buffer has enough data to satisfy. consumebytes(d, buf, bytes); @@ -290,8 +288,8 @@ FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { } } -static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { +UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf, + size_t bytes) { size_t ret = curbufleft(d); memcpy(buf, d->ptr, ret); if (in_residual_buf(d, d->ptr)) { @@ -302,7 +300,8 @@ static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf, return ret; } -FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) { +UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf, + size_t bytes) { if (curbufleft(d) >= bytes) { memcpy(buf, d->ptr, bytes); return bytes; @@ -316,8 +315,8 @@ FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) { // Slow path for decoding a varint from the current buffer position. // Returns a status code as described in decoder.int.h. -NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, - uint64_t *u64) { +UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, + uint64_t *u64) { *u64 = 0; uint8_t byte = 0x80; int bitpos; @@ -335,7 +334,7 @@ NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, // Decodes a varint from the current buffer position. // Returns a status code as described in decoder.int.h. -FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { +UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) { *u64 = *d->ptr; advance(d, 1); @@ -358,7 +357,7 @@ FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { // Decodes a 32-bit varint from the current buffer position. // Returns a status code as described in decoder.int.h. -FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { +UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { uint64_t u64; int32_t ret = decode_varint(d, &u64); if (ret >= 0) return ret; @@ -377,14 +376,14 @@ FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { // Decodes a fixed32 from the current buffer position. // Returns a status code as described in decoder.int.h. // TODO: proper byte swapping for big-endian machines. -FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { +UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { return getbytes(d, u32, 4); } // Decodes a fixed64 from the current buffer position. // Returns a status code as described in decoder.int.h. // TODO: proper byte swapping for big-endian machines. -FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { +UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { return getbytes(d, u64, 8); } @@ -408,7 +407,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) { if (end > fr->end_ofs) { seterr(d, "Submessage end extends past enclosing submessage."); return false; - } else if ((fr + 1) == d->limit) { + } else if (fr == d->limit) { seterr(d, kPbDecoderStackOverflow); return false; } @@ -435,8 +434,8 @@ static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { // Pops a frame from the decoder stack. static void decoder_pop(upb_pbdecoder *d) { d->top--; } -NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, - uint64_t expected) { +UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, + uint64_t expected) { uint64_t data = 0; size_t bytes = upb_value_size(expected); size_t read = peekbytes(d, &data, bytes); @@ -814,7 +813,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { upb_pbdecoder *d = closure; UPB_UNUSED(size_hint); + d->top->end_ofs = UINT64_MAX; + d->bufstart_ofs = 0; d->call_len = 1; + d->callstack[0] = &halt; d->pc = pc; return d; } @@ -823,6 +825,8 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) { UPB_UNUSED(hd); UPB_UNUSED(size_hint); upb_pbdecoder *d = closure; + d->top->end_ofs = UINT64_MAX; + d->bufstart_ofs = 0; d->call_len = 0; return d; } @@ -879,55 +883,115 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { return true; } -void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m, - upb_status *s) { - d->limit = &d->stack[UPB_DECODER_MAX_NESTING]; - upb_bytessink_reset(&d->input_, &m->input_handler_, d); - d->method_ = m; - d->callstack[0] = &halt; - d->status = s; - upb_pbdecoder_reset(d); -} - void upb_pbdecoder_reset(upb_pbdecoder *d) { d->top = d->stack; - d->top->end_ofs = UINT64_MAX; d->top->groupnum = 0; - d->bufstart_ofs = 0; d->ptr = d->residual; d->buf = d->residual; d->end = d->residual; d->residual_end = d->residual; - d->call_len = 1; } -uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { - return offset(d); +static size_t stacksize(upb_pbdecoder *d, size_t entries) { + UPB_UNUSED(d); + return entries * sizeof(upb_pbdecoder_frame); } -// Not currently required, but to support outgrowing the static stack we need -// this. -void upb_pbdecoder_uninit(upb_pbdecoder *d) { +static size_t callstacksize(upb_pbdecoder *d, size_t entries) { UPB_UNUSED(d); -} -const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { - return d->method_; +#ifdef UPB_USE_JIT_X64 + if (d->method_->is_native_) { + // Each native stack frame needs two pointers, plus we need a few frames for + // the enter/exit trampolines. + size_t ret = entries * sizeof(void*) * 2; + ret += sizeof(void*) * 10; + return ret; + } +#endif + + return entries * sizeof(uint32_t*); } -bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) { - // TODO(haberman): do we need to test whether the decoder is already on the - // stack (like calling this from within a callback)? Should we support - // rebinding the output at all? +upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m, + upb_sink *sink) { + const size_t default_max_nesting = 64; +#ifndef NDEBUG + size_t size_before = upb_env_bytesallocated(e); +#endif + + upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder)); + if (!d) return NULL; + + d->method_ = m; + d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting)); + d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting)); + if (!d->stack || !d->callstack) { + return NULL; + } + + d->env = e; + d->limit = d->stack + default_max_nesting - 1; + d->stack_size = default_max_nesting; + + upb_pbdecoder_reset(d); + upb_bytessink_reset(&d->input_, &m->input_handler_, d); + assert(sink); if (d->method_->dest_handlers_) { if (sink->handlers != d->method_->dest_handlers_) - return false; + return NULL; } upb_sink_reset(&d->top->sink, sink->handlers, sink->closure); - return true; + + // If this fails, increase the value in decoder.h. + assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE); + return d; +} + +uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { + return offset(d); +} + +const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { + return d->method_; } upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) { return &d->input_; } + +size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) { + return d->stack_size; +} + +bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) { + if (max < d->top - d->stack) { + // Can't set a limit smaller than what we are currently at. + return false; + } + + if (max > d->stack_size) { + // Need to reallocate stack and callstack to accommodate. + size_t old_size = stacksize(d, d->stack_size); + size_t new_size = stacksize(d, max); + void *p = upb_env_realloc(d->env, d->stack, old_size, new_size); + if (!p) { + return false; + } + d->stack = p; + + old_size = callstacksize(d, d->stack_size); + new_size = callstacksize(d, max); + p = upb_env_realloc(d->env, d->callstack, old_size, new_size); + if (!p) { + return false; + } + d->callstack = p; + + d->stack_size = max; + } + + d->limit = d->stack + max - 1; + return true; +} diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index e93f3bd..d37718c 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -18,7 +18,7 @@ #ifndef UPB_DECODER_H_ #define UPB_DECODER_H_ -#include "upb/table.int.h" +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -37,44 +37,6 @@ UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder); UPB_DECLARE_TYPE(upb::pb::DecoderMethod, upb_pbdecodermethod); UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts); -// The maximum that any submessages can be nested. Matches proto2's limit. -// This specifies the size of the decoder's statically-sized array and therefore -// setting it high will cause the upb::pb::Decoder object to be larger. -// -// If necessary we can add a runtime-settable property to Decoder that allow -// this to be larger than the compile-time setting, but this would add -// complexity, particularly since we would have to decide how/if to give users -// the ability to set a custom memory allocation function. -#define UPB_DECODER_MAX_NESTING 64 - -// Internal-only struct used by the decoder. -typedef struct { - UPB_PRIVATE_FOR_CPP - // Space optimization note: we store two pointers here that the JIT - // doesn't need at all; the upb_handlers* inside the sink and - // the dispatch table pointer. We can optimze so that the JIT uses - // smaller stack frames than the interpreter. The only thing we need - // to guarantee is that the fallback routines can find end_ofs. - upb_sink sink; - - // The absolute stream offset of the end-of-frame delimiter. - // Non-delimited frames (groups and non-packed repeated fields) reuse the - // delimiter of their parent, even though the frame may not end there. - // - // NOTE: the JIT stores a slightly different value here for non-top frames. - // It stores the value relative to the end of the enclosed message. But the - // top frame is still stored the same way, which is important for ensuring - // that calls from the JIT into C work correctly. - uint64_t end_ofs; - const uint32_t *base; - - // 0 indicates a length-delimited field. - // A positive number indicates a known group. - // A negative number indicates an unknown group. - int32_t groupnum; - upb_inttable *dispatch; // Not used by the JIT. -} upb_pbdecoder_frame; - // The parameters one uses to construct a DecoderMethod. // TODO(haberman): move allowjit here? Seems more convenient for users. UPB_DEFINE_CLASS0(upb::pb::DecoderMethodOptions, @@ -152,22 +114,31 @@ UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted, upb_inttable dispatch; )); +// Preallocation hint: decoder won't allocate more bytes than this when first +// constructed. This hint may be an overestimate for some build configurations. +// But if the decoder library is upgraded without recompiling the application, +// it may be an underestimate. +#define UPB_PB_DECODER_SIZE 4400 + +#ifdef __cplusplus + // A Decoder receives binary protobuf data on its input sink and pushes the // decoded data to its output sink. -UPB_DEFINE_CLASS0(upb::pb::Decoder, +class upb::pb::Decoder { public: // Constructs a decoder instance for the given method, which must outlive this // decoder. Any errors during parsing will be set on the given status, which // must also outlive this decoder. - Decoder(const DecoderMethod* method, Status* status); - ~Decoder(); + // + // The sink must match the given method. + static Decoder* Create(Environment* env, const DecoderMethod* method, + Sink* output); // Returns the DecoderMethod this decoder is parsing from. - // TODO(haberman): Do users need to be able to rebind this? const DecoderMethod* method() const; - // Resets the state of the decoder. - void Reset(); + // The sink on which this decoder receives input. + BytesSink* input(); // Returns number of bytes successfully parsed. // @@ -178,76 +149,25 @@ UPB_DEFINE_CLASS0(upb::pb::Decoder, // callback. uint64_t BytesParsed() const; - // Resets the output sink of the Decoder. - // The given sink must match method()->dest_handlers(). + // Gets/sets the parsing nexting limit. If the total number of nested + // submessages and repeated fields hits this limit, parsing will fail. This + // is a resource limit that controls the amount of memory used by the parsing + // stack. // - // This must be called at least once before the decoder can be used. It may - // only be called with the decoder is in a state where it was just created or - // reset with pipeline.Reset(). The given sink must be from the same pipeline - // as this decoder. - bool ResetOutput(Sink* sink); - - // The sink on which this decoder receives input. - BytesSink* input(); - - private: - UPB_DISALLOW_COPY_AND_ASSIGN(Decoder); -, -UPB_DEFINE_STRUCT0(upb_pbdecoder, UPB_QUOTE( - // Our input sink. - upb_bytessink input_; - - // The decoder method we are parsing with (owned). - const upb_pbdecodermethod *method_; - - size_t call_len; - const uint32_t *pc, *last; + // Setting the limit will fail if the parser is currently suspended at a depth + // greater than this, or if memory allocation of the stack fails. + size_t max_nesting() const; + bool set_max_nesting(size_t max); - // Current input buffer and its stream offset. - const char *buf, *ptr, *end, *checkpoint; - - // End of the delimited region, relative to ptr, or NULL if not in this buf. - const char *delim_end; - - // End of the delimited region, relative to ptr, or end if not in this buf. - const char *data_end; - - // Overall stream offset of "buf." - uint64_t bufstart_ofs; - - // Buffer for residual bytes not parsed from the previous buffer. - // The maximum number of residual bytes we require is 12; a five-byte - // unknown tag plus an eight-byte value, less one because the value - // is only a partial value. - char residual[12]; - char *residual_end; - - // Stores the user buffer passed to our decode function. - const char *buf_param; - size_t size_param; - const upb_bufhandle *handle; + void Reset(); -#ifdef UPB_USE_JIT_X64 - // Used momentarily by the generated code to store a value while a user - // function is called. - uint32_t tmp_len; + static const size_t kSize = UPB_PB_DECODER_SIZE; - const void *saved_rsp; -#endif + private: + UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder); +}; - upb_status *status; - - // Our internal stack. - upb_pbdecoder_frame *top, *limit; - upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING]; -#ifdef UPB_USE_JIT_X64 - // Each native stack frame needs two pointers, plus we need a few frames for - // the enter/exit trampolines. - const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10]; -#else - const uint32_t *callstack[UPB_DECODER_MAX_NESTING]; -#endif -))); +#endif // __cplusplus // A class for caching protobuf processing code, whether bytecode for the // interpreted decoder or machine code for the JIT. @@ -296,14 +216,15 @@ UPB_DEFINE_STRUCT0(upb_pbcodecache, UPB_BEGIN_EXTERN_C // { -void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method, - upb_status *status); -void upb_pbdecoder_uninit(upb_pbdecoder *d); -void upb_pbdecoder_reset(upb_pbdecoder *d); +upb_pbdecoder *upb_pbdecoder_create(upb_env *e, + const upb_pbdecodermethod *method, + upb_sink *output); const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); -bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink); upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d); uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d); +size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d); +bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max); +void upb_pbdecoder_reset(upb_pbdecoder *d); void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts, const upb_handlers *h); @@ -338,27 +259,27 @@ namespace upb { namespace pb { -inline Decoder::Decoder(const DecoderMethod* m, Status* s) { - upb_pbdecoder_init(this, m, s); -} -inline Decoder::~Decoder() { - upb_pbdecoder_uninit(this); +// static +inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m, + Sink* sink) { + return upb_pbdecoder_create(env, m, sink); } inline const DecoderMethod* Decoder::method() const { return upb_pbdecoder_method(this); } -inline void Decoder::Reset() { - upb_pbdecoder_reset(this); +inline BytesSink* Decoder::input() { + return upb_pbdecoder_input(this); } inline uint64_t Decoder::BytesParsed() const { return upb_pbdecoder_bytesparsed(this); } -inline bool Decoder::ResetOutput(Sink* sink) { - return upb_pbdecoder_resetoutput(this, sink); +inline size_t Decoder::max_nesting() const { + return upb_pbdecoder_maxnesting(this); } -inline BytesSink* Decoder::input() { - return upb_pbdecoder_input(this); +inline bool Decoder::set_max_nesting(size_t max) { + return upb_pbdecoder_setmaxnesting(this, max); } +inline void Decoder::Reset() { upb_pbdecoder_reset(this); } inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) { upb_pbdecodermethodopts_init(this, h); diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index 302701e..5522be7 100644 --- a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -13,8 +13,9 @@ #include #include "upb/def.h" #include "upb/handlers.h" -#include "upb/sink.h" #include "upb/pb/decoder.h" +#include "upb/sink.h" +#include "upb/table.int.h" // Opcode definitions. The canonical meaning of each opcode is its // implementation in the interpreter (the JIT is written to match this). @@ -112,6 +113,95 @@ typedef struct { #endif } mgroup; +// The maximum that any submessages can be nested. Matches proto2's limit. +// This specifies the size of the decoder's statically-sized array and therefore +// setting it high will cause the upb::pb::Decoder object to be larger. +// +// If necessary we can add a runtime-settable property to Decoder that allow +// this to be larger than the compile-time setting, but this would add +// complexity, particularly since we would have to decide how/if to give users +// the ability to set a custom memory allocation function. +#define UPB_DECODER_MAX_NESTING 64 + +// Internal-only struct used by the decoder. +typedef struct { + // Space optimization note: we store two pointers here that the JIT + // doesn't need at all; the upb_handlers* inside the sink and + // the dispatch table pointer. We can optimze so that the JIT uses + // smaller stack frames than the interpreter. The only thing we need + // to guarantee is that the fallback routines can find end_ofs. + upb_sink sink; + + // The absolute stream offset of the end-of-frame delimiter. + // Non-delimited frames (groups and non-packed repeated fields) reuse the + // delimiter of their parent, even though the frame may not end there. + // + // NOTE: the JIT stores a slightly different value here for non-top frames. + // It stores the value relative to the end of the enclosed message. But the + // top frame is still stored the same way, which is important for ensuring + // that calls from the JIT into C work correctly. + uint64_t end_ofs; + const uint32_t *base; + + // 0 indicates a length-delimited field. + // A positive number indicates a known group. + // A negative number indicates an unknown group. + int32_t groupnum; + upb_inttable *dispatch; // Not used by the JIT. +} upb_pbdecoder_frame; + +struct upb_pbdecoder { + upb_env *env; + + // Our input sink. + upb_bytessink input_; + + // The decoder method we are parsing with (owned). + const upb_pbdecodermethod *method_; + + size_t call_len; + const uint32_t *pc, *last; + + // Current input buffer and its stream offset. + const char *buf, *ptr, *end, *checkpoint; + + // End of the delimited region, relative to ptr, or NULL if not in this buf. + const char *delim_end; + + // End of the delimited region, relative to ptr, or end if not in this buf. + const char *data_end; + + // Overall stream offset of "buf." + uint64_t bufstart_ofs; + + // Buffer for residual bytes not parsed from the previous buffer. + // The maximum number of residual bytes we require is 12; a five-byte + // unknown tag plus an eight-byte value, less one because the value + // is only a partial value. + char residual[12]; + char *residual_end; + + // Stores the user buffer passed to our decode function. + const char *buf_param; + size_t size_param; + const upb_bufhandle *handle; + + // Our internal stack. + upb_pbdecoder_frame *stack, *top, *limit; + const uint32_t **callstack; + size_t stack_size; + + upb_status *status; + +#ifdef UPB_USE_JIT_X64 + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; + + const void *saved_rsp; +#endif +}; + // Decoder entry points; used as handlers. void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint); void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint); diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c index d2c22e9..7a50996 100644 --- a/upb/pb/encoder.c +++ b/upb/pb/encoder.c @@ -62,6 +62,68 @@ #include +// The output buffer is divided into segments; a segment is a string of data +// that is "ready to go" -- it does not need any varint lengths inserted into +// the middle. The seams between segments are where varints will be inserted +// once they are known. +// +// We also use the concept of a "run", which is a range of encoded bytes that +// occur at a single submessage level. Every segment contains one or more runs. +// +// A segment can span messages. Consider: +// +// .--Submessage lengths---------. +// | | | +// | V V +// V | |--------------- | |----------------- +// Submessages: | |----------------------------------------------- +// Top-level msg: ------------------------------------------------------------ +// +// Segments: ----- ------------------- ----------------- +// Runs: *---- *--------------*--- *---------------- +// (* marks the start) +// +// Note that the top-level menssage is not in any segment because it does not +// have any length preceding it. +// +// A segment is only interrupted when another length needs to be inserted. So +// observe how the second segment spans both the inner submessage and part of +// the next enclosing message. +typedef struct { + uint32_t msglen; // The length to varint-encode before this segment. + uint32_t seglen; // Length of the segment. +} upb_pb_encoder_segment; + +struct upb_pb_encoder { + upb_env *env; + + // Our input and output. + upb_sink input_; + upb_bytessink *output_; + + // The "subclosure" -- used as the inner closure as part of the bytessink + // protocol. + void *subc; + + // The output buffer and limit, and our current write position. "buf" + // initially points to "initbuf", but is dynamically allocated if we need to + // grow beyond the initial size. + char *buf, *ptr, *limit; + + // The beginning of the current run, or undefined if we are at the top level. + char *runbegin; + + // The list of segments we are accumulating. + upb_pb_encoder_segment *segbuf, *segptr, *seglimit; + + // The stack of enclosing submessages. Each entry in the stack points to the + // segment where this submessage's length is being accumulated. + int *stack, *top, *stacklimit; + + // Depth of startmsg/endmsg calls. + int depth; +}; + /* low-level buffering ********************************************************/ // Low-level functions for interacting with the output buffer. @@ -80,24 +142,22 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) { // e->ptr. Returns false if the bytes could not be allocated. static bool reserve(upb_pb_encoder *e, size_t bytes) { if ((e->limit - e->ptr) < bytes) { + // Grow buffer. size_t needed = bytes + (e->ptr - e->buf); size_t old_size = e->limit - e->buf; + size_t new_size = old_size; + while (new_size < needed) { new_size *= 2; } - char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf; - char *new_buf = realloc(realloc_from, new_size); + char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size); if (new_buf == NULL) { return false; } - if (realloc_from == NULL) { - memcpy(new_buf, e->initbuf, old_size); - } - e->ptr = new_buf + (e->ptr - e->buf); e->runbegin = new_buf + (e->runbegin - e->buf); e->limit = new_buf + new_size; @@ -166,21 +226,17 @@ static bool start_delim(upb_pb_encoder *e) { } if (++e->segptr == e->seglimit) { - upb_pb_encoder_segment *realloc_from = - (e->segbuf == e->seginitbuf) ? NULL : e->segbuf; + // Grow segment buffer. size_t old_size = (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment); size_t new_size = old_size * 2; - upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size); + upb_pb_encoder_segment *new_buf = + upb_env_realloc(e->env, e->segbuf, old_size, new_size); if (new_buf == NULL) { return false; } - if (realloc_from == NULL) { - memcpy(new_buf, e->seginitbuf, old_size); - } - e->segptr = new_buf + (e->segptr - e->segbuf); e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment)); e->segbuf = new_buf; @@ -378,8 +434,10 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) { upb_handlers_setendmsg(h, endmsg, NULL); const upb_msgdef *m = upb_handlers_msgdef(h); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) && upb_fielddef_packed(f); @@ -449,6 +507,12 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) { } } +void upb_pb_encoder_reset(upb_pb_encoder *e) { + e->segptr = NULL; + e->top = NULL; + e->depth = 0; +} + /* public API *****************************************************************/ @@ -457,40 +521,42 @@ const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL); } -#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0])) - -void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) { - e->output_ = NULL; - e->subc = NULL; - e->buf = e->initbuf; - e->ptr = e->buf; - e->limit = e->buf + ARRAYSIZE(e->initbuf); - e->segbuf = e->seginitbuf; - e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf); - e->stacklimit = e->stack + ARRAYSIZE(e->stack); - upb_sink_reset(&e->input_, h, e); -} - -void upb_pb_encoder_uninit(upb_pb_encoder *e) { - if (e->buf != e->initbuf) { - free(e->buf); +upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h, + upb_bytessink *output) { + const size_t initial_bufsize = 256; + const size_t initial_segbufsize = 16; + // TODO(haberman): make this configurable. + const size_t stack_size = 64; +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif + + upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder)); + if (!e) return NULL; + + e->buf = upb_env_malloc(env, initial_bufsize); + e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf)); + e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack)); + + if (!e->buf || !e->segbuf || !e->stack) { + return NULL; } - if (e->segbuf != e->seginitbuf) { - free(e->segbuf); - } -} + e->limit = e->buf + initial_bufsize; + e->seglimit = e->segbuf + initial_segbufsize; + e->stacklimit = e->stack + stack_size; -void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) { upb_pb_encoder_reset(e); + upb_sink_reset(&e->input_, h, e); + + e->env = env; e->output_ = output; e->subc = output->closure; -} + e->ptr = e->buf; -void upb_pb_encoder_reset(upb_pb_encoder *e) { - e->segptr = NULL; - e->top = NULL; - e->depth = 0; + // If this fails, increase the value in encoder.h. + assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE); + return e; } upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; } diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h index 2df5797..edff95b 100644 --- a/upb/pb/encoder.h +++ b/upb/pb/encoder.h @@ -15,6 +15,7 @@ #ifndef UPB_ENCODER_H_ #define UPB_ENCODER_H_ +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -31,101 +32,42 @@ UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder); /* upb::pb::Encoder ***********************************************************/ -// The output buffer is divided into segments; a segment is a string of data -// that is "ready to go" -- it does not need any varint lengths inserted into -// the middle. The seams between segments are where varints will be inserted -// once they are known. -// -// We also use the concept of a "run", which is a range of encoded bytes that -// occur at a single submessage level. Every segment contains one or more runs. -// -// A segment can span messages. Consider: -// -// .--Submessage lengths---------. -// | | | -// | V V -// V | |--------------- | |----------------- -// Submessages: | |----------------------------------------------- -// Top-level msg: ------------------------------------------------------------ -// -// Segments: ----- ------------------- ----------------- -// Runs: *---- *--------------*--- *---------------- -// (* marks the start) -// -// Note that the top-level menssage is not in any segment because it does not -// have any length preceding it. -// -// A segment is only interrupted when another length needs to be inserted. So -// observe how the second segment spans both the inner submessage and part of -// the next enclosing message. -typedef struct { - UPB_PRIVATE_FOR_CPP - uint32_t msglen; // The length to varint-encode before this segment. - uint32_t seglen; // Length of the segment. -} upb_pb_encoder_segment; - -UPB_DEFINE_CLASS0(upb::pb::Encoder, - public: - Encoder(const upb::Handlers* handlers); - ~Encoder(); - - static reffed_ptr NewHandlers(const upb::MessageDef* msg); +// Preallocation hint: decoder won't allocate more bytes than this when first +// constructed. This hint may be an overestimate for some build configurations. +// But if the decoder library is upgraded without recompiling the application, +// it may be an underestimate. +#define UPB_PB_ENCODER_SIZE 768 - // Resets the state of the printer, so that it will expect to begin a new - // document. - void Reset(); +#ifdef __cplusplus - // Resets the output pointer which will serve as our closure. - void ResetOutput(BytesSink* output); +class upb::pb::Encoder { + public: + // Creates a new encoder in the given environment. The Handlers must have + // come from NewHandlers() below. + static Encoder* Create(Environment* env, const Handlers* handlers, + BytesSink* output); // The input to the encoder. Sink* input(); - private: - UPB_DISALLOW_COPY_AND_ASSIGN(Encoder); -, -UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE( - // Our input and output. - upb_sink input_; - upb_bytessink *output_; - - // The "subclosure" -- used as the inner closure as part of the bytessink - // protocol. - void *subc; - - // The output buffer and limit, and our current write position. "buf" - // initially points to "initbuf", but is dynamically allocated if we need to - // grow beyond the initial size. - char *buf, *ptr, *limit; + // Creates a new set of handlers for this MessageDef. + static reffed_ptr NewHandlers(const MessageDef* msg); - // The beginning of the current run, or undefined if we are at the top level. - char *runbegin; + static const size_t kSize = UPB_PB_ENCODER_SIZE; - // The list of segments we are accumulating. - upb_pb_encoder_segment *segbuf, *segptr, *seglimit; - - // The stack of enclosing submessages. Each entry in the stack points to the - // segment where this submessage's length is being accumulated. - int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit; - - // Depth of startmsg/endmsg calls. - int depth; + private: + UPB_DISALLOW_POD_OPS(Encoder, upb::pb::Encoder); +}; - // Initial buffers for the output buffer and segment buffer. If we outgrow - // these we will dynamically allocate bigger ones. - char initbuf[256]; - upb_pb_encoder_segment seginitbuf[32]; -))); +#endif UPB_BEGIN_EXTERN_C const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, const void *owner); -void upb_pb_encoder_reset(upb_pb_encoder *e); upb_sink *upb_pb_encoder_input(upb_pb_encoder *p); -void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h); -void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output); -void upb_pb_encoder_uninit(upb_pb_encoder *e); +upb_pb_encoder* upb_pb_encoder_create(upb_env* e, const upb_handlers* h, + upb_bytessink* output); UPB_END_EXTERN_C @@ -133,17 +75,9 @@ UPB_END_EXTERN_C namespace upb { namespace pb { -inline Encoder::Encoder(const upb::Handlers* handlers) { - upb_pb_encoder_init(this, handlers); -} -inline Encoder::~Encoder() { - upb_pb_encoder_uninit(this); -} -inline void Encoder::Reset() { - upb_pb_encoder_reset(this); -} -inline void Encoder::ResetOutput(BytesSink* output) { - upb_pb_encoder_resetoutput(this, output); +inline Encoder* Encoder::Create(Environment* env, const Handlers* handlers, + BytesSink* output) { + return upb_pb_encoder_create(env, handlers, output); } inline Sink* Encoder::input() { return upb_pb_encoder_input(this); diff --git a/upb/pb/glue.c b/upb/pb/glue.c index fde2dd1..1259dac 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -22,26 +22,26 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, const upb_pbdecodermethod *decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m); - upb_pbdecoder decoder; - upb_descreader reader; + upb_env env; + upb_env_init(&env); + upb_env_reporterrorsto(&env, status); - upb_pbdecoder_init(&decoder, decoder_m, status); - upb_descreader_init(&reader, reader_h, status); - upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader)); + upb_descreader *reader = upb_descreader_create(&env, reader_h); + upb_pbdecoder *decoder = + upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader)); // Push input data. - bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder)); + bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder)); upb_def **ret = NULL; if (!ok) goto cleanup; - upb_def **defs = upb_descreader_getdefs(&reader, owner, n); + upb_def **defs = upb_descreader_getdefs(reader, owner, n); ret = malloc(sizeof(upb_def*) * (*n)); memcpy(ret, defs, sizeof(upb_def*) * (*n)); cleanup: - upb_pbdecoder_uninit(&decoder); - upb_descreader_uninit(&reader); + upb_env_uninit(&env); upb_handlers_unref(reader_h, &reader_h); upb_pbdecodermethod_unref(decoder_m, &decoder_m); return ret; diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 8ceed68..07f951d 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -19,6 +19,14 @@ #include "upb/sink.h" +struct upb_textprinter { + upb_sink input_; + upb_bytessink *output_; + int indent_depth_; + bool single_line_; + void *subc; +}; + #define CHECK(x) if ((x) < 0) goto err; static const char *shortname(const char *longname) { @@ -236,24 +244,6 @@ err: return false; } - -/* Public API *****************************************************************/ - -void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h) { - p->single_line_ = false; - p->indent_depth_ = 0; - upb_sink_reset(&p->input_, h, p); -} - -void upb_textprinter_uninit(upb_textprinter *p) { - UPB_UNUSED(p); -} - -void upb_textprinter_reset(upb_textprinter *p, bool single_line) { - p->single_line_ = single_line; - p->indent_depth_ = 0; -} - static void onmreg(const void *c, upb_handlers *h) { UPB_UNUSED(c); const upb_msgdef *m = upb_handlers_msgdef(h); @@ -261,8 +251,10 @@ static void onmreg(const void *c, upb_handlers *h) { upb_handlers_setstartmsg(h, textprinter_startmsg, NULL); upb_handlers_setendmsg(h, textprinter_endmsg, NULL); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, f); @@ -311,6 +303,26 @@ static void onmreg(const void *c, upb_handlers *h) { } } +static void textprinter_reset(upb_textprinter *p, bool single_line) { + p->single_line_ = single_line; + p->indent_depth_ = 0; +} + + +/* Public API *****************************************************************/ + +upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h, + upb_bytessink *output) { + upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter)); + if (!p) return NULL; + + p->output_ = output; + upb_sink_reset(&p->input_, h, p); + textprinter_reset(p, false); + + return p; +} + const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m, const void *owner) { return upb_handlers_newfrozen(m, owner, &onmreg, NULL); @@ -318,11 +330,6 @@ const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m, upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; } -bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output) { - p->output_ = output; - return true; -} - void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) { p->single_line_ = single_line; } diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h index 97e01f7..3ba3403 100644 --- a/upb/pb/textprinter.h +++ b/upb/pb/textprinter.h @@ -8,6 +8,7 @@ #ifndef UPB_TEXT_H_ #define UPB_TEXT_H_ +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -20,58 +21,51 @@ class TextPrinter; UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter); -UPB_DEFINE_CLASS0(upb::pb::TextPrinter, +#ifdef __cplusplus + +class upb::pb::TextPrinter { public: // The given handlers must have come from NewHandlers(). It must outlive the // TextPrinter. - explicit TextPrinter(const upb::Handlers* handlers); + static TextPrinter *Create(Environment *env, const upb::Handlers *handlers, + BytesSink *output); void SetSingleLineMode(bool single_line); - bool ResetOutput(BytesSink* output); Sink* input(); // If handler caching becomes a requirement we can add a code cache as in // decoder.h static reffed_ptr NewHandlers(const MessageDef* md); +}; - private: -, -UPB_DEFINE_STRUCT0(upb_textprinter, - upb_sink input_; - upb_bytessink *output_; - int indent_depth_; - bool single_line_; - void *subc; -)); +#endif -UPB_BEGIN_EXTERN_C // { +UPB_BEGIN_EXTERN_C // C API. -void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h); -void upb_textprinter_uninit(upb_textprinter *p); -bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output); +upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h, + upb_bytessink *output); void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line); upb_sink *upb_textprinter_input(upb_textprinter *p); const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m, const void *owner); -UPB_END_EXTERN_C // } +UPB_END_EXTERN_C #ifdef __cplusplus namespace upb { namespace pb { -inline TextPrinter::TextPrinter(const upb::Handlers* handlers) { - upb_textprinter_init(this, handlers); +inline TextPrinter *TextPrinter::Create(Environment *env, + const upb::Handlers *handlers, + BytesSink *output) { + return upb_textprinter_create(env, handlers, output); } inline void TextPrinter::SetSingleLineMode(bool single_line) { upb_textprinter_setsingleline(this, single_line); } -inline bool TextPrinter::ResetOutput(BytesSink* output) { - return upb_textprinter_resetoutput(this, output); -} inline Sink* TextPrinter::input() { return upb_textprinter_input(this); } diff --git a/upb/sink.h b/upb/sink.h index 479aaff..1ada31a 100644 --- a/upb/sink.h +++ b/upb/sink.h @@ -34,27 +34,6 @@ UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc); UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink); UPB_DECLARE_TYPE(upb::Sink, upb_sink); -// Internal-only struct for the sink. -struct upb_sinkframe { - UPB_PRIVATE_FOR_CPP - const upb_handlers *h; - void *closure; - - // For any frames besides the top, this is the END* callback that will run - // when the subframe is popped (for example, for a "sequence" frame the frame - // above it will be a UPB_HANDLER_ENDSEQ handler). But this is only - // necessary for assertion checking inside upb_sink and can be omitted if the - // sink has only one caller. - // - // TODO(haberman): have a mechanism for ensuring that a sink only has one - // caller. - upb_selector_t selector; -}; - -// The maximum nesting depth that upb::Sink will allow. Matches proto2's limit. -// TODO: make this a runtime-settable property of Sink. -#define UPB_SINK_MAX_NESTING 64 - // A upb::Sink is an object that binds a upb::Handlers object to some runtime // state. It represents an endpoint to which data can be sent. // diff --git a/upb/symtab.c b/upb/symtab.c index f172aaa..817a581 100644 --- a/upb/symtab.c +++ b/upb/symtab.c @@ -139,8 +139,10 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, // For messages, continue the recursion by visiting all subdefs. const upb_msgdef *m = upb_dyncast_msgdef(def); if (m) { - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (!upb_fielddef_hassubdef(f)) continue; // |= to avoid short-circuit; we need its side-effects. @@ -293,8 +295,10 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, // Type names are resolved relative to the message in which they appear. const char *base = upb_msgdef_fullname(m); - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_msg_field_iter j; + for(upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { upb_fielddef *f = upb_msg_iter_field(&j); const char *name = upb_fielddef_subdefname(f); if (name && !upb_fielddef_subdef(f)) { diff --git a/upb/table.c b/upb/table.c index aa99fe2..fc69125 100644 --- a/upb/table.c +++ b/upb/table.c @@ -40,12 +40,16 @@ char *upb_strdup(const char *s) { } char *upb_strdup2(const char *s, size_t len) { + // Prevent overflow errors. + if (len == SIZE_MAX) return NULL; // Always null-terminate, even if binary data; but don't rely on the input to // have a null-terminating byte since it may be a raw binary buffer. size_t n = len + 1; char *p = malloc(n); - if (p) memcpy(p, s, len); - p[len] = 0; + if (p) { + memcpy(p, s, len); + p[len] = 0; + } return p; } diff --git a/upb/table.int.h b/upb/table.int.h index e851e90..e27fb01 100644 --- a/upb/table.int.h +++ b/upb/table.int.h @@ -207,6 +207,9 @@ typedef struct { #define UPB_STRTABLE_INIT(count, mask, ctype, size_lg2, entries) \ {{count, mask, ctype, size_lg2, entries}} +#define UPB_EMPTY_STRTABLE_INIT(ctype) \ + UPB_STRTABLE_INIT(0, 0, ctype, 0, NULL) + typedef struct { upb_table t; // For entries that don't fit in the array part. const _upb_value *array; // Array part of the table. See const note above. diff --git a/upb/upb.h b/upb/upb.h index 3744ea6..13efaed 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -25,6 +25,15 @@ #define UPB_INLINE static inline #endif +// For use in C/C++ source files (not headers), forces inlining within the file. +#ifdef __GNUC__ +#define UPB_FORCEINLINE inline __attribute__((always_inline)) +#define UPB_NOINLINE __attribute__((noinline)) +#else +#define UPB_FORCEINLINE +#define UPB_NOINLINE +#endif + #if __STDC_VERSION__ >= 199901L #define UPB_C99 #endif -- cgit v1.2.3 From 838009ba2b8ea1e99061c66e0fbd9cb53a96ec20 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 8 May 2015 17:20:55 -0700 Subject: Fixes for the open-source build. --- Makefile | 1 + upb/bindings/lua/upb.c | 2 +- upb/bindings/lua/upb/pb.c | 15 +++------------ upb/env.c | 8 +++++--- upb/env.h | 6 +++--- upb/pb/decoder.c | 4 +++- 6 files changed, 16 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 5887fbe..e75fe54 100644 --- a/Makefile +++ b/Makefile @@ -131,6 +131,7 @@ make_objs_cc = $$(patsubst upb/$$(pc).cc,obj/upb/$$(pc).$(1),$$($$(call to_srcs, upb_SRCS = \ upb/def.c \ + upb/env.c \ upb/handlers.c \ upb/refcounted.c \ upb/shim/shim.c \ diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index 5ad0235..b35af24 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -1358,7 +1358,7 @@ static size_t align_up(size_t val, size_t align) { // If we always read/write as a consistent type to each value, this shouldn't // violate aliasing. -#define DEREF(msg, ofs, type) *(type*)(&msg->data[ofs]) +#define DEREF(msg, ofs, type) *(type*)((char*)msg + sizeof(lupb_msg) + ofs) lupb_msg *lupb_msg_check(lua_State *L, int narg) { lupb_msg *msg = luaL_checkudata(L, narg, LUPB_MSG); diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index ea3d755..920648f 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -41,13 +41,6 @@ static int lupb_pbdecodermethod_new(lua_State *L) { return 1; // The DecoderMethod wrapper. } -// We implement upb's allocation function by allocating a Lua userdata. -// This is a raw hunk of memory that will be GC'd by Lua. -static void *lua_alloc(void *ud, size_t size) { - lua_State *L = ud; - return lua_newuserdata(L, size); -} - // Unlike most of our exposed Lua functions, this does not correspond to an // actual method on the underlying DecoderMethod. But it's convenient, and // important to implement in C because we can do stack allocation and @@ -77,11 +70,9 @@ static int lupb_pbdecodermethod_parse(lua_State *L) { upb_pbdecoder *decoder = upb_pbdecoder_create(&env, method, &sink); upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(decoder)); - // This won't get called in the error case, which longjmp's across us. But - // since we made our alloc function allocate only GC-able memory, that - // shouldn't matter. It *would* matter if the environment had references to - // any non-memory resources (ie. filehandles). As an alternative to this we - // could make the environment itself a userdata. + // TODO: This won't get called in the error case, which longjmp's across us. + // This will cause the memory to leak. To remedy this, we should make the + // upb_env wrapped in a userdata that guarantees this will get called. upb_env_uninit(&env); lupb_checkstatus(L, &status); diff --git a/upb/env.c b/upb/env.c index ae5fb85..7fa3334 100644 --- a/upb/env.c +++ b/upb/env.c @@ -43,7 +43,7 @@ static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) { UPB_UNUSED(oldsize); default_alloc_ud *ud = _ud; - mem_block *from = ptr ? ptr - sizeof(mem_block) : NULL; + mem_block *from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL; #ifndef NDEBUG if (from) { @@ -214,7 +214,9 @@ UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, upb_seededalloc *a = ud; size = align_up(size); - if (oldsize == 0 && size <= a->mem_limit - a->mem_ptr) { + assert(a->mem_limit >= a->mem_ptr); + + if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) { // Fast path: we can satisfy from the initial allocation. void *ret = a->mem_ptr; a->mem_ptr += size; @@ -229,7 +231,7 @@ UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) { a->mem_base = mem; a->mem_ptr = mem; - a->mem_limit = mem + len; + a->mem_limit = (char*)mem + len; a->need_cleanup = false; a->returned_allocfunc = false; diff --git a/upb/env.h b/upb/env.h index 500b0ac..78dda20 100644 --- a/upb/env.h +++ b/upb/env.h @@ -177,9 +177,9 @@ UPB_DEFINE_STRUCT0(upb_seededalloc, void *default_alloc_ud; // Pointers for the initial memory region. - void *mem_base; - void *mem_ptr; - void *mem_limit; + char *mem_base; + char *mem_ptr; + char *mem_limit; // For future expansion, since the size of this struct is exposed to users. void *future1; diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index f23645d..a780666 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -966,7 +966,9 @@ size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) { } bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) { - if (max < d->top - d->stack) { + assert(d->top >= d->stack); + + if (max < (size_t)(d->top - d->stack)) { // Can't set a limit smaller than what we are currently at. return false; } -- cgit v1.2.3