From b55f32b278deddaf2c75d7d24b5722017850d6d1 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Thu, 8 Jan 2015 01:31:28 -0800 Subject: Fix for JSON parser: don't overrun buffer parsing ints. --- upb/json/parser.rl | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'upb/json/parser.rl') diff --git a/upb/json/parser.rl b/upb/json/parser.rl index b72bc10..719af4c 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -260,16 +260,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, return true; } - if (p->accumulate_buf_size - p->accumulated_len < len) { - size_t need; - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } + size_t need; + if (!checked_add(p->accumulated_len, len, &need)) { + upb_status_seterrmsg(p->status, "Integer overflow."); + return false; + } - if (!accumulate_realloc(p, need)) { - return false; - } + if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { + return false; } if (p->accumulated != p->accumulate_buf) { @@ -513,9 +511,15 @@ static bool end_number(upb_json_parser *p, const char *ptr) { return false; } + // strtol() and friends unfortunately do not support specifying the length of + // the input string, so we need to force a copy into a NULL-terminated buffer. + if (!multipart_text(p, "\0", 1, false)) { + return false; + } + size_t len; const char *buf = accumulate_getptr(p, &len); - const char *myend = buf + len; + const char *myend = buf + len - 1; // One for NULL. char *end; switch (upb_fielddef_type(p->top->f)) { @@ -576,7 +580,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) { return true; err: - upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len); + upb_status_seterrf(p->status, "error parsing number: %s", buf); multipart_end(p); return false; } -- cgit v1.2.3 From fb585045692c482b6946fff63f0cd8425c8c70b5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 2 Feb 2015 13:38:05 -0800 Subject: Support maps in JSON parsing and serialization. This is a sync of our internal developing of JSON parsing and serialization. It implements native understanding of MapEntry submessages, so that map fields with (key, value) pairs are serialized as JSON maps (objects) natively rather than as arrays of objects with 'key' and 'value' fields. The parser also now understands how to emit handler calls corresponding to MapEntry objects when processing a map field. This sync also picks up a bugfix in `table.c` to handle an alloc-failed case. --- tests/json/test_json.cc | 98 ++++++++++++++++- tests/test_def.c | 34 ++++++ upb/def.c | 20 ++++ upb/def.h | 7 ++ upb/json/parser.h | 20 +++- upb/json/parser.rl | 245 ++++++++++++++++++++++++++++++++++++------ upb/json/printer.c | 275 ++++++++++++++++++++++++++++++++++++++++++------ upb/table.c | 8 +- 8 files changed, 636 insertions(+), 71 deletions(-) (limited to 'upb/json/parser.rl') diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc index f1e2304..4465c76 100644 --- a/tests/json/test_json.cc +++ b/tests/json/test_json.cc @@ -85,6 +85,33 @@ static TestCase kTestRoundtripMessages[] = { TEST("{\"optional_string\":\"\\uFFFF\"}"), EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}") }, + // map-field tests + { + TEST("{\"map_string_string\":{\"a\":\"value1\",\"b\":\"value2\"," + "\"c\":\"value3\"}}"), + EXPECT_SAME + }, + { + TEST("{\"map_int32_string\":{\"1\":\"value1\",\"-1\":\"value2\"," + "\"1234\":\"value3\"}}"), + EXPECT_SAME + }, + { + TEST("{\"map_bool_string\":{\"false\":\"value1\",\"true\":\"value2\"}}"), + EXPECT_SAME + }, + { + TEST("{\"map_string_int32\":{\"asdf\":1234,\"jkl;\":-1}}"), + EXPECT_SAME + }, + { + TEST("{\"map_string_bool\":{\"asdf\":true,\"jkl;\":false}}"), + EXPECT_SAME + }, + { + TEST("{\"map_string_msg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"), + EXPECT_SAME + }, TEST_SENTINEL }; @@ -115,6 +142,53 @@ static const upb::MessageDef* BuildTestMessage( submsg->set_full_name("SubMessage", &st); AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false); + // Create MapEntryStringString. + upb::reffed_ptr mapentry_string_string( + upb::MessageDef::New()); + mapentry_string_string->set_full_name("MapEntry_String_String", &st); + mapentry_string_string->setmapentry(true); + AddField(mapentry_string_string.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_string.get(), 2, "value", UPB_TYPE_STRING, false); + + // Create MapEntryInt32String. + upb::reffed_ptr mapentry_int32_string( + upb::MessageDef::New()); + mapentry_int32_string->set_full_name("MapEntry_Int32_String", &st); + mapentry_int32_string->setmapentry(true); + AddField(mapentry_int32_string.get(), 1, "key", UPB_TYPE_INT32, false); + AddField(mapentry_int32_string.get(), 2, "value", UPB_TYPE_STRING, false); + + // Create MapEntryBoolString. + upb::reffed_ptr mapentry_bool_string( + upb::MessageDef::New()); + mapentry_bool_string->set_full_name("MapEntry_Bool_String", &st); + mapentry_bool_string->setmapentry(true); + AddField(mapentry_bool_string.get(), 1, "key", UPB_TYPE_BOOL, false); + AddField(mapentry_bool_string.get(), 2, "value", UPB_TYPE_STRING, false); + + // Create MapEntryStringInt32. + upb::reffed_ptr mapentry_string_int32( + upb::MessageDef::New()); + mapentry_string_int32->set_full_name("MapEntry_String_Int32", &st); + mapentry_string_int32->setmapentry(true); + AddField(mapentry_string_int32.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_int32.get(), 2, "value", UPB_TYPE_INT32, false); + + // Create MapEntryStringBool. + upb::reffed_ptr mapentry_string_bool(upb::MessageDef::New()); + mapentry_string_bool->set_full_name("MapEntry_String_Bool", &st); + mapentry_string_bool->setmapentry(true); + AddField(mapentry_string_bool.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_bool.get(), 2, "value", UPB_TYPE_BOOL, false); + + // Create MapEntryStringMessage. + upb::reffed_ptr mapentry_string_msg(upb::MessageDef::New()); + mapentry_string_msg->set_full_name("MapEntry_String_Message", &st); + mapentry_string_msg->setmapentry(true); + AddField(mapentry_string_msg.get(), 1, "key", UPB_TYPE_STRING, false); + AddField(mapentry_string_msg.get(), 2, "value", UPB_TYPE_MESSAGE, false, + upb::upcast(submsg.get())); + // Create MyEnum. upb::reffed_ptr myenum(upb::EnumDef::New()); myenum->set_full_name("MyEnum", &st); @@ -150,13 +224,33 @@ static const upb::MessageDef* BuildTestMessage( AddField(md.get(), 19, "optional_enum", UPB_TYPE_ENUM, true, upb::upcast(myenum.get())); + AddField(md.get(), 20, "map_string_string", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_string.get())); + AddField(md.get(), 21, "map_int32_string", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_int32_string.get())); + AddField(md.get(), 22, "map_bool_string", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_bool_string.get())); + AddField(md.get(), 23, "map_string_int32", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_int32.get())); + AddField(md.get(), 24, "map_string_bool", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_bool.get())); + AddField(md.get(), 25, "map_string_msg", UPB_TYPE_MESSAGE, true, + upb::upcast(mapentry_string_msg.get())); + // Add both to our symtab. - upb::Def* defs[3] = { + upb::Def* defs[9] = { upb::upcast(submsg.ReleaseTo(&defs)), upb::upcast(myenum.ReleaseTo(&defs)), upb::upcast(md.ReleaseTo(&defs)), + upb::upcast(mapentry_string_string.ReleaseTo(&defs)), + upb::upcast(mapentry_int32_string.ReleaseTo(&defs)), + upb::upcast(mapentry_bool_string.ReleaseTo(&defs)), + upb::upcast(mapentry_string_int32.ReleaseTo(&defs)), + upb::upcast(mapentry_string_bool.ReleaseTo(&defs)), + upb::upcast(mapentry_string_msg.ReleaseTo(&defs)), }; - symtab->Add(defs, 3, &defs, &st); + symtab->Add(defs, 9, &defs, &st); + ASSERT(st.ok()); // Return TestMessage. return symtab->LookupMessage("TestMessage"); diff --git a/tests/test_def.c b/tests/test_def.c index b85b031..800d685 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -344,6 +344,39 @@ static void test_descriptor_flags() { upb_msgdef_unref(m2, &m2); } +static void test_mapentry_check() { + upb_status s = UPB_STATUS_INIT; + + upb_msgdef *m = upb_msgdef_new(&m); + upb_msgdef_setfullname(m, "TestMessage", &s); + upb_fielddef *f = upb_fielddef_new(&f); + upb_fielddef_setname(f, "field1", &s); + upb_fielddef_setnumber(f, 1, &s); + upb_fielddef_setlabel(f, UPB_LABEL_OPTIONAL); + upb_fielddef_settype(f, UPB_TYPE_MESSAGE); + upb_fielddef_setsubdefname(f, ".MapEntry", &s); + upb_msgdef_addfield(m, f, &f, &s); + ASSERT(upb_ok(&s)); + + upb_msgdef *subm = upb_msgdef_new(&subm); + upb_msgdef_setfullname(subm, "MapEntry", &s); + upb_msgdef_setmapentry(subm, true); + + upb_symtab *symtab = upb_symtab_new(&symtab); + upb_def *defs[] = {UPB_UPCAST(m), UPB_UPCAST(subm)}; + upb_symtab_add(symtab, defs, 2, NULL, &s); + // Should not have succeeded: non-repeated field pointing to a MapEntry. + ASSERT(!upb_ok(&s)); + + upb_fielddef_setlabel(f, UPB_LABEL_REPEATED); + upb_symtab_add(symtab, defs, 2, NULL, &s); + ASSERT(upb_ok(&s)); + + upb_symtab_unref(symtab, &symtab); + upb_msgdef_unref(subm, &subm); + upb_msgdef_unref(m, &m); +} + static void test_oneofs() { upb_status s = UPB_STATUS_INIT; bool ok = true; @@ -412,6 +445,7 @@ int run_tests(int argc, char *argv[]) { test_partial_freeze(); test_noreftracking(); test_descriptor_flags(); + test_mapentry_check(); test_oneofs(); return 0; } diff --git a/upb/def.c b/upb/def.c index 3dba221..0963675 100644 --- a/upb/def.c +++ b/upb/def.c @@ -211,6 +211,21 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) { upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f)); } + // Ensure that MapEntry submessages only appear as repeated fields, not + // optional/required (singular) fields. + if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && + upb_fielddef_msgsubdef(f) != NULL) { + const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); + if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) { + upb_status_seterrf(s, + "Field %s refers to mapentry message but is not " + "a repeated field", + upb_fielddef_name(f) ? upb_fielddef_name(f) : + "(unnamed)"); + return false; + } + } + return true; } @@ -1243,6 +1258,11 @@ bool upb_fielddef_isprimitive(const upb_fielddef *f) { return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); } +bool upb_fielddef_ismap(const upb_fielddef *f) { + return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) && + upb_msgdef_mapentry(upb_fielddef_msgsubdef(f)); +} + bool upb_fielddef_hassubdef(const upb_fielddef *f) { return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM; } diff --git a/upb/def.h b/upb/def.h index fc342f3..3281076 100644 --- a/upb/def.h +++ b/upb/def.h @@ -368,6 +368,7 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD, bool IsString() const; bool IsSequence() const; bool IsPrimitive() const; + bool IsMap() const; // How integers are encoded. Only meaningful for integer types. // Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes. @@ -592,6 +593,7 @@ bool upb_fielddef_issubmsg(const upb_fielddef *f); bool upb_fielddef_isstring(const upb_fielddef *f); bool upb_fielddef_isseq(const upb_fielddef *f); bool upb_fielddef_isprimitive(const upb_fielddef *f); +bool upb_fielddef_ismap(const upb_fielddef *f); int64_t upb_fielddef_defaultint64(const upb_fielddef *f); int32_t upb_fielddef_defaultint32(const upb_fielddef *f); uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f); @@ -980,6 +982,10 @@ UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m, void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry); bool upb_msgdef_mapentry(const upb_msgdef *m); +// Well-known field tag numbers for map-entry messages. +#define UPB_MAPENTRY_KEY 1 +#define UPB_MAPENTRY_VALUE 2 + const upb_oneofdef *upb_msgdef_findoneof(const upb_msgdef *m, const char *name); int upb_msgdef_numoneofs(const upb_msgdef *m); @@ -1479,6 +1485,7 @@ inline bool FieldDef::IsSubMessage() const { } inline bool FieldDef::IsString() const { return upb_fielddef_isstring(this); } inline bool FieldDef::IsSequence() const { return upb_fielddef_isseq(this); } +inline bool FieldDef::IsMap() const { return upb_fielddef_ismap(this); } inline int64_t FieldDef::default_int64() const { return upb_fielddef_defaultint64(this); } diff --git a/upb/json/parser.h b/upb/json/parser.h index 51578f2..c693edf 100644 --- a/upb/json/parser.h +++ b/upb/json/parser.h @@ -23,12 +23,30 @@ class Parser; UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser); -// Internal-only struct used by the parser. +// Internal-only struct used by the parser. A parser frame corresponds +// one-to-one with a handler (sink) frame. typedef struct { UPB_PRIVATE_FOR_CPP upb_sink sink; + // The current message in which we're parsing, and the field whose value we're + // expecting next. const upb_msgdef *m; const upb_fielddef *f; + + // We are in a repeated-field context, ready to emit mapentries as + // submessages. This flag alters the start-of-object (open-brace) behavior to + // begin a sequence of mapentry messages rather than a single submessage. + bool is_map; + // We are in a map-entry message context. This flag is set when parsing the + // value field of a single map entry and indicates to all value-field parsers + // (subobjects, strings, numbers, and bools) that the map-entry submessage + // should end as soon as the value is parsed. + bool is_mapentry; + // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + // message's map field that we're currently parsing. This differs from |f| + // because |f| is the field in the *current* message (i.e., the map-entry + // message itself), not the parent's field that leads to this map. + const upb_fielddef *mapfield; } upb_jsonparser_frame; diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 719af4c..c7b4362 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -221,7 +221,6 @@ badpadding: // the true value in a contiguous buffer. static void assert_accumulate_empty(upb_json_parser *p) { - UPB_UNUSED(p); assert(p->accumulated == NULL); assert(p->accumulated_len == 0); } @@ -506,6 +505,8 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } +static bool parse_number(upb_json_parser *p, const char *buf, const char *end); + static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; @@ -520,8 +521,12 @@ static bool end_number(upb_json_parser *p, const char *ptr) { size_t len; const char *buf = accumulate_getptr(p, &len); const char *myend = buf + len - 1; // One for NULL. - char *end; + return parse_number(p, buf, myend); +} +static bool parse_number(upb_json_parser *p, const char *buf, + const char *myend) { + char *end; switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { @@ -577,6 +582,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) { } multipart_end(p); + return true; err: @@ -595,6 +601,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) { bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); UPB_ASSERT_VAR(ok, ok); + return true; } @@ -611,6 +618,8 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -688,6 +697,7 @@ static bool end_stringval(upb_json_parser *p) { } multipart_end(p); + return ok; } @@ -696,54 +706,217 @@ static void start_member(upb_json_parser *p) { multipart_startaccum(p); } -static bool end_member(upb_json_parser *p) { - assert(!p->top->f); +// Helper: invoked during parse_mapentry() to emit the mapentry message's key +// field based on the current contents of the accumulate buffer. +static bool parse_mapentry_key(upb_json_parser *p) { + size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + // Emit the key field. We do a bit of ad-hoc parsing here because the + // parser state machine has already decided that this is a string field + // name, and we are reinterpreting it as some arbitrary key type. In + // particular, integer and bool keys are quoted, so we need to parse the + // quoted string contents here. - if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no key"); return false; } + switch (upb_fielddef_type(p->top->f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + // Invoke end_number. The accum buffer has the number's text already. + if (!parse_number(p, buf, buf + len)) { + return false; + } + break; + case UPB_TYPE_BOOL: + if (len == 4 && !strncmp(buf, "true", 4)) { + if (!parser_putbool(p, true)) { + return false; + } + } else if (len == 5 && !strncmp(buf, "false", 5)) { + if (!parser_putbool(p, false)) { + return false; + } + } else { + upb_status_seterrmsg(p->status, + "Map bool key not 'true' or 'false'"); + return false; + } + multipart_end(p); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_sink subsink; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, len, &subsink); + sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); + upb_sink_putstring(&subsink, sel, buf, len, NULL); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&subsink, sel); + multipart_end(p); + break; + } + default: + upb_status_seterrmsg(p->status, "Invalid field type for map key"); + return false; + } - p->top->f = f; - multipart_end(p); + return true; +} + +// Helper: emit one map entry (as a submessage in the map field sequence). This +// is invoked from end_membername(), at the end of the map entry's key string, +// with the map key in the accumulate buffer. It parses the key from that +// buffer, emits the handler calls to start the mapentry submessage (setting up +// its subframe in the process), and sets up state in the subframe so that the +// value parser (invoked next) will emit the mapentry's value field and then +// end the mapentry message. + +static bool handle_mapentry(upb_json_parser *p) { + // Map entry: p->top->sink is the seq frame, so we need to start a frame + // for the mapentry itself, and then set |f| in that frame so that the map + // value field is parsed, and also set a flag to end the frame after the + // map-entry value is parsed. + if (!check_stack(p)) return false; + + const upb_fielddef *mapfield = p->top->mapfield; + const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield); + + upb_jsonparser_frame *inner = p->top + 1; + p->top->f = mapfield; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = mapentrymsg; + inner->mapfield = mapfield; + inner->is_map = false; + + // Don't set this to true *yet* -- we reuse parsing handlers below to push + // the key field value to the sink, and these handlers will pop the frame + // if they see is_mapentry (when invoked by the parser state machine, they + // would have just seen the map-entry value, not key). + inner->is_mapentry = false; + p->top = inner; + + // send STARTMSG in submsg frame. + upb_sink_startmsg(&p->top->sink); + + parse_mapentry_key(p); + + // Set up the value field to receive the map-entry value. + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); + p->top->is_mapentry = true; // set up to pop frame after value is parsed. + p->top->mapfield = mapfield; + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no value"); + return false; + } return true; } -static void clear_member(upb_json_parser *p) { p->top->f = NULL; } +static bool end_membername(upb_json_parser *p) { + assert(!p->top->f); + + if (p->top->is_map) { + return handle_mapentry(p); + } else { + size_t len; + const char *buf = accumulate_getptr(p, &len); + const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + + if (!f) { + // TODO(haberman): Ignore unknown fields if requested/configured to do so. + upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + return false; + } + + p->top->f = f; + multipart_end(p); + + return true; + } +} + +static void end_member(upb_json_parser *p) { + // If we just parsed a map-entry value, end that frame too. + if (p->top->is_mapentry) { + assert(p->top > p->stack); + // send ENDMSG on submsg. + upb_status s = UPB_STATUS_INIT; + upb_sink_endmsg(&p->top->sink, &s); + const upb_fielddef* mapfield = p->top->mapfield; + + // send ENDSUBMSG in repeated-field-of-mapentries frame. + p->top--; + upb_selector_t sel; + bool ok = upb_handlers_getselector(mapfield, + UPB_HANDLER_ENDSUBMSG, &sel); + UPB_ASSERT_VAR(ok, ok); + upb_sink_endsubmsg(&p->top->sink, sel); + } + + p->top->f = NULL; +} static bool start_subobject(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_issubmsg(p->top->f)) { + if (upb_fielddef_ismap(p->top->f)) { + // Beginning of a map. Start a new parser frame in a repeated-field + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + upb_sink_startseq(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->mapfield = p->top->f; + inner->f = NULL; + inner->is_map = true; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else if (upb_fielddef_issubmsg(p->top->f)) { + // Beginning of a subobject. Start a new parser frame in the submsg + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else { upb_status_seterrf(p->status, "Object specified for non-message/group field: %s", upb_fielddef_name(p->top->f)); return false; } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->f = NULL; - p->top = inner; - - return true; } static void end_subobject(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (p->top->is_map) { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + upb_sink_endseq(&p->top->sink, sel); + } else { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } static bool start_array(upb_json_parser *p) { @@ -763,6 +936,8 @@ static bool start_array(upb_json_parser *p) { upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; return true; @@ -777,12 +952,16 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - upb_sink_startmsg(&p->top->sink); + if (!p->top->is_map) { + upb_sink_startmsg(&p->top->sink); + } } static void end_object(upb_json_parser *p) { - upb_status status; - upb_sink_endmsg(&p->top->sink, &status); + if (!p->top->is_map) { + upb_status status; + upb_sink_endmsg(&p->top->sink, &status); + } } @@ -854,10 +1033,10 @@ static void end_object(upb_json_parser *p) { ws string >{ start_member(parser); } - @{ CHECK_RETURN_TOP(end_member(parser)); } + @{ CHECK_RETURN_TOP(end_membername(parser)); } ws ":" ws value2 - %{ clear_member(parser); } + %{ end_member(parser); } ws; object = @@ -967,6 +1146,8 @@ void upb_json_parser_uninit(upb_json_parser *p) { void upb_json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; + p->top->is_map = false; + p->top->is_mapentry = false; int cs; int top; diff --git a/upb/json/printer.c b/upb/json/printer.c index 3dd3c20..bb748ec 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -182,11 +182,19 @@ static bool putkey(void *closure, const void *handler_data) { return true; \ } \ static bool repeated_##type(void *closure, const void *handler_data, \ - type val) { \ + type val) { \ upb_json_printer *p = closure; \ print_comma(p); \ CHK(put##type(closure, handler_data, val)); \ return true; \ + } \ + static bool putmapkey_##type(void *closure, const void *handler_data, \ + type val) { \ + upb_json_printer *p = closure; \ + print_data(p, "\"", 1); \ + CHK(put##type(closure, handler_data, val)); \ + print_data(p, "\":", 2); \ + return true; \ } TYPE_HANDLERS(double, fmt_double); @@ -222,20 +230,36 @@ static bool scalar_enum(void *closure, const void *handler_data, return true; } -static bool repeated_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - print_comma(p); - - const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); +static void print_enum_symbolic_name(upb_json_printer *p, + const upb_enumdef *def, + int32_t val) { + const char *symbolic_name = upb_enumdef_iton(def, val); if (symbolic_name) { print_data(p, "\"", 1); putstring(p, symbolic_name, strlen(symbolic_name)); print_data(p, "\"", 1); } else { - putint32_t(closure, NULL, val); + putint32_t(p, NULL, val); } +} + +static bool repeated_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + print_comma(p); + + print_enum_symbolic_name(p, hd->enumdef, val); + + return true; +} + +static bool mapvalue_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + + print_enum_symbolic_name(p, hd->enumdef, val); return true; } @@ -251,25 +275,35 @@ static void *repeated_startsubmsg(void *closure, const void *handler_data) { return closure; } -static bool startmap(void *closure, const void *handler_data) { +static void start_frame(upb_json_printer *p) { + p->depth_++; + p->first_elem_[p->depth_] = true; + print_data(p, "{", 1); +} + +static void end_frame(upb_json_printer *p) { + print_data(p, "}", 1); + p->depth_--; +} + +static bool printer_startmsg(void *closure, const void *handler_data) { UPB_UNUSED(handler_data); upb_json_printer *p = closure; - if (p->depth_++ == 0) { + if (p->depth_ == 0) { upb_bytessink_start(p->output_, 0, &p->subc_); } - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); + start_frame(p); return true; } -static bool endmap(void *closure, const void *handler_data, upb_status *s) { +static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) { UPB_UNUSED(handler_data); UPB_UNUSED(s); upb_json_printer *p = closure; - if (--p->depth_ == 0) { + end_frame(p); + if (p->depth_ == 0) { upb_bytessink_end(p->output_); } - print_data(p, "}", 1); return true; } @@ -290,6 +324,23 @@ static bool endseq(void *closure, const void *handler_data) { return true; } +static void *startmap(void *closure, const void *handler_data) { + upb_json_printer *p = closure; + CHK(putkey(closure, handler_data)); + p->depth_++; + p->first_elem_[p->depth_] = true; + print_data(p, "{", 1); + return closure; +} + +static bool endmap(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "}", 1); + p->depth_--; + return true; +} + static size_t putstr(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { UPB_UNUSED(handler_data); @@ -404,6 +455,36 @@ static bool repeated_endstr(void *closure, const void *handler_data) { return true; } +static void *mapkeyval_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return p; +} + +static size_t mapkey_str(void *closure, const void *handler_data, + const char *str, size_t len, + const upb_bufhandle *handle) { + CHK(putstr(closure, handler_data, str, len, handle)); + return len; +} + +static bool mapkey_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\":", 2); + return true; +} + +static bool mapvalue_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + static size_t scalar_bytes(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { @@ -421,31 +502,161 @@ static size_t repeated_bytes(void *closure, const void *handler_data, return len; } -void printer_sethandlers(const void *closure, upb_handlers *h) { +static size_t mapkey_bytes(void *closure, const void *handler_data, + const char *str, size_t len, + const upb_bufhandle *handle) { + upb_json_printer *p = closure; + CHK(putbytes(closure, handler_data, str, len, handle)); + print_data(p, ":", 1); + return len; +} + +static void set_enum_hd(upb_handlers *h, + const upb_fielddef *f, + upb_handlerattr *attr) { + EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); + hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); + hd->keyname = newstrpc(h, f); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr_sethandlerdata(attr, hd); +} + +// Set up handlers for a mapentry submessage (i.e., an individual key/value pair +// in a map). +// +// TODO: Handle missing key, missing value, out-of-order key/value, or repeated +// key or value cases properly. The right way to do this is to allocate a +// temporary structure at the start of a mapentry submessage, store key and +// value data in it as key and value handlers are called, and then print the +// key/value pair once at the end of the submessage. If we don't do this, we +// should at least detect the case and throw an error. However, so far all of +// our sources that emit mapentry messages do so canonically (with one key +// field, and then one value field), so this is not a pressing concern at the +// moment. +void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) { UPB_UNUSED(closure); + const upb_msgdef *md = upb_handlers_msgdef(h); + + // A mapentry message is printed simply as '"key": value'. Rather than + // special-case key and value for every type below, we just handle both + // fields explicitly here. + const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY); + const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE); upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlers_setstartmsg(h, startmap, &empty_attr); - upb_handlers_setendmsg(h, endmap, &empty_attr); - -#define TYPE(type, name, ctype) \ - case type: \ - if (upb_fielddef_isseq(f)) { \ - upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ - } else { \ - upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ - } \ + + switch (upb_fielddef_type(key_field)) { + case UPB_TYPE_INT32: + upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr); + break; + case UPB_TYPE_INT64: + upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr); + break; + case UPB_TYPE_UINT32: + upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr); + break; + case UPB_TYPE_UINT64: + upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr); + break; + case UPB_TYPE_STRING: + upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr); + upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr); + upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr); + break; + case UPB_TYPE_BYTES: + upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr); + break; + default: + assert(false); + break; + } + + switch (upb_fielddef_type(value_field)) { + case UPB_TYPE_INT32: + upb_handlers_setint32(h, value_field, putint32_t, &empty_attr); + break; + case UPB_TYPE_INT64: + upb_handlers_setint64(h, value_field, putint64_t, &empty_attr); + break; + case UPB_TYPE_UINT32: + upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr); + break; + case UPB_TYPE_UINT64: + upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, value_field, putbool, &empty_attr); + break; + case UPB_TYPE_FLOAT: + upb_handlers_setfloat(h, value_field, putfloat, &empty_attr); + break; + case UPB_TYPE_DOUBLE: + upb_handlers_setdouble(h, value_field, putdouble, &empty_attr); + break; + case UPB_TYPE_STRING: + upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr); + upb_handlers_setstring(h, value_field, putstr, &empty_attr); + upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr); + break; + case UPB_TYPE_BYTES: + upb_handlers_setstring(h, value_field, putbytes, &empty_attr); + break; + case UPB_TYPE_ENUM: { + upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; + set_enum_hd(h, value_field, &enum_attr); + upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr); + upb_handlerattr_uninit(&enum_attr); + break; + } + case UPB_TYPE_MESSAGE: + // No handler necessary -- the submsg handlers will print the message + // as appropriate. + break; + } + + upb_handlerattr_uninit(&empty_attr); +} + +void printer_sethandlers(const void *closure, upb_handlers *h) { + UPB_UNUSED(closure); + const upb_msgdef *md = upb_handlers_msgdef(h); + bool is_mapentry = upb_msgdef_mapentry(md); + upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; + + if (is_mapentry) { + // mapentry messages are sufficiently different that we handle them + // separately. + printer_sethandlers_mapentry(closure, h); + return; + } + + upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); + upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); + +#define TYPE(type, name, ctype) \ + case type: \ + if (upb_fielddef_isseq(f)) { \ + upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ + } else { \ + upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ + } \ break; upb_msg_field_iter i; - upb_msg_field_begin(&i, upb_handlers_msgdef(h)); + upb_msg_field_begin(&i, md); for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f)); - if (upb_fielddef_isseq(f)) { + if (upb_fielddef_ismap(f)) { + upb_handlers_setstartseq(h, f, startmap, &name_attr); + upb_handlers_setendseq(h, f, endmap, &name_attr); + } else if (upb_fielddef_isseq(f)) { upb_handlers_setstartseq(h, f, startseq, &name_attr); upb_handlers_setendseq(h, f, endseq, &empty_attr); } @@ -462,12 +673,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) { // For now, we always emit symbolic names for enums. We may want an // option later to control this behavior, but we will wait for a real // need first. - EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); - hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); - hd->keyname = newstrpc(h, f); - upb_handlers_addcleanup(h, hd, free); upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&enum_attr, hd); + set_enum_hd(h, f, &enum_attr); if (upb_fielddef_isseq(f)) { upb_handlers_setint32(h, f, repeated_enum, &enum_attr); diff --git a/upb/table.c b/upb/table.c index aa99fe2..fc69125 100644 --- a/upb/table.c +++ b/upb/table.c @@ -40,12 +40,16 @@ char *upb_strdup(const char *s) { } char *upb_strdup2(const char *s, size_t len) { + // Prevent overflow errors. + if (len == SIZE_MAX) return NULL; // Always null-terminate, even if binary data; but don't rely on the input to // have a null-terminating byte since it may be a raw binary buffer. size_t n = len + 1; char *p = malloc(n); - if (p) memcpy(p, s, len); - p[len] = 0; + if (p) { + memcpy(p, s, len); + p[len] = 0; + } return p; } -- cgit v1.2.3 From 099d57346ad36e4d65de559d3ac56b99b54b374a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 2 Feb 2015 14:25:04 -0800 Subject: Fixed JSON parser issue: missing NUL byte in parse_number() in some code paths. Also, fix unused-function warnings. --- upb/json/parser.c | 316 +++++++++++++++++++++++++++++++++++++++++------------ upb/json/parser.rl | 13 ++- upb/json/printer.c | 12 +- 3 files changed, 267 insertions(+), 74 deletions(-) (limited to 'upb/json/parser.rl') diff --git a/upb/json/parser.c b/upb/json/parser.c index 63ea1b8..a1e83c5 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -508,11 +508,17 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } +static bool parse_number(upb_json_parser *p); + static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } + return parse_number(p); +} + +static bool parse_number(upb_json_parser *p) { // strtol() and friends unfortunately do not support specifying the length of // the input string, so we need to force a copy into a NULL-terminated buffer. if (!multipart_text(p, "\0", 1, false)) { @@ -522,8 +528,8 @@ static bool end_number(upb_json_parser *p, const char *ptr) { size_t len; const char *buf = accumulate_getptr(p, &len); const char *myend = buf + len - 1; // One for NULL. - char *end; + char *end; switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { @@ -579,6 +585,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) { } multipart_end(p); + return true; err: @@ -597,6 +604,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) { bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); UPB_ASSERT_VAR(ok, ok); + return true; } @@ -613,6 +621,8 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -690,6 +700,7 @@ static bool end_stringval(upb_json_parser *p) { } multipart_end(p); + return ok; } @@ -698,54 +709,217 @@ static void start_member(upb_json_parser *p) { multipart_startaccum(p); } -static bool end_member(upb_json_parser *p) { - assert(!p->top->f); +// Helper: invoked during parse_mapentry() to emit the mapentry message's key +// field based on the current contents of the accumulate buffer. +static bool parse_mapentry_key(upb_json_parser *p) { + size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + // Emit the key field. We do a bit of ad-hoc parsing here because the + // parser state machine has already decided that this is a string field + // name, and we are reinterpreting it as some arbitrary key type. In + // particular, integer and bool keys are quoted, so we need to parse the + // quoted string contents here. - if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no key"); return false; } + switch (upb_fielddef_type(p->top->f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + // Invoke end_number. The accum buffer has the number's text already. + if (!parse_number(p)) { + return false; + } + break; + case UPB_TYPE_BOOL: + if (len == 4 && !strncmp(buf, "true", 4)) { + if (!parser_putbool(p, true)) { + return false; + } + } else if (len == 5 && !strncmp(buf, "false", 5)) { + if (!parser_putbool(p, false)) { + return false; + } + } else { + upb_status_seterrmsg(p->status, + "Map bool key not 'true' or 'false'"); + return false; + } + multipart_end(p); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_sink subsink; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, len, &subsink); + sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); + upb_sink_putstring(&subsink, sel, buf, len, NULL); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&subsink, sel); + multipart_end(p); + break; + } + default: + upb_status_seterrmsg(p->status, "Invalid field type for map key"); + return false; + } - p->top->f = f; - multipart_end(p); + return true; +} + +// Helper: emit one map entry (as a submessage in the map field sequence). This +// is invoked from end_membername(), at the end of the map entry's key string, +// with the map key in the accumulate buffer. It parses the key from that +// buffer, emits the handler calls to start the mapentry submessage (setting up +// its subframe in the process), and sets up state in the subframe so that the +// value parser (invoked next) will emit the mapentry's value field and then +// end the mapentry message. + +static bool handle_mapentry(upb_json_parser *p) { + // Map entry: p->top->sink is the seq frame, so we need to start a frame + // for the mapentry itself, and then set |f| in that frame so that the map + // value field is parsed, and also set a flag to end the frame after the + // map-entry value is parsed. + if (!check_stack(p)) return false; + + const upb_fielddef *mapfield = p->top->mapfield; + const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield); + + upb_jsonparser_frame *inner = p->top + 1; + p->top->f = mapfield; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = mapentrymsg; + inner->mapfield = mapfield; + inner->is_map = false; + + // Don't set this to true *yet* -- we reuse parsing handlers below to push + // the key field value to the sink, and these handlers will pop the frame + // if they see is_mapentry (when invoked by the parser state machine, they + // would have just seen the map-entry value, not key). + inner->is_mapentry = false; + p->top = inner; + + // send STARTMSG in submsg frame. + upb_sink_startmsg(&p->top->sink); + + parse_mapentry_key(p); + + // Set up the value field to receive the map-entry value. + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); + p->top->is_mapentry = true; // set up to pop frame after value is parsed. + p->top->mapfield = mapfield; + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no value"); + return false; + } return true; } -static void clear_member(upb_json_parser *p) { p->top->f = NULL; } +static bool end_membername(upb_json_parser *p) { + assert(!p->top->f); + + if (p->top->is_map) { + return handle_mapentry(p); + } else { + size_t len; + const char *buf = accumulate_getptr(p, &len); + const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + + if (!f) { + // TODO(haberman): Ignore unknown fields if requested/configured to do so. + upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + return false; + } + + p->top->f = f; + multipart_end(p); + + return true; + } +} + +static void end_member(upb_json_parser *p) { + // If we just parsed a map-entry value, end that frame too. + if (p->top->is_mapentry) { + assert(p->top > p->stack); + // send ENDMSG on submsg. + upb_status s = UPB_STATUS_INIT; + upb_sink_endmsg(&p->top->sink, &s); + const upb_fielddef* mapfield = p->top->mapfield; + + // send ENDSUBMSG in repeated-field-of-mapentries frame. + p->top--; + upb_selector_t sel; + bool ok = upb_handlers_getselector(mapfield, + UPB_HANDLER_ENDSUBMSG, &sel); + UPB_ASSERT_VAR(ok, ok); + upb_sink_endsubmsg(&p->top->sink, sel); + } + + p->top->f = NULL; +} static bool start_subobject(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_issubmsg(p->top->f)) { + if (upb_fielddef_ismap(p->top->f)) { + // Beginning of a map. Start a new parser frame in a repeated-field + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + upb_sink_startseq(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->mapfield = p->top->f; + inner->f = NULL; + inner->is_map = true; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else if (upb_fielddef_issubmsg(p->top->f)) { + // Beginning of a subobject. Start a new parser frame in the submsg + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else { upb_status_seterrf(p->status, "Object specified for non-message/group field: %s", upb_fielddef_name(p->top->f)); return false; } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->f = NULL; - p->top = inner; - - return true; } static void end_subobject(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (p->top->is_map) { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + upb_sink_endseq(&p->top->sink, sel); + } else { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } static bool start_array(upb_json_parser *p) { @@ -765,6 +939,8 @@ static bool start_array(upb_json_parser *p) { upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; return true; @@ -779,12 +955,16 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - upb_sink_startmsg(&p->top->sink); + if (!p->top->is_map) { + upb_sink_startmsg(&p->top->sink); + } } static void end_object(upb_json_parser *p) { - upb_status status; - upb_sink_endmsg(&p->top->sink, &status); + if (!p->top->is_map) { + upb_status status; + upb_sink_endmsg(&p->top->sink, &status); + } } @@ -809,11 +989,11 @@ static void end_object(upb_json_parser *p) { // final state once, when the closing '"' is seen. -#line 905 "upb/json/parser.rl" +#line 1085 "upb/json/parser.rl" -#line 817 "upb/json/parser.c" +#line 997 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -964,7 +1144,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 908 "upb/json/parser.rl" +#line 1088 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -984,7 +1164,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 988 "upb/json/parser.c" +#line 1168 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1059,118 +1239,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 820 "upb/json/parser.rl" +#line 1000 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 821 "upb/json/parser.rl" +#line 1001 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 825 "upb/json/parser.rl" +#line 1005 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 826 "upb/json/parser.rl" +#line 1006 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 832 "upb/json/parser.rl" +#line 1012 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 833 "upb/json/parser.rl" +#line 1013 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 834 "upb/json/parser.rl" +#line 1014 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 840 "upb/json/parser.rl" +#line 1020 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 846 "upb/json/parser.rl" +#line 1026 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 849 "upb/json/parser.rl" +#line 1029 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 851 "upb/json/parser.rl" +#line 1031 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 856 "upb/json/parser.rl" +#line 1036 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 857 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_member(parser)); } +#line 1037 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 860 "upb/json/parser.rl" - { clear_member(parser); } +#line 1040 "upb/json/parser.rl" + { end_member(parser); } break; case 14: -#line 866 "upb/json/parser.rl" +#line 1046 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 869 "upb/json/parser.rl" +#line 1049 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 875 "upb/json/parser.rl" +#line 1055 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 879 "upb/json/parser.rl" +#line 1059 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 884 "upb/json/parser.rl" +#line 1064 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 885 "upb/json/parser.rl" +#line 1065 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 887 "upb/json/parser.rl" +#line 1067 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 888 "upb/json/parser.rl" +#line 1068 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 890 "upb/json/parser.rl" +#line 1070 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 892 "upb/json/parser.rl" +#line 1072 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 894 "upb/json/parser.rl" +#line 1074 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 896 "upb/json/parser.rl" +#line 1076 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 897 "upb/json/parser.rl" +#line 1077 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 902 "upb/json/parser.rl" +#line 1082 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1174 "upb/json/parser.c" +#line 1354 "upb/json/parser.c" } } @@ -1183,7 +1363,7 @@ _again: _out: {} } -#line 927 "upb/json/parser.rl" +#line 1107 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -1227,18 +1407,20 @@ void upb_json_parser_uninit(upb_json_parser *p) { void upb_json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; + p->top->is_map = false; + p->top->is_mapentry = false; int cs; int top; // Emit Ragel initialization of the parser. -#line 1236 "upb/json/parser.c" +#line 1418 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 975 "upb/json/parser.rl" +#line 1157 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index c7b4362..0379be4 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -221,6 +221,7 @@ badpadding: // the true value in a contiguous buffer. static void assert_accumulate_empty(upb_json_parser *p) { + UPB_UNUSED(p); assert(p->accumulated == NULL); assert(p->accumulated_len == 0); } @@ -505,13 +506,17 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } -static bool parse_number(upb_json_parser *p, const char *buf, const char *end); +static bool parse_number(upb_json_parser *p); static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } + return parse_number(p); +} + +static bool parse_number(upb_json_parser *p) { // strtol() and friends unfortunately do not support specifying the length of // the input string, so we need to force a copy into a NULL-terminated buffer. if (!multipart_text(p, "\0", 1, false)) { @@ -521,11 +526,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) { size_t len; const char *buf = accumulate_getptr(p, &len); const char *myend = buf + len - 1; // One for NULL. - return parse_number(p, buf, myend); -} -static bool parse_number(upb_json_parser *p, const char *buf, - const char *myend) { char *end; switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: @@ -730,7 +731,7 @@ static bool parse_mapentry_key(upb_json_parser *p) { case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: // Invoke end_number. The accum buffer has the number's text already. - if (!parse_number(p, buf, buf + len)) { + if (!parse_number(p)) { return false; } break; diff --git a/upb/json/printer.c b/upb/json/printer.c index bb748ec..deae058 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -187,7 +187,9 @@ static bool putkey(void *closure, const void *handler_data) { print_comma(p); \ CHK(put##type(closure, handler_data, val)); \ return true; \ - } \ + } + +#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \ static bool putmapkey_##type(void *closure, const void *handler_data, \ type val) { \ upb_json_printer *p = closure; \ @@ -205,7 +207,15 @@ TYPE_HANDLERS(uint32_t, fmt_int64); TYPE_HANDLERS(int64_t, fmt_int64); TYPE_HANDLERS(uint64_t, fmt_uint64); +// double and float are not allowed to be map keys. +TYPE_HANDLERS_MAPKEY(bool, fmt_bool); +TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64); + #undef TYPE_HANDLERS +#undef TYPE_HANDLERS_MAPKEY typedef struct { void *keyname; -- cgit v1.2.3 From 508c39ee133d6725a4440ba98a1555e2026975c2 Mon Sep 17 00:00:00 2001 From: Martin Maly Date: Wed, 6 May 2015 13:18:33 -0700 Subject: Resolve compilation errors if compiled with more stringent semantic checks. Adding Travis test to build with strict warnings. Fixing a warning in a test which used signed/unsigned integer comparison. --- .travis.yml | 1 + Makefile | 8 ++++++++ tests/json/test_json.cc | 2 +- travis.sh | 10 ++++++++++ upb/handlers.c | 2 +- upb/json/parser.c | 17 +++++++++++------ upb/json/parser.rl | 9 ++++++++- upb/json/printer.c | 2 +- upb/pb/decoder.c | 6 +++--- upb/pb/encoder.c | 4 ++-- upb/table.c | 2 +- 11 files changed, 47 insertions(+), 16 deletions(-) (limited to 'upb/json/parser.rl') diff --git a/.travis.yml b/.travis.yml index 4407f5c..917aa91 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,3 +10,4 @@ env: - UPB_TRAVIS_BUILD=withprotobuf - UPB_TRAVIS_BUILD=lua - UPB_TRAVIS_BUILD=coverage + - UPB_TRAVIS_BUILD=warnings diff --git a/Makefile b/Makefile index 8af52d4..5887fbe 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,10 @@ USER_CPPFLAGS= # Build with "make WITH_JIT=yes" (or anything besides "no") to enable the JIT. WITH_JIT=no +# Build with "make WITH_MAX_WARNINGS=yes" (or anything besides "no") to enable +# with strict warnings and treat warnings as errors. +WITH_MAX_WARNINGS=no + # Basic compiler/flag setup. CC=cc CXX=c++ @@ -53,6 +57,10 @@ ifneq ($(WITH_JIT), no) CPPFLAGS += -DUPB_USE_JIT_X64 endif +ifneq ($(WITH_MAX_WARNINGS), no) + WARNFLAGS=-Wall -Wextra -Wpointer-arith -Werror +endif + # Build with "make Q=" to see all commands that are being executed. Q=@ diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc index 4465c76..828e603 100644 --- a/tests/json/test_json.cc +++ b/tests/json/test_json.cc @@ -347,7 +347,7 @@ void test_json_roundtrip() { test_case->input : test_case->expected; - for (int i = 0; i < strlen(test_case->input); i++) { + for (size_t i = 0; i < strlen(test_case->input); i++) { test_json_roundtrip_message(test_case->input, expected, serialize_handlers.get(), i); } diff --git a/travis.sh b/travis.sh index 26cf481..502b2f1 100755 --- a/travis.sh +++ b/travis.sh @@ -28,6 +28,16 @@ withprotobuf_script() { make test } +# Build with strict warnings. +warnings_install() { + : +} +warnings_script() { + make -j12 default WITH_MAX_WARNINGS=yes + make -j12 tests WITH_MAX_WARNINGS=yes + make test +} + # A 32-bit build. Can only test the core because any dependencies # need to be available as 32-bit libs also, which gets hairy fast. # Can't enable the JIT because it only supports x64. diff --git a/upb/handlers.c b/upb/handlers.c index fd19633..fe368e5 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -574,7 +574,7 @@ bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, *s = f->selector_base; break; } - assert(*s < upb_fielddef_containingtype(f)->selector_count); + assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count); return true; } diff --git a/upb/json/parser.c b/upb/json/parser.c index a1e83c5..08cd13d 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -1135,8 +1135,6 @@ static const char _json_trans_actions[] = { }; static const int json_start = 1; -static const int json_first_final = 56; -static const int json_error = 0; static const int json_en_number_machine = 10; static const int json_en_string_machine = 19; @@ -1164,7 +1162,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 1168 "upb/json/parser.c" +#line 1166 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1350,7 +1348,7 @@ _match: #line 1082 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1354 "upb/json/parser.c" +#line 1352 "upb/json/parser.c" } } @@ -1382,6 +1380,13 @@ error: bool end(void *closure, const void *hd) { UPB_UNUSED(closure); UPB_UNUSED(hd); + + // Prevent compile warning on unused static constants. + UPB_UNUSED(json_start); + UPB_UNUSED(json_en_number_machine); + UPB_UNUSED(json_en_string_machine); + UPB_UNUSED(json_en_value_machine); + UPB_UNUSED(json_en_main); return true; } @@ -1414,13 +1419,13 @@ void upb_json_parser_reset(upb_json_parser *p) { int top; // Emit Ragel initialization of the parser. -#line 1418 "upb/json/parser.c" +#line 1423 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 1157 "upb/json/parser.rl" +#line 1164 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 0379be4..b171617 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -1084,7 +1084,7 @@ static void end_object(upb_json_parser *p) { main := ws object ws; }%% -%% write data; +%% write data noerror nofinal; size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -1122,6 +1122,13 @@ error: bool end(void *closure, const void *hd) { UPB_UNUSED(closure); UPB_UNUSED(hd); + + // Prevent compile warning on unused static constants. + UPB_UNUSED(json_start); + UPB_UNUSED(json_en_number_machine); + UPB_UNUSED(json_en_string_machine); + UPB_UNUSED(json_en_value_machine); + UPB_UNUSED(json_en_main); return true; } diff --git a/upb/json/printer.c b/upb/json/printer.c index deae058..c7267e0 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -162,7 +162,7 @@ static bool putkey(void *closure, const void *handler_data) { return true; } -#define CHKFMT(val) if ((val) == -1) return false; +#define CHKFMT(val) if ((val) == (size_t)-1) return false; #define CHK(val) if (!(val)) return false; #define TYPE_HANDLERS(type, fmt_func) \ diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 04ca413..522b02e 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -124,7 +124,7 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { // and the parsing stack, so must be called whenever either is updated. static void set_delim_end(upb_pbdecoder *d) { size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; - if (delim_ofs <= (d->end - d->buf)) { + if (delim_ofs <= (size_t)(d->end - d->buf)) { d->delim_end = d->buf + delim_ofs; d->data_end = d->delim_end; } else { @@ -268,7 +268,7 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, advancetobuf(d, d->buf_param, d->size_param); } if (curbufleft(d) >= bytes) { - consumebytes(d, buf + avail, bytes); + consumebytes(d, (char *)buf + avail, bytes); return DECODE_OK; } else if (d->data_end == d->delim_end) { seterr(d, "Submessage ended in the middle of a value or group"); @@ -296,7 +296,7 @@ static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf, memcpy(buf, d->ptr, ret); if (in_residual_buf(d, d->ptr)) { size_t copy = UPB_MIN(bytes - ret, d->size_param); - memcpy(buf + ret, d->buf_param, copy); + memcpy((char *)buf + ret, d->buf_param, copy); ret += copy; } return ret; diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c index d5685dc..2534a4d 100644 --- a/upb/pb/encoder.c +++ b/upb/pb/encoder.c @@ -79,7 +79,7 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) { // Call to ensure that at least "bytes" bytes are available for writing at // e->ptr. Returns false if the bytes could not be allocated. static bool reserve(upb_pb_encoder *e, size_t bytes) { - if ((e->limit - e->ptr) < bytes) { + if ((size_t)(e->limit - e->ptr) < bytes) { size_t needed = bytes + (e->ptr - e->buf); size_t old_size = e->limit - e->buf; size_t new_size = old_size; @@ -110,7 +110,7 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) { // Call when "bytes" bytes have been writte at e->ptr. The caller *must* have // previously called reserve() with at least this many bytes. static void encoder_advance(upb_pb_encoder *e, size_t bytes) { - assert((e->limit - e->ptr) >= bytes); + assert((size_t)(e->limit - e->ptr) >= bytes); e->ptr += bytes; } diff --git a/upb/table.c b/upb/table.c index fc69125..9914a03 100644 --- a/upb/table.c +++ b/upb/table.c @@ -542,7 +542,7 @@ void upb_inttable_compact(upb_inttable *t) { counts[log2ceil(key)]++; } - int arr_size; + size_t arr_size = 1; int arr_count = upb_inttable_count(t); if (upb_inttable_count(t) >= max_key * MIN_DENSITY) { -- cgit v1.2.3