From 3c742bfdc8f63e1cfed63a93571bb4e72e5f49c4 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Fri, 5 Oct 2018 11:12:23 -0700 Subject: Fix json ignore unknown (#128) * Fix json ignore unknown Previously, there were several problems with ignoring unknown fields in JSON. 1) After finding that a field is unknown, the parser's state was not changed. Thus, there was no way to distinguish whether the parser was dealing with an unknown field or just a top-level message. 2) Several methods didn't respect unknown fields, e.g., start_object, end_bool, start_array. * Update json parser size * Update json parser size --- upb/json/parser.c | 156 ++++++++++++++++++++++++++++++++--------------------- upb/json/parser.h | 2 +- upb/json/parser.rl | 44 +++++++++++++-- 3 files changed, 135 insertions(+), 67 deletions(-) (limited to 'upb') diff --git a/upb/json/parser.c b/upb/json/parser.c index 10242aa..3ae4722 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -113,6 +113,9 @@ typedef struct { * because |f| is the field in the *current* message (i.e., the map-entry * message itself), not the parent's field that leads to this map. */ const upb_fielddef *mapfield; + + /* True if the field to be parsed is unknown. 
*/ + bool is_unknown_field; } upb_jsonparser_frame; struct upb_json_parser { @@ -918,6 +921,10 @@ static bool end_bool(upb_json_parser *p, bool val) { start_value_object(p, VALUE_BOOLVALUE); } + if (p->top->is_unknown_field) { + return true; + } + if (!parser_putbool(p, val)) { return false; } @@ -1024,6 +1031,7 @@ static bool start_stringval(upb_json_parser *p) { inner->name_table = NULL; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -1515,6 +1523,7 @@ static bool handle_mapentry(upb_json_parser *p) { inner->name_table = NULL; inner->mapfield = mapfield; inner->is_map = false; + inner->is_unknown_field = false; /* Don't set this to true *yet* -- we reuse parsing handlers below to push * the key field value to the sink, and these handlers will pop the frame @@ -1545,6 +1554,8 @@ static bool end_membername(upb_json_parser *p) { UPB_ASSERT(!p->top->f); if (!p->top->m) { + p->top->is_unknown_field = true; + multipart_end(p); return true; } @@ -1561,6 +1572,7 @@ static bool end_membername(upb_json_parser *p) { return true; } else if (p->ignore_json_unknown) { + p->top->is_unknown_field = true; multipart_end(p); return true; } else { @@ -1592,10 +1604,11 @@ static void end_member(upb_json_parser *p) { } p->top->f = NULL; + p->top->is_unknown_field = false; } static bool start_subobject(upb_json_parser *p) { - if (p->top->f == NULL) { + if (p->top->is_unknown_field) { upb_jsonparser_frame *inner; if (!check_stack(p)) return false; @@ -1604,6 +1617,7 @@ static bool start_subobject(upb_json_parser *p) { inner->f = NULL; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; } @@ -1625,6 +1639,7 @@ static bool start_subobject(upb_json_parser *p) { inner->f = NULL; inner->is_map = true; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; @@ -1645,6 +1660,7 @@ 
static bool start_subobject(upb_json_parser *p) { inner->f = NULL; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; @@ -1744,7 +1760,18 @@ static bool start_array(upb_json_parser *p) { start_listvalue_object(p); } - UPB_ASSERT(p->top->f); + if (p->top->is_unknown_field) { + inner = p->top + 1; + inner->m = NULL; + inner->name_table = NULL; + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + inner->is_unknown_field = true; + p->top = inner; + + return true; + } if (!upb_fielddef_isseq(p->top->f)) { upb_status_seterrf(&p->status, @@ -1764,6 +1791,7 @@ static bool start_array(upb_json_parser *p) { inner->f = p->top->f; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; @@ -1775,6 +1803,11 @@ static void end_array(upb_json_parser *p) { UPB_ASSERT(p->top > p->stack); p->top--; + + if (p->top->is_unknown_field) { + return; + } + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); upb_sink_endseq(&p->top->sink, sel); @@ -1794,13 +1827,13 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - if (!p->top->is_map) { + if (!p->top->is_map && p->top->m != NULL) { upb_sink_startmsg(&p->top->sink); } } static void end_object(upb_json_parser *p) { - if (!p->top->is_map) { + if (!p->top->is_map && p->top->m != NULL) { upb_status status; upb_status_clear(&status); upb_sink_endmsg(&p->top->sink, &status); @@ -1912,7 +1945,7 @@ static void end_structvalue_object(upb_json_parser *p) { } static bool is_top_level(upb_json_parser *p) { - return p->top == p->stack && p->top->f == NULL; + return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field; } static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) { @@ -1975,11 +2008,11 @@ static bool is_string_wrapper_object(upb_json_parser *p) { * final state once, when the closing '"' is seen. 
*/ -#line 2114 "upb/json/parser.rl" +#line 2147 "upb/json/parser.rl" -#line 1983 "upb/json/parser.c" +#line 2016 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 1, 1, 3, 1, 4, 1, 6, 1, 7, 1, 8, 1, @@ -2230,7 +2263,7 @@ static const int json_en_value_machine = 76; static const int json_en_main = 1; -#line 2117 "upb/json/parser.rl" +#line 2150 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -2253,7 +2286,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 2257 "upb/json/parser.c" +#line 2290 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -2328,83 +2361,83 @@ _match: switch ( *_acts++ ) { case 1: -#line 1988 "upb/json/parser.rl" +#line 2021 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 2: -#line 1990 "upb/json/parser.rl" +#line 2023 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 24; goto _again;} } break; case 3: -#line 1994 "upb/json/parser.rl" +#line 2027 "upb/json/parser.rl" { start_text(parser, p); } break; case 4: -#line 1995 "upb/json/parser.rl" +#line 2028 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 5: -#line 2001 "upb/json/parser.rl" +#line 2034 "upb/json/parser.rl" { start_hex(parser); } break; case 6: -#line 2002 "upb/json/parser.rl" +#line 2035 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 7: -#line 2003 "upb/json/parser.rl" +#line 2036 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 8: -#line 2009 "upb/json/parser.rl" +#line 2042 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 9: -#line 2015 "upb/json/parser.rl" +#line 2048 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 10: -#line 2027 "upb/json/parser.rl" +#line 2060 "upb/json/parser.rl" { start_duration_base(parser, p); } break; case 11: -#line 2028 "upb/json/parser.rl" 
+#line 2061 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_duration_base(parser, p)); } break; case 12: -#line 2030 "upb/json/parser.rl" +#line 2063 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 13: -#line 2035 "upb/json/parser.rl" +#line 2068 "upb/json/parser.rl" { start_timestamp_base(parser, p); } break; case 14: -#line 2036 "upb/json/parser.rl" +#line 2069 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_base(parser, p)); } break; case 15: -#line 2038 "upb/json/parser.rl" +#line 2071 "upb/json/parser.rl" { start_timestamp_fraction(parser, p); } break; case 16: -#line 2039 "upb/json/parser.rl" +#line 2072 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } break; case 17: -#line 2041 "upb/json/parser.rl" +#line 2074 "upb/json/parser.rl" { start_timestamp_zone(parser, p); } break; case 18: -#line 2042 "upb/json/parser.rl" +#line 2075 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } break; case 19: -#line 2044 "upb/json/parser.rl" +#line 2077 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 20: -#line 2049 "upb/json/parser.rl" +#line 2082 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { {stack[top++] = cs; cs = 48; goto _again;} @@ -2416,78 +2449,78 @@ _match: } break; case 21: -#line 2060 "upb/json/parser.rl" +#line 2093 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 76; goto _again;} } break; case 22: -#line 2065 "upb/json/parser.rl" +#line 2098 "upb/json/parser.rl" { start_member(parser); } break; case 23: -#line 2066 "upb/json/parser.rl" +#line 2099 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 24: -#line 2069 "upb/json/parser.rl" +#line 2102 "upb/json/parser.rl" { end_member(parser); } break; case 25: -#line 2075 "upb/json/parser.rl" +#line 2108 "upb/json/parser.rl" { start_object(parser); } break; case 26: -#line 2078 "upb/json/parser.rl" +#line 2111 "upb/json/parser.rl" { 
end_object(parser); } break; case 27: -#line 2084 "upb/json/parser.rl" +#line 2117 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 28: -#line 2088 "upb/json/parser.rl" +#line 2121 "upb/json/parser.rl" { end_array(parser); } break; case 29: -#line 2093 "upb/json/parser.rl" +#line 2126 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_number(parser, p)); } break; case 30: -#line 2094 "upb/json/parser.rl" +#line 2127 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 31: -#line 2096 "upb/json/parser.rl" +#line 2129 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 32: -#line 2097 "upb/json/parser.rl" +#line 2130 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 33: -#line 2099 "upb/json/parser.rl" +#line 2132 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 34: -#line 2101 "upb/json/parser.rl" +#line 2134 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 35: -#line 2103 "upb/json/parser.rl" +#line 2136 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 36: -#line 2105 "upb/json/parser.rl" +#line 2138 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject_full(parser)); } break; case 37: -#line 2106 "upb/json/parser.rl" +#line 2139 "upb/json/parser.rl" { end_subobject_full(parser); } break; case 38: -#line 2111 "upb/json/parser.rl" +#line 2144 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 2491 "upb/json/parser.c" +#line 2524 "upb/json/parser.c" } } @@ -2504,34 +2537,34 @@ _again: while ( __nacts-- > 0 ) { switch ( *__acts++ ) { case 0: -#line 1986 "upb/json/parser.rl" +#line 2019 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 26: -#line 2078 "upb/json/parser.rl" +#line 2111 "upb/json/parser.rl" { end_object(parser); } break; case 30: -#line 2094 "upb/json/parser.rl" +#line 2127 "upb/json/parser.rl" { 
CHECK_RETURN_TOP(end_number(parser, p)); } break; case 33: -#line 2099 "upb/json/parser.rl" +#line 2132 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 34: -#line 2101 "upb/json/parser.rl" +#line 2134 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 35: -#line 2103 "upb/json/parser.rl" +#line 2136 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 37: -#line 2106 "upb/json/parser.rl" +#line 2139 "upb/json/parser.rl" { end_subobject_full(parser); } break; -#line 2535 "upb/json/parser.c" +#line 2568 "upb/json/parser.c" } } } @@ -2539,7 +2572,7 @@ _again: _out: {} } -#line 2139 "upb/json/parser.rl" +#line 2172 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p); @@ -2571,9 +2604,9 @@ bool end(void *closure, const void *hd) { parse(parser, hd, &eof_ch, 0, NULL); return parser->current_state >= -#line 2575 "upb/json/parser.c" +#line 2608 "upb/json/parser.c" 105 -#line 2169 "upb/json/parser.rl" +#line 2202 "upb/json/parser.rl" ; } @@ -2585,16 +2618,17 @@ static void json_parser_reset(upb_json_parser *p) { p->top->f = NULL; p->top->is_map = false; p->top->is_mapentry = false; + p->top->is_unknown_field = false; /* Emit Ragel initialization of the parser. */ -#line 2592 "upb/json/parser.c" +#line 2626 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 2183 "upb/json/parser.rl" +#line 2217 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); diff --git a/upb/json/parser.h b/upb/json/parser.h index 316df08..15beaeb 100644 --- a/upb/json/parser.h +++ b/upb/json/parser.h @@ -29,7 +29,7 @@ UPB_DECLARE_DERIVED_TYPE(upb::json::ParserMethod, upb::RefCounted, * constructed. This hint may be an overestimate for some build configurations. * But if the parser library is upgraded without recompiling the application, * it may be an underestimate. 
*/ -#define UPB_JSON_PARSER_SIZE 4160 +#define UPB_JSON_PARSER_SIZE 4672 #ifdef __cplusplus diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 8e6caa9..29efd09 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -111,6 +111,9 @@ typedef struct { * because |f| is the field in the *current* message (i.e., the map-entry * message itself), not the parent's field that leads to this map. */ const upb_fielddef *mapfield; + + /* True if the field to be parsed is unknown. */ + bool is_unknown_field; } upb_jsonparser_frame; struct upb_json_parser { @@ -916,6 +919,10 @@ static bool end_bool(upb_json_parser *p, bool val) { start_value_object(p, VALUE_BOOLVALUE); } + if (p->top->is_unknown_field) { + return true; + } + if (!parser_putbool(p, val)) { return false; } @@ -1022,6 +1029,7 @@ static bool start_stringval(upb_json_parser *p) { inner->name_table = NULL; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -1513,6 +1521,7 @@ static bool handle_mapentry(upb_json_parser *p) { inner->name_table = NULL; inner->mapfield = mapfield; inner->is_map = false; + inner->is_unknown_field = false; /* Don't set this to true *yet* -- we reuse parsing handlers below to push * the key field value to the sink, and these handlers will pop the frame @@ -1543,6 +1552,8 @@ static bool end_membername(upb_json_parser *p) { UPB_ASSERT(!p->top->f); if (!p->top->m) { + p->top->is_unknown_field = true; + multipart_end(p); return true; } @@ -1559,6 +1570,7 @@ static bool end_membername(upb_json_parser *p) { return true; } else if (p->ignore_json_unknown) { + p->top->is_unknown_field = true; multipart_end(p); return true; } else { @@ -1590,10 +1602,11 @@ static void end_member(upb_json_parser *p) { } p->top->f = NULL; + p->top->is_unknown_field = false; } static bool start_subobject(upb_json_parser *p) { - if (p->top->f == NULL) { + if (p->top->is_unknown_field) { 
upb_jsonparser_frame *inner; if (!check_stack(p)) return false; @@ -1602,6 +1615,7 @@ static bool start_subobject(upb_json_parser *p) { inner->f = NULL; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; } @@ -1623,6 +1637,7 @@ static bool start_subobject(upb_json_parser *p) { inner->f = NULL; inner->is_map = true; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; @@ -1643,6 +1658,7 @@ static bool start_subobject(upb_json_parser *p) { inner->f = NULL; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; @@ -1742,7 +1758,18 @@ static bool start_array(upb_json_parser *p) { start_listvalue_object(p); } - UPB_ASSERT(p->top->f); + if (p->top->is_unknown_field) { + inner = p->top + 1; + inner->m = NULL; + inner->name_table = NULL; + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + inner->is_unknown_field = true; + p->top = inner; + + return true; + } if (!upb_fielddef_isseq(p->top->f)) { upb_status_seterrf(&p->status, @@ -1762,6 +1789,7 @@ static bool start_array(upb_json_parser *p) { inner->f = p->top->f; inner->is_map = false; inner->is_mapentry = false; + inner->is_unknown_field = false; p->top = inner; return true; @@ -1773,6 +1801,11 @@ static void end_array(upb_json_parser *p) { UPB_ASSERT(p->top > p->stack); p->top--; + + if (p->top->is_unknown_field) { + return; + } + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); upb_sink_endseq(&p->top->sink, sel); @@ -1792,13 +1825,13 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - if (!p->top->is_map) { + if (!p->top->is_map && p->top->m != NULL) { upb_sink_startmsg(&p->top->sink); } } static void end_object(upb_json_parser *p) { - if (!p->top->is_map) { + if (!p->top->is_map && p->top->m != NULL) { upb_status status; upb_status_clear(&status); upb_sink_endmsg(&p->top->sink, 
&status); @@ -1910,7 +1943,7 @@ static void end_structvalue_object(upb_json_parser *p) { } static bool is_top_level(upb_json_parser *p) { - return p->top == p->stack && p->top->f == NULL; + return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field; } static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) { @@ -2177,6 +2210,7 @@ static void json_parser_reset(upb_json_parser *p) { p->top->f = NULL; p->top->is_map = false; p->top->is_mapentry = false; + p->top->is_unknown_field = false; /* Emit Ragel initialization of the parser. */ %% write init; -- cgit v1.2.3