From 72e66e2b556e1725a4a4fc87dc423a610ae1f1cc Mon Sep 17 00:00:00 2001 From: Bo Yang Date: Sun, 24 Jun 2018 01:10:19 +0000 Subject: Enable ignore unknown field in json parsing --- upb/json/parser.c | 128 +++++++++++++++++++++++++++++++++++------------------ upb/json/parser.h | 9 ++-- upb/json/parser.rl | 56 ++++++++++++++++++++--- 3 files changed, 139 insertions(+), 54 deletions(-) (limited to 'upb') diff --git a/upb/json/parser.c b/upb/json/parser.c index 4899401..04fedbf 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -96,6 +96,9 @@ struct upb_json_parser { /* Intermediate result of parsing a unicode escape sequence. */ uint32_t digit; + + /* Whether to proceed if unknown field is met. */ + bool ignore_json_unknown; }; struct upb_json_parsermethod { @@ -616,6 +619,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) { return false; } + if (p->top->f == NULL) { + multipart_end(p); + return true; + } + return parse_number(p, false); } @@ -768,6 +776,10 @@ static bool parse_number(upb_json_parser *p, bool is_quoted) { static bool parser_putbool(upb_json_parser *p, bool val) { bool ok; + if (p->top->f == NULL) { + return true; + } + if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { upb_status_seterrf(&p->status, "Boolean value specified for non-bool field: %s", @@ -783,7 +795,10 @@ static bool parser_putbool(upb_json_parser *p, bool val) { } static bool start_stringval(upb_json_parser *p) { - UPB_ASSERT(p->top->f); + if (p->top->f == NULL) { + multipart_startaccum(p); + return true; + } if (upb_fielddef_isstring(p->top->f)) { upb_jsonparser_frame *inner; @@ -834,6 +849,11 @@ static bool start_stringval(upb_json_parser *p) { static bool end_stringval(upb_json_parser *p) { bool ok = true; + if (p->top->f == NULL) { + multipart_end(p); + return true; + } + switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_BYTES: if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), @@ -1025,6 +1045,10 @@ static bool handle_mapentry(upb_json_parser *p) { static bool end_membername(upb_json_parser *p) { UPB_ASSERT(!p->top->f); + if (!p->top->m) { + return true; + } + if (p->top->is_map) { return handle_mapentry(p); } else { @@ -1036,10 +1060,11 @@ static bool end_membername(upb_json_parser *p) { p->top->f = upb_value_getconstptr(v); multipart_end(p); + return true; + } else if (p->ignore_json_unknown) { + multipart_end(p); return true; } else { - /* TODO(haberman): Ignore unknown fields if requested/configured to do - * so. */ upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf); upb_env_reporterror(p->env, &p->status); return false; @@ -1071,7 +1096,18 @@ static void end_member(upb_json_parser *p) { } static bool start_subobject(upb_json_parser *p) { - UPB_ASSERT(p->top->f); + if (p->top->f == NULL) { + upb_jsonparser_frame *inner; + if (!check_stack(p)) return false; + + inner = p->top + 1; + inner->m = NULL; + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + return true; + } if (upb_fielddef_ismap(p->top->f)) { upb_jsonparser_frame *inner; @@ -1130,9 +1166,12 @@ static void end_subobject(upb_json_parser *p) { upb_sink_endseq(&p->top->sink, sel); } else { upb_selector_t sel; + bool is_unknown = p->top->m == NULL; p->top--; - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (!is_unknown) { + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } } @@ -1214,11 +1253,11 @@ static void end_object(upb_json_parser *p) { * final state once, when the closing '"' is seen. */ -#line 1310 "upb/json/parser.rl" +#line 1349 "upb/json/parser.rl" -#line 1222 "upb/json/parser.c" +#line 1261 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -1367,7 +1406,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 1313 "upb/json/parser.rl" +#line 1352 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -1389,7 +1428,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 1393 "upb/json/parser.c" +#line 1432 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1464,118 +1503,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 1225 "upb/json/parser.rl" +#line 1264 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 1226 "upb/json/parser.rl" +#line 1265 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 1230 "upb/json/parser.rl" +#line 1269 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 1231 "upb/json/parser.rl" +#line 1270 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 1237 "upb/json/parser.rl" +#line 1276 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 1238 "upb/json/parser.rl" +#line 1277 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 1239 "upb/json/parser.rl" +#line 1278 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 1245 "upb/json/parser.rl" +#line 1284 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 1251 "upb/json/parser.rl" +#line 1290 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 1254 "upb/json/parser.rl" +#line 1293 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 1256 "upb/json/parser.rl" +#line 1295 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 1261 "upb/json/parser.rl" +#line 1300 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 1262 "upb/json/parser.rl" +#line 1301 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 1265 "upb/json/parser.rl" +#line 1304 "upb/json/parser.rl" { end_member(parser); } break; case 14: -#line 1271 "upb/json/parser.rl" +#line 1310 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 1274 "upb/json/parser.rl" +#line 1313 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 1280 "upb/json/parser.rl" +#line 1319 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 1284 "upb/json/parser.rl" +#line 1323 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 1289 "upb/json/parser.rl" +#line 1328 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 1290 "upb/json/parser.rl" +#line 1329 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 1292 "upb/json/parser.rl" +#line 1331 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 1293 "upb/json/parser.rl" +#line 1332 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 1295 "upb/json/parser.rl" +#line 1334 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 1297 "upb/json/parser.rl" +#line 1336 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 1299 "upb/json/parser.rl" +#line 1338 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 1301 "upb/json/parser.rl" +#line 1340 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 1302 "upb/json/parser.rl" +#line 1341 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 1307 "upb/json/parser.rl" +#line 1346 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1579 "upb/json/parser.c" +#line 1618 "upb/json/parser.c" } } @@ -1588,7 +1627,7 @@ _again: _out: {} } -#line 1334 "upb/json/parser.rl" +#line 1373 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p); @@ -1629,13 +1668,13 @@ static void json_parser_reset(upb_json_parser *p) { /* Emit Ragel initialization of the parser. */ -#line 1633 "upb/json/parser.c" +#line 1672 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 1374 "upb/json/parser.rl" +#line 1413 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); @@ -1722,7 +1761,8 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { upb_json_parser *upb_json_parser_create(upb_env *env, const upb_json_parsermethod *method, - upb_sink *output) { + upb_sink *output, + bool ignore_json_unknown) { #ifndef NDEBUG const size_t size_before = upb_env_bytesallocated(env); #endif @@ -1741,6 +1781,8 @@ upb_json_parser *upb_json_parser_create(upb_env *env, p->top->m = upb_handlers_msgdef(output->handlers); set_name_table(p, p->top); + p->ignore_json_unknown = ignore_json_unknown; + /* If this fails, uncomment and increase the value in parser.h. */ /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <= diff --git a/upb/json/parser.h b/upb/json/parser.h index bcc2c84..3540c53 100644 --- a/upb/json/parser.h +++ b/upb/json/parser.h @@ -38,7 +38,7 @@ UPB_DECLARE_DERIVED_TYPE(upb::json::ParserMethod, upb::RefCounted, class upb::json::Parser { public: static Parser* Create(Environment* env, const ParserMethod* method, - Sink* output); + Sink* output, bool ignore_json_unknown); BytesSink* input(); @@ -72,7 +72,8 @@ UPB_BEGIN_EXTERN_C upb_json_parser* upb_json_parser_create(upb_env* e, const upb_json_parsermethod* m, - upb_sink* output); + upb_sink* output, + bool ignore_json_unknown); upb_bytessink *upb_json_parser_input(upb_json_parser *p); upb_json_parsermethod* upb_json_parsermethod_new(const upb_msgdef* md, @@ -92,8 +93,8 @@ UPB_END_EXTERN_C namespace upb { namespace json { inline Parser* Parser::Create(Environment* env, const ParserMethod* method, - Sink* output) { - return upb_json_parser_create(env, method, output); + Sink* output, bool ignore_json_unknown) { + return upb_json_parser_create(env, method, output, ignore_json_unknown); } inline BytesSink* Parser::input() { return upb_json_parser_input(this); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 0312628..c731035 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -94,6 +94,9 @@ struct upb_json_parser { /* Intermediate result of parsing a unicode escape sequence. */ uint32_t digit; + + /* Whether to proceed if unknown field is met. */ + bool ignore_json_unknown; }; struct upb_json_parsermethod { @@ -614,6 +617,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) { return false; } + if (p->top->f == NULL) { + multipart_end(p); + return true; + } + return parse_number(p, false); } @@ -766,6 +774,10 @@ static bool parse_number(upb_json_parser *p, bool is_quoted) { static bool parser_putbool(upb_json_parser *p, bool val) { bool ok; + if (p->top->f == NULL) { + return true; + } + if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { upb_status_seterrf(&p->status, "Boolean value specified for non-bool field: %s", @@ -781,7 +793,10 @@ static bool parser_putbool(upb_json_parser *p, bool val) { } static bool start_stringval(upb_json_parser *p) { - UPB_ASSERT(p->top->f); + if (p->top->f == NULL) { + multipart_startaccum(p); + return true; + } if (upb_fielddef_isstring(p->top->f)) { upb_jsonparser_frame *inner; @@ -832,6 +847,11 @@ static bool start_stringval(upb_json_parser *p) { static bool end_stringval(upb_json_parser *p) { bool ok = true; + if (p->top->f == NULL) { + multipart_end(p); + return true; + } + switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_BYTES: if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), @@ -1023,6 +1043,10 @@ static bool handle_mapentry(upb_json_parser *p) { static bool end_membername(upb_json_parser *p) { UPB_ASSERT(!p->top->f); + if (!p->top->m) { + return true; + } + if (p->top->is_map) { return handle_mapentry(p); } else { @@ -1034,10 +1058,11 @@ static bool end_membername(upb_json_parser *p) { p->top->f = upb_value_getconstptr(v); multipart_end(p); + return true; + } else if (p->ignore_json_unknown) { + multipart_end(p); return true; } else { - /* TODO(haberman): Ignore unknown fields if requested/configured to do - * so. */ upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf); upb_env_reporterror(p->env, &p->status); return false; @@ -1069,7 +1094,18 @@ static void end_member(upb_json_parser *p) { } static bool start_subobject(upb_json_parser *p) { - UPB_ASSERT(p->top->f); + if (p->top->f == NULL) { + upb_jsonparser_frame *inner; + if (!check_stack(p)) return false; + + inner = p->top + 1; + inner->m = NULL; + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + return true; + } if (upb_fielddef_ismap(p->top->f)) { upb_jsonparser_frame *inner; @@ -1128,9 +1164,12 @@ static void end_subobject(upb_json_parser *p) { upb_sink_endseq(&p->top->sink, sel); } else { upb_selector_t sel; + bool is_unknown = p->top->m == NULL; p->top--; - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (!is_unknown) { + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } } @@ -1457,7 +1496,8 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { upb_json_parser *upb_json_parser_create(upb_env *env, const upb_json_parsermethod *method, - upb_sink *output) { + upb_sink *output, + bool ignore_json_unknown) { #ifndef NDEBUG const size_t size_before = upb_env_bytesallocated(env); #endif @@ -1476,6 +1516,8 @@ upb_json_parser *upb_json_parser_create(upb_env *env, p->top->m = upb_handlers_msgdef(output->handlers); set_name_table(p, p->top); + p->ignore_json_unknown = ignore_json_unknown; + /* If this fails, uncomment and increase the value in parser.h. */ /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <= -- cgit v1.2.3