From 0553eff64a87eceff0de3b6260b4f2d45b61703a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Jan 2019 22:40:50 -0800 Subject: upb_refcounted is gone! Some tests still to fix. --- upb/json/parser.c | 320 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 169 insertions(+), 151 deletions(-) (limited to 'upb/json/parser.c') diff --git a/upb/json/parser.c b/upb/json/parser.c index 90a401d..4bc9163 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -154,14 +154,13 @@ void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); } typedef struct { /* For encoding Any value field in binary format. */ - const upb_handlers *encoder_handlers; - upb_pb_encoder *encoder; + upb_handlercache *encoder_handlercache; upb_stringsink stringsink; /* For decoding Any value field in json format. */ - upb_json_parsermethod *parser_method; - upb_json_parser* parser; + upb_json_codecache *parser_codecache; upb_sink sink; + upb_json_parser *parser; /* Mark the range of uninterpreted values in json input before type url. */ const char *before_type_url_start; @@ -180,7 +179,7 @@ typedef struct { const upb_fielddef *f; /* The table mapping json name to fielddef for this message. */ - upb_strtable *name_table; + const upb_strtable *name_table; /* We are in a repeated-field context, ready to emit mapentries as * submessages. This flag alters the start-of-object (open-brace) behavior to @@ -259,60 +258,67 @@ struct upb_json_parser { struct tm tm; }; -struct upb_json_parsermethod { - upb_refcounted base; +struct upb_json_codecache { + upb_arena arena; + upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */ +}; +struct upb_json_parsermethod { + const upb_json_codecache *cache; upb_byteshandler input_handler_; - /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */ - upb_inttable name_tables; + /* Maps json_name -> fielddef */ + upb_strtable name_table; }; #define PARSER_CHECK_RETURN(x) if (!(x)) return false -static void json_parser_any_frame_reset(upb_jsonparser_any_frame *frame) { - frame->encoder_handlers = NULL; - frame->encoder = NULL; - frame->parser_method = NULL; +static upb_jsonparser_any_frame *json_parser_any_frame_new( + upb_json_parser *p) { + upb_jsonparser_any_frame *frame; + + frame = upb_env_malloc(p->env, sizeof(upb_jsonparser_any_frame)); + + frame->encoder_handlercache = upb_pb_encoder_newcache(); + frame->parser_codecache = upb_json_codecache_new(); frame->parser = NULL; frame->before_type_url_start = NULL; frame->before_type_url_end = NULL; frame->after_type_url_start = NULL; + + upb_stringsink_init(&frame->stringsink); + + return frame; } static void json_parser_any_frame_set_payload_type( upb_json_parser *p, upb_jsonparser_any_frame *frame, const upb_msgdef *payload_type) { + const upb_handlers *h; + const upb_json_parsermethod *parser_method; + upb_pb_encoder *encoder; + /* Initialize encoder. */ - frame->encoder_handlers = - upb_pb_encoder_newhandlers(payload_type, &frame->encoder_handlers); - upb_stringsink_init(&frame->stringsink); - frame->encoder = - upb_pb_encoder_create( - p->env, frame->encoder_handlers, - &frame->stringsink.sink); + h = upb_handlercache_get(frame->encoder_handlercache, payload_type); + encoder = upb_pb_encoder_create(p->env, h, &frame->stringsink.sink); /* Initialize parser. */ - frame->parser_method = - upb_json_parsermethod_new(payload_type, &frame->parser_method); - upb_sink_reset(&frame->sink, frame->encoder_handlers, frame->encoder); - frame->parser = - upb_json_parser_create(p->env, frame->parser_method, p->symtab, - &frame->sink, p->ignore_json_unknown); + parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type); + upb_sink_reset(&frame->sink, h, encoder); + frame->parser = upb_json_parser_create(p->env, parser_method, p->symtab, + &frame->sink, p->ignore_json_unknown); } static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) { - upb_handlers_unref(frame->encoder_handlers, - &frame->encoder_handlers); - upb_json_parsermethod_unref(frame->parser_method, - &frame->parser_method); + upb_handlercache_free(frame->encoder_handlercache); + upb_json_codecache_free(frame->parser_codecache); upb_stringsink_uninit(&frame->stringsink); } static bool json_parser_any_frame_has_type_url( upb_jsonparser_any_frame *frame) { - return frame->encoder != NULL; + return frame->parser != NULL; } static bool json_parser_any_frame_has_value_before_type_url( @@ -334,7 +340,7 @@ static bool json_parser_any_frame_has_value( static void json_parser_any_frame_set_before_type_url_end( upb_jsonparser_any_frame *frame, const char *ptr) { - if (frame->encoder == NULL) { + if (frame->parser == NULL) { frame->before_type_url_end = ptr; } } @@ -376,9 +382,12 @@ static bool check_stack(upb_json_parser *p) { static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { upb_value v; - bool ok = upb_inttable_lookupptr(&p->method->name_tables, frame->m, &v); + const upb_json_codecache *cache = p->method->cache; + bool ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v); + const upb_json_parsermethod *method = upb_value_getptr(v); UPB_ASSERT(ok); - frame->name_table = upb_value_getptr(v); + + frame->name_table = &method->name_table; } /* There are GCC/Clang built-ins for overflow checking which we could start @@ -1929,9 +1938,7 @@ static bool start_subobject(upb_json_parser *p) { if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { p->top->is_any = true; - p->top->any_frame = - upb_env_malloc(p->env, sizeof(upb_jsonparser_any_frame)); - json_parser_any_frame_reset(p->top->any_frame); + p->top->any_frame = json_parser_any_frame_new(p); } else { p->top->is_any = false; p->top->any_frame = NULL; @@ -2409,11 +2416,11 @@ static bool is_string_wrapper_object(upb_json_parser *p) { * final state once, when the closing '"' is seen. */ -#line 2571 "upb/json/parser.rl" +#line 2578 "upb/json/parser.rl" -#line 2417 "upb/json/parser.c" +#line 2424 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 1, 1, 3, 1, 4, 1, 6, 1, 7, 1, 8, 1, @@ -2660,7 +2667,7 @@ static const int json_en_value_machine = 75; static const int json_en_main = 1; -#line 2574 "upb/json/parser.rl" +#line 2581 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -2683,7 +2690,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 2687 "upb/json/parser.c" +#line 2694 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -2758,83 +2765,83 @@ _match: switch ( *_acts++ ) { case 1: -#line 2422 "upb/json/parser.rl" +#line 2429 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 2: -#line 2424 "upb/json/parser.rl" +#line 2431 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 23;goto _again;} } break; case 3: -#line 2428 "upb/json/parser.rl" +#line 2435 "upb/json/parser.rl" { start_text(parser, p); } break; case 4: -#line 2429 "upb/json/parser.rl" +#line 2436 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 5: -#line 2435 "upb/json/parser.rl" +#line 2442 "upb/json/parser.rl" { start_hex(parser); } break; case 6: -#line 2436 "upb/json/parser.rl" +#line 2443 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 7: -#line 2437 "upb/json/parser.rl" +#line 2444 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 8: -#line 2443 "upb/json/parser.rl" +#line 2450 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 9: -#line 2449 "upb/json/parser.rl" +#line 2456 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 10: -#line 2461 "upb/json/parser.rl" +#line 2468 "upb/json/parser.rl" { start_duration_base(parser, p); } break; case 11: -#line 2462 "upb/json/parser.rl" +#line 2469 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_duration_base(parser, p)); } break; case 12: -#line 2464 "upb/json/parser.rl" +#line 2471 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 13: -#line 2469 "upb/json/parser.rl" +#line 2476 "upb/json/parser.rl" { start_timestamp_base(parser, p); } break; case 14: -#line 2470 "upb/json/parser.rl" +#line 2477 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_base(parser, p)); } break; case 15: -#line 2472 "upb/json/parser.rl" +#line 2479 "upb/json/parser.rl" { start_timestamp_fraction(parser, p); } break; case 16: -#line 2473 "upb/json/parser.rl" +#line 2480 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } break; case 17: -#line 2475 "upb/json/parser.rl" +#line 2482 "upb/json/parser.rl" { start_timestamp_zone(parser, p); } break; case 18: -#line 2476 "upb/json/parser.rl" +#line 2483 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } break; case 19: -#line 2478 "upb/json/parser.rl" +#line 2485 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 20: -#line 2483 "upb/json/parser.rl" +#line 2490 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { {stack[top++] = cs; cs = 47;goto _again;} @@ -2846,11 +2853,11 @@ _match: } break; case 21: -#line 2494 "upb/json/parser.rl" +#line 2501 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 75;goto _again;} } break; case 22: -#line 2499 "upb/json/parser.rl" +#line 2506 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { start_any_member(parser, p); @@ -2860,11 +2867,11 @@ _match: } break; case 23: -#line 2506 "upb/json/parser.rl" +#line 2513 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 24: -#line 2509 "upb/json/parser.rl" +#line 2516 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { end_any_member(parser, p); @@ -2874,7 +2881,7 @@ _match: } break; case 25: -#line 2520 "upb/json/parser.rl" +#line 2527 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { start_any_object(parser, p); @@ -2884,7 +2891,7 @@ _match: } break; case 26: -#line 2529 "upb/json/parser.rl" +#line 2536 "upb/json/parser.rl" { if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { CHECK_RETURN_TOP(end_any_object(parser, p)); @@ -2894,54 +2901,54 @@ _match: } break; case 27: -#line 2541 "upb/json/parser.rl" +#line 2548 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 28: -#line 2545 "upb/json/parser.rl" +#line 2552 "upb/json/parser.rl" { end_array(parser); } break; case 29: -#line 2550 "upb/json/parser.rl" +#line 2557 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_number(parser, p)); } break; case 30: -#line 2551 "upb/json/parser.rl" +#line 2558 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 31: -#line 2553 "upb/json/parser.rl" +#line 2560 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 32: -#line 2554 "upb/json/parser.rl" +#line 2561 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 33: -#line 2556 "upb/json/parser.rl" +#line 2563 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 34: -#line 2558 "upb/json/parser.rl" +#line 2565 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 35: -#line 2560 "upb/json/parser.rl" +#line 2567 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 36: -#line 2562 "upb/json/parser.rl" +#line 2569 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject_full(parser)); } break; case 37: -#line 2563 "upb/json/parser.rl" +#line 2570 "upb/json/parser.rl" { end_subobject_full(parser); } break; case 38: -#line 2568 "upb/json/parser.rl" +#line 2575 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 2945 "upb/json/parser.c" +#line 2952 "upb/json/parser.c" } } @@ -2958,32 +2965,32 @@ _again: while ( __nacts-- > 0 ) { switch ( *__acts++ ) { case 0: -#line 2420 "upb/json/parser.rl" +#line 2427 "upb/json/parser.rl" { p--; {cs = stack[--top]; if ( p == pe ) goto _test_eof; goto _again;} } break; case 30: -#line 2551 "upb/json/parser.rl" +#line 2558 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 33: -#line 2556 "upb/json/parser.rl" +#line 2563 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, true)); } break; case 34: -#line 2558 "upb/json/parser.rl" +#line 2565 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_bool(parser, false)); } break; case 35: -#line 2560 "upb/json/parser.rl" +#line 2567 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_null(parser)); } break; case 37: -#line 2563 "upb/json/parser.rl" +#line 2570 "upb/json/parser.rl" { end_subobject_full(parser); } break; -#line 2987 "upb/json/parser.c" +#line 2994 "upb/json/parser.c" } } } @@ -2991,7 +2998,7 @@ goto _again;} } _out: {} } -#line 2596 "upb/json/parser.rl" +#line 2603 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p); @@ -3039,13 +3046,13 @@ static void json_parser_reset(upb_json_parser *p) { /* Emit Ragel initialization of the parser. */ -#line 3043 "upb/json/parser.c" +#line 3050 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 2643 "upb/json/parser.rl" +#line 2650 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); @@ -3055,70 +3062,46 @@ static void json_parser_reset(upb_json_parser *p) { upb_status_clear(&p->status); } -static void free_json_parsermethod(upb_refcounted *r) { - upb_json_parsermethod *method = (upb_json_parsermethod*)r; - - upb_inttable_iter i; - upb_inttable_begin(&i, &method->name_tables); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_value val = upb_inttable_iter_value(&i); - upb_strtable *t = upb_value_getptr(val); - upb_strtable_uninit(t); - upb_gfree(t); - } - - upb_inttable_uninit(&method->name_tables); +static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, + const upb_msgdef *md) { + upb_msg_field_iter i; + upb_alloc *alloc = upb_arena_alloc(&c->arena); - upb_gfree(r); -} + upb_json_parsermethod *m = upb_gmalloc(sizeof(*m)); -static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) { - upb_msg_field_iter i; - upb_strtable *t; + m->cache = c; - /* It would be nice to stack-allocate this, but protobufs do not limit the - * length of fields to any reasonable limit. */ - char *buf = NULL; - size_t len = 0; + upb_byteshandler_init(&m->input_handler_); + upb_byteshandler_setstring(&m->input_handler_, parse, m); + upb_byteshandler_setendstr(&m->input_handler_, end, m); - if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) { - return; - } + upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc); - /* TODO(haberman): handle malloc failure. */ - t = upb_gmalloc(sizeof(*t)); - upb_strtable_init(t, UPB_CTYPE_CONSTPTR); - upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(t)); + /* Build name_table */ for(upb_msg_field_begin(&i, md); !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); + upb_value v = upb_value_constptr(f); + char *buf; /* Add an entry for the JSON name. */ - size_t field_len = upb_fielddef_getjsonname(f, buf, len); - if (field_len > len) { - size_t len2; - buf = upb_grealloc(buf, 0, field_len); - len = field_len; - len2 = upb_fielddef_getjsonname(f, buf, len); - UPB_ASSERT(len == len2); - } - upb_strtable_insert(t, buf, upb_value_constptr(f)); + size_t len = upb_fielddef_getjsonname(f, NULL, 0); + buf = upb_malloc(alloc, len); + upb_fielddef_getjsonname(f, buf, len); + upb_strtable_insert3(&m->name_table, buf, len, v, alloc); if (strcmp(buf, upb_fielddef_name(f)) != 0) { /* Since the JSON name is different from the regular field name, add an * entry for the raw name (compliant proto3 JSON parsers must accept * both). */ - upb_strtable_insert(t, upb_fielddef_name(f), upb_value_constptr(f)); - } - - if (upb_fielddef_issubmsg(f)) { - add_jsonname_table(m, upb_fielddef_msgsubdef(f)); + const char *name = upb_fielddef_name(f); + upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); } } - upb_gfree(buf); + return m; } /* Public API *****************************************************************/ @@ -3146,9 +3129,7 @@ upb_json_parser *upb_json_parser_create(upb_env *env, p->top->m = upb_handlers_msgdef(output->handlers); if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { p->top->is_any = true; - p->top->any_frame = - upb_env_malloc(p->env, sizeof(upb_jsonparser_any_frame)); - json_parser_any_frame_reset(p->top->any_frame); + p->top->any_frame = json_parser_any_frame_new(p); } else { p->top->is_any = false; p->top->any_frame = NULL; @@ -3169,24 +3150,61 @@ upb_bytessink *upb_json_parser_input(upb_json_parser *p) { return &p->input_; } -upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md, - const void* owner) { - static const struct upb_refcounted_vtbl vtbl = {NULL, free_json_parsermethod}; - upb_json_parsermethod *ret = upb_gmalloc(sizeof(*ret)); - upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner); +const upb_byteshandler *upb_json_parsermethod_inputhandler( + const upb_json_parsermethod *m) { + return &m->input_handler_; +} + +upb_json_codecache *upb_json_codecache_new() { + upb_alloc *alloc; + upb_json_codecache *c; - upb_byteshandler_init(&ret->input_handler_); - upb_byteshandler_setstring(&ret->input_handler_, parse, ret); - upb_byteshandler_setendstr(&ret->input_handler_, end, ret); + c = upb_gmalloc(sizeof(*c)); - upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR); + upb_arena_init(&c->arena); + alloc = upb_arena_alloc(&c->arena); - add_jsonname_table(ret, md); + upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc); - return ret; + return c; } -const upb_byteshandler *upb_json_parsermethod_inputhandler( - const upb_json_parsermethod *m) { - return &m->input_handler_; +void upb_json_codecache_free(upb_json_codecache *c) { + upb_arena_uninit(&c->arena); + upb_gfree(c); +} + +upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c, + const upb_msgdef *md) { + upb_json_parsermethod *m; + upb_value v; + upb_msg_field_iter i; + + if (upb_inttable_lookupptr(&c->methods, md, &v)) { + return upb_value_getptr(v); + } + + m = parsermethod_new(c, md); + v = upb_value_ptr(m); + + if (!m) return NULL; + if (!upb_inttable_insertptr(&c->methods, m, v)) return NULL; + + /* Populate parser methods for all submessages, so the name tables will + * be available during parsing. */ + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + + if (upb_fielddef_issubmsg(f)) { + const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); + const upb_json_parsermethod *sub_method = + upb_json_codecache_get(c, subdef); + + if (!sub_method) return NULL; + } + } + + return m; } -- cgit v1.2.3