From 3bd691a4975b2267ff04611507e766a7f9f87e83 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 8 May 2015 16:56:29 -0700 Subject: Google-internal development. --- upb/json/parser.c | 466 +++++++++++++++++++++++++++++++++++++++++------------ upb/json/parser.h | 82 ++-------- upb/json/parser.rl | 394 ++++++++++++++++++++++++++++++++++++-------- upb/json/printer.c | 340 ++++++++++++++++++++++++++++++++------ upb/json/printer.h | 70 +++----- 5 files changed, 1018 insertions(+), 334 deletions(-) (limited to 'upb/json') diff --git a/upb/json/parser.c b/upb/json/parser.c index cfe1def..4f4a96e 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -33,6 +33,71 @@ #include "upb/json/parser.h" +#define UPB_JSON_MAX_DEPTH 64 + +typedef struct { + upb_sink sink; + + // The current message in which we're parsing, and the field whose value we're + // expecting next. + const upb_msgdef *m; + const upb_fielddef *f; + + // We are in a repeated-field context, ready to emit mapentries as + // submessages. This flag alters the start-of-object (open-brace) behavior to + // begin a sequence of mapentry messages rather than a single submessage. + bool is_map; + + // We are in a map-entry message context. This flag is set when parsing the + // value field of a single map entry and indicates to all value-field parsers + // (subobjects, strings, numbers, and bools) that the map-entry submessage + // should end as soon as the value is parsed. + bool is_mapentry; + + // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + // message's map field that we're currently parsing. This differs from |f| + // because |f| is the field in the *current* message (i.e., the map-entry + // message itself), not the parent's field that leads to this map. + const upb_fielddef *mapfield; +} upb_jsonparser_frame; + +struct upb_json_parser { + upb_env *env; + upb_byteshandler input_handler_; + upb_bytessink input_; + + // Stack to track the JSON scopes we are in. + upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; + upb_jsonparser_frame *top; + upb_jsonparser_frame *limit; + + upb_status *status; + + // Ragel's internal parsing stack for the parsing state machine. + int current_state; + int parser_stack[UPB_JSON_MAX_DEPTH]; + int parser_top; + + // The handle for the current buffer. + const upb_bufhandle *handle; + + // Accumulate buffer. See details in parser.rl. + const char *accumulated; + size_t accumulated_len; + char *accumulate_buf; + size_t accumulate_buf_size; + + // Multi-part text data. See details in parser.rl. + int multipart_state; + upb_selector_t string_selector; + + // Input capture. See details in parser.rl. + const char *capture; + + // Intermediate result of parsing a unicode escape sequence. + uint32_t digit; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false // Used to signal that a capture has been suspended. @@ -235,12 +300,13 @@ static void accumulate_clear(upb_json_parser *p) { // Used internally by accumulate_append(). static bool accumulate_realloc(upb_json_parser *p, size_t need) { - size_t new_size = UPB_MAX(p->accumulate_buf_size, 128); + size_t old_size = p->accumulate_buf_size; + size_t new_size = UPB_MAX(old_size, 128); while (new_size < need) { new_size = saturating_multiply(new_size, 2); } - void *mem = realloc(p->accumulate_buf, new_size); + void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); if (!mem) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); return false; @@ -262,16 +328,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, return true; } - if (p->accumulate_buf_size - p->accumulated_len < len) { - size_t need; - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } + size_t need; + if (!checked_add(p->accumulated_len, len, &need)) { + upb_status_seterrmsg(p->status, "Integer overflow."); + return false; + } - if (!accumulate_realloc(p, need)) { - return false; - } + if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { + return false; } if (p->accumulated != p->accumulate_buf) { @@ -510,16 +574,28 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } +static bool parse_number(upb_json_parser *p); + static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } + return parse_number(p); +} + +static bool parse_number(upb_json_parser *p) { + // strtol() and friends unfortunately do not support specifying the length of + // the input string, so we need to force a copy into a NULL-terminated buffer. + if (!multipart_text(p, "\0", 1, false)) { + return false; + } + size_t len; const char *buf = accumulate_getptr(p, &len); - const char *myend = buf + len; - char *end; + const char *myend = buf + len - 1; // One for NULL. + char *end; switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { @@ -575,10 +651,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) { } multipart_end(p); + return true; err: - upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len); + upb_status_seterrf(p->status, "error parsing number: %s", buf); multipart_end(p); return false; } @@ -593,6 +670,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) { bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); UPB_ASSERT_VAR(ok, ok); + return true; } @@ -609,6 +687,8 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -686,6 +766,7 @@ static bool end_stringval(upb_json_parser *p) { } multipart_end(p); + return ok; } @@ -694,54 +775,217 @@ static void start_member(upb_json_parser *p) { multipart_startaccum(p); } -static bool end_member(upb_json_parser *p) { - assert(!p->top->f); +// Helper: invoked during parse_mapentry() to emit the mapentry message's key +// field based on the current contents of the accumulate buffer. +static bool parse_mapentry_key(upb_json_parser *p) { + size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + // Emit the key field. We do a bit of ad-hoc parsing here because the + // parser state machine has already decided that this is a string field + // name, and we are reinterpreting it as some arbitrary key type. In + // particular, integer and bool keys are quoted, so we need to parse the + // quoted string contents here. - if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no key"); return false; } + switch (upb_fielddef_type(p->top->f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + // Invoke end_number. The accum buffer has the number's text already. + if (!parse_number(p)) { + return false; + } + break; + case UPB_TYPE_BOOL: + if (len == 4 && !strncmp(buf, "true", 4)) { + if (!parser_putbool(p, true)) { + return false; + } + } else if (len == 5 && !strncmp(buf, "false", 5)) { + if (!parser_putbool(p, false)) { + return false; + } + } else { + upb_status_seterrmsg(p->status, + "Map bool key not 'true' or 'false'"); + return false; + } + multipart_end(p); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_sink subsink; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, len, &subsink); + sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); + upb_sink_putstring(&subsink, sel, buf, len, NULL); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&subsink, sel); + multipart_end(p); + break; + } + default: + upb_status_seterrmsg(p->status, "Invalid field type for map key"); + return false; + } - p->top->f = f; - multipart_end(p); + return true; +} + +// Helper: emit one map entry (as a submessage in the map field sequence). This +// is invoked from end_membername(), at the end of the map entry's key string, +// with the map key in the accumulate buffer. It parses the key from that +// buffer, emits the handler calls to start the mapentry submessage (setting up +// its subframe in the process), and sets up state in the subframe so that the +// value parser (invoked next) will emit the mapentry's value field and then +// end the mapentry message. + +static bool handle_mapentry(upb_json_parser *p) { + // Map entry: p->top->sink is the seq frame, so we need to start a frame + // for the mapentry itself, and then set |f| in that frame so that the map + // value field is parsed, and also set a flag to end the frame after the + // map-entry value is parsed. + if (!check_stack(p)) return false; + + const upb_fielddef *mapfield = p->top->mapfield; + const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield); + + upb_jsonparser_frame *inner = p->top + 1; + p->top->f = mapfield; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = mapentrymsg; + inner->mapfield = mapfield; + inner->is_map = false; + + // Don't set this to true *yet* -- we reuse parsing handlers below to push + // the key field value to the sink, and these handlers will pop the frame + // if they see is_mapentry (when invoked by the parser state machine, they + // would have just seen the map-entry value, not key). + inner->is_mapentry = false; + p->top = inner; + + // send STARTMSG in submsg frame. + upb_sink_startmsg(&p->top->sink); + + parse_mapentry_key(p); + + // Set up the value field to receive the map-entry value. + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); + p->top->is_mapentry = true; // set up to pop frame after value is parsed. + p->top->mapfield = mapfield; + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no value"); + return false; + } return true; } -static void clear_member(upb_json_parser *p) { p->top->f = NULL; } +static bool end_membername(upb_json_parser *p) { + assert(!p->top->f); + + if (p->top->is_map) { + return handle_mapentry(p); + } else { + size_t len; + const char *buf = accumulate_getptr(p, &len); + const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + + if (!f) { + // TODO(haberman): Ignore unknown fields if requested/configured to do so. + upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + return false; + } + + p->top->f = f; + multipart_end(p); + + return true; + } +} + +static void end_member(upb_json_parser *p) { + // If we just parsed a map-entry value, end that frame too. + if (p->top->is_mapentry) { + assert(p->top > p->stack); + // send ENDMSG on submsg. + upb_status s = UPB_STATUS_INIT; + upb_sink_endmsg(&p->top->sink, &s); + const upb_fielddef* mapfield = p->top->mapfield; + + // send ENDSUBMSG in repeated-field-of-mapentries frame. + p->top--; + upb_selector_t sel; + bool ok = upb_handlers_getselector(mapfield, + UPB_HANDLER_ENDSUBMSG, &sel); + UPB_ASSERT_VAR(ok, ok); + upb_sink_endsubmsg(&p->top->sink, sel); + } + + p->top->f = NULL; +} static bool start_subobject(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_issubmsg(p->top->f)) { + if (upb_fielddef_ismap(p->top->f)) { + // Beginning of a map. Start a new parser frame in a repeated-field + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + upb_sink_startseq(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->mapfield = p->top->f; + inner->f = NULL; + inner->is_map = true; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else if (upb_fielddef_issubmsg(p->top->f)) { + // Beginning of a subobject. Start a new parser frame in the submsg + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else { upb_status_seterrf(p->status, "Object specified for non-message/group field: %s", upb_fielddef_name(p->top->f)); return false; } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->f = NULL; - p->top = inner; - - return true; } static void end_subobject(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (p->top->is_map) { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + upb_sink_endseq(&p->top->sink, sel); + } else { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } static bool start_array(upb_json_parser *p) { @@ -761,6 +1005,8 @@ static bool start_array(upb_json_parser *p) { upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; return true; @@ -775,12 +1021,16 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - upb_sink_startmsg(&p->top->sink); + if (!p->top->is_map) { + upb_sink_startmsg(&p->top->sink); + } } static void end_object(upb_json_parser *p) { - upb_status status; - upb_sink_endmsg(&p->top->sink, &status); + if (!p->top->is_map) { + upb_status status; + upb_sink_endmsg(&p->top->sink, &status); + } } @@ -805,11 +1055,11 @@ static void end_object(upb_json_parser *p) { // final state once, when the closing '"' is seen. -#line 901 "upb/json/parser.rl" +#line 1151 "upb/json/parser.rl" -#line 813 "upb/json/parser.c" +#line 1063 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -960,7 +1210,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 904 "upb/json/parser.rl" +#line 1154 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -980,7 +1230,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 984 "upb/json/parser.c" +#line 1234 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1055,118 +1305,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 816 "upb/json/parser.rl" +#line 1066 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 817 "upb/json/parser.rl" +#line 1067 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 821 "upb/json/parser.rl" +#line 1071 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 822 "upb/json/parser.rl" +#line 1072 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 828 "upb/json/parser.rl" +#line 1078 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 829 "upb/json/parser.rl" +#line 1079 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 830 "upb/json/parser.rl" +#line 1080 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 836 "upb/json/parser.rl" +#line 1086 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 842 "upb/json/parser.rl" +#line 1092 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 845 "upb/json/parser.rl" +#line 1095 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 847 "upb/json/parser.rl" +#line 1097 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 852 "upb/json/parser.rl" +#line 1102 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 853 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_member(parser)); } +#line 1103 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 856 "upb/json/parser.rl" - { clear_member(parser); } +#line 1106 "upb/json/parser.rl" + { end_member(parser); } break; case 14: -#line 862 "upb/json/parser.rl" +#line 1112 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 865 "upb/json/parser.rl" +#line 1115 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 871 "upb/json/parser.rl" +#line 1121 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 875 "upb/json/parser.rl" +#line 1125 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 880 "upb/json/parser.rl" +#line 1130 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 881 "upb/json/parser.rl" +#line 1131 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 883 "upb/json/parser.rl" +#line 1133 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 884 "upb/json/parser.rl" +#line 1134 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 886 "upb/json/parser.rl" +#line 1136 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 888 "upb/json/parser.rl" +#line 1138 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 890 "upb/json/parser.rl" +#line 1140 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 892 "upb/json/parser.rl" +#line 1142 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 893 "upb/json/parser.rl" +#line 1143 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 898 "upb/json/parser.rl" +#line 1148 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1170 "upb/json/parser.c" +#line 1420 "upb/json/parser.c" } } @@ -1179,7 +1429,7 @@ _again: _out: {} } -#line 923 "upb/json/parser.rl" +#line 1173 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -1201,52 +1451,58 @@ bool end(void *closure, const void *hd) { return true; } - -/* Public API *****************************************************************/ - -void upb_json_parser_init(upb_json_parser *p, upb_status *status) { - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); - p->status = status; -} - -void upb_json_parser_uninit(upb_json_parser *p) { - upb_byteshandler_uninit(&p->input_handler_); - free(p->accumulate_buf); -} - -void upb_json_parser_reset(upb_json_parser *p) { +static void json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; + p->top->is_map = false; + p->top->is_mapentry = false; int cs; int top; // Emit Ragel initialization of the parser. -#line 1232 "upb/json/parser.c" +#line 1465 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 971 "upb/json/parser.rl" +#line 1204 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); p->multipart_state = MULTIPART_INACTIVE; p->capture = NULL; + p->accumulated = NULL; } -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { - upb_json_parser_reset(p); - upb_sink_reset(&p->top->sink, sink->handlers, sink->closure); - p->top->m = upb_handlers_msgdef(sink->handlers); - p->accumulated = NULL; + +/* Public API *****************************************************************/ + +upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif + upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser)); + if (!p) return false; + + p->env = env; + p->limit = p->stack + UPB_JSON_MAX_DEPTH; + p->accumulate_buf = NULL; + p->accumulate_buf_size = 0; + upb_byteshandler_init(&p->input_handler_); + upb_byteshandler_setstring(&p->input_handler_, parse, NULL); + upb_byteshandler_setendstr(&p->input_handler_, end, NULL); + upb_bytessink_reset(&p->input_, &p->input_handler_, p); + + json_parser_reset(p); + upb_sink_reset(&p->top->sink, output->handlers, output->closure); + p->top->m = upb_handlers_msgdef(output->handlers); + + // If this fails, uncomment and increase the value in parser.h. + // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); + assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); + return p; } upb_bytessink *upb_json_parser_input(upb_json_parser *p) { diff --git a/upb/json/parser.h b/upb/json/parser.h index 51578f2..b932adf 100644 --- a/upb/json/parser.h +++ b/upb/json/parser.h @@ -11,6 +11,7 @@ #ifndef UPB_JSON_PARSER_H_ #define UPB_JSON_PARSER_H_ +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -23,78 +24,32 @@ class Parser; UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser); -// Internal-only struct used by the parser. -typedef struct { - UPB_PRIVATE_FOR_CPP - upb_sink sink; - const upb_msgdef *m; - const upb_fielddef *f; -} upb_jsonparser_frame; - - /* upb::json::Parser **********************************************************/ -#define UPB_JSON_MAX_DEPTH 64 +// Preallocation hint: parser won't allocate more bytes than this when first +// constructed. This hint may be an overestimate for some build configurations. +// But if the parser library is upgraded without recompiling the application, +// it may be an underestimate. +#define UPB_JSON_PARSER_SIZE 3568 + +#ifdef __cplusplus // Parses an incoming BytesStream, pushing the results to the destination sink. -UPB_DEFINE_CLASS0(upb::json::Parser, +class upb::json::Parser { public: - Parser(Status* status); - ~Parser(); + static Parser* Create(Environment* env, Sink* output); - // Resets the state of the printer, so that it will expect to begin a new - // document. - void Reset(); - - // Resets the output pointer which will serve as our closure. Implies - // Reset(). - void ResetOutput(Sink* output); - - // The input to the printer. BytesSink* input(); -, -UPB_DEFINE_STRUCT0(upb_json_parser, - upb_byteshandler input_handler_; - upb_bytessink input_; - - // Stack to track the JSON scopes we are in. - upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; - upb_jsonparser_frame *top; - upb_jsonparser_frame *limit; - upb_status *status; + private: + UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser); +}; - // Ragel's internal parsing stack for the parsing state machine. - int current_state; - int parser_stack[UPB_JSON_MAX_DEPTH]; - int parser_top; - - // The handle for the current buffer. - const upb_bufhandle *handle; - - // Accumulate buffer. See details in parser.rl. - const char *accumulated; - size_t accumulated_len; - char *accumulate_buf; - size_t accumulate_buf_size; - - // Multi-part text data. See details in parser.rl. - int multipart_state; - upb_selector_t string_selector; - - // Input capture. See details in parser.rl. - const char *capture; - - // Intermediate result of parsing a unicode escape sequence. - uint32_t digit; -)); +#endif UPB_BEGIN_EXTERN_C -void upb_json_parser_init(upb_json_parser *p, upb_status *status); -void upb_json_parser_uninit(upb_json_parser *p); -void upb_json_parser_reset(upb_json_parser *p); -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *output); +upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output); upb_bytessink *upb_json_parser_input(upb_json_parser *p); UPB_END_EXTERN_C @@ -103,11 +58,8 @@ UPB_END_EXTERN_C namespace upb { namespace json { -inline Parser::Parser(Status* status) { upb_json_parser_init(this, status); } -inline Parser::~Parser() { upb_json_parser_uninit(this); } -inline void Parser::Reset() { upb_json_parser_reset(this); } -inline void Parser::ResetOutput(Sink* output) { - upb_json_parser_resetoutput(this, output); +inline Parser* Parser::Create(Environment* env, Sink* output) { + return upb_json_parser_create(env, output); } inline BytesSink* Parser::input() { return upb_json_parser_input(this); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index b72bc10..3a400ea 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -31,6 +31,71 @@ #include "upb/json/parser.h" +#define UPB_JSON_MAX_DEPTH 64 + +typedef struct { + upb_sink sink; + + // The current message in which we're parsing, and the field whose value we're + // expecting next. + const upb_msgdef *m; + const upb_fielddef *f; + + // We are in a repeated-field context, ready to emit mapentries as + // submessages. This flag alters the start-of-object (open-brace) behavior to + // begin a sequence of mapentry messages rather than a single submessage. + bool is_map; + + // We are in a map-entry message context. This flag is set when parsing the + // value field of a single map entry and indicates to all value-field parsers + // (subobjects, strings, numbers, and bools) that the map-entry submessage + // should end as soon as the value is parsed. + bool is_mapentry; + + // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + // message's map field that we're currently parsing. This differs from |f| + // because |f| is the field in the *current* message (i.e., the map-entry + // message itself), not the parent's field that leads to this map. + const upb_fielddef *mapfield; +} upb_jsonparser_frame; + +struct upb_json_parser { + upb_env *env; + upb_byteshandler input_handler_; + upb_bytessink input_; + + // Stack to track the JSON scopes we are in. + upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; + upb_jsonparser_frame *top; + upb_jsonparser_frame *limit; + + upb_status *status; + + // Ragel's internal parsing stack for the parsing state machine. + int current_state; + int parser_stack[UPB_JSON_MAX_DEPTH]; + int parser_top; + + // The handle for the current buffer. + const upb_bufhandle *handle; + + // Accumulate buffer. See details in parser.rl. + const char *accumulated; + size_t accumulated_len; + char *accumulate_buf; + size_t accumulate_buf_size; + + // Multi-part text data. See details in parser.rl. + int multipart_state; + upb_selector_t string_selector; + + // Input capture. See details in parser.rl. + const char *capture; + + // Intermediate result of parsing a unicode escape sequence. + uint32_t digit; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false // Used to signal that a capture has been suspended. @@ -233,12 +298,13 @@ static void accumulate_clear(upb_json_parser *p) { // Used internally by accumulate_append(). static bool accumulate_realloc(upb_json_parser *p, size_t need) { - size_t new_size = UPB_MAX(p->accumulate_buf_size, 128); + size_t old_size = p->accumulate_buf_size; + size_t new_size = UPB_MAX(old_size, 128); while (new_size < need) { new_size = saturating_multiply(new_size, 2); } - void *mem = realloc(p->accumulate_buf, new_size); + void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); if (!mem) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); return false; @@ -260,16 +326,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, return true; } - if (p->accumulate_buf_size - p->accumulated_len < len) { - size_t need; - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } + size_t need; + if (!checked_add(p->accumulated_len, len, &need)) { + upb_status_seterrmsg(p->status, "Integer overflow."); + return false; + } - if (!accumulate_realloc(p, need)) { - return false; - } + if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { + return false; } if (p->accumulated != p->accumulate_buf) { @@ -508,16 +572,28 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } +static bool parse_number(upb_json_parser *p); + static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } + return parse_number(p); +} + +static bool parse_number(upb_json_parser *p) { + // strtol() and friends unfortunately do not support specifying the length of + // the input string, so we need to force a copy into a NULL-terminated buffer. + if (!multipart_text(p, "\0", 1, false)) { + return false; + } + size_t len; const char *buf = accumulate_getptr(p, &len); - const char *myend = buf + len; - char *end; + const char *myend = buf + len - 1; // One for NULL. + char *end; switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { @@ -573,10 +649,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) { } multipart_end(p); + return true; err: - upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len); + upb_status_seterrf(p->status, "error parsing number: %s", buf); multipart_end(p); return false; } @@ -591,6 +668,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) { bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); UPB_ASSERT_VAR(ok, ok); + return true; } @@ -607,6 +685,8 @@ static bool start_stringval(upb_json_parser *p) { upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { @@ -684,6 +764,7 @@ static bool end_stringval(upb_json_parser *p) { } multipart_end(p); + return ok; } @@ -692,54 +773,217 @@ static void start_member(upb_json_parser *p) { multipart_startaccum(p); } -static bool end_member(upb_json_parser *p) { - assert(!p->top->f); +// Helper: invoked during parse_mapentry() to emit the mapentry message's key +// field based on the current contents of the accumulate buffer. +static bool parse_mapentry_key(upb_json_parser *p) { + size_t len; const char *buf = accumulate_getptr(p, &len); - const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + // Emit the key field. We do a bit of ad-hoc parsing here because the + // parser state machine has already decided that this is a string field + // name, and we are reinterpreting it as some arbitrary key type. In + // particular, integer and bool keys are quoted, so we need to parse the + // quoted string contents here. - if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no key"); return false; } + switch (upb_fielddef_type(p->top->f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + // Invoke end_number. The accum buffer has the number's text already. + if (!parse_number(p)) { + return false; + } + break; + case UPB_TYPE_BOOL: + if (len == 4 && !strncmp(buf, "true", 4)) { + if (!parser_putbool(p, true)) { + return false; + } + } else if (len == 5 && !strncmp(buf, "false", 5)) { + if (!parser_putbool(p, false)) { + return false; + } + } else { + upb_status_seterrmsg(p->status, + "Map bool key not 'true' or 'false'"); + return false; + } + multipart_end(p); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + upb_sink subsink; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, len, &subsink); + sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); + upb_sink_putstring(&subsink, sel, buf, len, NULL); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&subsink, sel); + multipart_end(p); + break; + } + default: + upb_status_seterrmsg(p->status, "Invalid field type for map key"); + return false; + } - p->top->f = f; - multipart_end(p); + return true; +} + +// Helper: emit one map entry (as a submessage in the map field sequence). This +// is invoked from end_membername(), at the end of the map entry's key string, +// with the map key in the accumulate buffer. It parses the key from that +// buffer, emits the handler calls to start the mapentry submessage (setting up +// its subframe in the process), and sets up state in the subframe so that the +// value parser (invoked next) will emit the mapentry's value field and then +// end the mapentry message. + +static bool handle_mapentry(upb_json_parser *p) { + // Map entry: p->top->sink is the seq frame, so we need to start a frame + // for the mapentry itself, and then set |f| in that frame so that the map + // value field is parsed, and also set a flag to end the frame after the + // map-entry value is parsed. + if (!check_stack(p)) return false; + + const upb_fielddef *mapfield = p->top->mapfield; + const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield); + + upb_jsonparser_frame *inner = p->top + 1; + p->top->f = mapfield; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = mapentrymsg; + inner->mapfield = mapfield; + inner->is_map = false; + + // Don't set this to true *yet* -- we reuse parsing handlers below to push + // the key field value to the sink, and these handlers will pop the frame + // if they see is_mapentry (when invoked by the parser state machine, they + // would have just seen the map-entry value, not key). + inner->is_mapentry = false; + p->top = inner; + + // send STARTMSG in submsg frame. + upb_sink_startmsg(&p->top->sink); + + parse_mapentry_key(p); + + // Set up the value field to receive the map-entry value. + p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); + p->top->is_mapentry = true; // set up to pop frame after value is parsed. + p->top->mapfield = mapfield; + if (p->top->f == NULL) { + upb_status_seterrmsg(p->status, "mapentry message has no value"); + return false; + } return true; } -static void clear_member(upb_json_parser *p) { p->top->f = NULL; } +static bool end_membername(upb_json_parser *p) { + assert(!p->top->f); + + if (p->top->is_map) { + return handle_mapentry(p); + } else { + size_t len; + const char *buf = accumulate_getptr(p, &len); + const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); + + if (!f) { + // TODO(haberman): Ignore unknown fields if requested/configured to do so. + upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); + return false; + } + + p->top->f = f; + multipart_end(p); + + return true; + } +} + +static void end_member(upb_json_parser *p) { + // If we just parsed a map-entry value, end that frame too. + if (p->top->is_mapentry) { + assert(p->top > p->stack); + // send ENDMSG on submsg. + upb_status s = UPB_STATUS_INIT; + upb_sink_endmsg(&p->top->sink, &s); + const upb_fielddef* mapfield = p->top->mapfield; + + // send ENDSUBMSG in repeated-field-of-mapentries frame. + p->top--; + upb_selector_t sel; + bool ok = upb_handlers_getselector(mapfield, + UPB_HANDLER_ENDSUBMSG, &sel); + UPB_ASSERT_VAR(ok, ok); + upb_sink_endsubmsg(&p->top->sink, sel); + } + + p->top->f = NULL; +} static bool start_subobject(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_issubmsg(p->top->f)) { + if (upb_fielddef_ismap(p->top->f)) { + // Beginning of a map. Start a new parser frame in a repeated-field + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + upb_sink_startseq(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->mapfield = p->top->f; + inner->f = NULL; + inner->is_map = true; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else if (upb_fielddef_issubmsg(p->top->f)) { + // Beginning of a subobject. Start a new parser frame in the submsg + // context. + if (!check_stack(p)) return false; + + upb_jsonparser_frame *inner = p->top + 1; + + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); + inner->m = upb_fielddef_msgsubdef(p->top->f); + inner->f = NULL; + inner->is_map = false; + inner->is_mapentry = false; + p->top = inner; + + return true; + } else { upb_status_seterrf(p->status, "Object specified for non-message/group field: %s", upb_fielddef_name(p->top->f)); return false; } - - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; - - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->f = NULL; - p->top = inner; - - return true; } static void end_subobject(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(&p->top->sink, sel); + if (p->top->is_map) { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + upb_sink_endseq(&p->top->sink, sel); + } else { + p->top--; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + upb_sink_endsubmsg(&p->top->sink, sel); + } } static bool start_array(upb_json_parser *p) { @@ -759,6 +1003,8 @@ static bool start_array(upb_json_parser *p) { upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; + inner->is_map = false; + inner->is_mapentry = false; p->top = inner; return true; @@ -773,12 +1019,16 @@ static void end_array(upb_json_parser *p) { } static void start_object(upb_json_parser *p) { - upb_sink_startmsg(&p->top->sink); + if (!p->top->is_map) { + upb_sink_startmsg(&p->top->sink); + } } static void end_object(upb_json_parser *p) { - upb_status status; - upb_sink_endmsg(&p->top->sink, &status); + if (!p->top->is_map) { + upb_status status; + upb_sink_endmsg(&p->top->sink, &status); + } } @@ -850,10 +1100,10 @@ static void end_object(upb_json_parser *p) { ws string >{ start_member(parser); } - @{ CHECK_RETURN_TOP(end_member(parser)); } + @{ CHECK_RETURN_TOP(end_membername(parser)); } ws ":" ws value2 - %{ clear_member(parser); } + %{ end_member(parser); } ws; object = @@ -941,28 +1191,11 @@ bool end(void *closure, const void *hd) { return true; } - -/* Public API *****************************************************************/ - -void upb_json_parser_init(upb_json_parser *p, upb_status *status) { - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); - p->status = status; -} - -void upb_json_parser_uninit(upb_json_parser *p) { - upb_byteshandler_uninit(&p->input_handler_); - free(p->accumulate_buf); -} - -void upb_json_parser_reset(upb_json_parser *p) { +static void json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; + p->top->is_map = false; + p->top->is_mapentry = false; int cs; int top; @@ -973,13 +1206,36 @@ void upb_json_parser_reset(upb_json_parser *p) { accumulate_clear(p); p->multipart_state = MULTIPART_INACTIVE; p->capture = NULL; + p->accumulated = NULL; } -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { - upb_json_parser_reset(p); - upb_sink_reset(&p->top->sink, sink->handlers, sink->closure); - p->top->m = upb_handlers_msgdef(sink->handlers); - p->accumulated = NULL; + +/* Public API *****************************************************************/ + +upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif + upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser)); + if (!p) return false; + + p->env = env; + p->limit = p->stack + UPB_JSON_MAX_DEPTH; + p->accumulate_buf = NULL; + p->accumulate_buf_size = 0; + upb_byteshandler_init(&p->input_handler_); + upb_byteshandler_setstring(&p->input_handler_, parse, NULL); + upb_byteshandler_setendstr(&p->input_handler_, end, NULL); + upb_bytessink_reset(&p->input_, &p->input_handler_, p); + + json_parser_reset(p); + upb_sink_reset(&p->top->sink, output->handlers, output->closure); + p->top->m = upb_handlers_msgdef(output->handlers); + + // If this fails, uncomment and increase the value in parser.h. + // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); + assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); + return p; } upb_bytessink *upb_json_parser_input(upb_json_parser *p) { diff --git a/upb/json/printer.c b/upb/json/printer.c index b996ccf..132736c 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -15,6 +15,27 @@ #include #include +struct upb_json_printer { + upb_sink input_; + // BytesSink closure. + void *subc_; + upb_bytessink *output_; + + // We track the depth so that we know when to emit startstr/endstr on the + // output. + int depth_; + + // Have we emitted the first element? This state is necessary to emit commas + // without leaving a trailing comma in arrays/maps. We keep this state per + // frame depth. + // + // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. + // We count frames (contexts in which we separate elements by commas) as both + // repeated fields and messages (maps), and the worst case is a + // message->repeated field->submessage->repeated field->... nesting. + bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; +}; + // StringPiece; a pointer plus a length. typedef struct { const char *ptr; @@ -182,13 +203,23 @@ static bool putkey(void *closure, const void *handler_data) { return true; \ } \ static bool repeated_##type(void *closure, const void *handler_data, \ - type val) { \ + type val) { \ upb_json_printer *p = closure; \ print_comma(p); \ CHK(put##type(closure, handler_data, val)); \ return true; \ } +#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \ + static bool putmapkey_##type(void *closure, const void *handler_data, \ + type val) { \ + upb_json_printer *p = closure; \ + print_data(p, "\"", 1); \ + CHK(put##type(closure, handler_data, val)); \ + print_data(p, "\":", 2); \ + return true; \ + } + TYPE_HANDLERS(double, fmt_double); TYPE_HANDLERS(float, fmt_float); TYPE_HANDLERS(bool, fmt_bool); @@ -197,7 +228,15 @@ TYPE_HANDLERS(uint32_t, fmt_int64); TYPE_HANDLERS(int64_t, fmt_int64); TYPE_HANDLERS(uint64_t, fmt_uint64); +// double and float are not allowed to be map keys. +TYPE_HANDLERS_MAPKEY(bool, fmt_bool); +TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64); +TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64); + #undef TYPE_HANDLERS +#undef TYPE_HANDLERS_MAPKEY typedef struct { void *keyname; @@ -222,20 +261,36 @@ static bool scalar_enum(void *closure, const void *handler_data, return true; } -static bool repeated_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - print_comma(p); - - const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); +static void print_enum_symbolic_name(upb_json_printer *p, + const upb_enumdef *def, + int32_t val) { + const char *symbolic_name = upb_enumdef_iton(def, val); if (symbolic_name) { print_data(p, "\"", 1); putstring(p, symbolic_name, strlen(symbolic_name)); print_data(p, "\"", 1); } else { - putint32_t(closure, NULL, val); + putint32_t(p, NULL, val); } +} + +static bool repeated_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + print_comma(p); + + print_enum_symbolic_name(p, hd->enumdef, val); + + return true; +} + +static bool mapvalue_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + + print_enum_symbolic_name(p, hd->enumdef, val); return true; } @@ -251,25 +306,35 @@ static void *repeated_startsubmsg(void *closure, const void *handler_data) { return closure; } -static bool startmap(void *closure, const void *handler_data) { +static void start_frame(upb_json_printer *p) { + p->depth_++; + p->first_elem_[p->depth_] = true; + print_data(p, "{", 1); +} + +static void end_frame(upb_json_printer *p) { + print_data(p, "}", 1); + p->depth_--; +} + +static bool printer_startmsg(void *closure, const void *handler_data) { UPB_UNUSED(handler_data); upb_json_printer *p = closure; - if (p->depth_++ == 0) { + if (p->depth_ == 0) { upb_bytessink_start(p->output_, 0, &p->subc_); } - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); + start_frame(p); return true; } -static bool endmap(void *closure, const void *handler_data, upb_status *s) { +static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) { UPB_UNUSED(handler_data); UPB_UNUSED(s); upb_json_printer *p = closure; - if (--p->depth_ == 0) { + end_frame(p); + if (p->depth_ == 0) { upb_bytessink_end(p->output_); } - print_data(p, "}", 1); return true; } @@ -290,6 +355,23 @@ static bool endseq(void *closure, const void *handler_data) { return true; } +static void *startmap(void *closure, const void *handler_data) { + upb_json_printer *p = closure; + CHK(putkey(closure, handler_data)); + p->depth_++; + p->first_elem_[p->depth_] = true; + print_data(p, "{", 1); + return closure; +} + +static bool endmap(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "}", 1); + p->depth_--; + return true; +} + static size_t putstr(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { UPB_UNUSED(handler_data); @@ -404,6 +486,36 @@ static bool repeated_endstr(void *closure, const void *handler_data) { return true; } +static void *mapkeyval_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return p; +} + +static size_t mapkey_str(void *closure, const void *handler_data, + const char *str, size_t len, + const upb_bufhandle *handle) { + CHK(putstr(closure, handler_data, str, len, handle)); + return len; +} + +static bool mapkey_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\":", 2); + return true; +} + +static bool mapvalue_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + static size_t scalar_bytes(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { @@ -421,31 +533,161 @@ static size_t repeated_bytes(void *closure, const void *handler_data, return len; } -void printer_sethandlers(const void *closure, upb_handlers *h) { +static size_t mapkey_bytes(void *closure, const void *handler_data, + const char *str, size_t len, + const upb_bufhandle *handle) { + upb_json_printer *p = closure; + CHK(putbytes(closure, handler_data, str, len, handle)); + print_data(p, ":", 1); + return len; +} + +static void set_enum_hd(upb_handlers *h, + const upb_fielddef *f, + upb_handlerattr *attr) { + EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); + hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); + hd->keyname = newstrpc(h, f); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr_sethandlerdata(attr, hd); +} + +// Set up handlers for a mapentry submessage (i.e., an individual key/value pair +// in a map). +// +// TODO: Handle missing key, missing value, out-of-order key/value, or repeated +// key or value cases properly. The right way to do this is to allocate a +// temporary structure at the start of a mapentry submessage, store key and +// value data in it as key and value handlers are called, and then print the +// key/value pair once at the end of the submessage. If we don't do this, we +// should at least detect the case and throw an error. However, so far all of +// our sources that emit mapentry messages do so canonically (with one key +// field, and then one value field), so this is not a pressing concern at the +// moment. +void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) { UPB_UNUSED(closure); + const upb_msgdef *md = upb_handlers_msgdef(h); + + // A mapentry message is printed simply as '"key": value'. Rather than + // special-case key and value for every type below, we just handle both + // fields explicitly here. + const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY); + const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE); + + upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; + + switch (upb_fielddef_type(key_field)) { + case UPB_TYPE_INT32: + upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr); + break; + case UPB_TYPE_INT64: + upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr); + break; + case UPB_TYPE_UINT32: + upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr); + break; + case UPB_TYPE_UINT64: + upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr); + break; + case UPB_TYPE_STRING: + upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr); + upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr); + upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr); + break; + case UPB_TYPE_BYTES: + upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr); + break; + default: + assert(false); + break; + } + switch (upb_fielddef_type(value_field)) { + case UPB_TYPE_INT32: + upb_handlers_setint32(h, value_field, putint32_t, &empty_attr); + break; + case UPB_TYPE_INT64: + upb_handlers_setint64(h, value_field, putint64_t, &empty_attr); + break; + case UPB_TYPE_UINT32: + upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr); + break; + case UPB_TYPE_UINT64: + upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr); + break; + case UPB_TYPE_BOOL: + upb_handlers_setbool(h, value_field, putbool, &empty_attr); + break; + case UPB_TYPE_FLOAT: + upb_handlers_setfloat(h, value_field, putfloat, &empty_attr); + break; + case UPB_TYPE_DOUBLE: + upb_handlers_setdouble(h, value_field, putdouble, &empty_attr); + break; + case UPB_TYPE_STRING: + upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr); + upb_handlers_setstring(h, value_field, putstr, &empty_attr); + upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr); + break; + case UPB_TYPE_BYTES: + upb_handlers_setstring(h, value_field, putbytes, &empty_attr); + break; + case UPB_TYPE_ENUM: { + upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; + set_enum_hd(h, value_field, &enum_attr); + upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr); + upb_handlerattr_uninit(&enum_attr); + break; + } + case UPB_TYPE_MESSAGE: + // No handler necessary -- the submsg handlers will print the message + // as appropriate. + break; + } + + upb_handlerattr_uninit(&empty_attr); +} + +void printer_sethandlers(const void *closure, upb_handlers *h) { + UPB_UNUSED(closure); + const upb_msgdef *md = upb_handlers_msgdef(h); + bool is_mapentry = upb_msgdef_mapentry(md); upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlers_setstartmsg(h, startmap, &empty_attr); - upb_handlers_setendmsg(h, endmap, &empty_attr); - -#define TYPE(type, name, ctype) \ - case type: \ - if (upb_fielddef_isseq(f)) { \ - upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ - } else { \ - upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ - } \ + + if (is_mapentry) { + // mapentry messages are sufficiently different that we handle them + // separately. + printer_sethandlers_mapentry(closure, h); + return; + } + + upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); + upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); + +#define TYPE(type, name, ctype) \ + case type: \ + if (upb_fielddef_isseq(f)) { \ + upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ + } else { \ + upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ + } \ break; - upb_msg_iter i; - upb_msg_begin(&i, upb_handlers_msgdef(h)); - for(; !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + upb_msg_field_begin(&i, md); + for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f)); - if (upb_fielddef_isseq(f)) { + if (upb_fielddef_ismap(f)) { + upb_handlers_setstartseq(h, f, startmap, &name_attr); + upb_handlers_setendseq(h, f, endmap, &name_attr); + } else if (upb_fielddef_isseq(f)) { upb_handlers_setstartseq(h, f, startseq, &name_attr); upb_handlers_setendseq(h, f, endseq, &empty_attr); } @@ -462,12 +704,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) { // For now, we always emit symbolic names for enums. We may want an // option later to control this behavior, but we will wait for a real // need first. - EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); - hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); - hd->keyname = newstrpc(h, f); - upb_handlers_addcleanup(h, hd, free); upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlerattr_sethandlerdata(&enum_attr, hd); + set_enum_hd(h, f, &enum_attr); if (upb_fielddef_isseq(f)) { upb_handlers_setint32(h, f, repeated_enum, &enum_attr); @@ -514,25 +752,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) { #undef TYPE } -/* Public API *****************************************************************/ - -void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) { - p->output_ = NULL; +static void json_printer_reset(upb_json_printer *p) { p->depth_ = 0; - upb_sink_reset(&p->input_, h, p); } -void upb_json_printer_uninit(upb_json_printer *p) { - UPB_UNUSED(p); -} -void upb_json_printer_reset(upb_json_printer *p) { - p->depth_ = 0; -} +/* Public API *****************************************************************/ + +upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h, + upb_bytessink *output) { +#ifndef NDEBUG + size_t size_before = upb_env_bytesallocated(e); +#endif + + upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer)); + if (!p) return NULL; -void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) { - upb_json_printer_reset(p); p->output_ = output; + json_printer_reset(p); + upb_sink_reset(&p->input_, h, p); + + // If this fails, increase the value in printer.h. + assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE); + return p; } upb_sink *upb_json_printer_input(upb_json_printer *p) { diff --git a/upb/json/printer.h b/upb/json/printer.h index fbc206d..c73cb79 100644 --- a/upb/json/printer.h +++ b/upb/json/printer.h @@ -11,6 +11,7 @@ #ifndef UPB_JSON_TYPED_PRINTER_H_ #define UPB_JSON_TYPED_PRINTER_H_ +#include "upb/env.h" #include "upb/sink.h" #ifdef __cplusplus @@ -26,71 +27,48 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer); /* upb::json::Printer *********************************************************/ -// Prints an incoming stream of data to a BytesSink in JSON format. -UPB_DEFINE_CLASS0(upb::json::Printer, - public: - Printer(const upb::Handlers* handlers); - ~Printer(); +#define UPB_JSON_PRINTER_SIZE 168 - // Resets the state of the printer, so that it will expect to begin a new - // document. - void Reset(); +#ifdef __cplusplus - // Resets the output pointer which will serve as our closure. Implies - // Reset(). - void ResetOutput(BytesSink* output); +// Prints an incoming stream of data to a BytesSink in JSON format. +class upb::json::Printer { + public: + static Printer* Create(Environment* env, const upb::Handlers* handlers, + BytesSink* output); // The input to the printer. Sink* input(); // Returns handlers for printing according to the specified schema. static reffed_ptr NewHandlers(const upb::MessageDef* md); -, -UPB_DEFINE_STRUCT0(upb_json_printer, - upb_sink input_; - // BytesSink closure. - void *subc_; - upb_bytessink *output_; - - // We track the depth so that we know when to emit startstr/endstr on the - // output. - int depth_; - // Have we emitted the first element? This state is necessary to emit commas - // without leaving a trailing comma in arrays/maps. We keep this state per - // frame depth. - // - // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. - // We count frames (contexts in which we separate elements by commas) as both - // repeated fields and messages (maps), and the worst case is a - // message->repeated field->submessage->repeated field->... nesting. - bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; -)); - -UPB_BEGIN_EXTERN_C // { -// Native C API. + static const size_t kSize = UPB_JSON_PRINTER_SIZE; -void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h); -void upb_json_printer_uninit(upb_json_printer *p); -void upb_json_printer_reset(upb_json_printer *p); -void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output); + private: + UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer); +}; + +#endif + +UPB_BEGIN_EXTERN_C + +// Native C API. +upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h, + upb_bytessink *output); upb_sink *upb_json_printer_input(upb_json_printer *p); const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md, const void *owner); -UPB_END_EXTERN_C // } +UPB_END_EXTERN_C #ifdef __cplusplus namespace upb { namespace json { -inline Printer::Printer(const upb::Handlers* handlers) { - upb_json_printer_init(this, handlers); -} -inline Printer::~Printer() { upb_json_printer_uninit(this); } -inline void Printer::Reset() { upb_json_printer_reset(this); } -inline void Printer::ResetOutput(BytesSink* output) { - upb_json_printer_resetoutput(this, output); +inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers, + BytesSink* output) { + return upb_json_printer_create(env, handlers, output); } inline Sink* Printer::input() { return upb_json_printer_input(this); } inline reffed_ptr Printer::NewHandlers( -- cgit v1.2.3