diff options
author | Joshua Haberman <jhaberman@gmail.com> | 2014-12-09 14:05:41 -0800 |
---|---|---|
committer | Joshua Haberman <jhaberman@gmail.com> | 2014-12-09 14:05:41 -0800 |
commit | bf51ef86b448138a281e796df5bdfa8fa118524d (patch) | |
tree | ce16586d7f0a8e5c87252727b4b3745d1a48a4eb /upb/json | |
parent | e257bd978d5e6278e7b188d543858852c0c4d856 (diff) | |
parent | 8f8113b4fff748b57b0ff2f1a301e86b4703be84 (diff) |
Merge pull request #7 from cfallin/master
JSON test, symbolic enum names in JSON, and a few improvements.
Diffstat (limited to 'upb/json')
-rw-r--r-- | upb/json/parser.c | 139 | ||||
-rw-r--r-- | upb/json/parser.rl | 69 | ||||
-rw-r--r-- | upb/json/printer.c | 122 |
3 files changed, 240 insertions, 90 deletions
diff --git a/upb/json/parser.c b/upb/json/parser.c index 2687713..78fc6c0 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -288,7 +288,7 @@ badpadding: return false; } -static bool end_text(upb_json_parser *p, const char *ptr) { +static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) { assert(!p->accumulated); // TODO: handle this case. p->accumulated = p->text_begin; p->accumulated_len = ptr - p->text_begin; @@ -302,6 +302,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) { upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); } p->accumulated = NULL; + } else if (p->top->f && + upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && + !is_num) { + + // Enum case: resolve enum symbolic name to integer value. + const upb_enumdef *enumdef = + (const upb_enumdef*)upb_fielddef_subdef(p->top->f); + + int32_t int_val = 0; + if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, + &int_val)) { + upb_selector_t sel = getsel(p); + upb_sink_putint32(&p->top->sink, sel, int_val); + } else { + upb_status_seterrmsg(p->status, "Enum value name unknown"); + return false; + } + p->accumulated = NULL; } return true; @@ -310,29 +328,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) { static bool start_stringval(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_isstring(p->top->f)) { + if (upb_fielddef_isstring(p->top->f)) { + if (!check_stack(p)) return false; + + // Start a new parser frame: parser frames correspond one-to-one with + // handler frames, and string events occur in a sub-frame. + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + return true; + } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { + // Do nothing -- symbolic enum names in quotes remain in the + // current parser frame. + return true; + } else { upb_status_seterrf(p->status, - "String specified for non-string field: %s", + "String specified for non-string/non-enum field: %s", upb_fielddef_name(p->top->f)); return false; } - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow. - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - return true; } static void end_stringval(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); + if (upb_fielddef_isstring(p->top->f)) { + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&p->top->sink, sel); + p->top--; + } } static void start_number(upb_json_parser *p, const char *ptr) { @@ -341,7 +368,7 @@ static void start_number(upb_json_parser *p, const char *ptr) { } static void end_number(upb_json_parser *p, const char *ptr) { - end_text(p, ptr); + end_text(p, ptr, true); const char *myend = p->accumulated + p->accumulated_len; char *end; @@ -450,15 +477,15 @@ static void hex(upb_json_parser *p, const char *end) { // emit the codepoint as UTF-8. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. int length = 0; - if (codepoint < 0x7F) { + if (codepoint <= 0x7F) { utf8[0] = codepoint; length = 1; - } else if (codepoint < 0x07FF) { + } else if (codepoint <= 0x07FF) { utf8[1] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[0] = (codepoint & 0x1F) | 0xC0; length = 2; - } else /* codepoint < 0xFFFF */ { + } else /* codepoint <= 0xFFFF */ { utf8[2] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[1] = (codepoint & 0x3F) | 0x80; @@ -478,11 +505,11 @@ static void hex(upb_json_parser *p, const char *end) { // What follows is the Ragel parser itself. The language is specified in Ragel // and the actions call our C functions above. -#line 568 "upb/json/parser.rl" +#line 595 "upb/json/parser.rl" -#line 486 "upb/json/parser.c" +#line 513 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, @@ -635,7 +662,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 571 "upb/json/parser.rl" +#line 598 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -652,7 +679,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, const char *pe = buf + size; -#line 656 "upb/json/parser.c" +#line 683 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -727,114 +754,114 @@ _match: switch ( *_acts++ ) { case 0: -#line 489 "upb/json/parser.rl" +#line 516 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 490 "upb/json/parser.rl" +#line 517 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 494 "upb/json/parser.rl" +#line 521 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 495 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_text(parser, p)); } +#line 522 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_text(parser, p, false)); } break; case 4: -#line 501 "upb/json/parser.rl" +#line 528 "upb/json/parser.rl" { start_hex(parser, p); } break; case 5: -#line 502 "upb/json/parser.rl" +#line 529 "upb/json/parser.rl" { hex(parser, p); } break; case 6: -#line 508 "upb/json/parser.rl" +#line 535 "upb/json/parser.rl" { escape(parser, p); } break; case 7: -#line 511 "upb/json/parser.rl" +#line 538 "upb/json/parser.rl" { {cs = stack[--top]; goto _again;} } break; case 8: -#line 512 "upb/json/parser.rl" +#line 539 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 9: -#line 514 "upb/json/parser.rl" +#line 541 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 10: -#line 519 "upb/json/parser.rl" +#line 546 "upb/json/parser.rl" { start_member(parser); } break; case 11: -#line 520 "upb/json/parser.rl" +#line 547 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_member(parser)); } break; case 12: -#line 523 "upb/json/parser.rl" +#line 550 "upb/json/parser.rl" { clear_member(parser); } break; case 13: -#line 529 "upb/json/parser.rl" +#line 556 "upb/json/parser.rl" { start_object(parser); } break; case 14: -#line 532 "upb/json/parser.rl" +#line 559 "upb/json/parser.rl" { end_object(parser); } break; case 15: -#line 538 "upb/json/parser.rl" +#line 565 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 16: -#line 542 "upb/json/parser.rl" +#line 569 "upb/json/parser.rl" { end_array(parser); } break; case 17: -#line 547 "upb/json/parser.rl" +#line 574 "upb/json/parser.rl" { start_number(parser, p); } break; case 18: -#line 548 "upb/json/parser.rl" +#line 575 "upb/json/parser.rl" { end_number(parser, p); } break; case 19: -#line 550 "upb/json/parser.rl" +#line 577 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 20: -#line 551 "upb/json/parser.rl" +#line 578 "upb/json/parser.rl" { end_stringval(parser); } break; case 21: -#line 553 "upb/json/parser.rl" +#line 580 "upb/json/parser.rl" { CHECK_RETURN_TOP(putbool(parser, true)); } break; case 22: -#line 555 "upb/json/parser.rl" +#line 582 "upb/json/parser.rl" { CHECK_RETURN_TOP(putbool(parser, false)); } break; case 23: -#line 557 "upb/json/parser.rl" +#line 584 "upb/json/parser.rl" { /* null value */ } break; case 24: -#line 559 "upb/json/parser.rl" +#line 586 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 25: -#line 560 "upb/json/parser.rl" +#line 587 "upb/json/parser.rl" { end_subobject(parser); } break; case 26: -#line 565 "upb/json/parser.rl" +#line 592 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 838 "upb/json/parser.c" +#line 865 "upb/json/parser.c" } } @@ -847,7 +874,7 @@ _again: _out: {} } -#line 587 "upb/json/parser.rl" +#line 614 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -888,13 +915,13 @@ void upb_json_parser_reset(upb_json_parser *p) { int top; // Emit Ragel initialization of the parser. -#line 892 "upb/json/parser.c" +#line 919 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 627 "upb/json/parser.rl" +#line 654 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; p->text_begin = NULL; diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 92a1566..8ceca77 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -286,7 +286,7 @@ badpadding: return false; } -static bool end_text(upb_json_parser *p, const char *ptr) { +static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) { assert(!p->accumulated); // TODO: handle this case. p->accumulated = p->text_begin; p->accumulated_len = ptr - p->text_begin; @@ -300,6 +300,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) { upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); } p->accumulated = NULL; + } else if (p->top->f && + upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && + !is_num) { + + // Enum case: resolve enum symbolic name to integer value. + const upb_enumdef *enumdef = + (const upb_enumdef*)upb_fielddef_subdef(p->top->f); + + int32_t int_val = 0; + if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, + &int_val)) { + upb_selector_t sel = getsel(p); + upb_sink_putint32(&p->top->sink, sel, int_val); + } else { + upb_status_seterrmsg(p->status, "Enum value name unknown"); + return false; + } + p->accumulated = NULL; } return true; @@ -308,29 +326,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) { static bool start_stringval(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_isstring(p->top->f)) { + if (upb_fielddef_isstring(p->top->f)) { + if (!check_stack(p)) return false; + + // Start a new parser frame: parser frames correspond one-to-one with + // handler frames, and string events occur in a sub-frame. + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + return true; + } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { + // Do nothing -- symbolic enum names in quotes remain in the + // current parser frame. + return true; + } else { upb_status_seterrf(p->status, - "String specified for non-string field: %s", + "String specified for non-string/non-enum field: %s", upb_fielddef_name(p->top->f)); return false; } - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow. - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - return true; } static void end_stringval(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); + if (upb_fielddef_isstring(p->top->f)) { + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&p->top->sink, sel); + p->top--; + } } static void start_number(upb_json_parser *p, const char *ptr) { @@ -339,7 +366,7 @@ static void start_number(upb_json_parser *p, const char *ptr) { } static void end_number(upb_json_parser *p, const char *ptr) { - end_text(p, ptr); + end_text(p, ptr, true); const char *myend = p->accumulated + p->accumulated_len; char *end; @@ -448,15 +475,15 @@ static void hex(upb_json_parser *p, const char *end) { // emit the codepoint as UTF-8. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. int length = 0; - if (codepoint < 0x7F) { + if (codepoint <= 0x7F) { utf8[0] = codepoint; length = 1; - } else if (codepoint < 0x07FF) { + } else if (codepoint <= 0x07FF) { utf8[1] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[0] = (codepoint & 0x1F) | 0xC0; length = 2; - } else /* codepoint < 0xFFFF */ { + } else /* codepoint <= 0xFFFF */ { utf8[2] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[1] = (codepoint & 0x3F) | 0x80; @@ -492,7 +519,7 @@ static void hex(upb_json_parser *p, const char *end) { text = /[^\\"]/+ >{ start_text(parser, p); } - %{ CHECK_RETURN_TOP(end_text(parser, p)); } + %{ CHECK_RETURN_TOP(end_text(parser, p, false)); } ; unicode_char = diff --git a/upb/json/printer.c b/upb/json/printer.c index 44e6f83..28f3e4a 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -69,10 +69,10 @@ static inline char* json_nice_escape(char c) { } } -// Write a properly quoted and escaped string. +// Write a properly escaped string chunk. The surrounding quotes are *not* +// printed; this is so that the caller has the option of emitting the string +// content in chunks. static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { - print_data(p, "\"", 1); - const char* unescaped_run = NULL; for (unsigned int i = 0; i < len; i++) { char c = buf[i]; @@ -112,8 +112,6 @@ static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { if (unescaped_run) { print_data(p, unescaped_run, &buf[len] - unescaped_run); } - - print_data(p, "\"", 1); } #define CHKLENGTH(x) if (!(x)) return -1; @@ -158,8 +156,9 @@ static bool putkey(void *closure, const void *handler_data) { upb_json_printer *p = closure; const strpc *key = handler_data; print_comma(p); + print_data(p, "\"", 1); putstring(p, key->ptr, key->len); - print_data(p, ":", 1); + print_data(p, "\":", 2); return true; } @@ -200,6 +199,47 @@ TYPE_HANDLERS(uint64_t, fmt_uint64); #undef TYPE_HANDLERS +typedef struct { + void *keyname; + const upb_enumdef *enumdef; +} EnumHandlerData; + +static bool scalar_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + CHK(putkey(closure, hd->keyname)); + + const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); + if (symbolic_name) { + print_data(p, "\"", 1); + putstring(p, symbolic_name, strlen(symbolic_name)); + print_data(p, "\"", 1); + } else { + putint32_t(closure, NULL, val); + } + + return true; +} + +static bool repeated_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + print_comma(p); + + const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); + if (symbolic_name) { + print_data(p, "\"", 1); + putstring(p, symbolic_name, strlen(symbolic_name)); + print_data(p, "\"", 1); + } else { + putint32_t(closure, NULL, val); + } + + return true; +} + static void *scalar_startsubmsg(void *closure, const void *handler_data) { return putkey(closure, handler_data) ? closure : UPB_BREAK; } @@ -310,27 +350,60 @@ static size_t putbytes(void *closure, const void *handler_data, const char *str, } size_t bytes = to - data; + print_data(p, "\"", 1); putstring(p, data, bytes); + print_data(p, "\"", 1); return len; } +static void *scalar_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + CHK(putkey(closure, handler_data)); + print_data(p, "\"", 1); + return p; +} + static size_t scalar_str(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { - CHK(putkey(closure, handler_data)); CHK(putstr(closure, handler_data, str, len, handle)); return len; } +static bool scalar_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + +static void *repeated_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + print_comma(p); + print_data(p, "\"", 1); + return p; +} + static size_t repeated_str(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { - upb_json_printer *p = closure; - print_comma(p); CHK(putstr(closure, handler_data, str, len, handle)); return len; } +static bool repeated_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + static size_t scalar_bytes(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { @@ -381,21 +454,44 @@ void sethandlers(const void *closure, upb_handlers *h) { TYPE(UPB_TYPE_FLOAT, float, float); TYPE(UPB_TYPE_DOUBLE, double, double); TYPE(UPB_TYPE_BOOL, bool, bool); - TYPE(UPB_TYPE_ENUM, int32, int32_t); TYPE(UPB_TYPE_INT32, int32, int32_t); TYPE(UPB_TYPE_UINT32, uint32, uint32_t); TYPE(UPB_TYPE_INT64, int64, int64_t); TYPE(UPB_TYPE_UINT64, uint64, uint64_t); + case UPB_TYPE_ENUM: { + // For now, we always emit symbolic names for enums. We may want an + // option later to control this behavior, but we will wait for a real + // need first. + EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); + hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); + hd->keyname = newstrpc(h, f); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&enum_attr, hd); + + if (upb_fielddef_isseq(f)) { + upb_handlers_setint32(h, f, repeated_enum, &enum_attr); + } else { + upb_handlers_setint32(h, f, scalar_enum, &enum_attr); + } + + upb_handlerattr_uninit(&enum_attr); + break; + } case UPB_TYPE_STRING: - // XXX: this doesn't support strings that span buffers yet. if (upb_fielddef_isseq(f)) { + upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr); upb_handlers_setstring(h, f, repeated_str, &empty_attr); + upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr); } else { - upb_handlers_setstring(h, f, scalar_str, &name_attr); + upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr); + upb_handlers_setstring(h, f, scalar_str, &empty_attr); + upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); } break; case UPB_TYPE_BYTES: - // XXX: this doesn't support strings that span buffers yet. + // XXX: this doesn't support strings that span buffers yet. The base64 + // encoder will need to be made resumable for this to work properly. if (upb_fielddef_isseq(f)) { upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); } else { |