summaryrefslogtreecommitdiff
path: root/upb/json/parser.rl
diff options
context:
space:
mode:
author Chris Fallin <cfallin@google.com> 2014-12-09 12:27:22 -0800
committer Chris Fallin <cfallin@google.com> 2014-12-09 13:23:58 -0800
commit 8f8113b4fff748b57b0ff2f1a301e86b4703be84 (patch)
tree ce16586d7f0a8e5c87252727b4b3745d1a48a4eb /upb/json/parser.rl
parent e257bd978d5e6278e7b188d543858852c0c4d856 (diff)
JSON test, symbolic enum names in JSON, and a few improvements.
- Added a JSON test that round-trips (parses then re-serializes) several test messages, ensuring that the re-serialized form matches the original exactly.
- Added support for printing and parsing symbolic enum names (rather than integer values) in JSON.
- Updated the JSON printer to properly handle string fields that arrive in multiple pieces. ('bytes' fields still do not support this; that work is more challenging because it requires making the base64 encoder resumable. Base64 encoding is not separable at an input-byte granularity, unlike string escaping.)
- Fixed a < vs. <= bug in UTF-8 encoding generation (oops).
Diffstat (limited to 'upb/json/parser.rl')
-rw-r--r-- upb/json/parser.rl 69
1 files changed, 48 insertions, 21 deletions
diff --git a/upb/json/parser.rl b/upb/json/parser.rl
index 92a1566..8ceca77 100644
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@@ -286,7 +286,7 @@ badpadding:
return false;
}
-static bool end_text(upb_json_parser *p, const char *ptr) {
+static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
assert(!p->accumulated); // TODO: handle this case.
p->accumulated = p->text_begin;
p->accumulated_len = ptr - p->text_begin;
@@ -300,6 +300,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
}
p->accumulated = NULL;
+ } else if (p->top->f &&
+ upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
+ !is_num) {
+
+ // Enum case: resolve enum symbolic name to integer value.
+ const upb_enumdef *enumdef =
+ (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
+
+ int32_t int_val = 0;
+ if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
+ &int_val)) {
+ upb_selector_t sel = getsel(p);
+ upb_sink_putint32(&p->top->sink, sel, int_val);
+ } else {
+ upb_status_seterrmsg(p->status, "Enum value name unknown");
+ return false;
+ }
+ p->accumulated = NULL;
}
return true;
@@ -308,29 +326,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
static bool start_stringval(upb_json_parser *p) {
assert(p->top->f);
- if (!upb_fielddef_isstring(p->top->f)) {
+ if (upb_fielddef_isstring(p->top->f)) {
+ if (!check_stack(p)) return false;
+
+ // Start a new parser frame: parser frames correspond one-to-one with
+ // handler frames, and string events occur in a sub-frame.
+ upb_jsonparser_frame *inner = p->top + 1;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+ upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
+ inner->m = p->top->m;
+ inner->f = p->top->f;
+ p->top = inner;
+
+ return true;
+ } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
+ // Do nothing -- symbolic enum names in quotes remain in the
+ // current parser frame.
+ return true;
+ } else {
upb_status_seterrf(p->status,
- "String specified for non-string field: %s",
+ "String specified for non-string/non-enum field: %s",
upb_fielddef_name(p->top->f));
return false;
}
- if (!check_stack(p)) return false;
-
- upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow.
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
- upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
- inner->m = p->top->m;
- inner->f = p->top->f;
- p->top = inner;
-
- return true;
}
static void end_stringval(upb_json_parser *p) {
- p->top--;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
- upb_sink_endstr(&p->top->sink, sel);
+ if (upb_fielddef_isstring(p->top->f)) {
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+ upb_sink_endstr(&p->top->sink, sel);
+ p->top--;
+ }
}
static void start_number(upb_json_parser *p, const char *ptr) {
@@ -339,7 +366,7 @@ static void start_number(upb_json_parser *p, const char *ptr) {
}
static void end_number(upb_json_parser *p, const char *ptr) {
- end_text(p, ptr);
+ end_text(p, ptr, true);
const char *myend = p->accumulated + p->accumulated_len;
char *end;
@@ -448,15 +475,15 @@ static void hex(upb_json_parser *p, const char *end) {
// emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
int length = 0;
- if (codepoint < 0x7F) {
+ if (codepoint <= 0x7F) {
utf8[0] = codepoint;
length = 1;
- } else if (codepoint < 0x07FF) {
+ } else if (codepoint <= 0x07FF) {
utf8[1] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[0] = (codepoint & 0x1F) | 0xC0;
length = 2;
- } else /* codepoint < 0xFFFF */ {
+ } else /* codepoint <= 0xFFFF */ {
utf8[2] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[1] = (codepoint & 0x3F) | 0x80;
@@ -492,7 +519,7 @@ static void hex(upb_json_parser *p, const char *end) {
text =
/[^\\"]/+
>{ start_text(parser, p); }
- %{ CHECK_RETURN_TOP(end_text(parser, p)); }
+ %{ CHECK_RETURN_TOP(end_text(parser, p, false)); }
;
unicode_char =
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback