summaryrefslogtreecommitdiff
path: root/upb/json
diff options
context:
space:
mode:
authorJosh Haberman <jhaberman@gmail.com>2015-05-08 16:56:29 -0700
committerJosh Haberman <jhaberman@gmail.com>2015-05-08 16:56:29 -0700
commit3bd691a4975b2267ff04611507e766a7f9f87e83 (patch)
treee5628144f6f920d9ccf792a1499e55503e6ff4d2 /upb/json
parent87fc2c516bff207f880c71526926842fd8dcc77e (diff)
Google-internal development.
Diffstat (limited to 'upb/json')
-rw-r--r--upb/json/parser.c466
-rw-r--r--upb/json/parser.h82
-rw-r--r--upb/json/parser.rl394
-rw-r--r--upb/json/printer.c340
-rw-r--r--upb/json/printer.h70
5 files changed, 1018 insertions, 334 deletions
diff --git a/upb/json/parser.c b/upb/json/parser.c
index cfe1def..4f4a96e 100644
--- a/upb/json/parser.c
+++ b/upb/json/parser.c
@@ -33,6 +33,71 @@
#include "upb/json/parser.h"
+#define UPB_JSON_MAX_DEPTH 64
+
+typedef struct {
+ upb_sink sink;
+
+ // The current message in which we're parsing, and the field whose value we're
+ // expecting next.
+ const upb_msgdef *m;
+ const upb_fielddef *f;
+
+ // We are in a repeated-field context, ready to emit mapentries as
+ // submessages. This flag alters the start-of-object (open-brace) behavior to
+ // begin a sequence of mapentry messages rather than a single submessage.
+ bool is_map;
+
+ // We are in a map-entry message context. This flag is set when parsing the
+ // value field of a single map entry and indicates to all value-field parsers
+ // (subobjects, strings, numbers, and bools) that the map-entry submessage
+ // should end as soon as the value is parsed.
+ bool is_mapentry;
+
+ // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
+ // message's map field that we're currently parsing. This differs from |f|
+ // because |f| is the field in the *current* message (i.e., the map-entry
+ // message itself), not the parent's field that leads to this map.
+ const upb_fielddef *mapfield;
+} upb_jsonparser_frame;
+
+struct upb_json_parser {
+ upb_env *env;
+ upb_byteshandler input_handler_;
+ upb_bytessink input_;
+
+ // Stack to track the JSON scopes we are in.
+ upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
+ upb_jsonparser_frame *top;
+ upb_jsonparser_frame *limit;
+
+ upb_status *status;
+
+ // Ragel's internal parsing stack for the parsing state machine.
+ int current_state;
+ int parser_stack[UPB_JSON_MAX_DEPTH];
+ int parser_top;
+
+ // The handle for the current buffer.
+ const upb_bufhandle *handle;
+
+ // Accumulate buffer. See details in parser.rl.
+ const char *accumulated;
+ size_t accumulated_len;
+ char *accumulate_buf;
+ size_t accumulate_buf_size;
+
+ // Multi-part text data. See details in parser.rl.
+ int multipart_state;
+ upb_selector_t string_selector;
+
+ // Input capture. See details in parser.rl.
+ const char *capture;
+
+ // Intermediate result of parsing a unicode escape sequence.
+ uint32_t digit;
+};
+
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
@@ -235,12 +300,13 @@ static void accumulate_clear(upb_json_parser *p) {
// Used internally by accumulate_append().
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
- size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
+ size_t old_size = p->accumulate_buf_size;
+ size_t new_size = UPB_MAX(old_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
- void *mem = realloc(p->accumulate_buf, new_size);
+ void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
if (!mem) {
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
return false;
@@ -262,16 +328,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
return true;
}
- if (p->accumulate_buf_size - p->accumulated_len < len) {
- size_t need;
- if (!checked_add(p->accumulated_len, len, &need)) {
- upb_status_seterrmsg(p->status, "Integer overflow.");
- return false;
- }
+ size_t need;
+ if (!checked_add(p->accumulated_len, len, &need)) {
+ upb_status_seterrmsg(p->status, "Integer overflow.");
+ return false;
+ }
- if (!accumulate_realloc(p, need)) {
- return false;
- }
+ if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
+ return false;
}
if (p->accumulated != p->accumulate_buf) {
@@ -510,16 +574,28 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
+static bool parse_number(upb_json_parser *p);
+
static bool end_number(upb_json_parser *p, const char *ptr) {
if (!capture_end(p, ptr)) {
return false;
}
+ return parse_number(p);
+}
+
+static bool parse_number(upb_json_parser *p) {
+ // strtol() and friends unfortunately do not support specifying the length of
+ // the input string, so we need to force a copy into a NULL-terminated buffer.
+ if (!multipart_text(p, "\0", 1, false)) {
+ return false;
+ }
+
size_t len;
const char *buf = accumulate_getptr(p, &len);
- const char *myend = buf + len;
- char *end;
+ const char *myend = buf + len - 1; // One for NULL.
+ char *end;
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
@@ -575,10 +651,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
}
multipart_end(p);
+
return true;
err:
- upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len);
+ upb_status_seterrf(p->status, "error parsing number: %s", buf);
multipart_end(p);
return false;
}
@@ -593,6 +670,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
+
return true;
}
@@ -609,6 +687,8 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
+ inner->is_map = false;
+ inner->is_mapentry = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@@ -686,6 +766,7 @@ static bool end_stringval(upb_json_parser *p) {
}
multipart_end(p);
+
return ok;
}
@@ -694,54 +775,217 @@ static void start_member(upb_json_parser *p) {
multipart_startaccum(p);
}
-static bool end_member(upb_json_parser *p) {
- assert(!p->top->f);
+// Helper: invoked during handle_mapentry() to emit the mapentry message's key
+// field based on the current contents of the accumulate buffer.
+static bool parse_mapentry_key(upb_json_parser *p) {
+
size_t len;
const char *buf = accumulate_getptr(p, &len);
- const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
+ // Emit the key field. We do a bit of ad-hoc parsing here because the
+ // parser state machine has already decided that this is a string field
+ // name, and we are reinterpreting it as some arbitrary key type. In
+ // particular, integer and bool keys are quoted, so we need to parse the
+ // quoted string contents here.
- if (!f) {
- // TODO(haberman): Ignore unknown fields if requested/configured to do so.
- upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
+ p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
+ if (p->top->f == NULL) {
+ upb_status_seterrmsg(p->status, "mapentry message has no key");
return false;
}
+ switch (upb_fielddef_type(p->top->f)) {
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_UINT64:
+ // Invoke end_number. The accum buffer has the number's text already.
+ if (!parse_number(p)) {
+ return false;
+ }
+ break;
+ case UPB_TYPE_BOOL:
+ if (len == 4 && !strncmp(buf, "true", 4)) {
+ if (!parser_putbool(p, true)) {
+ return false;
+ }
+ } else if (len == 5 && !strncmp(buf, "false", 5)) {
+ if (!parser_putbool(p, false)) {
+ return false;
+ }
+ } else {
+ upb_status_seterrmsg(p->status,
+ "Map bool key not 'true' or 'false'");
+ return false;
+ }
+ multipart_end(p);
+ break;
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES: {
+ upb_sink subsink;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+ upb_sink_startstr(&p->top->sink, sel, len, &subsink);
+ sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
+ upb_sink_putstring(&subsink, sel, buf, len, NULL);
+ sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+ upb_sink_endstr(&subsink, sel);
+ multipart_end(p);
+ break;
+ }
+ default:
+ upb_status_seterrmsg(p->status, "Invalid field type for map key");
+ return false;
+ }
- p->top->f = f;
- multipart_end(p);
+ return true;
+}
+
+// Helper: emit one map entry (as a submessage in the map field sequence). This
+// is invoked from end_membername(), at the end of the map entry's key string,
+// with the map key in the accumulate buffer. It parses the key from that
+// buffer, emits the handler calls to start the mapentry submessage (setting up
+// its subframe in the process), and sets up state in the subframe so that the
+// value parser (invoked next) will emit the mapentry's value field and then
+// end the mapentry message.
+
+// Returns true on success. Returns false if the frame stack check fails, if
+// the key cannot be emitted by parse_mapentry_key(), or if the map-entry
+// message has no value field.
+static bool handle_mapentry(upb_json_parser *p) {
+ // Map entry: p->top->sink is the seq frame, so we need to start a frame
+ // for the mapentry itself, and then set |f| in that frame so that the map
+ // value field is parsed, and also set a flag to end the frame after the
+ // map-entry value is parsed.
+ if (!check_stack(p)) return false;
+
+ const upb_fielddef *mapfield = p->top->mapfield;
+ const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);
+
+ upb_jsonparser_frame *inner = p->top + 1;
+ // The selector lookup below is keyed off p->top->f, so point it at the
+ // map field before asking for the STARTSUBMSG selector.
+ p->top->f = mapfield;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+ upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+ inner->m = mapentrymsg;
+ inner->mapfield = mapfield;
+ inner->is_map = false;
+
+ // Don't set this to true *yet* -- we reuse parsing handlers below to push
+ // the key field value to the sink, and these handlers will pop the frame
+ // if they see is_mapentry (when invoked by the parser state machine, they
+ // would have just seen the map-entry value, not key).
+ inner->is_mapentry = false;
+ p->top = inner;
+
+ // send STARTMSG in submsg frame.
+ upb_sink_startmsg(&p->top->sink);
+
+ // Propagate key failures (bad bool text, unsupported key type, missing key
+ // field, number parse error). Previously the result was dropped and parsing
+ // carried on into the value field after the error had been recorded.
+ if (!parse_mapentry_key(p)) {
+ return false;
+ }
+
+ // Set up the value field to receive the map-entry value.
+ p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
+ p->top->is_mapentry = true; // set up to pop frame after value is parsed.
+ p->top->mapfield = mapfield;
+ if (p->top->f == NULL) {
+ upb_status_seterrmsg(p->status, "mapentry message has no value");
+ return false;
+ }
return true;
}
-static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
+static bool end_membername(upb_json_parser *p) {
+ assert(!p->top->f);
+
+ if (p->top->is_map) {
+ return handle_mapentry(p);
+ } else {
+ size_t len;
+ const char *buf = accumulate_getptr(p, &len);
+ const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
+
+ if (!f) {
+ // TODO(haberman): Ignore unknown fields if requested/configured to do so.
+ upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
+ return false;
+ }
+
+ p->top->f = f;
+ multipart_end(p);
+
+ return true;
+ }
+}
+
+static void end_member(upb_json_parser *p) {
+ // If we just parsed a map-entry value, end that frame too.
+ if (p->top->is_mapentry) {
+ assert(p->top > p->stack);
+ // send ENDMSG on submsg.
+ upb_status s = UPB_STATUS_INIT;
+ upb_sink_endmsg(&p->top->sink, &s);
+ const upb_fielddef* mapfield = p->top->mapfield;
+
+ // send ENDSUBMSG in repeated-field-of-mapentries frame.
+ p->top--;
+ upb_selector_t sel;
+ bool ok = upb_handlers_getselector(mapfield,
+ UPB_HANDLER_ENDSUBMSG, &sel);
+ UPB_ASSERT_VAR(ok, ok);
+ upb_sink_endsubmsg(&p->top->sink, sel);
+ }
+
+ p->top->f = NULL;
+}
static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
- if (!upb_fielddef_issubmsg(p->top->f)) {
+ if (upb_fielddef_ismap(p->top->f)) {
+ // Beginning of a map. Start a new parser frame in a repeated-field
+ // context.
+ if (!check_stack(p)) return false;
+
+ upb_jsonparser_frame *inner = p->top + 1;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
+ upb_sink_startseq(&p->top->sink, sel, &inner->sink);
+ inner->m = upb_fielddef_msgsubdef(p->top->f);
+ inner->mapfield = p->top->f;
+ inner->f = NULL;
+ inner->is_map = true;
+ inner->is_mapentry = false;
+ p->top = inner;
+
+ return true;
+ } else if (upb_fielddef_issubmsg(p->top->f)) {
+ // Beginning of a subobject. Start a new parser frame in the submsg
+ // context.
+ if (!check_stack(p)) return false;
+
+ upb_jsonparser_frame *inner = p->top + 1;
+
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+ upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+ inner->m = upb_fielddef_msgsubdef(p->top->f);
+ inner->f = NULL;
+ inner->is_map = false;
+ inner->is_mapentry = false;
+ p->top = inner;
+
+ return true;
+ } else {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
-
- if (!check_stack(p)) return false;
-
- upb_jsonparser_frame *inner = p->top + 1;
-
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
- upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
- inner->m = upb_fielddef_msgsubdef(p->top->f);
- inner->f = NULL;
- p->top = inner;
-
- return true;
}
static void end_subobject(upb_json_parser *p) {
- p->top--;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
- upb_sink_endsubmsg(&p->top->sink, sel);
+ if (p->top->is_map) {
+ p->top--;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
+ upb_sink_endseq(&p->top->sink, sel);
+ } else {
+ p->top--;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
+ upb_sink_endsubmsg(&p->top->sink, sel);
+ }
}
static bool start_array(upb_json_parser *p) {
@@ -761,6 +1005,8 @@ static bool start_array(upb_json_parser *p) {
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
+ inner->is_map = false;
+ inner->is_mapentry = false;
p->top = inner;
return true;
@@ -775,12 +1021,16 @@ static void end_array(upb_json_parser *p) {
}
static void start_object(upb_json_parser *p) {
- upb_sink_startmsg(&p->top->sink);
+ if (!p->top->is_map) {
+ upb_sink_startmsg(&p->top->sink);
+ }
}
static void end_object(upb_json_parser *p) {
- upb_status status;
- upb_sink_endmsg(&p->top->sink, &status);
+ // A map frame is a sequence of map-entry submessages rather than a message,
+ // so no ENDMSG is sent for it (start_object() skips STARTMSG the same way).
+ if (!p->top->is_map) {
+ // Initialize the status before handing it to the sink, as end_member()
+ // does, so the endmsg handler never sees indeterminate contents.
+ // NOTE(review): the resulting status is still not inspected here.
+ upb_status status = UPB_STATUS_INIT;
+ upb_sink_endmsg(&p->top->sink, &status);
+ }
}
@@ -805,11 +1055,11 @@ static void end_object(upb_json_parser *p) {
// final state once, when the closing '"' is seen.
-#line 901 "upb/json/parser.rl"
+#line 1151 "upb/json/parser.rl"
-#line 813 "upb/json/parser.c"
+#line 1063 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1,
5, 1, 6, 1, 7, 1, 8, 1,
@@ -960,7 +1210,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1;
-#line 904 "upb/json/parser.rl"
+#line 1154 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
@@ -980,7 +1230,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
capture_resume(parser, buf);
-#line 984 "upb/json/parser.c"
+#line 1234 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@@ -1055,118 +1305,118 @@ _match:
switch ( *_acts++ )
{
case 0:
-#line 816 "upb/json/parser.rl"
+#line 1066 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 1:
-#line 817 "upb/json/parser.rl"
+#line 1067 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
break;
case 2:
-#line 821 "upb/json/parser.rl"
+#line 1071 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 3:
-#line 822 "upb/json/parser.rl"
+#line 1072 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); }
break;
case 4:
-#line 828 "upb/json/parser.rl"
+#line 1078 "upb/json/parser.rl"
{ start_hex(parser); }
break;
case 5:
-#line 829 "upb/json/parser.rl"
+#line 1079 "upb/json/parser.rl"
{ hexdigit(parser, p); }
break;
case 6:
-#line 830 "upb/json/parser.rl"
+#line 1080 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_hex(parser)); }
break;
case 7:
-#line 836 "upb/json/parser.rl"
+#line 1086 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(escape(parser, p)); }
break;
case 8:
-#line 842 "upb/json/parser.rl"
+#line 1092 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 9:
-#line 845 "upb/json/parser.rl"
+#line 1095 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} }
break;
case 10:
-#line 847 "upb/json/parser.rl"
+#line 1097 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
break;
case 11:
-#line 852 "upb/json/parser.rl"
+#line 1102 "upb/json/parser.rl"
{ start_member(parser); }
break;
case 12:
-#line 853 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(end_member(parser)); }
+#line 1103 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(end_membername(parser)); }
break;
case 13:
-#line 856 "upb/json/parser.rl"
- { clear_member(parser); }
+#line 1106 "upb/json/parser.rl"
+ { end_member(parser); }
break;
case 14:
-#line 862 "upb/json/parser.rl"
+#line 1112 "upb/json/parser.rl"
{ start_object(parser); }
break;
case 15:
-#line 865 "upb/json/parser.rl"
+#line 1115 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 16:
-#line 871 "upb/json/parser.rl"
+#line 1121 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); }
break;
case 17:
-#line 875 "upb/json/parser.rl"
+#line 1125 "upb/json/parser.rl"
{ end_array(parser); }
break;
case 18:
-#line 880 "upb/json/parser.rl"
+#line 1130 "upb/json/parser.rl"
{ start_number(parser, p); }
break;
case 19:
-#line 881 "upb/json/parser.rl"
+#line 1131 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 20:
-#line 883 "upb/json/parser.rl"
+#line 1133 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 21:
-#line 884 "upb/json/parser.rl"
+#line 1134 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_stringval(parser)); }
break;
case 22:
-#line 886 "upb/json/parser.rl"
+#line 1136 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break;
case 23:
-#line 888 "upb/json/parser.rl"
+#line 1138 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break;
case 24:
-#line 890 "upb/json/parser.rl"
+#line 1140 "upb/json/parser.rl"
{ /* null value */ }
break;
case 25:
-#line 892 "upb/json/parser.rl"
+#line 1142 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); }
break;
case 26:
-#line 893 "upb/json/parser.rl"
+#line 1143 "upb/json/parser.rl"
{ end_subobject(parser); }
break;
case 27:
-#line 898 "upb/json/parser.rl"
+#line 1148 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
-#line 1170 "upb/json/parser.c"
+#line 1420 "upb/json/parser.c"
}
}
@@ -1179,7 +1429,7 @@ _again:
_out: {}
}
-#line 923 "upb/json/parser.rl"
+#line 1173 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@@ -1201,52 +1451,58 @@ bool end(void *closure, const void *hd) {
return true;
}
-
-/* Public API *****************************************************************/
-
-void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
- p->limit = p->stack + UPB_JSON_MAX_DEPTH;
- p->accumulate_buf = NULL;
- p->accumulate_buf_size = 0;
- upb_byteshandler_init(&p->input_handler_);
- upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
- upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
- upb_bytessink_reset(&p->input_, &p->input_handler_, p);
- p->status = status;
-}
-
-void upb_json_parser_uninit(upb_json_parser *p) {
- upb_byteshandler_uninit(&p->input_handler_);
- free(p->accumulate_buf);
-}
-
-void upb_json_parser_reset(upb_json_parser *p) {
+static void json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
+ p->top->is_map = false;
+ p->top->is_mapentry = false;
int cs;
int top;
// Emit Ragel initialization of the parser.
-#line 1232 "upb/json/parser.c"
+#line 1465 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
-#line 971 "upb/json/parser.rl"
+#line 1204 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
accumulate_clear(p);
p->multipart_state = MULTIPART_INACTIVE;
p->capture = NULL;
+ p->accumulated = NULL;
}
-void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
- upb_json_parser_reset(p);
- upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
- p->top->m = upb_handlers_msgdef(sink->handlers);
- p->accumulated = NULL;
+
+/* Public API *****************************************************************/
+
+// Allocates a parser from |env| and binds it to |output|: the bottom stack
+// frame's sink is reset to output's handlers/closure and its msgdef is taken
+// from output->handlers. Returns NULL if the allocation fails.
+upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
+#ifndef NDEBUG
+ // Only needed by the size assert below, which is compiled out with NDEBUG.
+ const size_t size_before = upb_env_bytesallocated(env);
+#endif
+ upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
+ // This function returns a pointer, so report failure as NULL, not false.
+ if (!p) return NULL;
+
+ p->env = env;
+ p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+ p->accumulate_buf = NULL;
+ p->accumulate_buf_size = 0;
+ // NOTE(review): nothing visible in this diff assigns p->status, yet the
+ // error paths pass it to upb_status_seterrmsg()/upb_status_seterrf(). Give
+ // it a determinate value instead of leaving malloc'd garbage; confirm that
+ // those helpers accept NULL, or wire a real status/error reporter here.
+ p->status = NULL;
+ upb_byteshandler_init(&p->input_handler_);
+ upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
+ upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
+ upb_bytessink_reset(&p->input_, &p->input_handler_, p);
+
+ // Reset first: it points p->top at the bottom frame, which is then bound
+ // to the output sink.
+ json_parser_reset(p);
+ upb_sink_reset(&p->top->sink, output->handlers, output->closure);
+ p->top->m = upb_handlers_msgdef(output->handlers);
+
+ // If this fails, uncomment and increase the value in parser.h.
+ // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before);
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
+ return p;
}
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
diff --git a/upb/json/parser.h b/upb/json/parser.h
index 51578f2..b932adf 100644
--- a/upb/json/parser.h
+++ b/upb/json/parser.h
@@ -11,6 +11,7 @@
#ifndef UPB_JSON_PARSER_H_
#define UPB_JSON_PARSER_H_
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -23,78 +24,32 @@ class Parser;
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser);
-// Internal-only struct used by the parser.
-typedef struct {
- UPB_PRIVATE_FOR_CPP
- upb_sink sink;
- const upb_msgdef *m;
- const upb_fielddef *f;
-} upb_jsonparser_frame;
-
-
/* upb::json::Parser **********************************************************/
-#define UPB_JSON_MAX_DEPTH 64
+// Preallocation hint: parser won't allocate more bytes than this when first
+// constructed. This hint may be an overestimate for some build configurations.
+// But if the parser library is upgraded without recompiling the application,
+// it may be an underestimate.
+#define UPB_JSON_PARSER_SIZE 3568
+
+#ifdef __cplusplus
// Parses an incoming BytesStream, pushing the results to the destination sink.
-UPB_DEFINE_CLASS0(upb::json::Parser,
+class upb::json::Parser {
public:
- Parser(Status* status);
- ~Parser();
+ static Parser* Create(Environment* env, Sink* output);
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
-
- // Resets the output pointer which will serve as our closure. Implies
- // Reset().
- void ResetOutput(Sink* output);
-
- // The input to the printer.
BytesSink* input();
-,
-UPB_DEFINE_STRUCT0(upb_json_parser,
- upb_byteshandler input_handler_;
- upb_bytessink input_;
-
- // Stack to track the JSON scopes we are in.
- upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
- upb_jsonparser_frame *top;
- upb_jsonparser_frame *limit;
- upb_status *status;
+ private:
+ UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser);
+};
- // Ragel's internal parsing stack for the parsing state machine.
- int current_state;
- int parser_stack[UPB_JSON_MAX_DEPTH];
- int parser_top;
-
- // The handle for the current buffer.
- const upb_bufhandle *handle;
-
- // Accumulate buffer. See details in parser.rl.
- const char *accumulated;
- size_t accumulated_len;
- char *accumulate_buf;
- size_t accumulate_buf_size;
-
- // Multi-part text data. See details in parser.rl.
- int multipart_state;
- upb_selector_t string_selector;
-
- // Input capture. See details in parser.rl.
- const char *capture;
-
- // Intermediate result of parsing a unicode escape sequence.
- uint32_t digit;
-));
+#endif
UPB_BEGIN_EXTERN_C
-void upb_json_parser_init(upb_json_parser *p, upb_status *status);
-void upb_json_parser_uninit(upb_json_parser *p);
-void upb_json_parser_reset(upb_json_parser *p);
-void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *output);
+upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output);
upb_bytessink *upb_json_parser_input(upb_json_parser *p);
UPB_END_EXTERN_C
@@ -103,11 +58,8 @@ UPB_END_EXTERN_C
namespace upb {
namespace json {
-inline Parser::Parser(Status* status) { upb_json_parser_init(this, status); }
-inline Parser::~Parser() { upb_json_parser_uninit(this); }
-inline void Parser::Reset() { upb_json_parser_reset(this); }
-inline void Parser::ResetOutput(Sink* output) {
- upb_json_parser_resetoutput(this, output);
+inline Parser* Parser::Create(Environment* env, Sink* output) {
+ return upb_json_parser_create(env, output);
}
inline BytesSink* Parser::input() {
return upb_json_parser_input(this);
diff --git a/upb/json/parser.rl b/upb/json/parser.rl
index b72bc10..3a400ea 100644
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@@ -31,6 +31,71 @@
#include "upb/json/parser.h"
+#define UPB_JSON_MAX_DEPTH 64
+
+typedef struct {
+ upb_sink sink;
+
+ // The current message in which we're parsing, and the field whose value we're
+ // expecting next.
+ const upb_msgdef *m;
+ const upb_fielddef *f;
+
+ // We are in a repeated-field context, ready to emit mapentries as
+ // submessages. This flag alters the start-of-object (open-brace) behavior to
+ // begin a sequence of mapentry messages rather than a single submessage.
+ bool is_map;
+
+ // We are in a map-entry message context. This flag is set when parsing the
+ // value field of a single map entry and indicates to all value-field parsers
+ // (subobjects, strings, numbers, and bools) that the map-entry submessage
+ // should end as soon as the value is parsed.
+ bool is_mapentry;
+
+ // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
+ // message's map field that we're currently parsing. This differs from |f|
+ // because |f| is the field in the *current* message (i.e., the map-entry
+ // message itself), not the parent's field that leads to this map.
+ const upb_fielddef *mapfield;
+} upb_jsonparser_frame;
+
+struct upb_json_parser {
+ upb_env *env;
+ upb_byteshandler input_handler_;
+ upb_bytessink input_;
+
+ // Stack to track the JSON scopes we are in.
+ upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
+ upb_jsonparser_frame *top;
+ upb_jsonparser_frame *limit;
+
+ upb_status *status;
+
+ // Ragel's internal parsing stack for the parsing state machine.
+ int current_state;
+ int parser_stack[UPB_JSON_MAX_DEPTH];
+ int parser_top;
+
+ // The handle for the current buffer.
+ const upb_bufhandle *handle;
+
+ // Accumulate buffer. See details in parser.rl.
+ const char *accumulated;
+ size_t accumulated_len;
+ char *accumulate_buf;
+ size_t accumulate_buf_size;
+
+ // Multi-part text data. See details in parser.rl.
+ int multipart_state;
+ upb_selector_t string_selector;
+
+ // Input capture. See details in parser.rl.
+ const char *capture;
+
+ // Intermediate result of parsing a unicode escape sequence.
+ uint32_t digit;
+};
+
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
@@ -233,12 +298,13 @@ static void accumulate_clear(upb_json_parser *p) {
// Used internally by accumulate_append().
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
- size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
+ size_t old_size = p->accumulate_buf_size;
+ size_t new_size = UPB_MAX(old_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
- void *mem = realloc(p->accumulate_buf, new_size);
+ void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
if (!mem) {
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
return false;
@@ -260,16 +326,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
return true;
}
- if (p->accumulate_buf_size - p->accumulated_len < len) {
- size_t need;
- if (!checked_add(p->accumulated_len, len, &need)) {
- upb_status_seterrmsg(p->status, "Integer overflow.");
- return false;
- }
+ size_t need;
+ if (!checked_add(p->accumulated_len, len, &need)) {
+ upb_status_seterrmsg(p->status, "Integer overflow.");
+ return false;
+ }
- if (!accumulate_realloc(p, need)) {
- return false;
- }
+ if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
+ return false;
}
if (p->accumulated != p->accumulate_buf) {
@@ -508,16 +572,28 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
+static bool parse_number(upb_json_parser *p);
+
static bool end_number(upb_json_parser *p, const char *ptr) {
if (!capture_end(p, ptr)) {
return false;
}
+ return parse_number(p);
+}
+
+static bool parse_number(upb_json_parser *p) {
+ // strtol() and friends unfortunately do not support specifying the length of
+ // the input string, so we need to force a copy into a NULL-terminated buffer.
+ if (!multipart_text(p, "\0", 1, false)) {
+ return false;
+ }
+
size_t len;
const char *buf = accumulate_getptr(p, &len);
- const char *myend = buf + len;
- char *end;
+ const char *myend = buf + len - 1; // One for NULL.
+ char *end;
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
@@ -573,10 +649,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
}
multipart_end(p);
+
return true;
err:
- upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len);
+ upb_status_seterrf(p->status, "error parsing number: %s", buf);
multipart_end(p);
return false;
}
@@ -591,6 +668,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
+
return true;
}
@@ -607,6 +685,8 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
+ inner->is_map = false;
+ inner->is_mapentry = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@@ -684,6 +764,7 @@ static bool end_stringval(upb_json_parser *p) {
}
multipart_end(p);
+
return ok;
}
@@ -692,54 +773,217 @@ static void start_member(upb_json_parser *p) {
multipart_startaccum(p);
}
-static bool end_member(upb_json_parser *p) {
- assert(!p->top->f);
+// Helper: invoked during handle_mapentry() to emit the mapentry message's key
+// field based on the current contents of the accumulate buffer.
+static bool parse_mapentry_key(upb_json_parser *p) {
+
size_t len;
const char *buf = accumulate_getptr(p, &len);
- const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
+ // Emit the key field. We do a bit of ad-hoc parsing here because the
+ // parser state machine has already decided that this is a string field
+ // name, and we are reinterpreting it as some arbitrary key type. In
+ // particular, integer and bool keys are quoted, so we need to parse the
+ // quoted string contents here.
- if (!f) {
- // TODO(haberman): Ignore unknown fields if requested/configured to do so.
- upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
+ p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
+ if (p->top->f == NULL) {
+ upb_status_seterrmsg(p->status, "mapentry message has no key");
return false;
}
+ switch (upb_fielddef_type(p->top->f)) {
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_UINT64:
+ // Invoke end_number. The accum buffer has the number's text already.
+ if (!parse_number(p)) {
+ return false;
+ }
+ break;
+ case UPB_TYPE_BOOL:
+ if (len == 4 && !strncmp(buf, "true", 4)) {
+ if (!parser_putbool(p, true)) {
+ return false;
+ }
+ } else if (len == 5 && !strncmp(buf, "false", 5)) {
+ if (!parser_putbool(p, false)) {
+ return false;
+ }
+ } else {
+ upb_status_seterrmsg(p->status,
+ "Map bool key not 'true' or 'false'");
+ return false;
+ }
+ multipart_end(p);
+ break;
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES: {
+ upb_sink subsink;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+ upb_sink_startstr(&p->top->sink, sel, len, &subsink);
+ sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
+ upb_sink_putstring(&subsink, sel, buf, len, NULL);
+ sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+ upb_sink_endstr(&subsink, sel);
+ multipart_end(p);
+ break;
+ }
+ default:
+ upb_status_seterrmsg(p->status, "Invalid field type for map key");
+ return false;
+ }
- p->top->f = f;
- multipart_end(p);
+ return true;
+}
+
+// Helper: emit one map entry (as a submessage in the map field sequence). This
+// is invoked from end_membername(), at the end of the map entry's key string,
+// with the map key in the accumulate buffer. It parses the key from that
+// buffer, emits the handler calls to start the mapentry submessage (setting up
+// its subframe in the process), and sets up state in the subframe so that the
+// value parser (invoked next) will emit the mapentry's value field and then
+// end the mapentry message.
+//
+// Returns false if the stack check fails, if the key cannot be emitted, or if
+// the mapentry message has no value field.
+static bool handle_mapentry(upb_json_parser *p) {
+ // Map entry: p->top->sink is the seq frame, so we need to start a frame
+ // for the mapentry itself, and then set |f| in that frame so that the map
+ // value field is parsed, and also set a flag to end the frame after the
+ // map-entry value is parsed.
+ if (!check_stack(p)) return false;
+
+ const upb_fielddef *mapfield = p->top->mapfield;
+ const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);
+
+ upb_jsonparser_frame *inner = p->top + 1;
+ p->top->f = mapfield;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+ upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+ inner->m = mapentrymsg;
+ inner->mapfield = mapfield;
+ inner->is_map = false;
+
+ // Don't set this to true *yet* -- we reuse parsing handlers below to push
+ // the key field value to the sink, and these handlers will pop the frame
+ // if they see is_mapentry (when invoked by the parser state machine, they
+ // would have just seen the map-entry value, not key).
+ inner->is_mapentry = false;
+ p->top = inner;
+
+ // send STARTMSG in submsg frame.
+ upb_sink_startmsg(&p->top->sink);
+
+ // Propagate key errors (bad number, bad bool literal, unsupported key type)
+ // instead of silently continuing on to the value.
+ if (!parse_mapentry_key(p)) return false;
+
+ // Set up the value field to receive the map-entry value.
+ p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
+ p->top->is_mapentry = true; // set up to pop frame after value is parsed.
+ p->top->mapfield = mapfield;
+ if (p->top->f == NULL) {
+ upb_status_seterrmsg(p->status, "mapentry message has no value");
+ return false;
+ }
return true;
}
-static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
+// Called once a member name has been fully accumulated. In a map frame the
+// name is a map key and is handed to handle_mapentry(); otherwise it is looked
+// up as a field of the current message and stored in p->top->f. Returns false
+// (with p->status set) when no such field exists.
+static bool end_membername(upb_json_parser *p) {
+  assert(!p->top->f);
+
+  // Map context: the "member name" is really a map key.
+  if (p->top->is_map) return handle_mapentry(p);
+
+  // Message context: resolve the name against the current message definition.
+  size_t name_len;
+  const char *name = accumulate_getptr(p, &name_len);
+  const upb_fielddef *field = upb_msgdef_ntof(p->top->m, name, name_len);
+  if (field == NULL) {
+    // TODO(haberman): Ignore unknown fields if requested/configured to do so.
+    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len, name);
+    return false;
+  }
+
+  p->top->f = field;
+  multipart_end(p);
+  return true;
+}
+
+// Finishes one object member. If the member was a map entry, the mapentry
+// submessage frame is closed (ENDMSG, pop, ENDSUBMSG) first; in every case the
+// current field of the resulting top frame is cleared.
+static void end_member(upb_json_parser *p) {
+  if (p->top->is_mapentry) {
+    // The mapentry frame always sits above the map's sequence frame.
+    assert(p->top > p->stack);
+
+    // ENDMSG goes to the mapentry submessage itself.
+    upb_status endmsg_status = UPB_STATUS_INIT;
+    upb_sink_endmsg(&p->top->sink, &endmsg_status);
+
+    // Grab the parent's map field before the frame is popped.
+    const upb_fielddef *parent_mapfield = p->top->mapfield;
+    p->top--;
+
+    // ENDSUBMSG goes to the repeated-field-of-mapentries frame.
+    upb_selector_t endsubmsg_sel;
+    bool found = upb_handlers_getselector(parent_mapfield,
+                                          UPB_HANDLER_ENDSUBMSG,
+                                          &endsubmsg_sel);
+    UPB_ASSERT_VAR(found, found);
+    upb_sink_endsubmsg(&p->top->sink, endsubmsg_sel);
+  }
+
+  p->top->f = NULL;
+}
+// Begins a JSON object that is the value of the current field (p->top->f).
+// A map field pushes a sequence frame with is_map set; a submessage field
+// pushes a submessage frame; any other field type records an error in
+// p->status and returns false. Also returns false if check_stack() fails.
static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
- if (!upb_fielddef_issubmsg(p->top->f)) {
+ if (upb_fielddef_ismap(p->top->f)) {
+ // Beginning of a map. Start a new parser frame in a repeated-field
+ // context; individual entries are emitted later, one per member name.
+ if (!check_stack(p)) return false;
+
+ upb_jsonparser_frame *inner = p->top + 1;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
+ upb_sink_startseq(&p->top->sink, sel, &inner->sink);
+ inner->m = upb_fielddef_msgsubdef(p->top->f);
+ inner->mapfield = p->top->f;
+ inner->f = NULL;
+ inner->is_map = true;
+ inner->is_mapentry = false;
+ p->top = inner;
+
+ return true;
+ } else if (upb_fielddef_issubmsg(p->top->f)) {
+ // Beginning of a subobject. Start a new parser frame in the submsg
+ // context.
+ if (!check_stack(p)) return false;
+
+ upb_jsonparser_frame *inner = p->top + 1;
+
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+ upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+ inner->m = upb_fielddef_msgsubdef(p->top->f);
+ inner->f = NULL;
+ inner->is_map = false;
+ inner->is_mapentry = false;
+ p->top = inner;
+
+ return true;
+ } else {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
-
- if (!check_stack(p)) return false;
-
- upb_jsonparser_frame *inner = p->top + 1;
-
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
- upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
- inner->m = upb_fielddef_msgsubdef(p->top->f);
- inner->f = NULL;
- p->top = inner;
-
- return true;
}
+// Pops the frame pushed by start_subobject() and closes it on the parent
+// sink: ENDSEQ when the popped frame was a map, ENDSUBMSG otherwise.
static void end_subobject(upb_json_parser *p) {
- p->top--;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
- upb_sink_endsubmsg(&p->top->sink, sel);
+ if (p->top->is_map) {
+ p->top--;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
+ upb_sink_endseq(&p->top->sink, sel);
+ } else {
+ p->top--;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
+ upb_sink_endsubmsg(&p->top->sink, sel);
+ }
}
static bool start_array(upb_json_parser *p) {
@@ -759,6 +1003,8 @@ static bool start_array(upb_json_parser *p) {
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
+ inner->is_map = false;
+ inner->is_mapentry = false;
p->top = inner;
return true;
@@ -773,12 +1019,16 @@ static void end_array(upb_json_parser *p) {
}
+// Sends STARTMSG on the current frame's sink, except in a map frame: there
+// each entry gets its own STARTMSG from handle_mapentry().
static void start_object(upb_json_parser *p) {
- upb_sink_startmsg(&p->top->sink);
+ if (!p->top->is_map) {
+ upb_sink_startmsg(&p->top->sink);
+ }
}
+// Sends ENDMSG on the current frame's sink, except in a map frame, where no
+// message was started by start_object().
static void end_object(upb_json_parser *p) {
- upb_status status;
- upb_sink_endmsg(&p->top->sink, &status);
+ if (!p->top->is_map) {
+ // Hand the sink an initialized status rather than stack garbage.
+ upb_status status = UPB_STATUS_INIT;
+ upb_sink_endmsg(&p->top->sink, &status);
+ }
}
@@ -850,10 +1100,10 @@ static void end_object(upb_json_parser *p) {
ws
string
>{ start_member(parser); }
- @{ CHECK_RETURN_TOP(end_member(parser)); }
+ @{ CHECK_RETURN_TOP(end_membername(parser)); }
ws ":" ws
value2
- %{ clear_member(parser); }
+ %{ end_member(parser); }
ws;
object =
@@ -941,28 +1191,11 @@ bool end(void *closure, const void *hd) {
return true;
}
-
-/* Public API *****************************************************************/
-
-void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
- p->limit = p->stack + UPB_JSON_MAX_DEPTH;
- p->accumulate_buf = NULL;
- p->accumulate_buf_size = 0;
- upb_byteshandler_init(&p->input_handler_);
- upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
- upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
- upb_bytessink_reset(&p->input_, &p->input_handler_, p);
- p->status = status;
-}
-
-void upb_json_parser_uninit(upb_json_parser *p) {
- upb_byteshandler_uninit(&p->input_handler_);
- free(p->accumulate_buf);
-}
-
-void upb_json_parser_reset(upb_json_parser *p) {
+static void json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
+ p->top->is_map = false;
+ p->top->is_mapentry = false;
int cs;
int top;
@@ -973,13 +1206,36 @@ void upb_json_parser_reset(upb_json_parser *p) {
accumulate_clear(p);
p->multipart_state = MULTIPART_INACTIVE;
p->capture = NULL;
+ p->accumulated = NULL;
}
-void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
- upb_json_parser_reset(p);
- upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
- p->top->m = upb_handlers_msgdef(sink->handlers);
- p->accumulated = NULL;
+
+/* Public API *****************************************************************/
+
+// Allocates a JSON parser from |env| and binds it to |output|: the bottom
+// stack frame's sink is reset to |output|'s handlers and closure, and its
+// message definition is taken from those handlers. Returns NULL if the
+// allocation fails.
+upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
+#ifndef NDEBUG
+ const size_t size_before = upb_env_bytesallocated(env);
+#endif
+ upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
+ // This function returns a pointer, so report failure as NULL, not false.
+ if (!p) return NULL;
+
+ p->env = env;
+ p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+ p->accumulate_buf = NULL;
+ p->accumulate_buf_size = 0;
+ upb_byteshandler_init(&p->input_handler_);
+ upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
+ upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
+ upb_bytessink_reset(&p->input_, &p->input_handler_, p);
+
+ json_parser_reset(p);
+ upb_sink_reset(&p->top->sink, output->handlers, output->closure);
+ p->top->m = upb_handlers_msgdef(output->handlers);
+
+ // If this fails, uncomment and increase the value in parser.h.
+ // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before);
+ assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
+ return p;
}
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
diff --git a/upb/json/printer.c b/upb/json/printer.c
index b996ccf..132736c 100644
--- a/upb/json/printer.c
+++ b/upb/json/printer.c
@@ -15,6 +15,27 @@
#include <string.h>
#include <stdint.h>
+// State of a JSON printer: the sink it receives data on, the byte sink it
+// writes to, and the per-frame bookkeeping needed to place commas.
+struct upb_json_printer {
+ upb_sink input_;
+ // BytesSink closure, filled in by upb_bytessink_start() on output_.
+ void *subc_;
+ upb_bytessink *output_;
+
+ // We track the depth so that we know when to emit startstr/endstr on the
+ // output. It is also the index of the current frame in first_elem_.
+ int depth_;
+
+ // Have we emitted the first element? This state is necessary to emit commas
+ // without leaving a trailing comma in arrays/maps. We keep this state per
+ // frame depth.
+ //
+ // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
+ // We count frames (contexts in which we separate elements by commas) as both
+ // repeated fields and messages (maps), and the worst case is a
+ // message->repeated field->submessage->repeated field->... nesting.
+ bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
+};
+
// StringPiece; a pointer plus a length.
typedef struct {
const char *ptr;
@@ -182,13 +203,23 @@ static bool putkey(void *closure, const void *handler_data) {
return true; \
} \
static bool repeated_##type(void *closure, const void *handler_data, \
- type val) { \
+ type val) { \
upb_json_printer *p = closure; \
print_comma(p); \
CHK(put##type(closure, handler_data, val)); \
return true; \
}
+// Defines putmapkey_<type>(): prints the value through put<type>() wrapped in
+// double quotes and followed by ':'. |fmt_func| is accepted for symmetry with
+// TYPE_HANDLERS but is not referenced in the expansion.
+#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \
+ static bool putmapkey_##type(void *closure, const void *handler_data, \
+ type val) { \
+ upb_json_printer *p = closure; \
+ print_data(p, "\"", 1); \
+ CHK(put##type(closure, handler_data, val)); \
+ print_data(p, "\":", 2); \
+ return true; \
+ }
+
TYPE_HANDLERS(double, fmt_double);
TYPE_HANDLERS(float, fmt_float);
TYPE_HANDLERS(bool, fmt_bool);
@@ -197,7 +228,15 @@ TYPE_HANDLERS(uint32_t, fmt_int64);
TYPE_HANDLERS(int64_t, fmt_int64);
TYPE_HANDLERS(uint64_t, fmt_uint64);
+// double and float are not allowed to be map keys.
+TYPE_HANDLERS_MAPKEY(bool, fmt_bool);
+TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64);
+TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64);
+TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64);
+TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64);
+
#undef TYPE_HANDLERS
+#undef TYPE_HANDLERS_MAPKEY
typedef struct {
void *keyname;
@@ -222,20 +261,36 @@ static bool scalar_enum(void *closure, const void *handler_data,
return true;
}
-static bool repeated_enum(void *closure, const void *handler_data,
- int32_t val) {
- const EnumHandlerData *hd = handler_data;
- upb_json_printer *p = closure;
- print_comma(p);
-
- const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val);
+// Prints enum value |val| of |def|: as a double-quoted symbolic name when
+// upb_enumdef_iton() finds one, otherwise via putint32_t() as a number.
+static void print_enum_symbolic_name(upb_json_printer *p,
+ const upb_enumdef *def,
+ int32_t val) {
+ const char *symbolic_name = upb_enumdef_iton(def, val);
if (symbolic_name) {
print_data(p, "\"", 1);
putstring(p, symbolic_name, strlen(symbolic_name));
print_data(p, "\"", 1);
} else {
- putint32_t(closure, NULL, val);
+ putint32_t(p, NULL, val);
}
+}
+
+// Handler for one element of a repeated enum field: calls print_comma() and
+// then prints the value via print_enum_symbolic_name(). Always returns true.
+static bool repeated_enum(void *closure, const void *handler_data,
+                          int32_t val) {
+  upb_json_printer *printer = closure;
+  const EnumHandlerData *enum_hd = handler_data;
+
+  print_comma(printer);
+  print_enum_symbolic_name(printer, enum_hd->enumdef, val);
+  return true;
+}
+
+// Handler for an enum-typed map value: prints the value via
+// print_enum_symbolic_name() with no comma or key prefix. Always returns true.
+static bool mapvalue_enum(void *closure, const void *handler_data,
+ int32_t val) {
+ const EnumHandlerData *hd = handler_data;
+ upb_json_printer *p = closure;
+
+ print_enum_symbolic_name(p, hd->enumdef, val);
return true;
}
@@ -251,25 +306,35 @@ static void *repeated_startsubmsg(void *closure, const void *handler_data) {
return closure;
}
-static bool startmap(void *closure, const void *handler_data) {
+// Enters a new frame: bumps the depth, marks the frame as not yet having
+// emitted an element, and prints the opening brace.
+static void start_frame(upb_json_printer *p) {
+  const int new_depth = ++p->depth_;
+  p->first_elem_[new_depth] = true;
+  print_data(p, "{", 1);
+}
+
+// Leaves the current frame: prints the closing brace, then drops the depth.
+static void end_frame(upb_json_printer *p) {
+  print_data(p, "}", 1);
+  p->depth_ -= 1;
+}
+
+// Start-of-message handler. At depth 0 it first starts the output byte sink
+// (storing the sub-closure in subc_), then opens a frame via start_frame().
+static bool printer_startmsg(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
- if (p->depth_++ == 0) {
+ if (p->depth_ == 0) {
upb_bytessink_start(p->output_, 0, &p->subc_);
}
- p->first_elem_[p->depth_] = true;
- print_data(p, "{", 1);
+ start_frame(p);
return true;
}
-static bool endmap(void *closure, const void *handler_data, upb_status *s) {
+// End-of-message handler. Closes the frame via end_frame() (which prints '}')
+// and, once the depth is back to 0, ends the output byte sink. The closing
+// brace is therefore written before the byte sink is ended.
+static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
UPB_UNUSED(handler_data);
UPB_UNUSED(s);
upb_json_printer *p = closure;
- if (--p->depth_ == 0) {
+ end_frame(p);
+ if (p->depth_ == 0) {
upb_bytessink_end(p->output_);
}
- print_data(p, "}", 1);
return true;
}
@@ -290,6 +355,23 @@ static bool endseq(void *closure, const void *handler_data) {
return true;
}
+// Start-of-sequence handler for map fields: calls putkey() with the handler
+// data, then opens a '{' frame. Returns |closure| for the sequence's handlers.
+static void *startmap(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  CHK(putkey(closure, handler_data));
+  // Same steps as a message frame: depth++, first_elem_ = true, print '{'.
+  start_frame(printer);
+  return closure;
+}
+
+// End-of-sequence handler for map fields: prints '}' and drops the depth.
+// Always returns true.
+static bool endmap(void *closure, const void *handler_data) {
+  UPB_UNUSED(handler_data);
+  upb_json_printer *printer = closure;
+  end_frame(printer);
+  return true;
+}
+
static size_t putstr(void *closure, const void *handler_data, const char *str,
size_t len, const upb_bufhandle *handle) {
UPB_UNUSED(handler_data);
@@ -404,6 +486,36 @@ static bool repeated_endstr(void *closure, const void *handler_data) {
return true;
}
+// Start-of-string handler shared by string map keys and string map values:
+// prints the opening double quote and returns the printer as the closure.
+static void *mapkeyval_startstr(void *closure, const void *handler_data,
+                                size_t size_hint) {
+  UPB_UNUSED(handler_data);
+  UPB_UNUSED(size_hint);
+  upb_json_printer *printer = closure;
+  print_data(printer, "\"", 1);
+  return printer;
+}
+
+// String-chunk handler for string map keys: forwards the chunk to putstr()
+// and reports all |len| bytes as consumed. (CHK is defined outside this
+// view; it presumably returns early when putstr() yields a zero value.)
+static size_t mapkey_str(void *closure, const void *handler_data,
+ const char *str, size_t len,
+ const upb_bufhandle *handle) {
+ CHK(putstr(closure, handler_data, str, len, handle));
+ return len;
+}
+
+// End-of-string handler for string map keys: prints the closing double quote
+// together with the ':' that separates the key from its value.
+static bool mapkey_endstr(void *closure, const void *handler_data) {
+  UPB_UNUSED(handler_data);
+  upb_json_printer *printer = closure;
+  print_data(printer, "\":", 2);
+  return true;
+}
+
+// End-of-string handler for string map values: prints only the closing
+// double quote.
+static bool mapvalue_endstr(void *closure, const void *handler_data) {
+  UPB_UNUSED(handler_data);
+  upb_json_printer *printer = closure;
+  print_data(printer, "\"", 1);
+  return true;
+}
+
static size_t scalar_bytes(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
@@ -421,31 +533,161 @@ static size_t repeated_bytes(void *closure, const void *handler_data,
return len;
}
-void printer_sethandlers(const void *closure, upb_handlers *h) {
+// String-chunk handler for bytes map keys: forwards the chunk to putbytes(),
+// then prints the ':' key/value separator, and reports |len| bytes consumed.
+// NOTE(review): no quotes are printed here, so this relies on putbytes()
+// emitting them itself -- confirm against putbytes().
+static size_t mapkey_bytes(void *closure, const void *handler_data,
+ const char *str, size_t len,
+ const upb_bufhandle *handle) {
+ upb_json_printer *p = closure;
+ CHK(putbytes(closure, handler_data, str, len, handle));
+ print_data(p, ":", 1);
+ return len;
+}
+
+// Allocates an EnumHandlerData for field |f| (its enum definition plus a key
+// name from newstrpc()), registers it with |h| so it is free()d on cleanup,
+// and installs it as the handler data of |attr|.
+static void set_enum_hd(upb_handlers *h,
+ const upb_fielddef *f,
+ upb_handlerattr *attr) {
+ // NOTE(review): the malloc() result is used without a NULL check.
+ EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
+ hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
+ hd->keyname = newstrpc(h, f);
+ upb_handlers_addcleanup(h, hd, free);
+ upb_handlerattr_sethandlerdata(attr, hd);
+}
+
+// Set up handlers for a mapentry submessage (i.e., an individual key/value pair
+// in a map).
+//
+// TODO: Handle missing key, missing value, out-of-order key/value, or repeated
+// key or value cases properly. The right way to do this is to allocate a
+// temporary structure at the start of a mapentry submessage, store key and
+// value data in it as key and value handlers are called, and then print the
+// key/value pair once at the end of the submessage. If we don't do this, we
+// should at least detect the case and throw an error. However, so far all of
+// our sources that emit mapentry messages do so canonically (with one key
+// field, and then one value field), so this is not a pressing concern at the
+// moment.
+void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
UPB_UNUSED(closure);
+ const upb_msgdef *md = upb_handlers_msgdef(h);
+
+ // A mapentry message is printed simply as '"key": value'. Rather than
+ // special-case key and value for every type below, we just handle both
+ // fields explicitly here.
+ // NOTE(review): neither lookup result is checked for NULL before use.
+ const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
+ const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
+
+ upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
+
+ // Key handlers: every key is printed quoted and followed by ':'.
+ switch (upb_fielddef_type(key_field)) {
+ case UPB_TYPE_INT32:
+ upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
+ break;
+ case UPB_TYPE_INT64:
+ upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
+ break;
+ case UPB_TYPE_UINT32:
+ upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
+ break;
+ case UPB_TYPE_UINT64:
+ upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
+ break;
+ case UPB_TYPE_BOOL:
+ upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
+ break;
+ case UPB_TYPE_STRING:
+ upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
+ upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
+ upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
+ break;
+ case UPB_TYPE_BYTES:
+ upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
+ break;
+ default:
+ // Any other key type is treated as a programming error.
+ assert(false);
+ break;
+ }
+ // Value handlers: values are printed bare, with no key or comma prefix.
+ switch (upb_fielddef_type(value_field)) {
+ case UPB_TYPE_INT32:
+ upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
+ break;
+ case UPB_TYPE_INT64:
+ upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
+ break;
+ case UPB_TYPE_UINT32:
+ upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
+ break;
+ case UPB_TYPE_UINT64:
+ upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
+ break;
+ case UPB_TYPE_BOOL:
+ upb_handlers_setbool(h, value_field, putbool, &empty_attr);
+ break;
+ case UPB_TYPE_FLOAT:
+ upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
+ break;
+ case UPB_TYPE_DOUBLE:
+ upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
+ break;
+ case UPB_TYPE_STRING:
+ upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
+ upb_handlers_setstring(h, value_field, putstr, &empty_attr);
+ upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
+ break;
+ case UPB_TYPE_BYTES:
+ upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
+ break;
+ case UPB_TYPE_ENUM: {
+ upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
+ set_enum_hd(h, value_field, &enum_attr);
+ upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
+ upb_handlerattr_uninit(&enum_attr);
+ break;
+ }
+ case UPB_TYPE_MESSAGE:
+ // No handler necessary -- the submsg handlers will print the message
+ // as appropriate.
+ break;
+ }
+
+ upb_handlerattr_uninit(&empty_attr);
+}
+
+void printer_sethandlers(const void *closure, upb_handlers *h) {
+ UPB_UNUSED(closure);
+ const upb_msgdef *md = upb_handlers_msgdef(h);
+ bool is_mapentry = upb_msgdef_mapentry(md);
upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
- upb_handlers_setstartmsg(h, startmap, &empty_attr);
- upb_handlers_setendmsg(h, endmap, &empty_attr);
-
-#define TYPE(type, name, ctype) \
- case type: \
- if (upb_fielddef_isseq(f)) { \
- upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
- } else { \
- upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
- } \
+
+ if (is_mapentry) {
+ // mapentry messages are sufficiently different that we handle them
+ // separately.
+ printer_sethandlers_mapentry(closure, h);
+ return;
+ }
+
+ upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
+ upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
+
+#define TYPE(type, name, ctype) \
+ case type: \
+ if (upb_fielddef_isseq(f)) { \
+ upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
+ } else { \
+ upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
+ } \
break;
- upb_msg_iter i;
- upb_msg_begin(&i, upb_handlers_msgdef(h));
- for(; !upb_msg_done(&i); upb_msg_next(&i)) {
+ upb_msg_field_iter i;
+ upb_msg_field_begin(&i, md);
+ for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
- if (upb_fielddef_isseq(f)) {
+ if (upb_fielddef_ismap(f)) {
+ upb_handlers_setstartseq(h, f, startmap, &name_attr);
+ upb_handlers_setendseq(h, f, endmap, &name_attr);
+ } else if (upb_fielddef_isseq(f)) {
upb_handlers_setstartseq(h, f, startseq, &name_attr);
upb_handlers_setendseq(h, f, endseq, &empty_attr);
}
@@ -462,12 +704,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
// For now, we always emit symbolic names for enums. We may want an
// option later to control this behavior, but we will wait for a real
// need first.
- EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
- hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
- hd->keyname = newstrpc(h, f);
- upb_handlers_addcleanup(h, hd, free);
upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
- upb_handlerattr_sethandlerdata(&enum_attr, hd);
+ set_enum_hd(h, f, &enum_attr);
if (upb_fielddef_isseq(f)) {
upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
@@ -514,25 +752,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
#undef TYPE
}
-/* Public API *****************************************************************/
-
-void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) {
- p->output_ = NULL;
+// Resets the printer's frame depth to 0. The input sink is no longer reset
+// here.
+static void json_printer_reset(upb_json_printer *p) {
p->depth_ = 0;
- upb_sink_reset(&p->input_, h, p);
}
-void upb_json_printer_uninit(upb_json_printer *p) {
- UPB_UNUSED(p);
-}
-void upb_json_printer_reset(upb_json_printer *p) {
- p->depth_ = 0;
-}
+/* Public API *****************************************************************/
+
+// Allocates a JSON printer from |e| that writes to |output| and receives data
+// through an input sink bound to handlers |h| (with the printer itself as the
+// closure). Returns NULL if the allocation fails.
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+ upb_bytessink *output) {
+#ifndef NDEBUG
+ // Only needed by the assert() below, which is compiled out under NDEBUG.
+ size_t size_before = upb_env_bytesallocated(e);
+#endif
+
+ upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
+ if (!p) return NULL;
-void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) {
- upb_json_printer_reset(p);
p->output_ = output;
+ json_printer_reset(p);
+ upb_sink_reset(&p->input_, h, p);
+
+ // If this fails, increase the value in printer.h.
+ assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
+ return p;
}
upb_sink *upb_json_printer_input(upb_json_printer *p) {
diff --git a/upb/json/printer.h b/upb/json/printer.h
index fbc206d..c73cb79 100644
--- a/upb/json/printer.h
+++ b/upb/json/printer.h
@@ -11,6 +11,7 @@
#ifndef UPB_JSON_TYPED_PRINTER_H_
#define UPB_JSON_TYPED_PRINTER_H_
+#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@@ -26,71 +27,48 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer);
/* upb::json::Printer *********************************************************/
-// Prints an incoming stream of data to a BytesSink in JSON format.
-UPB_DEFINE_CLASS0(upb::json::Printer,
- public:
- Printer(const upb::Handlers* handlers);
- ~Printer();
+#define UPB_JSON_PRINTER_SIZE 168
- // Resets the state of the printer, so that it will expect to begin a new
- // document.
- void Reset();
+#ifdef __cplusplus
- // Resets the output pointer which will serve as our closure. Implies
- // Reset().
- void ResetOutput(BytesSink* output);
+// Prints an incoming stream of data to a BytesSink in JSON format.
+class upb::json::Printer {
+ public:
+ static Printer* Create(Environment* env, const upb::Handlers* handlers,
+ BytesSink* output);
// The input to the printer.
Sink* input();
// Returns handlers for printing according to the specified schema.
static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* md);
-,
-UPB_DEFINE_STRUCT0(upb_json_printer,
- upb_sink input_;
- // BytesSink closure.
- void *subc_;
- upb_bytessink *output_;
-
- // We track the depth so that we know when to emit startstr/endstr on the
- // output.
- int depth_;
- // Have we emitted the first element? This state is necessary to emit commas
- // without leaving a trailing comma in arrays/maps. We keep this state per
- // frame depth.
- //
- // Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
- // We count frames (contexts in which we separate elements by commas) as both
- // repeated fields and messages (maps), and the worst case is a
- // message->repeated field->submessage->repeated field->... nesting.
- bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
-));
-
-UPB_BEGIN_EXTERN_C // {
-// Native C API.
+ static const size_t kSize = UPB_JSON_PRINTER_SIZE;
-void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h);
-void upb_json_printer_uninit(upb_json_printer *p);
-void upb_json_printer_reset(upb_json_printer *p);
-void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output);
+ private:
+ UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer);
+};
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+// Native C API.
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+ upb_bytessink *output);
upb_sink *upb_json_printer_input(upb_json_printer *p);
const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
const void *owner);
-UPB_END_EXTERN_C // }
+UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace json {
-inline Printer::Printer(const upb::Handlers* handlers) {
- upb_json_printer_init(this, handlers);
-}
-inline Printer::~Printer() { upb_json_printer_uninit(this); }
-inline void Printer::Reset() { upb_json_printer_reset(this); }
-inline void Printer::ResetOutput(BytesSink* output) {
- upb_json_printer_resetoutput(this, output);
+// C++ wrapper: forwards directly to the C function upb_json_printer_create()
+// and returns its result unchanged.
+inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers,
+ BytesSink* output) {
+ return upb_json_printer_create(env, handlers, output);
}
inline Sink* Printer::input() { return upb_json_printer_input(this); }
inline reffed_ptr<const Handlers> Printer::NewHandlers(
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback