From d04af15acb274025dd403ce42f96bf159a6855c2 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Thu, 8 Jun 2017 16:41:47 -0700 Subject: Some fixes to make JSON properly recognize numbers in quotes. --- upb/json/parser.rl | 211 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 137 insertions(+), 74 deletions(-) (limited to 'upb/json/parser.rl') diff --git a/upb/json/parser.rl b/upb/json/parser.rl index b7f1386..cdd71c5 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -20,6 +20,8 @@ */ #include +#include +#include #include #include #include @@ -605,103 +607,156 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } -static bool parse_number(upb_json_parser *p); +static bool parse_number(upb_json_parser *p, bool is_quoted); static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } - return parse_number(p); + return parse_number(p, false); } -static bool parse_number(upb_json_parser *p) { - size_t len; - const char *buf; - const char *myend; +static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, + const char *bufend, bool is_quoted) { + size_t len = bufend - buf; char *end; + upb_fieldtype_t type = upb_fielddef_type(p->top->f); + double val; + double dummy; - /* strtol() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - if (!multipart_text(p, "\0", 1, false)) { + if (buf[0] == ' ') { return false; } - buf = accumulate_getptr(p, &len); - myend = buf + len - 1; /* One for NULL. */ - - /* XXX: We are using strtol to parse integers, but this is wrong as even - * integers can be represented as 1e6 (for example), which strtol can't - * handle correctly. - * - * XXX: Also, we can't handle large integers properly because strto[u]ll - * isn't in C89. - * - * XXX: Also, we don't properly check floats for overflow, since strtof - * isn't in C89. */ - switch (upb_fielddef_type(p->top->f)) { + /* For integer types, first try parsing with integer-specific routines. + * If these succeed, they will be more accurate for int64/uint64 than + * strtod(). + */ + switch (type) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { - long val = strtol(p->accumulated, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend) - goto err; - else + long val = strtol(buf, &end, 0); + if (errno == ERANGE || end != bufend) { + break; + } else if (val > INT32_MAX || val < INT32_MIN) { + return false; + } else { upb_sink_putint32(&p->top->sink, parser_getsel(p), val); - break; - } - case UPB_TYPE_INT64: { - long long val = strtol(p->accumulated, &end, 0); - if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend) - goto err; - else - upb_sink_putint64(&p->top->sink, parser_getsel(p), val); - break; + return true; + } } case UPB_TYPE_UINT32: { - unsigned long val = strtoul(p->accumulated, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || end != myend) - goto err; - else + unsigned long val = strtoul(buf, &end, 0); + if (end != bufend) { + break; + } else if (val > UINT32_MAX || errno == ERANGE) { + return false; + } else { upb_sink_putuint32(&p->top->sink, parser_getsel(p), val); - break; + return true; + } + } + /* XXX: We can't handle [u]int64 properly on 32-bit machines because + * strto[u]ll isn't in C89. */ + case UPB_TYPE_INT64: { + long val = strtol(buf, &end, 0); + if (errno == ERANGE || end != bufend) { + break; + } else { + upb_sink_putint64(&p->top->sink, parser_getsel(p), val); + return true; + } } case UPB_TYPE_UINT64: { - unsigned long long val = strtoul(p->accumulated, &end, 0); - if (val > UINT64_MAX || errno == ERANGE || end != myend) - goto err; - else + unsigned long val = strtoul(p->accumulated, &end, 0); + if (end != bufend) { + break; + } else if (errno == ERANGE) { + return false; + } else { upb_sink_putuint64(&p->top->sink, parser_getsel(p), val); - break; + return true; + } } - case UPB_TYPE_DOUBLE: { - double val = strtod(p->accumulated, &end); - if (errno == ERANGE || end != myend) - goto err; - else - upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); + default: break; + } + + if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { + /* Quoted numbers shouldn't support double forms for integer types. */ + return false; + } + + if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { + /* C89 does not have an INFINITY macro. */ + val = 1.0 / 0.0; + } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { + val = -1.0 / 0.0; + } else { + val = strtod(buf, &end); + if (errno == ERANGE || end != bufend) { + return false; } - case UPB_TYPE_FLOAT: { - float val = strtod(p->accumulated, &end); - if (errno == ERANGE || end != myend) - goto err; - else - upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); - break; + } + + switch (type) { +#define CASE(capitaltype, smalltype, min, max) \ + case UPB_TYPE_ ## capitaltype: { \ + if (modf(val, &dummy) != 0 || val > max || val < min) { \ + return false; \ + } else { \ + upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), val); \ + return true; \ + } \ + break; \ } + case UPB_TYPE_ENUM: + CASE(INT32, int32, INT32_MIN, INT32_MAX); + CASE(INT64, int64, INT64_MIN, INT64_MAX); + CASE(UINT32, uint32, 0, UINT32_MAX); + CASE(UINT64, uint64, 0, UINT64_MAX); +#undef CASE + + case UPB_TYPE_DOUBLE: + upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); + return true; + case UPB_TYPE_FLOAT: + if (false /*val > FLT_MAX || val < -FLT_MAX*/) { + return false; + } else { + upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); + return true; + } default: - UPB_ASSERT(false); + return false; } +} - multipart_end(p); +static bool parse_number(upb_json_parser *p, bool is_quoted) { + size_t len; + const char *buf; + const char *bufend; - return true; + /* strtol() and friends unfortunately do not support specifying the length of + * the input string, so we need to force a copy into a NULL-terminated buffer. */ + if (!multipart_text(p, "\0", 1, false)) { + return false; + } -err: - upb_status_seterrf(&p->status, "error parsing number: %s", buf); - upb_env_reporterror(p->env, &p->status); - multipart_end(p); - return false; + buf = accumulate_getptr(p, &len); + bufend = buf + len - 1; /* One for NULL. */ + errno = 0; + + if (parse_number_from_buffer(p, buf, bufend, is_quoted)) { + multipart_end(p); + return true; + } else { + upb_status_seterrf(&p->status, "error parsing number: %s", buf); + upb_env_reporterror(p->env, &p->status); + multipart_end(p); + return false; + } } static bool parser_putbool(upb_json_parser *p, bool val) { @@ -754,17 +809,16 @@ static bool start_stringval(upb_json_parser *p) { multipart_startaccum(p); return true; } - } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { - /* No need to push a frame -- symbolic enum names in quotes remain in the - * current parser frame. - * - * Enum string values must accumulate so we can look up the value in a table - * once it is complete. */ + } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL && + upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) { + /* No need to push a frame -- numeric values in quotes remain in the + * current parser frame. These values must accmulate so we can convert + * them all at once at the end. */ multipart_startaccum(p); return true; } else { upb_status_seterrf(&p->status, - "String specified for non-string/non-enum field: %s", + "String specified for bool or submessage field: %s", upb_fielddef_name(p->top->f)); upb_env_reporterror(p->env, &p->status); return false; @@ -811,6 +865,15 @@ static bool end_stringval(upb_json_parser *p) { break; } + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + case UPB_TYPE_DOUBLE: + case UPB_TYPE_FLOAT: + ok = parse_number(p, true); + break; + default: UPB_ASSERT(false); upb_status_seterrmsg(&p->status, "Internal error in JSON decoder"); @@ -854,7 +917,7 @@ static bool parse_mapentry_key(upb_json_parser *p) { case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: /* Invoke end_number. The accum buffer has the number's text already. */ - if (!parse_number(p)) { + if (!parse_number(p, true)) { return false; } break; -- cgit v1.2.3 From cbc2d7af561e149e7c445e63a485793e2669b89c Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Mon, 19 Jun 2017 17:14:29 -0700 Subject: Responded to PR comments. This also fixed a few more conformance tests. --- upb/json/parser.c | 110 +++++++++++++++++++++++++++-------------------------- upb/json/parser.rl | 38 +++++++++--------- 2 files changed, 78 insertions(+), 70 deletions(-) (limited to 'upb/json/parser.rl') diff --git a/upb/json/parser.c b/upb/json/parser.c index 81eda44..4899401 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -619,15 +619,21 @@ static bool end_number(upb_json_parser *p, const char *ptr) { return parse_number(p, false); } +/* |buf| is NULL-terminated. |buf| itself will never include quotes; + * |is_quoted| tells us whether this text originally appeared inside quotes. */ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, - const char *bufend, bool is_quoted) { - size_t len = bufend - buf; + bool is_quoted) { + size_t len = strlen(buf); + const char *bufend = buf + len; char *end; upb_fieldtype_t type = upb_fielddef_type(p->top->f); double val; double dummy; + double inf = 1.0 / 0.0; /* C89 does not have an INFINITY macro. */ - if (buf[0] == ' ') { + errno = 0; + + if (len == 0 || buf[0] == ' ') { return false; } @@ -686,15 +692,15 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, } if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { - /* Quoted numbers shouldn't support double forms for integer types. */ + /* Quoted numbers for integer types are not allowed to be in double form. */ return false; } if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { /* C89 does not have an INFINITY macro. */ - val = 1.0 / 0.0; + val = inf; } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { - val = -1.0 / 0.0; + val = -inf; } else { val = strtod(buf, &end); if (errno == ERANGE || end != bufend) { @@ -703,28 +709,29 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, } switch (type) { -#define CASE(capitaltype, smalltype, min, max) \ +#define CASE(capitaltype, smalltype, ctype, min, max) \ case UPB_TYPE_ ## capitaltype: { \ if (modf(val, &dummy) != 0 || val > max || val < min) { \ return false; \ } else { \ - upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), val); \ + upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), \ + (ctype)val); \ return true; \ } \ break; \ } case UPB_TYPE_ENUM: - CASE(INT32, int32, INT32_MIN, INT32_MAX); - CASE(INT64, int64, INT64_MIN, INT64_MAX); - CASE(UINT32, uint32, 0, UINT32_MAX); - CASE(UINT64, uint64, 0, UINT64_MAX); + CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX); + CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX); + CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX); + CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX); #undef CASE case UPB_TYPE_DOUBLE: upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); return true; case UPB_TYPE_FLOAT: - if (false /*val > FLT_MAX || val < -FLT_MAX*/) { + if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) { return false; } else { upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); @@ -738,7 +745,6 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, static bool parse_number(upb_json_parser *p, bool is_quoted) { size_t len; const char *buf; - const char *bufend; /* strtol() and friends unfortunately do not support specifying the length of * the input string, so we need to force a copy into a NULL-terminated buffer. */ @@ -747,10 +753,8 @@ static bool parse_number(upb_json_parser *p, bool is_quoted) { } buf = accumulate_getptr(p, &len); - bufend = buf + len - 1; /* One for NULL. */ - errno = 0; - if (parse_number_from_buffer(p, buf, bufend, is_quoted)) { + if (parse_number_from_buffer(p, buf, is_quoted)) { multipart_end(p); return true; } else { @@ -1210,11 +1214,11 @@ static void end_object(upb_json_parser *p) { * final state once, when the closing '"' is seen. */ -#line 1306 "upb/json/parser.rl" +#line 1310 "upb/json/parser.rl" -#line 1218 "upb/json/parser.c" +#line 1222 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -1363,7 +1367,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 1309 "upb/json/parser.rl" +#line 1313 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -1385,7 +1389,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 1389 "upb/json/parser.c" +#line 1393 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1460,118 +1464,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 1221 "upb/json/parser.rl" +#line 1225 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 1222 "upb/json/parser.rl" +#line 1226 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 1226 "upb/json/parser.rl" +#line 1230 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 1227 "upb/json/parser.rl" +#line 1231 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 1233 "upb/json/parser.rl" +#line 1237 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 1234 "upb/json/parser.rl" +#line 1238 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 1235 "upb/json/parser.rl" +#line 1239 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 1241 "upb/json/parser.rl" +#line 1245 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 1247 "upb/json/parser.rl" +#line 1251 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 1250 "upb/json/parser.rl" +#line 1254 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 1252 "upb/json/parser.rl" +#line 1256 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 1257 "upb/json/parser.rl" +#line 1261 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 1258 "upb/json/parser.rl" +#line 1262 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 1261 "upb/json/parser.rl" +#line 1265 "upb/json/parser.rl" { end_member(parser); } break; case 14: -#line 1267 "upb/json/parser.rl" +#line 1271 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 1270 "upb/json/parser.rl" +#line 1274 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 1276 "upb/json/parser.rl" +#line 1280 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 1280 "upb/json/parser.rl" +#line 1284 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 1285 "upb/json/parser.rl" +#line 1289 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 1286 "upb/json/parser.rl" +#line 1290 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 1288 "upb/json/parser.rl" +#line 1292 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 1289 "upb/json/parser.rl" +#line 1293 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 1291 "upb/json/parser.rl" +#line 1295 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 1293 "upb/json/parser.rl" +#line 1297 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 1295 "upb/json/parser.rl" +#line 1299 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 1297 "upb/json/parser.rl" +#line 1301 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 1298 "upb/json/parser.rl" +#line 1302 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 1303 "upb/json/parser.rl" +#line 1307 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1575 "upb/json/parser.c" +#line 1579 "upb/json/parser.c" } } @@ -1584,7 +1588,7 @@ _again: _out: {} } -#line 1330 "upb/json/parser.rl" +#line 1334 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p); @@ -1625,13 +1629,13 @@ static void json_parser_reset(upb_json_parser *p) { /* Emit Ragel initialization of the parser. */ -#line 1629 "upb/json/parser.c" +#line 1633 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 1370 "upb/json/parser.rl" +#line 1374 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); diff --git a/upb/json/parser.rl b/upb/json/parser.rl index cdd71c5..0312628 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -617,15 +617,21 @@ static bool end_number(upb_json_parser *p, const char *ptr) { return parse_number(p, false); } +/* |buf| is NULL-terminated. |buf| itself will never include quotes; + * |is_quoted| tells us whether this text originally appeared inside quotes. */ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, - const char *bufend, bool is_quoted) { - size_t len = bufend - buf; + bool is_quoted) { + size_t len = strlen(buf); + const char *bufend = buf + len; char *end; upb_fieldtype_t type = upb_fielddef_type(p->top->f); double val; double dummy; + double inf = 1.0 / 0.0; /* C89 does not have an INFINITY macro. */ - if (buf[0] == ' ') { + errno = 0; + + if (len == 0 || buf[0] == ' ') { return false; } @@ -684,15 +690,15 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, } if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { - /* Quoted numbers shouldn't support double forms for integer types. */ + /* Quoted numbers for integer types are not allowed to be in double form. */ return false; } if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { /* C89 does not have an INFINITY macro. */ - val = 1.0 / 0.0; + val = inf; } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { - val = -1.0 / 0.0; + val = -inf; } else { val = strtod(buf, &end); if (errno == ERANGE || end != bufend) { @@ -701,28 +707,29 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, } switch (type) { -#define CASE(capitaltype, smalltype, min, max) \ +#define CASE(capitaltype, smalltype, ctype, min, max) \ case UPB_TYPE_ ## capitaltype: { \ if (modf(val, &dummy) != 0 || val > max || val < min) { \ return false; \ } else { \ - upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), val); \ + upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), \ + (ctype)val); \ return true; \ } \ break; \ } case UPB_TYPE_ENUM: - CASE(INT32, int32, INT32_MIN, INT32_MAX); - CASE(INT64, int64, INT64_MIN, INT64_MAX); - CASE(UINT32, uint32, 0, UINT32_MAX); - CASE(UINT64, uint64, 0, UINT64_MAX); + CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX); + CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX); + CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX); + CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX); #undef CASE case UPB_TYPE_DOUBLE: upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); return true; case UPB_TYPE_FLOAT: - if (false /*val > FLT_MAX || val < -FLT_MAX*/) { + if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) { return false; } else { upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); @@ -736,7 +743,6 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, static bool parse_number(upb_json_parser *p, bool is_quoted) { size_t len; const char *buf; - const char *bufend; /* strtol() and friends unfortunately do not support specifying the length of * the input string, so we need to force a copy into a NULL-terminated buffer. */ @@ -745,10 +751,8 @@ static bool parse_number(upb_json_parser *p, bool is_quoted) { } buf = accumulate_getptr(p, &len); - bufend = buf + len - 1; /* One for NULL. */ - errno = 0; - if (parse_number_from_buffer(p, buf, bufend, is_quoted)) { + if (parse_number_from_buffer(p, buf, is_quoted)) { multipart_end(p); return true; } else { -- cgit v1.2.3