From d04af15acb274025dd403ce42f96bf159a6855c2 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Thu, 8 Jun 2017 16:41:47 -0700 Subject: Some fixes to make JSON properly recognize numbers in quotes. --- upb/json/parser.rl | 211 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 137 insertions(+), 74 deletions(-) (limited to 'upb/json/parser.rl') diff --git a/upb/json/parser.rl b/upb/json/parser.rl index b7f1386..cdd71c5 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -20,6 +20,8 @@ */ #include +#include +#include #include #include #include @@ -605,103 +607,156 @@ static void start_number(upb_json_parser *p, const char *ptr) { capture_begin(p, ptr); } -static bool parse_number(upb_json_parser *p); +static bool parse_number(upb_json_parser *p, bool is_quoted); static bool end_number(upb_json_parser *p, const char *ptr) { if (!capture_end(p, ptr)) { return false; } - return parse_number(p); + return parse_number(p, false); } -static bool parse_number(upb_json_parser *p) { - size_t len; - const char *buf; - const char *myend; +static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, + const char *bufend, bool is_quoted) { + size_t len = bufend - buf; char *end; + upb_fieldtype_t type = upb_fielddef_type(p->top->f); + double val; + double dummy; - /* strtol() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - if (!multipart_text(p, "\0", 1, false)) { + if (buf[0] == ' ') { return false; } - buf = accumulate_getptr(p, &len); - myend = buf + len - 1; /* One for NULL. */ - - /* XXX: We are using strtol to parse integers, but this is wrong as even - * integers can be represented as 1e6 (for example), which strtol can't - * handle correctly. - * - * XXX: Also, we can't handle large integers properly because strto[u]ll - * isn't in C89. - * - * XXX: Also, we don't properly check floats for overflow, since strtof - * isn't in C89. */ - switch (upb_fielddef_type(p->top->f)) { + /* For integer types, first try parsing with integer-specific routines. + * If these succeed, they will be more accurate for int64/uint64 than + * strtod(). + */ + switch (type) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { - long val = strtol(p->accumulated, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend) - goto err; - else + long val = strtol(buf, &end, 0); + if (errno == ERANGE || end != bufend) { + break; + } else if (val > INT32_MAX || val < INT32_MIN) { + return false; + } else { upb_sink_putint32(&p->top->sink, parser_getsel(p), val); - break; - } - case UPB_TYPE_INT64: { - long long val = strtol(p->accumulated, &end, 0); - if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend) - goto err; - else - upb_sink_putint64(&p->top->sink, parser_getsel(p), val); - break; + return true; + } } case UPB_TYPE_UINT32: { - unsigned long val = strtoul(p->accumulated, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || end != myend) - goto err; - else + unsigned long val = strtoul(buf, &end, 0); + if (end != bufend) { + break; + } else if (val > UINT32_MAX || errno == ERANGE) { + return false; + } else { upb_sink_putuint32(&p->top->sink, parser_getsel(p), val); - break; + return true; + } + } + /* XXX: We can't handle [u]int64 properly on 32-bit machines because + * strto[u]ll isn't in C89. */ + case UPB_TYPE_INT64: { + long val = strtol(buf, &end, 0); + if (errno == ERANGE || end != bufend) { + break; + } else { + upb_sink_putint64(&p->top->sink, parser_getsel(p), val); + return true; + } } case UPB_TYPE_UINT64: { - unsigned long long val = strtoul(p->accumulated, &end, 0); - if (val > UINT64_MAX || errno == ERANGE || end != myend) - goto err; - else + unsigned long val = strtoul(p->accumulated, &end, 0); + if (end != bufend) { + break; + } else if (errno == ERANGE) { + return false; + } else { upb_sink_putuint64(&p->top->sink, parser_getsel(p), val); - break; + return true; + } } - case UPB_TYPE_DOUBLE: { - double val = strtod(p->accumulated, &end); - if (errno == ERANGE || end != myend) - goto err; - else - upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); + default: break; + } + + if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { + /* Quoted numbers shouldn't support double forms for integer types. */ + return false; + } + + if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { + /* C89 does not have an INFINITY macro. */ + val = 1.0 / 0.0; + } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { + val = -1.0 / 0.0; + } else { + val = strtod(buf, &end); + if (errno == ERANGE || end != bufend) { + return false; } - case UPB_TYPE_FLOAT: { - float val = strtod(p->accumulated, &end); - if (errno == ERANGE || end != myend) - goto err; - else - upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); - break; + } + + switch (type) { +#define CASE(capitaltype, smalltype, min, max) \ + case UPB_TYPE_ ## capitaltype: { \ + if (modf(val, &dummy) != 0 || val > max || val < min) { \ + return false; \ + } else { \ + upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), val); \ + return true; \ + } \ + break; \ } + case UPB_TYPE_ENUM: + CASE(INT32, int32, INT32_MIN, INT32_MAX); + CASE(INT64, int64, INT64_MIN, INT64_MAX); + CASE(UINT32, uint32, 0, UINT32_MAX); + CASE(UINT64, uint64, 0, UINT64_MAX); +#undef CASE + + case UPB_TYPE_DOUBLE: + upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); + return true; + case UPB_TYPE_FLOAT: + if (false /*val > FLT_MAX || val < -FLT_MAX*/) { + return false; + } else { + upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); + return true; + } default: - UPB_ASSERT(false); + return false; } +} - multipart_end(p); +static bool parse_number(upb_json_parser *p, bool is_quoted) { + size_t len; + const char *buf; + const char *bufend; - return true; + /* strtol() and friends unfortunately do not support specifying the length of + * the input string, so we need to force a copy into a NULL-terminated buffer. */ + if (!multipart_text(p, "\0", 1, false)) { + return false; + } -err: - upb_status_seterrf(&p->status, "error parsing number: %s", buf); - upb_env_reporterror(p->env, &p->status); - multipart_end(p); - return false; + buf = accumulate_getptr(p, &len); + bufend = buf + len - 1; /* One for NULL. */ + errno = 0; + + if (parse_number_from_buffer(p, buf, bufend, is_quoted)) { + multipart_end(p); + return true; + } else { + upb_status_seterrf(&p->status, "error parsing number: %s", buf); + upb_env_reporterror(p->env, &p->status); + multipart_end(p); + return false; + } } static bool parser_putbool(upb_json_parser *p, bool val) { @@ -754,17 +809,16 @@ static bool start_stringval(upb_json_parser *p) { multipart_startaccum(p); return true; } - } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { - /* No need to push a frame -- symbolic enum names in quotes remain in the - * current parser frame. - * - * Enum string values must accumulate so we can look up the value in a table - * once it is complete. */ + } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL && + upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) { + /* No need to push a frame -- numeric values in quotes remain in the + * current parser frame. These values must accmulate so we can convert + * them all at once at the end. */ multipart_startaccum(p); return true; } else { upb_status_seterrf(&p->status, - "String specified for non-string/non-enum field: %s", + "String specified for bool or submessage field: %s", upb_fielddef_name(p->top->f)); upb_env_reporterror(p->env, &p->status); return false; @@ -811,6 +865,15 @@ static bool end_stringval(upb_json_parser *p) { break; } + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + case UPB_TYPE_DOUBLE: + case UPB_TYPE_FLOAT: + ok = parse_number(p, true); + break; + default: UPB_ASSERT(false); upb_status_seterrmsg(&p->status, "Internal error in JSON decoder"); @@ -854,7 +917,7 @@ static bool parse_mapentry_key(upb_json_parser *p) { case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: /* Invoke end_number. The accum buffer has the number's text already. */ - if (!parse_number(p)) { + if (!parse_number(p, true)) { return false; } break; -- cgit v1.2.3