summaryrefslogtreecommitdiff
path: root/upb/json/parser.rl
diff options
context:
space:
mode:
Diffstat (limited to 'upb/json/parser.rl')
-rw-r--r--upb/json/parser.rl215
1 files changed, 141 insertions, 74 deletions
diff --git a/upb/json/parser.rl b/upb/json/parser.rl
index b7f1386..0312628 100644
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@@ -20,6 +20,8 @@
*/
#include <errno.h>
+#include <float.h>
+#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@@ -605,103 +607,160 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
-static bool parse_number(upb_json_parser *p);
+static bool parse_number(upb_json_parser *p, bool is_quoted);
static bool end_number(upb_json_parser *p, const char *ptr) {
if (!capture_end(p, ptr)) {
return false;
}
- return parse_number(p);
+ return parse_number(p, false);
}
-static bool parse_number(upb_json_parser *p) {
- size_t len;
- const char *buf;
- const char *myend;
+/* |buf| is NULL-terminated. |buf| itself will never include quotes;
+ * |is_quoted| tells us whether this text originally appeared inside quotes. */
+static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
+ bool is_quoted) {
+ size_t len = strlen(buf);
+ const char *bufend = buf + len;
char *end;
+ upb_fieldtype_t type = upb_fielddef_type(p->top->f);
+ double val;
+ double dummy;
+ double inf = 1.0 / 0.0; /* C89 does not have an INFINITY macro. */
- /* strtol() and friends unfortunately do not support specifying the length of
- * the input string, so we need to force a copy into a NULL-terminated buffer. */
- if (!multipart_text(p, "\0", 1, false)) {
+ errno = 0;
+
+ if (len == 0 || buf[0] == ' ') {
return false;
}
- buf = accumulate_getptr(p, &len);
- myend = buf + len - 1; /* One for NULL. */
-
- /* XXX: We are using strtol to parse integers, but this is wrong as even
- * integers can be represented as 1e6 (for example), which strtol can't
- * handle correctly.
- *
- * XXX: Also, we can't handle large integers properly because strto[u]ll
- * isn't in C89.
- *
- * XXX: Also, we don't properly check floats for overflow, since strtof
- * isn't in C89. */
- switch (upb_fielddef_type(p->top->f)) {
+ /* For integer types, first try parsing with integer-specific routines.
+ * If these succeed, they will be more accurate for int64/uint64 than
+ * strtod().
+ */
+ switch (type) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
- long val = strtol(p->accumulated, &end, 0);
- if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
- goto err;
- else
+ long val = strtol(buf, &end, 0);
+ if (errno == ERANGE || end != bufend) {
+ break;
+ } else if (val > INT32_MAX || val < INT32_MIN) {
+ return false;
+ } else {
upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
- break;
- }
- case UPB_TYPE_INT64: {
- long long val = strtol(p->accumulated, &end, 0);
- if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
- goto err;
- else
- upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
- break;
+ return true;
+ }
}
case UPB_TYPE_UINT32: {
- unsigned long val = strtoul(p->accumulated, &end, 0);
- if (val > UINT32_MAX || errno == ERANGE || end != myend)
- goto err;
- else
+ unsigned long val = strtoul(buf, &end, 0);
+ if (end != bufend) {
+ break;
+ } else if (val > UINT32_MAX || errno == ERANGE) {
+ return false;
+ } else {
upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
- break;
+ return true;
+ }
+ }
+ /* XXX: We can't handle [u]int64 properly on 32-bit machines because
+ * strto[u]ll isn't in C89. */
+ case UPB_TYPE_INT64: {
+ long val = strtol(buf, &end, 0);
+ if (errno == ERANGE || end != bufend) {
+ break;
+ } else {
+ upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
+ return true;
+ }
}
case UPB_TYPE_UINT64: {
- unsigned long long val = strtoul(p->accumulated, &end, 0);
- if (val > UINT64_MAX || errno == ERANGE || end != myend)
- goto err;
- else
+ unsigned long val = strtoul(p->accumulated, &end, 0);
+ if (end != bufend) {
+ break;
+ } else if (errno == ERANGE) {
+ return false;
+ } else {
upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
- break;
+ return true;
+ }
}
- case UPB_TYPE_DOUBLE: {
- double val = strtod(p->accumulated, &end);
- if (errno == ERANGE || end != myend)
- goto err;
- else
- upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
+ default:
break;
+ }
+
+ if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
+ /* Quoted numbers for integer types are not allowed to be in double form. */
+ return false;
+ }
+
+ if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
+ /* C89 does not have an INFINITY macro. */
+ val = inf;
+ } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
+ val = -inf;
+ } else {
+ val = strtod(buf, &end);
+ if (errno == ERANGE || end != bufend) {
+ return false;
}
- case UPB_TYPE_FLOAT: {
- float val = strtod(p->accumulated, &end);
- if (errno == ERANGE || end != myend)
- goto err;
- else
- upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
- break;
+ }
+
+ switch (type) {
+#define CASE(capitaltype, smalltype, ctype, min, max) \
+ case UPB_TYPE_ ## capitaltype: { \
+ if (modf(val, &dummy) != 0 || val > max || val < min) { \
+ return false; \
+ } else { \
+ upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), \
+ (ctype)val); \
+ return true; \
+ } \
+ break; \
}
+ case UPB_TYPE_ENUM:
+ CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
+ CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
+ CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
+ CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
+#undef CASE
+
+ case UPB_TYPE_DOUBLE:
+ upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
+ return true;
+ case UPB_TYPE_FLOAT:
+ if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
+ return false;
+ } else {
+ upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
+ return true;
+ }
default:
- UPB_ASSERT(false);
+ return false;
}
+}
- multipart_end(p);
+static bool parse_number(upb_json_parser *p, bool is_quoted) {
+ size_t len;
+ const char *buf;
- return true;
+ /* strtol() and friends unfortunately do not support specifying the length of
+ * the input string, so we need to force a copy into a NULL-terminated buffer. */
+ if (!multipart_text(p, "\0", 1, false)) {
+ return false;
+ }
-err:
- upb_status_seterrf(&p->status, "error parsing number: %s", buf);
- upb_env_reporterror(p->env, &p->status);
- multipart_end(p);
- return false;
+ buf = accumulate_getptr(p, &len);
+
+ if (parse_number_from_buffer(p, buf, is_quoted)) {
+ multipart_end(p);
+ return true;
+ } else {
+ upb_status_seterrf(&p->status, "error parsing number: %s", buf);
+ upb_env_reporterror(p->env, &p->status);
+ multipart_end(p);
+ return false;
+ }
}
static bool parser_putbool(upb_json_parser *p, bool val) {
@@ -754,17 +813,16 @@ static bool start_stringval(upb_json_parser *p) {
multipart_startaccum(p);
return true;
}
- } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
- /* No need to push a frame -- symbolic enum names in quotes remain in the
- * current parser frame.
- *
- * Enum string values must accumulate so we can look up the value in a table
- * once it is complete. */
+ } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL &&
+ upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) {
+ /* No need to push a frame -- numeric values in quotes remain in the
+ * current parser frame. These values must accmulate so we can convert
+ * them all at once at the end. */
multipart_startaccum(p);
return true;
} else {
upb_status_seterrf(&p->status,
- "String specified for non-string/non-enum field: %s",
+ "String specified for bool or submessage field: %s",
upb_fielddef_name(p->top->f));
upb_env_reporterror(p->env, &p->status);
return false;
@@ -811,6 +869,15 @@ static bool end_stringval(upb_json_parser *p) {
break;
}
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_UINT64:
+ case UPB_TYPE_DOUBLE:
+ case UPB_TYPE_FLOAT:
+ ok = parse_number(p, true);
+ break;
+
default:
UPB_ASSERT(false);
upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
@@ -854,7 +921,7 @@ static bool parse_mapentry_key(upb_json_parser *p) {
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
/* Invoke end_number. The accum buffer has the number's text already. */
- if (!parse_number(p)) {
+ if (!parse_number(p, true)) {
return false;
}
break;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback