From 919fea438a5ac5366684cfa26d2bb3d17519cb60 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Mon, 18 May 2015 10:55:20 -0700 Subject: Ported upb to C89, for greater portability. A large part of this change contains surface-level porting, like moving variable declarations to the top of the block. However there are a few more substantial things too: - moved internal-only struct definitions to a separate file (structdefs.int.h), for greater encapsulation and ABI compatibility. - removed the UPB_UPCAST macro, since it requires access to the internal-only struct definitions. Replaced uses with calls to inline, type-safe casting functions. - removed the UPB_DEFINE_CLASS/UPB_DEFINE_STRUCT macros. Class and struct definitions are now more explicit -- you get to see the actual class/struct keywords in the source. The casting convenience functions have been moved into UPB_DECLARE_DERIVED_TYPE() and UPB_DECLARE_DERIVED_TYPE2(). - the new way that we duplicate base methods in derived types is also more convenient and requires less duplication. It is also less greppable, but hopefully that is not too big a problem. Compiler flags (-std=c89 -pedantic) should help to rigorously enforce that the code is free of C99-isms. A few functions are not available in C89 (strtoll). There are temporary, hacky solutions in place. --- upb/json/parser.c | 562 +++++++++++++++++++++++++++++------------------------- 1 file changed, 306 insertions(+), 256 deletions(-) (limited to 'upb/json/parser.c') diff --git a/upb/json/parser.c b/upb/json/parser.c index f4f5628..896f18f 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -38,26 +38,26 @@ typedef struct { upb_sink sink; - // The current message in which we're parsing, and the field whose value we're - // expecting next. + /* The current message in which we're parsing, and the field whose value we're + * expecting next. */ const upb_msgdef *m; const upb_fielddef *f; - // We are in a repeated-field context, ready to emit mapentries as - // submessages. This flag alters the start-of-object (open-brace) behavior to - // begin a sequence of mapentry messages rather than a single submessage. + /* We are in a repeated-field context, ready to emit mapentries as + * submessages. This flag alters the start-of-object (open-brace) behavior to + * begin a sequence of mapentry messages rather than a single submessage. */ bool is_map; - // We are in a map-entry message context. This flag is set when parsing the - // value field of a single map entry and indicates to all value-field parsers - // (subobjects, strings, numbers, and bools) that the map-entry submessage - // should end as soon as the value is parsed. + /* We are in a map-entry message context. This flag is set when parsing the + * value field of a single map entry and indicates to all value-field parsers + * (subobjects, strings, numbers, and bools) that the map-entry submessage + * should end as soon as the value is parsed. */ bool is_mapentry; - // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent - // message's map field that we're currently parsing. This differs from |f| - // because |f| is the field in the *current* message (i.e., the map-entry - // message itself), not the parent's field that leads to this map. + /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + * message's map field that we're currently parsing. This differs from |f| + * because |f| is the field in the *current* message (i.e., the map-entry + * message itself), not the parent's field that leads to this map. */ const upb_fielddef *mapfield; } upb_jsonparser_frame; @@ -66,41 +66,41 @@ struct upb_json_parser { upb_byteshandler input_handler_; upb_bytessink input_; - // Stack to track the JSON scopes we are in. + /* Stack to track the JSON scopes we are in. */ upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; upb_jsonparser_frame *top; upb_jsonparser_frame *limit; upb_status *status; - // Ragel's internal parsing stack for the parsing state machine. + /* Ragel's internal parsing stack for the parsing state machine. */ int current_state; int parser_stack[UPB_JSON_MAX_DEPTH]; int parser_top; - // The handle for the current buffer. + /* The handle for the current buffer. */ const upb_bufhandle *handle; - // Accumulate buffer. See details in parser.rl. + /* Accumulate buffer. See details in parser.rl. */ const char *accumulated; size_t accumulated_len; char *accumulate_buf; size_t accumulate_buf_size; - // Multi-part text data. See details in parser.rl. + /* Multi-part text data. See details in parser.rl. */ int multipart_state; upb_selector_t string_selector; - // Input capture. See details in parser.rl. + /* Input capture. See details in parser.rl. */ const char *capture; - // Intermediate result of parsing a unicode escape sequence. + /* Intermediate result of parsing a unicode escape sequence. */ uint32_t digit; }; #define PARSER_CHECK_RETURN(x) if (!(x)) return false -// Used to signal that a capture has been suspended. +/* Used to signal that a capture has been suspended. */ static char suspend_capture; static upb_selector_t getsel_for_handlertype(upb_json_parser *p, @@ -125,8 +125,8 @@ static bool check_stack(upb_json_parser *p) { return true; } -// There are GCC/Clang built-ins for overflow checking which we could start -// using if there was any performance benefit to it. +/* There are GCC/Clang built-ins for overflow checking which we could start + * using if there was any performance benefit to it. */ static bool checked_add(size_t a, size_t b, size_t *c) { if (SIZE_MAX - a < b) return false; @@ -135,7 +135,7 @@ static bool checked_add(size_t a, size_t b, size_t *c) { } static size_t saturating_multiply(size_t a, size_t b) { - // size_t is unsigned, so this is defined behavior even on overflow. + /* size_t is unsigned, so this is defined behavior even on overflow. */ size_t ret = a * b; if (b != 0 && ret / b != a) { ret = SIZE_MAX; @@ -146,7 +146,7 @@ static size_t saturating_multiply(size_t a, size_t b) { /* Base64 decoding ************************************************************/ -// TODO(haberman): make this streaming. +/* TODO(haberman): make this streaming. */ static const signed char b64table[] = { -1, -1, -1, -1, -1, -1, -1, -1, @@ -183,19 +183,22 @@ static const signed char b64table[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; -// Returns the table value sign-extended to 32 bits. Knowing that the upper -// bits will be 1 for unrecognized characters makes it easier to check for -// this error condition later (see below). +/* Returns the table value sign-extended to 32 bits. Knowing that the upper + * bits will be 1 for unrecognized characters makes it easier to check for + * this error condition later (see below). */ int32_t b64lookup(unsigned char ch) { return b64table[ch]; } -// Returns true if the given character is not a valid base64 character or -// padding. +/* Returns true if the given character is not a valid base64 character or + * padding. */ bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; } static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) { const char *limit = ptr + len; for (; ptr < limit; ptr += 4) { + uint32_t val; + char output[3]; + if (limit - ptr < 4) { upb_status_seterrf(p->status, "Base64 input for bytes field not a multiple of 4: %s", @@ -203,17 +206,16 @@ static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, return false; } - uint32_t val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6 | - b64lookup(ptr[3]); + val = b64lookup(ptr[0]) << 18 | + b64lookup(ptr[1]) << 12 | + b64lookup(ptr[2]) << 6 | + b64lookup(ptr[3]); - // Test the upper bit; returns true if any of the characters returned -1. + /* Test the upper bit; returns true if any of the characters returned -1. */ if (val & 0x80000000) { goto otherchar; } - char output[3]; output[0] = val >> 16; output[1] = (val >> 8) & 0xff; output[2] = val & 0xff; @@ -229,29 +231,34 @@ otherchar: upb_fielddef_name(p->top->f)); return false; } if (ptr[2] == '=') { - // Last group contains only two input bytes, one output byte. + uint32_t val; + char output; + + /* Last group contains only two input bytes, one output byte. */ if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') { goto badpadding; } - uint32_t val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12; + val = b64lookup(ptr[0]) << 18 | + b64lookup(ptr[1]) << 12; assert(!(val & 0x80000000)); - char output = val >> 16; + output = val >> 16; upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL); return true; } else { - // Last group contains only three input bytes, two output bytes. + uint32_t val; + char output[2]; + + /* Last group contains only three input bytes, two output bytes. */ if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') { goto badpadding; } - uint32_t val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6; + val = b64lookup(ptr[0]) << 18 | + b64lookup(ptr[1]) << 12 | + b64lookup(ptr[2]) << 6; - char output[2]; output[0] = val >> 16; output[1] = (val >> 8) & 0xff; upb_sink_putstring(&p->top->sink, sel, output, 2, NULL); @@ -269,23 +276,23 @@ badpadding: /* Accumulate buffer **********************************************************/ -// Functionality for accumulating a buffer. -// -// Some parts of the parser need an entire value as a contiguous string. For -// example, to look up a member name in a hash table, or to turn a string into -// a number, the relevant library routines need the input string to be in -// contiguous memory, even if the value spanned two or more buffers in the -// input. These routines handle that. -// -// In the common case we can just point to the input buffer to get this -// contiguous string and avoid any actual copy. So we optimistically begin -// this way. But there are a few cases where we must instead copy into a -// separate buffer: -// -// 1. The string was not contiguous in the input (it spanned buffers). -// -// 2. The string included escape sequences that need to be interpreted to get -// the true value in a contiguous buffer. +/* Functionality for accumulating a buffer. + * + * Some parts of the parser need an entire value as a contiguous string. For + * example, to look up a member name in a hash table, or to turn a string into + * a number, the relevant library routines need the input string to be in + * contiguous memory, even if the value spanned two or more buffers in the + * input. These routines handle that. + * + * In the common case we can just point to the input buffer to get this + * contiguous string and avoid any actual copy. So we optimistically begin + * this way. But there are a few cases where we must instead copy into a + * separate buffer: + * + * 1. The string was not contiguous in the input (it spanned buffers). + * + * 2. The string included escape sequences that need to be interpreted to get + * the true value in a contiguous buffer. */ static void assert_accumulate_empty(upb_json_parser *p) { UPB_UNUSED(p); @@ -298,15 +305,16 @@ static void accumulate_clear(upb_json_parser *p) { p->accumulated_len = 0; } -// Used internally by accumulate_append(). +/* Used internally by accumulate_append(). */ static bool accumulate_realloc(upb_json_parser *p, size_t need) { + void *mem; size_t old_size = p->accumulate_buf_size; size_t new_size = UPB_MAX(old_size, 128); while (new_size < need) { new_size = saturating_multiply(new_size, 2); } - void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); + mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); if (!mem) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); return false; @@ -317,18 +325,19 @@ static bool accumulate_realloc(upb_json_parser *p, size_t need) { return true; } -// Logically appends the given data to the append buffer. -// If "can_alias" is true, we will try to avoid actually copying, but the buffer -// must be valid until the next accumulate_append() call (if any). +/* Logically appends the given data to the append buffer. + * If "can_alias" is true, we will try to avoid actually copying, but the buffer + * must be valid until the next accumulate_append() call (if any). */ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, bool can_alias) { + size_t need; + if (!p->accumulated && can_alias) { p->accumulated = buf; p->accumulated_len = len; return true; } - size_t need; if (!checked_add(p->accumulated_len, len, &need)) { upb_status_seterrmsg(p->status, "Integer overflow."); return false; @@ -348,9 +357,9 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, return true; } -// Returns a pointer to the data accumulated since the last accumulate_clear() -// call, and writes the length to *len. This with point either to the input -// buffer or a temporary accumulate buffer. +/* Returns a pointer to the data accumulated since the last accumulate_clear() + * call, and writes the length to *len. This with point either to the input + * buffer or a temporary accumulate buffer. */ static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { assert(p->accumulated); *len = p->accumulated_len; @@ -360,42 +369,42 @@ static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { /* Mult-part text data ********************************************************/ -// When we have text data in the input, it can often come in multiple segments. -// For example, there may be some raw string data followed by an escape -// sequence. The two segments are processed with different logic. Also buffer -// seams in the input can cause multiple segments. -// -// As we see segments, there are two main cases for how we want to process them: -// -// 1. we want to push the captured input directly to string handlers. -// -// 2. we need to accumulate all the parts into a contiguous buffer for further -// processing (field name lookup, string->number conversion, etc). - -// This is the set of states for p->multipart_state. +/* When we have text data in the input, it can often come in multiple segments. + * For example, there may be some raw string data followed by an escape + * sequence. The two segments are processed with different logic. Also buffer + * seams in the input can cause multiple segments. + * + * As we see segments, there are two main cases for how we want to process them: + * + * 1. we want to push the captured input directly to string handlers. + * + * 2. we need to accumulate all the parts into a contiguous buffer for further + * processing (field name lookup, string->number conversion, etc). */ + +/* This is the set of states for p->multipart_state. */ enum { - // We are not currently processing multipart data. + /* We are not currently processing multipart data. */ MULTIPART_INACTIVE = 0, - // We are processing multipart data by accumulating it into a contiguous - // buffer. + /* We are processing multipart data by accumulating it into a contiguous + * buffer. */ MULTIPART_ACCUMULATE = 1, - // We are processing multipart data by pushing each part directly to the - // current string handlers. + /* We are processing multipart data by pushing each part directly to the + * current string handlers. */ MULTIPART_PUSHEAGERLY = 2 }; -// Start a multi-part text value where we accumulate the data for processing at -// the end. +/* Start a multi-part text value where we accumulate the data for processing at + * the end. */ static void multipart_startaccum(upb_json_parser *p) { assert_accumulate_empty(p); assert(p->multipart_state == MULTIPART_INACTIVE); p->multipart_state = MULTIPART_ACCUMULATE; } -// Start a multi-part text value where we immediately push text data to a string -// value with the given selector. +/* Start a multi-part text value where we immediately push text data to a string + * value with the given selector. */ static void multipart_start(upb_json_parser *p, upb_selector_t sel) { assert_accumulate_empty(p); assert(p->multipart_state == MULTIPART_INACTIVE); @@ -427,8 +436,8 @@ static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, return true; } -// Note: this invalidates the accumulate buffer! Call only after reading its -// contents. +/* Note: this invalidates the accumulate buffer! Call only after reading its + * contents. */ static void multipart_end(upb_json_parser *p) { assert(p->multipart_state != MULTIPART_INACTIVE); p->multipart_state = MULTIPART_INACTIVE; @@ -438,9 +447,9 @@ static void multipart_end(upb_json_parser *p) { /* Input capture **************************************************************/ -// Functionality for capturing a region of the input as text. Gracefully -// handles the case where a buffer seam occurs in the middle of the captured -// region. +/* Functionality for capturing a region of the input as text. Gracefully + * handles the case where a buffer seam occurs in the middle of the captured + * region. */ static void capture_begin(upb_json_parser *p, const char *ptr) { assert(p->multipart_state != MULTIPART_INACTIVE); @@ -458,24 +467,24 @@ static bool capture_end(upb_json_parser *p, const char *ptr) { } } -// This is called at the end of each input buffer (ie. when we have hit a -// buffer seam). If we are in the middle of capturing the input, this -// processes the unprocessed capture region. +/* This is called at the end of each input buffer (ie. when we have hit a + * buffer seam). If we are in the middle of capturing the input, this + * processes the unprocessed capture region. */ static void capture_suspend(upb_json_parser *p, const char **ptr) { if (!p->capture) return; if (multipart_text(p, p->capture, *ptr - p->capture, false)) { - // We use this as a signal that we were in the middle of capturing, and - // that capturing should resume at the beginning of the next buffer. - // - // We can't use *ptr here, because we have no guarantee that this pointer - // will be valid when we resume (if the underlying memory is freed, then - // using the pointer at all, even to compare to NULL, is likely undefined - // behavior). + /* We use this as a signal that we were in the middle of capturing, and + * that capturing should resume at the beginning of the next buffer. + * + * We can't use *ptr here, because we have no guarantee that this pointer + * will be valid when we resume (if the underlying memory is freed, then + * using the pointer at all, even to compare to NULL, is likely undefined + * behavior). */ p->capture = &suspend_capture; } else { - // Need to back up the pointer to the beginning of the capture, since - // we were not able to actually preserve it. + /* Need to back up the pointer to the beginning of the capture, since + * we were not able to actually preserve it. */ *ptr = p->capture; } } @@ -490,8 +499,8 @@ static void capture_resume(upb_json_parser *p, const char *ptr) { /* Callbacks from the parser **************************************************/ -// These are the functions called directly from the parser itself. -// We define these in the same order as their declarations in the parser. +/* These are the functions called directly from the parser itself. + * We define these in the same order as their declarations in the parser. */ static char escape_char(char in) { switch (in) { @@ -536,8 +545,8 @@ static void hexdigit(upb_json_parser *p, const char *ptr) { static bool end_hex(upb_json_parser *p) { uint32_t codepoint = p->digit; - // emit the codepoint as UTF-8. - char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. + /* emit the codepoint as UTF-8. */ + char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */ int length = 0; if (codepoint <= 0x7F) { utf8[0] = codepoint; @@ -555,8 +564,8 @@ static bool end_hex(upb_json_parser *p) { utf8[0] = (codepoint & 0x0F) | 0xE0; length = 3; } - // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate - // we have to wait for the next escape to get the full code point). + /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate + * we have to wait for the next escape to get the full code point). */ return multipart_text(p, utf8, length, false); } @@ -585,17 +594,29 @@ static bool end_number(upb_json_parser *p, const char *ptr) { } static bool parse_number(upb_json_parser *p) { - // strtol() and friends unfortunately do not support specifying the length of - // the input string, so we need to force a copy into a NULL-terminated buffer. + size_t len; + const char *buf; + const char *myend; + char *end; + + /* strtol() and friends unfortunately do not support specifying the length of + * the input string, so we need to force a copy into a NULL-terminated buffer. */ if (!multipart_text(p, "\0", 1, false)) { return false; } - size_t len; - const char *buf = accumulate_getptr(p, &len); - const char *myend = buf + len - 1; // One for NULL. - - char *end; + buf = accumulate_getptr(p, &len); + myend = buf + len - 1; /* One for NULL. */ + + /* XXX: We are using strtol to parse integers, but this is wrong as even + * integers can be represented as 1e6 (for example), which strtol can't + * handle correctly. + * + * XXX: Also, we can't handle large integers properly because strto[u]ll + * isn't in C89. + * + * XXX: Also, we don't properly check floats for overflow, since strtof + * isn't in C89. */ switch (upb_fielddef_type(p->top->f)) { case UPB_TYPE_ENUM: case UPB_TYPE_INT32: { @@ -607,7 +628,7 @@ static bool parse_number(upb_json_parser *p) { break; } case UPB_TYPE_INT64: { - long long val = strtoll(p->accumulated, &end, 0); + long long val = strtol(p->accumulated, &end, 0); if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend) goto err; else @@ -623,7 +644,7 @@ static bool parse_number(upb_json_parser *p) { break; } case UPB_TYPE_UINT64: { - unsigned long long val = strtoull(p->accumulated, &end, 0); + unsigned long long val = strtoul(p->accumulated, &end, 0); if (val > UINT64_MAX || errno == ERANGE || end != myend) goto err; else @@ -639,7 +660,7 @@ static bool parse_number(upb_json_parser *p) { break; } case UPB_TYPE_FLOAT: { - float val = strtof(p->accumulated, &end); + float val = strtod(p->accumulated, &end); if (errno == ERANGE || end != myend) goto err; else @@ -661,6 +682,8 @@ err: } static bool parser_putbool(upb_json_parser *p, bool val) { + bool ok; + if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { upb_status_seterrf(p->status, "Boolean value specified for non-bool field: %s", @@ -668,7 +691,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) { return false; } - bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); + ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); UPB_ASSERT_VAR(ok, ok); return true; @@ -678,12 +701,15 @@ static bool start_stringval(upb_json_parser *p) { assert(p->top->f); if (upb_fielddef_isstring(p->top->f)) { + upb_jsonparser_frame *inner; + upb_selector_t sel; + if (!check_stack(p)) return false; - // Start a new parser frame: parser frames correspond one-to-one with - // handler frames, and string events occur in a sub-frame. - upb_jsonparser_frame *inner = p->top + 1; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + /* Start a new parser frame: parser frames correspond one-to-one with + * handler frames, and string events occur in a sub-frame. */ + inner = p->top + 1; + sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; @@ -692,11 +718,11 @@ static bool start_stringval(upb_json_parser *p) { p->top = inner; if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { - // For STRING fields we push data directly to the handlers as it is - // parsed. We don't do this yet for BYTES fields, because our base64 - // decoder is not streaming. - // - // TODO(haberman): make base64 decoding streaming also. + /* For STRING fields we push data directly to the handlers as it is + * parsed. We don't do this yet for BYTES fields, because our base64 + * decoder is not streaming. + * + * TODO(haberman): make base64 decoding streaming also. */ multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); return true; } else { @@ -704,11 +730,11 @@ static bool start_stringval(upb_json_parser *p) { return true; } } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { - // No need to push a frame -- symbolic enum names in quotes remain in the - // current parser frame. - // - // Enum string values must accumulate so we can look up the value in a table - // once it is complete. + /* No need to push a frame -- symbolic enum names in quotes remain in the + * current parser frame. + * + * Enum string values must accumulate so we can look up the value in a table + * once it is complete. */ multipart_startaccum(p); return true; } else { @@ -728,7 +754,7 @@ static bool end_stringval(upb_json_parser *p) { p->accumulated, p->accumulated_len)) { return false; } - // Fall through. + /* Fall through. */ case UPB_TYPE_STRING: { upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); @@ -738,7 +764,7 @@ static bool end_stringval(upb_json_parser *p) { } case UPB_TYPE_ENUM: { - // Resolve enum symbolic name to integer value. + /* Resolve enum symbolic name to integer value. */ const upb_enumdef *enumdef = (const upb_enumdef*)upb_fielddef_subdef(p->top->f); @@ -775,18 +801,18 @@ static void start_member(upb_json_parser *p) { multipart_startaccum(p); } -// Helper: invoked during parse_mapentry() to emit the mapentry message's key -// field based on the current contents of the accumulate buffer. +/* Helper: invoked during parse_mapentry() to emit the mapentry message's key + * field based on the current contents of the accumulate buffer. */ static bool parse_mapentry_key(upb_json_parser *p) { size_t len; const char *buf = accumulate_getptr(p, &len); - // Emit the key field. We do a bit of ad-hoc parsing here because the - // parser state machine has already decided that this is a string field - // name, and we are reinterpreting it as some arbitrary key type. In - // particular, integer and bool keys are quoted, so we need to parse the - // quoted string contents here. + /* Emit the key field. We do a bit of ad-hoc parsing here because the + * parser state machine has already decided that this is a string field + * name, and we are reinterpreting it as some arbitrary key type. In + * particular, integer and bool keys are quoted, so we need to parse the + * quoted string contents here. */ p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); if (p->top->f == NULL) { @@ -798,7 +824,7 @@ static bool parse_mapentry_key(upb_json_parser *p) { case UPB_TYPE_INT64: case UPB_TYPE_UINT32: case UPB_TYPE_UINT64: - // Invoke end_number. The accum buffer has the number's text already. + /* Invoke end_number. The accum buffer has the number's text already. */ if (!parse_number(p)) { return false; } @@ -839,47 +865,52 @@ static bool parse_mapentry_key(upb_json_parser *p) { return true; } -// Helper: emit one map entry (as a submessage in the map field sequence). This -// is invoked from end_membername(), at the end of the map entry's key string, -// with the map key in the accumulate buffer. It parses the key from that -// buffer, emits the handler calls to start the mapentry submessage (setting up -// its subframe in the process), and sets up state in the subframe so that the -// value parser (invoked next) will emit the mapentry's value field and then -// end the mapentry message. +/* Helper: emit one map entry (as a submessage in the map field sequence). This + * is invoked from end_membername(), at the end of the map entry's key string, + * with the map key in the accumulate buffer. It parses the key from that + * buffer, emits the handler calls to start the mapentry submessage (setting up + * its subframe in the process), and sets up state in the subframe so that the + * value parser (invoked next) will emit the mapentry's value field and then + * end the mapentry message. */ static bool handle_mapentry(upb_json_parser *p) { - // Map entry: p->top->sink is the seq frame, so we need to start a frame - // for the mapentry itself, and then set |f| in that frame so that the map - // value field is parsed, and also set a flag to end the frame after the - // map-entry value is parsed. + const upb_fielddef *mapfield; + const upb_msgdef *mapentrymsg; + upb_jsonparser_frame *inner; + upb_selector_t sel; + + /* Map entry: p->top->sink is the seq frame, so we need to start a frame + * for the mapentry itself, and then set |f| in that frame so that the map + * value field is parsed, and also set a flag to end the frame after the + * map-entry value is parsed. */ if (!check_stack(p)) return false; - const upb_fielddef *mapfield = p->top->mapfield; - const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield); + mapfield = p->top->mapfield; + mapentrymsg = upb_fielddef_msgsubdef(mapfield); - upb_jsonparser_frame *inner = p->top + 1; + inner = p->top + 1; p->top->f = mapfield; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); inner->m = mapentrymsg; inner->mapfield = mapfield; inner->is_map = false; - // Don't set this to true *yet* -- we reuse parsing handlers below to push - // the key field value to the sink, and these handlers will pop the frame - // if they see is_mapentry (when invoked by the parser state machine, they - // would have just seen the map-entry value, not key). + /* Don't set this to true *yet* -- we reuse parsing handlers below to push + * the key field value to the sink, and these handlers will pop the frame + * if they see is_mapentry (when invoked by the parser state machine, they + * would have just seen the map-entry value, not key). */ inner->is_mapentry = false; p->top = inner; - // send STARTMSG in submsg frame. + /* send STARTMSG in submsg frame. */ upb_sink_startmsg(&p->top->sink); parse_mapentry_key(p); - // Set up the value field to receive the map-entry value. + /* Set up the value field to receive the map-entry value. */ p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); - p->top->is_mapentry = true; // set up to pop frame after value is parsed. + p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */ p->top->mapfield = mapfield; if (p->top->f == NULL) { upb_status_seterrmsg(p->status, "mapentry message has no value"); @@ -900,7 +931,8 @@ static bool end_membername(upb_json_parser *p) { const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); if (!f) { - // TODO(haberman): Ignore unknown fields if requested/configured to do so. + /* TODO(haberman): Ignore unknown fields if requested/configured to do + * so. */ upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); return false; } @@ -913,19 +945,21 @@ static bool end_membername(upb_json_parser *p) { } static void end_member(upb_json_parser *p) { - // If we just parsed a map-entry value, end that frame too. + /* If we just parsed a map-entry value, end that frame too. */ if (p->top->is_mapentry) { - assert(p->top > p->stack); - // send ENDMSG on submsg. upb_status s = UPB_STATUS_INIT; + upb_selector_t sel; + bool ok; + const upb_fielddef *mapfield; + + assert(p->top > p->stack); + /* send ENDMSG on submsg. */ upb_sink_endmsg(&p->top->sink, &s); - const upb_fielddef* mapfield = p->top->mapfield; + mapfield = p->top->mapfield; - // send ENDSUBMSG in repeated-field-of-mapentries frame. + /* send ENDSUBMSG in repeated-field-of-mapentries frame. */ p->top--; - upb_selector_t sel; - bool ok = upb_handlers_getselector(mapfield, - UPB_HANDLER_ENDSUBMSG, &sel); + ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel); UPB_ASSERT_VAR(ok, ok); upb_sink_endsubmsg(&p->top->sink, sel); } @@ -937,12 +971,15 @@ static bool start_subobject(upb_json_parser *p) { assert(p->top->f); if (upb_fielddef_ismap(p->top->f)) { - // Beginning of a map. Start a new parser frame in a repeated-field - // context. + upb_jsonparser_frame *inner; + upb_selector_t sel; + + /* Beginning of a map. Start a new parser frame in a repeated-field + * context. */ if (!check_stack(p)) return false; - upb_jsonparser_frame *inner = p->top + 1; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + inner = p->top + 1; + sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = upb_fielddef_msgsubdef(p->top->f); inner->mapfield = p->top->f; @@ -953,13 +990,16 @@ static bool start_subobject(upb_json_parser *p) { return true; } else if (upb_fielddef_issubmsg(p->top->f)) { - // Beginning of a subobject. Start a new parser frame in the submsg - // context. + upb_jsonparser_frame *inner; + upb_selector_t sel; + + /* Beginning of a subobject. Start a new parser frame in the submsg + * context. */ if (!check_stack(p)) return false; - upb_jsonparser_frame *inner = p->top + 1; + inner = p->top + 1; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); + sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); inner->m = upb_fielddef_msgsubdef(p->top->f); inner->f = NULL; @@ -978,17 +1018,22 @@ static bool start_subobject(upb_json_parser *p) { static void end_subobject(upb_json_parser *p) { if (p->top->is_map) { + upb_selector_t sel; p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); upb_sink_endseq(&p->top->sink, sel); } else { + upb_selector_t sel; p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); upb_sink_endsubmsg(&p->top->sink, sel); } } static bool start_array(upb_json_parser *p) { + upb_jsonparser_frame *inner; + upb_selector_t sel; + assert(p->top->f); if (!upb_fielddef_isseq(p->top->f)) { @@ -1000,8 +1045,8 @@ static bool start_array(upb_json_parser *p) { if (!check_stack(p)) return false; - upb_jsonparser_frame *inner = p->top + 1; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); + inner = p->top + 1; + sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); upb_sink_startseq(&p->top->sink, sel, &inner->sink); inner->m = p->top->m; inner->f = p->top->f; @@ -1013,10 +1058,12 @@ static bool start_array(upb_json_parser *p) { } static void end_array(upb_json_parser *p) { + upb_selector_t sel; + assert(p->top > p->stack); p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); + sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); upb_sink_endseq(&p->top->sink, sel); } @@ -1039,27 +1086,27 @@ static void end_object(upb_json_parser *p) { /* The actual parser **********************************************************/ -// What follows is the Ragel parser itself. The language is specified in Ragel -// and the actions call our C functions above. -// -// Ragel has an extensive set of functionality, and we use only a small part of -// it. There are many action types but we only use a few: -// -// ">" -- transition into a machine -// "%" -- transition out of a machine -// "@" -- transition into a final state of a machine. -// -// "@" transitions are tricky because a machine can transition into a final -// state repeatedly. But in some cases we know this can't happen, for example -// a string which is delimited by a final '"' can only transition into its -// final state once, when the closing '"' is seen. +/* What follows is the Ragel parser itself. The language is specified in Ragel + * and the actions call our C functions above. + * + * Ragel has an extensive set of functionality, and we use only a small part of + * it. There are many action types but we only use a few: + * + * ">" -- transition into a machine + * "%" -- transition out of a machine + * "@" -- transition into a final state of a machine. + * + * "@" transitions are tricky because a machine can transition into a final + * state repeatedly. But in some cases we know this can't happen, for example + * a string which is delimited by a final '"' can only transition into its + * final state once, when the closing '"' is seen. */ -#line 1151 "upb/json/parser.rl" +#line 1198 "upb/json/parser.rl" -#line 1063 "upb/json/parser.c" +#line 1110 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -1208,16 +1255,13 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 1154 "upb/json/parser.rl" +#line 1201 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { - UPB_UNUSED(hd); - UPB_UNUSED(handle); upb_json_parser *parser = closure; - parser->handle = handle; - // Variables used by Ragel's generated code. + /* Variables used by Ragel's generated code. */ int cs = parser->current_state; int *stack = parser->parser_stack; int top = parser->parser_top; @@ -1225,10 +1269,15 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, const char *p = buf; const char *pe = buf + size; + parser->handle = handle; + + UPB_UNUSED(hd); + UPB_UNUSED(handle); + capture_resume(parser, buf); -#line 1232 "upb/json/parser.c" +#line 1281 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -1303,118 +1352,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 1066 "upb/json/parser.rl" +#line 1113 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 1067 "upb/json/parser.rl" +#line 1114 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 1071 "upb/json/parser.rl" +#line 1118 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 1072 "upb/json/parser.rl" +#line 1119 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 1078 "upb/json/parser.rl" +#line 1125 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 1079 "upb/json/parser.rl" +#line 1126 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 1080 "upb/json/parser.rl" +#line 1127 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 1086 "upb/json/parser.rl" +#line 1133 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 1092 "upb/json/parser.rl" +#line 1139 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 1095 "upb/json/parser.rl" +#line 1142 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 1097 "upb/json/parser.rl" +#line 1144 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 1102 "upb/json/parser.rl" +#line 1149 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 1103 "upb/json/parser.rl" +#line 1150 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_membername(parser)); } break; case 13: -#line 1106 "upb/json/parser.rl" +#line 1153 "upb/json/parser.rl" { end_member(parser); } break; case 14: -#line 1112 "upb/json/parser.rl" +#line 1159 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 1115 "upb/json/parser.rl" +#line 1162 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 1121 "upb/json/parser.rl" +#line 1168 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 1125 "upb/json/parser.rl" +#line 1172 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 1130 "upb/json/parser.rl" +#line 1177 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 1131 "upb/json/parser.rl" +#line 1178 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 1133 "upb/json/parser.rl" +#line 1180 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 1134 "upb/json/parser.rl" +#line 1181 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 1136 "upb/json/parser.rl" +#line 1183 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 1138 "upb/json/parser.rl" +#line 1185 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 1140 "upb/json/parser.rl" +#line 1187 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 1142 "upb/json/parser.rl" +#line 1189 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 1143 "upb/json/parser.rl" +#line 1190 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 1148 "upb/json/parser.rl" +#line 1195 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1418 "upb/json/parser.c" +#line 1467 "upb/json/parser.c" } } @@ -1427,7 +1476,7 @@ _again: _out: {} } -#line 1173 "upb/json/parser.rl" +#line 1222 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -1436,7 +1485,7 @@ _again: } error: - // Save parsing state back to parser. + /* Save parsing state back to parser. */ parser->current_state = cs; parser->parser_top = top; @@ -1447,7 +1496,7 @@ bool end(void *closure, const void *hd) { UPB_UNUSED(closure); UPB_UNUSED(hd); - // Prevent compile warning on unused static constants. + /* Prevent compile warning on unused static constants. */ UPB_UNUSED(json_start); UPB_UNUSED(json_en_number_machine); UPB_UNUSED(json_en_string_machine); @@ -1457,22 +1506,23 @@ bool end(void *closure, const void *hd) { } static void json_parser_reset(upb_json_parser *p) { + int cs; + int top; + p->top = p->stack; p->top->f = NULL; p->top->is_map = false; p->top->is_mapentry = false; - int cs; - int top; - // Emit Ragel initialization of the parser. + /* Emit Ragel initialization of the parser. */ -#line 1470 "upb/json/parser.c" +#line 1520 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 1211 "upb/json/parser.rl" +#line 1261 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); @@ -1504,8 +1554,8 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { upb_sink_reset(&p->top->sink, output->handlers, output->closure); p->top->m = upb_handlers_msgdef(output->handlers); - // If this fails, uncomment and increase the value in parser.h. - // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); + /* If this fails, uncomment and increase the value in parser.h. + * fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); return p; } -- cgit v1.2.3