diff options
author | Joshua Haberman <jhaberman@gmail.com> | 2015-05-08 17:30:22 -0700 |
---|---|---|
committer | Joshua Haberman <jhaberman@gmail.com> | 2015-05-08 17:30:22 -0700 |
commit | ccc0fd0dbbcebb43f4d85d7df1439e1fc7993bf8 (patch) | |
tree | da3cbc97eed1eb70af5e0f3a687ff37ad239d119 /upb/json/parser.rl | |
parent | bd7ea8c6f1854aa37b7792c6f23334ffc0fd94ff (diff) | |
parent | 838009ba2b8ea1e99061c66e0fbd9cb53a96ec20 (diff) |
Merge pull request #18 from haberman/google-internal
Sync from Google-internal development.
Diffstat (limited to 'upb/json/parser.rl')
-rw-r--r-- | upb/json/parser.rl | 124 |
1 files changed, 97 insertions, 27 deletions
diff --git a/upb/json/parser.rl b/upb/json/parser.rl index b171617..81d1514 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -31,6 +31,71 @@ #include "upb/json/parser.h" +#define UPB_JSON_MAX_DEPTH 64 + +typedef struct { + upb_sink sink; + + // The current message in which we're parsing, and the field whose value we're + // expecting next. + const upb_msgdef *m; + const upb_fielddef *f; + + // We are in a repeated-field context, ready to emit mapentries as + // submessages. This flag alters the start-of-object (open-brace) behavior to + // begin a sequence of mapentry messages rather than a single submessage. + bool is_map; + + // We are in a map-entry message context. This flag is set when parsing the + // value field of a single map entry and indicates to all value-field parsers + // (subobjects, strings, numbers, and bools) that the map-entry submessage + // should end as soon as the value is parsed. + bool is_mapentry; + + // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent + // message's map field that we're currently parsing. This differs from |f| + // because |f| is the field in the *current* message (i.e., the map-entry + // message itself), not the parent's field that leads to this map. + const upb_fielddef *mapfield; +} upb_jsonparser_frame; + +struct upb_json_parser { + upb_env *env; + upb_byteshandler input_handler_; + upb_bytessink input_; + + // Stack to track the JSON scopes we are in. + upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; + upb_jsonparser_frame *top; + upb_jsonparser_frame *limit; + + upb_status *status; + + // Ragel's internal parsing stack for the parsing state machine. + int current_state; + int parser_stack[UPB_JSON_MAX_DEPTH]; + int parser_top; + + // The handle for the current buffer. + const upb_bufhandle *handle; + + // Accumulate buffer. See details in parser.rl. + const char *accumulated; + size_t accumulated_len; + char *accumulate_buf; + size_t accumulate_buf_size; + + // Multi-part text data. See details in parser.rl. + int multipart_state; + upb_selector_t string_selector; + + // Input capture. See details in parser.rl. + const char *capture; + + // Intermediate result of parsing a unicode escape sequence. + uint32_t digit; +}; + #define PARSER_CHECK_RETURN(x) if (!(x)) return false // Used to signal that a capture has been suspended. @@ -233,12 +298,13 @@ static void accumulate_clear(upb_json_parser *p) { // Used internally by accumulate_append(). static bool accumulate_realloc(upb_json_parser *p, size_t need) { - size_t new_size = UPB_MAX(p->accumulate_buf_size, 128); + size_t old_size = p->accumulate_buf_size; + size_t new_size = UPB_MAX(old_size, 128); while (new_size < need) { new_size = saturating_multiply(new_size, 2); } - void *mem = realloc(p->accumulate_buf, new_size); + void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); if (!mem) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); return false; @@ -1132,26 +1198,7 @@ bool end(void *closure, const void *hd) { return true; } - -/* Public API *****************************************************************/ - -void upb_json_parser_init(upb_json_parser *p, upb_status *status) { - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_byteshandler_init(&p->input_handler_); - upb_byteshandler_setstring(&p->input_handler_, parse, NULL); - upb_byteshandler_setendstr(&p->input_handler_, end, NULL); - upb_bytessink_reset(&p->input_, &p->input_handler_, p); - p->status = status; -} - -void upb_json_parser_uninit(upb_json_parser *p) { - upb_byteshandler_uninit(&p->input_handler_); - free(p->accumulate_buf); -} - -void upb_json_parser_reset(upb_json_parser *p) { +static void json_parser_reset(upb_json_parser *p) { p->top = p->stack; p->top->f = NULL; p->top->is_map = false; @@ -1166,13 +1213,36 @@ void upb_json_parser_reset(upb_json_parser *p) { accumulate_clear(p); p->multipart_state = MULTIPART_INACTIVE; p->capture = NULL; + p->accumulated = NULL; } -void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { - upb_json_parser_reset(p); - upb_sink_reset(&p->top->sink, sink->handlers, sink->closure); - p->top->m = upb_handlers_msgdef(sink->handlers); - p->accumulated = NULL; + +/* Public API *****************************************************************/ + +upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { +#ifndef NDEBUG + const size_t size_before = upb_env_bytesallocated(env); +#endif + upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser)); + if (!p) return false; + + p->env = env; + p->limit = p->stack + UPB_JSON_MAX_DEPTH; + p->accumulate_buf = NULL; + p->accumulate_buf_size = 0; + upb_byteshandler_init(&p->input_handler_); + upb_byteshandler_setstring(&p->input_handler_, parse, NULL); + upb_byteshandler_setendstr(&p->input_handler_, end, NULL); + upb_bytessink_reset(&p->input_, &p->input_handler_, p); + + json_parser_reset(p); + upb_sink_reset(&p->top->sink, output->handlers, output->closure); + p->top->m = upb_handlers_msgdef(output->handlers); + + // If this fails, uncomment and increase the value in parser.h. + // fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); + assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); + return p; } upb_bytessink *upb_json_parser_input(upb_json_parser *p) { |