From abcb6428ad9bf7d650455a0a180647a05183fd9d Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Thu, 30 Jul 2015 14:54:03 -0700 Subject: Changed parser semantics around skipping. Prior to this change: parse(buf, len) -> len + N ...would indicate that the next N bytes of the input are not needed, *and* would advance the decoding position by this much. After this change: parse(buf, len) -> len + N parse(NULL, N) -> N ...can be used to achieve the same thing. But skipping the N bytes is not explicitly performed by the user. A user that doesn't want/need to skip can just say: parsed = parse(buf, len); if (parsed < len) { // Handle suspend, advance stream by "parsed". } else { // Stream was advanced by "len" (even if parsed > len). } Updated unit tests to test this new behavior, and refactored test utility code a bit to support it. --- tests/json/test_json.cc | 37 ++++------- tests/pb/test_decoder.cc | 168 +++++++++++++++++++++++------------------------ tests/test_util.h | 135 +++++++++++++++++++++++++++---------- upb/pb/decoder.c | 155 +++++++++++++++++++++++++------------------ upb/pb/decoder.int.h | 6 ++ upb/sink.h | 2 +- 6 files changed, 294 insertions(+), 209 deletions(-) diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc index 8dd4062..c483cf0 100644 --- a/tests/json/test_json.cc +++ b/tests/json/test_json.cc @@ -288,33 +288,22 @@ void test_json_roundtrip_message(const char* json_src, const char* json_expected, const upb::Handlers* serialize_handlers, int seam) { - upb::Status st; - upb::Environment env; - env.ReportErrorsTo(&st); + VerboseParserEnvironment env(verbose); StringSink data_sink; - upb::json::Printer* printer = - upb::json::Printer::Create(&env, serialize_handlers, data_sink.Sink()); - upb::json::Parser* parser = upb::json::Parser::Create(&env, printer->input()); - - upb::BytesSink* input = parser->input(); - void *sub; - size_t len = strlen(json_src); - size_t ofs = 0; - - bool ok = input->Start(0, &sub) && - parse_buffer(input, sub, json_src, 0, seam, &ofs, &st, verbose) && - parse_buffer(input, sub, json_src, seam, len, &ofs, &st, verbose) && - ofs == len; - - if (ok) { - if (verbose) { - fprintf(stderr, "calling end()\n"); - } - ok = input->End(); - } + upb::json::Printer* printer = upb::json::Printer::Create( + env.env(), serialize_handlers, data_sink.Sink()); + upb::json::Parser* parser = + upb::json::Parser::Create(env.env(), printer->input()); + env.ResetBytesSink(parser->input()); + env.Reset(json_src, strlen(json_src), false); + + bool ok = env.Start() && + env.ParseBuffer(seam) && + env.ParseBuffer(-1) && + env.End(); if (!ok) { - fprintf(stderr, "upb parse error: %s\n", st.error_message()); + fprintf(stderr, "upb parse error: %s\n", env.status().error_message()); } ASSERT(ok); diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc index 6bb53fc..310f8c1 100644 --- a/tests/pb/test_decoder.cc +++ b/tests/pb/test_decoder.cc @@ -63,6 +63,8 @@ #define MAX_NESTING 64 +#define LINE(x) x "\n" + uint32_t filter_hash = 0; double completed; double total; @@ -517,12 +519,13 @@ upb::pb::Decoder* CreateDecoder(upb::Environment* env, } uint32_t Hash(const string& proto, const string* expected_output, size_t seam1, - size_t seam2) { + size_t seam2, bool may_skip) { uint32_t hash = MurmurHash2(proto.c_str(), proto.size(), 0); if (expected_output) hash = MurmurHash2(expected_output->c_str(), expected_output->size(), hash); hash = MurmurHash2(&seam1, sizeof(seam1), hash); hash = MurmurHash2(&seam2, sizeof(seam2), hash); + hash = MurmurHash2(&may_skip, sizeof(may_skip), hash); return hash; } @@ -538,91 +541,88 @@ void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) { ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED)); } -static bool parse(upb::pb::Decoder* decoder, void* subc, const char* buf, - size_t start, size_t end, size_t* ofs, upb::Status* status) { - CheckBytesParsed(*decoder, *ofs); - bool ret = parse_buffer(decoder->input(), subc, buf, start, end, ofs, status, - filter_hash != 0); +static bool parse(VerboseParserEnvironment* env, + const upb::pb::Decoder& decoder, int bytes) { + CheckBytesParsed(decoder, env->ofs()); + bool ret = env->ParseBuffer(bytes); if (ret) { - CheckBytesParsed(*decoder, *ofs); + CheckBytesParsed(decoder, env->ofs()); } return ret; } -#define LINE(x) x "\n" -void run_decoder(const string& proto, const string* expected_output) { - upb::Status status; - upb::Sink sink(global_handlers, &closures[0]); - for (size_t i = 0; i < proto.size(); i++) { - for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) { - // TODO(haberman): hoist this again once the environment supports reset. - upb::Environment env; - env.ReportErrorsTo(&status); - upb::pb::Decoder *decoder = CreateDecoder(&env, global_method, &sink); - - testhash = Hash(proto, expected_output, i, j); - if (filter_hash && testhash != filter_hash) continue; - if (test_mode != COUNT_ONLY) { - output.clear(); - status.Clear(); - size_t ofs = 0; - upb::BytesSink* input = decoder->input(); - void *sub; - - if (filter_hash) { - fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash); - fprintf(stderr, "JIT on: %s\n", - global_method->is_native() ? "true" : "false"); - fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size()); - PrintBinary(proto); - fprintf(stderr, "\n"); - if (expected_output) { - fprintf(stderr, "Expected output: %s\n", expected_output->c_str()); - } else { - fprintf(stderr, "Expected to FAIL\n"); - } - fprintf(stderr, "Calling start()\n"); - } +void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder, + const string& proto, const string* expected_output, + size_t i, size_t j, bool may_skip) { + env->Reset(proto.c_str(), proto.size(), may_skip); + decoder->Reset(); - bool ok = input->Start(proto.size(), &sub) && - parse(decoder, sub, proto.c_str(), 0, i, &ofs, &status) && - parse(decoder, sub, proto.c_str(), i, j, &ofs, &status) && - parse(decoder, sub, proto.c_str(), j, proto.size(), &ofs, - &status) && - ofs == proto.size(); + testhash = Hash(proto, expected_output, i, j, may_skip); + if (filter_hash && testhash != filter_hash) return; + if (test_mode != COUNT_ONLY) { + output.clear(); + + if (filter_hash) { + fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash); + fprintf(stderr, "JIT on: %s\n", + global_method->is_native() ? "true" : "false"); + fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size()); + PrintBinary(proto); + fprintf(stderr, "\n"); + if (expected_output) { + fprintf(stderr, "Expected output: %s\n", expected_output->c_str()); + } else { + fprintf(stderr, "Expected to FAIL\n"); + } + fprintf(stderr, "Calling start()\n"); + } + bool ok = env->Start() && + parse(env, *decoder, i) && + parse(env, *decoder, j - i) && + parse(env, *decoder, -1) && + env->End(); + + ASSERT(ok == env->status().ok()); + + if (test_mode == ALL_HANDLERS) { + if (expected_output) { + if (output != *expected_output) { + fprintf(stderr, "Text mismatch: '%s' vs '%s'\n", + output.c_str(), expected_output->c_str()); + } + if (!ok) { + fprintf(stderr, "Failed: %s\n", env->status().error_message()); + } + ASSERT(ok); + ASSERT(output == *expected_output); + } else { if (ok) { - if (filter_hash) { - fprintf(stderr, "calling end()\n"); - } - ok = input->End(); + fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n", + output.c_str()); + } else if (filter_hash) { + fprintf(stderr, "Failed as we expected, with message: %s\n", + env->status().error_message()); } + ASSERT(!ok); + } + } + } + (*count)++; +} - if (test_mode == ALL_HANDLERS) { - if (expected_output) { - if (output != *expected_output) { - fprintf(stderr, "Text mismatch: '%s' vs '%s'\n", - output.c_str(), expected_output->c_str()); - } - if (!ok) { - fprintf(stderr, "Failed: %s\n", status.error_message()); - } - ASSERT(ok); - ASSERT(output == *expected_output); - } else { - if (ok) { - fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n", - output.c_str()); - } else if (filter_hash) { - fprintf(stderr, "Failed as we expected, with message: %s\n", - status.error_message()); - } - ASSERT(!ok); - } - } +void run_decoder(const string& proto, const string* expected_output) { + VerboseParserEnvironment env(filter_hash != 0); + upb::Sink sink(global_handlers, &closures[0]); + upb::pb::Decoder *decoder = CreateDecoder(env.env(), global_method, &sink); + env.ResetBytesSink(decoder->input()); + for (size_t i = 0; i < proto.size(); i++) { + for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) { + do_run_decoder(&env, decoder, proto, expected_output, i, j, true); + if (env.SkippedWithNull()) { + do_run_decoder(&env, decoder, proto, expected_output, i, j, false); } - (*count)++; } } testhash = 0; @@ -1146,20 +1146,14 @@ upb::reffed_ptr method = { NULL, 0 }, }; for (int i = 0; testdata[i].data; i++) { - upb::Environment env; - upb::Status status; - env.ReportErrorsTo(&status); + VerboseParserEnvironment env(filter_hash != 0); upb::Sink sink(method->dest_handlers(), &closures[0]); - upb::pb::Decoder* decoder = CreateDecoder(&env, method.get(), &sink); - upb::BytesSink* input = decoder->input(); - void* subc; - ASSERT(input->Start(0, &subc)); - size_t ofs = 0; - ASSERT(parse_buffer(input, subc, - testdata[i].data, 0, testdata[i].length, - &ofs, &status, false)); - ASSERT(ofs == testdata[i].length); - ASSERT(input->End()); + upb::pb::Decoder* decoder = CreateDecoder(env.env(), method.get(), &sink); + env.ResetBytesSink(decoder->input()); + env.Reset(testdata[i].data, testdata[i].length, true); + ASSERT(env.Start()); + ASSERT(env.ParseBuffer(-1)); + ASSERT(env.End()); } } diff --git a/tests/test_util.h b/tests/test_util.h index d9e8d25..6408557 100644 --- a/tests/test_util.h +++ b/tests/test_util.h @@ -8,59 +8,90 @@ #include #include #include "tests/upb_test.h" +#include "upb/env.h" #include "upb/sink.h" upb::BufferHandle global_handle; -// Puts a region of the given buffer [start, end) into the given sink (which -// probably represents a parser. Can gracefully handle the case where the -// parser returns a "parsed" length that is less or greater than the input -// buffer length, and tracks the overall parse offset in *ofs. -// -// Pass verbose=true to print detailed diagnostics to stderr. -bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf, - size_t start, size_t end, size_t* ofs, - upb::Status* status, bool verbose) { - start = UPB_MAX(start, *ofs); +class VerboseParserEnvironment { + public: + VerboseParserEnvironment(bool verbose) : verbose_(verbose) { + env_.ReportErrorsTo(&status_); + } + + void Reset(const char *buf, size_t len, bool may_skip) { + buf_ = buf; + len_ = len; + ofs_ = 0; + skip_until_ = may_skip ? 0 : -1; + status_.Clear(); + } + + bool Start() { + return sink_->Start(len_, &subc_); + } + + bool End() { + return sink_->End(); + } + + // Puts a region of the given buffer [start, end) into the given sink (which + // probably represents a parser. Can gracefully handle the case where the + // parser returns a "parsed" length that is less or greater than the input + // buffer length, and tracks the overall parse offset in *ofs. + // + // Pass verbose=true to print detailed diagnostics to stderr. + bool ParseBuffer(int bytes) { + if (bytes < 0) { + bytes = len_ - ofs_; + } - if (start <= end) { - size_t len = end - start; + ASSERT((size_t)bytes <= (len_ - ofs_)); // Copy buffer into a separate, temporary buffer. // This is necessary to verify that the parser is not erroneously // reading outside the specified bounds. - char *buf2 = (char*)malloc(len); - assert(buf2); - memcpy(buf2, buf + start, len); + char *buf2 = NULL; - if (verbose) { + if ((int)(ofs_ + bytes) > skip_until_) { + buf2 = (char*)malloc(bytes); + assert(buf2); + memcpy(buf2, buf_ + ofs_, bytes); + } + + if (buf2 == NULL && bytes == 0) { + // Decoders dont' support buf=NULL, bytes=0. + return true; + } + + if (verbose_) { fprintf(stderr, "Calling parse(%u) for bytes %u-%u of the input\n", - (unsigned)len, (unsigned)start, (unsigned)end); + (unsigned)bytes, (unsigned)ofs_, (unsigned)(ofs_ + bytes)); } - size_t parsed = sink->PutBuffer(subc, buf2, len, &global_handle); + int parsed = sink_->PutBuffer(subc_, buf2, bytes, &global_handle); free(buf2); - if (verbose) { - if (parsed == len) { + if (verbose_) { + if (parsed == bytes) { fprintf(stderr, "parse(%u) = %u, complete byte count indicates success\n", - (unsigned)len, (unsigned)len); - } else if (parsed > len) { + (unsigned)bytes, (unsigned)bytes); + } else if (parsed > bytes) { fprintf(stderr, - "parse(%u) = %u, long byte count indicates success and skip" + "parse(%u) = %u, long byte count indicates success and skip " "of the next %u bytes\n", - (unsigned)len, (unsigned)parsed, (unsigned)(parsed - len)); + (unsigned)bytes, (unsigned)parsed, (unsigned)(parsed - bytes)); } else { fprintf(stderr, "parse(%u) = %u, short byte count indicates failure; " "last %u bytes were not consumed\n", - (unsigned)len, (unsigned)parsed, (unsigned)(len - parsed)); + (unsigned)bytes, (unsigned)parsed, (unsigned)(bytes - parsed)); } } - if (status->ok() != (parsed >= len)) { - if (status->ok()) { + if (status_.ok() != (parsed >= bytes)) { + if (status_.ok()) { fprintf(stderr, "Error: decode function returned short byte count but set no " "error status\n"); @@ -69,17 +100,55 @@ bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf, "Error: decode function returned complete byte count but set " "error status\n"); } - fprintf(stderr, "Status: %s, parsed=%u, len=%u\n", - status->error_message(), (unsigned)parsed, (unsigned)len); + fprintf(stderr, "Status: %s, parsed=%u, bytes=%u\n", + status_.error_message(), (unsigned)parsed, (unsigned)bytes); ASSERT(false); } - if (!status->ok()) + if (!status_.ok()) return false; - *ofs += parsed; + if (parsed > bytes && skip_until_ >= 0) { + skip_until_ = ofs_ + parsed; + } + + ofs_ += UPB_MIN(parsed, bytes); + + return true; + } + + void ResetBytesSink(upb::BytesSink* sink) { + sink_ = sink; } - return true; -} + + const upb::Status& status() { return status_; } + + size_t ofs() { return ofs_; } + upb::Environment* env() { return &env_; } + + bool SkippedWithNull() { return skip_until_ > 0; } + + private: + upb::Environment env_; + upb::Status status_; + upb::BytesSink* sink_; + const char* buf_; + size_t len_; + bool verbose_; + size_t ofs_; + void *subc_; + + // When our parse call returns a value greater than the number of bytes + // we passed in, the decoder is indicating to us that the next N bytes + // in the stream are not needed and can be skipped. The user is allowed + // to pass a NULL buffer for those N bytes. + // + // skip_until_ is initially set to 0 if we should do this NULL-buffer + // skipping or -1 if we should not. If we are open to doing NULL-buffer + // skipping and we get an opportunity to do it, we set skip_until to the + // stream offset where we can skip until. The user can then test whether + // this happened by testing SkippedWithNull(). + int skip_until_; +}; #endif diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 905fdd1..34aed1f 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -60,6 +60,28 @@ static bool consumes_input(opcode op) { } } +static size_t stacksize(upb_pbdecoder *d, size_t entries) { + UPB_UNUSED(d); + return entries * sizeof(upb_pbdecoder_frame); +} + +static size_t callstacksize(upb_pbdecoder *d, size_t entries) { + UPB_UNUSED(d); + +#ifdef UPB_USE_JIT_X64 + if (d->method_->is_native_) { + /* Each native stack frame needs two pointers, plus we need a few frames for + * the enter/exit trampolines. */ + size_t ret = entries * sizeof(void*) * 2; + ret += sizeof(void*) * 10; + return ret; + } +#endif + + return entries * sizeof(uint32_t*); +} + + static bool in_residual_buf(const upb_pbdecoder *d, const char *p); /* It's unfortunate that we have to micro-manage the compiler with @@ -145,24 +167,66 @@ static void checkpoint(upb_pbdecoder *d) { d->checkpoint = d->ptr; } +/* Skips "bytes" bytes in the stream, which may be more than available. If we + * skip more bytes than are available, we return a long read count to the caller + * indicating how many bytes can be skipped over before passing actual data + * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they + * won't actually be read. + */ +static int32_t skip(upb_pbdecoder *d, size_t bytes) { + assert(!in_residual_buf(d, d->ptr) || d->size_param == 0); + if (curbufleft(d) > bytes) { + /* Skipped data is all in current buffer, and more is still available. */ + advance(d, bytes); + d->skip = 0; + return DECODE_OK; + } else { + /* Skipped data extends beyond currently available buffers. */ + d->pc = d->last; + d->skip = bytes - curbufleft(d); + d->bufstart_ofs += (d->end - d->buf); + d->residual_end = d->residual; + switchtobuf(d, d->residual, d->residual_end); + return d->size_param + d->skip; + } +} + + /* Resumes the decoder from an initial state or from a previous suspend. */ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, size_t size, const upb_bufhandle *handle) { UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ + d->buf_param = buf; d->size_param = size; d->handle = handle; + if (d->residual_end > d->residual) { /* We have residual bytes from the last buffer. */ assert(d->ptr == d->residual); } else { switchtobuf(d, buf, buf + size); } + d->checkpoint = d->ptr; + + if (d->skip) { + CHECK_RETURN(skip(d, d->skip)); + d->checkpoint = d->ptr; + } + + if (!buf) { + /* NULL buf is ok if its entire span is covered by the "skip" above, but + * by this point we know that "skip" doesn't cover the buffer. */ + seterr(d, "Passed NULL buffer over non-skippable region."); + return upb_pbdecoder_suspend(d); + } + if (d->top->groupnum < 0) { CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); d->checkpoint = d->ptr; } + return DECODE_OK; } @@ -222,28 +286,6 @@ static size_t suspend_save(upb_pbdecoder *d) { return d->size_param; } -/* Skips "bytes" bytes in the stream, which may be more than available. If we - * skip more bytes than are available, we return a long read count to the caller - * indicating how many bytes the caller should skip before passing a new buffer. - */ -static int32_t skip(upb_pbdecoder *d, size_t bytes) { - assert(!in_residual_buf(d, d->ptr) || d->size_param == 0); - if (curbufleft(d) >= bytes) { - /* Skipped data is all in current buffer. */ - advance(d, bytes); - return DECODE_OK; - } else { - /* Skipped data extends beyond currently available buffers. */ - size_t skip; - d->pc = d->last; - skip = bytes - curbufleft(d); - d->bufstart_ofs += (d->end - d->buf) + skip; - d->residual_end = d->residual; - switchtobuf(d, d->residual, d->residual_end); - return d->size_param + skip; - } -} - /* Copies the next "bytes" bytes into "buf" and advances the stream. * Requires that this many bytes are available in the current buffer. */ UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf, @@ -618,18 +660,8 @@ upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) { /* The main decoder VM function. Uses traditional bytecode dispatch loop with a * switch() statement. */ -size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, - size_t size, const upb_bufhandle *handle) { - upb_pbdecoder *d = closure; - const mgroup *group = hd; - int32_t result; - assert(buf); - result = upb_pbdecoder_resume(d, NULL, buf, size, handle); - if (result == DECODE_ENDGROUP) { - goto_endmsg(d); - } - CHECK_RETURN(result); - UPB_UNUSED(group); +size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group, + const upb_bufhandle* handle) { #define VMCASE(op, code) \ case op: { code; if (consumes_input(op)) checkpoint(d); break; } @@ -652,6 +684,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, arg = instruction >> 8; longofs = arg; assert(d->ptr != d->residual_end); + UPB_UNUSED(group); #ifdef UPB_DUMP_BYTECODE fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d " "%x %s (%d)\n", @@ -827,12 +860,15 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, CHECK_RETURN(dispatch(d)); }) VMCASE(OP_HALT, { - return size; + return d->size_param; }) } } } + +/* BytesHandler handlers ******************************************************/ + void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { upb_pbdecoder *d = closure; UPB_UNUSED(size_hint); @@ -841,6 +877,7 @@ void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { d->call_len = 1; d->callstack[0] = &halt; d->pc = pc; + d->skip = 0; return d; } @@ -851,6 +888,7 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) { d->top->end_ofs = UINT64_MAX; d->bufstart_ofs = 0; d->call_len = 0; + d->skip = 0; return d; } @@ -859,12 +897,9 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { const upb_pbdecodermethod *method = handler_data; uint64_t end; char dummy; -#ifdef UPB_USE_JIT_X64 - const mgroup *group = (const mgroup*)method->group; -#endif if (d->residual_end > d->residual) { - seterr(d, "Unexpected EOF"); + seterr(d, "Unexpected EOF: decoder still has buffered unparsed data"); return false; } @@ -873,15 +908,15 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { return false; } - /* Message ends here. */ + /* The user's end() call indicates that the message ends here. */ end = offset(d); d->top->end_ofs = end; #ifdef UPB_USE_JIT_X64 - if (group->jit_code) { + if (method->group->jit_code) { if (d->top != d->stack) d->stack->end_ofs = 0; - group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL); + method->group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL); } else #endif { @@ -901,13 +936,26 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { } if (d->call_len != 0) { - seterr(d, "Unexpected EOF"); + seterr(d, "Unexpected EOF inside submessage or group"); return false; } return true; } +size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf, + size_t size, const upb_bufhandle *handle) { + int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle); + + if (result == DECODE_ENDGROUP) goto_endmsg(decoder); + CHECK_RETURN(result); + + return run_decoder_vm(decoder, group, handle); +} + + +/* Public API *****************************************************************/ + void upb_pbdecoder_reset(upb_pbdecoder *d) { d->top = d->stack; d->top->groupnum = 0; @@ -917,27 +965,6 @@ void upb_pbdecoder_reset(upb_pbdecoder *d) { d->residual_end = d->residual; } -static size_t stacksize(upb_pbdecoder *d, size_t entries) { - UPB_UNUSED(d); - return entries * sizeof(upb_pbdecoder_frame); -} - -static size_t callstacksize(upb_pbdecoder *d, size_t entries) { - UPB_UNUSED(d); - -#ifdef UPB_USE_JIT_X64 - if (d->method_->is_native_) { - /* Each native stack frame needs two pointers, plus we need a few frames for - * the enter/exit trampolines. */ - size_t ret = entries * sizeof(void*) * 2; - ret += sizeof(void*) * 10; - return ret; - } -#endif - - return entries * sizeof(uint32_t*); -} - upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m, upb_sink *sink) { const size_t default_max_nesting = 64; diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index 2d4485a..be5d044 100644 --- a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -225,6 +225,12 @@ struct upb_pbdecoder { char residual[12]; char *residual_end; + /* Bytes of data that should be discarded from the input beore we start + * parsing again. We set this when we internally determine that we can + * safely skip the next N bytes, but this region extends past the current + * user buffer. */ + size_t skip; + /* Stores the user buffer passed to our decode function. */ const char *buf_param; size_t size_param; diff --git a/upb/sink.h b/upb/sink.h index e7d4960..765916e 100644 --- a/upb/sink.h +++ b/upb/sink.h @@ -272,7 +272,7 @@ UPB_INLINE bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bufhandle_setbuf(&handle, buf, 0); ret = upb_bytessink_start(sink, len, &subc); if (ret && len != 0) { - ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) == len); + ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len); } if (ret) { ret = upb_bytessink_end(sink); -- cgit v1.2.3 From 7dcd017f4ed829b2ea4dd2d44165cd7ef0594057 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Thu, 30 Jul 2015 17:17:09 -0700 Subject: Fixed PR for JIT-enabled builds. --- tests/test_util.h | 38 ++++++++++++++++++++++++++++++-------- upb/pb/decoder.c | 5 +++-- upb/pb/decoder.h | 2 +- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/tests/test_util.h b/tests/test_util.h index 6408557..d2d30a4 100644 --- a/tests/test_util.h +++ b/tests/test_util.h @@ -13,8 +13,21 @@ upb::BufferHandle global_handle; +// A convenience class for parser tests. Provides some useful features: +// +// - can support multiple calls to parse, to test the parser's handling +// of buffer seams. +// +// - can output verbose output about each parse call when requested, for +// ease of debugging. +// +// - can pass NULL for skipped regions of the input if requested. +// +// - allocates and passes a separate buffer for each parsed region, to +// ensure that the parser is not erroneously overreading its buffer. class VerboseParserEnvironment { public: + // Pass verbose=true to print detailed diagnostics to stderr. VerboseParserEnvironment(bool verbose) : verbose_(verbose) { env_.ReportErrorsTo(&status_); } @@ -24,9 +37,21 @@ class VerboseParserEnvironment { len_ = len; ofs_ = 0; skip_until_ = may_skip ? 0 : -1; + skipped_with_null_ = false; status_.Clear(); } + // The user should call a series of: + // + // Reset(buf, len, may_skip); + // Start() + // ParseBuffer(X); + // ParseBuffer(Y); + // // Repeat ParseBuffer as desired, but last call should pass -1. + // ParseBuffer(-1); + // End(); + + bool Start() { return sink_->Start(len_, &subc_); } @@ -35,12 +60,6 @@ class VerboseParserEnvironment { return sink_->End(); } - // Puts a region of the given buffer [start, end) into the given sink (which - // probably represents a parser. Can gracefully handle the case where the - // parser returns a "parsed" length that is less or greater than the input - // buffer length, and tracks the overall parse offset in *ofs. - // - // Pass verbose=true to print detailed diagnostics to stderr. bool ParseBuffer(int bytes) { if (bytes < 0) { bytes = len_ - ofs_; @@ -53,7 +72,9 @@ class VerboseParserEnvironment { // reading outside the specified bounds. char *buf2 = NULL; - if ((int)(ofs_ + bytes) > skip_until_) { + if ((int)(ofs_ + bytes) <= skip_until_) { + skipped_with_null_ = true; + } else { buf2 = (char*)malloc(bytes); assert(buf2); memcpy(buf2, buf_ + ofs_, bytes); @@ -126,7 +147,7 @@ class VerboseParserEnvironment { size_t ofs() { return ofs_; } upb::Environment* env() { return &env_; } - bool SkippedWithNull() { return skip_until_ > 0; } + bool SkippedWithNull() { return skipped_with_null_; } private: upb::Environment env_; @@ -149,6 +170,7 @@ class VerboseParserEnvironment { // stream offset where we can skip until. The user can then test whether // this happened by testing SkippedWithNull(). int skip_until_; + bool skipped_with_null_; }; #endif diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 34aed1f..b624812 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -913,10 +913,11 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { d->top->end_ofs = end; #ifdef UPB_USE_JIT_X64 - if (method->group->jit_code) { + if (method->is_native_) { + const mgroup *group = (const mgroup*)method->group; if (d->top != d->stack) d->stack->end_ofs = 0; - method->group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL); + group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL); } else #endif { diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index f28e5e6..65316b4 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -92,7 +92,7 @@ class upb::pb::DecoderMethod { * constructed. This hint may be an overestimate for some build configurations. * But if the decoder library is upgraded without recompiling the application, * it may be an underestimate. */ -#define UPB_PB_DECODER_SIZE 4400 +#define UPB_PB_DECODER_SIZE 4408 #ifdef __cplusplus -- cgit v1.2.3