diff options
Diffstat (limited to 'upb/pb/decoder.c')
-rw-r--r-- | upb/pb/decoder.c | 225 |
1 files changed, 159 insertions, 66 deletions
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index c5fae0e..9c54b8a 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -18,7 +18,6 @@ #endif #define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d); -#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; } // Error messages that are shared between the bytecode and JIT decoders. const char *kPbDecoderStackOverflow = "Nesting too deep."; @@ -45,10 +44,10 @@ static bool consumes_input(opcode op) { case OP_PUSHTAGDELIM: case OP_POP: case OP_SETDELIM: - case OP_SETGROUPNUM: case OP_SETBIGGROUPNUM: case OP_CHECKDELIM: case OP_CALL: + case OP_RET: case OP_BRANCH: return false; default: @@ -147,13 +146,12 @@ static void checkpoint(upb_pbdecoder *d) { } // Resumes the decoder from an initial state or from a previous suspend. -void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, - size_t size, const upb_bufhandle *handle) { +int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, + size_t size, const upb_bufhandle *handle) { UPB_UNUSED(p); // Useless; just for the benefit of the JIT. d->buf_param = buf; d->size_param = size; d->handle = handle; - d->skip = 0; if (d->residual_end > d->residual) { // We have residual bytes from the last buffer. assert(ptr(d) == d->residual); @@ -161,7 +159,11 @@ void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, switchtobuf(d, buf, buf + size); } d->checkpoint = ptr(d); - return d; // For the JIT. + if (d->top->groupnum < 0) { + CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); + d->checkpoint = ptr(d); + } + return DECODE_OK; } // Suspends the decoder at the last checkpoint, without saving any residual @@ -176,10 +178,10 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { assert(!in_residual_buf(d, d->checkpoint)); assert(d->buf == d->buf_param); size_t consumed = d->checkpoint - d->buf; - d->bufstart_ofs += consumed + d->skip; + d->bufstart_ofs += consumed; d->residual_end = d->residual; switchtobuf(d, d->residual, d->residual_end); - return consumed + d->skip; + return consumed; } } @@ -209,11 +211,11 @@ static size_t suspend_save(upb_pbdecoder *d) { assert(save <= sizeof(d->residual)); memcpy(d->residual, ptr(d), save); d->residual_end = d->residual + save; - d->bufstart_ofs = offset(d) + d->skip; + d->bufstart_ofs = offset(d); } switchtobuf(d, d->residual, d->residual_end); - return d->size_param + d->skip; + return d->size_param; } static int32_t skip(upb_pbdecoder *d, size_t bytes) { @@ -221,12 +223,16 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) { if (curbufleft(d) >= bytes) { // Skipped data is all in current buffer. advance(d, bytes); + return DECODE_OK; } else { // Skipped data extends beyond currently available buffers. - d->skip = bytes - curbufleft(d); - advance(d, curbufleft(d)); + d->pc = d->last; + size_t skip = bytes - curbufleft(d); + d->bufstart_ofs += (d->end - d->buf) + skip; + d->residual_end = d->residual; + switchtobuf(d, d->residual, d->residual_end); + return d->size_param + skip; } - return DECODE_OK; } FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { @@ -247,8 +253,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, if (curbufleft(d) >= bytes) { consumebytes(d, buf + avail, bytes); return DECODE_OK; - } else if (d->data_end - d->buf == d->top->end_ofs - d->bufstart_ofs) { - seterr(d, "Submessage ended in the middle of a value"); + } else if (d->data_end == d->delim_end) { + seterr(d, "Submessage ended in the middle of a value or group"); return upb_pbdecoder_suspend(d); } else { return suspend_save(d); @@ -378,11 +384,24 @@ static bool push(upb_pbdecoder *d, uint64_t end) { fr++; fr->end_ofs = end; fr->dispatch = NULL; - fr->groupnum = -1; + fr->groupnum = 0; d->top = fr; return true; } +static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { + // While we expect to see an "end" tag (either ENDGROUP or a non-sequence + // field number) prior to hitting any enclosing submessage end, pushing our + // existing delim end prevents us from continuing to parse values from a + // corrupt proto that doesn't give us an END tag in time. + if (!push(d, d->top->end_ofs)) + return false; + d->top->groupnum = arg; + return true; +} + +static void pop(upb_pbdecoder *d) { d->top--; } + NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected) { uint64_t data = 0; @@ -400,46 +419,103 @@ NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, } } -int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum, +int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, uint8_t wire_type) { - if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) { - seterr(d, "Invalid field number"); - return upb_pbdecoder_suspend(d); - } - - if (wire_type == UPB_WIRE_TYPE_END_GROUP) { - if (fieldnum != d->top->groupnum) { - seterr(d, "Unmatched ENDGROUP tag."); + if (fieldnum >= 0) + goto have_tag; + + while (true) { + uint32_t tag; + CHECK_RETURN(decode_v32(d, &tag)); + wire_type = tag & 0x7; + fieldnum = tag >> 3; + +have_tag: + if (fieldnum == 0) { + seterr(d, "Saw invalid field number (0)"); return upb_pbdecoder_suspend(d); } - return DECODE_ENDGROUP; - } - // TODO: deliver to unknown field callback. - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint64_t u64; - return decode_varint(d, &u64); + // TODO: deliver to unknown field callback. + switch (wire_type) { + case UPB_WIRE_TYPE_32BIT: + CHECK_RETURN(skip(d, 4)); + break; + case UPB_WIRE_TYPE_64BIT: + CHECK_RETURN(skip(d, 8)); + break; + case UPB_WIRE_TYPE_VARINT: { + uint64_t u64; + CHECK_RETURN(decode_varint(d, &u64)); + break; + } + case UPB_WIRE_TYPE_DELIMITED: { + uint32_t len; + CHECK_RETURN(decode_v32(d, &len)); + CHECK_RETURN(skip(d, len)); + break; + } + case UPB_WIRE_TYPE_START_GROUP: + CHECK_SUSPEND(pushtagdelim(d, -fieldnum)); + break; + case UPB_WIRE_TYPE_END_GROUP: + if (fieldnum == -d->top->groupnum) { + pop(d); + } else if (fieldnum == d->top->groupnum) { + return DECODE_ENDGROUP; + } else { + seterr(d, "Unmatched ENDGROUP tag."); + return upb_pbdecoder_suspend(d); + } + break; + default: + seterr(d, "Invalid wire type"); + return upb_pbdecoder_suspend(d); } - case UPB_WIRE_TYPE_32BIT: - return skip(d, 4); - case UPB_WIRE_TYPE_64BIT: - return skip(d, 8); - case UPB_WIRE_TYPE_DELIMITED: { - uint32_t len; - CHECK_RETURN(decode_v32(d, &len)); - return skip(d, len); + + if (d->top->groupnum >= 0) { + return DECODE_OK; } - case UPB_WIRE_TYPE_START_GROUP: - seterr(d, "Can't handle unknown groups yet"); - return upb_pbdecoder_suspend(d); - case UPB_WIRE_TYPE_END_GROUP: - default: - seterr(d, "Invalid wire type"); + + if (ptr(d) == d->delim_end) { + seterr(d, "Enclosing submessage ended in the middle of value or group"); + // Unlike most errors we notice during parsing, right now we have consumed + // all of the user's input. + // + // There are three different options for how to handle this case: + // + // 1. decode() = short count, error = set + // 2. decode() = full count, error = set + // 3. decode() = full count, error NOT set, short count and error will + // be reported on next call to decode() (or end()) + // + // (1) and (3) have the advantage that they preserve the invariant that an + // error occurs iff decode() returns a short count. + // + // (2) and (3) have the advantage of reflecting the fact that all of the + // bytes were in fact parsed (and possibly delivered to the unknown field + // handler, in the future when that is supported). + // + // (3) requires extra state in the decode (a place to store the "permanent + // error" that we should return for all subsequent attempts to decode). + // But we likely want this anyway. + // + // Right now we do (1), thanks to the fact that we checkpoint *after* this + // check. (3) may be a better choice long term; unclear at the moment. return upb_pbdecoder_suspend(d); + } + + checkpoint(d); } } +static void goto_endmsg(upb_pbdecoder *d) { + upb_value v; + bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v); + UPB_ASSERT_VAR(found, found); + d->pc = d->top->base + upb_value_getuint64(v); +} + static int32_t dispatch(upb_pbdecoder *d) { upb_inttable *dispatch = d->top->dispatch; @@ -470,7 +546,7 @@ static int32_t dispatch(upb_pbdecoder *d) { int32_t ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); if (ret == DECODE_ENDGROUP) { - d->pc = d->top->base - 1; // Back to OP_ENDMSG. + goto_endmsg(d); return DECODE_OK; } else { d->pc = d->last - 1; // Rewind to CHECKDELIM. @@ -493,7 +569,11 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, upb_pbdecoder *d = closure; const mgroup *group = hd; assert(buf); - upb_pbdecoder_resume(d, NULL, buf, size, handle); + int32_t result = upb_pbdecoder_resume(d, NULL, buf, size, handle); + if (result == DECODE_ENDGROUP) { + goto_endmsg(d); + } + CHECK_RETURN(result); UPB_UNUSED(group); #define VMCASE(op, code) \ @@ -552,8 +632,6 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, ) VMCASE(OP_ENDMSG, CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status)); - assert(d->call_len > 0); - d->pc = d->callstack[--d->call_len]; ) VMCASE(OP_STARTSEQ, upb_pbdecoder_frame *outer = outer_frame(d); @@ -579,25 +657,39 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, ) VMCASE(OP_STRING, uint32_t len = curbufleft(d); - CHECK_SUSPEND( - upb_sink_putstring(&d->top->sink, arg, ptr(d), len, handle)); - advance(d, len); - if (d->delim_end == NULL) { // String extends beyond this buf? - d->pc--; - d->bufstart_ofs += size; - d->residual_end = d->residual; - return size; + size_t n = upb_sink_putstring(&d->top->sink, arg, ptr(d), len, handle); + if (n > len) { + if (n > d->top->end_ofs - offset(d)) { + seterr(d, "Tried to skip past end of string."); + return upb_pbdecoder_suspend(d); + } else { + return skip(d, n); + } + } else if (n < len) { + advance(d, n); + return upb_pbdecoder_suspend(d); + } else { + advance(d, n); + if (d->delim_end == NULL) { // String extends beyond this buf? + d->pc--; // Do OP_STRING again when we resume. + d->bufstart_ofs += size; + d->residual_end = d->residual; + return size; + } } ) VMCASE(OP_ENDSTR, CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg)); ) VMCASE(OP_PUSHTAGDELIM, - CHECK_SUSPEND(push(d, d->top->end_ofs)); + CHECK_SUSPEND(pushtagdelim(d, arg)); + ) + VMCASE(OP_SETBIGGROUPNUM, + d->top->groupnum = *d->pc++; ) VMCASE(OP_POP, assert(d->top > d->stack); - d->top--; + pop(d); ) VMCASE(OP_PUSHLENDELIM, uint32_t len; @@ -608,13 +700,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, VMCASE(OP_SETDELIM, set_delim_end(d); ) - VMCASE(OP_SETGROUPNUM, - d->top->groupnum = arg; - ) - VMCASE(OP_SETBIGGROUPNUM, - d->top->groupnum = *d->pc++; - ) VMCASE(OP_CHECKDELIM, + // We are guaranteed of this assert because we never allow ourselves to + // consume bytes beyond data_end, which covers delim_end when non-NULL. assert(!(d->delim_end && ptr(d) > d->delim_end)); if (ptr(d) == d->delim_end) d->pc += longofs; @@ -623,6 +711,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, d->callstack[d->call_len++] = d->pc; d->pc += longofs; ) + VMCASE(OP_RET, + assert(d->call_len > 0); + d->pc = d->callstack[--d->call_len]; + ) VMCASE(OP_BRANCH, d->pc += longofs; ) @@ -755,6 +847,7 @@ void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m, void upb_pbdecoder_reset(upb_pbdecoder *d) { d->top = d->stack; d->top->end_ofs = UINT64_MAX; + d->top->groupnum = 0; d->bufstart_ofs = 0; d->ptr = d->residual; d->buf = d->residual; |