diff options
author | Joshua Haberman <jhaberman@gmail.com> | 2015-08-19 16:57:49 -0700 |
---|---|---|
committer | Joshua Haberman <jhaberman@gmail.com> | 2015-08-19 16:57:49 -0700 |
commit | 77d45edfb39171aef5e0ae09a471e5d8e2679a7f (patch) | |
tree | a307fbb9bf6dbd7930be74ff481631486dc3b494 /upb/pb | |
parent | 782670e6a3d924a66adf23c94732603103b97223 (diff) | |
parent | 1e870951d7707d45fa7104658a4dde177d9fb62e (diff) |
Merge pull request #38 from haberman/decoderfix2
Added lots of decoder tests and fixed lots of bugs.
Diffstat (limited to 'upb/pb')
-rw-r--r-- | upb/pb/compile_decoder.c | 7 | ||||
-rw-r--r-- | upb/pb/decoder.c | 63 | ||||
-rw-r--r-- | upb/pb/decoder.h | 7 | ||||
-rw-r--r-- | upb/pb/decoder.int.h | 7 |
4 files changed, 60 insertions, 24 deletions
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c index 2828247..b75f45c 100644 --- a/upb/pb/compile_decoder.c +++ b/upb/pb/compile_decoder.c @@ -596,7 +596,12 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f, if (!sub_m) { /* Don't emit any code for this field at all; it will be parsed as an - * unknown field. */ + * unknown field. + * + * TODO(haberman): we should change this to parse it as a string field + * instead. It will probably be faster, but more importantly, once we + * start vending unknown fields, a field shouldn't be treated as unknown + * just because it doesn't have subhandlers registered. */ return; } diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 7eca6d7..e2e79ae 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -36,6 +36,11 @@ static const char *kUnterminatedVarint = "Unterminated varint."; static opcode halt = OP_HALT; +/* A dummy character we can point to when the user passes us a NULL buffer. + * We need this because in C (NULL + 0) and (NULL - NULL) are undefined + * behavior, which would invalidate functions like curbufleft(). */ +static const char dummy_char; + /* Whether an op consumes any of the input buffer. */ static bool consumes_input(opcode op) { switch (op) { @@ -191,7 +196,7 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) { if (bytes > delim_remaining(d)) { seterr(d, "Skipped value extended beyond enclosing submessage."); return upb_pbdecoder_suspend(d); - } else if (bufleft(d) > bytes) { + } else if (bufleft(d) >= bytes) { /* Skipped data is all in current buffer, and more is still available. */ advance(d, bytes); d->skip = 0; @@ -213,10 +218,39 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, size_t size, const upb_bufhandle *handle) { UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ - d->buf_param = buf; + /* d->skip and d->residual_end could probably elegantly be represented + * as a single variable, to more easily represent this invariant. */ + assert(!(d->skip && d->residual_end > d->residual)); + + /* We need to remember the original size_param, so that the value we return + * is relative to it, even if we do some skipping first. */ d->size_param = size; d->handle = handle; + /* Have to handle this case specially (ie. not with skip()) because the user + * is allowed to pass a NULL buffer here, which won't allow us to safely + * calculate a d->end or use our normal functions like curbufleft(). */ + if (d->skip && d->skip >= size) { + d->skip -= size; + d->bufstart_ofs += size; + buf = &dummy_char; + size = 0; + + /* We can't just return now, because we might need to execute some ops + * like CHECKDELIM, which could call some callbacks and pop the stack. */ + } + + /* We need to pretend that this was the actual buffer param, since some of the + * calculations assume that d->ptr/d->buf is relative to this. */ + d->buf_param = buf; + + if (!buf) { + /* NULL buf is ok if its entire span is covered by the "skip" above, but + * by this point we know that "skip" doesn't cover the buffer. */ + seterr(d, "Passed NULL buffer over non-skippable region."); + return upb_pbdecoder_suspend(d); + } + if (d->residual_end > d->residual) { /* We have residual bytes from the last buffer. */ assert(d->ptr == d->residual); @@ -226,23 +260,18 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, d->checkpoint = d->ptr; + /* Handle skips that don't cover the whole buffer (as above). */ if (d->skip) { size_t skip_bytes = d->skip; d->skip = 0; CHECK_RETURN(skip(d, skip_bytes)); - d->checkpoint = d->ptr; - } - - if (!buf) { - /* NULL buf is ok if its entire span is covered by the "skip" above, but - * by this point we know that "skip" doesn't cover the buffer. */ - seterr(d, "Passed NULL buffer over non-skippable region."); - return upb_pbdecoder_suspend(d); + checkpoint(d); } + /* If we're inside an unknown group, continue to parse unknown values. */ if (d->top->groupnum < 0) { CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); - d->checkpoint = d->ptr; + checkpoint(d); } return DECODE_OK; @@ -257,15 +286,14 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { d->ptr = d->residual; return 0; } else { - size_t consumed; + size_t ret = d->size_param - (d->end - d->checkpoint); assert(!in_residual_buf(d, d->checkpoint)); - assert(d->buf == d->buf_param); + assert(d->buf == d->buf_param || d->buf == &dummy_char); - consumed = d->checkpoint - d->buf; - d->bufstart_ofs += consumed; + d->bufstart_ofs += (d->checkpoint - d->buf); d->residual_end = d->residual; switchtobuf(d, d->residual, d->residual_end); - return consumed; + return ret; } } @@ -383,8 +411,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, int bitpos; *u64 = 0; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - int32_t ret = getbytes(d, &byte, 1); - if (ret >= 0) return ret; + CHECK_RETURN(getbytes(d, &byte, 1)); *u64 |= (uint64_t)(byte & 0x7F) << bitpos; } if(bitpos == 70 && (byte & 0x80)) { diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 65316b4..8172a99 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -36,6 +36,13 @@ UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts) UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted, upb_pbdecodermethod, upb_refcounted) +/* The maximum number of bytes we are required to buffer internally between + * calls to the decoder. The value is 14: a 5 byte unknown tag plus ten-byte + * varint, less one because we are buffering an incomplete value. + * + * Should only be used by unit tests. */ +#define UPB_DECODER_MAX_RESIDUAL_BYTES 14 + #ifdef __cplusplus /* The parameters one uses to construct a DecoderMethod. diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index a63f74b..f2bf242 100644 --- a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -218,11 +218,8 @@ struct upb_pbdecoder { /* Overall stream offset of "buf." */ uint64_t bufstart_ofs; - /* Buffer for residual bytes not parsed from the previous buffer. - * The maximum number of residual bytes we require is 12; a five-byte - * unknown tag plus an eight-byte value, less one because the value - * is only a partial value. */ - char residual[12]; + /* Buffer for residual bytes not parsed from the previous buffer. */ + char residual[UPB_DECODER_MAX_RESIDUAL_BYTES]; char *residual_end; /* Bytes of data that should be discarded from the input beore we start |