From 146a9c22efac6cb746ef4024144f28af891dd2b4 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 14 Aug 2015 18:54:11 -0700 Subject: Added lots of decoder tests and fixed lots of bugs. --- upb/pb/compile_decoder.c | 7 +++++- upb/pb/decoder.c | 63 ++++++++++++++++++++++++++++++++++-------------- upb/pb/decoder.int.h | 4 +-- 3 files changed, 53 insertions(+), 21 deletions(-) (limited to 'upb/pb') diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c index 2828247..b75f45c 100644 --- a/upb/pb/compile_decoder.c +++ b/upb/pb/compile_decoder.c @@ -596,7 +596,12 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f, if (!sub_m) { /* Don't emit any code for this field at all; it will be parsed as an - * unknown field. */ + * unknown field. + * + * TODO(haberman): we should change this to parse it as a string field + * instead. It will probably be faster, but more importantly, once we + * start vending unknown fields, a field shouldn't be treated as unknown + * just because it doesn't have subhandlers registered. */ return; } diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 7eca6d7..e2e79ae 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -36,6 +36,11 @@ static const char *kUnterminatedVarint = "Unterminated varint."; static opcode halt = OP_HALT; +/* A dummy character we can point to when the user passes us a NULL buffer. + * We need this because in C (NULL + 0) and (NULL - NULL) are undefined + * behavior, which would invalidate functions like curbufleft(). */ +static const char dummy_char; + /* Whether an op consumes any of the input buffer. */ static bool consumes_input(opcode op) { switch (op) { @@ -191,7 +196,7 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) { if (bytes > delim_remaining(d)) { seterr(d, "Skipped value extended beyond enclosing submessage."); return upb_pbdecoder_suspend(d); - } else if (bufleft(d) > bytes) { + } else if (bufleft(d) >= bytes) { /* Skipped data is all in current buffer, and more is still available. */ advance(d, bytes); d->skip = 0; @@ -213,10 +218,39 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, size_t size, const upb_bufhandle *handle) { UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ - d->buf_param = buf; + /* d->skip and d->residual_end could probably elegantly be represented + * as a single variable, to more easily represent this invariant. */ + assert(!(d->skip && d->residual_end > d->residual)); + + /* We need to remember the original size_param, so that the value we return + * is relative to it, even if we do some skipping first. */ d->size_param = size; d->handle = handle; + /* Have to handle this case specially (ie. not with skip()) because the user + * is allowed to pass a NULL buffer here, which won't allow us to safely + * calculate a d->end or use our normal functions like curbufleft(). */ + if (d->skip && d->skip >= size) { + d->skip -= size; + d->bufstart_ofs += size; + buf = &dummy_char; + size = 0; + + /* We can't just return now, because we might need to execute some ops + * like CHECKDELIM, which could call some callbacks and pop the stack. */ + } + + /* We need to pretend that this was the actual buffer param, since some of the + * calculations assume that d->ptr/d->buf is relative to this. */ + d->buf_param = buf; + + if (!buf) { + /* NULL buf is ok if its entire span is covered by the "skip" above, but + * by this point we know that "skip" doesn't cover the buffer. */ + seterr(d, "Passed NULL buffer over non-skippable region."); + return upb_pbdecoder_suspend(d); + } + if (d->residual_end > d->residual) { /* We have residual bytes from the last buffer. */ assert(d->ptr == d->residual); @@ -226,23 +260,18 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, d->checkpoint = d->ptr; + /* Handle skips that don't cover the whole buffer (as above). */ if (d->skip) { size_t skip_bytes = d->skip; d->skip = 0; CHECK_RETURN(skip(d, skip_bytes)); - d->checkpoint = d->ptr; - } - - if (!buf) { - /* NULL buf is ok if its entire span is covered by the "skip" above, but - * by this point we know that "skip" doesn't cover the buffer. */ - seterr(d, "Passed NULL buffer over non-skippable region."); - return upb_pbdecoder_suspend(d); + checkpoint(d); } + /* If we're inside an unknown group, continue to parse unknown values. */ if (d->top->groupnum < 0) { CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); - d->checkpoint = d->ptr; + checkpoint(d); } return DECODE_OK; @@ -257,15 +286,14 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { d->ptr = d->residual; return 0; } else { - size_t consumed; + size_t ret = d->size_param - (d->end - d->checkpoint); assert(!in_residual_buf(d, d->checkpoint)); - assert(d->buf == d->buf_param); + assert(d->buf == d->buf_param || d->buf == &dummy_char); - consumed = d->checkpoint - d->buf; - d->bufstart_ofs += consumed; + d->bufstart_ofs += (d->checkpoint - d->buf); d->residual_end = d->residual; switchtobuf(d, d->residual, d->residual_end); - return consumed; + return ret; } } @@ -383,8 +411,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, int bitpos; *u64 = 0; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - int32_t ret = getbytes(d, &byte, 1); - if (ret >= 0) return ret; + CHECK_RETURN(getbytes(d, &byte, 1)); *u64 |= (uint64_t)(byte & 0x7F) << bitpos; } if(bitpos == 70 && (byte & 0x80)) { diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index a63f74b..b7e462b 100644 --- a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -220,9 +220,9 @@ struct upb_pbdecoder { /* Buffer for residual bytes not parsed from the previous buffer. * The maximum number of residual bytes we require is 12; a five-byte - * unknown tag plus an eight-byte value, less one because the value + * unknown tag plus a ten-byte value, less one because the value * is only a partial value. */ - char residual[12]; + char residual[14]; char *residual_end; /* Bytes of data that should be discarded from the input beore we start -- cgit v1.2.3 From 1e870951d7707d45fa7104658a4dde177d9fb62e Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Wed, 19 Aug 2015 15:49:29 -0700 Subject: Refer to a shared definition for max buffered bytes. --- tests/pb/test_decoder.cc | 11 +++++------ upb/pb/decoder.h | 7 +++++++ upb/pb/decoder.int.h | 7 ++----- 3 files changed, 14 insertions(+), 11 deletions(-) (limited to 'upb/pb') diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc index 94293cb..a01e999 100644 --- a/tests/pb/test_decoder.cc +++ b/tests/pb/test_decoder.cc @@ -570,15 +570,14 @@ uint32_t Hash(const string& proto, const string* expected_output, size_t seam1, } void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) { - // We could have parsed as many as 10 bytes fewer than what the decoder - // previously accepted, since we can buffer up to 12 partial bytes internally - // before accumulating an entire value. - const int MAX_BUFFERED = 14; - // We can't have parsed more data than the decoder callback is telling us it // parsed. ASSERT(decoder.BytesParsed() <= ofs); - ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED)); + + // The difference between what we've decoded and what the decoder has accepted + // represents the internally buffered amount. This amount should not exceed + // this value which comes from decoder.int.h. + ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES)); } static bool parse(VerboseParserEnvironment* env, diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 65316b4..8172a99 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -36,6 +36,13 @@ UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts) UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted, upb_pbdecodermethod, upb_refcounted) +/* The maximum number of bytes we are required to buffer internally between + * calls to the decoder. The value is 14: a 5 byte unknown tag plus ten-byte + * varint, less one because we are buffering an incomplete value. + * + * Should only be used by unit tests. */ +#define UPB_DECODER_MAX_RESIDUAL_BYTES 14 + #ifdef __cplusplus /* The parameters one uses to construct a DecoderMethod. diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index b7e462b..f2bf242 100644 --- a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -218,11 +218,8 @@ struct upb_pbdecoder { /* Overall stream offset of "buf." */ uint64_t bufstart_ofs; - /* Buffer for residual bytes not parsed from the previous buffer. - * The maximum number of residual bytes we require is 12; a five-byte - * unknown tag plus a ten-byte value, less one because the value - * is only a partial value. */ - char residual[14]; + /* Buffer for residual bytes not parsed from the previous buffer. */ + char residual[UPB_DECODER_MAX_RESIDUAL_BYTES]; char *residual_end; /* Bytes of data that should be discarded from the input beore we start -- cgit v1.2.3