summaryrefslogtreecommitdiff
path: root/upb/pb
diff options
context:
space:
mode:
author Josh Haberman <jhaberman@gmail.com> 2015-07-30 14:54:03 -0700
committer Josh Haberman <jhaberman@gmail.com> 2015-07-30 14:54:03 -0700
commit abcb6428ad9bf7d650455a0a180647a05183fd9d (patch)
tree 0408268278840d8954ef3d0c6d18c8ba16524817 /upb/pb
parent ecaf82d13401bf4d8ae3fd7e099a11c94d554555 (diff)
Changed parser semantics around skipping.
Prior to this change:

  parse(buf, len) -> len + N

...would indicate that the next N bytes of the input are not needed, *and* would advance the decoding position by this much.

After this change:

  parse(buf, len) -> len + N
  parse(NULL, N) -> N

...can be used to achieve the same thing. But skipping the N bytes is now explicitly performed by the user. A user that doesn't want/need to skip can just say:

  parsed = parse(buf, len);
  if (parsed < len) {
    // Handle suspend, advance stream by "parsed".
  } else {
    // Stream was advanced by "len" (even if parsed > len).
  }

Updated unit tests to test this new behavior, and refactored test utility code a bit to support it.
Diffstat (limited to 'upb/pb')
-rw-r--r-- upb/pb/decoder.c 155
-rw-r--r-- upb/pb/decoder.int.h 6
2 files changed, 97 insertions, 64 deletions
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 905fdd1..34aed1f 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -60,6 +60,28 @@ static bool consumes_input(opcode op) {
}
}
+static size_t stacksize(upb_pbdecoder *d, size_t entries) {
+ UPB_UNUSED(d);
+ return entries * sizeof(upb_pbdecoder_frame);
+}
+
+static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
+ UPB_UNUSED(d);
+
+#ifdef UPB_USE_JIT_X64
+ if (d->method_->is_native_) {
+ /* Each native stack frame needs two pointers, plus we need a few frames for
+ * the enter/exit trampolines. */
+ size_t ret = entries * sizeof(void*) * 2;
+ ret += sizeof(void*) * 10;
+ return ret;
+ }
+#endif
+
+ return entries * sizeof(uint32_t*);
+}
+
+
static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
/* It's unfortunate that we have to micro-manage the compiler with
@@ -145,24 +167,66 @@ static void checkpoint(upb_pbdecoder *d) {
d->checkpoint = d->ptr;
}
+/* Skips "bytes" bytes in the stream, which may be more than available. If we
+ * skip more bytes than are available, we return a long read count to the caller
+ * indicating how many bytes can be skipped over before passing actual data
+ * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
+ * won't actually be read.
+ */
+static int32_t skip(upb_pbdecoder *d, size_t bytes) {
+ assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
+ if (curbufleft(d) > bytes) {
+ /* Skipped data is all in current buffer, and more is still available. */
+ advance(d, bytes);
+ d->skip = 0;
+ return DECODE_OK;
+ } else {
+ /* Skipped data extends beyond currently available buffers. */
+ d->pc = d->last;
+ d->skip = bytes - curbufleft(d);
+ d->bufstart_ofs += (d->end - d->buf);
+ d->residual_end = d->residual;
+ switchtobuf(d, d->residual, d->residual_end);
+ return d->size_param + d->skip;
+ }
+}
+
+
/* Resumes the decoder from an initial state or from a previous suspend. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
size_t size, const upb_bufhandle *handle) {
UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
+
d->buf_param = buf;
d->size_param = size;
d->handle = handle;
+
if (d->residual_end > d->residual) {
/* We have residual bytes from the last buffer. */
assert(d->ptr == d->residual);
} else {
switchtobuf(d, buf, buf + size);
}
+
d->checkpoint = d->ptr;
+
+ if (d->skip) {
+ CHECK_RETURN(skip(d, d->skip));
+ d->checkpoint = d->ptr;
+ }
+
+ if (!buf) {
+ /* NULL buf is ok if its entire span is covered by the "skip" above, but
+ * by this point we know that "skip" doesn't cover the buffer. */
+ seterr(d, "Passed NULL buffer over non-skippable region.");
+ return upb_pbdecoder_suspend(d);
+ }
+
if (d->top->groupnum < 0) {
CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
d->checkpoint = d->ptr;
}
+
return DECODE_OK;
}
@@ -222,28 +286,6 @@ static size_t suspend_save(upb_pbdecoder *d) {
return d->size_param;
}
-/* Skips "bytes" bytes in the stream, which may be more than available. If we
- * skip more bytes than are available, we return a long read count to the caller
- * indicating how many bytes the caller should skip before passing a new buffer.
- */
-static int32_t skip(upb_pbdecoder *d, size_t bytes) {
- assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
- if (curbufleft(d) >= bytes) {
- /* Skipped data is all in current buffer. */
- advance(d, bytes);
- return DECODE_OK;
- } else {
- /* Skipped data extends beyond currently available buffers. */
- size_t skip;
- d->pc = d->last;
- skip = bytes - curbufleft(d);
- d->bufstart_ofs += (d->end - d->buf) + skip;
- d->residual_end = d->residual;
- switchtobuf(d, d->residual, d->residual_end);
- return d->size_param + skip;
- }
-}
-
/* Copies the next "bytes" bytes into "buf" and advances the stream.
* Requires that this many bytes are available in the current buffer. */
UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
@@ -618,18 +660,8 @@ upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
/* The main decoder VM function. Uses traditional bytecode dispatch loop with a
* switch() statement. */
-size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
- size_t size, const upb_bufhandle *handle) {
- upb_pbdecoder *d = closure;
- const mgroup *group = hd;
- int32_t result;
- assert(buf);
- result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
- if (result == DECODE_ENDGROUP) {
- goto_endmsg(d);
- }
- CHECK_RETURN(result);
- UPB_UNUSED(group);
+size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
+ const upb_bufhandle* handle) {
#define VMCASE(op, code) \
case op: { code; if (consumes_input(op)) checkpoint(d); break; }
@@ -652,6 +684,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
arg = instruction >> 8;
longofs = arg;
assert(d->ptr != d->residual_end);
+ UPB_UNUSED(group);
#ifdef UPB_DUMP_BYTECODE
fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
"%x %s (%d)\n",
@@ -827,12 +860,15 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
CHECK_RETURN(dispatch(d));
})
VMCASE(OP_HALT, {
- return size;
+ return d->size_param;
})
}
}
}
+
+/* BytesHandler handlers ******************************************************/
+
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
upb_pbdecoder *d = closure;
UPB_UNUSED(size_hint);
@@ -841,6 +877,7 @@ void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
d->call_len = 1;
d->callstack[0] = &halt;
d->pc = pc;
+ d->skip = 0;
return d;
}
@@ -851,6 +888,7 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
d->top->end_ofs = UINT64_MAX;
d->bufstart_ofs = 0;
d->call_len = 0;
+ d->skip = 0;
return d;
}
@@ -859,12 +897,9 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
const upb_pbdecodermethod *method = handler_data;
uint64_t end;
char dummy;
-#ifdef UPB_USE_JIT_X64
- const mgroup *group = (const mgroup*)method->group;
-#endif
if (d->residual_end > d->residual) {
- seterr(d, "Unexpected EOF");
+ seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
return false;
}
@@ -873,15 +908,15 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
return false;
}
- /* Message ends here. */
+ /* The user's end() call indicates that the message ends here. */
end = offset(d);
d->top->end_ofs = end;
#ifdef UPB_USE_JIT_X64
- if (group->jit_code) {
+ if (method->group->jit_code) {
if (d->top != d->stack)
d->stack->end_ofs = 0;
- group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
+ method->group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
} else
#endif
{
@@ -901,13 +936,26 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
}
if (d->call_len != 0) {
- seterr(d, "Unexpected EOF");
+ seterr(d, "Unexpected EOF inside submessage or group");
return false;
}
return true;
}
+size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
+ size_t size, const upb_bufhandle *handle) {
+ int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
+
+ if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
+ CHECK_RETURN(result);
+
+ return run_decoder_vm(decoder, group, handle);
+}
+
+
+/* Public API *****************************************************************/
+
void upb_pbdecoder_reset(upb_pbdecoder *d) {
d->top = d->stack;
d->top->groupnum = 0;
@@ -917,27 +965,6 @@ void upb_pbdecoder_reset(upb_pbdecoder *d) {
d->residual_end = d->residual;
}
-static size_t stacksize(upb_pbdecoder *d, size_t entries) {
- UPB_UNUSED(d);
- return entries * sizeof(upb_pbdecoder_frame);
-}
-
-static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
- UPB_UNUSED(d);
-
-#ifdef UPB_USE_JIT_X64
- if (d->method_->is_native_) {
- /* Each native stack frame needs two pointers, plus we need a few frames for
- * the enter/exit trampolines. */
- size_t ret = entries * sizeof(void*) * 2;
- ret += sizeof(void*) * 10;
- return ret;
- }
-#endif
-
- return entries * sizeof(uint32_t*);
-}
-
upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
upb_sink *sink) {
const size_t default_max_nesting = 64;
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index 2d4485a..be5d044 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -225,6 +225,12 @@ struct upb_pbdecoder {
char residual[12];
char *residual_end;
+ /* Bytes of data that should be discarded from the input before we start
+ * parsing again. We set this when we internally determine that we can
+ * safely skip the next N bytes, but this region extends past the current
+ * user buffer. */
+ size_t skip;
+
/* Stores the user buffer passed to our decode function. */
const char *buf_param;
size_t size_param;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback