From 9aa7e559d634a3ecf087ee376f82704e2290f478 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 30 Jan 2011 16:28:37 -0800 Subject: Fixes to decoder and textprinter: it works (for some input)! A protobuf -> text stream for descriptor.proto now outputs the same text as proto2. --- stream/upb_decoder.c | 98 ++++++++++++++++++++++++------------------------ stream/upb_textprinter.c | 19 ++++------ tests/test_decoder.c | 8 +++- 3 files changed, 62 insertions(+), 63 deletions(-) diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index a7a2c76..3a279a1 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -51,7 +51,6 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } // upb_decoder_frame is one frame of that stack. typedef struct { upb_msgdef *msgdef; - upb_fielddef *field; size_t end_offset; // For groups, 0. } upb_decoder_frame; @@ -82,29 +81,37 @@ typedef struct { // Our current position in the data buffer. const char *ptr; - // Number of bytes available at ptr, until either end-of-buf or - // end-of-submessage (whichever is smaller). + // End of this submessage, relative to *ptr. + const char *submsg_end; + + // Number of bytes available at ptr. size_t len; // Msgdef for the current level. upb_msgdef *msgdef; } upb_dstate; +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. +#define UPB_GROUP_END_OFFSET 0 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { s->ptr += len; s->len -= len; } -static upb_flow_t upb_pop(upb_decoder *d); +INLINE void upb_dstate_setmsgend(upb_decoder *d, upb_dstate *s) { + s->submsg_end = (d->top->end_offset == UPB_GROUP_END_OFFSET) ? + (void*)UINTPTR_MAX : + upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); +} -// Constant used to signal that the submessage is a group and therefore we -// don't know its end offset. This cannot be the offset of a real submessage -// end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET 0 -#define UPB_MAX_VARINT_ENCODED_SIZE 10 +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s); // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "len" appropriately. +// from the stream to "data", adjusting the dstate appropriately. static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, upb_dstate *s) { while (1) { @@ -112,41 +119,17 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, memcpy(data, s->ptr, to_copy); upb_dstate_advance(s, to_copy); bytes_wanted -= to_copy; - if (bytes_wanted == 0) return true; - - // Did "len" indicate end-of-submessage or end-of-buffer? - ssize_t buf_offset = - d->buf ? ((const char*)s->ptr - upb_string_getrobuf(d->buf)) : 0; - if (d->top->end_offset > 0 && - d->top->end_offset == d->buf_stream_offset + buf_offset) { - // End-of-submessage. - if (bytes_wanted > 0) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); - return false; - } - if (upb_pop(d) != UPB_CONTINUE) return false; - } else { - // End-of-buffer. - if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); - upb_string_recycle(&d->buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - s->ptr = upb_string_getrobuf(d->buf); + if (bytes_wanted == 0) { + upb_dstate_setmsgend(d, s); + return true; } - // Wait for end-of-submessage or end-of-buffer, whichever comes first. - size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - size_t submsg_remaining = - d->top->end_offset - d->buf_stream_offset - offset_in_buf; - if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining < submsg_remaining) { - s->len = buf_remaining; - } else { - // Check that non of our subtraction overflowed. - assert(d->top->end_offset > d->buf_stream_offset); - assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); - s->len = submsg_remaining; - } + // Get next buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + s->ptr = upb_string_getrobuf(d->buf); + s->len = upb_string_len(d->buf); } } @@ -266,7 +249,6 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, upb_value submsg_len, upb_fieldtype_t type) { - d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); @@ -274,13 +256,16 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); + upb_dstate_setmsgend(d, s); return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static upb_flow_t upb_pop(upb_decoder *d) { +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s) { d->top--; + upb_dstate_setmsgend(d, s); return upb_dispatch_endsubmsg(&d->dispatcher); } @@ -290,7 +275,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. - upb_dstate state = {NULL, 0, d->top->msgdef}; + upb_dstate state = {NULL, (void*)0x1, 0, d->top->msgdef}; upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG @@ -301,6 +286,15 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Main loop: executed once per tag/field pair. while(1) { + // Check for end-of-submessage. + while (state.ptr >= state.submsg_end) { + if (state.ptr > state.submsg_end) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); + goto err; + } + CHECK_FLOW(upb_pop(d, &state)); + } + // Parse/handle tag. upb_tag tag; if (!upb_decode_tag(d, &state, &tag)) { @@ -308,6 +302,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Normal end-of-file. upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + upb_string_unref(str); return; } else { if (status->code == UPB_EOF) { @@ -322,12 +317,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // since most types will read a varint here. upb_value val; switch (tag.wire_type) { + case UPB_WIRE_TYPE_START_GROUP: + break; // Nothing to do now, below we will push appropriately. case UPB_WIRE_TYPE_END_GROUP: if(d->top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK_FLOW(upb_pop(d)); + CHECK_FLOW(upb_pop(d, &state)); continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: @@ -383,6 +380,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { } err: + upb_string_unref(str); if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); } @@ -417,12 +415,14 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { d->bytesrc = bytesrc; d->top = &d->stack[0]; d->top->msgdef = d->toplevel_msgdef; - d->top->end_offset = SIZE_MAX; // never want to end top-level message. + // Never want to end top-level message, so treat it like a group. + d->top->end_offset = UPB_GROUP_END_OFFSET; upb_string_unref(d->buf); d->buf = NULL; } void upb_decoder_free(upb_decoder *d) { + upb_string_unref(d->buf); free(d); } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 531da12..894a1ea 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -30,14 +30,6 @@ err: return -1; } -static int upb_textprinter_startfield(upb_textprinter *p, upb_fielddef *f) { - upb_textprinter_indent(p); - CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); - return 0; -err: - return -1; -} - static int upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) { CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); @@ -52,7 +44,8 @@ err: static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, upb_value val) { upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); #define CASE(fmtstr, member) \ CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; switch(f->type) { @@ -105,11 +98,13 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_handlers *delegate_to) { (void)delegate_to; upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); - p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT " {", UPB_STRARG(f->name))); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); + p->indent_depth++; return UPB_CONTINUE; +err: + return UPB_BREAK; } static upb_flow_t upb_textprinter_endsubmsg(void *_p) diff --git a/tests/test_decoder.c b/tests/test_decoder.c index ed5a77e..f48472d 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -25,14 +25,18 @@ int main() { upb_status status = UPB_STATUS_INIT; upb_src_run(src, &status); - upb_printerr(&status); assert(upb_ok(&status)); - + upb_status_uninit(&status); upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); upb_textprinter_free(p); upb_def_unref(fds); upb_symtab_unref(symtab); + + // Prevent C library from holding buffers open, so Valgrind doesn't see + // memory leaks. + fclose(stdin); + fclose(stdout); } -- cgit v1.2.3