From 02a8cdfff29d6a17836847490a06dfe535855d52 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 23:22:33 -0800 Subject: Fixes to decoder, stdio, textprinter. --- core/upb_stream_vtbl.h | 6 ++--- stream/upb_decoder.c | 68 ++++++++++++++++++++++++++++++++---------------- stream/upb_stdio.c | 38 ++++++++++++++++++--------- stream/upb_textprinter.c | 4 +-- tests/test_decoder.c | 10 +++++-- 5 files changed, 85 insertions(+), 41 deletions(-) diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 8e8971f..a6990bc 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -34,10 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)( // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef bool (*upb_bytesink_putstr_fptr)( +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( upb_bytesink *bytesink, upb_string *str, upb_status *status); typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_status *status, const char *fmt, va_list args); + upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); // Vtables for the above interfaces. typedef struct { @@ -153,7 +153,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { va_list args; va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); + upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); va_end(args); return ret; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index e60915f..a7a2c76 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -18,22 +18,24 @@ // possibilities for optimization/experimentation here. INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *status) { + const char *p = *ptr; uint32_t low, high = 0; uint32_t b; - b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; done: + *ptr = p; *val = ((uint64_t)high << 32) | low; return true; } @@ -50,7 +52,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - ssize_t end_offset; // For groups, 0. + size_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -73,7 +75,7 @@ struct upb_decoder { upb_string *buf; // The offset within the overall stream represented by the *beginning* of buf. - upb_strlen_t buf_stream_offset; + size_t buf_stream_offset; }; typedef struct { @@ -98,7 +100,7 @@ static upb_flow_t upb_pop(upb_decoder *d); // Constant used to signal that the submessage is a group and therefore we // don't know its end offset. This cannot be the offset of a real submessage // end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET -1 +#define UPB_GROUP_END_OFFSET 0 #define UPB_MAX_VARINT_ENCODED_SIZE 10 // Called only from the slow path, this function copies the next "len" bytes @@ -132,12 +134,12 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, } // Wait for end-of-submessage or end-of-buffer, whichever comes first. - ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - ssize_t submsg_remaining = + size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; + size_t submsg_remaining = d->top->end_offset - d->buf_stream_offset - offset_in_buf; if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining > submsg_remaining) { + buf_remaining < submsg_remaining) { s->len = buf_remaining; } else { // Check that non of our subtraction overflowed. @@ -165,13 +167,16 @@ static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, upb_seterr(d->status, UPB_ERROR, "Varint was unterminated after 10 bytes.\n"); return false; + } else if (d->status->code == UPB_EOF && bitpos == 0) { + // Regular EOF. + return false; } else if (d->status->code == UPB_EOF && (byte & 0x80)) { upb_seterr(d->status, UPB_ERROR, "Provided data ended in the middle of a varint.\n"); return false; } else { // Success. - upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } } @@ -210,7 +215,7 @@ INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { const char *p = s->ptr; if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; upb_dstate_advance(s, p - s->ptr); - upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } else { return upb_decode_varint_slow(d, s, val); @@ -245,6 +250,7 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) return false; } + upb_value_setstr(val, *str); return true; } @@ -259,7 +265,7 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { } static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, - upb_strlen_t submsg_len, upb_fieldtype_t type) { + upb_value submsg_len, upb_fieldtype_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { @@ -268,7 +274,7 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); return upb_dispatch_startsubmsg(&d->dispatcher, f); } @@ -280,6 +286,7 @@ static upb_flow_t upb_pop(upb_decoder *d) { void upb_decoder_run(upb_src *src, upb_status *status) { upb_decoder *d = (upb_decoder*)src; + d->status = status; // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. @@ -299,9 +306,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { if (!upb_decode_tag(d, &state, &tag)) { if (status->code == UPB_EOF && d->top == d->stack) { // Normal end-of-file. + upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; } else { + if (status->code == UPB_EOF) { + upb_seterr(status, UPB_ERROR, + "Input ended in the middle of a submessage."); + } goto err; } } @@ -352,7 +364,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); + CHECK_FLOW(upb_push(d, &state, f, val, f->type)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): @@ -397,9 +409,21 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { upb_dispatcher_init(&d->dispatcher); d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; return d; } +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { + d->bytesrc = bytesrc; + d->top = &d->stack[0]; + d->top->msgdef = d->toplevel_msgdef; + d->top->end_offset = SIZE_MAX; // never want to end top-level message. + upb_string_unref(d->buf); + d->buf = NULL; +} + void upb_decoder_free(upb_decoder *d) { free(d); } + +upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 7923664..8857677 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -23,6 +23,9 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) { stdio->file = file; } + +/* upb_bytesrc methods ********************************************************/ + static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status) { upb_stdio *stdio = (upb_stdio*)src; @@ -50,18 +53,27 @@ static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, return true; } -int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { + +/* upb_bytesink methods *******************************************************/ + +upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { - // Error or EOF. - stdio->bytesink.eof = feof(stdio->file); - if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesink.status, UPB_ERROR, - "Error writing to stdio stream."); - return 0; - } + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); + upb_strlen_t written = vfprintf(stdio->file, fmt, args); + if (written < 0) { + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; } return written; } @@ -72,13 +84,15 @@ upb_stdio *upb_stdio_new() { upb_stdio_getstr, }; - //static upb_bytesink_vtbl bytesink_vtbl = { - // upb_stdio_put - //}; + static upb_bytesink_vtbl bytesink_vtbl = { + NULL, + upb_stdio_putstr, + upb_stdio_vprintf + }; upb_stdio *stdio = malloc(sizeof(*stdio)); upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); - //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); + upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); return stdio; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 7025494..531da12 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -90,7 +90,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): // TODO: escaping. - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); break; @@ -107,7 +107,7 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_textprinter *p = _p; upb_textprinter_startfield(p, f); p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); return UPB_CONTINUE; } diff --git a/tests/test_decoder.c b/tests/test_decoder.c index 0e6f19c..ed5a77e 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -16,13 +16,19 @@ int main() { upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); upb_decoder_reset(d, upb_stdio_bytesrc(in)); upb_textprinter *p = upb_textprinter_new(); - upb_textprinter_reset(p, upb_stdio_bytesink(out), false); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false); + upb_src *src = upb_decoder_src(d); + upb_src_sethandlers(src, &handlers); upb_status status = UPB_STATUS_INIT; - upb_streamdata(upb_decoder_src(d), upb_textprinter_sink(p), &status); + upb_src_run(src, &status); + upb_printerr(&status); assert(upb_ok(&status)); + upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); -- cgit v1.2.3