From 2c86e7eddb23c92cc83391c3f751c72237ec3759 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Sat, 26 Mar 2011 18:14:01 -0700 Subject: Small semantics changes in the decoder. Simplified some of the semantics around the decoder's data structures, in anticipation of sharing them between the regular C decoder and a JIT-ted decoder. --- src/upb_decoder.c | 59 +++++++++++++++++++++++++++++++++---------------------- src/upb_decoder.h | 16 ++++++++------- src/upb_msg.c | 5 +++-- src/upb_stream.c | 18 ++++++++--------- src/upb_stream.h | 10 ++++++---- 5 files changed, 62 insertions(+), 46 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 6c23c22..1b9b5f8 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -40,7 +40,7 @@ INLINE void upb_decoder_advance(upb_decoder *d, size_t len) { INLINE size_t upb_decoder_offset(upb_decoder *d) { size_t offset = d->buf_stream_offset; - if (d->buf) offset += (d->ptr - upb_string_getrobuf(d->buf)); + if (d->buf) offset += (d->ptr - d->buf); return offset; } @@ -49,9 +49,9 @@ INLINE size_t upb_decoder_bufleft(upb_decoder *d) { } INLINE void upb_dstate_setmsgend(upb_decoder *d) { - size_t end_offset = d->dispatcher.top->end_offset; - d->submsg_end = (end_offset == UPB_GROUP_END_OFFSET) ? (void*)UINTPTR_MAX : - d->ptr + (end_offset - upb_decoder_offset(d)); + uint32_t end_offset = d->dispatcher.top->end_offset; + d->submsg_end = (end_offset == UINT32_MAX) ? + (void*)UINTPTR_MAX : d->buf + end_offset; } // Called only from the slow path, this function copies the next "len" bytes @@ -68,11 +68,21 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) { } // Get next buffer. - if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); - upb_string_recycle(&d->buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - d->ptr = upb_string_getrobuf(d->buf); - d->end = d->ptr + upb_string_len(d->buf); + int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1; + upb_string_recycle(&d->bufstr); + if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) { + d->buf = NULL; + return false; + } + if (last_buf_len != -1) { + d->buf_stream_offset += last_buf_len; + for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) + if (f->end_offset != UINT32_MAX) + f->end_offset -= last_buf_len; + } + d->buf = upb_string_getrobuf(d->bufstr); + d->ptr = upb_string_getrobuf(d->bufstr); + d->end = d->buf + upb_string_len(d->bufstr); } } @@ -165,7 +175,7 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, uint32_t strlen = upb_value_getint32(*val); if (upb_decoder_bufleft(d) >= strlen) { // Common (fast) case. - upb_string_substr(*str, d->buf, d->ptr - upb_string_getrobuf(d->buf), strlen); + upb_string_substr(*str, d->bufstr, d->ptr - d->buf, strlen); upb_decoder_advance(d, strlen); } else { if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen)) @@ -196,17 +206,14 @@ static upb_flow_t upb_decoder_skipsubmsg(upb_decoder *d) { fprintf(stderr, "upb_decoder: Can't skip groups yet.\n"); abort(); } - upb_decoder_advance(d, d->dispatcher.top->end_offset - d->buf_stream_offset - - (d->ptr - upb_string_getrobuf(d->buf))); + upb_decoder_advance(d, d->dispatcher.top->end_offset - (d->ptr - d->buf)); upb_pop(d); return UPB_CONTINUE; } static upb_flow_t upb_push(upb_decoder *d, upb_handlers_fieldent *f, - upb_value submsg_len) { - upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f, - (f->type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - upb_decoder_offset(d) + upb_value_getint32(submsg_len)); + uint32_t end_offset) { + upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f, end_offset); upb_dstate_setmsgend(d); return flow; } @@ -222,7 +229,7 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { } #define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; } - if (upb_dispatch_startmsg(&d->dispatcher, d->closure) != UPB_CONTINUE) goto err; + if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err; // Main loop: executed once per tag/field pair. while(1) { @@ -272,7 +279,8 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { case UPB_WIRE_TYPE_START_GROUP: break; // Nothing to do now, below we will push appropriately. case UPB_WIRE_TYPE_END_GROUP: - if(d->dispatcher.top->end_offset != UPB_GROUP_END_OFFSET) { + // Strictly speaking we should also check the field number here. + if(d->dispatcher.top->f->type != UPB_TYPE(GROUP)) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } @@ -311,9 +319,11 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { // If this is not true we are losing data. But the main protobuf library // doesn't check this, and it would slow us down, so pass for now. switch (f->type) { - case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, f, val)); + CHECK_FLOW(upb_push(d, f, UINT32_MAX)); + continue; // We have no value to dispatch. + case UPB_TYPE(MESSAGE): + CHECK_FLOW(upb_push(d, f, upb_value_getuint32(val) + (d->ptr - d->buf))); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): @@ -343,15 +353,16 @@ err: } void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { - upb_dispatcher_init(&d->dispatcher, handlers, UPB_GROUP_END_OFFSET); + upb_dispatcher_init(&d->dispatcher, handlers); + d->bufstr = NULL; d->buf = NULL; d->tmp = NULL; } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { + upb_dispatcher_reset(&d->dispatcher, closure, UINT32_MAX); d->bytesrc = bytesrc; - d->closure = closure; - upb_dispatcher_reset(&d->dispatcher); + d->buf = NULL; d->ptr = NULL; d->end = NULL; // Force a buffer pull. d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered. @@ -360,6 +371,6 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { void upb_decoder_uninit(upb_decoder *d) { upb_dispatcher_uninit(&d->dispatcher); - upb_string_unref(d->buf); + upb_string_unref(d->bufstr); upb_string_unref(d->tmp); } diff --git a/src/upb_decoder.h b/src/upb_decoder.h index 32a989a..bb54930 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -34,8 +34,8 @@ struct _upb_decoder { // Dispatcher to which we push parsed data. upb_dispatcher dispatcher; - // Current input buffer. - upb_string *buf; + // String to hold our input buffer; is only active if d->buf != NULL. + upb_string *bufstr; // Temporary string for passing string data to callbacks. upb_string *tmp; @@ -43,18 +43,20 @@ struct _upb_decoder { // The offset within the overall stream represented by the *beginning* of buf. size_t buf_stream_offset; - // Our current position in the data buffer. - const char *ptr; + // Pointer to the beginning of our current data buffer, or NULL if none. + const char *buf; // End of this buffer, relative to *ptr. const char *end; + // Members which may also be written by the JIT: + + // Our current position in the data buffer. + const char *ptr; + // End of this submessage, relative to *ptr. const char *submsg_end; - // The closure that was passed by the caller for the top-level message. - void *closure; - // Where we will store any errors that occur. upb_status *status; }; diff --git a/src/upb_msg.c b/src/upb_msg.c index 1705b35..6fc321e 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -233,9 +233,10 @@ static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h, void *closure, upb_status *status) { upb_dispatcher d; - upb_dispatcher_init(&d, h, 0); + upb_dispatcher_init(&d, h); + upb_dispatcher_reset(&d, closure, 0); - upb_dispatch_startmsg(&d, closure); + upb_dispatch_startmsg(&d); upb_msg_dispatch(msg, md, &d); upb_dispatch_endmsg(&d, status); diff --git a/src/upb_stream.c b/src/upb_stream.c index 3b0119c..aebdb42 100644 --- a/src/upb_stream.c +++ b/src/upb_stream.c @@ -222,7 +222,8 @@ void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) { /* upb_dispatcher *************************************************************/ static upb_handlers_fieldent toplevel_f = { - false, 0, 0, // The one value that is actually read + false, UPB_TYPE(GROUP), + 0, // msgent_index #ifdef NDEBUG {{0}}, #else @@ -230,17 +231,15 @@ static upb_handlers_fieldent toplevel_f = { #endif {NULL}, NULL}; -void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h, - size_t top_end_offset) { +void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) { d->handlers = h; for (int i = 0; i < h->msgs_len; i++) upb_inttable_compact(&h->msgs[i].fieldtab); - d->stack[0].end_offset = top_end_offset; d->stack[0].f = &toplevel_f; upb_status_init(&d->status); } -void upb_dispatcher_reset(upb_dispatcher *d) { +void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset) { d->msgent = &d->handlers->msgs[0]; d->dispatch_table = &d->msgent->fieldtab; d->current_depth = 0; @@ -248,6 +247,8 @@ void upb_dispatcher_reset(upb_dispatcher *d) { d->noframe_depth = INT_MAX; d->delegated_depth = 0; d->top = d->stack; + d->top->closure = top_closure; + d->top->end_offset = top_end_offset; d->limit = &d->stack[UPB_MAX_NESTING]; } @@ -261,9 +262,8 @@ void upb_dispatcher_break(upb_dispatcher *d) { d->noframe_depth = d->current_depth; } -upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d, void *closure) { - d->top->closure = closure; - upb_flow_t flow = d->msgent->startmsg(closure); +upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { + upb_flow_t flow = d->msgent->startmsg(d->top->closure); if (flow != UPB_CONTINUE) { d->noframe_depth = d->current_depth + 1; d->skip_depth = (flow == UPB_BREAK) ? d->delegated_depth : d->current_depth; @@ -304,7 +304,7 @@ upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, d->top->closure = sflow.closure; d->msgent = upb_handlers_getmsgent(d->handlers, f); d->dispatch_table = &d->msgent->fieldtab; - return upb_dispatch_startmsg(d, d->top->closure); + return upb_dispatch_startmsg(d); } upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { diff --git a/src/upb_stream.h b/src/upb_stream.h index 783f6c8..0c75acd 100644 --- a/src/upb_stream.h +++ b/src/upb_stream.h @@ -280,7 +280,9 @@ upb_handlers_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_fie typedef struct { upb_handlers_fieldent *f; void *closure; - size_t end_offset; // For groups, 0. + // Relative to the beginning of this buffer. + // For groups and the top-level: UINT32_MAX. + uint32_t end_offset; } upb_dispatcher_frame; typedef struct { @@ -322,11 +324,11 @@ INLINE bool upb_dispatcher_noframe(upb_dispatcher *d) { typedef upb_handlers_fieldent upb_dispatcher_field; -void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h, size_t top_end_offset); -void upb_dispatcher_reset(upb_dispatcher *d); +void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h); +void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset); void upb_dispatcher_uninit(upb_dispatcher *d); -upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d, void *closure); +upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); // Looks up a field by number for the current message. -- cgit v1.2.3