From 6a1f3a66939308668ab8dce0d195afec16e02af9 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 14 Jul 2011 23:15:00 -0700 Subject: Major refactoring: upb_string is gone in favor of upb_strref. --- src/upb_decoder.c | 276 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 165 insertions(+), 111 deletions(-) (limited to 'src/upb_decoder.c') diff --git a/src/upb_decoder.c b/src/upb_decoder.c index a44b561..fed48af 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -8,6 +8,7 @@ #include #include #include +#include "bswap.h" #include "upb_bytestream.h" #include "upb_decoder.h" #include "upb_varint.h" @@ -38,83 +39,97 @@ static void upb_decoder_exit2(void *_d) { upb_decoder *d = _d; upb_decoder_exit(d); } +static void upb_decoder_abort(upb_decoder *d, const char *msg) { + upb_status_setf(d->status, UPB_ERROR, msg); + upb_decoder_exit(d); +} /* Decoding/Buffering of wire types *******************************************/ -#define UPB_MAX_VARINT_ENCODED_SIZE 10 - -static void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; } static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; } +static void upb_decoder_advance(upb_decoder *d, size_t len) { + assert((size_t)(d->end - d->ptr) >= len); + d->ptr += len; +} size_t upb_decoder_offset(upb_decoder *d) { - size_t offset = d->buf_stream_offset; - if (d->buf) offset += (d->ptr - d->buf); + size_t offset = d->bufstart_ofs; + if (d->ptr) offset += (d->ptr - d->buf); return offset; } static void upb_decoder_setmsgend(upb_decoder *d) { - uint32_t end = d->dispatcher.top->end_offset; - d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end; + upb_dispatcher_frame *f = d->dispatcher.top; + size_t delimlen = f->end_ofs - d->bufstart_ofs; + size_t buflen = d->end - d->buf; + if (f->end_ofs != UINT64_MAX && delimlen <= buflen) { + d->delim_end = (uintptr_t)(d->buf + delimlen); + } else { + // Buffers must not run up against the end of memory. + assert((uintptr_t)d->end < UINTPTR_MAX); + d->delim_end = UINTPTR_MAX; + } } // Pulls the next buffer from the bytesrc. Should be called only when the // current buffer is completely empty. -static void upb_pullbuf(upb_decoder *d, bool need) { +static bool upb_trypullbuf(upb_decoder *d) { assert(upb_decoder_bufleft(d) == 0); - int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1; - upb_string_recycle(&d->bufstr); - if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) { - d->buf = NULL; - d->end = NULL; - if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF."); - upb_decoder_exit(d); - } - if (last_buf_len != -1) { - d->buf_stream_offset += last_buf_len; - for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) - if (f->end_offset != UPB_NONDELIMITED) - f->end_offset -= last_buf_len; + if (d->bufend_ofs == d->refend_ofs) { + d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status); + if (!upb_ok(d->status)) { + d->ptr = NULL; + d->end = NULL; + if (upb_iseof(d->status)) return false; + upb_decoder_exit(d); + } } - d->buf = upb_string_getrobuf(d->bufstr); - d->ptr = upb_string_getrobuf(d->bufstr); - d->end = d->buf + upb_string_len(d->bufstr); + d->bufstart_ofs = d->bufend_ofs; + size_t len; + d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len); + assert(len > 0); + d->bufend_ofs = d->bufstart_ofs + len; + d->ptr = d->buf; + d->end = d->buf + len; +#ifdef UPB_USE_JIT_X64 d->jit_end = d->end - 20; - upb_string_recycle(&d->tmp); - upb_string_substr(d->tmp, d->bufstr, 0, 0); +#endif upb_decoder_setmsgend(d); + return true; } -// Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting the decoder state appropriately. -NOINLINE void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) { - while (1) { - size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d)); - memcpy(data, d->ptr, to_copy); - upb_decoder_advance(d, to_copy); - bytes -= to_copy; - if (bytes == 0) return; - upb_pullbuf(d, need); +static void upb_pullbuf(upb_decoder *d) { + if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF"); +} + +void upb_decoder_commit(upb_decoder *d) { + d->completed_ptr = d->ptr; + if (d->refstart_ofs < d->bufstart_ofs) { + // Drop our ref on the previous buf's region. + upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs); + upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs); + d->refstart_ofs = d->bufstart_ofs; } } -NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) { +NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) { uint8_t byte = 0x80; uint64_t u64 = 0; int bitpos; + const char *ptr = d->ptr; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - upb_getbuf(d, &byte, 1, need); - u64 |= ((uint64_t)byte & 0x7F) << bitpos; - } - - if(bitpos == 70 && (byte & 0x80)) { - upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); - upb_decoder_exit(d); + if (upb_decoder_bufleft(d) == 0) { + upb_pullbuf(d); + ptr = d->ptr; + } + u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos; } + if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint"); return u64; } // For tags and delimited lengths, which must be <=32bit and are usually small. -FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) { +FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) { const char *p = d->ptr; uint32_t ret; uint64_t u64; @@ -125,11 +140,8 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) { ret |= (*p & 0x7f) << 7; if ((*(p++) & 0x80) == 0) goto done; // likely slow: - u64 = upb_decode_varint_slow(d, need); - if (u64 > 0xffffffff) { - upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n"); - upb_decoder_exit(d); - } + u64 = upb_decode_varint_slow(d); + if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint"); ret = (uint32_t)u64; p = d->ptr; // Turn the next line into a nop. done: @@ -137,57 +149,90 @@ done: return ret; } +FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { + if (upb_decoder_bufleft(d) == 0) { + // Check for our two normal end-of-message conditions. + if (d->bufend_ofs == d->end_ofs) return false; + if (!upb_trypullbuf(d)) return false; + } + *val = upb_decode_varint32(d); + return true; +} + FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) { - if (upb_decoder_bufleft(d) >= 16) { - // Common (fast) case. + if (upb_decoder_bufleft(d) >= 10) { + // Fast case. upb_decoderet r = upb_vdecode_fast(d->ptr); - if (r.p == NULL) { - upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); - upb_decoder_exit(d); - } + if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint"); upb_decoder_advance(d, r.p - d->ptr); return r.val; - } else { - return upb_decode_varint_slow(d, true); + } else if (upb_decoder_bufleft(d) > 0) { + // Intermediate case -- worth it? + char tmpbuf[10]; + memset(tmpbuf, 0x80, 10); + memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d)); + upb_decoderet r = upb_vdecode_fast(tmpbuf); + if (r.p != NULL) { + upb_decoder_advance(d, r.p - tmpbuf); + return r.val; + } } + // Slow case -- varint spans buffer seam. + return upb_decode_varint_slow(d); } -FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) { +FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) { if (upb_decoder_bufleft(d) >= bytes) { - // Common (fast) case. - memcpy(val, d->ptr, bytes); + // Fast case. + memcpy(buf, d->ptr, bytes); upb_decoder_advance(d, bytes); } else { - upb_getbuf(d, val, bytes, true); + // Slow case. + size_t read = 0; + while (read < bytes) { + size_t avail = upb_decoder_bufleft(d); + memcpy(buf + read, d->ptr, avail); + upb_decoder_advance(d, avail); + read += avail; + } } } FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) { uint32_t u32; - upb_decode_fixed(d, &u32, sizeof(uint32_t)); - return u32; + upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t)); + return le32toh(u32); } FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { uint64_t u64; - upb_decode_fixed(d, &u64, sizeof(uint64_t)); - return u64; + upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t)); + return le64toh(u64); } -INLINE upb_string *upb_decode_string(upb_decoder *d) { - upb_string_recycle(&d->tmp); - uint32_t strlen = upb_decode_varint32(d, true); +INLINE upb_strref *upb_decode_string(upb_decoder *d) { + uint32_t strlen = upb_decode_varint32(d); + d->strref.stream_offset = upb_decoder_offset(d); + d->strref.len = strlen; + if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d); if (upb_decoder_bufleft(d) >= strlen) { - // Common (fast) case. - upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen); + // Fast case. + d->strref.ptr = d->ptr; upb_decoder_advance(d, strlen); } else { - upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true); + // Slow case. + while (1) { + size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen); + upb_decoder_advance(d, consume); + strlen -= consume; + if (strlen == 0) break; + upb_pullbuf(d); + } } - return d->tmp; + return &d->strref; } INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) { - upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end; + upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end; upb_decoder_setmsgend(d); } @@ -224,7 +269,7 @@ T(DOUBLE, fixed64, double, upb_asdouble) T(FLOAT, fixed32, float, upb_asfloat) T(SINT32, varint, int32, upb_zzdec_32) T(SINT64, varint, int64, upb_zzdec_64) -T(STRING, string, str, upb_string*) +T(STRING, string, strref, upb_strref*) static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { upb_push(d, f, UPB_NONDELIMITED); @@ -235,28 +280,24 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) { upb_decoder_setmsgend(d); } static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { - upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf)); + upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf)); } /* The main decoding loop *****************************************************/ -// Called when a user callback returns something other than UPB_CONTINUE. -// This should unwind one or more stack frames, skipping the corresponding -// data in the input. +static void upb_decoder_checkdelim(upb_decoder *d) { + while ((uintptr_t)d->ptr >= d->delim_end) { + if ((uintptr_t)d->ptr > d->delim_end) + upb_decoder_abort(d, "Bad submessage end"); -static void upb_delimend(upb_decoder *d) { - if (d->ptr > d->submsg_end) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); - upb_decoder_exit(d); - } - - if (d->dispatcher.top->is_sequence) { - upb_dispatch_endseq(&d->dispatcher); - } else { - upb_dispatch_endsubmsg(&d->dispatcher); + if (d->dispatcher.top->is_sequence) { + upb_dispatch_endseq(&d->dispatcher); + } else { + upb_dispatch_endsubmsg(&d->dispatcher); + } + upb_decoder_setmsgend(d); } - upb_decoder_setmsgend(d); } static void upb_decoder_enterjit(upb_decoder *d) { @@ -273,7 +314,8 @@ static void upb_decoder_enterjit(upb_decoder *d) { INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { while (1) { - uint32_t tag = upb_decode_varint32(d, false); + uint32_t tag; + if (!upb_trydecode_varint32(d, &tag)) return NULL; upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag); // There are no explicit "startseq" or "endseq" markers in protobuf @@ -287,8 +329,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { // TODO: support packed. assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) || (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED); - uint32_t end = d->dispatcher.top->end_offset; - upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end; + uint32_t end = d->dispatcher.top->end_ofs; + upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end; upb_decoder_setmsgend(d); } if (f) return f; @@ -299,11 +341,13 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break; case UPB_WIRE_TYPE_DELIMITED: - upb_decoder_advance(d, upb_decode_varint32(d, true)); - break; + upb_decoder_advance(d, upb_decode_varint32(d)); break; + default: + upb_decoder_abort(d, "Invavlid wire type"); } // TODO: deliver to unknown field callback. - while (d->ptr >= d->submsg_end) upb_delimend(d); + upb_decoder_commit(d); + upb_decoder_checkdelim(d); } } @@ -311,11 +355,11 @@ void upb_decoder_onexit(upb_decoder *d) { if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher); if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { // Normal end-of-file. - upb_clearerr(d->status); + upb_status_clear(d->status); upb_dispatch_endmsg(&d->dispatcher, d->status); } else { if (d->status->code == UPB_EOF) - upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage."); + upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage."); } } @@ -325,26 +369,32 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { return; } d->status = status; - upb_pullbuf(d, true); upb_dispatch_startmsg(&d->dispatcher); while(1) { // Main loop: executed once per tag/field pair. - while (d->ptr >= d->submsg_end) upb_delimend(d); + upb_decoder_checkdelim(d); upb_decoder_enterjit(d); // if (!d->dispatcher.top->is_packed) upb_fhandlers *f = upb_decode_tag(d); + if (!f) upb_decoder_exit2(d); f->decode(d, f); + upb_decoder_commit(d); } } static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top, upb_dispatcher_frame *bottom) { (void)top; + (void)bottom; + (void)_d; +#if 0 upb_decoder *d = _d; + // TODO if (bottom->end_offset == UPB_NONDELIMITED) { // TODO: support skipping groups. abort(); } - d->ptr = d->buf + bottom->end_offset; + d->ptr = d->buf.ptr + bottom->end_offset; +#endif } void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) { @@ -354,10 +404,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) { d->jit_code = NULL; if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d); #endif - d->bufstr = NULL; - d->tmp = NULL; - upb_string_recycle(&d->tmp); - // Set function pointers for each field's decode function. for (int i = 0; i < handlers->msgs_len; i++) { upb_mhandlers *m = handlers->msgs[i]; @@ -396,19 +442,27 @@ void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) { upb_handlers_unref(h); } -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { - upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED; +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs, + uint64_t end_ofs, void *closure) { + upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure); + f->end_ofs = end_ofs; + d->end_ofs = end_ofs; + d->refstart_ofs = start_ofs; + d->refend_ofs = start_ofs; + d->bufstart_ofs = start_ofs; + d->bufend_ofs = start_ofs; d->bytesrc = bytesrc; d->buf = NULL; d->ptr = NULL; d->end = NULL; // Force a buffer pull. - d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered. - d->buf_stream_offset = 0; +#ifdef UPB_USE_JIT_X64 + d->jit_end = NULL; +#endif + d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered. + d->strref.bytesrc = bytesrc; } void upb_decoder_uninit(upb_decoder *d) { - upb_string_unref(d->bufstr); - upb_string_unref(d->tmp); #ifdef UPB_USE_JIT_X64 if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d); #endif -- cgit v1.2.3