summaryrefslogtreecommitdiff
path: root/src/upb_decoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/upb_decoder.c')
-rw-r--r--src/upb_decoder.c276
1 files changed, 165 insertions, 111 deletions
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index a44b561..fed48af 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -8,6 +8,7 @@
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
+#include "bswap.h"
#include "upb_bytestream.h"
#include "upb_decoder.h"
#include "upb_varint.h"
@@ -38,83 +39,97 @@ static void upb_decoder_exit2(void *_d) {
upb_decoder *d = _d;
upb_decoder_exit(d);
}
+static void upb_decoder_abort(upb_decoder *d, const char *msg) {
+ upb_status_setf(d->status, UPB_ERROR, msg);
+ upb_decoder_exit(d);
+}
/* Decoding/Buffering of wire types *******************************************/
-#define UPB_MAX_VARINT_ENCODED_SIZE 10
-
-static void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; }
static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
+ assert((size_t)(d->end - d->ptr) >= len);
+ d->ptr += len;
+}
size_t upb_decoder_offset(upb_decoder *d) {
- size_t offset = d->buf_stream_offset;
- if (d->buf) offset += (d->ptr - d->buf);
+ size_t offset = d->bufstart_ofs;
+ if (d->ptr) offset += (d->ptr - d->buf);
return offset;
}
static void upb_decoder_setmsgend(upb_decoder *d) {
- uint32_t end = d->dispatcher.top->end_offset;
- d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end;
+ upb_dispatcher_frame *f = d->dispatcher.top;
+ size_t delimlen = f->end_ofs - d->bufstart_ofs;
+ size_t buflen = d->end - d->buf;
+ if (f->end_ofs != UINT64_MAX && delimlen <= buflen) {
+ d->delim_end = (uintptr_t)(d->buf + delimlen);
+ } else {
+ // Buffers must not run up against the end of memory.
+ assert((uintptr_t)d->end < UINTPTR_MAX);
+ d->delim_end = UINTPTR_MAX;
+ }
}
// Pulls the next buffer from the bytesrc. Should be called only when the
// current buffer is completely empty.
-static void upb_pullbuf(upb_decoder *d, bool need) {
+static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
- int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
- upb_string_recycle(&d->bufstr);
- if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
- d->buf = NULL;
- d->end = NULL;
- if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF.");
- upb_decoder_exit(d);
- }
- if (last_buf_len != -1) {
- d->buf_stream_offset += last_buf_len;
- for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
- if (f->end_offset != UPB_NONDELIMITED)
- f->end_offset -= last_buf_len;
+ if (d->bufend_ofs == d->refend_ofs) {
+ d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
+ if (!upb_ok(d->status)) {
+ d->ptr = NULL;
+ d->end = NULL;
+ if (upb_iseof(d->status)) return false;
+ upb_decoder_exit(d);
+ }
}
- d->buf = upb_string_getrobuf(d->bufstr);
- d->ptr = upb_string_getrobuf(d->bufstr);
- d->end = d->buf + upb_string_len(d->bufstr);
+ d->bufstart_ofs = d->bufend_ofs;
+ size_t len;
+ d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
+ assert(len > 0);
+ d->bufend_ofs = d->bufstart_ofs + len;
+ d->ptr = d->buf;
+ d->end = d->buf + len;
+#ifdef UPB_USE_JIT_X64
d->jit_end = d->end - 20;
- upb_string_recycle(&d->tmp);
- upb_string_substr(d->tmp, d->bufstr, 0, 0);
+#endif
upb_decoder_setmsgend(d);
+ return true;
}
-// Called only from the slow path, this function copies the next "len" bytes
-// from the stream to "data", adjusting the decoder state appropriately.
-NOINLINE void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) {
- while (1) {
- size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d));
- memcpy(data, d->ptr, to_copy);
- upb_decoder_advance(d, to_copy);
- bytes -= to_copy;
- if (bytes == 0) return;
- upb_pullbuf(d, need);
+static void upb_pullbuf(upb_decoder *d) {
+ if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
+}
+
+void upb_decoder_commit(upb_decoder *d) {
+ d->completed_ptr = d->ptr;
+ if (d->refstart_ofs < d->bufstart_ofs) {
+ // Drop our ref on the previous buf's region.
+ upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
+ upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
+ d->refstart_ofs = d->bufstart_ofs;
}
}
-NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) {
+NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
+ const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
- upb_getbuf(d, &byte, 1, need);
- u64 |= ((uint64_t)byte & 0x7F) << bitpos;
- }
-
- if(bitpos == 70 && (byte & 0x80)) {
- upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
- upb_decoder_exit(d);
+ if (upb_decoder_bufleft(d) == 0) {
+ upb_pullbuf(d);
+ ptr = d->ptr;
+ }
+ u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
}
+ if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
return u64;
}
// For tags and delimited lengths, which must be <=32bit and are usually small.
-FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
+FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
const char *p = d->ptr;
uint32_t ret;
uint64_t u64;
@@ -125,11 +140,8 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
ret |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
- u64 = upb_decode_varint_slow(d, need);
- if (u64 > 0xffffffff) {
- upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n");
- upb_decoder_exit(d);
- }
+ u64 = upb_decode_varint_slow(d);
+ if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
@@ -137,57 +149,90 @@ done:
return ret;
}
+FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
+ if (upb_decoder_bufleft(d) == 0) {
+ // Check for our two normal end-of-message conditions.
+ if (d->bufend_ofs == d->end_ofs) return false;
+ if (!upb_trypullbuf(d)) return false;
+ }
+ *val = upb_decode_varint32(d);
+ return true;
+}
+
FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
- if (upb_decoder_bufleft(d) >= 16) {
- // Common (fast) case.
+ if (upb_decoder_bufleft(d) >= 10) {
+ // Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
- if (r.p == NULL) {
- upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
- upb_decoder_exit(d);
- }
+ if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
upb_decoder_advance(d, r.p - d->ptr);
return r.val;
- } else {
- return upb_decode_varint_slow(d, true);
+ } else if (upb_decoder_bufleft(d) > 0) {
+ // Intermediate case -- worth it?
+ char tmpbuf[10];
+ memset(tmpbuf, 0x80, 10);
+ memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
+ upb_decoderet r = upb_vdecode_fast(tmpbuf);
+ if (r.p != NULL) {
+ upb_decoder_advance(d, r.p - tmpbuf);
+ return r.val;
+ }
}
+ // Slow case -- varint spans buffer seam.
+ return upb_decode_varint_slow(d);
}
-FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) {
+FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
if (upb_decoder_bufleft(d) >= bytes) {
- // Common (fast) case.
- memcpy(val, d->ptr, bytes);
+ // Fast case.
+ memcpy(buf, d->ptr, bytes);
upb_decoder_advance(d, bytes);
} else {
- upb_getbuf(d, val, bytes, true);
+ // Slow case.
+ size_t read = 0;
+ while (read < bytes) {
+ size_t avail = upb_decoder_bufleft(d);
+ memcpy(buf + read, d->ptr, avail);
+ upb_decoder_advance(d, avail);
+ read += avail;
+ }
}
}
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32;
- upb_decode_fixed(d, &u32, sizeof(uint32_t));
- return u32;
+ upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
+ return le32toh(u32);
}
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64;
- upb_decode_fixed(d, &u64, sizeof(uint64_t));
- return u64;
+ upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
+ return le64toh(u64);
}
-INLINE upb_string *upb_decode_string(upb_decoder *d) {
- upb_string_recycle(&d->tmp);
- uint32_t strlen = upb_decode_varint32(d, true);
+INLINE upb_strref *upb_decode_string(upb_decoder *d) {
+ uint32_t strlen = upb_decode_varint32(d);
+ d->strref.stream_offset = upb_decoder_offset(d);
+ d->strref.len = strlen;
+ if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
if (upb_decoder_bufleft(d) >= strlen) {
- // Common (fast) case.
- upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen);
+ // Fast case.
+ d->strref.ptr = d->ptr;
upb_decoder_advance(d, strlen);
} else {
- upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true);
+ // Slow case.
+ while (1) {
+ size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen);
+ upb_decoder_advance(d, consume);
+ strlen -= consume;
+ if (strlen == 0) break;
+ upb_pullbuf(d);
+ }
}
- return d->tmp;
+ return &d->strref;
}
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
- upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end;
+ upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
@@ -224,7 +269,7 @@ T(DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
-T(STRING, string, str, upb_string*)
+T(STRING, string, strref, upb_strref*)
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, UPB_NONDELIMITED);
@@ -235,28 +280,24 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
+ upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf));
}
/* The main decoding loop *****************************************************/
-// Called when a user callback returns something other than UPB_CONTINUE.
-// This should unwind one or more stack frames, skipping the corresponding
-// data in the input.
+static void upb_decoder_checkdelim(upb_decoder *d) {
+ while ((uintptr_t)d->ptr >= d->delim_end) {
+ if ((uintptr_t)d->ptr > d->delim_end)
+ upb_decoder_abort(d, "Bad submessage end");
-static void upb_delimend(upb_decoder *d) {
- if (d->ptr > d->submsg_end) {
- upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
- upb_decoder_exit(d);
- }
-
- if (d->dispatcher.top->is_sequence) {
- upb_dispatch_endseq(&d->dispatcher);
- } else {
- upb_dispatch_endsubmsg(&d->dispatcher);
+ if (d->dispatcher.top->is_sequence) {
+ upb_dispatch_endseq(&d->dispatcher);
+ } else {
+ upb_dispatch_endsubmsg(&d->dispatcher);
+ }
+ upb_decoder_setmsgend(d);
}
- upb_decoder_setmsgend(d);
}
static void upb_decoder_enterjit(upb_decoder *d) {
@@ -273,7 +314,8 @@ static void upb_decoder_enterjit(upb_decoder *d) {
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
- uint32_t tag = upb_decode_varint32(d, false);
+ uint32_t tag;
+ if (!upb_trydecode_varint32(d, &tag)) return NULL;
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
// There are no explicit "startseq" or "endseq" markers in protobuf
@@ -287,8 +329,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
// TODO: support packed.
assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
(tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
- uint32_t end = d->dispatcher.top->end_offset;
- upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end;
+ uint32_t end = d->dispatcher.top->end_ofs;
+ upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
if (f) return f;
@@ -299,11 +341,13 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
- upb_decoder_advance(d, upb_decode_varint32(d, true));
- break;
+ upb_decoder_advance(d, upb_decode_varint32(d)); break;
+ default:
+ upb_decoder_abort(d, "Invavlid wire type");
}
// TODO: deliver to unknown field callback.
- while (d->ptr >= d->submsg_end) upb_delimend(d);
+ upb_decoder_commit(d);
+ upb_decoder_checkdelim(d);
}
}
@@ -311,11 +355,11 @@ void upb_decoder_onexit(upb_decoder *d) {
if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
- upb_clearerr(d->status);
+ upb_status_clear(d->status);
upb_dispatch_endmsg(&d->dispatcher, d->status);
} else {
if (d->status->code == UPB_EOF)
- upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage.");
+ upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
}
}
@@ -325,26 +369,32 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
return;
}
d->status = status;
- upb_pullbuf(d, true);
upb_dispatch_startmsg(&d->dispatcher);
while(1) { // Main loop: executed once per tag/field pair.
- while (d->ptr >= d->submsg_end) upb_delimend(d);
+ upb_decoder_checkdelim(d);
upb_decoder_enterjit(d);
// if (!d->dispatcher.top->is_packed)
upb_fhandlers *f = upb_decode_tag(d);
+ if (!f) upb_decoder_exit2(d);
f->decode(d, f);
+ upb_decoder_commit(d);
}
}
static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
upb_dispatcher_frame *bottom) {
(void)top;
+ (void)bottom;
+ (void)_d;
+#if 0
upb_decoder *d = _d;
+ // TODO
if (bottom->end_offset == UPB_NONDELIMITED) {
// TODO: support skipping groups.
abort();
}
- d->ptr = d->buf + bottom->end_offset;
+ d->ptr = d->buf.ptr + bottom->end_offset;
+#endif
}
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
@@ -354,10 +404,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
d->jit_code = NULL;
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif
- d->bufstr = NULL;
- d->tmp = NULL;
- upb_string_recycle(&d->tmp);
-
// Set function pointers for each field's decode function.
for (int i = 0; i < handlers->msgs_len; i++) {
upb_mhandlers *m = handlers->msgs[i];
@@ -396,19 +442,27 @@ void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
upb_handlers_unref(h);
}
-void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
- upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED;
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
+ uint64_t end_ofs, void *closure) {
+ upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
+ f->end_ofs = end_ofs;
+ d->end_ofs = end_ofs;
+ d->refstart_ofs = start_ofs;
+ d->refend_ofs = start_ofs;
+ d->bufstart_ofs = start_ofs;
+ d->bufend_ofs = start_ofs;
d->bytesrc = bytesrc;
d->buf = NULL;
d->ptr = NULL;
d->end = NULL; // Force a buffer pull.
- d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered.
- d->buf_stream_offset = 0;
+#ifdef UPB_USE_JIT_X64
+ d->jit_end = NULL;
+#endif
+ d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered.
+ d->strref.bytesrc = bytesrc;
}
void upb_decoder_uninit(upb_decoder *d) {
- upb_string_unref(d->bufstr);
- upb_string_unref(d->tmp);
#ifdef UPB_USE_JIT_X64
if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback