summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  benchmarks/parsestream.upb.c      3
-rw-r--r--  benchmarks/parsetoproto2.upb.cc   3
-rw-r--r--  benchmarks/parsetostruct.upb.c    3
-rw-r--r--  tests/test_decoder.c              2
-rw-r--r--  upb/handlers.c                   18
-rw-r--r--  upb/handlers.h                    6
-rw-r--r--  upb/pb/decoder.c                 38
-rw-r--r--  upb/pb/decoder.h                  9
8 files changed, 56 insertions, 26 deletions
diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c
index 37ccb42..80f9444 100644
--- a/benchmarks/parsestream.upb.c
+++ b/benchmarks/parsestream.upb.c
@@ -75,7 +75,8 @@ static size_t run(int i)
(void)i;
upb_status status = UPB_STATUS_INIT;
upb_stringsrc_reset(&stringsrc, input_str, input_len);
- upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), 0, UINT64_MAX, NULL);
+ upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc),
+ 0, UPB_NONDELIMITED, NULL);
upb_decoder_decode(&decoder, &status);
if(!upb_ok(&status)) goto err;
return input_len;
diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc
index af3e1f2..74d0680 100644
--- a/benchmarks/parsetoproto2.upb.cc
+++ b/benchmarks/parsetoproto2.upb.cc
@@ -283,7 +283,8 @@ static size_t run(int i)
(void)i;
upb_status status = UPB_STATUS_INIT;
msg[i % NUM_MESSAGES].Clear();
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &msg[i % NUM_MESSAGES]);
+ upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
+ 0, UPB_NONDELIMITED, &msg[i % NUM_MESSAGES]);
upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err;
return len;
diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c
index 64a4d35..f44b650 100644
--- a/benchmarks/parsetostruct.upb.c
+++ b/benchmarks/parsetostruct.upb.c
@@ -69,7 +69,8 @@ static size_t run(int i)
upb_status status = UPB_STATUS_INIT;
i %= NUM_MESSAGES;
upb_msg_clear(msg[i], def);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg[i]);
+ upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
+ 0, UPB_NONDELIMITED, msg[i]);
upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err;
return len;
diff --git a/tests/test_decoder.c b/tests/test_decoder.c
index 88b8741..e3fc1ea 100644
--- a/tests/test_decoder.c
+++ b/tests/test_decoder.c
@@ -53,7 +53,7 @@ int main(int argc, char *argv[]) {
upb_decoder d;
upb_decoder_initforhandlers(&d, handlers);
- upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UINT64_MAX, p);
+ upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UPB_NONDELIMITED, p);
upb_status_clear(&status);
upb_decoder_decode(&d, &status);
diff --git a/upb/handlers.c b/upb/handlers.c
index 05300c0..d02a32a 100644
--- a/upb/handlers.c
+++ b/upb/handlers.c
@@ -16,8 +16,10 @@ static upb_mhandlers *upb_mhandlers_new() {
upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers));
m->startmsg = NULL;
m->endmsg = NULL;
- m->tablearray = NULL;
m->is_group = false;
+#ifdef UPB_USE_JIT_X64
+ m->tablearray = NULL;
+#endif
return m;
}
@@ -29,7 +31,11 @@ static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
if (f) abort();
upb_fhandlers new_f = {false, type, repeated,
repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0),
- n, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};
+ n, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
+#ifdef UPB_USE_JIT_X64
+ 0, 0, 0,
+#endif
+ NULL};
upb_inttable_insert(&m->fieldtab, tag, &new_f);
f = upb_inttable_lookup(&m->fieldtab, tag);
assert(f);
@@ -77,7 +83,9 @@ void upb_handlers_unref(upb_handlers *h) {
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *mh = h->msgs[i];
upb_inttable_free(&mh->fieldtab);
+#ifdef UPB_USE_JIT_X64
free(mh->tablearray);
+#endif
free(mh);
}
free(h->msgs);
@@ -154,7 +162,11 @@ static upb_fhandlers toplevel_f = {
#else
{{0}, -1},
#endif
- NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};
+ NULL, NULL, NULL, NULL, NULL,
+#ifdef UPB_USE_JIT_X64
+ 0, 0, 0,
+#endif
+ NULL};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
upb_skip_handler *skip, upb_exit_handler *exit,
diff --git a/upb/handlers.h b/upb/handlers.h
index b9e120a..db28705 100644
--- a/upb/handlers.h
+++ b/upb/handlers.h
@@ -149,9 +149,11 @@ typedef struct _upb_fieldent {
upb_endfield_handler *endsubmsg;
upb_startfield_handler *startseq;
upb_endfield_handler *endseq;
+#ifdef UPB_USE_JIT_X64
uint32_t jit_pclabel;
uint32_t jit_pclabel_notypecheck;
uint32_t jit_submsg_done_pclabel;
+#endif
void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
} upb_fhandlers;
@@ -184,16 +186,18 @@ typedef struct _upb_mhandlers {
upb_startmsg_handler *startmsg;
upb_endmsg_handler *endmsg;
upb_inttable fieldtab; // Maps field number -> upb_fhandlers.
+ bool is_group;
+#ifdef UPB_USE_JIT_X64
uint32_t jit_startmsg_pclabel;
uint32_t jit_endofbuf_pclabel;
uint32_t jit_endofmsg_pclabel;
uint32_t jit_unknownfield_pclabel;
- bool is_group;
int32_t jit_parent_field_done_pclabel;
uint32_t max_field_number;
// Currently keyed on field number. Could also try keying it
// on encoded or decoded tag, or on encoded field number.
void **tablearray;
+#endif
} upb_mhandlers;
// mhandlers are created as part of a upb_handlers instance, but can be ref'd
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index df74b48..2083849 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -13,10 +13,6 @@
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
-// Used for frames that have no specific end offset: groups, repeated primitive
-// fields inside groups, and the top-level message.
-#define UPB_NONDELIMITED UINT32_MAX
-
#ifdef UPB_USE_JIT_X64
#define Dst_DECL upb_decoder *d
#define Dst_REF (d->dynasm)
@@ -33,7 +29,11 @@
#define FORCEINLINE static __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
-static void upb_decoder_exit(upb_decoder *d) { siglongjmp(d->exitjmp, 1); }
+static void upb_decoder_exit(upb_decoder *d) {
+ // If/when we support resumable decoding, we would want to back our progress
+ // up to completed_ptr and possibly get a previous buffer.
+ siglongjmp(d->exitjmp, 1);
+}
static void upb_decoder_exit2(void *_d) {
upb_decoder *d = _d;
upb_decoder_exit(d);
@@ -43,7 +43,12 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) {
upb_decoder_exit(d);
}
-/* Decoding/Buffering of wire types *******************************************/
+/* Buffering ******************************************************************/
+
+// We operate on one buffer at a time, which may be a subset of the bytesrc
+// region we have ref'd. When data for the buffer is gone we pull the next
+// one. When we've committed our progress we release our ref on any previous
+// buffers' regions.
static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
static void upb_decoder_advance(upb_decoder *d, size_t len) {
@@ -61,12 +66,11 @@ static void upb_decoder_setmsgend(upb_decoder *d) {
upb_dispatcher_frame *f = d->dispatcher.top;
size_t delimlen = f->end_ofs - d->bufstart_ofs;
size_t buflen = d->end - d->buf;
- if (f->end_ofs != UINT64_MAX && delimlen <= buflen) {
- d->delim_end = (uintptr_t)(d->buf + delimlen);
+ if (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) {
+ // Delimited message ends in this buffer.
+ d->delim_end = d->buf + delimlen;
} else {
- // Buffers must not run up against the end of memory.
- assert((uintptr_t)d->end < UINTPTR_MAX);
- d->delim_end = UINTPTR_MAX;
+ d->delim_end = NULL;
}
}
@@ -111,6 +115,9 @@ void upb_decoder_commit(upb_decoder *d) {
}
}
+
+/* Decoding of wire types *****************************************************/
+
NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
@@ -150,7 +157,8 @@ done:
FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
if (upb_decoder_bufleft(d) == 0) {
- // Check for our two normal end-of-message conditions.
+ // Check for our two successful end-of-message conditions
+ // (user-specified EOM and bytesrc EOF).
if (d->bufend_ofs == d->end_ofs) return false;
if (!upb_trypullbuf(d)) return false;
}
@@ -286,8 +294,8 @@ static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
/* The main decoding loop *****************************************************/
static void upb_decoder_checkdelim(upb_decoder *d) {
- while ((uintptr_t)d->ptr >= d->delim_end) {
- if ((uintptr_t)d->ptr > d->delim_end)
+ while (d->delim_end != NULL && d->ptr >= d->delim_end) {
+ if (d->ptr > d->delim_end)
upb_decoder_abort(d, "Bad submessage end");
if (d->dispatcher.top->is_sequence) {
@@ -460,7 +468,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
#ifdef UPB_USE_JIT_X64
d->jit_end = NULL;
#endif
- d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered.
+ d->delim_end = NULL; // But don't let end-of-message get triggered.
d->strref.bytesrc = bytesrc;
}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index 3981359..9a20d76 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -50,9 +50,8 @@ typedef struct _upb_decoder {
// UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
const char *completed_ptr;
- // End of the delimited region, relative to ptr, or UINTPTR_MAX if not in
- // this buf.
- uintptr_t delim_end;
+ // End of the delimited region, relative to ptr, or NULL if not in this buf.
+ const char *delim_end;
#ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field.
@@ -69,6 +68,10 @@ typedef struct _upb_decoder {
sigjmp_buf exitjmp;
} upb_decoder;
+// Used for frames that have no specific end offset: groups, repeated primitive
+// fields inside groups, and the top-level message.
+#define UPB_NONDELIMITED UINT32_MAX
+
// Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref
// on the handlers or msgdef.
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback