summary | refs | log | tree | commit | diff
path: root/upb/pb
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb')
-rw-r--r-- upb/pb/decoder.c 157
-rw-r--r-- upb/pb/decoder.h 38
-rw-r--r-- upb/pb/decoder_x64.dasc 47
-rw-r--r-- upb/pb/glue.c 13
-rw-r--r-- upb/pb/textprinter.c 13
-rw-r--r-- upb/pb/varint.h 2
6 files changed, 128 insertions, 142 deletions
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 5844377..ae54e47 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -45,27 +45,29 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) {
/* Buffering ******************************************************************/
-// We operate on one buffer at a time, which may be a subset of the bytesrc
-// region we have ref'd. When data for the buffer is completely gone we pull
-// the next one. When we've committed our progress we release our ref on any
-// previous buffers' regions.
-
-static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
-static void upb_decoder_advance(upb_decoder *d, size_t len) {
- assert((size_t)(d->end - d->ptr) >= len);
+// We operate on one buffer at a time, which may be a subset of the currently
+// loaded byteregion data. When data for the buffer is completely gone we pull
+// the next one. When we've committed our progress we discard any previous
+// buffers' regions.
+
+static uint32_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
+static void upb_decoder_advance(upb_decoder *d, uint32_t len) {
+ assert(upb_decoder_bufleft(d) >= len);
d->ptr += len;
}
-size_t upb_decoder_offset(upb_decoder *d) {
- size_t offset = d->bufstart_ofs;
- if (d->ptr) offset += (d->ptr - d->buf);
- return offset;
+uint64_t upb_decoder_offset(upb_decoder *d) {
+ return d->bufstart_ofs + (d->ptr - d->buf);
+}
+
+uint64_t upb_decoder_bufendofs(upb_decoder *d) {
+ return d->bufstart_ofs + (d->end - d->buf);
}
static void upb_decoder_setmsgend(upb_decoder *d) {
upb_dispatcher_frame *f = d->dispatcher.top;
- size_t delimlen = f->end_ofs - d->bufstart_ofs;
- size_t buflen = d->end - d->buf;
+ uint32_t delimlen = f->end_ofs - d->bufstart_ofs;
+ uint32_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
d->top_is_packed = f->is_packed;
@@ -73,24 +75,25 @@ static void upb_decoder_setmsgend(upb_decoder *d) {
static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
- if (d->bufend_ofs == d->refend_ofs) {
- size_t read = upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
- if (read <= 0) {
- d->ptr = NULL;
- d->end = NULL;
- if (read == 0) return false; // EOF
- upb_decoder_exit(d); // Non-EOF error.
- }
- d->refend_ofs += read;
+ d->bufstart_ofs = upb_decoder_offset(d);
+ d->buf = NULL;
+ d->ptr = NULL;
+ d->end = NULL;
+ if (upb_byteregion_available(d->input, upb_decoder_offset(d)) == 0 &&
+ !upb_byteregion_fetch(d->input, d->status)) {
+ if (upb_eof(d->status)) return false;
+ upb_decoder_exit(d); // Non-EOF error.
}
- d->bufstart_ofs = d->bufend_ofs;
- size_t len;
- d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
+ uint32_t len;
+ d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
assert(len > 0);
- d->bufend_ofs = d->bufstart_ofs + len;
d->ptr = d->buf;
d->end = d->buf + len;
#ifdef UPB_USE_JIT_X64
+ // If we start parsing a value, we can parse up to 20 bytes without
+ // having to bounds-check anything (2 10-byte varints). Since the
+ // JIT bounds-checks only *between* values (and for strings), the
+ // JIT bails if there are not 20 bytes available.
d->jit_end = d->end - 20;
#endif
upb_decoder_setmsgend(d);
@@ -101,16 +104,21 @@ static void upb_pullbuf(upb_decoder *d) {
if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
}
-void upb_decoder_commit(upb_decoder *d) {
- d->completed_ptr = d->ptr;
- if (d->refstart_ofs < d->bufstart_ofs) {
- // Drop our ref on the previous buf's region.
- upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
- upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
- d->refstart_ofs = d->bufstart_ofs;
+void upb_decoder_skipto(upb_decoder *d, uint64_t ofs) {
+ if (ofs < upb_decoder_bufendofs(d)) {
+ upb_decoder_advance(d, ofs - upb_decoder_offset(d));
+ } else {
+ d->buf = NULL;
+ d->ptr = NULL;
+ d->end = NULL;
+ d->bufstart_ofs = ofs;
}
}
+void upb_decoder_checkpoint(upb_decoder *d) {
+ upb_byteregion_discard(d->input, upb_decoder_offset(d));
+}
+
/* Decoding of wire types *****************************************************/
@@ -151,11 +159,12 @@ done:
return ret;
}
+// Returns true on success or false if we've hit a valid EOF.
FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
- if (upb_decoder_bufleft(d) == 0 && upb_dispatcher_islegalend(&d->dispatcher)) {
- // Check for our two successful end-of-message conditions
- // (user-specified EOM and bytesrc EOF).
- if (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d)) return false;
+ if (upb_decoder_bufleft(d) == 0 &&
+ upb_dispatcher_islegalend(&d->dispatcher) &&
+ !upb_trypullbuf(d)) {
+ return false;
}
*val = upb_decode_varint32(d);
return true;
@@ -212,26 +221,15 @@ FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
return u64; // TODO: proper byte swapping
}
-INLINE upb_strref *upb_decode_string(upb_decoder *d) {
+INLINE upb_byteregion *upb_decode_string(upb_decoder *d) {
uint32_t strlen = upb_decode_varint32(d);
- d->strref.stream_offset = upb_decoder_offset(d);
- d->strref.len = strlen;
- if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
- if (upb_decoder_bufleft(d) >= strlen) {
- // Fast case.
- d->strref.ptr = d->ptr;
- upb_decoder_advance(d, strlen);
- } else {
- // Slow case.
- while (1) {
- size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen);
- upb_decoder_advance(d, consume);
- strlen -= consume;
- if (strlen == 0) break;
- upb_pullbuf(d);
- }
- }
- return &d->strref;
+ uint64_t offset = upb_decoder_offset(d);
+ upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen);
+ // Could make it an option on the callback whether we fetchall() first or not.
+ upb_byteregion_fetchall(&d->str_byteregion, d->status);
+ if (!upb_ok(d->status)) upb_decoder_exit(d);
+ upb_decoder_skipto(d, offset + strlen);
+ return &d->str_byteregion;
}
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
@@ -272,7 +270,7 @@ T(DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
-T(STRING, string, strref, upb_strref*)
+T(STRING, string, byteregion, upb_byteregion*)
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, UPB_NONDELIMITED);
@@ -352,10 +350,10 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
case UPB_WIRE_TYPE_DELIMITED:
upb_decoder_advance(d, upb_decode_varint32(d)); break;
default:
- upb_decoder_abort(d, "Invavlid wire type");
+ upb_decoder_abort(d, "Invalid wire type");
}
// TODO: deliver to unknown field callback.
- upb_decoder_commit(d);
+ upb_decoder_checkpoint(d);
upb_decoder_checkdelim(d);
}
}
@@ -380,24 +378,18 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
return;
}
f->decode(d, f);
- upb_decoder_commit(d);
+ upb_decoder_checkpoint(d);
}
}
-static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
- upb_dispatcher_frame *bottom) {
- (void)top;
- (void)bottom;
- (void)_d;
-#if 0
+static void upb_decoder_skip(void *_d, upb_dispatcher_frame *f) {
upb_decoder *d = _d;
- // TODO
- if (bottom->end_offset == UPB_NONDELIMITED) {
- // TODO: support skipping groups.
- abort();
+ if (f->end_ofs != UPB_NONDELIMITED) {
+ upb_decoder_skipto(d, d->dispatcher.top->end_ofs);
+ } else {
+ // TODO: how to support skipping groups? Dispatcher could drop callbacks,
+ // or it could be special-cased inside the decoder.
}
- d->ptr = d->buf.ptr + bottom->end_offset;
-#endif
}
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
@@ -423,24 +415,19 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
}
}
-void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
- uint64_t end_ofs, void *closure) {
+void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure) {
upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
- f->end_ofs = end_ofs;
- d->end_ofs = end_ofs;
- d->refstart_ofs = start_ofs;
- d->refend_ofs = start_ofs;
- d->bufstart_ofs = start_ofs;
- d->bufend_ofs = start_ofs;
- d->bytesrc = bytesrc;
+ f->end_ofs = UPB_NONDELIMITED;
+ d->input = input;
+ d->bufstart_ofs = upb_byteregion_startofs(input);
d->buf = NULL;
d->ptr = NULL;
- d->end = NULL; // Force a buffer pull.
+ d->end = NULL; // Force a buffer pull.
+ d->delim_end = NULL; // But don't let end-of-message get triggered.
+ d->str_byteregion.bytesrc = input->bytesrc;
#ifdef UPB_USE_JIT_X64
d->jit_end = NULL;
#endif
- d->delim_end = NULL; // But don't let end-of-message get triggered.
- d->strref.bytesrc = bytesrc;
}
void upb_decoder_uninit(upb_decoder *d) {
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index 2232c52..c35bec4 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -5,7 +5,7 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*
* upb_decoder implements a high performance, streaming decoder for protobuf
- * data that works by getting its input data from a upb_bytesrc and calling
+ * data that works by getting its input data from a upb_byteregion and calling
* into a upb_handlers.
*/
@@ -26,24 +26,14 @@ extern "C" {
struct dasm_State;
typedef struct _upb_decoder {
- upb_bytesrc *bytesrc; // Source of our serialized data.
- upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
- upb_status *status; // Where we will store any errors that occur.
- upb_strref strref; // For passing string data to callbacks.
-
- // Offsets for the bytesrc region we currently have ref'd.
- uint64_t refstart_ofs, refend_ofs;
+ upb_byteregion *input; // Input data (serialized).
+ upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
+ upb_status *status; // Where we will store any errors that occur.
+ upb_byteregion str_byteregion; // For passing string data to callbacks.
// Current input buffer and its stream offset.
const char *buf, *ptr, *end;
- uint64_t bufstart_ofs, bufend_ofs;
-
- // Stream offset for the end of the top-level message, if any.
- uint64_t end_ofs;
-
- // Buf offset as of which we've delivered calbacks; needed for rollback if
- // a callback returns UPB_BREAK.
- const char *completed_ptr;
+ uint64_t bufstart_ofs;
// End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end;
@@ -65,10 +55,6 @@ typedef struct _upb_decoder {
sigjmp_buf exitjmp;
} upb_decoder;
-// Used for frames that have no specific end offset: groups, repeated primitive
-// fields inside groups, and the top-level message.
-#define UPB_NONDELIMITED UINT64_MAX
-
// Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref
// on the handlers.
@@ -77,13 +63,13 @@ void upb_decoder_uninit(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a
// state where it has not seen any data, and expects the next data to be from
-// the beginning of a new protobuf. Parsers must be reset before they can be
-// used. A decoder can be reset multiple times.
-//
-// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message.
-void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs,
- uint64_t end_ofs, void *closure);
+// the beginning of a new protobuf. Decoders must be reset before they can be
+// used. A decoder can be reset multiple times. "input" must live until the
+// decoder is reset again (or destroyed).
+void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure);
+// Decodes serialized data (calling handlers as the data is parsed) until error
+// or EOF (see *status for details).
void upb_decoder_decode(upb_decoder *d, upb_status *status);
#ifdef __cplusplus
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index 72c4aa1..75e5b6b 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -129,7 +129,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.define PTR, rbx
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
-|.type STRREF, upb_strref, r14
+|.type BYTEREGION,upb_byteregion, r14
|.type DECODER, upb_decoder, r15
|.type STDARRAY, upb_stdarray
|
@@ -365,23 +365,26 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
// robust checks.
| mov ecx, dword [PTR + tag_size]
| decode_loaded_varint tag_size
+ | mov rdi, DECODER->effective_end
+ | sub rdi, rax
+ | cmp ARG3_64, rdi // if (len > d->effective_end - str)
+ | ja ->exit_jit // Can't deliver, whole string not in buf.
+
+ // Update PTR to point past end of string.
| mov rdi, rax
| add rdi, ARG3_64
- | mov STRREF->len, ARG3_32
- | mov STRREF->ptr, rax
- | sub rax, DECODER->buf
- | add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs
- | mov STRREF->stream_offset, eax
- | mov ARG3_64, STRREF
- | cmp rdi, DECODER->effective_end
- | ja ->exit_jit // Can't deliver, whole string not in buf.
| mov PTR, rdi
- break;
- case UPB_TYPE_ENDGROUP: // A pseudo-type.
- | add PTR, tag_size
- | jmp =>m->jit_endofmsg_pclabel
- return;
+ // Populate BYTEREGION appropriately.
+ | sub rax, DECODER->buf
+ | add rax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs
+ | mov BYTEREGION->start, rax
+ | mov BYTEREGION->discard, rax
+ | add rax, ARG3_64
+ | mov BYTEREGION->end, rax
+ | mov BYTEREGION->fetch, rax // Fast path ensures whole string is loaded
+ | mov ARG3_64, BYTEREGION
+ break;
// Will dispatch callbacks and call submessage in a second.
case UPB_TYPE(MESSAGE):
@@ -471,7 +474,6 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| callp f->endsubmsg
}
| popframe upb_fhandlers_getmsg(f)
-
} else {
| mov ARG1_64, CLOSURE
// Test for callbacks we can specialize.
@@ -522,8 +524,8 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
}
// PTR should point to the beginning of the tag.
-static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
- upb_mhandlers *m,
+static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
+ uint32_t next_tag, upb_mhandlers *m,
upb_fhandlers *f, upb_fhandlers *next_f) {
// PC-label for the dispatch table.
// We check the wire type (which must be loaded in edx) because the
@@ -546,7 +548,14 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
|1: // Label for repeating this field.
- upb_decoder_jit_decodefield(d, m, f->type, upb_value_size(tag));
+ int tag_size = upb_value_size(tag);
+ if (f->type == UPB_TYPE_ENDGROUP) {
+ | add PTR, tag_size
+ | jmp =>m->jit_endofmsg_pclabel
+ return;
+ }
+
+ upb_decoder_jit_decodefield(d, m, f->type, tag_size);
upb_decoder_jit_callcb(d, f);
// Epilogue: load next tag, check for repeated field.
@@ -673,7 +682,7 @@ static void upb_decoder_jit(upb_decoder *d) {
| sub rsp, 8
| mov DECODER, ARG1_64
| mov FRAME, DECODER:ARG1_64->dispatcher.top
- | lea STRREF, DECODER:ARG1_64->strref
+ | lea BYTEREGION, DECODER:ARG1_64->str_byteregion
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 37b86d9..3176355 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -23,7 +23,7 @@ void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
upb_accessors_reghandlers(h, md);
upb_decoder_init(&d, h);
upb_handlers_unref(h);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
+ upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg);
upb_decoder_decode(&d, status);
upb_stringsrc_uninit(&strsrc);
@@ -84,16 +84,19 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_handlers_unref(h);
upb_descreader r;
upb_descreader_init(&r);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r);
+ upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &r);
upb_decoder_decode(&d, status);
+ upb_stringsrc_uninit(&strsrc);
+ upb_decoder_uninit(&d);
+ if (!upb_ok(status)) {
+ upb_descreader_uninit(&r);
+ return NULL;
+ }
upb_def **defs = upb_descreader_getdefs(&r, n);
upb_def **defscopy = malloc(sizeof(upb_def*) * (*n));
memcpy(defscopy, defs, sizeof(upb_def*) * (*n));
-
upb_descreader_uninit(&r);
- upb_stringsrc_uninit(&strsrc);
- upb_decoder_uninit(&d);
// Set default accessors and layouts on all messages.
for(int i = 0; i < *n; i++) {
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
index 4056b8f..3f68f90 100644
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@@ -35,15 +35,16 @@ err:
return -1;
}
-static int upb_textprinter_putescaped(upb_textprinter *p, const upb_strref *strref,
+static int upb_textprinter_putescaped(upb_textprinter *p,
+ const upb_byteregion *bytes,
bool preserve_utf8) {
// Based on CEscapeInternal() from Google's protobuf release.
// TODO; we could read directly from a bytesrc's buffer instead.
- // TODO; we could write strrefs to the sink when possible.
+ // TODO; we could write byteregions to the sink when possible.
char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
- char *buf = malloc(strref->len), *src = buf;
- char *end = src + strref->len;
- upb_bytesrc_read(strref->bytesrc, strref->stream_offset, strref->len, buf);
+ char *buf = malloc(upb_byteregion_len(bytes)), *src = buf;
+ char *end = src + upb_byteregion_len(bytes);
+ upb_byteregion_copyall(bytes, buf);
// I think hex is prettier and more useful, but proto2 uses octal; should
// investigate whether it can parse hex also.
@@ -142,7 +143,7 @@ static upb_flow_t upb_textprinter_putstr(void *_p, upb_value fval,
uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
const upb_fielddef *f = upb_value_getfielddef(fval);
CHECK(upb_bytesink_putc(p->sink, '"'));
- CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
+ CHECK(upb_textprinter_putescaped(p, upb_value_getbyteregion(val),
f->type == UPB_TYPE(STRING)));
CHECK(upb_bytesink_putc(p->sink, '"'));
return UPB_CONTINUE;
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
index 1bbd193..19977e9 100644
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@@ -113,7 +113,7 @@ INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
/* Encoding *******************************************************************/
-INLINE size_t upb_value_size(uint64_t val) {
+INLINE int upb_value_size(uint64_t val) {
#ifdef __GNUC__
int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0.
#else
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback