summaryrefslogtreecommitdiff
path: root/upb/pb
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb')
-rw-r--r-- upb/pb/compile_decoder.c 418
-rw-r--r-- upb/pb/compile_decoder_x64.c 2
-rw-r--r-- upb/pb/compile_decoder_x64.dasc 78
-rw-r--r-- upb/pb/decoder.c 225
-rw-r--r-- upb/pb/decoder.h 97
-rw-r--r-- upb/pb/decoder.int.h 31
-rw-r--r-- upb/pb/glue.c 4
7 files changed, 495 insertions, 360 deletions
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index f96f07a..400d6fa 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -19,13 +19,6 @@
#define MAXLABEL 5
#define EMPTYLABEL -1
-static const void *methodkey(const upb_msgdef *md, const upb_handlers *h) {
- const void *ret = h ? (const void*)h : (const void*)md;
- assert(ret);
- return ret;
-}
-
-
/* mgroup *********************************************************************/
static void freegroup(upb_refcounted *r) {
@@ -80,10 +73,8 @@ static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
visit(r, m->group, closure);
}
-static upb_pbdecodermethod *newmethod(const upb_msgdef *msg,
- const upb_handlers *dest_handlers,
- mgroup *group,
- const void *key) {
+static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
+ mgroup *group) {
static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
upb_pbdecodermethod *ret = malloc(sizeof(*ret));
upb_refcounted_init(UPB_UPCAST(ret), &vtbl, &ret);
@@ -92,11 +83,10 @@ static upb_pbdecodermethod *newmethod(const upb_msgdef *msg,
// The method references the group and vice-versa, in a circular reference.
upb_ref2(ret, group);
upb_ref2(group, ret);
- upb_inttable_insertptr(&group->methods, key, upb_value_ptr(ret)); // Owns ref
+ upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
upb_refcounted_unref(UPB_UPCAST(ret), &ret);
ret->group = UPB_UPCAST(group);
- ret->schema_ = msg;
ret->dest_handlers_ = dest_handlers;
ret->is_native_ = false; // If we JIT, it will update this later.
upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
@@ -126,10 +116,6 @@ void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
upb_refcounted_checkref(UPB_UPCAST(m), owner);
}
-const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m) {
- return m->schema_;
-}
-
const upb_handlers *upb_pbdecodermethod_desthandlers(
const upb_pbdecodermethod *m) {
return m->dest_handlers_;
@@ -144,12 +130,12 @@ bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
return m->is_native_;
}
-const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers(
- const upb_handlers *dest, const void *owner) {
+const upb_pbdecodermethod *upb_pbdecodermethod_new(
+ const upb_pbdecodermethodopts *opts, const void *owner) {
upb_pbcodecache cache;
upb_pbcodecache_init(&cache);
const upb_pbdecodermethod *ret =
- upb_pbcodecache_getdecodermethodfordesthandlers(&cache, dest);
+ upb_pbcodecache_getdecodermethod(&cache, opts);
upb_pbdecodermethod_ref(ret, owner);
upb_pbcodecache_uninit(&cache);
return ret;
@@ -165,11 +151,15 @@ typedef struct {
uint32_t *pc;
int fwd_labels[MAXLABEL];
int back_labels[MAXLABEL];
+
+ // For fields marked "lazy", parse them lazily or eagerly?
+ bool lazy;
} compiler;
-static compiler *newcompiler(mgroup *group) {
+static compiler *newcompiler(mgroup *group, bool lazy) {
compiler *ret = malloc(sizeof(*ret));
ret->group = group;
+ ret->lazy = lazy;
for (int i = 0; i < MAXLABEL; i++) {
ret->fwd_labels[i] = EMPTYLABEL;
ret->back_labels[i] = EMPTYLABEL;
@@ -300,11 +290,11 @@ static void putop(compiler *c, opcode op, ...) {
}
case OP_STARTMSG:
case OP_ENDMSG:
- case OP_PUSHTAGDELIM:
case OP_PUSHLENDELIM:
case OP_POP:
case OP_SETDELIM:
case OP_HALT:
+ case OP_RET:
put32(c, op);
break;
case OP_PARSE_DOUBLE:
@@ -321,13 +311,13 @@ static void putop(compiler *c, opcode op, ...) {
case OP_PARSE_SINT32:
case OP_PARSE_SINT64:
case OP_STARTSEQ:
- case OP_SETGROUPNUM:
case OP_ENDSEQ:
case OP_STARTSUBMSG:
case OP_ENDSUBMSG:
case OP_STARTSTR:
case OP_STRING:
case OP_ENDSTR:
+ case OP_PUSHTAGDELIM:
put32(c, op | va_arg(ap, upb_selector_t) << 8);
break;
case OP_SETBIGGROUPNUM:
@@ -382,10 +372,10 @@ const char *upb_pbdecoder_getopname(unsigned int op) {
T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
OP(STARTMSG), OP(ENDMSG), OP(STARTSEQ), OP(ENDSEQ), OP(STARTSUBMSG),
- OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL),
+ OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
- OP(SETGROUPNUM), OP(SETBIGGROUPNUM), OP(HALT),
+ OP(SETBIGGROUPNUM), OP(HALT),
};
return op > OP_HALT ? names[0] : names[op];
#undef OP
@@ -413,16 +403,17 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
const upb_pbdecodermethod *method =
(void *)((char *)dispatch -
offsetof(upb_pbdecodermethod, dispatch));
- fprintf(f, " %s", upb_msgdef_fullname(method->schema_));
+ fprintf(f, " %s", upb_msgdef_fullname(
+ upb_handlers_msgdef(method->dest_handlers_)));
break;
}
case OP_STARTMSG:
case OP_ENDMSG:
case OP_PUSHLENDELIM:
- case OP_PUSHTAGDELIM:
case OP_POP:
case OP_SETDELIM:
case OP_HALT:
+ case OP_RET:
break;
case OP_PARSE_DOUBLE:
case OP_PARSE_FLOAT:
@@ -444,7 +435,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
case OP_STARTSTR:
case OP_STRING:
case OP_ENDSTR:
- case OP_SETGROUPNUM:
+ case OP_PUSHTAGDELIM:
fprintf(f, " %d", instr >> 8);
break;
case OP_SETBIGGROUPNUM:
@@ -537,11 +528,11 @@ static void putpush(compiler *c, const upb_fielddef *f) {
putop(c, OP_PUSHLENDELIM);
} else {
uint32_t fn = upb_fielddef_number(f);
- putop(c, OP_PUSHTAGDELIM);
if (fn >= 1 << 24) {
+ putop(c, OP_PUSHTAGDELIM, 0);
putop(c, OP_SETBIGGROUPNUM, fn);
} else {
- putop(c, OP_SETGROUPNUM, fn);
+ putop(c, OP_PUSHTAGDELIM, fn);
}
}
}
@@ -549,13 +540,35 @@ static void putpush(compiler *c, const upb_fielddef *f) {
static upb_pbdecodermethod *find_submethod(const compiler *c,
const upb_pbdecodermethod *method,
const upb_fielddef *f) {
- const upb_handlers *sub = method->dest_handlers_ ?
- upb_handlers_getsubhandlers(method->dest_handlers_, f) : NULL;
- const void *key = methodkey(upb_downcast_msgdef(upb_fielddef_subdef(f)), sub);
+ const upb_handlers *sub =
+ upb_handlers_getsubhandlers(method->dest_handlers_, f);
upb_value v;
- bool ok = upb_inttable_lookupptr(&c->group->methods, key, &v);
- UPB_ASSERT_VAR(ok, ok);
- return upb_value_getptr(v);
+ return upb_inttable_lookupptr(&c->group->methods, sub, &v)
+ ? upb_value_getptr(v)
+ : NULL;
+}
+
+static void putsel(compiler *c, opcode op, upb_selector_t sel,
+ const upb_handlers *h) {
+ if (upb_handlers_gethandler(h, sel)) {
+ putop(c, op, sel);
+ }
+}
+
+// Puts an opcode to call a callback, but only if a callback actually exists for
+// this field and handler type.
+static void putcb(compiler *c, opcode op, const upb_handlers *h,
+ const upb_fielddef *f, upb_handlertype_t type) {
+ putsel(c, op, getsel(f, type), h);
+}
+
+static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
+ if (!upb_fielddef_lazy(f))
+ return false;
+
+ return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
+ upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
+ upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
}
// Adds bytecode for parsing the given message to the given decoderplan,
@@ -596,177 +609,178 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
upb_inttable_uninit(&method->dispatch);
upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
+ const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+ const upb_msgdef *md = upb_handlers_msgdef(h);
+
method->code_base.ofs = pcofs(c);
putop(c, OP_SETDISPATCH, &method->dispatch);
- putop(c, OP_STARTMSG);
+ putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
label(c, LABEL_FIELD);
upb_msg_iter i;
- for(upb_msg_begin(&i, method->schema_); !upb_msg_done(&i); upb_msg_next(&i)) {
+ for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
- upb_descriptortype_t type = upb_fielddef_descriptortype(f);
+ upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
+ upb_fieldtype_t type = upb_fielddef_type(f);
// From a decoding perspective, ENUM is the same as INT32.
- if (type == UPB_DESCRIPTOR_TYPE_ENUM)
- type = UPB_DESCRIPTOR_TYPE_INT32;
-
- label(c, LABEL_FIELD);
-
- switch (upb_fielddef_type(f)) {
- case UPB_TYPE_MESSAGE: {
- const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
- int wire_type = (type == UPB_DESCRIPTOR_TYPE_MESSAGE) ?
- UPB_WIRE_TYPE_DELIMITED : UPB_WIRE_TYPE_START_GROUP;
- if (upb_fielddef_isseq(f)) {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, wire_type, LABEL_DISPATCH);
- dispatchtarget(c, method, f, wire_type);
- putop(c, OP_PUSHTAGDELIM);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
- label(c, LABEL_LOOPSTART);
- putpush(c, f);
- putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
- putop(c, OP_CALL, sub_m);
- putop(c, OP_POP);
- putop(c, OP_ENDSUBMSG, getsel(f, UPB_HANDLER_ENDSUBMSG));
- if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
- putop(c, OP_SETDELIM);
- }
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- label(c, LABEL_LOOPBREAK);
- putop(c, OP_POP);
- putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ));
- } else {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, wire_type, LABEL_DISPATCH);
- dispatchtarget(c, method, f, wire_type);
- putpush(c, f);
- putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
- putop(c, OP_CALL, sub_m);
- putop(c, OP_POP);
- putop(c, OP_ENDSUBMSG, getsel(f, UPB_HANDLER_ENDSUBMSG));
- if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
- putop(c, OP_SETDELIM);
- }
- }
- break;
+ if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
+ descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
+
+ if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
+ const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
+ if (!sub_m) {
+ // Don't emit any code for this field at all; it will be parsed as an
+ // unknown field.
+ continue;
}
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES:
- if (upb_fielddef_isseq(f)) {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
- dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
- putop(c, OP_PUSHTAGDELIM);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
- label(c, LABEL_LOOPSTART);
- putop(c, OP_PUSHLENDELIM);
- putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
- putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
- putop(c, OP_POP);
- putop(c, OP_ENDSTR, getsel(f, UPB_HANDLER_ENDSTR));
- putop(c, OP_SETDELIM);
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- label(c, LABEL_LOOPBREAK);
- putop(c, OP_POP);
- putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ));
- } else {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
- dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
- putop(c, OP_PUSHLENDELIM);
- putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
- putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
- putop(c, OP_POP);
- putop(c, OP_ENDSTR, getsel(f, UPB_HANDLER_ENDSTR));
+
+ label(c, LABEL_FIELD);
+
+ int wire_type = (descriptor_type == UPB_DESCRIPTOR_TYPE_MESSAGE)
+ ? UPB_WIRE_TYPE_DELIMITED
+ : UPB_WIRE_TYPE_START_GROUP;
+ if (upb_fielddef_isseq(f)) {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, wire_type, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, wire_type);
+ putop(c, OP_PUSHTAGDELIM, 0);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
+ label(c, LABEL_LOOPSTART);
+ putpush(c, f);
+ putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
+ putop(c, OP_CALL, sub_m);
+ putop(c, OP_POP);
+ putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
+ if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
putop(c, OP_SETDELIM);
}
- break;
- default: {
- opcode parse_type = (opcode)type;
- assert((int)parse_type >= 0 && parse_type <= OP_MAX);
- upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
- int wire_type = native_wire_types[upb_fielddef_descriptortype(f)];
- if (upb_fielddef_isseq(f)) {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
- dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
- putop(c, OP_PUSHLENDELIM);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed
- label(c, LABEL_LOOPSTART);
- putop(c, parse_type, sel);
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- dispatchtarget(c, method, f, wire_type);
- putop(c, OP_PUSHTAGDELIM);
- putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed
- label(c, LABEL_LOOPSTART);
- putop(c, parse_type, sel);
- putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
- putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
- putop(c, OP_BRANCH, -LABEL_LOOPSTART);
- label(c, LABEL_LOOPBREAK);
- putop(c, OP_POP); // Packed and non-packed join.
- putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ));
- putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ.
- } else {
- putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
- putchecktag(c, f, wire_type, LABEL_DISPATCH);
- dispatchtarget(c, method, f, wire_type);
- putop(c, parse_type, sel);
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ label(c, LABEL_LOOPBREAK);
+ putop(c, OP_POP);
+ putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+ } else {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, wire_type, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, wire_type);
+ putpush(c, f);
+ putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
+ putop(c, OP_CALL, sub_m);
+ putop(c, OP_POP);
+ putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
+ if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
+ putop(c, OP_SETDELIM);
}
}
+ } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
+ type == UPB_TYPE_MESSAGE) {
+ label(c, LABEL_FIELD);
+ if (upb_fielddef_isseq(f)) {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+ putop(c, OP_PUSHTAGDELIM, 0);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
+ label(c, LABEL_LOOPSTART);
+ putop(c, OP_PUSHLENDELIM);
+ putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
+ // Need to emit even if no handler to skip past the string.
+ putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
+ putop(c, OP_POP);
+ putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
+ putop(c, OP_SETDELIM);
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ label(c, LABEL_LOOPBREAK);
+ putop(c, OP_POP);
+ putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+ } else {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+ putop(c, OP_PUSHLENDELIM);
+ putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
+ putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
+ putop(c, OP_POP);
+ putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
+ putop(c, OP_SETDELIM);
+ }
+ } else {
+ label(c, LABEL_FIELD);
+ opcode parse_type = (opcode)descriptor_type;
+ assert((int)parse_type >= 0 && parse_type <= OP_MAX);
+ upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+ int wire_type = native_wire_types[upb_fielddef_descriptortype(f)];
+ if (upb_fielddef_isseq(f)) {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+ putop(c, OP_PUSHLENDELIM);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed
+ label(c, LABEL_LOOPSTART);
+ putop(c, parse_type, sel);
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ dispatchtarget(c, method, f, wire_type);
+ putop(c, OP_PUSHTAGDELIM, 0);
+ putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed
+ label(c, LABEL_LOOPSTART);
+ putop(c, parse_type, sel);
+ putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+ putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
+ putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+ label(c, LABEL_LOOPBREAK);
+ putop(c, OP_POP); // Packed and non-packed join.
+ putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+ putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ.
+ } else {
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ putchecktag(c, f, wire_type, LABEL_DISPATCH);
+ dispatchtarget(c, method, f, wire_type);
+ putop(c, parse_type, sel);
+ }
}
}
+
// For now we just loop back to the last field of the message (or if none,
// the DISPATCH opcode for the message.
putop(c, OP_BRANCH, -LABEL_FIELD);
+
+ // Insert both a label and a dispatch table entry for this end-of-msg.
label(c, LABEL_ENDMSG);
- putop(c, OP_ENDMSG);
+ upb_value val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
+ upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
+
+ putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
+ putop(c, OP_RET);
upb_inttable_compact(&method->dispatch);
}
-// Populate "methods" with new upb_pbdecodermethod objects reachable from "md".
-// "h" can be NULL, in which case the methods will not be statically bound to
-// destination handlers.
-//
-// Returns the method for this msgdef/handlers.
+// Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
+// Nothing is returned; each new method is inserted into c->group->methods.
//
-// Note that there is a deep difference between keying the method table on
-// upb_msgdef and keying it on upb_handlers. Since upb_msgdef : upb_handlers
-// can be 1:many, binding a handlers statically can result in *more* methods
-// being generated than if the methods are dynamically-bound.
-//
-// On the other hand, if/when the optimization mentioned below is implemented,
-// binding to a upb_handlers can result in *fewer* methods being generated if
-// many of the submessages have no handlers bound to them.
-static void find_methods(compiler *c, const upb_msgdef *md,
- const upb_handlers *h) {
- const void *key = methodkey(md, h);
+// Generates a new method for every upb_handlers object reachable from "h".
+static void find_methods(compiler *c, const upb_handlers *h) {
upb_value v;
- if (upb_inttable_lookupptr(&c->group->methods, key, &v))
+ if (upb_inttable_lookupptr(&c->group->methods, h, &v))
return;
- newmethod(md, h, c->group, key);
+ newmethod(h, c->group);
// Find submethods.
upb_msg_iter i;
+ const upb_msgdef *md = upb_handlers_msgdef(h);
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
- if (upb_fielddef_type(f) != UPB_TYPE_MESSAGE)
- continue;
- const upb_handlers *sub_h = h ? upb_handlers_getsubhandlers(h, f) : NULL;
-
- if (h && !sub_h &&
- upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
- // OPT: We could optimize away the sub-method, but would have to make sure
- // this field is compiled as a string instead of a submessage.
+ const upb_handlers *sub_h;
+ if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
+ (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
+ // We only generate a decoder method for submessages with handlers.
+ // Others will be parsed as unknown fields.
+ find_methods(c, sub_h);
}
-
- find_methods(c, upb_downcast_msgdef(upb_fielddef_subdef(f)), sub_h);
}
}
@@ -814,12 +828,6 @@ static void sethandlers(mgroup *g, bool allowjit) {
}
}
-static bool bind_dynamic(bool allowjit) {
- // For the moment, JIT handlers always bind statically, but bytecode handlers
- // never do.
- return !allowjit;
-}
-
#else // UPB_USE_JIT_X64
static void sethandlers(mgroup *g, bool allowjit) {
@@ -828,33 +836,19 @@ static void sethandlers(mgroup *g, bool allowjit) {
set_bytecode_handlers(g);
}
-static bool bind_dynamic(bool allowjit) {
- // Bytecode handlers never bind statically.
- UPB_UNUSED(allowjit);
- return true;
-}
-
#endif // UPB_USE_JIT_X64
// TODO(haberman): allow this to be constructed for an arbitrary set of dest
// handlers and other mgroups (but verify we have a transitive closure).
-const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit,
+const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
const void *owner) {
UPB_UNUSED(allowjit);
assert(upb_handlers_isfrozen(dest));
- const upb_msgdef *md = upb_handlers_msgdef(dest);
mgroup *g = newgroup(owner);
- compiler *c = newcompiler(g);
-
- if (bind_dynamic(allowjit)) {
- // If binding dynamically, remove the reference against destination
- // handlers.
- dest = NULL;
- }
-
- find_methods(c, md, dest);
+ compiler *c = newcompiler(g, lazy);
+ find_methods(c, dest);
// We compile in two passes:
// 1. all messages are assigned relative offsets from the beginning of the
@@ -909,20 +903,28 @@ bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
return true;
}
-const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers(
- upb_pbcodecache *c, const upb_handlers *handlers) {
+const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
+ upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
// Right now we build a new DecoderMethod every time.
// TODO(haberman): properly cache methods by their true key.
- const mgroup *g = mgroup_new(handlers, c->allow_jit_, c);
+ const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
upb_inttable_push(&c->groups, upb_value_constptr(g));
- const upb_msgdef *md = upb_handlers_msgdef(handlers);
- if (bind_dynamic(c->allow_jit_)) {
- handlers = NULL;
- }
-
upb_value v;
- bool ok = upb_inttable_lookupptr(&g->methods, methodkey(md, handlers), &v);
+ bool ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
UPB_ASSERT_VAR(ok, ok);
return upb_value_getptr(v);
}
+
+
+/* upb_pbdecodermethodopts ****************************************************/
+
+void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
+ const upb_handlers *h) {
+ opts->handlers = h;
+ opts->lazy = false;
+}
+
+void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
+ opts->lazy = lazy;
+}
diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c
index 44331b8..913a748 100644
--- a/upb/pb/compile_decoder_x64.c
+++ b/upb/pb/compile_decoder_x64.c
@@ -194,6 +194,8 @@ static void patchdispatch(jitcompiler *jc) {
}
// Define for JIT debugging.
+//#define UPB_JIT_LOAD_SO
+
#ifdef UPB_JIT_LOAD_SO
static void load_so(jitcompiler *jc) {
// Dump to a .so file in /tmp and load that, so all the tooling works right
diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc
index 571aa9b..97fb5ce 100644
--- a/upb/pb/compile_decoder_x64.dasc
+++ b/upb/pb/compile_decoder_x64.dasc
@@ -42,6 +42,9 @@
| mov DECODER->top, FRAME
| mov DECODER->ptr, PTR
| mov DECODER->data_end, DATAEND
+| // We don't guarantee that delim_end is NULL when out of range like the
+| // interpreter does.
+| mov DECODER->delim_end, DELIMEND
| sub DELIMEND, DECODER->buf
| add DELIMEND, DECODER->bufstart_ofs
| mov FRAME->end_ofs, DELIMEND
@@ -205,6 +208,8 @@ static void emit_static_asm(jitcompiler *jc) {
|
| mov DECODER, rdi
| callp upb_pbdecoder_resume // Same args as us; reuse regs.
+ | test eax, eax
+ | jns >1
| mov DECODER->saved_rsp, rsp
| mov rax, rbx
| load_regs
@@ -212,12 +217,13 @@ static void emit_static_asm(jitcompiler *jc) {
| // Test whether we have a saved stack to resume.
| mov ARG3_64, DECODER->call_len
| test ARG3_64, ARG3_64
- | jnz >1
+ | jnz >2
|
| call rax
|
| mov rax, DECODER->size_param
| mov qword DECODER->call_len, 0
+ |1:
| add rsp, 8 // Counter previous alignment.
| pop rbx
| pop r12
@@ -227,7 +233,7 @@ static void emit_static_asm(jitcompiler *jc) {
| pop rbp
| ret
|
- |1:
+ |2:
| // Resume decoder.
| lea ARG2_64, DECODER->callstack
| sub rsp, ARG3_64
@@ -293,6 +299,7 @@ static void emit_static_asm(jitcompiler *jc) {
| add DELIMEND, rdx
| cmp FRAME, DECODER->limit
| je >3 // Stack overflow
+ | mov dword FRAME->groupnum, 0
| test rcx, rcx
| jz >2
| mov DATAEND, DECODER->end
@@ -850,24 +857,7 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
|5:
}
-// Emit message-specific assembly. Overall code layout is:
-// +---------------------------------------------------------------------------+
-// | Message A |
-// | 1. function prologue (startmsg), jmps to OP_CHECKDELIM_RET before first |
-// | OP_TAG* in 4. |
-// | 2. function epilogue (endmsg), returns from function. |
-// | 3. dispatch function (returns fptr to 4) |
-// | - loops internally to skip unknown fields |
-// | - after each unknown field does OP_CHECKDELIM_RET (returns 2) |
-// | - also returns 2 for END_GROUP.
-// | 4. code for each op: |
-// | - OP_TAG* on mismatch calls 3 to get addr, then jumps to 4 (or 2 on EOM).|
-// | - OP_CHECKDELIM_RET jumps to 2 |
-// +---------------------------------------------------------------------------+
-// | Message B |
-// | 1. ... |
-// | ... |
-// +---------------------------------------------------------------------------+
+// Compile the bytecode to x64.
static void jitbytecode(jitcompiler *jc) {
upb_pbdecodermethod *method = NULL;
const upb_handlers *h = NULL;
@@ -877,20 +867,21 @@ static void jitbytecode(jitcompiler *jc) {
uint32_t arg = instr >> 8;
int32_t longofs = arg;
- if (op != OP_STARTMSG && op != OP_SETDISPATCH) {
+ if (op != OP_SETDISPATCH) {
+ // Skipped for SETDISPATCH because it defines its own asmlabel for the
+ // dispatch code it emits.
asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
+
+ // Skipped for SETDISPATCH because it should point at the function
+ // prologue, not the dispatch function that is emitted first.
+ // TODO: optimize this to only define pclabels that are actually used.
+ |=>define_pclabel(jc, jc->pc):
}
- // TODO: optimize this to only define pclabels that are actually used.
- |=>define_pclabel(jc, jc->pc):
+
jc->pc++;
switch (op) {
case OP_STARTMSG: {
- // This opcode serves as a function prolouge also.
- const char *msgname = upb_msgdef_fullname(method->schema_);
- asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
- |=>define_pclabel(jc, method):
- | sub rsp, 8
upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
if (startmsg) {
// bool startmsg(void *closure, const void *hd)
@@ -905,11 +896,12 @@ static void jitbytecode(jitcompiler *jc) {
| jmp <1
|2:
}
+ } else {
+ | nop
}
break;
}
case OP_ENDMSG: {
- // This opcode serves as a function epiloue also.
upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
|9:
if (endmsg) {
@@ -919,11 +911,12 @@ static void jitbytecode(jitcompiler *jc) {
| mov ARG3_64, DECODER->status
| callp endmsg
}
- | add rsp, 8
- | ret
break;
}
case OP_SETDISPATCH: {
+ uint32_t *op_pc = jc->pc - 1;
+
+ // Load info for new method.
upb_inttable *dispatch;
memcpy(&dispatch, jc->pc, sizeof(void*));
jc->pc += sizeof(void*) / sizeof(uint32_t);
@@ -936,9 +929,18 @@ static void jitbytecode(jitcompiler *jc) {
// case instead of parsing it field by field. We should also do the skip
// in the containing message's code.
h = method->dest_handlers_;
- const char *msgname = upb_msgdef_fullname(method->schema_);
+ const char *msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
+
+ // Emit dispatch code for new method.
asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
jitdispatch(jc, method);
+
+ // Emit function prologue for new method.
+ asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
+ |=>define_pclabel(jc, op_pc):
+ |=>define_pclabel(jc, method):
+ | sub rsp, 8
+
break;
}
case OP_PARSE_DOUBLE:
@@ -1056,6 +1058,7 @@ static void jitbytecode(jitcompiler *jc) {
| add FRAME, sizeof(upb_pbdecoder_frame)
| cmp FRAME, DECODER->limit
| je ->err
+ | mov dword FRAME->groupnum, arg
break;
case OP_PUSHLENDELIM:
| call ->pushlendelim
@@ -1075,9 +1078,6 @@ static void jitbytecode(jitcompiler *jc) {
| mov DATAEND, DELIMEND
|1:
break;
- case OP_SETGROUPNUM:
- | mov dword FRAME->groupnum, arg
- break;
case OP_SETBIGGROUPNUM:
| mov dword FRAME->groupnum, *jc->pc++
break;
@@ -1086,11 +1086,16 @@ static void jitbytecode(jitcompiler *jc) {
| je =>pclabel(jc, jc->pc + longofs)
break;
case OP_CALL:
- | call =>pclabel(jc, jc->pc + longofs + 3)
+ | call =>pclabel(jc, jc->pc + longofs)
break;
case OP_BRANCH:
| jmp =>pclabel(jc, jc->pc + longofs);
break;
+ case OP_RET:
+ |9:
+ | add rsp, 8
+ | ret
+ break;
case OP_TAG1:
jittag(jc, (arg >> 8) & 0xff, 1, (int8_t)arg, method);
break;
@@ -1107,6 +1112,7 @@ static void jitbytecode(jitcompiler *jc) {
assert(false);
}
}
+
asmlabel(jc, "eof");
| nop
}
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index c5fae0e..9c54b8a 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -18,7 +18,6 @@
#endif
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
-#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; }
// Error messages that are shared between the bytecode and JIT decoders.
const char *kPbDecoderStackOverflow = "Nesting too deep.";
@@ -45,10 +44,10 @@ static bool consumes_input(opcode op) {
case OP_PUSHTAGDELIM:
case OP_POP:
case OP_SETDELIM:
- case OP_SETGROUPNUM:
case OP_SETBIGGROUPNUM:
case OP_CHECKDELIM:
case OP_CALL:
+ case OP_RET:
case OP_BRANCH:
return false;
default:
@@ -147,13 +146,12 @@ static void checkpoint(upb_pbdecoder *d) {
}
// Resumes the decoder from an initial state or from a previous suspend.
-void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
- size_t size, const upb_bufhandle *handle) {
+int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
+ size_t size, const upb_bufhandle *handle) {
UPB_UNUSED(p); // Useless; just for the benefit of the JIT.
d->buf_param = buf;
d->size_param = size;
d->handle = handle;
- d->skip = 0;
if (d->residual_end > d->residual) {
// We have residual bytes from the last buffer.
assert(ptr(d) == d->residual);
@@ -161,7 +159,11 @@ void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
switchtobuf(d, buf, buf + size);
}
d->checkpoint = ptr(d);
- return d; // For the JIT.
+ if (d->top->groupnum < 0) {
+ CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
+ d->checkpoint = ptr(d);
+ }
+ return DECODE_OK;
}
// Suspends the decoder at the last checkpoint, without saving any residual
@@ -176,10 +178,10 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
assert(!in_residual_buf(d, d->checkpoint));
assert(d->buf == d->buf_param);
size_t consumed = d->checkpoint - d->buf;
- d->bufstart_ofs += consumed + d->skip;
+ d->bufstart_ofs += consumed;
d->residual_end = d->residual;
switchtobuf(d, d->residual, d->residual_end);
- return consumed + d->skip;
+ return consumed;
}
}
@@ -209,11 +211,11 @@ static size_t suspend_save(upb_pbdecoder *d) {
assert(save <= sizeof(d->residual));
memcpy(d->residual, ptr(d), save);
d->residual_end = d->residual + save;
- d->bufstart_ofs = offset(d) + d->skip;
+ d->bufstart_ofs = offset(d);
}
switchtobuf(d, d->residual, d->residual_end);
- return d->size_param + d->skip;
+ return d->size_param;
}
static int32_t skip(upb_pbdecoder *d, size_t bytes) {
@@ -221,12 +223,16 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
if (curbufleft(d) >= bytes) {
// Skipped data is all in current buffer.
advance(d, bytes);
+ return DECODE_OK;
} else {
// Skipped data extends beyond currently available buffers.
- d->skip = bytes - curbufleft(d);
- advance(d, curbufleft(d));
+ d->pc = d->last;
+ size_t skip = bytes - curbufleft(d);
+ d->bufstart_ofs += (d->end - d->buf) + skip;
+ d->residual_end = d->residual;
+ switchtobuf(d, d->residual, d->residual_end);
+ return d->size_param + skip;
}
- return DECODE_OK;
}
FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
@@ -247,8 +253,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
if (curbufleft(d) >= bytes) {
consumebytes(d, buf + avail, bytes);
return DECODE_OK;
- } else if (d->data_end - d->buf == d->top->end_ofs - d->bufstart_ofs) {
- seterr(d, "Submessage ended in the middle of a value");
+ } else if (d->data_end == d->delim_end) {
+ seterr(d, "Submessage ended in the middle of a value or group");
return upb_pbdecoder_suspend(d);
} else {
return suspend_save(d);
@@ -378,11 +384,24 @@ static bool push(upb_pbdecoder *d, uint64_t end) {
fr++;
fr->end_ofs = end;
fr->dispatch = NULL;
- fr->groupnum = -1;
+ fr->groupnum = 0;
d->top = fr;
return true;
}
+// Pushes a new frame whose end offset is copied from the current top frame,
+// then records "arg" as the new frame's groupnum. Returns false if push()
+// fails and true otherwise.
+//
+// NOTE(review): the unknown-group path calls this with a negated field number,
+// so a negative value travels through the uint32_t parameter before being
+// stored in the int32_t groupnum field.
+static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
+ // While we expect to see an "end" tag (either ENDGROUP or a non-sequence
+ // field number) prior to hitting any enclosing submessage end, pushing our
+ // existing delim end prevents us from continuing to parse values from a
+ // corrupt proto that doesn't give us an END tag in time.
+ if (!push(d, d->top->end_ofs))
+ return false;
+ d->top->groupnum = arg;
+ return true;
+}
+
+// Discards the current frame by stepping d->top back one entry.
+static void pop(upb_pbdecoder *d) { d->top--; }
+
NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
uint64_t expected) {
uint64_t data = 0;
@@ -400,46 +419,103 @@ NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
}
}
-int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum,
+int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
uint8_t wire_type) {
- if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) {
- seterr(d, "Invalid field number");
- return upb_pbdecoder_suspend(d);
- }
-
- if (wire_type == UPB_WIRE_TYPE_END_GROUP) {
- if (fieldnum != d->top->groupnum) {
- seterr(d, "Unmatched ENDGROUP tag.");
+ if (fieldnum >= 0)
+ goto have_tag;
+
+ while (true) {
+ uint32_t tag;
+ CHECK_RETURN(decode_v32(d, &tag));
+ wire_type = tag & 0x7;
+ fieldnum = tag >> 3;
+
+have_tag:
+ if (fieldnum == 0) {
+ seterr(d, "Saw invalid field number (0)");
return upb_pbdecoder_suspend(d);
}
- return DECODE_ENDGROUP;
- }
- // TODO: deliver to unknown field callback.
- switch (wire_type) {
- case UPB_WIRE_TYPE_VARINT: {
- uint64_t u64;
- return decode_varint(d, &u64);
+ // TODO: deliver to unknown field callback.
+ switch (wire_type) {
+ case UPB_WIRE_TYPE_32BIT:
+ CHECK_RETURN(skip(d, 4));
+ break;
+ case UPB_WIRE_TYPE_64BIT:
+ CHECK_RETURN(skip(d, 8));
+ break;
+ case UPB_WIRE_TYPE_VARINT: {
+ uint64_t u64;
+ CHECK_RETURN(decode_varint(d, &u64));
+ break;
+ }
+ case UPB_WIRE_TYPE_DELIMITED: {
+ uint32_t len;
+ CHECK_RETURN(decode_v32(d, &len));
+ CHECK_RETURN(skip(d, len));
+ break;
+ }
+ case UPB_WIRE_TYPE_START_GROUP:
+ CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
+ break;
+ case UPB_WIRE_TYPE_END_GROUP:
+ if (fieldnum == -d->top->groupnum) {
+ pop(d);
+ } else if (fieldnum == d->top->groupnum) {
+ return DECODE_ENDGROUP;
+ } else {
+ seterr(d, "Unmatched ENDGROUP tag.");
+ return upb_pbdecoder_suspend(d);
+ }
+ break;
+ default:
+ seterr(d, "Invalid wire type");
+ return upb_pbdecoder_suspend(d);
}
- case UPB_WIRE_TYPE_32BIT:
- return skip(d, 4);
- case UPB_WIRE_TYPE_64BIT:
- return skip(d, 8);
- case UPB_WIRE_TYPE_DELIMITED: {
- uint32_t len;
- CHECK_RETURN(decode_v32(d, &len));
- return skip(d, len);
+
+ if (d->top->groupnum >= 0) {
+ return DECODE_OK;
}
- case UPB_WIRE_TYPE_START_GROUP:
- seterr(d, "Can't handle unknown groups yet");
- return upb_pbdecoder_suspend(d);
- case UPB_WIRE_TYPE_END_GROUP:
- default:
- seterr(d, "Invalid wire type");
+
+ if (ptr(d) == d->delim_end) {
+ seterr(d, "Enclosing submessage ended in the middle of value or group");
+ // Unlike most errors we notice during parsing, right now we have consumed
+ // all of the user's input.
+ //
+ // There are three different options for how to handle this case:
+ //
+ // 1. decode() = short count, error = set
+ // 2. decode() = full count, error = set
+ // 3. decode() = full count, error NOT set, short count and error will
+ // be reported on next call to decode() (or end())
+ //
+ // (1) and (3) have the advantage that they preserve the invariant that an
+ // error occurs iff decode() returns a short count.
+ //
+ // (2) and (3) have the advantage of reflecting the fact that all of the
+ // bytes were in fact parsed (and possibly delivered to the unknown field
+ // handler, in the future when that is supported).
+ //
+ // (3) requires extra state in the decoder (a place to store the "permanent
+ // error" that we should return for all subsequent attempts to decode).
+ // But we likely want this anyway.
+ //
+ // Right now we do (1), thanks to the fact that we checkpoint *after* this
+ // check. (3) may be a better choice long term; unclear at the moment.
return upb_pbdecoder_suspend(d);
+ }
+
+ checkpoint(d);
}
}
+// Redirects the program counter to the current frame's end-of-message code:
+// looks up the DISPATCH_ENDMSG slot in the frame's dispatch table and treats
+// the stored value as an offset from the frame's base.
+static void goto_endmsg(upb_pbdecoder *d) {
+ upb_value v;
+ bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
+ UPB_ASSERT_VAR(found, found);
+ d->pc = d->top->base + upb_value_getuint64(v);
+}
+
static int32_t dispatch(upb_pbdecoder *d) {
upb_inttable *dispatch = d->top->dispatch;
@@ -470,7 +546,7 @@ static int32_t dispatch(upb_pbdecoder *d) {
int32_t ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
if (ret == DECODE_ENDGROUP) {
- d->pc = d->top->base - 1; // Back to OP_ENDMSG.
+ goto_endmsg(d);
return DECODE_OK;
} else {
d->pc = d->last - 1; // Rewind to CHECKDELIM.
@@ -493,7 +569,11 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
upb_pbdecoder *d = closure;
const mgroup *group = hd;
assert(buf);
- upb_pbdecoder_resume(d, NULL, buf, size, handle);
+ int32_t result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
+ if (result == DECODE_ENDGROUP) {
+ goto_endmsg(d);
+ }
+ CHECK_RETURN(result);
UPB_UNUSED(group);
#define VMCASE(op, code) \
@@ -552,8 +632,6 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
)
VMCASE(OP_ENDMSG,
CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
- assert(d->call_len > 0);
- d->pc = d->callstack[--d->call_len];
)
VMCASE(OP_STARTSEQ,
upb_pbdecoder_frame *outer = outer_frame(d);
@@ -579,25 +657,39 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
)
VMCASE(OP_STRING,
uint32_t len = curbufleft(d);
- CHECK_SUSPEND(
- upb_sink_putstring(&d->top->sink, arg, ptr(d), len, handle));
- advance(d, len);
- if (d->delim_end == NULL) { // String extends beyond this buf?
- d->pc--;
- d->bufstart_ofs += size;
- d->residual_end = d->residual;
- return size;
+ size_t n = upb_sink_putstring(&d->top->sink, arg, ptr(d), len, handle);
+ if (n > len) {
+ if (n > d->top->end_ofs - offset(d)) {
+ seterr(d, "Tried to skip past end of string.");
+ return upb_pbdecoder_suspend(d);
+ } else {
+ return skip(d, n);
+ }
+ } else if (n < len) {
+ advance(d, n);
+ return upb_pbdecoder_suspend(d);
+ } else {
+ advance(d, n);
+ if (d->delim_end == NULL) { // String extends beyond this buf?
+ d->pc--; // Do OP_STRING again when we resume.
+ d->bufstart_ofs += size;
+ d->residual_end = d->residual;
+ return size;
+ }
}
)
VMCASE(OP_ENDSTR,
CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
)
VMCASE(OP_PUSHTAGDELIM,
- CHECK_SUSPEND(push(d, d->top->end_ofs));
+ CHECK_SUSPEND(pushtagdelim(d, arg));
+ )
+ VMCASE(OP_SETBIGGROUPNUM,
+ d->top->groupnum = *d->pc++;
)
VMCASE(OP_POP,
assert(d->top > d->stack);
- d->top--;
+ pop(d);
)
VMCASE(OP_PUSHLENDELIM,
uint32_t len;
@@ -608,13 +700,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
VMCASE(OP_SETDELIM,
set_delim_end(d);
)
- VMCASE(OP_SETGROUPNUM,
- d->top->groupnum = arg;
- )
- VMCASE(OP_SETBIGGROUPNUM,
- d->top->groupnum = *d->pc++;
- )
VMCASE(OP_CHECKDELIM,
+ // This assert is guaranteed to hold because we never allow ourselves to
+ // consume bytes beyond data_end, which covers delim_end when non-NULL.
assert(!(d->delim_end && ptr(d) > d->delim_end));
if (ptr(d) == d->delim_end)
d->pc += longofs;
@@ -623,6 +711,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
d->callstack[d->call_len++] = d->pc;
d->pc += longofs;
)
+ VMCASE(OP_RET,
+ assert(d->call_len > 0);
+ d->pc = d->callstack[--d->call_len];
+ )
VMCASE(OP_BRANCH,
d->pc += longofs;
)
@@ -755,6 +847,7 @@ void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m,
void upb_pbdecoder_reset(upb_pbdecoder *d) {
d->top = d->stack;
d->top->end_ofs = UINT64_MAX;
+ d->top->groupnum = 0;
d->bufstart_ofs = 0;
d->ptr = d->residual;
d->buf = d->residual;
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index 4313bb3..586d2d5 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -20,19 +20,23 @@ namespace pb {
class CodeCache;
class Decoder;
class DecoderMethod;
+class DecoderMethodOptions;
} // namespace pb
} // namespace upb
typedef upb::pb::CodeCache upb_pbcodecache;
typedef upb::pb::Decoder upb_pbdecoder;
typedef upb::pb::DecoderMethod upb_pbdecodermethod;
+typedef upb::pb::DecoderMethodOptions upb_pbdecodermethodopts;
#else
struct upb_pbdecoder;
struct upb_pbdecodermethod;
+struct upb_pbdecodermethodopts;
struct upb_pbcodecache;
typedef struct upb_pbdecoder upb_pbdecoder;
typedef struct upb_pbdecodermethod upb_pbdecodermethod;
+typedef struct upb_pbdecodermethodopts upb_pbdecodermethodopts;
typedef struct upb_pbcodecache upb_pbcodecache;
#endif
@@ -72,14 +76,40 @@ typedef struct {
// that calls from the JIT into C work correctly.
uint64_t end_ofs;
const uint32_t *base;
- uint32_t groupnum;
+
+ // 0 indicates a length-delimited field.
+ // A positive number indicates a known group.
+ // A negative number indicates an unknown group.
+ int32_t groupnum;
upb_inttable *dispatch; // Not used by the JIT.
} upb_pbdecoder_frame;
#ifdef __cplusplus
-// Represents the code to parse a protobuf according to a specific schema,
-// optionally bound to a set of destination handlers.
+// The parameters one uses to construct a DecoderMethod.
+// TODO(haberman): move allowjit here? Seems more convenient for users.
+class upb::pb::DecoderMethodOptions {
+ public:
+ // Parameter represents the destination handlers that this method will push
+ // to.
+ explicit DecoderMethodOptions(const Handlers* dest_handlers);
+
+ // Should the decoder push submessages to lazy handlers for fields that have
+ // them? The caller should set this iff the lazy handlers expect data that is
+ // in protobuf binary format and the caller wishes to lazy parse it.
+ void set_lazy(bool lazy);
+
+ private:
+#else
+struct upb_pbdecodermethodopts {
+#endif
+ const upb_handlers *handlers;
+ bool lazy;
+};
+
+#ifdef __cplusplus
+
+// Represents the code to parse a protobuf according to a destination Handlers.
class upb::pb::DecoderMethod /* : public upb::RefCounted */ {
public:
// From upb::ReferenceCounted.
@@ -88,14 +118,9 @@ class upb::pb::DecoderMethod /* : public upb::RefCounted */ {
void DonateRef(const void* from, const void* to) const;
void CheckRef(const void* owner) const;
- // The schema that this method parses. Never NULL.
- const MessageDef* schema() const;
-
// The destination handlers that are statically bound to this method.
// This method is only capable of outputting to a sink that uses these
// handlers.
- //
- // Will be NULL if this method is not statically bound.
const Handlers* dest_handlers() const;
// The input handlers for this decoder method.
@@ -106,8 +131,7 @@ class upb::pb::DecoderMethod /* : public upb::RefCounted */ {
// Convenience method for generating a DecoderMethod without explicitly
// creating a CodeCache.
- static reffed_ptr<const DecoderMethod> NewForDestHandlers(
- const upb::Handlers *dest);
+ static reffed_ptr<const DecoderMethod> New(const DecoderMethodOptions& opts);
private:
UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod);
@@ -138,13 +162,7 @@ struct upb_pbdecodermethod {
// The handler one calls to invoke this method.
upb_byteshandler input_handler_;
- // The message type that this method is parsing.
- const upb_msgdef *schema_;
-
- // The destination handlers this method is bound to, or NULL if this method
- // can be bound to a destination handlers instance at runtime.
- //
- // If non-NULL, we own a ref.
+ // The destination handlers this method is bound to. We own a ref.
const upb_handlers *dest_handlers_;
// The dispatch table layout is:
@@ -183,8 +201,7 @@ class upb::pb::Decoder {
void Reset();
// Resets the output sink of the Decoder.
- // The given sink must match method()->schema() as well as
- // method()->dest_handlers() if the latter is non-NULL.
+ // The given sink must match method()->dest_handlers().
//
// This must be called at least once before the decoder can be used. It may
// only be called when the decoder is in a state where it was just created or
@@ -221,9 +238,6 @@ struct upb_pbdecoder {
// Overall stream offset of "buf."
uint64_t bufstart_ofs;
- // How many bytes past the end of the user buffer we want to skip.
- size_t skip;
-
// Buffer for residual bytes not parsed from the previous buffer.
// The maximum number of residual bytes we require is 12; a five-byte
// unknown tag plus an eight-byte value, less one because the value
@@ -290,8 +304,7 @@ class upb::pb::CodeCache {
// more efficient decoding. However the returned method may or may not
// actually be statically bound. But in all cases, the returned method can
// push data to the given handlers.
- const DecoderMethod *GetDecoderMethodForDestHandlers(
- const upb::Handlers *handlers);
+ const DecoderMethod *GetDecoderMethod(const DecoderMethodOptions& opts);
// If/when someone needs to explicitly create a dynamically-bound
// DecoderMethod*, we can add a method to get it here.
@@ -320,27 +333,30 @@ const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink);
upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d);
+void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
+ const upb_handlers *h);
+void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy);
+
void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner);
void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner);
void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m,
const void *from, const void *to);
void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
const void *owner);
-const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m);
const upb_handlers *upb_pbdecodermethod_desthandlers(
const upb_pbdecodermethod *m);
const upb_byteshandler *upb_pbdecodermethod_inputhandler(
const upb_pbdecodermethod *m);
bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m);
-const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers(
- const upb_handlers *dest, const void *owner);
+const upb_pbdecodermethod *upb_pbdecodermethod_new(
+ const upb_pbdecodermethodopts *opts, const void *owner);
void upb_pbcodecache_init(upb_pbcodecache *c);
void upb_pbcodecache_uninit(upb_pbcodecache *c);
bool upb_pbcodecache_allowjit(const upb_pbcodecache *c);
bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow);
-const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers(
- upb_pbcodecache *c, const upb_handlers *handlers);
+const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
+ upb_pbcodecache *c, const upb_pbdecodermethodopts *opts);
#ifdef __cplusplus
} /* extern "C" */
@@ -391,6 +407,13 @@ inline BytesSink* Decoder::input() {
return upb_pbdecoder_input(this);
}
+inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) {
+ upb_pbdecodermethodopts_init(this, h);
+}
+inline void DecoderMethodOptions::set_lazy(bool lazy) {
+ upb_pbdecodermethodopts_setlazy(this, lazy);
+}
+
inline void DecoderMethod::Ref(const void *owner) const {
upb_pbdecodermethod_ref(this, owner);
}
@@ -403,9 +426,6 @@ inline void DecoderMethod::DonateRef(const void *from, const void *to) const {
inline void DecoderMethod::CheckRef(const void *owner) const {
upb_pbdecodermethod_checkref(this, owner);
}
-inline const MessageDef* DecoderMethod::schema() const {
- return upb_pbdecodermethod_schema(this);
-}
inline const Handlers* DecoderMethod::dest_handlers() const {
return upb_pbdecodermethod_desthandlers(this);
}
@@ -416,10 +436,9 @@ inline bool DecoderMethod::is_native() const {
return upb_pbdecodermethod_isnative(this);
}
// static
-inline reffed_ptr<const DecoderMethod> DecoderMethod::NewForDestHandlers(
- const Handlers *dest) {
- const upb_pbdecodermethod *m =
- upb_pbdecodermethod_newfordesthandlers(dest, &m);
+inline reffed_ptr<const DecoderMethod> DecoderMethod::New(
+ const DecoderMethodOptions &opts) {
+ const upb_pbdecodermethod *m = upb_pbdecodermethod_new(&opts, &m);
return reffed_ptr<const DecoderMethod>(m, &m);
}
@@ -435,9 +454,9 @@ inline bool CodeCache::allow_jit() const {
inline bool CodeCache::set_allow_jit(bool allow) {
return upb_pbcodecache_setallowjit(this, allow);
}
-inline const DecoderMethod* CodeCache::GetDecoderMethodForDestHandlers(
- const upb::Handlers* handlers) {
- return upb_pbcodecache_getdecodermethodfordesthandlers(this, handlers);
+inline const DecoderMethod *CodeCache::GetDecoderMethod(
+ const DecoderMethodOptions& opts) {
+ return upb_pbcodecache_getdecodermethod(this, &opts);
}
} // namespace pb
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index 20afa68..11aa133 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -40,12 +40,10 @@ typedef enum {
OP_PUSHLENDELIM = 24, // No arg.
OP_POP = 25, // No arg.
OP_SETDELIM = 26, // No arg.
- OP_SETGROUPNUM = 27,
- OP_SETBIGGROUPNUM = 28, // two words: | unused (24) | opc || groupnum (32) |
-
- // The arg for these opcodes is a local label reference.
- OP_CHECKDELIM = 29,
- OP_CALL = 30,
+ OP_SETBIGGROUPNUM = 27, // two words: | unused (24) | opc || groupnum (32) |
+ OP_CHECKDELIM = 28,
+ OP_CALL = 29,
+ OP_RET = 30,
OP_BRANCH = 31,
// Different opcodes depending on how many bytes expected.
@@ -112,10 +110,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
bool upb_pbdecoder_end(void *closure, const void *handler_data);
// Decoder-internal functions that the JIT calls to handle fallback paths.
-void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
- size_t size, const upb_bufhandle *handle);
+int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
+ size_t size, const upb_bufhandle *handle);
size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
-int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum,
+int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
uint8_t wire_type);
int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected);
int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64);
@@ -137,8 +135,21 @@ void upb_pbdecoder_freejit(mgroup *group);
// wherever that takes you."
#define LABEL_DISPATCH 0
+// A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
+// RET) for branching to when we find an appropriate ENDGROUP tag.
+#define DISPATCH_ENDMSG 0
+
+// All of the functions in decoder.c that return int32_t return values according
+// to the following scheme:
+// 1. negative values indicate a return code from the following list.
+// 2. non-negative values indicate that error or end of buffer was hit, and
+// that the decode function should immediately return the given value
+// (the decoder state has already been suspended and is ready to be
+// resumed).
#define DECODE_OK -1
#define DECODE_MISMATCH -2 // Used only from checktag_slow().
-#define DECODE_ENDGROUP -2 // Used only from checkunknown().
+#define DECODE_ENDGROUP -3 // Used only from skipunknown().
+
+#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; }
#endif // UPB_DECODER_INT_H_
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 73ef145..fde2dd1 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -17,8 +17,10 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
void *owner, upb_status *status) {
// Create handlers.
const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
+ upb_pbdecodermethodopts opts;
+ upb_pbdecodermethodopts_init(&opts, reader_h);
const upb_pbdecodermethod *decoder_m =
- upb_pbdecodermethod_newfordesthandlers(reader_h, &decoder_m);
+ upb_pbdecodermethod_new(&opts, &decoder_m);
upb_pbdecoder decoder;
upb_descreader reader;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback