diff options
Diffstat (limited to 'upb/pb')
-rw-r--r-- | upb/pb/compile_decoder.c | 418 | ||||
-rw-r--r-- | upb/pb/compile_decoder_x64.c | 2 | ||||
-rw-r--r-- | upb/pb/compile_decoder_x64.dasc | 78 | ||||
-rw-r--r-- | upb/pb/decoder.c | 225 | ||||
-rw-r--r-- | upb/pb/decoder.h | 97 | ||||
-rw-r--r-- | upb/pb/decoder.int.h | 31 | ||||
-rw-r--r-- | upb/pb/glue.c | 4 |
7 files changed, 495 insertions, 360 deletions
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c index f96f07a..400d6fa 100644 --- a/upb/pb/compile_decoder.c +++ b/upb/pb/compile_decoder.c @@ -19,13 +19,6 @@ #define MAXLABEL 5 #define EMPTYLABEL -1 -static const void *methodkey(const upb_msgdef *md, const upb_handlers *h) { - const void *ret = h ? (const void*)h : (const void*)md; - assert(ret); - return ret; -} - - /* mgroup *********************************************************************/ static void freegroup(upb_refcounted *r) { @@ -80,10 +73,8 @@ static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit, visit(r, m->group, closure); } -static upb_pbdecodermethod *newmethod(const upb_msgdef *msg, - const upb_handlers *dest_handlers, - mgroup *group, - const void *key) { +static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers, + mgroup *group) { static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod}; upb_pbdecodermethod *ret = malloc(sizeof(*ret)); upb_refcounted_init(UPB_UPCAST(ret), &vtbl, &ret); @@ -92,11 +83,10 @@ static upb_pbdecodermethod *newmethod(const upb_msgdef *msg, // The method references the group and vice-versa, in a circular reference. upb_ref2(ret, group); upb_ref2(group, ret); - upb_inttable_insertptr(&group->methods, key, upb_value_ptr(ret)); // Owns ref + upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret)); upb_refcounted_unref(UPB_UPCAST(ret), &ret); ret->group = UPB_UPCAST(group); - ret->schema_ = msg; ret->dest_handlers_ = dest_handlers; ret->is_native_ = false; // If we JIT, it will update this later. upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64); @@ -126,10 +116,6 @@ void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m, upb_refcounted_checkref(UPB_UPCAST(m), owner); } -const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m) { - return m->schema_; -} - const upb_handlers *upb_pbdecodermethod_desthandlers( const upb_pbdecodermethod *m) { return m->dest_handlers_; @@ -144,12 +130,12 @@ bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) { return m->is_native_; } -const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers( - const upb_handlers *dest, const void *owner) { +const upb_pbdecodermethod *upb_pbdecodermethod_new( + const upb_pbdecodermethodopts *opts, const void *owner) { upb_pbcodecache cache; upb_pbcodecache_init(&cache); const upb_pbdecodermethod *ret = - upb_pbcodecache_getdecodermethodfordesthandlers(&cache, dest); + upb_pbcodecache_getdecodermethod(&cache, opts); upb_pbdecodermethod_ref(ret, owner); upb_pbcodecache_uninit(&cache); return ret; @@ -165,11 +151,15 @@ typedef struct { uint32_t *pc; int fwd_labels[MAXLABEL]; int back_labels[MAXLABEL]; + + // For fields marked "lazy", parse them lazily or eagerly? + bool lazy; } compiler; -static compiler *newcompiler(mgroup *group) { +static compiler *newcompiler(mgroup *group, bool lazy) { compiler *ret = malloc(sizeof(*ret)); ret->group = group; + ret->lazy = lazy; for (int i = 0; i < MAXLABEL; i++) { ret->fwd_labels[i] = EMPTYLABEL; ret->back_labels[i] = EMPTYLABEL; @@ -300,11 +290,11 @@ static void putop(compiler *c, opcode op, ...) { } case OP_STARTMSG: case OP_ENDMSG: - case OP_PUSHTAGDELIM: case OP_PUSHLENDELIM: case OP_POP: case OP_SETDELIM: case OP_HALT: + case OP_RET: put32(c, op); break; case OP_PARSE_DOUBLE: @@ -321,13 +311,13 @@ static void putop(compiler *c, opcode op, ...) { case OP_PARSE_SINT32: case OP_PARSE_SINT64: case OP_STARTSEQ: - case OP_SETGROUPNUM: case OP_ENDSEQ: case OP_STARTSUBMSG: case OP_ENDSUBMSG: case OP_STARTSTR: case OP_STRING: case OP_ENDSTR: + case OP_PUSHTAGDELIM: put32(c, op | va_arg(ap, upb_selector_t) << 8); break; case OP_SETBIGGROUPNUM: @@ -382,10 +372,10 @@ const char *upb_pbdecoder_getopname(unsigned int op) { T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32), T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64), OP(STARTMSG), OP(ENDMSG), OP(STARTSEQ), OP(ENDSEQ), OP(STARTSUBMSG), - OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), + OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET), OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM), OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP), - OP(SETGROUPNUM), OP(SETBIGGROUPNUM), OP(HALT), + OP(SETBIGGROUPNUM), OP(HALT), }; return op > OP_HALT ? names[0] : names[op]; #undef OP @@ -413,16 +403,17 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { const upb_pbdecodermethod *method = (void *)((char *)dispatch - offsetof(upb_pbdecodermethod, dispatch)); - fprintf(f, " %s", upb_msgdef_fullname(method->schema_)); + fprintf(f, " %s", upb_msgdef_fullname( + upb_handlers_msgdef(method->dest_handlers_))); break; } case OP_STARTMSG: case OP_ENDMSG: case OP_PUSHLENDELIM: - case OP_PUSHTAGDELIM: case OP_POP: case OP_SETDELIM: case OP_HALT: + case OP_RET: break; case OP_PARSE_DOUBLE: case OP_PARSE_FLOAT: @@ -444,7 +435,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { case OP_STARTSTR: case OP_STRING: case OP_ENDSTR: - case OP_SETGROUPNUM: + case OP_PUSHTAGDELIM: fprintf(f, " %d", instr >> 8); break; case OP_SETBIGGROUPNUM: @@ -537,11 +528,11 @@ static void putpush(compiler *c, const upb_fielddef *f) { putop(c, OP_PUSHLENDELIM); } else { uint32_t fn = upb_fielddef_number(f); - putop(c, OP_PUSHTAGDELIM); if (fn >= 1 << 24) { + putop(c, OP_PUSHTAGDELIM, 0); putop(c, OP_SETBIGGROUPNUM, fn); } else { - putop(c, OP_SETGROUPNUM, fn); + putop(c, OP_PUSHTAGDELIM, fn); } } } @@ -549,13 +540,35 @@ static void putpush(compiler *c, const upb_fielddef *f) { static upb_pbdecodermethod *find_submethod(const compiler *c, const upb_pbdecodermethod *method, const upb_fielddef *f) { - const upb_handlers *sub = method->dest_handlers_ ? - upb_handlers_getsubhandlers(method->dest_handlers_, f) : NULL; - const void *key = methodkey(upb_downcast_msgdef(upb_fielddef_subdef(f)), sub); + const upb_handlers *sub = + upb_handlers_getsubhandlers(method->dest_handlers_, f); upb_value v; - bool ok = upb_inttable_lookupptr(&c->group->methods, key, &v); - UPB_ASSERT_VAR(ok, ok); - return upb_value_getptr(v); + return upb_inttable_lookupptr(&c->group->methods, sub, &v) + ? upb_value_getptr(v) + : NULL; +} + +static void putsel(compiler *c, opcode op, upb_selector_t sel, + const upb_handlers *h) { + if (upb_handlers_gethandler(h, sel)) { + putop(c, op, sel); + } +} + +// Puts an opcode to call a callback, but only if a callback actually exists for +// this field and handler type. +static void putcb(compiler *c, opcode op, const upb_handlers *h, + const upb_fielddef *f, upb_handlertype_t type) { + putsel(c, op, getsel(f, type), h); +} + +static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) { + if (!upb_fielddef_lazy(f)) + return false; + + return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) || + upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) || + upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR)); } // Adds bytecode for parsing the given message to the given decoderplan, @@ -596,177 +609,178 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { upb_inttable_uninit(&method->dispatch); upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); + const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); + const upb_msgdef *md = upb_handlers_msgdef(h); + method->code_base.ofs = pcofs(c); putop(c, OP_SETDISPATCH, &method->dispatch); - putop(c, OP_STARTMSG); + putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); label(c, LABEL_FIELD); upb_msg_iter i; - for(upb_msg_begin(&i, method->schema_); !upb_msg_done(&i); upb_msg_next(&i)) { + for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - upb_descriptortype_t type = upb_fielddef_descriptortype(f); + upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f); + upb_fieldtype_t type = upb_fielddef_type(f); // From a decoding perspective, ENUM is the same as INT32. - if (type == UPB_DESCRIPTOR_TYPE_ENUM) - type = UPB_DESCRIPTOR_TYPE_INT32; - - label(c, LABEL_FIELD); - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_MESSAGE: { - const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); - int wire_type = (type == UPB_DESCRIPTOR_TYPE_MESSAGE) ? - UPB_WIRE_TYPE_DELIMITED : UPB_WIRE_TYPE_START_GROUP; - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - putop(c, OP_ENDSUBMSG, getsel(f, UPB_HANDLER_ENDSUBMSG)); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ)); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - putop(c, OP_ENDSUBMSG, getsel(f, UPB_HANDLER_ENDSUBMSG)); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - } - break; + if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM) + descriptor_type = UPB_DESCRIPTOR_TYPE_INT32; + + if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) { + const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); + if (!sub_m) { + // Don't emit any code for this field at all; it will be parsed as an + // unknown field. + continue; } - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHTAGDELIM); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - putop(c, OP_POP); - putop(c, OP_ENDSTR, getsel(f, UPB_HANDLER_ENDSTR)); - putop(c, OP_SETDELIM); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ)); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - putop(c, OP_POP); - putop(c, OP_ENDSTR, getsel(f, UPB_HANDLER_ENDSTR)); + + label(c, LABEL_FIELD); + + int wire_type = (descriptor_type == UPB_DESCRIPTOR_TYPE_MESSAGE) + ? UPB_WIRE_TYPE_DELIMITED + : UPB_WIRE_TYPE_START_GROUP; + if (upb_fielddef_isseq(f)) { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, wire_type, LABEL_DISPATCH); + dispatchtarget(c, method, f, wire_type); + putop(c, OP_PUSHTAGDELIM, 0); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); + label(c, LABEL_LOOPSTART); + putpush(c, f); + putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); + putop(c, OP_CALL, sub_m); + putop(c, OP_POP); + putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); + if (wire_type == UPB_WIRE_TYPE_DELIMITED) { putop(c, OP_SETDELIM); } - break; - default: { - opcode parse_type = (opcode)type; - assert((int)parse_type >= 0 && parse_type <= OP_MAX); - upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); - int wire_type = native_wire_types[upb_fielddef_descriptortype(f)]; - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); // Packed and non-packed join. - putop(c, OP_ENDSEQ, getsel(f, UPB_HANDLER_ENDSEQ)); - putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ. - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, parse_type, sel); + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putchecktag(c, f, wire_type, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + label(c, LABEL_LOOPBREAK); + putop(c, OP_POP); + putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); + } else { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, wire_type, LABEL_DISPATCH); + dispatchtarget(c, method, f, wire_type); + putpush(c, f); + putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); + putop(c, OP_CALL, sub_m); + putop(c, OP_POP); + putcb(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); + if (wire_type == UPB_WIRE_TYPE_DELIMITED) { + putop(c, OP_SETDELIM); } } + } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES || + type == UPB_TYPE_MESSAGE) { + label(c, LABEL_FIELD); + if (upb_fielddef_isseq(f)) { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); + dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); + putop(c, OP_PUSHTAGDELIM, 0); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); + label(c, LABEL_LOOPSTART); + putop(c, OP_PUSHLENDELIM); + putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); + // Need to emit even if no handler to skip past the string. + putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); + putop(c, OP_POP); + putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); + putop(c, OP_SETDELIM); + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + label(c, LABEL_LOOPBREAK); + putop(c, OP_POP); + putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); + } else { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); + dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); + putop(c, OP_PUSHLENDELIM); + putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); + putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); + putop(c, OP_POP); + putcb(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); + putop(c, OP_SETDELIM); + } + } else { + label(c, LABEL_FIELD); + opcode parse_type = (opcode)descriptor_type; + assert((int)parse_type >= 0 && parse_type <= OP_MAX); + upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + int wire_type = native_wire_types[upb_fielddef_descriptortype(f)]; + if (upb_fielddef_isseq(f)) { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); + dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); + putop(c, OP_PUSHLENDELIM); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed + label(c, LABEL_LOOPSTART); + putop(c, parse_type, sel); + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + dispatchtarget(c, method, f, wire_type); + putop(c, OP_PUSHTAGDELIM, 0); + putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed + label(c, LABEL_LOOPSTART); + putop(c, parse_type, sel); + putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); + putchecktag(c, f, wire_type, LABEL_LOOPBREAK); + putop(c, OP_BRANCH, -LABEL_LOOPSTART); + label(c, LABEL_LOOPBREAK); + putop(c, OP_POP); // Packed and non-packed join. + putcb(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); + putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ. + } else { + putop(c, OP_CHECKDELIM, LABEL_ENDMSG); + putchecktag(c, f, wire_type, LABEL_DISPATCH); + dispatchtarget(c, method, f, wire_type); + putop(c, parse_type, sel); + } } } + // For now we just loop back to the last field of the message (or if none, // the DISPATCH opcode for the message. putop(c, OP_BRANCH, -LABEL_FIELD); + + // Insert both a label and a dispatch table entry for this end-of-msg. label(c, LABEL_ENDMSG); - putop(c, OP_ENDMSG); + upb_value val = upb_value_uint64(pcofs(c) - method->code_base.ofs); + upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val); + + putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h); + putop(c, OP_RET); upb_inttable_compact(&method->dispatch); } -// Populate "methods" with new upb_pbdecodermethod objects reachable from "md". -// "h" can be NULL, in which case the methods will not be statically bound to -// destination handlers. -// -// Returns the method for this msgdef/handlers. +// Populate "methods" with new upb_pbdecodermethod objects reachable from "h". +// Returns the method for these handlers. // -// Note that there is a deep difference between keying the method table on -// upb_msgdef and keying it on upb_handlers. Since upb_msgdef : upb_handlers -// can be 1:many, binding a handlers statically can result in *more* methods -// being generated than if the methods are dynamically-bound. -// -// On the other hand, if/when the optimization mentioned below is implemented, -// binding to a upb_handlers can result in *fewer* methods being generated if -// many of the submessages have no handlers bound to them. -static void find_methods(compiler *c, const upb_msgdef *md, - const upb_handlers *h) { - const void *key = methodkey(md, h); +// Generates a new method for every destination handlers reachable from "h". +static void find_methods(compiler *c, const upb_handlers *h) { upb_value v; - if (upb_inttable_lookupptr(&c->group->methods, key, &v)) + if (upb_inttable_lookupptr(&c->group->methods, h, &v)) return; - newmethod(md, h, c->group, key); + newmethod(h, c->group); // Find submethods. upb_msg_iter i; + const upb_msgdef *md = upb_handlers_msgdef(h); for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - if (upb_fielddef_type(f) != UPB_TYPE_MESSAGE) - continue; - const upb_handlers *sub_h = h ? upb_handlers_getsubhandlers(h, f) : NULL; - - if (h && !sub_h && - upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) { - // OPT: We could optimize away the sub-method, but would have to make sure - // this field is compiled as a string instead of a submessage. + const upb_handlers *sub_h; + if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && + (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) { + // We only generate a decoder method for submessages with handlers. + // Others will be parsed as unknown fields. + find_methods(c, sub_h); } - - find_methods(c, upb_downcast_msgdef(upb_fielddef_subdef(f)), sub_h); } } @@ -814,12 +828,6 @@ static void sethandlers(mgroup *g, bool allowjit) { } } -static bool bind_dynamic(bool allowjit) { - // For the moment, JIT handlers always bind statically, but bytecode handlers - // never do. - return !allowjit; -} - #else // UPB_USE_JIT_X64 static void sethandlers(mgroup *g, bool allowjit) { @@ -828,33 +836,19 @@ static void sethandlers(mgroup *g, bool allowjit) { set_bytecode_handlers(g); } -static bool bind_dynamic(bool allowjit) { - // Bytecode handlers never bind statically. - UPB_UNUSED(allowjit); - return true; -} - #endif // UPB_USE_JIT_X64 // TODO(haberman): allow this to be constructed for an arbitrary set of dest // handlers and other mgroups (but verify we have a transitive closure). -const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, +const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy, const void *owner) { UPB_UNUSED(allowjit); assert(upb_handlers_isfrozen(dest)); - const upb_msgdef *md = upb_handlers_msgdef(dest); mgroup *g = newgroup(owner); - compiler *c = newcompiler(g); - - if (bind_dynamic(allowjit)) { - // If binding dynamically, remove the reference against destination - // handlers. - dest = NULL; - } - - find_methods(c, md, dest); + compiler *c = newcompiler(g, lazy); + find_methods(c, dest); // We compile in two passes: // 1. all messages are assigned relative offsets from the beginning of the @@ -909,20 +903,28 @@ bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) { return true; } -const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers( - upb_pbcodecache *c, const upb_handlers *handlers) { +const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod( + upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) { // Right now we build a new DecoderMethod every time. // TODO(haberman): properly cache methods by their true key. - const mgroup *g = mgroup_new(handlers, c->allow_jit_, c); + const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c); upb_inttable_push(&c->groups, upb_value_constptr(g)); - const upb_msgdef *md = upb_handlers_msgdef(handlers); - if (bind_dynamic(c->allow_jit_)) { - handlers = NULL; - } - upb_value v; - bool ok = upb_inttable_lookupptr(&g->methods, methodkey(md, handlers), &v); + bool ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v); UPB_ASSERT_VAR(ok, ok); return upb_value_getptr(v); } + + +/* upb_pbdecodermethodopts ****************************************************/ + +void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts, + const upb_handlers *h) { + opts->handlers = h; + opts->lazy = false; +} + +void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) { + opts->lazy = lazy; +} diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c index 44331b8..913a748 100644 --- a/upb/pb/compile_decoder_x64.c +++ b/upb/pb/compile_decoder_x64.c @@ -194,6 +194,8 @@ static void patchdispatch(jitcompiler *jc) { } // Define for JIT debugging. +//#define UPB_JIT_LOAD_SO + #ifdef UPB_JIT_LOAD_SO static void load_so(jitcompiler *jc) { // Dump to a .so file in /tmp and load that, so all the tooling works right diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc index 571aa9b..97fb5ce 100644 --- a/upb/pb/compile_decoder_x64.dasc +++ b/upb/pb/compile_decoder_x64.dasc @@ -42,6 +42,9 @@ | mov DECODER->top, FRAME | mov DECODER->ptr, PTR | mov DECODER->data_end, DATAEND +| // We don't guarantee that delim_end is NULL when out of range like the +| // interpreter does. +| mov DECODER->delim_end, DELIMEND | sub DELIMEND, DECODER->buf | add DELIMEND, DECODER->bufstart_ofs | mov FRAME->end_ofs, DELIMEND @@ -205,6 +208,8 @@ static void emit_static_asm(jitcompiler *jc) { | | mov DECODER, rdi | callp upb_pbdecoder_resume // Same args as us; reuse regs. + | test eax, eax + | jns >1 | mov DECODER->saved_rsp, rsp | mov rax, rbx | load_regs @@ -212,12 +217,13 @@ static void emit_static_asm(jitcompiler *jc) { | // Test whether we have a saved stack to resume. | mov ARG3_64, DECODER->call_len | test ARG3_64, ARG3_64 - | jnz >1 + | jnz >2 | | call rax | | mov rax, DECODER->size_param | mov qword DECODER->call_len, 0 + |1: | add rsp, 8 // Counter previous alignment. | pop rbx | pop r12 @@ -227,7 +233,7 @@ static void emit_static_asm(jitcompiler *jc) { | pop rbp | ret | - |1: + |2: | // Resume decoder. | lea ARG2_64, DECODER->callstack | sub rsp, ARG3_64 @@ -293,6 +299,7 @@ static void emit_static_asm(jitcompiler *jc) { | add DELIMEND, rdx | cmp FRAME, DECODER->limit | je >3 // Stack overflow + | mov dword FRAME->groupnum, 0 | test rcx, rcx | jz >2 | mov DATAEND, DECODER->end @@ -850,24 +857,7 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs, |5: } -// Emit message-specific assembly. Overall code layout is: -// +---------------------------------------------------------------------------+ -// | Message A | -// | 1. function prologue (startmsg), jmps to OP_CHECKDELIM_RET before first | -// | OP_TAG* in 4. | -// | 2. function epilogue (endmsg), returns from function. | -// | 3. dispatch function (returns fptr to 4) | -// | - loops internally to skip unknown fields | -// | - after each unknown field does OP_CHECKDELIM_RET (returns 2) | -// | - also returns 2 for END_GROUP. -// | 4. code for each op: | -// | - OP_TAG* on mismatch calls 3 to get addr, then jumps to 4 (or 2 on EOM).| -// | - OP_CHECKDELIM_RET jumps to 2 | -// +---------------------------------------------------------------------------+ -// | Message B | -// | 1. ... | -// | ... | -// +---------------------------------------------------------------------------+ +// Compile the bytecode to x64. static void jitbytecode(jitcompiler *jc) { upb_pbdecodermethod *method = NULL; const upb_handlers *h = NULL; @@ -877,20 +867,21 @@ static void jitbytecode(jitcompiler *jc) { uint32_t arg = instr >> 8; int32_t longofs = arg; - if (op != OP_STARTMSG && op != OP_SETDISPATCH) { + if (op != OP_SETDISPATCH) { + // Skipped for SETDISPATCH because it defines its own asmlabel for the + // dispatch code it emits. asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op)); + + // Skipped for SETDISPATCH because it should point at the function + // prologue, not the dispatch function that is emitted first. + // TODO: optimize this to only define pclabels that are actually used. + |=>define_pclabel(jc, jc->pc): } - // TODO: optimize this to only define pclabels that are actually used. - |=>define_pclabel(jc, jc->pc): + jc->pc++; switch (op) { case OP_STARTMSG: { - // This opcode serves as a function prolouge also. - const char *msgname = upb_msgdef_fullname(method->schema_); - asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname); - |=>define_pclabel(jc, method): - | sub rsp, 8 upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR); if (startmsg) { // bool startmsg(void *closure, const void *hd) @@ -905,11 +896,12 @@ static void jitbytecode(jitcompiler *jc) { | jmp <1 |2: } + } else { + | nop } break; } case OP_ENDMSG: { - // This opcode serves as a function epiloue also. upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR); |9: if (endmsg) { @@ -919,11 +911,12 @@ static void jitbytecode(jitcompiler *jc) { | mov ARG3_64, DECODER->status | callp endmsg } - | add rsp, 8 - | ret break; } case OP_SETDISPATCH: { + uint32_t *op_pc = jc->pc - 1; + + // Load info for new method. upb_inttable *dispatch; memcpy(&dispatch, jc->pc, sizeof(void*)); jc->pc += sizeof(void*) / sizeof(uint32_t); @@ -936,9 +929,18 @@ static void jitbytecode(jitcompiler *jc) { // case instead of parsing it field by field. We should also do the skip // in the containing message's code. h = method->dest_handlers_; - const char *msgname = upb_msgdef_fullname(method->schema_); + const char *msgname = upb_msgdef_fullname(upb_handlers_msgdef(h)); + + // Emit dispatch code for new method. asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname); jitdispatch(jc, method); + + // Emit function prologue for new method. + asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname); + |=>define_pclabel(jc, op_pc): + |=>define_pclabel(jc, method): + | sub rsp, 8 + break; } case OP_PARSE_DOUBLE: @@ -1056,6 +1058,7 @@ static void jitbytecode(jitcompiler *jc) { | add FRAME, sizeof(upb_pbdecoder_frame) | cmp FRAME, DECODER->limit | je ->err + | mov dword FRAME->groupnum, arg break; case OP_PUSHLENDELIM: | call ->pushlendelim @@ -1075,9 +1078,6 @@ static void jitbytecode(jitcompiler *jc) { | mov DATAEND, DELIMEND |1: break; - case OP_SETGROUPNUM: - | mov dword FRAME->groupnum, arg - break; case OP_SETBIGGROUPNUM: | mov dword FRAME->groupnum, *jc->pc++ break; @@ -1086,11 +1086,16 @@ static void jitbytecode(jitcompiler *jc) { | je =>pclabel(jc, jc->pc + longofs) break; case OP_CALL: - | call =>pclabel(jc, jc->pc + longofs + 3) + | call =>pclabel(jc, jc->pc + longofs) break; case OP_BRANCH: | jmp =>pclabel(jc, jc->pc + longofs); break; + case OP_RET: + |9: + | add rsp, 8 + | ret + break; case OP_TAG1: jittag(jc, (arg >> 8) & 0xff, 1, (int8_t)arg, method); break; @@ -1107,6 +1112,7 @@ static void jitbytecode(jitcompiler *jc) { assert(false); } } + asmlabel(jc, "eof"); | nop } diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index c5fae0e..9c54b8a 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -18,7 +18,6 @@ #endif #define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d); -#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; } // Error messages that are shared between the bytecode and JIT decoders. const char *kPbDecoderStackOverflow = "Nesting too deep."; @@ -45,10 +44,10 @@ static bool consumes_input(opcode op) { case OP_PUSHTAGDELIM: case OP_POP: case OP_SETDELIM: - case OP_SETGROUPNUM: case OP_SETBIGGROUPNUM: case OP_CHECKDELIM: case OP_CALL: + case OP_RET: case OP_BRANCH: return false; default: @@ -147,13 +146,12 @@ static void checkpoint(upb_pbdecoder *d) { } // Resumes the decoder from an initial state or from a previous suspend. -void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, - size_t size, const upb_bufhandle *handle) { +int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, + size_t size, const upb_bufhandle *handle) { UPB_UNUSED(p); // Useless; just for the benefit of the JIT. d->buf_param = buf; d->size_param = size; d->handle = handle; - d->skip = 0; if (d->residual_end > d->residual) { // We have residual bytes from the last buffer. assert(ptr(d) == d->residual); @@ -161,7 +159,11 @@ void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, switchtobuf(d, buf, buf + size); } d->checkpoint = ptr(d); - return d; // For the JIT. + if (d->top->groupnum < 0) { + CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); + d->checkpoint = ptr(d); + } + return DECODE_OK; } // Suspends the decoder at the last checkpoint, without saving any residual @@ -176,10 +178,10 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { assert(!in_residual_buf(d, d->checkpoint)); assert(d->buf == d->buf_param); size_t consumed = d->checkpoint - d->buf; - d->bufstart_ofs += consumed + d->skip; + d->bufstart_ofs += consumed; d->residual_end = d->residual; switchtobuf(d, d->residual, d->residual_end); - return consumed + d->skip; + return consumed; } } @@ -209,11 +211,11 @@ static size_t suspend_save(upb_pbdecoder *d) { assert(save <= sizeof(d->residual)); memcpy(d->residual, ptr(d), save); d->residual_end = d->residual + save; - d->bufstart_ofs = offset(d) + d->skip; + d->bufstart_ofs = offset(d); } switchtobuf(d, d->residual, d->residual_end); - return d->size_param + d->skip; + return d->size_param; } static int32_t skip(upb_pbdecoder *d, size_t bytes) { @@ -221,12 +223,16 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) { if (curbufleft(d) >= bytes) { // Skipped data is all in current buffer. advance(d, bytes); + return DECODE_OK; } else { // Skipped data extends beyond currently available buffers. - d->skip = bytes - curbufleft(d); - advance(d, curbufleft(d)); + d->pc = d->last; + size_t skip = bytes - curbufleft(d); + d->bufstart_ofs += (d->end - d->buf) + skip; + d->residual_end = d->residual; + switchtobuf(d, d->residual, d->residual_end); + return d->size_param + skip; } - return DECODE_OK; } FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { @@ -247,8 +253,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf, if (curbufleft(d) >= bytes) { consumebytes(d, buf + avail, bytes); return DECODE_OK; - } else if (d->data_end - d->buf == d->top->end_ofs - d->bufstart_ofs) { - seterr(d, "Submessage ended in the middle of a value"); + } else if (d->data_end == d->delim_end) { + seterr(d, "Submessage ended in the middle of a value or group"); return upb_pbdecoder_suspend(d); } else { return suspend_save(d); @@ -378,11 +384,24 @@ static bool push(upb_pbdecoder *d, uint64_t end) { fr++; fr->end_ofs = end; fr->dispatch = NULL; - fr->groupnum = -1; + fr->groupnum = 0; d->top = fr; return true; } +static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { + // While we expect to see an "end" tag (either ENDGROUP or a non-sequence + // field number) prior to hitting any enclosing submessage end, pushing our + // existing delim end prevents us from continuing to parse values from a + // corrupt proto that doesn't give us an END tag in time. + if (!push(d, d->top->end_ofs)) + return false; + d->top->groupnum = arg; + return true; +} + +static void pop(upb_pbdecoder *d) { d->top--; } + NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected) { uint64_t data = 0; @@ -400,46 +419,103 @@ NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, } } -int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum, +int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, uint8_t wire_type) { - if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) { - seterr(d, "Invalid field number"); - return upb_pbdecoder_suspend(d); - } - - if (wire_type == UPB_WIRE_TYPE_END_GROUP) { - if (fieldnum != d->top->groupnum) { - seterr(d, "Unmatched ENDGROUP tag."); + if (fieldnum >= 0) + goto have_tag; + + while (true) { + uint32_t tag; + CHECK_RETURN(decode_v32(d, &tag)); + wire_type = tag & 0x7; + fieldnum = tag >> 3; + +have_tag: + if (fieldnum == 0) { + seterr(d, "Saw invalid field number (0)"); return upb_pbdecoder_suspend(d); } - return DECODE_ENDGROUP; - } - // TODO: deliver to unknown field callback. - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint64_t u64; - return decode_varint(d, &u64); + // TODO: deliver to unknown field callback. + switch (wire_type) { + case UPB_WIRE_TYPE_32BIT: + CHECK_RETURN(skip(d, 4)); + break; + case UPB_WIRE_TYPE_64BIT: + CHECK_RETURN(skip(d, 8)); + break; + case UPB_WIRE_TYPE_VARINT: { + uint64_t u64; + CHECK_RETURN(decode_varint(d, &u64)); + break; + } + case UPB_WIRE_TYPE_DELIMITED: { + uint32_t len; + CHECK_RETURN(decode_v32(d, &len)); + CHECK_RETURN(skip(d, len)); + break; + } + case UPB_WIRE_TYPE_START_GROUP: + CHECK_SUSPEND(pushtagdelim(d, -fieldnum)); + break; + case UPB_WIRE_TYPE_END_GROUP: + if (fieldnum == -d->top->groupnum) { + pop(d); + } else if (fieldnum == d->top->groupnum) { + return DECODE_ENDGROUP; + } else { + seterr(d, "Unmatched ENDGROUP tag."); + return upb_pbdecoder_suspend(d); + } + break; + default: + seterr(d, "Invalid wire type"); + return upb_pbdecoder_suspend(d); } - case UPB_WIRE_TYPE_32BIT: - return skip(d, 4); - case UPB_WIRE_TYPE_64BIT: - return skip(d, 8); - case UPB_WIRE_TYPE_DELIMITED: { - uint32_t len; - CHECK_RETURN(decode_v32(d, &len)); - return skip(d, len); + + if (d->top->groupnum >= 0) { + return DECODE_OK; } - case UPB_WIRE_TYPE_START_GROUP: - seterr(d, "Can't handle unknown groups yet"); - return upb_pbdecoder_suspend(d); - case UPB_WIRE_TYPE_END_GROUP: - default: - seterr(d, "Invalid wire type"); + + if (ptr(d) == d->delim_end) { + seterr(d, "Enclosing submessage ended in the middle of value or group"); + // Unlike most errors we notice during parsing, right now we have consumed + // all of the user's input. + // + // There are three different options for how to handle this case: + // + // 1. decode() = short count, error = set + // 2. decode() = full count, error = set + // 3. decode() = full count, error NOT set, short count and error will + // be reported on next call to decode() (or end()) + // + // (1) and (3) have the advantage that they preserve the invariant that an + // error occurs iff decode() returns a short count. + // + // (2) and (3) have the advantage of reflecting the fact that all of the + // bytes were in fact parsed (and possibly delivered to the unknown field + // handler, in the future when that is supported). + // + // (3) requires extra state in the decode (a place to store the "permanent + // error" that we should return for all subsequent attempts to decode). + // But we likely want this anyway. + // + // Right now we do (1), thanks to the fact that we checkpoint *after* this + // check. (3) may be a better choice long term; unclear at the moment. return upb_pbdecoder_suspend(d); + } + + checkpoint(d); } } +static void goto_endmsg(upb_pbdecoder *d) { + upb_value v; + bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v); + UPB_ASSERT_VAR(found, found); + d->pc = d->top->base + upb_value_getuint64(v); +} + static int32_t dispatch(upb_pbdecoder *d) { upb_inttable *dispatch = d->top->dispatch; @@ -470,7 +546,7 @@ static int32_t dispatch(upb_pbdecoder *d) { int32_t ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); if (ret == DECODE_ENDGROUP) { - d->pc = d->top->base - 1; // Back to OP_ENDMSG. + goto_endmsg(d); return DECODE_OK; } else { d->pc = d->last - 1; // Rewind to CHECKDELIM. @@ -493,7 +569,11 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, upb_pbdecoder *d = closure; const mgroup *group = hd; assert(buf); - upb_pbdecoder_resume(d, NULL, buf, size, handle); + int32_t result = upb_pbdecoder_resume(d, NULL, buf, size, handle); + if (result == DECODE_ENDGROUP) { + goto_endmsg(d); + } + CHECK_RETURN(result); UPB_UNUSED(group); #define VMCASE(op, code) \ @@ -552,8 +632,6 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, ) VMCASE(OP_ENDMSG, CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status)); - assert(d->call_len > 0); - d->pc = d->callstack[--d->call_len]; ) VMCASE(OP_STARTSEQ, upb_pbdecoder_frame *outer = outer_frame(d); @@ -579,25 +657,39 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, ) VMCASE(OP_STRING, uint32_t len = curbufleft(d); - CHECK_SUSPEND( - upb_sink_putstring(&d->top->sink, arg, ptr(d), len, handle)); - advance(d, len); - if (d->delim_end == NULL) { // String extends beyond this buf? - d->pc--; - d->bufstart_ofs += size; - d->residual_end = d->residual; - return size; + size_t n = upb_sink_putstring(&d->top->sink, arg, ptr(d), len, handle); + if (n > len) { + if (n > d->top->end_ofs - offset(d)) { + seterr(d, "Tried to skip past end of string."); + return upb_pbdecoder_suspend(d); + } else { + return skip(d, n); + } + } else if (n < len) { + advance(d, n); + return upb_pbdecoder_suspend(d); + } else { + advance(d, n); + if (d->delim_end == NULL) { // String extends beyond this buf? + d->pc--; // Do OP_STRING again when we resume. + d->bufstart_ofs += size; + d->residual_end = d->residual; + return size; + } } ) VMCASE(OP_ENDSTR, CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg)); ) VMCASE(OP_PUSHTAGDELIM, - CHECK_SUSPEND(push(d, d->top->end_ofs)); + CHECK_SUSPEND(pushtagdelim(d, arg)); + ) + VMCASE(OP_SETBIGGROUPNUM, + d->top->groupnum = *d->pc++; ) VMCASE(OP_POP, assert(d->top > d->stack); - d->top--; + pop(d); ) VMCASE(OP_PUSHLENDELIM, uint32_t len; @@ -608,13 +700,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, VMCASE(OP_SETDELIM, set_delim_end(d); ) - VMCASE(OP_SETGROUPNUM, - d->top->groupnum = arg; - ) - VMCASE(OP_SETBIGGROUPNUM, - d->top->groupnum = *d->pc++; - ) VMCASE(OP_CHECKDELIM, + // We are guaranteed of this assert because we never allow ourselves to + // consume bytes beyond data_end, which covers delim_end when non-NULL. assert(!(d->delim_end && ptr(d) > d->delim_end)); if (ptr(d) == d->delim_end) d->pc += longofs; @@ -623,6 +711,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, d->callstack[d->call_len++] = d->pc; d->pc += longofs; ) + VMCASE(OP_RET, + assert(d->call_len > 0); + d->pc = d->callstack[--d->call_len]; + ) VMCASE(OP_BRANCH, d->pc += longofs; ) @@ -755,6 +847,7 @@ void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m, void upb_pbdecoder_reset(upb_pbdecoder *d) { d->top = d->stack; d->top->end_ofs = UINT64_MAX; + d->top->groupnum = 0; d->bufstart_ofs = 0; d->ptr = d->residual; d->buf = d->residual; diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 4313bb3..586d2d5 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -20,19 +20,23 @@ namespace pb { class CodeCache; class Decoder; class DecoderMethod; +class DecoderMethodOptions; } // namespace pb } // namespace upb typedef upb::pb::CodeCache upb_pbcodecache; typedef upb::pb::Decoder upb_pbdecoder; typedef upb::pb::DecoderMethod upb_pbdecodermethod; +typedef upb::pb::DecoderMethodOptions upb_pbdecodermethodopts; #else struct upb_pbdecoder; struct upb_pbdecodermethod; +struct upb_pbdecodermethodopts; struct upb_pbcodecache; typedef struct upb_pbdecoder upb_pbdecoder; typedef struct upb_pbdecodermethod upb_pbdecodermethod; +typedef struct upb_pbdecodermethodopts upb_pbdecodermethodopts; typedef struct upb_pbcodecache upb_pbcodecache; #endif @@ -72,14 +76,40 @@ typedef struct { // that calls from the JIT into C work correctly. uint64_t end_ofs; const uint32_t *base; - uint32_t groupnum; + + // 0 indicates a length-delimited field. + // A positive number indicates a known group. + // A negative number indicates an unknown group. + int32_t groupnum; upb_inttable *dispatch; // Not used by the JIT. } upb_pbdecoder_frame; #ifdef __cplusplus -// Represents the code to parse a protobuf according to a specific schema, -// optionally bound to a set of destination handlers. +// The parameters one uses to construct a DecoderMethod. +// TODO(haberman): move allowjit here? Seems more convenient for users. +class upb::pb::DecoderMethodOptions { + public: + // Parameter represents the destination handlers that this method will push + // to. + explicit DecoderMethodOptions(const Handlers* dest_handlers); + + // Should the decoder push submessages to lazy handlers for fields that have + // them? The caller should set this iff the lazy handlers expect data that is + // in protobuf binary format and the caller wishes to lazy parse it. + void set_lazy(bool lazy); + + private: +#else +struct upb_pbdecodermethodopts { +#endif + const upb_handlers *handlers; + bool lazy; +}; + +#ifdef __cplusplus + +// Represents the code to parse a protobuf according to a destination Handlers. class upb::pb::DecoderMethod /* : public upb::RefCounted */ { public: // From upb::ReferenceCounted. @@ -88,14 +118,9 @@ class upb::pb::DecoderMethod /* : public upb::RefCounted */ { void DonateRef(const void* from, const void* to) const; void CheckRef(const void* owner) const; - // The schema that this method parses. Never NULL. - const MessageDef* schema() const; - // The destination handlers that are statically bound to this method. // This method is only capable of outputting to a sink that uses these // handlers. - // - // Will be NULL if this method is not statically bound. const Handlers* dest_handlers() const; // The input handlers for this decoder method. @@ -106,8 +131,7 @@ class upb::pb::DecoderMethod /* : public upb::RefCounted */ { // Convenience method for generating a DecoderMethod without explicitly // creating a CodeCache. - static reffed_ptr<const DecoderMethod> NewForDestHandlers( - const upb::Handlers *dest); + static reffed_ptr<const DecoderMethod> New(const DecoderMethodOptions& opts); private: UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod); @@ -138,13 +162,7 @@ struct upb_pbdecodermethod { // The handler one calls to invoke this method. upb_byteshandler input_handler_; - // The message type that this method is parsing. - const upb_msgdef *schema_; - - // The destination handlers this method is bound to, or NULL if this method - // can be bound to a destination handlers instance at runtime. - // - // If non-NULL, we own a ref. + // The destination handlers this method is bound to. We own a ref. const upb_handlers *dest_handlers_; // The dispatch table layout is: @@ -183,8 +201,7 @@ class upb::pb::Decoder { void Reset(); // Resets the output sink of the Decoder. - // The given sink must match method()->schema() as well as - // method()->dest_handlers() if the latter is non-NULL. + // The given sink must match method()->dest_handlers(). // // This must be called at least once before the decoder can be used. It may // only be called with the decoder is in a state where it was just created or @@ -221,9 +238,6 @@ struct upb_pbdecoder { // Overall stream offset of "buf." uint64_t bufstart_ofs; - // How many bytes past the end of the user buffer we want to skip. - size_t skip; - // Buffer for residual bytes not parsed from the previous buffer. // The maximum number of residual bytes we require is 12; a five-byte // unknown tag plus an eight-byte value, less one because the value @@ -290,8 +304,7 @@ class upb::pb::CodeCache { // more efficient decoding. However the returned method may or may not // actually be statically bound. But in all cases, the returned method can // push data to the given handlers. - const DecoderMethod *GetDecoderMethodForDestHandlers( - const upb::Handlers *handlers); + const DecoderMethod *GetDecoderMethod(const DecoderMethodOptions& opts); // If/when someone needs to explicitly create a dynamically-bound // DecoderMethod*, we can add a method to get it here. @@ -320,27 +333,30 @@ const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink); upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d); +void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts, + const upb_handlers *h); +void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy); + void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner); void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner); void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m, const void *from, const void *to); void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m, const void *owner); -const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m); const upb_handlers *upb_pbdecodermethod_desthandlers( const upb_pbdecodermethod *m); const upb_byteshandler *upb_pbdecodermethod_inputhandler( const upb_pbdecodermethod *m); bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m); -const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers( - const upb_handlers *dest, const void *owner); +const upb_pbdecodermethod *upb_pbdecodermethod_new( + const upb_pbdecodermethodopts *opts, const void *owner); void upb_pbcodecache_init(upb_pbcodecache *c); void upb_pbcodecache_uninit(upb_pbcodecache *c); bool upb_pbcodecache_allowjit(const upb_pbcodecache *c); bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow); -const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers( - upb_pbcodecache *c, const upb_handlers *handlers); +const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod( + upb_pbcodecache *c, const upb_pbdecodermethodopts *opts); #ifdef __cplusplus } /* extern "C" */ @@ -391,6 +407,13 @@ inline BytesSink* Decoder::input() { return upb_pbdecoder_input(this); } +inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) { + upb_pbdecodermethodopts_init(this, h); +} +inline void DecoderMethodOptions::set_lazy(bool lazy) { + upb_pbdecodermethodopts_setlazy(this, lazy); +} + inline void DecoderMethod::Ref(const void *owner) const { upb_pbdecodermethod_ref(this, owner); } @@ -403,9 +426,6 @@ inline void DecoderMethod::DonateRef(const void *from, const void *to) const { inline void DecoderMethod::CheckRef(const void *owner) const { upb_pbdecodermethod_checkref(this, owner); } -inline const MessageDef* DecoderMethod::schema() const { - return upb_pbdecodermethod_schema(this); -} inline const Handlers* DecoderMethod::dest_handlers() const { return upb_pbdecodermethod_desthandlers(this); } @@ -416,10 +436,9 @@ inline bool DecoderMethod::is_native() const { return upb_pbdecodermethod_isnative(this); } // static -inline reffed_ptr<const DecoderMethod> DecoderMethod::NewForDestHandlers( - const Handlers *dest) { - const upb_pbdecodermethod *m = - upb_pbdecodermethod_newfordesthandlers(dest, &m); +inline reffed_ptr<const DecoderMethod> DecoderMethod::New( + const DecoderMethodOptions &opts) { + const upb_pbdecodermethod *m = upb_pbdecodermethod_new(&opts, &m); return reffed_ptr<const DecoderMethod>(m, &m); } @@ -435,9 +454,9 @@ inline bool CodeCache::allow_jit() const { inline bool CodeCache::set_allow_jit(bool allow) { return upb_pbcodecache_setallowjit(this, allow); } -inline const DecoderMethod* CodeCache::GetDecoderMethodForDestHandlers( - const upb::Handlers* handlers) { - return upb_pbcodecache_getdecodermethodfordesthandlers(this, handlers); +inline const DecoderMethod *CodeCache::GetDecoderMethod( + const DecoderMethodOptions& opts) { + return upb_pbcodecache_getdecodermethod(this, &opts); } } // namespace pb diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index 20afa68..11aa133 100644 --- a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -40,12 +40,10 @@ typedef enum { OP_PUSHLENDELIM = 24, // No arg. OP_POP = 25, // No arg. OP_SETDELIM = 26, // No arg. - OP_SETGROUPNUM = 27, - OP_SETBIGGROUPNUM = 28, // two words: | unused (24) | opc || groupnum (32) | - - // The arg for these opcodes is a local label reference. - OP_CHECKDELIM = 29, - OP_CALL = 30, + OP_SETBIGGROUPNUM = 27, // two words: | unused (24) | opc || groupnum (32) | + OP_CHECKDELIM = 28, + OP_CALL = 29, + OP_RET = 30, OP_BRANCH = 31, // Different opcodes depending on how many bytes expected. @@ -112,10 +110,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, bool upb_pbdecoder_end(void *closure, const void *handler_data); // Decoder-internal functions that the JIT calls to handle fallback paths. -void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, - size_t size, const upb_bufhandle *handle); +int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, + size_t size, const upb_bufhandle *handle); size_t upb_pbdecoder_suspend(upb_pbdecoder *d); -int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum, +int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, uint8_t wire_type); int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected); int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64); @@ -137,8 +135,21 @@ void upb_pbdecoder_freejit(mgroup *group); // wherever that takes you." #define LABEL_DISPATCH 0 +// A special slot in the dispatch table that stores the epilogue (ENDMSG and/or +// RET) for branching to when we find an appropriate ENDGROUP tag. +#define DISPATCH_ENDMSG 0 + +// All of the functions in decoder.c that return int32_t return values according +// to the following scheme: +// 1. negative values indicate a return code from the following list. +// 2. positive values indicate that error or end of buffer was hit, and +// that the decode function should immediately return the given value +// (the decoder state has already been suspended and is ready to be +// resumed). #define DECODE_OK -1 #define DECODE_MISMATCH -2 // Used only from checktag_slow(). -#define DECODE_ENDGROUP -2 // Used only from checkunknown(). +#define DECODE_ENDGROUP -3 // Used only from checkunknown(). + +#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; } #endif // UPB_DECODER_INT_H_ diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 73ef145..fde2dd1 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -17,8 +17,10 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, void *owner, upb_status *status) { // Create handlers. const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h); + upb_pbdecodermethodopts opts; + upb_pbdecodermethodopts_init(&opts, reader_h); const upb_pbdecodermethod *decoder_m = - upb_pbdecodermethod_newfordesthandlers(reader_h, &decoder_m); + upb_pbdecodermethod_new(&opts, &decoder_m); upb_pbdecoder decoder; upb_descreader reader; |