diff options
Diffstat (limited to 'upb/pb')
-rw-r--r-- | upb/pb/compile_decoder.c | 385 | ||||
-rw-r--r-- | upb/pb/compile_decoder_x64.c | 106 | ||||
-rw-r--r-- | upb/pb/compile_decoder_x64.dasc | 92 | ||||
-rw-r--r-- | upb/pb/decoder.c | 144 | ||||
-rw-r--r-- | upb/pb/decoder.h | 461 | ||||
-rw-r--r-- | upb/pb/decoder.int.h | 180 | ||||
-rw-r--r-- | upb/pb/glue.c | 46 | ||||
-rw-r--r-- | upb/pb/textprinter.c | 30 |
8 files changed, 908 insertions, 536 deletions
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c index 200eef5..f96f07a 100644 --- a/upb/pb/compile_decoder.c +++ b/upb/pb/compile_decoder.c @@ -11,7 +11,6 @@ #include <stdarg.h> #include "upb/pb/decoder.int.h" #include "upb/pb/varint.int.h" -#include "upb/bytestream.h" #ifdef UPB_DUMP_BYTECODE #include <stdio.h> @@ -20,76 +19,140 @@ #define MAXLABEL 5 #define EMPTYLABEL -1 +static const void *methodkey(const upb_msgdef *md, const upb_handlers *h) { + const void *ret = h ? (const void*)h : (const void*)md; + assert(ret); + return ret; +} + + +/* mgroup *********************************************************************/ + +static void freegroup(upb_refcounted *r) { + mgroup *g = (mgroup*)r; + upb_inttable_uninit(&g->methods); +#ifdef UPB_USE_JIT_X64 + upb_pbdecoder_freejit(g); +#endif + free(g->bytecode); + free(g); +} + +static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const mgroup *g = (const mgroup*)r; + upb_inttable_iter i; + upb_inttable_begin(&i, &g->methods); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); + visit(r, UPB_UPCAST(method), closure); + } +} + +mgroup *newgroup(const void *owner) { + mgroup *g = malloc(sizeof(*g)); + static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup}; + upb_refcounted_init(UPB_UPCAST(g), &vtbl, owner); + upb_inttable_init(&g->methods, UPB_CTYPE_PTR); + g->bytecode = NULL; + g->bytecode_end = NULL; + return g; +} + + /* upb_pbdecodermethod ********************************************************/ +static void freemethod(upb_refcounted *r) { + upb_pbdecodermethod *method = (upb_pbdecodermethod*)r; + upb_byteshandler_uninit(&method->input_handler_); + + if (method->dest_handlers_) { + upb_handlers_unref(method->dest_handlers_, method); + } + + upb_inttable_uninit(&method->dispatch); + free(method); +} + +static void visitmethod(const upb_refcounted *r, 
upb_refcounted_visit *visit, + void *closure) { + const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r; + visit(r, m->group, closure); +} + static upb_pbdecodermethod *newmethod(const upb_msgdef *msg, - const upb_handlers *dest_handlers) { - upb_pbdecodermethod *ret = malloc(sizeof(upb_pbdecodermethod)); - ret->msg = msg; - ret->dest_handlers = dest_handlers; - ret->native_code = false; // If we JIT, it will update this later. + const upb_handlers *dest_handlers, + mgroup *group, + const void *key) { + static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod}; + upb_pbdecodermethod *ret = malloc(sizeof(*ret)); + upb_refcounted_init(UPB_UPCAST(ret), &vtbl, &ret); + upb_byteshandler_init(&ret->input_handler_); + + // The method references the group and vice-versa, in a circular reference. + upb_ref2(ret, group); + upb_ref2(group, ret); + upb_inttable_insertptr(&group->methods, key, upb_value_ptr(ret)); // Owns ref + upb_refcounted_unref(UPB_UPCAST(ret), &ret); + + ret->group = UPB_UPCAST(group); + ret->schema_ = msg; + ret->dest_handlers_ = dest_handlers; + ret->is_native_ = false; // If we JIT, it will update this later. 
upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64); - if (ret->dest_handlers) { - upb_handlers_ref(ret->dest_handlers, ret); + if (ret->dest_handlers_) { + upb_handlers_ref(ret->dest_handlers_, ret); } return ret; } -static void freemethod(upb_pbdecodermethod *method) { - if (method->dest_handlers) { - upb_handlers_unref(method->dest_handlers, method); - } +void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner) { + upb_refcounted_ref(UPB_UPCAST(m), owner); +} - upb_inttable_uninit(&method->dispatch); - free(method); +void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, + const void *owner) { + upb_refcounted_unref(UPB_UPCAST(m), owner); } +void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m, + const void *from, const void *to) { + upb_refcounted_donateref(UPB_UPCAST(m), from, to); +} -/* upb_pbdecoderplan **********************************************************/ +void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m, + const void *owner) { + upb_refcounted_checkref(UPB_UPCAST(m), owner); +} -upb_pbdecoderplan *newplan() { - upb_pbdecoderplan *p = malloc(sizeof(*p)); - upb_inttable_init(&p->methods, UPB_CTYPE_PTR); - p->code = NULL; - p->code_end = NULL; - return p; +const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m) { + return m->schema_; } -void freeplan(void *_p) { - upb_pbdecoderplan *p = _p; +const upb_handlers *upb_pbdecodermethod_desthandlers( + const upb_pbdecodermethod *m) { + return m->dest_handlers_; +} - upb_inttable_iter i; - upb_inttable_begin(&i, &p->methods); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); - freemethod(method); - } - upb_inttable_uninit(&p->methods); - free(p->code); -#ifdef UPB_USE_JIT_X64 - upb_pbdecoder_freejit(p); -#endif - free(p); +const upb_byteshandler *upb_pbdecodermethod_inputhandler( + const upb_pbdecodermethod *m) { + return &m->input_handler_; } -void 
set_bytecode_handlers(upb_pbdecoderplan *p, upb_handlers *h) { - upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_start, p, - NULL); - upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_decode, p, - freeplan); - upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_end, p, NULL); +bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) { + return m->is_native_; } -static const upb_pbdecoderplan *getdecoderplan(const upb_handlers *h) { - if (upb_handlers_frametype(h) != &upb_pbdecoder_frametype) - return NULL; - upb_selector_t sel; - if (!upb_handlers_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STARTSTR, - &sel)) { - return NULL; - } - return upb_handlers_gethandlerdata(h, sel); +const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers( + const upb_handlers *dest, const void *owner) { + upb_pbcodecache cache; + upb_pbcodecache_init(&cache); + const upb_pbdecodermethod *ret = + upb_pbcodecache_getdecodermethodfordesthandlers(&cache, dest); + upb_pbdecodermethod_ref(ret, owner); + upb_pbcodecache_uninit(&cache); + return ret; } @@ -97,16 +160,16 @@ static const upb_pbdecoderplan *getdecoderplan(const upb_handlers *h) { // Data used only at compilation time. typedef struct { - upb_pbdecoderplan *plan; + mgroup *group; uint32_t *pc; int fwd_labels[MAXLABEL]; int back_labels[MAXLABEL]; } compiler; -static compiler *newcompiler(upb_pbdecoderplan *plan) { - compiler *ret = malloc(sizeof(compiler)); - ret->plan = plan; +static compiler *newcompiler(mgroup *group) { + compiler *ret = malloc(sizeof(*ret)); + ret->group = group; for (int i = 0; i < MAXLABEL; i++) { ret->fwd_labels[i] = EMPTYLABEL; ret->back_labels[i] = EMPTYLABEL; @@ -165,7 +228,7 @@ static void setofs(uint32_t *instruction, int32_t ofs) { assert(getofs(*instruction) == ofs); // Would fail in cases of overflow. 
} -static uint32_t pcofs(compiler *c) { return c->pc - c->plan->code; } +static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; } // Defines a local label at the current PC location. All previous forward // references are updated to point to this location. The location is noted @@ -173,7 +236,7 @@ static uint32_t pcofs(compiler *c) { return c->pc - c->plan->code; } static void label(compiler *c, unsigned int label) { assert(label < MAXLABEL); int val = c->fwd_labels[label]; - uint32_t *codep = (val == EMPTYLABEL) ? NULL : c->plan->code + val; + uint32_t *codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val; while (codep) { int ofs = getofs(*codep); setofs(codep, c->pc - codep - instruction_len(*codep)); @@ -197,7 +260,7 @@ static int32_t labelref(compiler *c, int label) { return 0; } else if (label < 0) { // Backward local label. Relative to the next instruction. - uint32_t from = (c->pc + 1) - c->plan->code; + uint32_t from = (c->pc + 1) - c->group->bytecode; return c->back_labels[-label] - from; } else { // Forward local label: prepend to (possibly-empty) linked list. @@ -209,14 +272,15 @@ static int32_t labelref(compiler *c, int label) { } static void put32(compiler *c, uint32_t v) { - if (c->pc == c->plan->code_end) { + mgroup *g = c->group; + if (c->pc == g->bytecode_end) { int ofs = pcofs(c); - size_t oldsize = c->plan->code_end - c->plan->code; + size_t oldsize = g->bytecode_end - g->bytecode; size_t newsize = UPB_MAX(oldsize * 2, 64); // TODO(haberman): handle OOM. - c->plan->code = realloc(c->plan->code, newsize * sizeof(uint32_t)); - c->plan->code_end = c->plan->code + newsize; - c->pc = c->plan->code + ofs; + g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t)); + g->bytecode_end = g->bytecode + newsize; + c->pc = g->bytecode + ofs; } *c->pc++ = v; } @@ -272,7 +336,7 @@ static void putop(compiler *c, opcode op, ...) 
{ break; case OP_CALL: { const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *); - put32(c, op | (method->base.ofs - (pcofs(c) + 1)) << 8); + put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8); break; } case OP_CHECKDELIM: @@ -349,7 +413,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { const upb_pbdecodermethod *method = (void *)((char *)dispatch - offsetof(upb_pbdecodermethod, dispatch)); - fprintf(f, " %s", upb_msgdef_fullname(method->msg)); + fprintf(f, " %s", upb_msgdef_fullname(method->schema_)); break; } case OP_STARTMSG: @@ -453,7 +517,7 @@ static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { static void dispatchtarget(compiler *c, upb_pbdecodermethod *method, const upb_fielddef *f, int wire_type) { // Offset is relative to msg base. - uint64_t ofs = pcofs(c) - method->base.ofs; + uint64_t ofs = pcofs(c) - method->code_base.ofs; uint32_t fn = upb_fielddef_number(f); upb_inttable *d = &method->dispatch; upb_value v; @@ -485,11 +549,11 @@ static void putpush(compiler *c, const upb_fielddef *f) { static upb_pbdecodermethod *find_submethod(const compiler *c, const upb_pbdecodermethod *method, const upb_fielddef *f) { - const void *key = method->dest_handlers ? - (const void*)upb_handlers_getsubhandlers(method->dest_handlers, f) : - (const void*)upb_downcast_msgdef(upb_fielddef_subdef(f)); + const upb_handlers *sub = method->dest_handlers_ ? 
+ upb_handlers_getsubhandlers(method->dest_handlers_, f) : NULL; + const void *key = methodkey(upb_downcast_msgdef(upb_fielddef_subdef(f)), sub); upb_value v; - bool ok = upb_inttable_lookupptr(&c->plan->methods, key, &v); + bool ok = upb_inttable_lookupptr(&c->group->methods, key, &v); UPB_ASSERT_VAR(ok, ok); return upb_value_getptr(v); } @@ -532,12 +596,12 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { upb_inttable_uninit(&method->dispatch); upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); - method->base.ofs = pcofs(c); + method->code_base.ofs = pcofs(c); putop(c, OP_SETDISPATCH, &method->dispatch); putop(c, OP_STARTMSG); label(c, LABEL_FIELD); upb_msg_iter i; - for(upb_msg_begin(&i, method->msg); !upb_msg_done(&i); upb_msg_next(&i)) { + for(upb_msg_begin(&i, method->schema_); !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_descriptortype_t type = upb_fielddef_descriptortype(f); @@ -680,17 +744,15 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { // On the other hand, if/when the optimization mentioned below is implemented, // binding to a upb_handlers can result in *fewer* methods being generated if // many of the submessages have no handlers bound to them. -static upb_pbdecodermethod *find_methods(compiler *c, - const upb_msgdef *md, - const upb_handlers *h) { - const void *key = h ? (const void*)h : (const void*)md; +static void find_methods(compiler *c, const upb_msgdef *md, + const upb_handlers *h) { + const void *key = methodkey(md, h); upb_value v; - if (upb_inttable_lookupptr(&c->plan->methods, key, &v)) - return upb_value_getptr(v); - upb_pbdecodermethod *method = newmethod(md, h); - // Takes ownership of method. - upb_inttable_insertptr(&c->plan->methods, key, upb_value_ptr(method)); + if (upb_inttable_lookupptr(&c->group->methods, key, &v)) + return; + newmethod(md, h, c->group, key); + // Find submethods. 
upb_msg_iter i; for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); @@ -706,24 +768,34 @@ static upb_pbdecodermethod *find_methods(compiler *c, find_methods(c, upb_downcast_msgdef(upb_fielddef_subdef(f)), sub_h); } - - return method; } -// (Re-)compile bytecode for all messages in "msgs", ensuring that the code -// for "md" is emitted first. Overwrites any existing bytecode in "c". +// (Re-)compile bytecode for all messages in "msgs." +// Overwrites any existing bytecode in "c". static void compile_methods(compiler *c) { // Start over at the beginning of the bytecode. - c->pc = c->plan->code; - compile_method(c, c->plan->topmethod); + c->pc = c->group->bytecode; upb_inttable_iter i; - upb_inttable_begin(&i, &c->plan->methods); + upb_inttable_begin(&i, &c->group->methods); for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); - if (method != c->plan->topmethod) { - compile_method(c, method); - } + compile_method(c, method); + } +} + +static void set_bytecode_handlers(mgroup *g) { + upb_inttable_iter i; + upb_inttable_begin(&i, &g->methods); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i)); + + m->code_base.ptr = g->bytecode + m->code_base.ofs; + + upb_byteshandler *h = &m->input_handler_; + upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr); + upb_byteshandler_setstring(h, upb_pbdecoder_decode, g); + upb_byteshandler_setendstr(h, upb_pbdecoder_end, m); } } @@ -732,17 +804,13 @@ static void compile_methods(compiler *c) { #ifdef UPB_USE_JIT_X64 -static void sethandlers(upb_pbdecoderplan *p, upb_handlers *h, bool allowjit) { - p->jit_code = NULL; - +static void sethandlers(mgroup *g, bool allowjit) { + g->jit_code = NULL; if (allowjit) { - upb_pbdecoder_jit(p); // Compile byte-code into machine code. 
- upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_start, p, - freeplan); - upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, p->jit_code, NULL, NULL); - upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_end, p, NULL); + // Compile byte-code into machine code, create handlers. + upb_pbdecoder_jit(g); } else { - set_bytecode_handlers(p, h); + set_bytecode_handlers(g); } } @@ -754,10 +822,10 @@ static bool bind_dynamic(bool allowjit) { #else // UPB_USE_JIT_X64 -static void sethandlers(upb_pbdecoderplan *p, upb_handlers *h, bool allowjit) { +static void sethandlers(mgroup *g, bool allowjit) { // No JIT compiled in; use bytecode handlers unconditionally. UPB_UNUSED(allowjit); - set_bytecode_handlers(p, h); + set_bytecode_handlers(g); } static bool bind_dynamic(bool allowjit) { @@ -769,56 +837,16 @@ static bool bind_dynamic(bool allowjit) { #endif // UPB_USE_JIT_X64 -/* Public interface ***********************************************************/ - -bool upb_pbdecoder_isdecoder(const upb_handlers *h) { - return getdecoderplan(h) != NULL; -} - -bool upb_pbdecoderplan_hasjitcode(const upb_pbdecoderplan *p) { -#ifdef UPB_USE_JIT_X64 - return p->jit_code != NULL; -#else - UPB_UNUSED(p); - return false; -#endif -} - -bool upb_pbdecoder_hasjitcode(const upb_handlers *h) { - const upb_pbdecoderplan *p = getdecoderplan(h); - if (!p) return false; - return upb_pbdecoderplan_hasjitcode(p); -} - -uint32_t *upb_pbdecoderplan_codebase(const upb_pbdecoderplan *p) { - return p->code; -} - -upb_string_handler *upb_pbdecoderplan_jitcode(const upb_pbdecoderplan *p) { -#ifdef UPB_USE_JIT_X64 - return p->jit_code; -#else - UPB_UNUSED(p); - assert(false); - return NULL; -#endif -} - -const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) { - const upb_pbdecoderplan *p = getdecoderplan(h); - if (!p) return NULL; - return p->topmethod->dest_handlers; -} - -const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, - bool allowjit, - 
const void *owner) { +// TODO(haberman): allow this to be constructed for an arbitrary set of dest +// handlers and other mgroups (but verify we have a transitive closure). +const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, + const void *owner) { UPB_UNUSED(allowjit); assert(upb_handlers_isfrozen(dest)); const upb_msgdef *md = upb_handlers_msgdef(dest); - upb_pbdecoderplan *p = newplan(); - compiler *c = newcompiler(p); + mgroup *g = newgroup(owner); + compiler *c = newcompiler(g); if (bind_dynamic(allowjit)) { // If binding dynamically, remove the reference against destination @@ -826,32 +854,75 @@ const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, dest = NULL; } - p->topmethod = find_methods(c, md, dest); + find_methods(c, md, dest); // We compile in two passes: // 1. all messages are assigned relative offsets from the beginning of the - // bytecode (saved in method->base). + // bytecode (saved in method->code_base). // 2. forwards OP_CALL instructions can be correctly linked since message // offsets have been previously assigned. // // Could avoid the second pass by linking OP_CALL instructions somehow. 
compile_methods(c); compile_methods(c); - p->code_end = c->pc; + g->bytecode_end = c->pc; + freecompiler(c); #ifdef UPB_DUMP_BYTECODE FILE *f = fopen("/tmp/upb-bytecode", "wb"); assert(f); - dumpbc(p->code, p->code_end, stderr); - dumpbc(p->code, p->code_end, f); + dumpbc(g->bytecode, g->bytecode_end, stderr); + dumpbc(g->bytecode, g->bytecode_end, f); fclose(f); #endif - upb_handlers *h = upb_handlers_new( - UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner); - sethandlers(p, h, allowjit); + sethandlers(g, allowjit); + return g; +} - freecompiler(c); - return h; +/* upb_pbcodecache ************************************************************/ + +void upb_pbcodecache_init(upb_pbcodecache *c) { + upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR); + c->allow_jit_ = true; +} + +void upb_pbcodecache_uninit(upb_pbcodecache *c) { + upb_inttable_iter i; + upb_inttable_begin(&i, &c->groups); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i)); + upb_refcounted_unref(UPB_UPCAST(group), c); + } + upb_inttable_uninit(&c->groups); +} + +bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) { + return c->allow_jit_; +} + +bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) { + if (upb_inttable_count(&c->groups) > 0) + return false; + c->allow_jit_ = allow; + return true; +} + +const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers( + upb_pbcodecache *c, const upb_handlers *handlers) { + // Right now we build a new DecoderMethod every time. + // TODO(haberman): properly cache methods by their true key. 
+ const mgroup *g = mgroup_new(handlers, c->allow_jit_, c); + upb_inttable_push(&c->groups, upb_value_constptr(g)); + + const upb_msgdef *md = upb_handlers_msgdef(handlers); + if (bind_dynamic(c->allow_jit_)) { + handlers = NULL; + } + + upb_value v; + bool ok = upb_inttable_lookupptr(&g->methods, methodkey(md, handlers), &v); + UPB_ASSERT_VAR(ok, ok); + return upb_value_getptr(v); } diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c index 2e8132e..429f690 100644 --- a/upb/pb/compile_decoder_x64.c +++ b/upb/pb/compile_decoder_x64.c @@ -34,13 +34,13 @@ #define DECODE_EOF -3 typedef struct { - upb_pbdecoderplan *plan; + mgroup *group; uint32_t *pc; // This pointer is allocated by dasm_init() and freed by dasm_free(). struct dasm_State *dynasm; - // Maps bytecode pc location -> pclabel. + // Maps arbitrary void* -> pclabel. upb_inttable pclabels; upb_inttable pcdefined; @@ -57,7 +57,6 @@ typedef struct { // Used by DynASM to store globals. void **globals; - bool usefp; bool chkret; } jitcompiler; @@ -65,16 +64,16 @@ typedef struct { static int pclabel(jitcompiler *jc, const void *here); static int define_pclabel(jitcompiler *jc, const void *here); static void asmlabel(jitcompiler *jc, const char *fmt, ...); +static int pcofs(jitcompiler* jc); #include "dynasm/dasm_proto.h" #include "dynasm/dasm_x86.h" #include "upb/pb/compile_decoder_x64.h" -static jitcompiler *newjitcompiler(upb_pbdecoderplan *plan) { +static jitcompiler *newjitcompiler(mgroup *group) { jitcompiler *jc = malloc(sizeof(jitcompiler)); - jc->usefp = false; jc->chkret = false; - jc->plan = plan; + jc->group = group; jc->pclabel_count = 0; jc->lastlabelofs = -1; upb_inttable_init(&jc->pclabels, UPB_CTYPE_UINT32); @@ -123,13 +122,22 @@ static int define_pclabel(jitcompiler *jc, const void *here) { return pclabel(jc, here); } +// Returns a bytecode pc offset relative to the beginning of the group's code. 
+static int pcofs(jitcompiler *jc) { + return jc->pc - jc->group->bytecode; +} + static void upb_reg_jit_gdb(jitcompiler *jc); +static int getpclabel(jitcompiler *jc, const void *target) { + return dasm_getpclabel(jc, pclabel(jc, target)); +} + // Given a pcofs relative to method, returns the machine code offset for it // (relative to the beginning of the machine code). int nativeofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) { - void *target = jc->plan->code + method->base.ofs + pcofs; - return dasm_getpclabel(jc, pclabel(jc, target)); + void *target = jc->group->bytecode + method->code_base.ofs + pcofs; + return getpclabel(jc, target); } // Given a pcofs relative to this method's base, returns a machine code offset @@ -137,7 +145,7 @@ int nativeofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) { // machine code base for dispatch table lookups). uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) { - int ofs1 = dasm_getpclabel(jc, pclabel(jc, method->dispatch.array)); + int ofs1 = getpclabel(jc, method->dispatch.array); int ofs2 = nativeofs(jc, method, pcofs); assert(ofs1 > 0); assert(ofs2 > 0); @@ -149,9 +157,11 @@ uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method, // Rewrites the dispatch tables into machine code offsets. static void patchdispatch(jitcompiler *jc) { upb_inttable_iter i; - upb_inttable_begin(&i, &jc->plan->methods); + upb_inttable_begin(&i, &jc->group->methods); for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); + method->is_native_ = true; + upb_inttable *dispatch = &method->dispatch; upb_inttable_iter i2; upb_inttable_begin(&i2, dispatch); @@ -169,11 +179,20 @@ static void patchdispatch(jitcompiler *jc) { } else { // Secondary slot. Since we have 64 bits for the value, we use an // absolute offset. 
- newval = (uint64_t)(jc->plan->jit_code + nativeofs(jc, method, val)); + newval = (uint64_t)(jc->group->jit_code + nativeofs(jc, method, val)); } bool ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval)); UPB_ASSERT_VAR(ok, ok); } + + // Set this only *after* we have patched the offsets (nativeofs() above + // reads this). + method->code_base.ptr = jc->group->jit_code + getpclabel(jc, method); + + upb_byteshandler *h = &method->input_handler_; + upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL); + upb_byteshandler_setstring(h, jc->group->jit_code, method->code_base.ptr); + upb_byteshandler_setendstr(h, upb_pbdecoder_end, method); } } @@ -202,9 +221,10 @@ static void load_so(jitcompiler *jc) { FILE *f = fopen("/tmp/upb-jit-code.s", "w"); if (f) { + uint8_t *jit_code = (uint8_t*)jc->group->jit_code; fputs(" .text\n\n", f); size_t linelen = 0; - for (size_t i = 0; i < jc->plan->jit_size; i++) { + for (size_t i = 0; i < jc->group->jit_size; i++) { upb_value v; if (upb_inttable_lookup(&mclabels, i, &v)) { const char *label = upb_value_getptr(v); @@ -223,23 +243,25 @@ static void load_so(jitcompiler *jc) { fputs("\n", f); fclose(f); } else { - fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing/\n"); + fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing\n"); + abort(); } // TODO: racy if (system("gcc -shared -o /tmp/upb-jit-code.so /tmp/upb-jit-code.s") != 0) { + fprintf(stderr, "Error compiling upb-jit-code.s\n"); abort(); } - jc->dl = dlopen("/tmp/upb-jit-code.so", RTLD_LAZY); - if (!jc->dl) { + jc->group->dl = dlopen("/tmp/upb-jit-code.so", RTLD_LAZY); + if (!jc->group->dl) { fprintf(stderr, "Couldn't dlopen(): %s\n", dlerror()); abort(); } - munmap(jit_code, jc->plan->jit_size); - jit_code = dlsym(jc->dl, "X.enterjit"); - if (!jit_code) { + munmap(jc->group->jit_code, jc->group->jit_size); + jc->group->jit_code = dlsym(jc->group->dl, "X.enterjit"); + if (!jc->group->jit_code) { fprintf(stderr, "Couldn't find enterjit 
sym\n"); abort(); } @@ -248,45 +270,51 @@ static void load_so(jitcompiler *jc) { } #endif -void upb_pbdecoder_jit(upb_pbdecoderplan *plan) { - plan->debug_info = NULL; - plan->dl = NULL; +void upb_pbdecoder_jit(mgroup *group) { + group->debug_info = NULL; + group->dl = NULL; - jitcompiler *jc = newjitcompiler(plan); + assert(group->bytecode); + jitcompiler *jc = newjitcompiler(group); emit_static_asm(jc); jitbytecode(jc); - int dasm_status = dasm_link(jc, &jc->plan->jit_size); + int dasm_status = dasm_link(jc, &jc->group->jit_size); if (dasm_status != DASM_S_OK) { fprintf(stderr, "DynASM error; returned status: 0x%08x\n", dasm_status); abort(); } - char *jit_code = mmap(NULL, jc->plan->jit_size, PROT_READ | PROT_WRITE, + char *jit_code = mmap(NULL, jc->group->jit_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); dasm_encode(jc, jit_code); - mprotect(jit_code, jc->plan->jit_size, PROT_EXEC | PROT_READ); + mprotect(jit_code, jc->group->jit_size, PROT_EXEC | PROT_READ); upb_reg_jit_gdb(jc); + jc->group->jit_code = (upb_string_handlerfunc *)jit_code; #ifdef UPB_JIT_LOAD_SO load_so(jc); #endif - jc->plan->jit_code = (upb_string_handler *)jit_code; patchdispatch(jc); + freejitcompiler(jc); + + // Now the bytecode is no longer needed. + free(group->bytecode); + group->bytecode = NULL; } -void upb_pbdecoder_freejit(upb_pbdecoderplan *plan) { - if (!plan->jit_code) return; - if (plan->dl) { +void upb_pbdecoder_freejit(mgroup *group) { + if (!group->jit_code) return; + if (group->dl) { #ifdef UPB_JIT_LOAD_SO - dlclose(plan->dl); + dlclose(group->dl); #endif } else { - munmap(plan->jit_code, plan->jit_size); + munmap(group->jit_code, group->jit_size); } - free(plan->debug_info); + free(group->debug_info); // TODO: unregister GDB JIT interface. } @@ -338,15 +366,15 @@ void __attribute__((noinline)) __jit_debug_register_code() { static void upb_reg_jit_gdb(jitcompiler *jc) { // Create debug info. 
size_t elf_len = sizeof(upb_jit_debug_elf_file); - jc->plan->debug_info = malloc(elf_len); - memcpy(jc->plan->debug_info, upb_jit_debug_elf_file, elf_len); - uint64_t *p = (void *)jc->plan->debug_info; - for (; (void *)(p + 1) <= (void *)jc->plan->debug_info + elf_len; ++p) { + jc->group->debug_info = malloc(elf_len); + memcpy(jc->group->debug_info, upb_jit_debug_elf_file, elf_len); + uint64_t *p = (void *)jc->group->debug_info; + for (; (void *)(p + 1) <= (void *)jc->group->debug_info + elf_len; ++p) { if (*p == 0x12345678) { - *p = (uintptr_t)jc->plan->jit_code; + *p = (uintptr_t)jc->group->jit_code; } if (*p == 0x321) { - *p = jc->plan->jit_size; + *p = jc->group->jit_size; } } @@ -355,7 +383,7 @@ static void upb_reg_jit_gdb(jitcompiler *jc) { e->next_entry = __jit_debug_descriptor.first_entry; e->prev_entry = NULL; if (e->next_entry) e->next_entry->prev_entry = e; - e->symfile_addr = jc->plan->debug_info; + e->symfile_addr = jc->group->debug_info; e->symfile_size = elf_len; __jit_debug_descriptor.first_entry = e; __jit_debug_descriptor.relevant_entry = e; diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc index 0bddade..fec822a 100644 --- a/upb/pb/compile_decoder_x64.dasc +++ b/upb/pb/compile_decoder_x64.dasc @@ -44,7 +44,7 @@ | sub DELIMEND, DECODER->buf | add DELIMEND, DECODER->bufstart_ofs | mov FRAME->end_ofs, DELIMEND -| mov FRAME->u.closure, CLOSURE +| mov FRAME->sink.closure, CLOSURE |.endmacro | | // Loads unsynced registers from memory back into registers. @@ -52,7 +52,7 @@ | mov FRAME, DECODER->top | mov PTR, DECODER->ptr | mov DATAEND, DECODER->data_end -| mov CLOSURE, FRAME->u.closure +| mov CLOSURE, FRAME->sink.closure | mov DELIMEND, FRAME->end_ofs | sub DELIMEND, DECODER->bufstart_ofs | add DELIMEND, DECODER->buf @@ -145,7 +145,7 @@ static void asmlabel(jitcompiler *jc, const char *fmt, ...) { char *str = malloc(len + 1); // + 1 for NULL terminator. 
if (!str) exit(1); - int written = vsnprintf(str, len, fmt, args); + int written = vsnprintf(str, len + 1, fmt, args); va_end(args); UPB_ASSERT_VAR(written, written == len); @@ -155,6 +155,10 @@ static void asmlabel(jitcompiler *jc, const char *fmt, ...) { upb_inttable_insert(&jc->asmlabels, label, upb_value_ptr(str)); } +static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) { + return h ? upb_handlers_gethandler(h, sel) : NULL; +} + // Emit static assembly routines; code that does not vary based on the message // schema. Since it's not input-dependent, we only need one single copy of it. // For the moment we generate a single copy per generated handlers. Eventually @@ -174,9 +178,6 @@ static void emit_static_asm(jitcompiler *jc) { |->enterjit: |1: | push rbp - if (jc->usefp) { - | mov rbp, rsp - } | push r15 | push r14 | push r13 @@ -189,9 +190,12 @@ static void emit_static_asm(jitcompiler *jc) { | // 16-byte stack alignment. | sub rsp, 8 | + | mov rbx, ARG2_64 // Preserve JIT method. + | | mov DECODER, rdi | callp upb_pbdecoder_resume // Same args as us; reuse regs. | mov DECODER->saved_rsp, rsp + | mov rax, rbx | load_regs | | // Test whether we have a saved stack to resume. 
@@ -199,7 +203,7 @@ static void emit_static_asm(jitcompiler *jc) { | test ARG3_64, ARG3_64 | jnz >1 | - | call =>pclabel(jc, jc->plan->topmethod) + | call rax | | mov rax, DECODER->size_param | mov qword DECODER->call_len, 0 @@ -265,7 +269,7 @@ static void emit_static_asm(jitcompiler *jc) { asmlabel(jc, "pushlendelim"); |->pushlendelim: |1: - | mov FRAME->u.closure, CLOSURE + | mov FRAME->sink.closure, CLOSURE | mov DECODER->checkpoint, PTR | dv32 | mov rcx, DELIMEND @@ -511,7 +515,7 @@ static void jitprimitive(jitcompiler *jc, opcode op, static char fastpath_bytes[] = { 1, 1, 4, 8 }; const valtype_t type = types[op]; const int fastbytes = fastpath_bytes[type]; - upb_func *handler = upb_handlers_gethandler(h, sel); + upb_func *handler = gethandler(h, sel); if (handler) { |1: @@ -678,12 +682,20 @@ static void jitdispatch(jitcompiler *jc, |=>define_pclabel(jc, &method->dispatch): |1: // Decode the field tag. - // OPT: inline two bytes of varint decoding for big messages. | mov aword DECODER->checkpoint, PTR - | chkeob 1, >6 + | chkeob 2, >6 | movzx edx, byte [PTR] | test dl, dl - | jns >7 + | jns >7 // Jump if first byte has no continuation bit. + | movzx ecx, byte [PTR + 1] + | test cl, cl + | js >6 // Jump if second byte has continuation bit. + | // Confirmed two-byte varint. + | shl ecx, 7 + | and edx, 0x7f + | or edx, ecx + | add PTR, 2 + | jmp >8 |6: | call ->decode_unknown_tag_fallback | test eax, eax // Hit DELIMEND? 
@@ -848,15 +860,14 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs, static void jitbytecode(jitcompiler *jc) { upb_pbdecodermethod *method = NULL; const upb_handlers *h = NULL; - for (jc->pc = jc->plan->code; jc->pc < jc->plan->code_end; ) { + for (jc->pc = jc->group->bytecode; jc->pc < jc->group->bytecode_end; ) { int32_t instr = *jc->pc; opcode op = instr & 0xff; uint32_t arg = instr >> 8; int32_t longofs = arg; if (op != OP_STARTMSG && op != OP_SETDISPATCH) { - asmlabel(jc, "0x%lx.%s", jc->pc - jc->plan->code, - upb_pbdecoder_getopname(op)); + asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op)); } // TODO: optimize this to only define pclabels that are actually used. |=>define_pclabel(jc, jc->pc): @@ -865,16 +876,11 @@ static void jitbytecode(jitcompiler *jc) { switch (op) { case OP_STARTMSG: { // This opcode serves as a function prologue also. - const char *msgname = upb_msgdef_fullname(method->msg); - asmlabel(jc, "parse.%s", msgname); + const char *msgname = upb_msgdef_fullname(method->schema_); + asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname); |=>define_pclabel(jc, method): - if (jc->usefp) { - | push rbp - | mov rbp, rsp - } else { - | sub rsp, 8 - } - upb_func *startmsg = upb_handlers_gethandler(h, UPB_STARTMSG_SELECTOR); + | sub rsp, 8 + upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR); if (startmsg) { // bool startmsg(void *closure, const void *hd) |1: @@ -892,7 +898,7 @@ static void jitbytecode(jitcompiler *jc) { } case OP_ENDMSG: { // This opcode serves as a function epilogue also. 
- upb_func *endmsg = upb_handlers_gethandler(h, UPB_ENDMSG_SELECTOR); + upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR); |9: if (endmsg) { // bool endmsg(void *closure, const void *hd, upb_status *status) @@ -901,11 +907,7 @@ static void jitbytecode(jitcompiler *jc) { | mov ARG3_64, DECODER->status | callp endmsg } - if (jc->usefp) { - | pop rbp - } else { - | add rsp, 8 - } + | add rsp, 8 | ret break; } @@ -917,10 +919,13 @@ static void jitbytecode(jitcompiler *jc) { // &method->dispatch; we want to go backwards and recover method. method = (void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch)); - h = method->dest_handlers; - assert(h); // We only support statically-bound handlers for now. - const char *msgname = upb_msgdef_fullname(method->msg); - asmlabel(jc, "dispatch.%s", msgname); + // May be NULL, in which case no handlers for this message will be found. + // OPT: we should do better by completely skipping the message in this + // case instead of parsing it field by field. We should also do the skip + // in the containing message's code. 
+ h = method->dest_handlers_; + const char *msgname = upb_msgdef_fullname(method->schema_); + asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname); jitdispatch(jc, method); break; } @@ -942,7 +947,7 @@ static void jitbytecode(jitcompiler *jc) { case OP_STARTSEQ: case OP_STARTSUBMSG: case OP_STARTSTR: { - upb_func *start = upb_handlers_gethandler(h, arg); + upb_func *start = gethandler(h, arg); if (start) { // void *startseq(void *closure, const void *hd) // void *startsubmsg(void *closure, const void *hd) @@ -972,7 +977,7 @@ static void jitbytecode(jitcompiler *jc) { case OP_ENDSEQ: case OP_ENDSUBMSG: case OP_ENDSTR: { - upb_func *end = upb_handlers_gethandler(h, arg); + upb_func *end = gethandler(h, arg); if (end) { // bool endseq(void *closure, const void *hd) // bool endsubmsg(void *closure, const void *hd) @@ -995,7 +1000,7 @@ static void jitbytecode(jitcompiler *jc) { break; } case OP_STRING: { - upb_func *str = upb_handlers_gethandler(h, arg); + upb_func *str = gethandler(h, arg); | cmp PTR, DELIMEND | je >4 |1: @@ -1028,7 +1033,13 @@ static void jitbytecode(jitcompiler *jc) { break; } case OP_PUSHTAGDELIM: - | mov FRAME->u.closure, CLOSURE + | mov FRAME->sink.closure, CLOSURE + | // This shouldn't need to be read, because tag-delimited fields + | // shouldn't have an OP_SETDELIM after them. But for the moment + | // non-packed repeated fields do OP_SETDELIM so they can share more + | // code with the packed code-path. If this is changed later, this + | // store can be removed. + | mov qword FRAME->end_ofs, 0 | add FRAME, sizeof(upb_pbdecoder_frame) | cmp FRAME, DECODER->limit | je ->err @@ -1038,13 +1049,14 @@ static void jitbytecode(jitcompiler *jc) { break; case OP_POP: | sub FRAME, sizeof(upb_pbdecoder_frame) - | mov CLOSURE, FRAME->u.closure + | mov CLOSURE, FRAME->sink.closure break; case OP_SETDELIM: // OPT: experiment with testing vs old offset to optimize away. 
| mov DATAEND, DECODER->end | add DELIMEND, FRAME->end_ofs - | jc >1 + | cmp DELIMEND, DECODER->buf + | jb >1 | cmp DELIMEND, DATAEND | ja >1 // OPT: try cmov. | mov DATAEND, DELIMEND diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 70862d5..6fd6576 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -10,7 +10,6 @@ #include <stdarg.h> #include <stddef.h> #include <stdlib.h> -#include "upb/bytestream.h" #include "upb/pb/decoder.int.h" #include "upb/pb/varint.int.h" @@ -70,7 +69,7 @@ static bool in_residual_buf(upb_pbdecoder *d, const char *p); static void seterr(upb_pbdecoder *d, const char *msg) { // TODO(haberman): encapsulate this access to pipeline->status, but not sure // exactly what that interface should look like. - upb_status_seterrliteral(&d->sink->pipeline_->status_, msg); + upb_status_seterrmsg(d->status, msg); } void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { @@ -377,7 +376,7 @@ static bool push(upb_pbdecoder *d, uint64_t end) { fr++; fr->end_ofs = end; - fr->u.dispatch = NULL; + fr->dispatch = NULL; fr->groupnum = -1; d->top = fr; return true; @@ -441,7 +440,7 @@ int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum, } static int32_t dispatch(upb_pbdecoder *d) { - upb_inttable *dispatch = d->top->u.dispatch; + upb_inttable *dispatch = d->top->dispatch; // Decode tag. uint32_t tag; @@ -478,16 +477,23 @@ static int32_t dispatch(upb_pbdecoder *d) { } } +// Callers know that the stack is more than one deep because the opcodes that +// call this only occur after PUSH operations. 
+upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) { + assert(d->top != d->stack); + return d->top - 1; +} + /* The main decoding loop *****************************************************/ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, size_t size) { upb_pbdecoder *d = closure; - const upb_pbdecoderplan *p = hd; + const mgroup *group = hd; assert(buf); upb_pbdecoder_resume(d, NULL, buf, size); - UPB_UNUSED(p); + UPB_UNUSED(group); #define VMCASE(op, code) \ case op: { code; if (consumes_input(op)) checkpoint(d); break; } @@ -495,7 +501,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, VMCASE(OP_PARSE_ ## type, { \ ctype val; \ CHECK_RETURN(decode_ ## wt(d, &val)); \ - upb_sink_put ## name(d->sink, arg, (convfunc)(val)); \ + upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \ }) while(1) { @@ -513,7 +519,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, (int)(d->data_end - ptr(d)), (int)(d->end - ptr(d)), (int)((d->top->end_ofs - d->bufstart_ofs) - (ptr(d) - d->buf)), - (int)(d->pc - 1 - upb_pbdecoderplan_codebase(p)), + (int)(d->pc - 1 - group->bytecode), upb_pbdecoder_getopname(op), arg); #endif @@ -537,39 +543,42 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, VMCASE(OP_SETDISPATCH, d->top->base = d->pc - 1; - memcpy(&d->top->u.dispatch, d->pc, sizeof(void*)); + memcpy(&d->top->dispatch, d->pc, sizeof(void*)); d->pc += sizeof(void*) / sizeof(uint32_t); ) VMCASE(OP_STARTMSG, - CHECK_SUSPEND(upb_sink_startmsg(d->sink)); + CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink)); ) VMCASE(OP_ENDMSG, - CHECK_SUSPEND(upb_sink_endmsg(d->sink)); + CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status)); assert(d->call_len > 0); d->pc = d->callstack[--d->call_len]; ) VMCASE(OP_STARTSEQ, - CHECK_SUSPEND(upb_sink_startseq(d->sink, arg)); + upb_pbdecoder_frame *outer = outer_frame(d); + CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, 
&d->top->sink)); ) VMCASE(OP_ENDSEQ, - CHECK_SUSPEND(upb_sink_endseq(d->sink, arg)); + CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg)); ) VMCASE(OP_STARTSUBMSG, - CHECK_SUSPEND(upb_sink_startsubmsg(d->sink, arg)); + upb_pbdecoder_frame *outer = outer_frame(d); + CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink)); ) VMCASE(OP_ENDSUBMSG, - CHECK_SUSPEND(upb_sink_endsubmsg(d->sink, arg)); + CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg)); ) VMCASE(OP_STARTSTR, uint32_t len = d->top->end_ofs - offset(d); - CHECK_SUSPEND(upb_sink_startstr(d->sink, arg, len)); + upb_pbdecoder_frame *outer = outer_frame(d); + CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink)); if (len == 0) { d->pc++; // Skip OP_STRING. } ) VMCASE(OP_STRING, uint32_t len = curbufleft(d); - CHECK_SUSPEND(upb_sink_putstring(d->sink, arg, ptr(d), len)); + CHECK_SUSPEND(upb_sink_putstring(&d->top->sink, arg, ptr(d), len)); advance(d, len); if (d->delim_end == NULL) { // String extends beyond this buf? 
d->pc--; @@ -579,7 +588,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, } ) VMCASE(OP_ENDSTR, - CHECK_SUSPEND(upb_sink_endstr(d->sink, arg)); + CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg)); ) VMCASE(OP_PUSHTAGDELIM, CHECK_SUSPEND(push(d, d->top->end_ofs)); @@ -664,50 +673,52 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, } } -void *upb_pbdecoder_start(void *closure, const void *handler_data, - size_t size_hint) { +void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { + upb_pbdecoder *d = closure; UPB_UNUSED(size_hint); + d->call_len = 1; + d->pc = pc; + return d; +} + +void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) { + UPB_UNUSED(hd); upb_pbdecoder *d = closure; - const upb_pbdecoderplan *plan = handler_data; - UPB_UNUSED(plan); - if (upb_pbdecoderplan_hasjitcode(plan)) { - d->top->u.closure = d->sink->top->closure; - d->call_len = 0; - } else { - d->call_len = 1; - d->pc = upb_pbdecoderplan_codebase(plan); - } - assert(d); - assert(d->sink); - if (plan->topmethod->dest_handlers) { - assert(d->sink->top->h == plan->topmethod->dest_handlers); - } - d->status = &d->sink->pipeline_->status_; + d->call_len = 0; return d; } bool upb_pbdecoder_end(void *closure, const void *handler_data) { upb_pbdecoder *d = closure; - const upb_pbdecoderplan *plan = handler_data; + const upb_pbdecodermethod *method = handler_data; if (d->residual_end > d->residual) { seterr(d, "Unexpected EOF"); return false; } + if (d->top->end_ofs != UINT64_MAX) { + seterr(d, "Unexpected EOF inside delimited string"); + return false; + } + // Message ends here. 
uint64_t end = offset(d); d->top->end_ofs = end; + char dummy; - if (upb_pbdecoderplan_hasjitcode(plan)) { #ifdef UPB_USE_JIT_X64 + const mgroup *group = (const mgroup*)method->group; + if (group->jit_code) { if (d->top != d->stack) d->stack->end_ofs = 0; - upb_pbdecoderplan_jitcode(plan)(closure, handler_data, &dummy, 0); -#endif + group->jit_code(closure, method->code_base.ptr, &dummy, 0); } else { +#endif d->stack->end_ofs = end; - uint32_t *p = d->pc - 1; + const uint32_t *p = d->pc; + // Check the previous bytecode, but guard against beginning. + if (p != method->code_base.ptr) p--; if (getop(*p) == OP_CHECKDELIM) { // Rewind from OP_TAG* to OP_CHECKDELIM. assert(getop(*d->pc) == OP_TAG1 || @@ -716,28 +727,29 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { d->pc = p; } upb_pbdecoder_decode(closure, handler_data, &dummy, 0); +#ifdef UPB_USE_JIT_X64 } +#endif if (d->call_len != 0) { seterr(d, "Unexpected EOF"); return false; } - return upb_ok(&d->sink->pipeline_->status_); + return true; } -void init(void *_d, upb_pipeline *p) { - UPB_UNUSED(p); - upb_pbdecoder *d = _d; +void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m, + upb_status *s) { d->limit = &d->stack[UPB_DECODER_MAX_NESTING]; - d->sink = NULL; + upb_bytessink_reset(&d->input_, &m->input_handler_, d); + d->method_ = m; d->callstack[0] = &halt; - // reset() must be called before decoding; this is guaranteed by assert() in - // start(). + d->status = s; + upb_pbdecoder_reset(d); } -void reset(void *_d) { - upb_pbdecoder *d = _d; +void upb_pbdecoder_reset(upb_pbdecoder *d) { d->top = d->stack; d->top->end_ofs = UINT64_MAX; d->bufstart_ofs = 0; @@ -748,21 +760,27 @@ void reset(void *_d) { d->call_len = 1; } -bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) { - // TODO(haberman): typecheck the sink, and test whether the decoder is in the - // middle of decoding. Return false if either assumption is violated. 
- d->sink = sink; - reset(d); - return true; +// Not currently required, but to support outgrowing the static stack we need +// this. +void upb_pbdecoder_uninit(upb_pbdecoder *d) {} + +const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { + return d->method_; } -const upb_frametype upb_pbdecoder_frametype = { - sizeof(upb_pbdecoder), - init, - NULL, - reset, -}; +bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) { + // TODO(haberman): do we need to test whether the decoder is already on the + // stack (like calling this from within a callback)? Should we support + // rebinding the output at all? + assert(sink); + if (d->method_->dest_handlers_) { + if (sink->handlers != d->method_->dest_handlers_) + return false; + } + upb_sink_reset(&d->top->sink, sink->handlers, sink->closure); + return true; +} -const upb_frametype *upb_pbdecoder_getframetype() { - return &upb_pbdecoder_frametype; +upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) { + return &d->input_; } diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index c645688..4529324 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -1,102 +1,447 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009-2010 Google Inc. See LICENSE for details. + * Copyright (c) 2009-2013 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * upb::Decoder implements a high performance, streaming decoder for protobuf - * data that works by parsing input data one buffer at a time and calling into - * a upb::Handlers. + * upb::pb::Decoder implements a high performance, streaming, resumable decoder + * for the binary protobuf format. 
*/ #ifndef UPB_DECODER_H_ #define UPB_DECODER_H_ +#include "upb/table.int.h" #include "upb/sink.h" +#ifdef __cplusplus +namespace upb { +namespace pb { +class CodeCache; +class Decoder; +class DecoderMethod; +} // namespace pb +} // namespace upb + +typedef upb::pb::CodeCache upb_pbcodecache; +typedef upb::pb::Decoder upb_pbdecoder; +typedef upb::pb::DecoderMethod upb_pbdecodermethod; +#else +struct upb_pbdecoder; +struct upb_pbdecodermethod; +struct upb_pbcodecache; + +typedef struct upb_pbdecoder upb_pbdecoder; +typedef struct upb_pbdecodermethod upb_pbdecodermethod; +typedef struct upb_pbcodecache upb_pbcodecache; +#endif + // The maximum that any submessages can be nested. Matches proto2's limit. -// At the moment this specifies the size of several statically-sized arrays -// and therefore setting it high will cause more memory to be used. Will -// be replaced by a runtime-configurable limit and dynamically-resizing arrays. -// TODO: make this a runtime-settable property of Decoder. +// This specifies the size of the decoder's statically-sized array and therefore +// setting it high will cause the upb::pb::Decoder object to be larger. +// +// If necessary we can add a runtime-settable property to Decoder that allows +// this to be larger than the compile-time setting, but this would add +// complexity, particularly since we would have to decide how/if to give users +// the ability to set a custom memory allocation function. #define UPB_DECODER_MAX_NESTING 64 +// Internal-only struct used by the decoder. +typedef struct { #ifdef __cplusplus -namespace upb { -namespace pb { + private: +#endif + // Space optimization note: we store two pointers here that the JIT + // doesn't need at all; the upb_handlers* inside the sink and + // the dispatch table pointer. We can optimize so that the JIT uses + // smaller stack frames than the interpreter. The only thing we need + // to guarantee is that the fallback routines can find end_ofs. 
-// Frame type that encapsulates decoder state. -class Decoder; +#ifdef __cplusplus + char sink[sizeof(upb_sink)]; +#else + upb_sink sink; +#endif + // The absolute stream offset of the end-of-frame delimiter. + // Non-delimited frames (groups and non-packed repeated fields) reuse the + // delimiter of their parent, even though the frame may not end there. + // + // NOTE: the JIT stores a slightly different value here for non-top frames. + // It stores the value relative to the end of the enclosed message. But the + // top frame is still stored the same way, which is important for ensuring + // that calls from the JIT into C work correctly. + uint64_t end_ofs; + const uint32_t *base; + uint32_t groupnum; + upb_inttable *dispatch; // Not used by the JIT. +} upb_pbdecoder_frame; -// Resets the sink of the Decoder. This must be called at least once before -// the decoder can be used. It may only be called with the decoder is in a -// state where it was just created or reset. The given sink must be from the -// same pipeline as this decoder. -inline bool ResetDecoderSink(Decoder* d, Sink* sink); +#ifdef __cplusplus -// Gets the handlers suitable for parsing protobuf data according to the given -// destination handlers. The protobuf schema to parse is taken from dest. -inline const upb::Handlers *GetDecoderHandlers(const upb::Handlers *dest, - bool allowjit, - const void *owner); +// Represents the code to parse a protobuf according to a specific schema, +// optionally bound to a set of destination handlers. +class upb::pb::DecoderMethod /* : public upb::RefCounted */ { + public: + // From upb::ReferenceCounted. + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void* from, const void* to) const; + void CheckRef(const void* owner) const; -// Returns true if these handlers represent a upb::pb::Decoder. -bool IsDecoder(const upb::Handlers *h); + // The schema that this method parses. Never NULL. 
+ const MessageDef* schema() const; -// Returns true if IsDecoder(h) and the given handlers have JIT code. -inline bool HasJitCode(const upb::Handlers* h); + // The destination handlers that are statically bound to this method. + // This method is only capable of outputting to a sink that uses these + // handlers. + // + // Will be NULL if this method is not statically bound. + const Handlers* dest_handlers() const; -// Returns the destination handlers if IsDecoder(h), otherwise returns NULL. -const upb::Handlers* GetDestHandlers(const upb::Handlers* h); + // The input handlers for this decoder method. + const BytesHandler* input_handler() const; -} // namespace pb -} // namespace upb + // Whether this method is native. + bool is_native() const; -typedef upb::pb::Decoder upb_pbdecoder; + // Convenience method for generating a DecoderMethod without explicitly + // creating a CodeCache. + static reffed_ptr<const DecoderMethod> NewForDestHandlers( + const upb::Handlers *dest); -extern "C" { + private: + UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod); #else -struct upb_pbdecoder; -typedef struct upb_pbdecoder upb_pbdecoder; +struct upb_pbdecodermethod { +#endif + upb_refcounted base; + + // While compiling, the base is relative in "ofs", after compiling it is + // absolute in "ptr". + union { + uint32_t ofs; // PC offset of method. + void *ptr; // Pointer to bytecode or machine code for this method. + } code_base; + + // The decoder method group to which this method belongs. We own a ref. + // Owning a ref on the entire group is more coarse-grained than is strictly + // necessary; all we truly require is that methods we directly reference + // outlive us, while the group could contain many other messages we don't + // require. But the group represents the messages that were + // allocated+compiled together, so it makes the most sense to free them + // together also. + const upb_refcounted *group; + + // Whether this method is native code or bytecode. 
+ bool is_native_; + + // The handler one calls to invoke this method. + upb_byteshandler input_handler_; + + // The message type that this method is parsing. + const upb_msgdef *schema_; + + // The destination handlers this method is bound to, or NULL if this method + // can be bound to a destination handlers instance at runtime. + // + // If non-NULL, we own a ref. + const upb_handlers *dest_handlers_; + + // The dispatch table layout is: + // [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ] + // + // If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup + // (UPB_MAX_FIELDNUMBER + fieldnum) and jump there. + // + // We need two wire types because of packed/non-packed compatibility. A + // primitive repeated field can use either wire type and be valid. While we + // could key the table on fieldnum+wiretype, the table would be 8x sparser. + // + // Storing two wire types in the primary value allows us to quickly rule out + // the second wire type without needing to do a separate lookup (this case is + // less common than an unknown field). + upb_inttable dispatch; +}; + +#ifdef __cplusplus + +// A Decoder receives binary protobuf data on its input sink and pushes the +// decoded data to its output sink. +class upb::pb::Decoder { + public: + // Constructs a decoder instance for the given method, which must outlive this + // decoder. Any errors during parsing will be set on the given status, which + // must also outlive this decoder. + Decoder(const DecoderMethod* method, Status* status); + ~Decoder(); + + // Returns the DecoderMethod this decoder is parsing from. + // TODO(haberman): Do users need to be able to rebind this? + const DecoderMethod* method() const; + + // Resets the state of the decoder. + void Reset(); + + // Resets the output sink of the Decoder. + // The given sink must match method()->schema() as well as + // method()->dest_handlers() if the latter is non-NULL. 
+ // + // This must be called at least once before the decoder can be used. It may + // only be called when the decoder is in a state where it was just created or + // reset with pipeline.Reset(). The given sink must be from the same pipeline + // as this decoder. + bool ResetOutput(Sink* sink); + + // The sink on which this decoder receives input. + BytesSink* input(); + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(Decoder); +#else +struct upb_pbdecoder { +#endif + // Our input sink. + upb_bytessink input_; + + // The decoder method we are parsing with (owned). + const upb_pbdecodermethod *method_; + + size_t call_len; + const uint32_t *pc, *last; + + // Current input buffer and its stream offset. + const char *buf, *ptr, *end, *checkpoint; + + // End of the delimited region, relative to ptr, or NULL if not in this buf. + const char *delim_end; + + // End of the delimited region, relative to ptr, or end if not in this buf. + const char *data_end; + + // Overall stream offset of "buf." + uint64_t bufstart_ofs; + + // How many bytes past the end of the user buffer we want to skip. + size_t skip; + + // Buffer for residual bytes not parsed from the previous buffer. + // The maximum number of residual bytes we require is 12; a five-byte + // unknown tag plus an eight-byte value, less one because the value + // is only a partial value. + char residual[12]; + char *residual_end; + + // Stores the user buffer passed to our decode function. + const char *buf_param; + size_t size_param; + +#ifdef UPB_USE_JIT_X64 + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; + + const void *saved_rsp; +#endif + + upb_status *status; + + // Our internal stack. + upb_pbdecoder_frame *top, *limit; + upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING]; +#ifdef UPB_USE_JIT_X64 + // Each native stack frame needs two pointers, plus we need a few frames for + // the enter/exit trampolines. 
+ const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10]; +#else + const uint32_t *callstack[UPB_DECODER_MAX_NESTING]; +#endif +}; + +#ifdef __cplusplus + +// A class for caching protobuf processing code, whether bytecode for the +// interpreted decoder or machine code for the JIT. +// +// This class is not thread-safe. +class upb::pb::CodeCache { + public: + CodeCache(); + ~CodeCache(); + + // Whether the cache is allowed to generate machine code. Defaults to true. + // There is no real reason to turn it off except for testing or if you are + // having a specific problem with the JIT. + // + // Note that allow_jit = true does not *guarantee* that the code will be JIT + // compiled. If this platform is not supported or the JIT was not compiled + // in, the code may still be interpreted. + bool allow_jit() const; + + // This may only be called when the object is first constructed, and prior to + // any code generation, otherwise returns false and does nothing. + bool set_allow_jit(bool allow); + + // Returns a DecoderMethod that can push data to the given handlers. + // If a suitable method already exists, it will be returned from the cache. + // + // Specifying the destination handlers here allows the DecoderMethod to be + // statically bound to the destination handlers if possible, which can allow + // more efficient decoding. However the returned method may or may not + // actually be statically bound. But in all cases, the returned method can + // push data to the given handlers. + const DecoderMethod *GetDecoderMethodForDestHandlers( + const upb::Handlers *handlers); + + // If/when someone needs to explicitly create a dynamically-bound + // DecoderMethod*, we can add a method to get it here. + + private: + UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache); +#else +struct upb_pbcodecache { #endif + bool allow_jit_; + + // Array of mgroups. 
+ upb_inttable groups; +}; + + +#ifdef __cplusplus +extern "C" { +#endif + +void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method, + upb_status *status); +void upb_pbdecoder_uninit(upb_pbdecoder *d); +void upb_pbdecoder_reset(upb_pbdecoder *d); +const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); +bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink); +upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d); -// C API. -const upb_frametype *upb_pbdecoder_getframetype(); -bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink *sink); -const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, - bool allowjit, - const void *owner); -bool upb_pbdecoder_isdecoder(const upb_handlers *h); -bool upb_pbdecoder_hasjitcode(const upb_handlers *h); -const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h); +void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner); +void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner); +void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m, + const void *from, const void *to); +void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m, + const void *owner); +const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m); +const upb_handlers *upb_pbdecodermethod_desthandlers( + const upb_pbdecodermethod *m); +const upb_byteshandler *upb_pbdecodermethod_inputhandler( + const upb_pbdecodermethod *m); +bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m); +const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers( + const upb_handlers *dest, const void *owner); -// C++ implementation details. 
///////////////////////////////////////////////// +void upb_pbcodecache_init(upb_pbcodecache *c); +void upb_pbcodecache_uninit(upb_pbcodecache *c); +bool upb_pbcodecache_allowjit(const upb_pbcodecache *c); +bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow); +const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers( + upb_pbcodecache *c, const upb_handlers *handlers); + +#ifdef __cplusplus +} /* extern "C" */ +#endif #ifdef __cplusplus -} // extern "C" namespace upb { +template<> +class Pointer<pb::DecoderMethod> { + public: + explicit Pointer(pb::DecoderMethod* ptr) : ptr_(ptr) {} + operator pb::DecoderMethod*() { return ptr_; } + operator RefCounted*() { return UPB_UPCAST(ptr_); } + private: + pb::DecoderMethod* ptr_; +}; + +template<> +class Pointer<const pb::DecoderMethod> { + public: + explicit Pointer(const pb::DecoderMethod* ptr) : ptr_(ptr) {} + operator const pb::DecoderMethod*() { return ptr_; } + operator const RefCounted*() { return UPB_UPCAST(ptr_); } + private: + const pb::DecoderMethod* ptr_; +}; + namespace pb { -inline bool ResetDecoderSink(Decoder* r, Sink* sink) { - return upb_pbdecoder_resetsink(r, sink); + +inline Decoder::Decoder(const DecoderMethod* m, Status* s) { + upb_pbdecoder_init(this, m, s); +} +inline Decoder::~Decoder() { + upb_pbdecoder_uninit(this); +} +inline const DecoderMethod* Decoder::method() const { + return upb_pbdecoder_method(this); +} +inline void Decoder::Reset() { + upb_pbdecoder_reset(this); +} +inline bool Decoder::ResetOutput(Sink* sink) { + return upb_pbdecoder_resetoutput(this, sink); +} +inline BytesSink* Decoder::input() { + return upb_pbdecoder_input(this); +} + +inline void DecoderMethod::Ref(const void *owner) const { + upb_pbdecodermethod_ref(this, owner); } -inline const upb::Handlers* GetDecoderHandlers(const upb::Handlers* dest, - bool allowjit, - const void* owner) { - return upb_pbdecoder_gethandlers(dest, allowjit, owner); +inline void DecoderMethod::Unref(const void 
*owner) const { + upb_pbdecodermethod_unref(this, owner); } -inline bool IsDecoder(const upb::Handlers* h) { - return upb_pbdecoder_isdecoder(h); +inline void DecoderMethod::DonateRef(const void *from, const void *to) const { + upb_pbdecodermethod_donateref(this, from, to); } -inline bool HasJitCode(const upb::Handlers* h) { - return upb_pbdecoder_hasjitcode(h); +inline void DecoderMethod::CheckRef(const void *owner) const { + upb_pbdecodermethod_checkref(this, owner); } -inline const upb::Handlers* GetDestHandlers(const upb::Handlers* h) { - return upb_pbdecoder_getdesthandlers(h); +inline const MessageDef* DecoderMethod::schema() const { + return upb_pbdecodermethod_schema(this); } +inline const Handlers* DecoderMethod::dest_handlers() const { + return upb_pbdecodermethod_desthandlers(this); +} +inline const BytesHandler* DecoderMethod::input_handler() const { + return upb_pbdecodermethod_inputhandler(this); +} +inline bool DecoderMethod::is_native() const { + return upb_pbdecodermethod_isnative(this); +} +// static +inline reffed_ptr<const DecoderMethod> DecoderMethod::NewForDestHandlers( + const Handlers *dest) { + const upb_pbdecodermethod *m = + upb_pbdecodermethod_newfordesthandlers(dest, &m); + return reffed_ptr<const DecoderMethod>(m, &m); +} + +inline CodeCache::CodeCache() { + upb_pbcodecache_init(this); +} +inline CodeCache::~CodeCache() { + upb_pbcodecache_uninit(this); +} +inline bool CodeCache::allow_jit() const { + return upb_pbcodecache_allowjit(this); +} +inline bool CodeCache::set_allow_jit(bool allow) { + return upb_pbcodecache_setallowjit(this, allow); +} +inline const DecoderMethod* CodeCache::GetDecoderMethodForDestHandlers( + const upb::Handlers* handlers) { + return upb_pbcodecache_getdecodermethodfordesthandlers(this, handlers); +} + } // namespace pb } // namespace upb -#endif + +#endif // __cplusplus #endif /* UPB_DECODER_H_ */ diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index 8c8710c..1c10eb3 100644 --- 
a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -67,11 +67,46 @@ typedef enum { UPB_INLINE opcode getop(uint32_t instr) { return instr & 0xff; } -const upb_frametype upb_pbdecoder_frametype; +// Method group; represents a set of decoder methods that had their code +// emitted together, and must therefore be freed together. Immutable once +// created. It is possible we may want to expose this to users at some point. +// +// Overall ownership of Decoder objects looks like this: +// +// +----------+ +// | | <---> DecoderMethod +// | method | +// CodeCache ---> | group | <---> DecoderMethod +// | | +// | (mgroup) | <---> DecoderMethod +// +----------+ +typedef struct { + upb_refcounted base; + + // Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. We own refs on the + // methods. + upb_inttable methods; + + // When we add the ability to link to previously existing mgroups, we'll + // need an array of mgroups we reference here, and own refs on them. + + // The bytecode for our methods, if any exists. Owned by us. + uint32_t *bytecode; + uint32_t *bytecode_end; + +#ifdef UPB_USE_JIT_X64 + // JIT-generated machine code, if any. + upb_string_handlerfunc *jit_code; + // The size of the jit_code (required to munmap()). + size_t jit_size; + char *debug_info; + void *dl; +#endif +} mgroup; // Decoder entry points; used as handlers. -void *upb_pbdecoder_start(void *closure, const void *handler_data, - size_t size_hint); +void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint); +void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint); size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, size_t size); bool upb_pbdecoder_end(void *closure, const void *handler_data); @@ -91,18 +126,12 @@ void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg); // Error messages that are shared between the bytecode and JIT decoders. 
extern const char *kPbDecoderStackOverflow; -typedef struct _upb_pbdecoderplan upb_pbdecoderplan; - // Access to decoderplan members needed by the decoder. -bool upb_pbdecoderplan_hasjitcode(const upb_pbdecoderplan *p); -uint32_t *upb_pbdecoderplan_codebase(const upb_pbdecoderplan *p); const char *upb_pbdecoder_getopname(unsigned int op); -upb_string_handler *upb_pbdecoderplan_jitcode(const upb_pbdecoderplan *p); - -// JIT entry point. -void upb_pbdecoder_jit(upb_pbdecoderplan *plan); -void upb_pbdecoder_freejit(upb_pbdecoderplan *plan); +// JIT codegen entry point. +void upb_pbdecoder_jit(mgroup *group); +void upb_pbdecoder_freejit(mgroup *group); // A special label that means "do field dispatch for this message and branch to // wherever that takes you." @@ -112,131 +141,4 @@ void upb_pbdecoder_freejit(upb_pbdecoderplan *plan); #define DECODE_MISMATCH -2 // Used only from checktag_slow(). #define DECODE_ENDGROUP -2 // Used only from checkunknown(). -typedef struct { - // The absolute stream offset of the end-of-frame delimiter. - // Non-delimited frames (groups and non-packed repeated fields) reuse the - // delimiter of their parent, even though the frame may not end there. - // - // NOTE: the JIT stores a slightly different value here for non-top frames. - // It stores the value relative to the end of the enclosed message. But the - // innermost frame is still stored the same way, which is important for - // ensuring that calls from the JIT into C work correctly. - uint64_t end_ofs; - uint32_t *base; - uint32_t groupnum; - union { - upb_inttable *dispatch; // Not used by the JIT. - void *closure; // Only used by the JIT. - } u; -} upb_pbdecoder_frame; - -struct upb_pbdecoder { - // Where we push parsed data (not owned). - upb_sink *sink; - - size_t call_len; - uint32_t *pc, *last; - - // Current input buffer and its stream offset. - const char *buf, *ptr, *end, *checkpoint; - - // End of the delimited region, relative to ptr, or NULL if not in this buf. 
- const char *delim_end; - - // End of the delimited region, relative to ptr, or end if not in this buf. - const char *data_end; - - // Overall stream offset of "buf." - uint64_t bufstart_ofs; - - // How many bytes past the end of the user buffer we want to skip. - size_t skip; - - // Buffer for residual bytes not parsed from the previous buffer. - // The maximum number of residual bytes we require is 12; a five-byte - // unknown tag plus an eight-byte value, less one because the value - // is only a partial value. - char residual[12]; - char *residual_end; - - // Stores the user buffer passed to our decode function. - const char *buf_param; - size_t size_param; - -#ifdef UPB_USE_JIT_X64 - // Used momentarily by the generated code to store a value while a user - // function is called. - uint32_t tmp_len; - - const void *saved_rsp; -#endif - - upb_status *status; - - // Our internal stack. - upb_pbdecoder_frame *top, *limit; - upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING]; - uint32_t *callstack[UPB_DECODER_MAX_NESTING * 2]; -}; - -// Data pertaining to a single decoding method/function. -// Each method contains code to parse a single message type. -// If may or may not be bound to a destination handlers object. -typedef struct { - // While compiling, the base is relative in "ofs", after compiling it is - // absolute in "ptr". - union { - uint32_t ofs; // PC offset of method. - const void *ptr; // Pointer to bytecode or machine code for this method. - } base; - - // Whether this method is native code or bytecode. - bool native_code; - - // The message type that this method is parsing. - const upb_msgdef *msg; - - // The destination handlers this method is bound to, or NULL if this method - // can be bound to a destination handlers instance at runtime. - // - // If non-NULL, we own a ref. 
- const upb_handlers *dest_handlers; - - // The dispatch table layout is: - // [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ] - // - // If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup - // (UPB_MAX_FIELDNUMBER + fieldnum) and jump there. - // - // We need two wire types because of packed/non-packed compatibility. A - // primitive repeated field can use either wire type and be valid. While we - // could key the table on fieldnum+wiretype, the table would be 8x sparser. - // - // Storing two wire types in the primary value allows us to quickly rule out - // the second wire type without needing to do a separate lookup (this case is - // less common than an unknown field). - upb_inttable dispatch; -} upb_pbdecodermethod; - -struct _upb_pbdecoderplan { - // Pointer to bytecode. - uint32_t *code, *code_end; - - // Maps upb_msgdef*/upb_handlers* -> upb_pbdecodermethod - upb_inttable methods; - - // The method that starts parsing when we first call into the plan. - // Ideally we will remove the idea that any of the methods in the plan - // are special like this, so that any method can be the top-level one. - upb_pbdecodermethod *topmethod; - -#ifdef UPB_USE_JIT_X64 - // JIT-generated machine code (else NULL). - upb_string_handler *jit_code; - size_t jit_size; - char *debug_info; - void *dl; -#endif -}; - #endif // UPB_DECODER_INT_H_ diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 9027e0f..73ef145 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -10,45 +10,39 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include "upb/bytestream.h" #include "upb/descriptor/reader.h" #include "upb/pb/decoder.h" upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, void *owner, upb_status *status) { // Create handlers. 
- const upb_handlers *reader_h = upb_descreader_gethandlers(&reader_h); - const upb_handlers *decoder_h = - upb_pbdecoder_gethandlers(reader_h, true, &decoder_h); + const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h); + const upb_pbdecodermethod *decoder_m = + upb_pbdecodermethod_newfordesthandlers(reader_h, &decoder_m); - // Create pipeline. - upb_pipeline pipeline; - upb_pipeline_init(&pipeline, NULL, 0, upb_realloc, NULL); - upb_pipeline_donateref(&pipeline, reader_h, &reader_h); - upb_pipeline_donateref(&pipeline, decoder_h, &decoder_h); + upb_pbdecoder decoder; + upb_descreader reader; - // Create sinks. - upb_sink *reader_sink = upb_pipeline_newsink(&pipeline, reader_h); - upb_sink *decoder_sink = upb_pipeline_newsink(&pipeline, decoder_h); - upb_pbdecoder *d = upb_sink_getobj(decoder_sink); - upb_pbdecoder_resetsink(d, reader_sink); + upb_pbdecoder_init(&decoder, decoder_m, status); + upb_descreader_init(&reader, reader_h, status); + upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader)); // Push input data. 
- bool ok = upb_bytestream_putstr(decoder_sink, str, len); + bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder)); - if (status) upb_status_copy(status, upb_pipeline_status(&pipeline)); - if (!ok) { - upb_pipeline_uninit(&pipeline); - return NULL; - } + upb_def **ret = NULL; - upb_descreader *r = upb_sink_getobj(reader_sink); - upb_def **defs = upb_descreader_getdefs(r, owner, n); - upb_def **defscopy = malloc(sizeof(upb_def*) * (*n)); - memcpy(defscopy, defs, sizeof(upb_def*) * (*n)); - upb_pipeline_uninit(&pipeline); + if (!ok) goto cleanup; + upb_def **defs = upb_descreader_getdefs(&reader, owner, n); + ret = malloc(sizeof(upb_def*) * (*n)); + memcpy(ret, defs, sizeof(upb_def*) * (*n)); - return defscopy; +cleanup: + upb_pbdecoder_uninit(&decoder); + upb_descreader_uninit(&reader); + upb_handlers_unref(reader_h, &reader_h); + upb_pbdecodermethod_unref(decoder_m, &decoder_m); + return ret; } bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len, diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 08eda15..0c12571 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -203,40 +203,42 @@ static void onmreg(void *c, upb_handlers *h) { upb_msg_iter i; for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, f, NULL); switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: - upb_handlers_setint32(h, f, putint32, f, NULL); + upb_handlers_setint32(h, f, putint32, &attr); break; case UPB_TYPE_INT64: - upb_handlers_setint64(h, f, putint64, f, NULL); + upb_handlers_setint64(h, f, putint64, &attr); break; case UPB_TYPE_UINT32: - upb_handlers_setuint32(h, f, putuint32, f, NULL); + upb_handlers_setuint32(h, f, putuint32, &attr); break; case UPB_TYPE_UINT64: - upb_handlers_setuint64(h, f, putuint64, f, NULL); + upb_handlers_setuint64(h, f, putuint64, &attr); break; case 
UPB_TYPE_FLOAT: - upb_handlers_setfloat(h, f, putfloat, f, NULL); + upb_handlers_setfloat(h, f, putfloat, &attr); break; case UPB_TYPE_DOUBLE: - upb_handlers_setdouble(h, f, putdouble, f, NULL); + upb_handlers_setdouble(h, f, putdouble, &attr); break; case UPB_TYPE_BOOL: - upb_handlers_setbool(h, f, putbool, f, NULL); + upb_handlers_setbool(h, f, putbool, &attr); break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: - upb_handlers_setstartstr(h, f, startstr, f, NULL); - upb_handlers_setstring(h, f, putstr, f, NULL); - upb_handlers_setendstr(h, f, endstr, f, NULL); + upb_handlers_setstartstr(h, f, startstr, &attr); + upb_handlers_setstring(h, f, putstr, &attr); + upb_handlers_setendstr(h, f, endstr, &attr); break; case UPB_TYPE_MESSAGE: - upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL); - upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL); + upb_handlers_setstartsubmsg(h, f, startsubmsg, &attr); + upb_handlers_setendsubmsg(h, f, endsubmsg, &attr); break; case UPB_TYPE_ENUM: - upb_handlers_setint32(h, f, putenum, f, NULL); + upb_handlers_setint32(h, f, putenum, &attr); default: assert(false); break; @@ -246,5 +248,5 @@ static void onmreg(void *c, upb_handlers *h) { const upb_handlers *upb_textprinter_newhandlers(const void *owner, const upb_msgdef *m) { - return upb_handlers_newfrozen(m, NULL, owner, &onmreg, NULL); + return upb_handlers_newfrozen(m, owner, &onmreg, NULL); } |