summary refs log tree commit diff
path: root/upb/pb
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb')
-rw-r--r-- upb/pb/compile_decoder.c 385
-rw-r--r-- upb/pb/compile_decoder_x64.c 106
-rw-r--r-- upb/pb/compile_decoder_x64.dasc 92
-rw-r--r-- upb/pb/decoder.c 144
-rw-r--r-- upb/pb/decoder.h 461
-rw-r--r-- upb/pb/decoder.int.h 180
-rw-r--r-- upb/pb/glue.c 46
-rw-r--r-- upb/pb/textprinter.c 30
8 files changed, 908 insertions, 536 deletions
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index 200eef5..f96f07a 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -11,7 +11,6 @@
#include <stdarg.h>
#include "upb/pb/decoder.int.h"
#include "upb/pb/varint.int.h"
-#include "upb/bytestream.h"
#ifdef UPB_DUMP_BYTECODE
#include <stdio.h>
@@ -20,76 +19,140 @@
#define MAXLABEL 5
#define EMPTYLABEL -1
+static const void *methodkey(const upb_msgdef *md, const upb_handlers *h) {
+ const void *ret = h ? (const void*)h : (const void*)md;
+ assert(ret);
+ return ret;
+}
+
+
+/* mgroup *********************************************************************/
+
+static void freegroup(upb_refcounted *r) {
+ mgroup *g = (mgroup*)r;
+ upb_inttable_uninit(&g->methods);
+#ifdef UPB_USE_JIT_X64
+ upb_pbdecoder_freejit(g);
+#endif
+ free(g->bytecode);
+ free(g);
+}
+
+static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
+ void *closure) {
+ const mgroup *g = (const mgroup*)r;
+ upb_inttable_iter i;
+ upb_inttable_begin(&i, &g->methods);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
+ visit(r, UPB_UPCAST(method), closure);
+ }
+}
+
+mgroup *newgroup(const void *owner) {
+ mgroup *g = malloc(sizeof(*g));
+ static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
+ upb_refcounted_init(UPB_UPCAST(g), &vtbl, owner);
+ upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
+ g->bytecode = NULL;
+ g->bytecode_end = NULL;
+ return g;
+}
+
+
/* upb_pbdecodermethod ********************************************************/
+static void freemethod(upb_refcounted *r) {
+ upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
+ upb_byteshandler_uninit(&method->input_handler_);
+
+ if (method->dest_handlers_) {
+ upb_handlers_unref(method->dest_handlers_, method);
+ }
+
+ upb_inttable_uninit(&method->dispatch);
+ free(method);
+}
+
+static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
+ void *closure) {
+ const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r;
+ visit(r, m->group, closure);
+}
+
static upb_pbdecodermethod *newmethod(const upb_msgdef *msg,
- const upb_handlers *dest_handlers) {
- upb_pbdecodermethod *ret = malloc(sizeof(upb_pbdecodermethod));
- ret->msg = msg;
- ret->dest_handlers = dest_handlers;
- ret->native_code = false; // If we JIT, it will update this later.
+ const upb_handlers *dest_handlers,
+ mgroup *group,
+ const void *key) {
+ static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
+ upb_pbdecodermethod *ret = malloc(sizeof(*ret));
+ upb_refcounted_init(UPB_UPCAST(ret), &vtbl, &ret);
+ upb_byteshandler_init(&ret->input_handler_);
+
+ // The method references the group and vice-versa, in a circular reference.
+ upb_ref2(ret, group);
+ upb_ref2(group, ret);
+ upb_inttable_insertptr(&group->methods, key, upb_value_ptr(ret)); // Owns ref
+ upb_refcounted_unref(UPB_UPCAST(ret), &ret);
+
+ ret->group = UPB_UPCAST(group);
+ ret->schema_ = msg;
+ ret->dest_handlers_ = dest_handlers;
+ ret->is_native_ = false; // If we JIT, it will update this later.
upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
- if (ret->dest_handlers) {
- upb_handlers_ref(ret->dest_handlers, ret);
+ if (ret->dest_handlers_) {
+ upb_handlers_ref(ret->dest_handlers_, ret);
}
return ret;
}
-static void freemethod(upb_pbdecodermethod *method) {
- if (method->dest_handlers) {
- upb_handlers_unref(method->dest_handlers, method);
- }
+void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner) {
+ upb_refcounted_ref(UPB_UPCAST(m), owner);
+}
- upb_inttable_uninit(&method->dispatch);
- free(method);
+void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m,
+ const void *owner) {
+ upb_refcounted_unref(UPB_UPCAST(m), owner);
}
+void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m,
+ const void *from, const void *to) {
+ upb_refcounted_donateref(UPB_UPCAST(m), from, to);
+}
-/* upb_pbdecoderplan **********************************************************/
+void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
+ const void *owner) {
+ upb_refcounted_checkref(UPB_UPCAST(m), owner);
+}
-upb_pbdecoderplan *newplan() {
- upb_pbdecoderplan *p = malloc(sizeof(*p));
- upb_inttable_init(&p->methods, UPB_CTYPE_PTR);
- p->code = NULL;
- p->code_end = NULL;
- return p;
+const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m) {
+ return m->schema_;
}
-void freeplan(void *_p) {
- upb_pbdecoderplan *p = _p;
+const upb_handlers *upb_pbdecodermethod_desthandlers(
+ const upb_pbdecodermethod *m) {
+ return m->dest_handlers_;
+}
- upb_inttable_iter i;
- upb_inttable_begin(&i, &p->methods);
- for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
- upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
- freemethod(method);
- }
- upb_inttable_uninit(&p->methods);
- free(p->code);
-#ifdef UPB_USE_JIT_X64
- upb_pbdecoder_freejit(p);
-#endif
- free(p);
+const upb_byteshandler *upb_pbdecodermethod_inputhandler(
+ const upb_pbdecodermethod *m) {
+ return &m->input_handler_;
}
-void set_bytecode_handlers(upb_pbdecoderplan *p, upb_handlers *h) {
- upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_start, p,
- NULL);
- upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_decode, p,
- freeplan);
- upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_end, p, NULL);
+bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
+ return m->is_native_;
}
-static const upb_pbdecoderplan *getdecoderplan(const upb_handlers *h) {
- if (upb_handlers_frametype(h) != &upb_pbdecoder_frametype)
- return NULL;
- upb_selector_t sel;
- if (!upb_handlers_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STARTSTR,
- &sel)) {
- return NULL;
- }
- return upb_handlers_gethandlerdata(h, sel);
+const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers(
+ const upb_handlers *dest, const void *owner) {
+ upb_pbcodecache cache;
+ upb_pbcodecache_init(&cache);
+ const upb_pbdecodermethod *ret =
+ upb_pbcodecache_getdecodermethodfordesthandlers(&cache, dest);
+ upb_pbdecodermethod_ref(ret, owner);
+ upb_pbcodecache_uninit(&cache);
+ return ret;
}
@@ -97,16 +160,16 @@ static const upb_pbdecoderplan *getdecoderplan(const upb_handlers *h) {
// Data used only at compilation time.
typedef struct {
- upb_pbdecoderplan *plan;
+ mgroup *group;
uint32_t *pc;
int fwd_labels[MAXLABEL];
int back_labels[MAXLABEL];
} compiler;
-static compiler *newcompiler(upb_pbdecoderplan *plan) {
- compiler *ret = malloc(sizeof(compiler));
- ret->plan = plan;
+static compiler *newcompiler(mgroup *group) {
+ compiler *ret = malloc(sizeof(*ret));
+ ret->group = group;
for (int i = 0; i < MAXLABEL; i++) {
ret->fwd_labels[i] = EMPTYLABEL;
ret->back_labels[i] = EMPTYLABEL;
@@ -165,7 +228,7 @@ static void setofs(uint32_t *instruction, int32_t ofs) {
assert(getofs(*instruction) == ofs); // Would fail in cases of overflow.
}
-static uint32_t pcofs(compiler *c) { return c->pc - c->plan->code; }
+static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
// Defines a local label at the current PC location. All previous forward
// references are updated to point to this location. The location is noted
@@ -173,7 +236,7 @@ static uint32_t pcofs(compiler *c) { return c->pc - c->plan->code; }
static void label(compiler *c, unsigned int label) {
assert(label < MAXLABEL);
int val = c->fwd_labels[label];
- uint32_t *codep = (val == EMPTYLABEL) ? NULL : c->plan->code + val;
+ uint32_t *codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
while (codep) {
int ofs = getofs(*codep);
setofs(codep, c->pc - codep - instruction_len(*codep));
@@ -197,7 +260,7 @@ static int32_t labelref(compiler *c, int label) {
return 0;
} else if (label < 0) {
// Backward local label. Relative to the next instruction.
- uint32_t from = (c->pc + 1) - c->plan->code;
+ uint32_t from = (c->pc + 1) - c->group->bytecode;
return c->back_labels[-label] - from;
} else {
// Forward local label: prepend to (possibly-empty) linked list.
@@ -209,14 +272,15 @@ static int32_t labelref(compiler *c, int label) {
}
static void put32(compiler *c, uint32_t v) {
- if (c->pc == c->plan->code_end) {
+ mgroup *g = c->group;
+ if (c->pc == g->bytecode_end) {
int ofs = pcofs(c);
- size_t oldsize = c->plan->code_end - c->plan->code;
+ size_t oldsize = g->bytecode_end - g->bytecode;
size_t newsize = UPB_MAX(oldsize * 2, 64);
// TODO(haberman): handle OOM.
- c->plan->code = realloc(c->plan->code, newsize * sizeof(uint32_t));
- c->plan->code_end = c->plan->code + newsize;
- c->pc = c->plan->code + ofs;
+ g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t));
+ g->bytecode_end = g->bytecode + newsize;
+ c->pc = g->bytecode + ofs;
}
*c->pc++ = v;
}
@@ -272,7 +336,7 @@ static void putop(compiler *c, opcode op, ...) {
break;
case OP_CALL: {
const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
- put32(c, op | (method->base.ofs - (pcofs(c) + 1)) << 8);
+ put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
break;
}
case OP_CHECKDELIM:
@@ -349,7 +413,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
const upb_pbdecodermethod *method =
(void *)((char *)dispatch -
offsetof(upb_pbdecodermethod, dispatch));
- fprintf(f, " %s", upb_msgdef_fullname(method->msg));
+ fprintf(f, " %s", upb_msgdef_fullname(method->schema_));
break;
}
case OP_STARTMSG:
@@ -453,7 +517,7 @@ static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
const upb_fielddef *f, int wire_type) {
// Offset is relative to msg base.
- uint64_t ofs = pcofs(c) - method->base.ofs;
+ uint64_t ofs = pcofs(c) - method->code_base.ofs;
uint32_t fn = upb_fielddef_number(f);
upb_inttable *d = &method->dispatch;
upb_value v;
@@ -485,11 +549,11 @@ static void putpush(compiler *c, const upb_fielddef *f) {
static upb_pbdecodermethod *find_submethod(const compiler *c,
const upb_pbdecodermethod *method,
const upb_fielddef *f) {
- const void *key = method->dest_handlers ?
- (const void*)upb_handlers_getsubhandlers(method->dest_handlers, f) :
- (const void*)upb_downcast_msgdef(upb_fielddef_subdef(f));
+ const upb_handlers *sub = method->dest_handlers_ ?
+ upb_handlers_getsubhandlers(method->dest_handlers_, f) : NULL;
+ const void *key = methodkey(upb_downcast_msgdef(upb_fielddef_subdef(f)), sub);
upb_value v;
- bool ok = upb_inttable_lookupptr(&c->plan->methods, key, &v);
+ bool ok = upb_inttable_lookupptr(&c->group->methods, key, &v);
UPB_ASSERT_VAR(ok, ok);
return upb_value_getptr(v);
}
@@ -532,12 +596,12 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
upb_inttable_uninit(&method->dispatch);
upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
- method->base.ofs = pcofs(c);
+ method->code_base.ofs = pcofs(c);
putop(c, OP_SETDISPATCH, &method->dispatch);
putop(c, OP_STARTMSG);
label(c, LABEL_FIELD);
upb_msg_iter i;
- for(upb_msg_begin(&i, method->msg); !upb_msg_done(&i); upb_msg_next(&i)) {
+ for(upb_msg_begin(&i, method->schema_); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
upb_descriptortype_t type = upb_fielddef_descriptortype(f);
@@ -680,17 +744,15 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
// On the other hand, if/when the optimization mentioned below is implemented,
// binding to a upb_handlers can result in *fewer* methods being generated if
// many of the submessages have no handlers bound to them.
-static upb_pbdecodermethod *find_methods(compiler *c,
- const upb_msgdef *md,
- const upb_handlers *h) {
- const void *key = h ? (const void*)h : (const void*)md;
+static void find_methods(compiler *c, const upb_msgdef *md,
+ const upb_handlers *h) {
+ const void *key = methodkey(md, h);
upb_value v;
- if (upb_inttable_lookupptr(&c->plan->methods, key, &v))
- return upb_value_getptr(v);
- upb_pbdecodermethod *method = newmethod(md, h);
- // Takes ownership of method.
- upb_inttable_insertptr(&c->plan->methods, key, upb_value_ptr(method));
+ if (upb_inttable_lookupptr(&c->group->methods, key, &v))
+ return;
+ newmethod(md, h, c->group, key);
+ // Find submethods.
upb_msg_iter i;
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
@@ -706,24 +768,34 @@ static upb_pbdecodermethod *find_methods(compiler *c,
find_methods(c, upb_downcast_msgdef(upb_fielddef_subdef(f)), sub_h);
}
-
- return method;
}
-// (Re-)compile bytecode for all messages in "msgs", ensuring that the code
-// for "md" is emitted first. Overwrites any existing bytecode in "c".
+// (Re-)compile bytecode for all messages in "msgs."
+// Overwrites any existing bytecode in "c".
static void compile_methods(compiler *c) {
// Start over at the beginning of the bytecode.
- c->pc = c->plan->code;
- compile_method(c, c->plan->topmethod);
+ c->pc = c->group->bytecode;
upb_inttable_iter i;
- upb_inttable_begin(&i, &c->plan->methods);
+ upb_inttable_begin(&i, &c->group->methods);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
- if (method != c->plan->topmethod) {
- compile_method(c, method);
- }
+ compile_method(c, method);
+ }
+}
+
+static void set_bytecode_handlers(mgroup *g) {
+ upb_inttable_iter i;
+ upb_inttable_begin(&i, &g->methods);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
+
+ m->code_base.ptr = g->bytecode + m->code_base.ofs;
+
+ upb_byteshandler *h = &m->input_handler_;
+ upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
+ upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
+ upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
}
}
@@ -732,17 +804,13 @@ static void compile_methods(compiler *c) {
#ifdef UPB_USE_JIT_X64
-static void sethandlers(upb_pbdecoderplan *p, upb_handlers *h, bool allowjit) {
- p->jit_code = NULL;
-
+static void sethandlers(mgroup *g, bool allowjit) {
+ g->jit_code = NULL;
if (allowjit) {
- upb_pbdecoder_jit(p); // Compile byte-code into machine code.
- upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_start, p,
- freeplan);
- upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, p->jit_code, NULL, NULL);
- upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, upb_pbdecoder_end, p, NULL);
+ // Compile byte-code into machine code, create handlers.
+ upb_pbdecoder_jit(g);
} else {
- set_bytecode_handlers(p, h);
+ set_bytecode_handlers(g);
}
}
@@ -754,10 +822,10 @@ static bool bind_dynamic(bool allowjit) {
#else // UPB_USE_JIT_X64
-static void sethandlers(upb_pbdecoderplan *p, upb_handlers *h, bool allowjit) {
+static void sethandlers(mgroup *g, bool allowjit) {
// No JIT compiled in; use bytecode handlers unconditionally.
UPB_UNUSED(allowjit);
- set_bytecode_handlers(p, h);
+ set_bytecode_handlers(g);
}
static bool bind_dynamic(bool allowjit) {
@@ -769,56 +837,16 @@ static bool bind_dynamic(bool allowjit) {
#endif // UPB_USE_JIT_X64
-/* Public interface ***********************************************************/
-
-bool upb_pbdecoder_isdecoder(const upb_handlers *h) {
- return getdecoderplan(h) != NULL;
-}
-
-bool upb_pbdecoderplan_hasjitcode(const upb_pbdecoderplan *p) {
-#ifdef UPB_USE_JIT_X64
- return p->jit_code != NULL;
-#else
- UPB_UNUSED(p);
- return false;
-#endif
-}
-
-bool upb_pbdecoder_hasjitcode(const upb_handlers *h) {
- const upb_pbdecoderplan *p = getdecoderplan(h);
- if (!p) return false;
- return upb_pbdecoderplan_hasjitcode(p);
-}
-
-uint32_t *upb_pbdecoderplan_codebase(const upb_pbdecoderplan *p) {
- return p->code;
-}
-
-upb_string_handler *upb_pbdecoderplan_jitcode(const upb_pbdecoderplan *p) {
-#ifdef UPB_USE_JIT_X64
- return p->jit_code;
-#else
- UPB_UNUSED(p);
- assert(false);
- return NULL;
-#endif
-}
-
-const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) {
- const upb_pbdecoderplan *p = getdecoderplan(h);
- if (!p) return NULL;
- return p->topmethod->dest_handlers;
-}
-
-const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
- bool allowjit,
- const void *owner) {
+// TODO(haberman): allow this to be constructed for an arbitrary set of dest
+// handlers and other mgroups (but verify we have a transitive closure).
+const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit,
+ const void *owner) {
UPB_UNUSED(allowjit);
assert(upb_handlers_isfrozen(dest));
const upb_msgdef *md = upb_handlers_msgdef(dest);
- upb_pbdecoderplan *p = newplan();
- compiler *c = newcompiler(p);
+ mgroup *g = newgroup(owner);
+ compiler *c = newcompiler(g);
if (bind_dynamic(allowjit)) {
// If binding dynamically, remove the reference against destination
@@ -826,32 +854,75 @@ const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
dest = NULL;
}
- p->topmethod = find_methods(c, md, dest);
+ find_methods(c, md, dest);
// We compile in two passes:
// 1. all messages are assigned relative offsets from the beginning of the
- // bytecode (saved in method->base).
+ // bytecode (saved in method->code_base).
// 2. forwards OP_CALL instructions can be correctly linked since message
// offsets have been previously assigned.
//
// Could avoid the second pass by linking OP_CALL instructions somehow.
compile_methods(c);
compile_methods(c);
- p->code_end = c->pc;
+ g->bytecode_end = c->pc;
+ freecompiler(c);
#ifdef UPB_DUMP_BYTECODE
FILE *f = fopen("/tmp/upb-bytecode", "wb");
assert(f);
- dumpbc(p->code, p->code_end, stderr);
- dumpbc(p->code, p->code_end, f);
+ dumpbc(g->bytecode, g->bytecode_end, stderr);
+ dumpbc(g->bytecode, g->bytecode_end, f);
fclose(f);
#endif
- upb_handlers *h = upb_handlers_new(
- UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner);
- sethandlers(p, h, allowjit);
+ sethandlers(g, allowjit);
+ return g;
+}
- freecompiler(c);
- return h;
+/* upb_pbcodecache ************************************************************/
+
+void upb_pbcodecache_init(upb_pbcodecache *c) {
+ upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
+ c->allow_jit_ = true;
+}
+
+void upb_pbcodecache_uninit(upb_pbcodecache *c) {
+ upb_inttable_iter i;
+ upb_inttable_begin(&i, &c->groups);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
+ upb_refcounted_unref(UPB_UPCAST(group), c);
+ }
+ upb_inttable_uninit(&c->groups);
+}
+
+bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
+ return c->allow_jit_;
+}
+
+bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
+ if (upb_inttable_count(&c->groups) > 0)
+ return false;
+ c->allow_jit_ = allow;
+ return true;
+}
+
+const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers(
+ upb_pbcodecache *c, const upb_handlers *handlers) {
+ // Right now we build a new DecoderMethod every time.
+ // TODO(haberman): properly cache methods by their true key.
+ const mgroup *g = mgroup_new(handlers, c->allow_jit_, c);
+ upb_inttable_push(&c->groups, upb_value_constptr(g));
+
+ const upb_msgdef *md = upb_handlers_msgdef(handlers);
+ if (bind_dynamic(c->allow_jit_)) {
+ handlers = NULL;
+ }
+
+ upb_value v;
+ bool ok = upb_inttable_lookupptr(&g->methods, methodkey(md, handlers), &v);
+ UPB_ASSERT_VAR(ok, ok);
+ return upb_value_getptr(v);
}
diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c
index 2e8132e..429f690 100644
--- a/upb/pb/compile_decoder_x64.c
+++ b/upb/pb/compile_decoder_x64.c
@@ -34,13 +34,13 @@
#define DECODE_EOF -3
typedef struct {
- upb_pbdecoderplan *plan;
+ mgroup *group;
uint32_t *pc;
// This pointer is allocated by dasm_init() and freed by dasm_free().
struct dasm_State *dynasm;
- // Maps bytecode pc location -> pclabel.
+ // Maps arbitrary void* -> pclabel.
upb_inttable pclabels;
upb_inttable pcdefined;
@@ -57,7 +57,6 @@ typedef struct {
// Used by DynASM to store globals.
void **globals;
- bool usefp;
bool chkret;
} jitcompiler;
@@ -65,16 +64,16 @@ typedef struct {
static int pclabel(jitcompiler *jc, const void *here);
static int define_pclabel(jitcompiler *jc, const void *here);
static void asmlabel(jitcompiler *jc, const char *fmt, ...);
+static int pcofs(jitcompiler* jc);
#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
#include "upb/pb/compile_decoder_x64.h"
-static jitcompiler *newjitcompiler(upb_pbdecoderplan *plan) {
+static jitcompiler *newjitcompiler(mgroup *group) {
jitcompiler *jc = malloc(sizeof(jitcompiler));
- jc->usefp = false;
jc->chkret = false;
- jc->plan = plan;
+ jc->group = group;
jc->pclabel_count = 0;
jc->lastlabelofs = -1;
upb_inttable_init(&jc->pclabels, UPB_CTYPE_UINT32);
@@ -123,13 +122,22 @@ static int define_pclabel(jitcompiler *jc, const void *here) {
return pclabel(jc, here);
}
+// Returns a bytecode pc offset relative to the beginning of the group's code.
+static int pcofs(jitcompiler *jc) {
+ return jc->pc - jc->group->bytecode;
+}
+
static void upb_reg_jit_gdb(jitcompiler *jc);
+static int getpclabel(jitcompiler *jc, const void *target) {
+ return dasm_getpclabel(jc, pclabel(jc, target));
+}
+
// Given a pcofs relative to method, returns the machine code offset for it
// (relative to the beginning of the machine code).
int nativeofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) {
- void *target = jc->plan->code + method->base.ofs + pcofs;
- return dasm_getpclabel(jc, pclabel(jc, target));
+ void *target = jc->group->bytecode + method->code_base.ofs + pcofs;
+ return getpclabel(jc, target);
}
// Given a pcofs relative to this method's base, returns a machine code offset
@@ -137,7 +145,7 @@ int nativeofs(jitcompiler *jc, const upb_pbdecodermethod *method, int pcofs) {
// machine code base for dispatch table lookups).
uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method,
int pcofs) {
- int ofs1 = dasm_getpclabel(jc, pclabel(jc, method->dispatch.array));
+ int ofs1 = getpclabel(jc, method->dispatch.array);
int ofs2 = nativeofs(jc, method, pcofs);
assert(ofs1 > 0);
assert(ofs2 > 0);
@@ -149,9 +157,11 @@ uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method,
// Rewrites the dispatch tables into machine code offsets.
static void patchdispatch(jitcompiler *jc) {
upb_inttable_iter i;
- upb_inttable_begin(&i, &jc->plan->methods);
+ upb_inttable_begin(&i, &jc->group->methods);
for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
+ method->is_native_ = true;
+
upb_inttable *dispatch = &method->dispatch;
upb_inttable_iter i2;
upb_inttable_begin(&i2, dispatch);
@@ -169,11 +179,20 @@ static void patchdispatch(jitcompiler *jc) {
} else {
// Secondary slot. Since we have 64 bits for the value, we use an
// absolute offset.
- newval = (uint64_t)(jc->plan->jit_code + nativeofs(jc, method, val));
+ newval = (uint64_t)(jc->group->jit_code + nativeofs(jc, method, val));
}
bool ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval));
UPB_ASSERT_VAR(ok, ok);
}
+
+ // Set this only *after* we have patched the offsets (nativeofs() above
+ // reads this).
+ method->code_base.ptr = jc->group->jit_code + getpclabel(jc, method);
+
+ upb_byteshandler *h = &method->input_handler_;
+ upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL);
+ upb_byteshandler_setstring(h, jc->group->jit_code, method->code_base.ptr);
+ upb_byteshandler_setendstr(h, upb_pbdecoder_end, method);
}
}
@@ -202,9 +221,10 @@ static void load_so(jitcompiler *jc) {
FILE *f = fopen("/tmp/upb-jit-code.s", "w");
if (f) {
+ uint8_t *jit_code = (uint8_t*)jc->group->jit_code;
fputs(" .text\n\n", f);
size_t linelen = 0;
- for (size_t i = 0; i < jc->plan->jit_size; i++) {
+ for (size_t i = 0; i < jc->group->jit_size; i++) {
upb_value v;
if (upb_inttable_lookup(&mclabels, i, &v)) {
const char *label = upb_value_getptr(v);
@@ -223,23 +243,25 @@ static void load_so(jitcompiler *jc) {
fputs("\n", f);
fclose(f);
} else {
- fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing/\n");
+ fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing\n");
+ abort();
}
// TODO: racy
if (system("gcc -shared -o /tmp/upb-jit-code.so /tmp/upb-jit-code.s") != 0) {
+ fprintf(stderr, "Error compiling upb-jit-code.s\n");
abort();
}
- jc->dl = dlopen("/tmp/upb-jit-code.so", RTLD_LAZY);
- if (!jc->dl) {
+ jc->group->dl = dlopen("/tmp/upb-jit-code.so", RTLD_LAZY);
+ if (!jc->group->dl) {
fprintf(stderr, "Couldn't dlopen(): %s\n", dlerror());
abort();
}
- munmap(jit_code, jc->plan->jit_size);
- jit_code = dlsym(jc->dl, "X.enterjit");
- if (!jit_code) {
+ munmap(jc->group->jit_code, jc->group->jit_size);
+ jc->group->jit_code = dlsym(jc->group->dl, "X.enterjit");
+ if (!jc->group->jit_code) {
fprintf(stderr, "Couldn't find enterjit sym\n");
abort();
}
@@ -248,45 +270,51 @@ static void load_so(jitcompiler *jc) {
}
#endif
-void upb_pbdecoder_jit(upb_pbdecoderplan *plan) {
- plan->debug_info = NULL;
- plan->dl = NULL;
+void upb_pbdecoder_jit(mgroup *group) {
+ group->debug_info = NULL;
+ group->dl = NULL;
- jitcompiler *jc = newjitcompiler(plan);
+ assert(group->bytecode);
+ jitcompiler *jc = newjitcompiler(group);
emit_static_asm(jc);
jitbytecode(jc);
- int dasm_status = dasm_link(jc, &jc->plan->jit_size);
+ int dasm_status = dasm_link(jc, &jc->group->jit_size);
if (dasm_status != DASM_S_OK) {
fprintf(stderr, "DynASM error; returned status: 0x%08x\n", dasm_status);
abort();
}
- char *jit_code = mmap(NULL, jc->plan->jit_size, PROT_READ | PROT_WRITE,
+ char *jit_code = mmap(NULL, jc->group->jit_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
dasm_encode(jc, jit_code);
- mprotect(jit_code, jc->plan->jit_size, PROT_EXEC | PROT_READ);
+ mprotect(jit_code, jc->group->jit_size, PROT_EXEC | PROT_READ);
upb_reg_jit_gdb(jc);
+ jc->group->jit_code = (upb_string_handlerfunc *)jit_code;
#ifdef UPB_JIT_LOAD_SO
load_so(jc);
#endif
- jc->plan->jit_code = (upb_string_handler *)jit_code;
patchdispatch(jc);
+
freejitcompiler(jc);
+
+ // Now the bytecode is no longer needed.
+ free(group->bytecode);
+ group->bytecode = NULL;
}
-void upb_pbdecoder_freejit(upb_pbdecoderplan *plan) {
- if (!plan->jit_code) return;
- if (plan->dl) {
+void upb_pbdecoder_freejit(mgroup *group) {
+ if (!group->jit_code) return;
+ if (group->dl) {
#ifdef UPB_JIT_LOAD_SO
- dlclose(plan->dl);
+ dlclose(group->dl);
#endif
} else {
- munmap(plan->jit_code, plan->jit_size);
+ munmap(group->jit_code, group->jit_size);
}
- free(plan->debug_info);
+ free(group->debug_info);
// TODO: unregister GDB JIT interface.
}
@@ -338,15 +366,15 @@ void __attribute__((noinline)) __jit_debug_register_code() {
static void upb_reg_jit_gdb(jitcompiler *jc) {
// Create debug info.
size_t elf_len = sizeof(upb_jit_debug_elf_file);
- jc->plan->debug_info = malloc(elf_len);
- memcpy(jc->plan->debug_info, upb_jit_debug_elf_file, elf_len);
- uint64_t *p = (void *)jc->plan->debug_info;
- for (; (void *)(p + 1) <= (void *)jc->plan->debug_info + elf_len; ++p) {
+ jc->group->debug_info = malloc(elf_len);
+ memcpy(jc->group->debug_info, upb_jit_debug_elf_file, elf_len);
+ uint64_t *p = (void *)jc->group->debug_info;
+ for (; (void *)(p + 1) <= (void *)jc->group->debug_info + elf_len; ++p) {
if (*p == 0x12345678) {
- *p = (uintptr_t)jc->plan->jit_code;
+ *p = (uintptr_t)jc->group->jit_code;
}
if (*p == 0x321) {
- *p = jc->plan->jit_size;
+ *p = jc->group->jit_size;
}
}
@@ -355,7 +383,7 @@ static void upb_reg_jit_gdb(jitcompiler *jc) {
e->next_entry = __jit_debug_descriptor.first_entry;
e->prev_entry = NULL;
if (e->next_entry) e->next_entry->prev_entry = e;
- e->symfile_addr = jc->plan->debug_info;
+ e->symfile_addr = jc->group->debug_info;
e->symfile_size = elf_len;
__jit_debug_descriptor.first_entry = e;
__jit_debug_descriptor.relevant_entry = e;
diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc
index 0bddade..fec822a 100644
--- a/upb/pb/compile_decoder_x64.dasc
+++ b/upb/pb/compile_decoder_x64.dasc
@@ -44,7 +44,7 @@
| sub DELIMEND, DECODER->buf
| add DELIMEND, DECODER->bufstart_ofs
| mov FRAME->end_ofs, DELIMEND
-| mov FRAME->u.closure, CLOSURE
+| mov FRAME->sink.closure, CLOSURE
|.endmacro
|
| // Loads unsynced registers from memory back into registers.
@@ -52,7 +52,7 @@
| mov FRAME, DECODER->top
| mov PTR, DECODER->ptr
| mov DATAEND, DECODER->data_end
-| mov CLOSURE, FRAME->u.closure
+| mov CLOSURE, FRAME->sink.closure
| mov DELIMEND, FRAME->end_ofs
| sub DELIMEND, DECODER->bufstart_ofs
| add DELIMEND, DECODER->buf
@@ -145,7 +145,7 @@ static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
char *str = malloc(len + 1); // + 1 for NULL terminator.
if (!str) exit(1);
- int written = vsnprintf(str, len, fmt, args);
+ int written = vsnprintf(str, len + 1, fmt, args);
va_end(args);
UPB_ASSERT_VAR(written, written == len);
@@ -155,6 +155,10 @@ static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
upb_inttable_insert(&jc->asmlabels, label, upb_value_ptr(str));
}
+static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
+ return h ? upb_handlers_gethandler(h, sel) : NULL;
+}
+
// Emit static assembly routines; code that does not vary based on the message
// schema. Since it's not input-dependent, we only need one single copy of it.
// For the moment we generate a single copy per generated handlers. Eventually
@@ -174,9 +178,6 @@ static void emit_static_asm(jitcompiler *jc) {
|->enterjit:
|1:
| push rbp
- if (jc->usefp) {
- | mov rbp, rsp
- }
| push r15
| push r14
| push r13
@@ -189,9 +190,12 @@ static void emit_static_asm(jitcompiler *jc) {
| // 16-byte stack alignment.
| sub rsp, 8
|
+ | mov rbx, ARG2_64 // Preserve JIT method.
+ |
| mov DECODER, rdi
| callp upb_pbdecoder_resume // Same args as us; reuse regs.
| mov DECODER->saved_rsp, rsp
+ | mov rax, rbx
| load_regs
|
| // Test whether we have a saved stack to resume.
@@ -199,7 +203,7 @@ static void emit_static_asm(jitcompiler *jc) {
| test ARG3_64, ARG3_64
| jnz >1
|
- | call =>pclabel(jc, jc->plan->topmethod)
+ | call rax
|
| mov rax, DECODER->size_param
| mov qword DECODER->call_len, 0
@@ -265,7 +269,7 @@ static void emit_static_asm(jitcompiler *jc) {
asmlabel(jc, "pushlendelim");
|->pushlendelim:
|1:
- | mov FRAME->u.closure, CLOSURE
+ | mov FRAME->sink.closure, CLOSURE
| mov DECODER->checkpoint, PTR
| dv32
| mov rcx, DELIMEND
@@ -511,7 +515,7 @@ static void jitprimitive(jitcompiler *jc, opcode op,
static char fastpath_bytes[] = { 1, 1, 4, 8 };
const valtype_t type = types[op];
const int fastbytes = fastpath_bytes[type];
- upb_func *handler = upb_handlers_gethandler(h, sel);
+ upb_func *handler = gethandler(h, sel);
if (handler) {
|1:
@@ -678,12 +682,20 @@ static void jitdispatch(jitcompiler *jc,
|=>define_pclabel(jc, &method->dispatch):
|1:
// Decode the field tag.
- // OPT: inline two bytes of varint decoding for big messages.
| mov aword DECODER->checkpoint, PTR
- | chkeob 1, >6
+ | chkeob 2, >6
| movzx edx, byte [PTR]
| test dl, dl
- | jns >7
+ | jns >7 // Jump if first byte has no continuation bit.
+ | movzx ecx, byte [PTR + 1]
+ | test cl, cl
+ | js >6 // Jump if second byte has continuation bit.
+ | // Confirmed two-byte varint.
+ | shl ecx, 7
+ | and edx, 0x7f
+ | or edx, ecx
+ | add PTR, 2
+ | jmp >8
|6:
| call ->decode_unknown_tag_fallback
| test eax, eax // Hit DELIMEND?
@@ -848,15 +860,14 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
static void jitbytecode(jitcompiler *jc) {
upb_pbdecodermethod *method = NULL;
const upb_handlers *h = NULL;
- for (jc->pc = jc->plan->code; jc->pc < jc->plan->code_end; ) {
+ for (jc->pc = jc->group->bytecode; jc->pc < jc->group->bytecode_end; ) {
int32_t instr = *jc->pc;
opcode op = instr & 0xff;
uint32_t arg = instr >> 8;
int32_t longofs = arg;
if (op != OP_STARTMSG && op != OP_SETDISPATCH) {
- asmlabel(jc, "0x%lx.%s", jc->pc - jc->plan->code,
- upb_pbdecoder_getopname(op));
+ asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
}
// TODO: optimize this to only define pclabels that are actually used.
|=>define_pclabel(jc, jc->pc):
@@ -865,16 +876,11 @@ static void jitbytecode(jitcompiler *jc) {
switch (op) {
case OP_STARTMSG: {
// This opcode serves as a function prolouge also.
- const char *msgname = upb_msgdef_fullname(method->msg);
- asmlabel(jc, "parse.%s", msgname);
+ const char *msgname = upb_msgdef_fullname(method->schema_);
+ asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
|=>define_pclabel(jc, method):
- if (jc->usefp) {
- | push rbp
- | mov rbp, rsp
- } else {
- | sub rsp, 8
- }
- upb_func *startmsg = upb_handlers_gethandler(h, UPB_STARTMSG_SELECTOR);
+ | sub rsp, 8
+ upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
if (startmsg) {
// bool startmsg(void *closure, const void *hd)
|1:
@@ -892,7 +898,7 @@ static void jitbytecode(jitcompiler *jc) {
}
case OP_ENDMSG: {
// This opcode serves as a function epiloue also.
- upb_func *endmsg = upb_handlers_gethandler(h, UPB_ENDMSG_SELECTOR);
+ upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
|9:
if (endmsg) {
// bool endmsg(void *closure, const void *hd, upb_status *status)
@@ -901,11 +907,7 @@ static void jitbytecode(jitcompiler *jc) {
| mov ARG3_64, DECODER->status
| callp endmsg
}
- if (jc->usefp) {
- | pop rbp
- } else {
- | add rsp, 8
- }
+ | add rsp, 8
| ret
break;
}
@@ -917,10 +919,13 @@ static void jitbytecode(jitcompiler *jc) {
// &method->dispatch; we want to go backwards and recover method.
method =
(void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch));
- h = method->dest_handlers;
- assert(h); // We only support statically-bound handlers for now.
- const char *msgname = upb_msgdef_fullname(method->msg);
- asmlabel(jc, "dispatch.%s", msgname);
+ // May be NULL, in which case no handlers for this message will be found.
+ // OPT: we should do better by completely skipping the message in this
+ // case instead of parsing it field by field. We should also do the skip
+ // in the containing message's code.
+ h = method->dest_handlers_;
+ const char *msgname = upb_msgdef_fullname(method->schema_);
+ asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
jitdispatch(jc, method);
break;
}
@@ -942,7 +947,7 @@ static void jitbytecode(jitcompiler *jc) {
case OP_STARTSEQ:
case OP_STARTSUBMSG:
case OP_STARTSTR: {
- upb_func *start = upb_handlers_gethandler(h, arg);
+ upb_func *start = gethandler(h, arg);
if (start) {
// void *startseq(void *closure, const void *hd)
// void *startsubmsg(void *closure, const void *hd)
@@ -972,7 +977,7 @@ static void jitbytecode(jitcompiler *jc) {
case OP_ENDSEQ:
case OP_ENDSUBMSG:
case OP_ENDSTR: {
- upb_func *end = upb_handlers_gethandler(h, arg);
+ upb_func *end = gethandler(h, arg);
if (end) {
// bool endseq(void *closure, const void *hd)
// bool endsubmsg(void *closure, const void *hd)
@@ -995,7 +1000,7 @@ static void jitbytecode(jitcompiler *jc) {
break;
}
case OP_STRING: {
- upb_func *str = upb_handlers_gethandler(h, arg);
+ upb_func *str = gethandler(h, arg);
| cmp PTR, DELIMEND
| je >4
|1:
@@ -1028,7 +1033,13 @@ static void jitbytecode(jitcompiler *jc) {
break;
}
case OP_PUSHTAGDELIM:
- | mov FRAME->u.closure, CLOSURE
+ | mov FRAME->sink.closure, CLOSURE
+ | // This shouldn't need to be read, because tag-delimited fields
+ | // shouldn't have an OP_SETDELIM after them. But for the moment
+ | // non-packed repeated fields do OP_SETDELIM so they can share more
+ | // code with the packed code-path. If this is changed later, this
+ | // store can be removed.
+ | mov qword FRAME->end_ofs, 0
| add FRAME, sizeof(upb_pbdecoder_frame)
| cmp FRAME, DECODER->limit
| je ->err
@@ -1038,13 +1049,14 @@ static void jitbytecode(jitcompiler *jc) {
break;
case OP_POP:
| sub FRAME, sizeof(upb_pbdecoder_frame)
- | mov CLOSURE, FRAME->u.closure
+ | mov CLOSURE, FRAME->sink.closure
break;
case OP_SETDELIM:
// OPT: experiment with testing vs old offset to optimize away.
| mov DATAEND, DECODER->end
| add DELIMEND, FRAME->end_ofs
- | jc >1
+ | cmp DELIMEND, DECODER->buf
+ | jb >1
| cmp DELIMEND, DATAEND
| ja >1 // OPT: try cmov.
| mov DATAEND, DELIMEND
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 70862d5..6fd6576 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -10,7 +10,6 @@
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
-#include "upb/bytestream.h"
#include "upb/pb/decoder.int.h"
#include "upb/pb/varint.int.h"
@@ -70,7 +69,7 @@ static bool in_residual_buf(upb_pbdecoder *d, const char *p);
static void seterr(upb_pbdecoder *d, const char *msg) {
// TODO(haberman): encapsulate this access to pipeline->status, but not sure
// exactly what that interface should look like.
- upb_status_seterrliteral(&d->sink->pipeline_->status_, msg);
+ upb_status_seterrmsg(d->status, msg);
}
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
@@ -377,7 +376,7 @@ static bool push(upb_pbdecoder *d, uint64_t end) {
fr++;
fr->end_ofs = end;
- fr->u.dispatch = NULL;
+ fr->dispatch = NULL;
fr->groupnum = -1;
d->top = fr;
return true;
@@ -441,7 +440,7 @@ int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum,
}
static int32_t dispatch(upb_pbdecoder *d) {
- upb_inttable *dispatch = d->top->u.dispatch;
+ upb_inttable *dispatch = d->top->dispatch;
// Decode tag.
uint32_t tag;
@@ -478,16 +477,23 @@ static int32_t dispatch(upb_pbdecoder *d) {
}
}
+// Callers know that the stack is more than one deep because the opcodes that
+// call this only occur after PUSH operations.
+upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
+ assert(d->top != d->stack);
+ return d->top - 1;
+}
+
/* The main decoding loop *****************************************************/
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
size_t size) {
upb_pbdecoder *d = closure;
- const upb_pbdecoderplan *p = hd;
+ const mgroup *group = hd;
assert(buf);
upb_pbdecoder_resume(d, NULL, buf, size);
- UPB_UNUSED(p);
+ UPB_UNUSED(group);
#define VMCASE(op, code) \
case op: { code; if (consumes_input(op)) checkpoint(d); break; }
@@ -495,7 +501,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
VMCASE(OP_PARSE_ ## type, { \
ctype val; \
CHECK_RETURN(decode_ ## wt(d, &val)); \
- upb_sink_put ## name(d->sink, arg, (convfunc)(val)); \
+ upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
})
while(1) {
@@ -513,7 +519,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
(int)(d->data_end - ptr(d)),
(int)(d->end - ptr(d)),
(int)((d->top->end_ofs - d->bufstart_ofs) - (ptr(d) - d->buf)),
- (int)(d->pc - 1 - upb_pbdecoderplan_codebase(p)),
+ (int)(d->pc - 1 - group->bytecode),
upb_pbdecoder_getopname(op),
arg);
#endif
@@ -537,39 +543,42 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
VMCASE(OP_SETDISPATCH,
d->top->base = d->pc - 1;
- memcpy(&d->top->u.dispatch, d->pc, sizeof(void*));
+ memcpy(&d->top->dispatch, d->pc, sizeof(void*));
d->pc += sizeof(void*) / sizeof(uint32_t);
)
VMCASE(OP_STARTMSG,
- CHECK_SUSPEND(upb_sink_startmsg(d->sink));
+ CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
)
VMCASE(OP_ENDMSG,
- CHECK_SUSPEND(upb_sink_endmsg(d->sink));
+ CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
assert(d->call_len > 0);
d->pc = d->callstack[--d->call_len];
)
VMCASE(OP_STARTSEQ,
- CHECK_SUSPEND(upb_sink_startseq(d->sink, arg));
+ upb_pbdecoder_frame *outer = outer_frame(d);
+ CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
)
VMCASE(OP_ENDSEQ,
- CHECK_SUSPEND(upb_sink_endseq(d->sink, arg));
+ CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
)
VMCASE(OP_STARTSUBMSG,
- CHECK_SUSPEND(upb_sink_startsubmsg(d->sink, arg));
+ upb_pbdecoder_frame *outer = outer_frame(d);
+ CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
)
VMCASE(OP_ENDSUBMSG,
- CHECK_SUSPEND(upb_sink_endsubmsg(d->sink, arg));
+ CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
)
VMCASE(OP_STARTSTR,
uint32_t len = d->top->end_ofs - offset(d);
- CHECK_SUSPEND(upb_sink_startstr(d->sink, arg, len));
+ upb_pbdecoder_frame *outer = outer_frame(d);
+ CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
if (len == 0) {
d->pc++; // Skip OP_STRING.
}
)
VMCASE(OP_STRING,
uint32_t len = curbufleft(d);
- CHECK_SUSPEND(upb_sink_putstring(d->sink, arg, ptr(d), len));
+ CHECK_SUSPEND(upb_sink_putstring(&d->top->sink, arg, ptr(d), len));
advance(d, len);
if (d->delim_end == NULL) { // String extends beyond this buf?
d->pc--;
@@ -579,7 +588,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
}
)
VMCASE(OP_ENDSTR,
- CHECK_SUSPEND(upb_sink_endstr(d->sink, arg));
+ CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
)
VMCASE(OP_PUSHTAGDELIM,
CHECK_SUSPEND(push(d, d->top->end_ofs));
@@ -664,50 +673,52 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
}
}
-void *upb_pbdecoder_start(void *closure, const void *handler_data,
- size_t size_hint) {
+void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
+ upb_pbdecoder *d = closure;
UPB_UNUSED(size_hint);
+ d->call_len = 1;
+ d->pc = pc;
+ return d;
+}
+
+void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
+ UPB_UNUSED(hd);
upb_pbdecoder *d = closure;
- const upb_pbdecoderplan *plan = handler_data;
- UPB_UNUSED(plan);
- if (upb_pbdecoderplan_hasjitcode(plan)) {
- d->top->u.closure = d->sink->top->closure;
- d->call_len = 0;
- } else {
- d->call_len = 1;
- d->pc = upb_pbdecoderplan_codebase(plan);
- }
- assert(d);
- assert(d->sink);
- if (plan->topmethod->dest_handlers) {
- assert(d->sink->top->h == plan->topmethod->dest_handlers);
- }
- d->status = &d->sink->pipeline_->status_;
+ d->call_len = 0;
return d;
}
bool upb_pbdecoder_end(void *closure, const void *handler_data) {
upb_pbdecoder *d = closure;
- const upb_pbdecoderplan *plan = handler_data;
+ const upb_pbdecodermethod *method = handler_data;
if (d->residual_end > d->residual) {
seterr(d, "Unexpected EOF");
return false;
}
+ if (d->top->end_ofs != UINT64_MAX) {
+ seterr(d, "Unexpected EOF inside delimited string");
+ return false;
+ }
+
// Message ends here.
uint64_t end = offset(d);
d->top->end_ofs = end;
+
char dummy;
- if (upb_pbdecoderplan_hasjitcode(plan)) {
#ifdef UPB_USE_JIT_X64
+ const mgroup *group = (const mgroup*)method->group;
+ if (group->jit_code) {
if (d->top != d->stack)
d->stack->end_ofs = 0;
- upb_pbdecoderplan_jitcode(plan)(closure, handler_data, &dummy, 0);
-#endif
+ group->jit_code(closure, method->code_base.ptr, &dummy, 0);
} else {
+#endif
d->stack->end_ofs = end;
- uint32_t *p = d->pc - 1;
+ const uint32_t *p = d->pc;
+ // Check the previous bytecode, but guard against beginning.
+ if (p != method->code_base.ptr) p--;
if (getop(*p) == OP_CHECKDELIM) {
// Rewind from OP_TAG* to OP_CHECKDELIM.
assert(getop(*d->pc) == OP_TAG1 ||
@@ -716,28 +727,29 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
d->pc = p;
}
upb_pbdecoder_decode(closure, handler_data, &dummy, 0);
+#ifdef UPB_USE_JIT_X64
}
+#endif
if (d->call_len != 0) {
seterr(d, "Unexpected EOF");
return false;
}
- return upb_ok(&d->sink->pipeline_->status_);
+ return true;
}
-void init(void *_d, upb_pipeline *p) {
- UPB_UNUSED(p);
- upb_pbdecoder *d = _d;
+void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m,
+ upb_status *s) {
d->limit = &d->stack[UPB_DECODER_MAX_NESTING];
- d->sink = NULL;
+ upb_bytessink_reset(&d->input_, &m->input_handler_, d);
+ d->method_ = m;
d->callstack[0] = &halt;
- // reset() must be called before decoding; this is guaranteed by assert() in
- // start().
+ d->status = s;
+ upb_pbdecoder_reset(d);
}
-void reset(void *_d) {
- upb_pbdecoder *d = _d;
+void upb_pbdecoder_reset(upb_pbdecoder *d) {
d->top = d->stack;
d->top->end_ofs = UINT64_MAX;
d->bufstart_ofs = 0;
@@ -748,21 +760,27 @@ void reset(void *_d) {
d->call_len = 1;
}
-bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) {
- // TODO(haberman): typecheck the sink, and test whether the decoder is in the
- // middle of decoding. Return false if either assumption is violated.
- d->sink = sink;
- reset(d);
- return true;
+// Not currently required, but to support outgrowing the static stack we need
+// this.
+void upb_pbdecoder_uninit(upb_pbdecoder *d) {}
+
+const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
+ return d->method_;
}
-const upb_frametype upb_pbdecoder_frametype = {
- sizeof(upb_pbdecoder),
- init,
- NULL,
- reset,
-};
+bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) {
+ // TODO(haberman): do we need to test whether the decoder is already on the
+ // stack (like calling this from within a callback)? Should we support
+ // rebinding the output at all?
+ assert(sink);
+ if (d->method_->dest_handlers_) {
+ if (sink->handlers != d->method_->dest_handlers_)
+ return false;
+ }
+ upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
+ return true;
+}
-const upb_frametype *upb_pbdecoder_getframetype() {
- return &upb_pbdecoder_frametype;
+upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
+ return &d->input_;
}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index c645688..4529324 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -1,102 +1,447 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2009-2010 Google Inc. See LICENSE for details.
+ * Copyright (c) 2009-2013 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * upb::Decoder implements a high performance, streaming decoder for protobuf
- * data that works by parsing input data one buffer at a time and calling into
- * a upb::Handlers.
+ * upb::pb::Decoder implements a high performance, streaming, resumable decoder
+ * for the binary protobuf format.
*/
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
+#include "upb/table.int.h"
#include "upb/sink.h"
+#ifdef __cplusplus
+namespace upb {
+namespace pb {
+class CodeCache;
+class Decoder;
+class DecoderMethod;
+} // namespace pb
+} // namespace upb
+
+typedef upb::pb::CodeCache upb_pbcodecache;
+typedef upb::pb::Decoder upb_pbdecoder;
+typedef upb::pb::DecoderMethod upb_pbdecodermethod;
+#else
+struct upb_pbdecoder;
+struct upb_pbdecodermethod;
+struct upb_pbcodecache;
+
+typedef struct upb_pbdecoder upb_pbdecoder;
+typedef struct upb_pbdecodermethod upb_pbdecodermethod;
+typedef struct upb_pbcodecache upb_pbcodecache;
+#endif
+
// The maximum that any submessages can be nested. Matches proto2's limit.
-// At the moment this specifies the size of several statically-sized arrays
-// and therefore setting it high will cause more memory to be used. Will
-// be replaced by a runtime-configurable limit and dynamically-resizing arrays.
-// TODO: make this a runtime-settable property of Decoder.
+// This specifies the size of the decoder's statically-sized array and therefore
+// setting it high will cause the upb::pb::Decoder object to be larger.
+//
+// If necessary we can add a runtime-settable property to Decoder that allows
+// this to be larger than the compile-time setting, but this would add
+// complexity, particularly since we would have to decide how/if to give users
+// the ability to set a custom memory allocation function.
#define UPB_DECODER_MAX_NESTING 64
+// Internal-only struct used by the decoder.
+typedef struct {
#ifdef __cplusplus
-namespace upb {
-namespace pb {
+ private:
+#endif
+ // Space optimization note: we store two pointers here that the JIT
+ // doesn't need at all; the upb_handlers* inside the sink and
+ // the dispatch table pointer. We can optimize so that the JIT uses
+ // smaller stack frames than the interpreter. The only thing we need
+ // to guarantee is that the fallback routines can find end_ofs.
-// Frame type that encapsulates decoder state.
-class Decoder;
+#ifdef __cplusplus
+ char sink[sizeof(upb_sink)];
+#else
+ upb_sink sink;
+#endif
+ // The absolute stream offset of the end-of-frame delimiter.
+ // Non-delimited frames (groups and non-packed repeated fields) reuse the
+ // delimiter of their parent, even though the frame may not end there.
+ //
+ // NOTE: the JIT stores a slightly different value here for non-top frames.
+ // It stores the value relative to the end of the enclosed message. But the
+ // top frame is still stored the same way, which is important for ensuring
+ // that calls from the JIT into C work correctly.
+ uint64_t end_ofs;
+ const uint32_t *base;
+ uint32_t groupnum;
+ upb_inttable *dispatch; // Not used by the JIT.
+} upb_pbdecoder_frame;
-// Resets the sink of the Decoder. This must be called at least once before
-// the decoder can be used. It may only be called with the decoder is in a
-// state where it was just created or reset. The given sink must be from the
-// same pipeline as this decoder.
-inline bool ResetDecoderSink(Decoder* d, Sink* sink);
+#ifdef __cplusplus
-// Gets the handlers suitable for parsing protobuf data according to the given
-// destination handlers. The protobuf schema to parse is taken from dest.
-inline const upb::Handlers *GetDecoderHandlers(const upb::Handlers *dest,
- bool allowjit,
- const void *owner);
+// Represents the code to parse a protobuf according to a specific schema,
+// optionally bound to a set of destination handlers.
+class upb::pb::DecoderMethod /* : public upb::RefCounted */ {
+ public:
+ // From upb::RefCounted.
+ void Ref(const void* owner) const;
+ void Unref(const void* owner) const;
+ void DonateRef(const void* from, const void* to) const;
+ void CheckRef(const void* owner) const;
-// Returns true if these handlers represent a upb::pb::Decoder.
-bool IsDecoder(const upb::Handlers *h);
+ // The schema that this method parses. Never NULL.
+ const MessageDef* schema() const;
-// Returns true if IsDecoder(h) and the given handlers have JIT code.
-inline bool HasJitCode(const upb::Handlers* h);
+ // The destination handlers that are statically bound to this method.
+ // This method is only capable of outputting to a sink that uses these
+ // handlers.
+ //
+ // Will be NULL if this method is not statically bound.
+ const Handlers* dest_handlers() const;
-// Returns the destination handlers if IsDecoder(h), otherwise returns NULL.
-const upb::Handlers* GetDestHandlers(const upb::Handlers* h);
+ // The input handlers for this decoder method.
+ const BytesHandler* input_handler() const;
-} // namespace pb
-} // namespace upb
+ // Whether this method is native.
+ bool is_native() const;
-typedef upb::pb::Decoder upb_pbdecoder;
+ // Convenience method for generating a DecoderMethod without explicitly
+ // creating a CodeCache.
+ static reffed_ptr<const DecoderMethod> NewForDestHandlers(
+ const upb::Handlers *dest);
-extern "C" {
+ private:
+ UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod);
#else
-struct upb_pbdecoder;
-typedef struct upb_pbdecoder upb_pbdecoder;
+struct upb_pbdecodermethod {
+#endif
+ upb_refcounted base;
+
+ // While compiling, the base is relative in "ofs", after compiling it is
+ // absolute in "ptr".
+ union {
+ uint32_t ofs; // PC offset of method.
+ void *ptr; // Pointer to bytecode or machine code for this method.
+ } code_base;
+
+ // The decoder method group to which this method belongs. We own a ref.
+ // Owning a ref on the entire group is more coarse-grained than is strictly
+ // necessary; all we truly require is that methods we directly reference
+ // outlive us, while the group could contain many other messages we don't
+ // require. But the group represents the messages that were
+ // allocated+compiled together, so it makes the most sense to free them
+ // together also.
+ const upb_refcounted *group;
+
+ // Whether this method is native code or bytecode.
+ bool is_native_;
+
+ // The handler one calls to invoke this method.
+ upb_byteshandler input_handler_;
+
+ // The message type that this method is parsing.
+ const upb_msgdef *schema_;
+
+ // The destination handlers this method is bound to, or NULL if this method
+ // can be bound to a destination handlers instance at runtime.
+ //
+ // If non-NULL, we own a ref.
+ const upb_handlers *dest_handlers_;
+
+ // The dispatch table layout is:
+ // [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
+ //
+ // If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
+ // (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
+ //
+ // We need two wire types because of packed/non-packed compatibility. A
+ // primitive repeated field can use either wire type and be valid. While we
+ // could key the table on fieldnum+wiretype, the table would be 8x sparser.
+ //
+ // Storing two wire types in the primary value allows us to quickly rule out
+ // the second wire type without needing to do a separate lookup (this case is
+ // less common than an unknown field).
+ upb_inttable dispatch;
+};
+
+#ifdef __cplusplus
+
+// A Decoder receives binary protobuf data on its input sink and pushes the
+// decoded data to its output sink.
+class upb::pb::Decoder {
+ public:
+ // Constructs a decoder instance for the given method, which must outlive this
+ // decoder. Any errors during parsing will be set on the given status, which
+ // must also outlive this decoder.
+ Decoder(const DecoderMethod* method, Status* status);
+ ~Decoder();
+
+ // Returns the DecoderMethod this decoder is parsing from.
+ // TODO(haberman): Do users need to be able to rebind this?
+ const DecoderMethod* method() const;
+
+ // Resets the state of the decoder.
+ void Reset();
+
+ // Resets the output sink of the Decoder.
+ // The given sink must match method()->schema() as well as
+ // method()->dest_handlers() if the latter is non-NULL.
+ //
+ // This must be called at least once before the decoder can be used. It may
+ // only be called when the decoder is in a state where it was just created or
+ // reset with pipeline.Reset(). The given sink must be from the same pipeline
+ // as this decoder.
+ bool ResetOutput(Sink* sink);
+
+ // The sink on which this decoder receives input.
+ BytesSink* input();
+
+ private:
+ UPB_DISALLOW_COPY_AND_ASSIGN(Decoder);
+#else
+struct upb_pbdecoder {
+#endif
+ // Our input sink.
+ upb_bytessink input_;
+
+ // The decoder method we are parsing with (owned).
+ const upb_pbdecodermethod *method_;
+
+ size_t call_len;
+ const uint32_t *pc, *last;
+
+ // Current input buffer and its stream offset.
+ const char *buf, *ptr, *end, *checkpoint;
+
+ // End of the delimited region, relative to ptr, or NULL if not in this buf.
+ const char *delim_end;
+
+ // End of the delimited region, relative to ptr, or end if not in this buf.
+ const char *data_end;
+
+ // Overall stream offset of "buf."
+ uint64_t bufstart_ofs;
+
+ // How many bytes past the end of the user buffer we want to skip.
+ size_t skip;
+
+ // Buffer for residual bytes not parsed from the previous buffer.
+ // The maximum number of residual bytes we require is 12; a five-byte
+ // unknown tag plus an eight-byte value, less one because the value
+ // is only a partial value.
+ char residual[12];
+ char *residual_end;
+
+ // Stores the user buffer passed to our decode function.
+ const char *buf_param;
+ size_t size_param;
+
+#ifdef UPB_USE_JIT_X64
+ // Used momentarily by the generated code to store a value while a user
+ // function is called.
+ uint32_t tmp_len;
+
+ const void *saved_rsp;
+#endif
+
+ upb_status *status;
+
+ // Our internal stack.
+ upb_pbdecoder_frame *top, *limit;
+ upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING];
+#ifdef UPB_USE_JIT_X64
+ // Each native stack frame needs two pointers, plus we need a few frames for
+ // the enter/exit trampolines.
+ const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10];
+#else
+ const uint32_t *callstack[UPB_DECODER_MAX_NESTING];
+#endif
+};
+
+#ifdef __cplusplus
+
+// A class for caching protobuf processing code, whether bytecode for the
+// interpreted decoder or machine code for the JIT.
+//
+// This class is not thread-safe.
+class upb::pb::CodeCache {
+ public:
+ CodeCache();
+ ~CodeCache();
+
+ // Whether the cache is allowed to generate machine code. Defaults to true.
+ // There is no real reason to turn it off except for testing or if you are
+ // having a specific problem with the JIT.
+ //
+ // Note that allow_jit = true does not *guarantee* that the code will be JIT
+ // compiled. If this platform is not supported or the JIT was not compiled
+ // in, the code may still be interpreted.
+ bool allow_jit() const;
+
+ // This may only be called when the object is first constructed, and prior to
+ // any code generation, otherwise returns false and does nothing.
+ bool set_allow_jit(bool allow);
+
+ // Returns a DecoderMethod that can push data to the given handlers.
+ // If a suitable method already exists, it will be returned from the cache.
+ //
+ // Specifying the destination handlers here allows the DecoderMethod to be
+ // statically bound to the destination handlers if possible, which can allow
+ // more efficient decoding. However the returned method may or may not
+ // actually be statically bound. But in all cases, the returned method can
+ // push data to the given handlers.
+ const DecoderMethod *GetDecoderMethodForDestHandlers(
+ const upb::Handlers *handlers);
+
+ // If/when someone needs to explicitly create a dynamically-bound
+ // DecoderMethod*, we can add a method to get it here.
+
+ private:
+ UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache);
+#else
+struct upb_pbcodecache {
#endif
+ bool allow_jit_;
+
+ // Array of mgroups.
+ upb_inttable groups;
+};
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method,
+ upb_status *status);
+void upb_pbdecoder_uninit(upb_pbdecoder *d);
+void upb_pbdecoder_reset(upb_pbdecoder *d);
+const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
+bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink);
+upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d);
-// C API.
-const upb_frametype *upb_pbdecoder_getframetype();
-bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink *sink);
-const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest,
- bool allowjit,
- const void *owner);
-bool upb_pbdecoder_isdecoder(const upb_handlers *h);
-bool upb_pbdecoder_hasjitcode(const upb_handlers *h);
-const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h);
+void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner);
+void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner);
+void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m,
+ const void *from, const void *to);
+void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
+ const void *owner);
+const upb_msgdef *upb_pbdecodermethod_schema(const upb_pbdecodermethod *m);
+const upb_handlers *upb_pbdecodermethod_desthandlers(
+ const upb_pbdecodermethod *m);
+const upb_byteshandler *upb_pbdecodermethod_inputhandler(
+ const upb_pbdecodermethod *m);
+bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m);
+const upb_pbdecodermethod *upb_pbdecodermethod_newfordesthandlers(
+ const upb_handlers *dest, const void *owner);
-// C++ implementation details. /////////////////////////////////////////////////
+void upb_pbcodecache_init(upb_pbcodecache *c);
+void upb_pbcodecache_uninit(upb_pbcodecache *c);
+bool upb_pbcodecache_allowjit(const upb_pbcodecache *c);
+bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow);
+const upb_pbdecodermethod *upb_pbcodecache_getdecodermethodfordesthandlers(
+ upb_pbcodecache *c, const upb_handlers *handlers);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
#ifdef __cplusplus
-} // extern "C"
namespace upb {
+template<>
+class Pointer<pb::DecoderMethod> {
+ public:
+ explicit Pointer(pb::DecoderMethod* ptr) : ptr_(ptr) {}
+ operator pb::DecoderMethod*() { return ptr_; }
+ operator RefCounted*() { return UPB_UPCAST(ptr_); }
+ private:
+ pb::DecoderMethod* ptr_;
+};
+
+template<>
+class Pointer<const pb::DecoderMethod> {
+ public:
+ explicit Pointer(const pb::DecoderMethod* ptr) : ptr_(ptr) {}
+ operator const pb::DecoderMethod*() { return ptr_; }
+ operator const RefCounted*() { return UPB_UPCAST(ptr_); }
+ private:
+ const pb::DecoderMethod* ptr_;
+};
+
namespace pb {
-inline bool ResetDecoderSink(Decoder* r, Sink* sink) {
- return upb_pbdecoder_resetsink(r, sink);
+
+inline Decoder::Decoder(const DecoderMethod* m, Status* s) {
+ upb_pbdecoder_init(this, m, s);
+}
+inline Decoder::~Decoder() {
+ upb_pbdecoder_uninit(this);
+}
+inline const DecoderMethod* Decoder::method() const {
+ return upb_pbdecoder_method(this);
+}
+inline void Decoder::Reset() {
+ upb_pbdecoder_reset(this);
+}
+inline bool Decoder::ResetOutput(Sink* sink) {
+ return upb_pbdecoder_resetoutput(this, sink);
+}
+inline BytesSink* Decoder::input() {
+ return upb_pbdecoder_input(this);
+}
+
+inline void DecoderMethod::Ref(const void *owner) const {
+ upb_pbdecodermethod_ref(this, owner);
}
-inline const upb::Handlers* GetDecoderHandlers(const upb::Handlers* dest,
- bool allowjit,
- const void* owner) {
- return upb_pbdecoder_gethandlers(dest, allowjit, owner);
+inline void DecoderMethod::Unref(const void *owner) const {
+ upb_pbdecodermethod_unref(this, owner);
}
-inline bool IsDecoder(const upb::Handlers* h) {
- return upb_pbdecoder_isdecoder(h);
+inline void DecoderMethod::DonateRef(const void *from, const void *to) const {
+ upb_pbdecodermethod_donateref(this, from, to);
}
-inline bool HasJitCode(const upb::Handlers* h) {
- return upb_pbdecoder_hasjitcode(h);
+inline void DecoderMethod::CheckRef(const void *owner) const {
+ upb_pbdecodermethod_checkref(this, owner);
}
-inline const upb::Handlers* GetDestHandlers(const upb::Handlers* h) {
- return upb_pbdecoder_getdesthandlers(h);
+inline const MessageDef* DecoderMethod::schema() const {
+ return upb_pbdecodermethod_schema(this);
}
+inline const Handlers* DecoderMethod::dest_handlers() const {
+ return upb_pbdecodermethod_desthandlers(this);
+}
+inline const BytesHandler* DecoderMethod::input_handler() const {
+ return upb_pbdecodermethod_inputhandler(this);
+}
+inline bool DecoderMethod::is_native() const {
+ return upb_pbdecodermethod_isnative(this);
+}
+// static
+inline reffed_ptr<const DecoderMethod> DecoderMethod::NewForDestHandlers(
+ const Handlers *dest) {
+ const upb_pbdecodermethod *m =
+ upb_pbdecodermethod_newfordesthandlers(dest, &m);
+ return reffed_ptr<const DecoderMethod>(m, &m);
+}
+
+inline CodeCache::CodeCache() {
+ upb_pbcodecache_init(this);
+}
+inline CodeCache::~CodeCache() {
+ upb_pbcodecache_uninit(this);
+}
+inline bool CodeCache::allow_jit() const {
+ return upb_pbcodecache_allowjit(this);
+}
+inline bool CodeCache::set_allow_jit(bool allow) {
+ return upb_pbcodecache_setallowjit(this, allow);
+}
+inline const DecoderMethod* CodeCache::GetDecoderMethodForDestHandlers(
+ const upb::Handlers* handlers) {
+ return upb_pbcodecache_getdecodermethodfordesthandlers(this, handlers);
+}
+
} // namespace pb
} // namespace upb
-#endif
+
+#endif // __cplusplus
#endif /* UPB_DECODER_H_ */
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index 8c8710c..1c10eb3 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -67,11 +67,46 @@ typedef enum {
UPB_INLINE opcode getop(uint32_t instr) { return instr & 0xff; }
-const upb_frametype upb_pbdecoder_frametype;
+// Method group; represents a set of decoder methods that had their code
+// emitted together, and must therefore be freed together. Immutable once
+// created. It is possible we may want to expose this to users at some point.
+//
+// Overall ownership of Decoder objects looks like this:
+//
+// +----------+
+// | | <---> DecoderMethod
+// | method |
+// CodeCache ---> | group | <---> DecoderMethod
+// | |
+// | (mgroup) | <---> DecoderMethod
+// +----------+
+typedef struct {
+ upb_refcounted base;  // First member; freegroup() and visitgroup() cast the upb_refcounted* they receive to mgroup*.
+
+ // Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. We own refs on the
+ // methods.
+ upb_inttable methods;  // Walked by visitgroup(); torn down with upb_inttable_uninit() in freegroup().
+
+ // When we add the ability to link to previously existing mgroups, we'll
+ // need an array of mgroups we reference here, and own refs on them.
+
+ // The bytecode for our methods, if any exists. Owned by us.
+ uint32_t *bytecode;  // Set to NULL by newgroup(); released with free() in freegroup().
+ uint32_t *bytecode_end;  // Also set to NULL by newgroup().
+
+#ifdef UPB_USE_JIT_X64
+ // JIT-generated machine code, if any.
+ upb_string_handlerfunc *jit_code;
+ // The size of the jit_code (required to munmap()).
+ size_t jit_size;
+ char *debug_info;  // NOTE(review): undocumented; purpose not visible here -- confirm against the JIT code.
+ void *dl;  // NOTE(review): undocumented; presumably a dlopen()-style handle -- confirm.
+#endif
+} mgroup;
// Decoder entry points; used as handlers.
-void *upb_pbdecoder_start(void *closure, const void *handler_data,
- size_t size_hint);
+void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
+void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
size_t size);
bool upb_pbdecoder_end(void *closure, const void *handler_data);
@@ -91,18 +126,12 @@ void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
// Error messages that are shared between the bytecode and JIT decoders.
extern const char *kPbDecoderStackOverflow;
-typedef struct _upb_pbdecoderplan upb_pbdecoderplan;
-
-// Access to decoderplan members needed by the decoder.
+// Maps an opcode to its name.
-bool upb_pbdecoderplan_hasjitcode(const upb_pbdecoderplan *p);
-uint32_t *upb_pbdecoderplan_codebase(const upb_pbdecoderplan *p);
const char *upb_pbdecoder_getopname(unsigned int op);
-upb_string_handler *upb_pbdecoderplan_jitcode(const upb_pbdecoderplan *p);
-
-// JIT entry point.
-void upb_pbdecoder_jit(upb_pbdecoderplan *plan);
-void upb_pbdecoder_freejit(upb_pbdecoderplan *plan);
+// JIT codegen entry point.
+void upb_pbdecoder_jit(mgroup *group);
+void upb_pbdecoder_freejit(mgroup *group);
// A special label that means "do field dispatch for this message and branch to
// wherever that takes you."
@@ -112,131 +141,4 @@ void upb_pbdecoder_freejit(upb_pbdecoderplan *plan);
#define DECODE_MISMATCH -2 // Used only from checktag_slow().
#define DECODE_ENDGROUP -2 // Used only from checkunknown().
-typedef struct {
- // The absolute stream offset of the end-of-frame delimiter.
- // Non-delimited frames (groups and non-packed repeated fields) reuse the
- // delimiter of their parent, even though the frame may not end there.
- //
- // NOTE: the JIT stores a slightly different value here for non-top frames.
- // It stores the value relative to the end of the enclosed message. But the
- // innermost frame is still stored the same way, which is important for
- // ensuring that calls from the JIT into C work correctly.
- uint64_t end_ofs;
- uint32_t *base;
- uint32_t groupnum;
- union {
- upb_inttable *dispatch; // Not used by the JIT.
- void *closure; // Only used by the JIT.
- } u;
-} upb_pbdecoder_frame;
-
-struct upb_pbdecoder {
- // Where we push parsed data (not owned).
- upb_sink *sink;
-
- size_t call_len;
- uint32_t *pc, *last;
-
- // Current input buffer and its stream offset.
- const char *buf, *ptr, *end, *checkpoint;
-
- // End of the delimited region, relative to ptr, or NULL if not in this buf.
- const char *delim_end;
-
- // End of the delimited region, relative to ptr, or end if not in this buf.
- const char *data_end;
-
- // Overall stream offset of "buf."
- uint64_t bufstart_ofs;
-
- // How many bytes past the end of the user buffer we want to skip.
- size_t skip;
-
- // Buffer for residual bytes not parsed from the previous buffer.
- // The maximum number of residual bytes we require is 12; a five-byte
- // unknown tag plus an eight-byte value, less one because the value
- // is only a partial value.
- char residual[12];
- char *residual_end;
-
- // Stores the user buffer passed to our decode function.
- const char *buf_param;
- size_t size_param;
-
-#ifdef UPB_USE_JIT_X64
- // Used momentarily by the generated code to store a value while a user
- // function is called.
- uint32_t tmp_len;
-
- const void *saved_rsp;
-#endif
-
- upb_status *status;
-
- // Our internal stack.
- upb_pbdecoder_frame *top, *limit;
- upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING];
- uint32_t *callstack[UPB_DECODER_MAX_NESTING * 2];
-};
-
-// Data pertaining to a single decoding method/function.
-// Each method contains code to parse a single message type.
-// If may or may not be bound to a destination handlers object.
-typedef struct {
- // While compiling, the base is relative in "ofs", after compiling it is
- // absolute in "ptr".
- union {
- uint32_t ofs; // PC offset of method.
- const void *ptr; // Pointer to bytecode or machine code for this method.
- } base;
-
- // Whether this method is native code or bytecode.
- bool native_code;
-
- // The message type that this method is parsing.
- const upb_msgdef *msg;
-
- // The destination handlers this method is bound to, or NULL if this method
- // can be bound to a destination handlers instance at runtime.
- //
- // If non-NULL, we own a ref.
- const upb_handlers *dest_handlers;
-
- // The dispatch table layout is:
- // [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
- //
- // If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
- // (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
- //
- // We need two wire types because of packed/non-packed compatibility. A
- // primitive repeated field can use either wire type and be valid. While we
- // could key the table on fieldnum+wiretype, the table would be 8x sparser.
- //
- // Storing two wire types in the primary value allows us to quickly rule out
- // the second wire type without needing to do a separate lookup (this case is
- // less common than an unknown field).
- upb_inttable dispatch;
-} upb_pbdecodermethod;
-
-struct _upb_pbdecoderplan {
- // Pointer to bytecode.
- uint32_t *code, *code_end;
-
- // Maps upb_msgdef*/upb_handlers* -> upb_pbdecodermethod
- upb_inttable methods;
-
- // The method that starts parsing when we first call into the plan.
- // Ideally we will remove the idea that any of the methods in the plan
- // are special like this, so that any method can be the top-level one.
- upb_pbdecodermethod *topmethod;
-
-#ifdef UPB_USE_JIT_X64
- // JIT-generated machine code (else NULL).
- upb_string_handler *jit_code;
- size_t jit_size;
- char *debug_info;
- void *dl;
-#endif
-};
-
#endif // UPB_DECODER_INT_H_
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 9027e0f..73ef145 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -10,45 +10,46 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "upb/bytestream.h"
#include "upb/descriptor/reader.h"
#include "upb/pb/decoder.h"
+// Decodes the serialized descriptor in str[0..len) and returns a malloc()ed
+// copy of the resulting def array, storing the number of defs in *n.
+// Returns NULL if decoding fails or the copy cannot be allocated.
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
void *owner, upb_status *status) {
// Create handlers.
- const upb_handlers *reader_h = upb_descreader_gethandlers(&reader_h);
- const upb_handlers *decoder_h =
- upb_pbdecoder_gethandlers(reader_h, true, &decoder_h);
+ const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
+ const upb_pbdecodermethod *decoder_m =
+ upb_pbdecodermethod_newfordesthandlers(reader_h, &decoder_m);
- // Create pipeline.
- upb_pipeline pipeline;
- upb_pipeline_init(&pipeline, NULL, 0, upb_realloc, NULL);
- upb_pipeline_donateref(&pipeline, reader_h, &reader_h);
- upb_pipeline_donateref(&pipeline, decoder_h, &decoder_h);
+ upb_pbdecoder decoder;
+ upb_descreader reader;
- // Create sinks.
- upb_sink *reader_sink = upb_pipeline_newsink(&pipeline, reader_h);
- upb_sink *decoder_sink = upb_pipeline_newsink(&pipeline, decoder_h);
- upb_pbdecoder *d = upb_sink_getobj(decoder_sink);
- upb_pbdecoder_resetsink(d, reader_sink);
+ upb_pbdecoder_init(&decoder, decoder_m, status);
+ upb_descreader_init(&reader, reader_h, status);
+ upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader));
// Push input data.
- bool ok = upb_bytestream_putstr(decoder_sink, str, len);
+ bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder));
- if (status) upb_status_copy(status, upb_pipeline_status(&pipeline));
- if (!ok) {
- upb_pipeline_uninit(&pipeline);
- return NULL;
- }
+ upb_def **ret = NULL;
- upb_descreader *r = upb_sink_getobj(reader_sink);
- upb_def **defs = upb_descreader_getdefs(r, owner, n);
- upb_def **defscopy = malloc(sizeof(upb_def*) * (*n));
- memcpy(defscopy, defs, sizeof(upb_def*) * (*n));
- upb_pipeline_uninit(&pipeline);
+ if (ok) {
+   // The reader is uninitialized below, so hand back a private copy of its array.
+   upb_def **defs = upb_descreader_getdefs(&reader, owner, n);
+   ret = malloc(sizeof(*ret) * (*n));
+   // NOTE(review): if malloc() fails we return NULL, but the refs getdefs()
+   // presumably gave to "owner" are not released here -- confirm OOM policy.
+   if (ret) memcpy(ret, defs, sizeof(*ret) * (*n));
+ }
- return defscopy;
+ // Tear down the stack objects and drop our own refs on every path.
+ upb_pbdecoder_uninit(&decoder);
+ upb_descreader_uninit(&reader);
+ upb_handlers_unref(reader_h, &reader_h);
+ upb_pbdecodermethod_unref(decoder_m, &decoder_m);
+ return ret;
}
bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
index 08eda15..0c12571 100644
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@@ -203,40 +203,45 @@ static void onmreg(void *c, upb_handlers *h) {
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
+ // One attr, carrying f as the handler data, is shared by every handler
+ // registered for this field below.
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, f, NULL);
switch (upb_fielddef_type(f)) {
case UPB_TYPE_INT32:
- upb_handlers_setint32(h, f, putint32, f, NULL);
+ upb_handlers_setint32(h, f, putint32, &attr);
break;
case UPB_TYPE_INT64:
- upb_handlers_setint64(h, f, putint64, f, NULL);
+ upb_handlers_setint64(h, f, putint64, &attr);
break;
case UPB_TYPE_UINT32:
- upb_handlers_setuint32(h, f, putuint32, f, NULL);
+ upb_handlers_setuint32(h, f, putuint32, &attr);
break;
case UPB_TYPE_UINT64:
- upb_handlers_setuint64(h, f, putuint64, f, NULL);
+ upb_handlers_setuint64(h, f, putuint64, &attr);
break;
case UPB_TYPE_FLOAT:
- upb_handlers_setfloat(h, f, putfloat, f, NULL);
+ upb_handlers_setfloat(h, f, putfloat, &attr);
break;
case UPB_TYPE_DOUBLE:
- upb_handlers_setdouble(h, f, putdouble, f, NULL);
+ upb_handlers_setdouble(h, f, putdouble, &attr);
break;
case UPB_TYPE_BOOL:
- upb_handlers_setbool(h, f, putbool, f, NULL);
+ upb_handlers_setbool(h, f, putbool, &attr);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
- upb_handlers_setstartstr(h, f, startstr, f, NULL);
- upb_handlers_setstring(h, f, putstr, f, NULL);
- upb_handlers_setendstr(h, f, endstr, f, NULL);
+ upb_handlers_setstartstr(h, f, startstr, &attr);
+ upb_handlers_setstring(h, f, putstr, &attr);
+ upb_handlers_setendstr(h, f, endstr, &attr);
break;
case UPB_TYPE_MESSAGE:
- upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL);
- upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL);
+ upb_handlers_setstartsubmsg(h, f, startsubmsg, &attr);
+ upb_handlers_setendsubmsg(h, f, endsubmsg, &attr);
break;
case UPB_TYPE_ENUM:
- upb_handlers_setint32(h, f, putenum, f, NULL);
+ upb_handlers_setint32(h, f, putenum, &attr);
+ break;  // Enum fields are handled; do not fall into the default assert.
default:
assert(false);
break;
@@ -246,5 +251,5 @@ static void onmreg(void *c, upb_handlers *h) {
const upb_handlers *upb_textprinter_newhandlers(const void *owner,
const upb_msgdef *m) {
- return upb_handlers_newfrozen(m, NULL, owner, &onmreg, NULL);
+ return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback