summary refs log tree commit diff
path: root/upb
diff options
context:
space:
mode:
author Joshua Haberman <jhaberman@gmail.com> 2019-01-16 15:53:13 -0800
committer Joshua Haberman <jhaberman@gmail.com> 2019-01-16 15:53:13 -0800
commit 84fb01ad0f7301b416e03d97fbffef1a7512e7ea (patch)
tree e2c9c024b52f387e70c43daff9d0d6cbba549580 /upb
parent 2c26f60dbbc49bca6233cc20a15ff4b32454c6e8 (diff)
Removed the JIT. Nobody was actually using it, and table-driven should achieve 80-90% of the perf.
Diffstat (limited to 'upb')
-rw-r--r-- upb/pb/compile_decoder.c 38
-rw-r--r-- upb/pb/compile_decoder_x64.c 511
-rw-r--r-- upb/pb/compile_decoder_x64.dasc 1150
-rw-r--r-- upb/pb/compile_decoder_x64.h 1737
4 files changed, 2 insertions, 3434 deletions
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index ca497ed..c5d8d9b 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -4,11 +4,6 @@
** Code to compile a upb::Handlers into bytecode for decoding a protobuf
** according to that specific schema and destination handlers.
**
-** Compiling to bytecode is always the first step. If we are using the
-** interpreted decoder we leave it as bytecode and interpret that. If we are
-** using a JIT decoder we use a code generator to turn the bytecode into native
-** code, LLVM IR, etc.
-**
** Bytecode definition is in decoder.int.h.
*/
@@ -37,7 +32,6 @@ static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
ret->group = group;
ret->dest_handlers_ = dest_handlers;
- ret->is_native_ = false; /* If we JIT, it will update this later. */
upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
return ret;
@@ -69,9 +63,6 @@ static void freegroup(mgroup *g) {
}
upb_inttable_uninit(&g->methods);
-#ifdef UPB_USE_JIT_X64
- upb_pbdecoder_freejit(g);
-#endif
upb_gfree(g->bytecode);
upb_gfree(g);
}
@@ -313,7 +304,7 @@ static void putop(compiler *c, int op, ...) {
va_end(ap);
}
-#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
+#if defined(UPB_DUMP_BYTECODE)
const char *upb_pbdecoder_getopname(unsigned int op) {
#define QUOTE(x) #x
@@ -827,31 +818,6 @@ static void set_bytecode_handlers(mgroup *g) {
}
-/* JIT setup. *****************************************************************/
-
-#ifdef UPB_USE_JIT_X64
-
-static void sethandlers(mgroup *g, bool allowjit) {
- g->jit_code = NULL;
- if (allowjit) {
- /* Compile byte-code into machine code, create handlers. */
- upb_pbdecoder_jit(g);
- } else {
- set_bytecode_handlers(g);
- }
-}
-
-#else /* UPB_USE_JIT_X64 */
-
-static void sethandlers(mgroup *g, bool allowjit) {
- /* No JIT compiled in; use bytecode handlers unconditionally. */
- UPB_UNUSED(allowjit);
- set_bytecode_handlers(g);
-}
-
-#endif /* UPB_USE_JIT_X64 */
-
-
/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
* handlers and other mgroups (but verify we have a transitive closure). */
const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy) {
@@ -891,7 +857,7 @@ const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy) {
}
#endif
- sethandlers(g, allowjit);
+ set_bytecode_handlers(g);
return g;
}
diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c
deleted file mode 100644
index 7c716e8..0000000
--- a/upb/pb/compile_decoder_x64.c
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
-** Driver code for the x64 JIT compiler.
-*/
-
-/* Needed to ensure we get defines like MAP_ANON. */
-#define _GNU_SOURCE
-
-#include <dlfcn.h>
-#include <stdio.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#include "upb/msg.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/decoder.int.h"
-#include "upb/pb/varint.int.h"
-
-/* To debug the JIT:
- *
- * 1. Uncomment:
- * #define UPB_JIT_LOAD_SO
- *
- * Note: this mode requires that we can shell out to gcc.
- *
- * 2. Run the test locally. This will load the JIT code by building a
- * .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
- * work properly (like GDB).
- *
- * IF YOU ALSO WANT AUTOMATIC JIT DEBUG OUTPUT:
- *
- * 3. Run: upb/pb/make-gdb-script.rb > script.gdb. This reads
- * /tmp/upb-jit-code.so as input and generates a GDB script that is specific
- * to this jit code.
- *
- * 4. Run: gdb --command=script.gdb --args path/to/test
- * This will drop you to a GDB prompt which you can now use normally.
- * But when you run the test it will print a message to stdout every time
- * the JIT executes assembly for a particular bytecode. Sample output:
- *
- * X.enterjit bytes=18
- * buf_ofs=1 data_rem=17 delim_rem=-2 X.0x6.OP_PARSE_DOUBLE
- * buf_ofs=9 data_rem=9 delim_rem=-10 X.0x7.OP_CHECKDELIM
- * buf_ofs=9 data_rem=9 delim_rem=-10 X.0x8.OP_TAG1
- * X.0x3.dispatch.DecoderTest
- * X.parse_unknown
- * X.0x3.dispatch.DecoderTest
- * X.decode_unknown_tag_fallback
- * X.exitjit
- *
- * This output should roughly correspond to the output that the bytecode
- * interpreter emits when compiled with UPB_DUMP_BYTECODE (modulo some
- * extra JIT-specific output). */
-
-/* These defines are necessary for DynASM codegen.
- * See dynasm/dasm_proto.h for more info. */
-#define Dst_DECL jitcompiler *jc
-#define Dst_REF (jc->dynasm)
-#define Dst (jc)
-
-/* In debug mode, make DynASM do internal checks (must be defined before any
- * dasm header is included). */
-#ifndef NDEBUG
-#define DASM_CHECKS
-#endif
-
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-typedef struct {
- mgroup *group;
- uint32_t *pc;
-
- /* This pointer is allocated by dasm_init() and freed by dasm_free(). */
- struct dasm_State *dynasm;
-
- /* Maps some key (an arbitrary void*) to a pclabel.
- *
- * The pclabel represents a location in the generated code -- DynASM exposes
- * a pclabel -> (machine code offset) lookup function.
- *
- * The key can be anything. There are two main kinds of keys:
- * - bytecode location -- the void* points to the bytecode instruction
- * itself. We can then use this to generate jumps to this instruction.
- * - other object (like dispatch table). We use these to represent parts
- * of the generated code that do not exactly correspond to a bytecode
- * instruction. */
- upb_inttable jmptargets;
-
-#ifndef NDEBUG
- /* Like jmptargets, but members are present in the table when they have had
- * define_jmptarget() (as opposed to jmptarget) called. Used to verify that
- * define_jmptarget() is called exactly once for every target.
- * The value is ignored. */
- upb_inttable jmpdefined;
-
- /* For checking that two asmlabels aren't defined for the same byte. */
- int lastlabelofs;
-#endif
-
-#ifdef UPB_JIT_LOAD_SO
- /* For marking labels that should go into the generated code.
- * Maps pclabel -> char* label (string is owned by the table). */
- upb_inttable asmlabels;
-#endif
-
- /* The total number of pclabels currently defined.
- * Note that this contains both jmptargets and asmlabels, which both use
- * pclabels but for different purposes. */
- uint32_t pclabel_count;
-
- /* Used by DynASM to store globals. */
- void **globals;
-} jitcompiler;
-
-/* Functions called by codegen. */
-static int jmptarget(jitcompiler *jc, const void *key);
-static int define_jmptarget(jitcompiler *jc, const void *key);
-static void asmlabel(jitcompiler *jc, const char *fmt, ...);
-static int pcofs(jitcompiler* jc);
-static int alloc_pclabel(jitcompiler *jc);
-
-#ifdef UPB_JIT_LOAD_SO
-static char *upb_vasprintf(const char *fmt, va_list ap);
-static char *upb_asprintf(const char *fmt, ...);
-#endif
-
-#include "third_party/dynasm/dasm_proto.h"
-#include "third_party/dynasm/dasm_x86.h"
-#include "upb/pb/compile_decoder_x64.h"
-
-static jitcompiler *newjitcompiler(mgroup *group) {
- jitcompiler *jc = malloc(sizeof(jitcompiler));
- jc->group = group;
- jc->pclabel_count = 0;
- upb_inttable_init(&jc->jmptargets, UPB_CTYPE_UINT32);
-#ifndef NDEBUG
- jc->lastlabelofs = -1;
- upb_inttable_init(&jc->jmpdefined, UPB_CTYPE_BOOL);
-#endif
-#ifdef UPB_JIT_LOAD_SO
- upb_inttable_init(&jc->asmlabels, UPB_CTYPE_PTR);
-#endif
- jc->globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*jc->globals));
-
- dasm_init(jc, 1);
- dasm_setupglobal(jc, jc->globals, UPB_JIT_GLOBAL__MAX);
- dasm_setup(jc, upb_jit_actionlist);
-
- return jc;
-}
-
-static void freejitcompiler(jitcompiler *jc) {
-#ifdef UPB_JIT_LOAD_SO
- upb_inttable_iter i;
- upb_inttable_begin(&i, &jc->asmlabels);
- for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
- free(upb_value_getptr(upb_inttable_iter_value(&i)));
- }
- upb_inttable_uninit(&jc->asmlabels);
-#endif
-#ifndef NDEBUG
- upb_inttable_uninit(&jc->jmpdefined);
-#endif
- upb_inttable_uninit(&jc->jmptargets);
- dasm_free(jc);
- free(jc->globals);
- free(jc);
-}
-
-#ifdef UPB_JIT_LOAD_SO
-
-/* Like sprintf except allocates the string, which is returned and owned by the
- * caller.
- *
- * Like the GNU extension asprintf(), except we abort on error (since this is
- * only for debugging). */
-static char *upb_vasprintf(const char *fmt, va_list args) {
- /* Run once to get the length of the string. */
- va_list args_copy;
- va_copy(args_copy, args);
- int len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
- va_end(args_copy);
-
- char *ret = malloc(len + 1); /* + 1 for NULL terminator. */
- if (!ret) abort();
- int written = _upb_vsnprintf(ret, len + 1, fmt, args);
- UPB_ASSERT(written == len);
-
- return ret;
-}
-
-static char *upb_asprintf(const char *fmt, ...) {
- va_list args;
- va_start(args, fmt);
- char *ret = upb_vasprintf(fmt, args);
- va_end(args);
- return ret;
-}
-
-#endif
-
-static int alloc_pclabel(jitcompiler *jc) {
- int newpc = jc->pclabel_count++;
- dasm_growpc(jc, jc->pclabel_count);
- return newpc;
-}
-
-static bool try_getjmptarget(jitcompiler *jc, const void *key, int *pclabel) {
- upb_value v;
- if (upb_inttable_lookupptr(&jc->jmptargets, key, &v)) {
- *pclabel = upb_value_getuint32(v);
- return true;
- } else {
- return false;
- }
-}
-
-/* Gets the pclabel for this bytecode location's jmptarget. Requires that the
- * jmptarget() has been previously defined. */
-static int getjmptarget(jitcompiler *jc, const void *key) {
- int pclabel = 0;
- bool ok;
-
- UPB_ASSERT_DEBUGVAR(upb_inttable_lookupptr(&jc->jmpdefined, key, NULL));
- ok = try_getjmptarget(jc, key, &pclabel);
- UPB_ASSERT(ok);
- return pclabel;
-}
-
-/* Returns a pclabel that serves as a jmp target for the given bytecode pointer.
- * This should only be called for code that is jumping to the target; code
- * defining the target should use define_jmptarget().
- *
- * Creates/allocates a pclabel for this target if one does not exist already. */
-static int jmptarget(jitcompiler *jc, const void *key) {
- /* Optimizer sometimes can't figure out that initializing this is unnecessary.
- */
- int pclabel = 0;
- if (!try_getjmptarget(jc, key, &pclabel)) {
- pclabel = alloc_pclabel(jc);
- upb_inttable_insertptr(&jc->jmptargets, key, upb_value_uint32(pclabel));
- }
- return pclabel;
-}
-
-/* Defines a pclabel associated with the given bytecode location.
- * Must be called exactly once by the code that is generating the code for this
- * bytecode.
- *
- * Must be called exactly once before bytecode generation is complete (this is a
- * sanity check to make sure the label is defined exactly once). */
-static int define_jmptarget(jitcompiler *jc, const void *key) {
-#ifndef NDEBUG
- upb_inttable_insertptr(&jc->jmpdefined, key, upb_value_bool(true));
-#endif
- return jmptarget(jc, key);
-}
-
-/* Returns a bytecode pc offset relative to the beginning of the group's
- * code. */
-static int pcofs(jitcompiler *jc) {
- return jc->pc - jc->group->bytecode;
-}
-
-/* Returns a machine code offset corresponding to the given key.
- * Requires that this key was defined with define_jmptarget. */
-static int machine_code_ofs(jitcompiler *jc, const void *key) {
- int pclabel = getjmptarget(jc, key);
- /* Despite its name, this function takes a pclabel and returns the
- * corresponding machine code offset. */
- return dasm_getpclabel(jc, pclabel);
-}
-
-/* Returns a machine code offset corresponding to the given method-relative
- * bytecode offset. Note that the bytecode offset is relative to the given
- * method, but the returned machine code offset is relative to the beginning of
- * *all* the machine code. */
-static int machine_code_ofs2(jitcompiler *jc, const upb_pbdecodermethod *method,
- int pcofs) {
- void *bc_target = jc->group->bytecode + method->code_base.ofs + pcofs;
- return machine_code_ofs(jc, bc_target);
-}
-
-/* Given a pcofs relative to this method's base, returns a machine code offset
- * relative to jmptarget(dispatch->array) (which is used in jitdispatch as the
- * machine code base for dispatch table lookups). */
-uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method,
- int pcofs) {
- int mc_base = machine_code_ofs(jc, method->dispatch.array);
- int mc_target = machine_code_ofs2(jc, method, pcofs);
- int ret;
-
- UPB_ASSERT(mc_base > 0);
- UPB_ASSERT(mc_target > 0);
- ret = mc_target - mc_base;
- UPB_ASSERT(ret > 0);
- return ret;
-}
-
-/* Rewrites the dispatch tables into machine code offsets. */
-static void patchdispatch(jitcompiler *jc) {
- upb_inttable_iter i;
- upb_inttable_begin(&i, &jc->group->methods);
- for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
- upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
- upb_inttable *dispatch = &method->dispatch;
- upb_inttable_iter i2;
-
- method->is_native_ = true;
-
- /* Remove DISPATCH_ENDMSG -- only the bytecode interpreter needs it.
- * And leaving it around will cause us to find field 0 improperly. */
- upb_inttable_remove(dispatch, DISPATCH_ENDMSG, NULL);
-
- upb_inttable_begin(&i2, dispatch);
- for (; !upb_inttable_done(&i2); upb_inttable_next(&i2)) {
- uintptr_t key = upb_inttable_iter_key(&i2);
- uint64_t val = upb_value_getuint64(upb_inttable_iter_value(&i2));
- uint64_t newval;
- bool ok;
- if (key <= UPB_MAX_FIELDNUMBER) {
- /* Primary slot. */
- uint64_t ofs;
- uint8_t wt1;
- uint8_t wt2;
- upb_pbdecoder_unpackdispatch(val, &ofs, &wt1, &wt2);
-
- /* Update offset and repack. */
- ofs = dispatchofs(jc, method, ofs);
- newval = upb_pbdecoder_packdispatch(ofs, wt1, wt2);
- UPB_ASSERT((int64_t)newval > 0);
- } else {
- /* Secondary slot. Since we have 64 bits for the value, we use an
- * absolute offset. */
- int mcofs = machine_code_ofs2(jc, method, val);
- newval = (uint64_t)((char*)jc->group->jit_code + mcofs);
- }
- ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval));
- UPB_ASSERT(ok);
- }
-
- /* Update entry point for this method to point at mc base instead of bc
- * base. Set this only *after* we have patched the offsets
- * (machine_code_ofs2() uses this). */
- method->code_base.ptr = (char*)jc->group->jit_code + machine_code_ofs(jc, method);
-
- {
- upb_byteshandler *h = &method->input_handler_;
- upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL);
- upb_byteshandler_setstring(h, jc->group->jit_code, method->code_base.ptr);
- upb_byteshandler_setendstr(h, upb_pbdecoder_end, method);
- }
- }
-}
-
-#ifdef UPB_JIT_LOAD_SO
-
-static void load_so(jitcompiler *jc) {
- /* Dump to a .so file in /tmp and load that, so all the tooling works right
- * (for example, debuggers and profilers will see symbol names for the JIT-ted
- * code). This is the same goal of the GDB JIT code below, but the GDB JIT
- * interface is only used/understood by GDB. Hopefully a standard will
- * develop for registering JIT-ted code that all tools will recognize,
- * rendering this obsolete.
- *
- * jc->asmlabels maps:
- * pclabel -> char* label
- *
- * Use this to build mclabels, which maps:
- * machine code offset -> char* label
- *
- * Then we can use mclabels to emit the labels as we iterate over the bytes we
- * are outputting. */
- upb_inttable_iter i;
- upb_inttable mclabels;
- upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
- upb_inttable_begin(&i, &jc->asmlabels);
- for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
- upb_inttable_insert(&mclabels,
- dasm_getpclabel(jc, upb_inttable_iter_key(&i)),
- upb_inttable_iter_value(&i));
- }
-
- /* We write a .s file in text format, as input to the assembler.
- * Then we run gcc to turn it into a .so file.
- *
- * The last "XXXXXX" will be replaced with something randomly generated by
- * mkstemp(). We don't add ".s" to this filename because it makes the string
- * processing for mkstemp() and system() more complicated. */
- char s_filename[] = "/tmp/upb-jit-codeXXXXXX";
- int fd = mkstemp(s_filename);
- FILE *f;
- if (fd >= 0 && (f = fdopen(fd, "wb")) != NULL) {
- uint8_t *jit_code = (uint8_t*)jc->group->jit_code;
- size_t linelen = 0;
- size_t i;
- fputs(" .text\n\n", f);
- for (i = 0; i < jc->group->jit_size; i++) {
- upb_value v;
- if (upb_inttable_lookup(&mclabels, i, &v)) {
- const char *label = upb_value_getptr(v);
- /* "X." makes our JIT syms recognizable as such, which we build into
- * other tooling. */
- fprintf(f, "\n\nX.%s:\n", label);
- fprintf(f, " .globl X.%s", label);
- linelen = 1000;
- }
- if (linelen >= 77) {
- linelen = fprintf(f, "\n .byte %u", jit_code[i]);
- } else {
- linelen += fprintf(f, ",%u", jit_code[i]);
- }
- }
- fputs("\n", f);
- fclose(f);
- } else {
- fprintf(stderr, "Error opening tmp file for JIT debug output.\n");
- abort();
- }
-
- /* This is exploitable if you have an adversary on your machine who can write
- * to this tmp directory. But this is just for debugging so we don't worry
- * too much about that. It shouldn't be prone to races against concurrent
- * (non-adversarial) upb JIT's because we used mkstemp(). */
- char *cmd = upb_asprintf("gcc -shared -o %s.so -x assembler %s", s_filename,
- s_filename);
- if (system(cmd) != 0) {
- fprintf(stderr, "Error compiling %s\n", s_filename);
- abort();
- }
- free(cmd);
-
- char *so_filename = upb_asprintf("%s.so", s_filename);
-
- /* Some convenience symlinks.
- * This is racy, but just for convenience. */
- int ret;
- unlink("/tmp/upb-jit-code.so");
- unlink("/tmp/upb-jit-code.s");
- ret = symlink(s_filename, "/tmp/upb-jit-code.s");
- ret = symlink(so_filename, "/tmp/upb-jit-code.so");
- UPB_UNUSED(ret); // We don't care if this fails.
-
- jc->group->dl = dlopen(so_filename, RTLD_LAZY);
- free(so_filename);
- if (!jc->group->dl) {
- fprintf(stderr, "Couldn't dlopen(): %s\n", dlerror());
- abort();
- }
-
- munmap(jc->group->jit_code, jc->group->jit_size);
- jc->group->jit_code = dlsym(jc->group->dl, "X.enterjit");
- if (!jc->group->jit_code) {
- fprintf(stderr, "Couldn't find enterjit sym\n");
- abort();
- }
-
- upb_inttable_uninit(&mclabels);
-}
-
-#endif
-
-void upb_pbdecoder_jit(mgroup *group) {
- jitcompiler *jc;
- char *jit_code;
- int dasm_status;
-
- group->debug_info = NULL;
- group->dl = NULL;
-
- UPB_ASSERT(group->bytecode);
- jc = newjitcompiler(group);
- emit_static_asm(jc);
- jitbytecode(jc);
-
- dasm_status = dasm_link(jc, &jc->group->jit_size);
- if (dasm_status != DASM_S_OK) {
- fprintf(stderr, "DynASM error; returned status: 0x%08x\n", dasm_status);
- abort();
- }
-
- jit_code = mmap(NULL, jc->group->jit_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
- dasm_encode(jc, jit_code);
- mprotect(jit_code, jc->group->jit_size, PROT_EXEC | PROT_READ);
- jc->group->jit_code = (upb_string_handlerfunc *)jit_code;
-
-#ifdef UPB_JIT_LOAD_SO
- load_so(jc);
-#endif
-
- patchdispatch(jc);
-
- freejitcompiler(jc);
-
- /* Now the bytecode is no longer needed. */
- free(group->bytecode);
- group->bytecode = NULL;
-}
-
-void upb_pbdecoder_freejit(mgroup *group) {
- if (!group->jit_code) return;
- if (group->dl) {
-#ifdef UPB_JIT_LOAD_SO
- dlclose(group->dl);
-#endif
- } else {
- munmap((void*)group->jit_code, group->jit_size);
- }
- free(group->debug_info);
-}
diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc
deleted file mode 100644
index 7dc1987..0000000
--- a/upb/pb/compile_decoder_x64.dasc
+++ /dev/null
@@ -1,1150 +0,0 @@
-|//
-|// upb - a minimalist implementation of protocol buffers.
-|//
-|// Copyright (c) 2011-2013 Google Inc. See LICENSE for details.
-|// Author: Josh Haberman <jhaberman@gmail.com>
-|//
-|// JIT compiler for upb_pbdecoder on x86-64. Generates machine code from the
-|// bytecode generated in compile_decoder.c.
-|
-|.arch x64
-|.actionlist upb_jit_actionlist
-|.globals UPB_JIT_GLOBAL_
-|.globalnames upb_jit_globalnames
-|
-|// Calling conventions. Note -- this will need to be changed for
-|// Windows, which uses a different calling convention!
-|.define ARG1_64, rdi
-|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi.
-|.define ARG2_32, esi
-|.define ARG2_64, rsi
-|.define ARG3_8, dl
-|.define ARG3_32, edx
-|.define ARG3_64, rdx
-|.define ARG4_64, rcx
-|.define ARG5_64, r8
-|.define XMMARG1, xmm0
-|
-|// Register allocation / type map.
-|// ALL of the code in this file uses these register allocations.
-|// When we "call" within this file, we do not use regular calling
-|// conventions, but of course when calling to user callbacks we must.
-|.define PTR, rbx // DECODER->ptr (unsynced)
-|.define DATAEND, r12 // DECODER->data_end (unsynced)
-|.define CLOSURE, r13 // FRAME->closure (unsynced)
-|.type FRAME, upb_pbdecoder_frame, r14 // DECODER->top (unsynced)
-|.type DECODER, upb_pbdecoder, r15 // DECODER (immutable)
-|.define DELIMEND, rbp
-|
-| // Spills unsynced registers back to memory.
-|.macro commit_regs
-| mov DECODER->top, FRAME
-| mov DECODER->ptr, PTR
-| mov DECODER->data_end, DATAEND
-| // We don't guarantee that delim_end is NULL when out of range like the
-| // interpreter does.
-| mov DECODER->delim_end, DELIMEND
-| sub DELIMEND, DECODER->buf
-| add DELIMEND, DECODER->bufstart_ofs
-| mov FRAME->end_ofs, DELIMEND
-| mov FRAME->sink.closure, CLOSURE
-|.endmacro
-|
-| // Loads unsynced registers from memory back into registers.
-|.macro load_regs
-| mov FRAME, DECODER->top
-| mov PTR, DECODER->ptr
-| mov DATAEND, DECODER->data_end
-| mov CLOSURE, FRAME->sink.closure
-| mov DELIMEND, FRAME->end_ofs
-| sub DELIMEND, DECODER->bufstart_ofs
-| add DELIMEND, DECODER->buf
-|.endmacro
-|
-| // Calls an external C function at address "addr".
-|.macro callp, addr
-| mov64 rax, (uintptr_t)addr
-|
-| // Stack must be 16-byte aligned (x86-64 ABI requires this).
-| //
-| // OPT: possibly remove this by statically ensuring correct alignment.
-| //
-| // OPT: use "call rel32" where possible.
-| push r12
-| mov r12, rsp
-| and rsp, 0xfffffffffffffff0UL // Align stack.
-| call rax
-| mov rsp, r12
-| pop r12
-|.endmacro
-|
-|.macro ld64, val
-|| {
-|| uintptr_t v = (uintptr_t)val;
-|| if (v > 0xffffffff) {
-| mov64 ARG2_64, v
-|| } else if (v) {
-| mov ARG2_32, v
-|| } else {
-| xor ARG2_32, ARG2_32
-|| }
-|| }
-|.endmacro
-|
-|.macro load_handler_data, h, arg
-| ld64 gethandlerdata(h, arg)
-|.endmacro
-|
-|.macro chkeob, bytes, target
-|| if (bytes == 1) {
-| cmp PTR, DATAEND
-| je target
-|| } else {
-| mov rcx, DATAEND
-| sub rcx, PTR
-| cmp rcx, bytes
-| jb target
-|| }
-|.endmacro
-|
-|.macro chkneob, bytes, target
-|| if (bytes == 1) {
-| cmp PTR, DATAEND
-| jne target
-|| } else {
-| mov rcx, DATAEND
-| sub rcx, PTR
-| cmp rcx, bytes
-| jae target
-|| }
-|.endmacro
-
-|.macro sethas, reg, hasbit
-|| if (hasbit >= 0) {
-| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
-|| }
-|.endmacro
-|
-| // Decodes 32-bit varint into rdx, inlining 1 byte.
-|.macro dv32
-| chkeob 1, >7
-| movzx edx, byte [PTR]
-| test dl, dl
-| jns >8
-|7:
-| call ->decodev32_fallback
-|8:
-| add PTR, 1
-|.endmacro
-
-#define DECODE_EOF -3
-
-static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
- return h ? upb_handlers_gethandler(h, sel, NULL) : NULL;
-}
-
-/* Defines an "assembly label" for the current code generation offset.
- * This label exists *purely* for debugging purposes: it is emitted into
- * the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
- * defined.
- *
- * We would define this in the .c file except that it conditionally defines a
- * pclabel. */
-static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
-#ifndef NDEBUG
- int ofs = jc->dynasm->section->ofs;
- UPB_ASSERT(ofs != jc->lastlabelofs);
- jc->lastlabelofs = ofs;
-#endif
-
-#ifndef UPB_JIT_LOAD_SO
- UPB_UNUSED(jc);
- UPB_UNUSED(fmt);
-#else
- va_list args;
- va_start(args, fmt);
- char *str = upb_vasprintf(fmt, args);
- va_end(args);
-
- int pclabel = alloc_pclabel(jc);
- /* Normally we would prefer to allocate this inline with the codegen,
- * ie.
- * |=>asmlabel(...)
- * But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
- * we do it here instead. */
- |=>pclabel:
- upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
-#endif
-}
-
-/* Should only be called when the associated handler is known to exist. */
-static bool alwaysok(const upb_handlers *h, upb_selector_t sel) {
- upb_handlerattr attr = UPB_HANDLERATTR_INIT;
- bool ok = upb_handlers_getattr(h, sel, &attr);
-
- UPB_ASSERT(ok);
- return attr.alwaysok;
-}
-
-static const void *gethandlerdata(const upb_handlers *h, upb_selector_t sel) {
- upb_handlerattr attr = UPB_HANDLERATTR_INIT;
- bool ok = upb_handlers_getattr(h, sel, &attr);
-
- UPB_ASSERT(ok);
- return attr.handler_data;
-}
-
-/* Emit static assembly routines; code that does not vary based on the message
- * schema. Since it's not input-dependent, we only need one single copy of it.
- * For the moment we generate a single copy per generated handlers. Eventually
- * we should generate this code at compile time and link it into the binary so
- * we have one copy total. To do that we'll want to be sure that it is within
- * 2GB of our JIT code, so that branches between the two are near (rel32).
- *
- * We'd put this assembly in a .s file directly, but DynASM's ability to
- * calculate structure offsets automatically is too useful to pass up (it's way
- * more convenient to write DECODER->sink than [rbx + 0x96], especially since
- * the latter would have to be changed whenever the structure is updated). */
-static void emit_static_asm(jitcompiler *jc) {
- | // Trampolines for entering/exiting the JIT. These are a bit tricky to
- | // support full resuming; when we suspend we copy the JIT's portion of
- | // the call stack into the upb_pbdecoder and restore it when we resume.
- asmlabel(jc, "enterjit");
- |->enterjit:
- |1:
- | push rbp
- | push r15
- | push r14
- | push r13
- | push r12
- | push rbx
- |
- | mov rbx, ARG2_64 // Preserve JIT method.
- |
- | mov DECODER, rdi
- | callp upb_pbdecoder_resume // Same args as us; reuse regs.
- | test eax, eax
- | jns >1
- | mov DECODER->saved_rsp, rsp
- | mov rax, rbx
- | load_regs
- |
- | // Test whether we have a saved stack to resume.
- | mov ARG3_64, DECODER->call_len
- | test ARG3_64, ARG3_64
- | jnz >2
- |
- | call rax
- |
- | mov rax, DECODER->size_param
- | mov qword DECODER->call_len, 0
- |1:
- | pop rbx
- | pop r12
- | pop r13
- | pop r14
- | pop r15
- | pop rbp
- | ret
- |
- |2:
- | // Resume decoder.
- | mov ARG2_64, DECODER->callstack
- | sub rsp, ARG3_64
- | mov ARG1_64, rsp
- | callp memcpy // Restore stack.
- | ret // Return to resumed function (not ->enterjit caller).
- |
- | // Other code can call this to suspend the JIT.
- | // To the calling code, it will appear that the function returns when
- | // the JIT resumes, and more buffer space will be available.
- | // Args: eax=the value that decode() should return.
- asmlabel(jc, "exitjit");
- |->exitjit:
- | // Save the stack into DECODER->callstack.
- | mov ARG1_64, DECODER->callstack
- | mov ARG2_64, rsp
- | mov ARG3_64, DECODER->saved_rsp
- | sub ARG3_64, rsp
- | mov DECODER->call_len, ARG3_64 // Preserve len for next resume.
- | mov ebx, eax // Preserve return value across memcpy.
- | callp memcpy // Copy stack into decoder.
- | mov eax, ebx // This will be our return value.
- |
- | // Must NOT do this before the memcpy(), otherwise memcpy() will
- | // clobber the stack we are trying to save!
- | mov rsp, DECODER->saved_rsp
- | pop rbx
- | pop r12
- | pop r13
- | pop r14
- | pop r15
- | pop rbp
- | ret
- |
- | // Like suspend() in the C decoder, except that the function appears
- | // (from the caller's perspective) not to return until the decoder is
- | // resumed.
- asmlabel(jc, "suspend");
- |->suspend:
- | cmp DECODER->ptr, PTR
- | je >1
- | mov DECODER->checkpoint, PTR
- |1:
- | commit_regs
- | mov rdi, DECODER
- | callp upb_pbdecoder_suspend
- | jmp ->exitjit
- |
- asmlabel(jc, "pushlendelim");
- |->pushlendelim:
- |1:
- | mov FRAME->sink.closure, CLOSURE
- | mov DECODER->checkpoint, PTR
- | dv32
- | mov rcx, DELIMEND
- | sub rcx, PTR
- | sub rcx, rdx
- | jb >4 // Len is greater than enclosing message.
- | mov FRAME->end_ofs, rcx
- | cmp FRAME, DECODER->limit
- | je >3 // Stack overflow
- | add FRAME, sizeof(upb_pbdecoder_frame)
- | mov DELIMEND, PTR
- | add DELIMEND, rdx
- | mov dword FRAME->groupnum, 0
- | test rcx, rcx
- | jz >2
- | mov DATAEND, DECODER->end
- | cmp PTR, DELIMEND
- | ja >2
- | cmp DELIMEND, DATAEND
- | ja >2
- | mov DATAEND, DELIMEND // If DELIMEND >= PTR && DELIMEND < DATAEND
- |2:
- | ret
- |3:
- | // Stack overflow error.
- | mov PTR, DECODER->checkpoint // Rollback to before the delim len.
- | // Prepare seterr args.
- | mov ARG1_64, DECODER
- | ld64 kPbDecoderStackOverflow
- | callp upb_pbdecoder_seterr
- | call ->suspend
- | jmp <1
- |4:
- | // Overextended len.
- | mov PTR, DECODER->checkpoint // Rollback to before the delim len.
- | // Prepare seterr args.
- | mov ARG1_64, DECODER
- | ld64 kPbDecoderSubmessageTooLong
- | callp upb_pbdecoder_seterr
- | call ->suspend
- | jmp <1
- |
- | // For getting a value that spans a buffer seam. Falls back to C.
- |.macro getvalue_slow, func, bytes
- | sub rsp, 8 // Need stack space for func to write value to.
- |1:
- | mov qword [rsp], 0 // For parsing routines that only parse 32 bits.
- | mov ARG1_64, DECODER
- | mov ARG2_64, rsp
- | mov DECODER->checkpoint, PTR
- | commit_regs
- | callp func
- | load_regs
- | test eax, eax
- | jns >2
- | // Success; return parsed data (in rdx AND xmm0).
- | mov rdx, [rsp]
- | movsd xmm0, qword [rsp]
- | add rsp, 8
- | sub PTR, bytes // Bias our buffer pointer to rejoin the fast-path.
- | mov DECODER->ptr, PTR
- | ret
- |2:
- | call ->exitjit // Return eax from decode function.
- | jmp <1
- |.endmacro
- |
- asmlabel(jc, "parse_unknown");
- | // Args: edx=fieldnum, cl=wire type
- |->parse_unknown:
- | // OPT: handle directly instead of kicking to C.
- | // Check for ENDGROUP.
- | mov ARG1_64, DECODER
- | mov ARG2_32, edx
- | movzx ARG3_32, cl
- | commit_regs
- | callp upb_pbdecoder_skipunknown
- | load_regs
- | cmp eax, DECODE_ENDGROUP
- | jne >1
- | ret // Return eax=DECODE_ENDGROUP, not zero
- |1:
- | cmp eax, DECODE_OK
- | je >1
- | call ->exitjit // Return eax from decode function.
- |1:
- | xor eax, eax
- | ret
- |
- | // Fallback functions for parsing single values. These are used when the
- | // buffer doesn't contain enough remaining data for the fast path. Each
- | // primitive type (v32, v64, f32, f64) has two functions: decode & skip.
- | // Decode functions return their value in rsi/esi.
- | //
- | // These functions leave PTR = value_end - fast_path_bytes, so that we can
- | // re-join the fast path which will add fast_path_bytes after the callback
- | // completes. We also set DECODER->ptr to this value which is a signal to
- | // ->suspend that DECODER->checkpoint is up to date.
- asmlabel(jc, "skip_decode_f32_fallback");
- |->skipf32_fallback:
- |->decodef32_fallback:
- | getvalue_slow upb_pbdecoder_decode_f32, 4
- |
- asmlabel(jc, "skip_decode_f64_fallback");
- |->skipf64_fallback:
- |->decodef64_fallback:
- | getvalue_slow upb_pbdecoder_decode_f64, 8
- |
- | // Called for varint >= 1 byte.
- asmlabel(jc, "skip_decode_v32_fallback");
- |->skipv32_fallback:
- |->skipv64_fallback:
- | chkeob 16, >1
- | // With at least 16 bytes left, we can do a branch-less SSE version.
- | movdqu xmm0, [PTR]
- | pmovmskb eax, xmm0 // bits 0-15 are continuation bits, 16-31 are 0.
- | not eax
- | bsf eax, eax
- | cmp al, 10
- | jae ->decode_varint_slow // Error (>10 byte varint).
- | add PTR, rax // bsf result is 0-based, so PTR=end-1, as desired.
- | ret
- |
- |1:
- | // With fewer than 16 bytes, we have to read byte by byte.
- | lea rcx, [PTR + 10]
- | mov rax, PTR // Preserve PTR in case of fallback to slow path.
- | cmp rcx, DATAEND
- | cmova rcx, DATAEND // rcx = MIN(DATAEND, PTR + 10)
- |2:
- | cmp rax, rcx
- | je ->decode_varint_slow
- | test byte [rax], 0x80
- | jz >3
- | add rax, 1
- | jmp <2
- |3:
- | mov PTR, rax // PTR = varint_end - 1, as desired
- | ret
- |
- | // Returns tag in edx
- asmlabel(jc, "decode_unknown_tag_fallback");
- |->decode_unknown_tag_fallback:
- | sub rsp, 16
- |1:
- | cmp PTR, DELIMEND
- | jne >2
- | add rsp, 16
- | xor eax, eax
- | ret
- |2:
- | // OPT: Have a medium-fast path before falling back to _slow.
- | mov ARG1_64, DECODER
- | mov ARG2_64, rsp
- | commit_regs
- | callp upb_pbdecoder_decode_varint_slow
- | load_regs
- | cmp eax, 0
- | jge >3
- | mov edx, [rsp] // Success; return parsed data.
- | add rsp, 16
- | ret
- |3:
- | call ->exitjit // Return eax from decode function.
- | jmp <1
- |
- | // Called for varint >= 1 byte.
- asmlabel(jc, "decode_v32_v64_fallback");
- |->decodev32_fallback:
- |->decodev64_fallback:
- | chkeob 10, ->decode_varint_slow
- | // OPT: do something faster than just calling the C version.
- | mov rdi, PTR
- | callp upb_vdecode_fast
- | test rax, rax
- | je ->decode_varint_slow // Unterminated varint.
- | mov PTR, rax
- | sub PTR, 1
- | mov DECODER->ptr, PTR
- | ret
- |
- asmlabel(jc, "decode_varint_slow");
- |->decode_varint_slow:
- | // Slow path: end of buffer or error (varint length > 10).
- | getvalue_slow upb_pbdecoder_decode_varint_slow, 1
- |
- | // Args: rsi=expected tag, return=rax (DECODE_{OK,MISMATCH})
- asmlabel(jc, "checktag_fallback");
- |->checktag_fallback:
- | sub rsp, 8
- | mov [rsp], rsi // Preserve expected tag.
- |1:
- | mov ARG1_64, DECODER
- | commit_regs
- | mov DECODER->checkpoint, PTR
- | callp upb_pbdecoder_checktag_slow
- | load_regs
- | cmp eax, 0
- | jge >2
- | add rsp, 8
- | ret
- |2:
- | call ->exitjit
- | mov rsi, [rsp]
- | cmp PTR, DELIMEND
- | jne <1
- | mov eax, DECODE_EOF
- | add rsp, 8
- | ret
- |
- | // Args: rsi=upb_inttable, rdx=key, return=rax (-1 if not found).
- | // Preserves: rcx, rdx
- | // OPT: Could write this in assembly if it's a hotspot.
- asmlabel(jc, "hashlookup");
- |->hashlookup:
- | push rcx
- | push rdx
- | sub rsp, 16
- | mov rdi, rsi
- | mov rsi, rdx
- | mov rdx, rsp
- | callp upb_inttable_lookup
- | add rsp, 16
- | pop rdx
- | pop rcx
- | test al, al
- | jz >2 // Unknown field.
- | mov rax, [rsp-32] // Value from table.
- | ret
- |2:
- | xor rax, rax
- | not rax
- | ret
-}
-
-static void jitprimitive(jitcompiler *jc, opcode op,
- const upb_handlers *h, upb_selector_t sel) { /* Emits code that decodes one primitive value for "op" and then either stores it at CLOSURE + offset or calls the handler for "sel"; when no handler is registered the value is only skipped. */
- typedef enum { V32, V64, F32, F64, X } valtype_t;
- static valtype_t types[] = {
- X, F64, F32, V64, V64, V32, F64, F32, V64, X, X, X, X, V32, V32, F32, F64,
- V32, V64 };
- static char fastpath_bytes[] = { 1, 1, 4, 8 }; /* Indexed by valtype_t (V32, V64, F32, F64); there is no entry for X. */
- const valtype_t vtype = types[op];
- const int fastbytes = fastpath_bytes[vtype];
- upb_func *handler = gethandler(h, sel);
- upb_fieldtype_t ftype;
- size_t offset;
- int32_t hasbit;
-
- if (handler) {
- |1:
- | chkneob fastbytes, >3
- |2:
- switch (vtype) {
- case V32:
- | call ->decodev32_fallback
- break;
- case V64:
- | call ->decodev64_fallback
- break;
- case F32:
- | call ->decodef32_fallback
- break;
- case F64:
- | call ->decodef64_fallback
- break;
- case X: break;
- }
- | jmp >4
-
- /* Fast path decode; for when at least fastbytes bytes are available. */
- |3:
- switch (op) {
- case OP_PARSE_SFIXED32:
- case OP_PARSE_FIXED32:
- | mov edx, dword [PTR]
- break;
- case OP_PARSE_SFIXED64:
- case OP_PARSE_FIXED64:
- | mov rdx, qword [PTR]
- break;
- case OP_PARSE_FLOAT:
- | movss xmm0, dword [PTR]
- break;
- case OP_PARSE_DOUBLE:
- | movsd xmm0, qword [PTR]
- break;
- default:
- /* Inline one byte of varint decoding. */
- | movzx edx, byte [PTR]
- | test dl, dl
- | js <2 // Fallback to slow path for >1 byte varint.
- break;
- }
-
- /* Second-stage decode; used for both fast and slow paths */
- /* (only needed for a few types). */
- |4:
- switch (op) {
- case OP_PARSE_SINT32:
- /* 32-bit zig-zag decode. */
- | mov eax, edx
- | shr edx, 1
- | and eax, 1
- | neg eax
- | xor edx, eax
- break;
- case OP_PARSE_SINT64:
- /* 64-bit zig-zag decode. */
- | mov rax, rdx
- | shr rdx, 1
- | and rax, 1
- | neg rax
- | xor rdx, rax
- break;
- case OP_PARSE_BOOL:
- | test rdx, rdx
- | setne dl
- break;
- default: break;
- }
-
- /* Call callback (or specialize if we can). */
- if (upb_msg_getscalarhandlerdata(h, sel, &ftype, &offset, &hasbit)) {
- switch (ftype) {
- case UPB_TYPE_INT64:
- case UPB_TYPE_UINT64:
- | mov [CLOSURE + offset], rdx
- break;
- case UPB_TYPE_INT32:
- case UPB_TYPE_UINT32:
- case UPB_TYPE_ENUM:
- | mov [CLOSURE + offset], edx
- break;
- case UPB_TYPE_DOUBLE:
- | movsd qword [CLOSURE + offset], XMMARG1
- break;
- case UPB_TYPE_FLOAT:
- | movss dword [CLOSURE + offset], XMMARG1
- break;
- case UPB_TYPE_BOOL:
- | mov [CLOSURE + offset], dl
- break;
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES:
- case UPB_TYPE_MESSAGE:
- UPB_ASSERT(false); break;
- }
- | sethas CLOSURE, hasbit
- } else if (handler) { /* handler is always non-NULL here (we are inside "if (handler)"). */
- | mov ARG1_64, CLOSURE
- | load_handler_data h, sel
- | callp handler
- if (!alwaysok(h, sel)) {
- | test al, al
- | jnz >5
- | call ->suspend
- | jmp <1
- |5:
- }
- }
-
- /* We do this last so that the checkpoint is not advanced past the user's
- * data until the callback has returned success. */
- | add PTR, fastbytes
- } else {
- /* No handler registered for this value, just skip it. */
- | chkneob fastbytes, >3
- |2:
- switch (vtype) {
- case V32:
- | call ->skipv32_fallback
- break;
- case V64:
- | call ->skipv64_fallback
- break;
- case F32:
- | call ->skipf32_fallback
- break;
- case F64:
- | call ->skipf64_fallback
- break;
- case X: break;
- }
-
- /* Fast-path skip. */
- |3:
- if (vtype == V32 || vtype == V64) {
- | test byte [PTR], 0x80
- | jnz <2
- }
- | add PTR, fastbytes
- }
-}
-
-static void jitdispatch(jitcompiler *jc,
- const upb_pbdecodermethod *method) { /* Emits the dispatch routine for "method": decodes a field tag, looks it up in method->dispatch and returns the target code address in rax (rax == 0 when DELIMEND was hit). */
- /* Lots of room for tweaking/optimization here. */
-
- const upb_inttable *dispatch = &method->dispatch;
- bool has_hash_entries = (dispatch->t.count > 0);
-
- /* Whether any of the fields for this message can have two wire types which
- * are both valid (packed & non-packed).
- *
- * OPT: populate this more precisely; not all messages with hash entries have
- * this characteristic. */
- bool has_multi_wiretype = has_hash_entries;
-
- |=>define_jmptarget(jc, &method->dispatch):
- |1:
- /* Decode the field tag. */
- | mov aword DECODER->checkpoint, PTR
- | chkeob 2, >6
- | movzx edx, byte [PTR]
- | test dl, dl
- | jns >7 // Jump if first byte has no continuation bit.
- | movzx ecx, byte [PTR + 1]
- | test cl, cl
- | js >6 // Jump if second byte has continuation bit.
- | // Confirmed two-byte varint.
- | shl ecx, 7
- | and edx, 0x7f
- | or edx, ecx
- | add PTR, 2
- | jmp >8
- |6:
- | call ->decode_unknown_tag_fallback
- | test eax, eax // eax == 0 means the fallback hit DELIMEND.
- | jnz >8
- | ret
- |7:
- | add PTR, 1
- |8:
- | mov ecx, edx
- | shr edx, 3
- | and cl, 7
-
- /* See comment attached to upb_pbdecodermethod.dispatch for layout of the
- * dispatch table. */
- |2:
- | cmp edx, dispatch->array_size
- if (has_hash_entries) {
- | jae >7
- } else {
- | jae >5
- }
- | // OPT: Compact the lookup arr into 32-bit entries.
- if ((uintptr_t)dispatch->array > 0x7fffffff) {
- | mov64 rax, (uintptr_t)dispatch->array
- | mov rax, qword [rax + rdx * 8]
- } else {
- | mov rax, qword [rdx * 8 + dispatch->array]
- }
- |3:
- | // We take advantage of the fact that non-present entries are stored
- | // as -1, which will result in wire types that will never match.
- | cmp al, cl
- if (has_multi_wiretype) {
- | jne >6
- } else {
- | jne >5
- }
- | shr rax, 16
- |
- | // Load the machine code address from the table entry.
- | // The table entry is relative to the dispatch->array jmptarget
- | // (patchdispatch() took care of this) which is the same as
- | // local label "4". The "lea" is really just trying to do
- | // lea rax, [>4 + rax]
- | //
- | // But we can't write that directly for some reason, so we use
- | // rdx as a temporary.
- | lea rdx, [>4]
- |=>define_jmptarget(jc, dispatch->array):
- |4:
- | add rax, rdx
- | ret
- |
- |5:
- | // Field isn't in our table.
- |
- | // For pushing unknown fields to the unknown field handler.
- | mov64 rax, (uintptr_t)method->dest_handlers_
- | mov FRAME->sink.handlers, rax
- |
- | call ->parse_unknown
- | test eax, eax // ENDGROUP?
- | jz <1
- | lea rax, [>9] // ENDGROUP; Load address of OP_ENDMSG.
- | ret
-
- if (has_multi_wiretype) {
- |6:
- | // Primary wire type didn't match, check secondary wire type.
- | cmp ah, cl
- | jne <5
- | // Secondary wire type is a match, look up fn + UPB_MAX_FIELDNUMBER.
- | add rdx, UPB_MAX_FIELDNUMBER
- | // This key will never be in the array part, so do a hash lookup.
- UPB_ASSERT(has_hash_entries);
- | ld64 dispatch
- | jmp ->hashlookup // Tail call.
- }
-
- if (has_hash_entries) {
- |7:
- | // Hash table lookup.
- | ld64 dispatch
- | call ->hashlookup
- | jmp <3
- }
-}
-
-static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
- const upb_pbdecodermethod *method) { /* Emits a check that the next n bytes equal "tag"; on a match PTR advances by n, otherwise we dispatch (ofs == 0) or jump to jc->pc + ofs. */
- /* Internally we parse unknown fields; if this runs us into DELIMEND we jump
- * to the corresponding DELIMEND target (either msg end or repeated field
- * end), which we find from the OP_CHECKDELIM which must have necessarily
- * preceded us. */
- uint32_t last_instruction = *(jc->pc - 2);
- int last_arg = (int32_t)last_instruction >> 8;
- uint32_t *delimend = (jc->pc - 1) + last_arg;
- const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
-
- UPB_ASSERT((last_instruction & 0xff) == OP_CHECKDELIM);
-
- if (getop(*(jc->pc - 1)) == OP_TAGN) {
- jc->pc += ptr_words;
- }
-
- | chkneob n, >1
-
- | // OPT: this is way too much fallback code to put here.
- | // Reduce and/or move to a separate section to make better icache usage.
- | ld64 tag
- | call ->checktag_fallback
- | cmp eax, DECODE_MISMATCH
- | je >3
- | cmp eax, DECODE_EOF
- | je =>jmptarget(jc, delimend)
- | jmp >5
-
- |1:
- switch (n) {
- case 1:
- | cmp byte [PTR], tag
- break;
- case 2:
- | cmp word [PTR], tag
- break;
- case 3:
- | // OPT: Slightly more efficient code, but depends on an extra byte.
- | // mov eax, dword [PTR]
- | // shl eax, 8
- | // cmp eax, tag << 8
- | cmp word [PTR], (tag & 0xffff)
- | jne >2
- | cmp byte [PTR + 2], (tag >> 16)
- |2:
- break;
- case 4:
- | cmp dword [PTR], tag
- break;
- case 5:
- | cmp dword [PTR], (tag & 0xffffffff)
- | jne >3
- | cmp byte [PTR + 4], (tag >> 32)
- }
- | je >4
- |3:
- if (ofs == 0) {
- | call =>jmptarget(jc, &method->dispatch)
- | test rax, rax
- | jz =>jmptarget(jc, delimend)
- | jmp rax
- } else {
- | jmp =>jmptarget(jc, jc->pc + ofs)
- }
- |4:
- | add PTR, n
- |5:
-}
-
-/* Compile the bytecode to x64: walks jc->group->bytecode up to bytecode_end and emits machine code for each instruction. */
-static void jitbytecode(jitcompiler *jc) {
- upb_pbdecodermethod *method = NULL;
- const upb_handlers *h = NULL;
- for (jc->pc = jc->group->bytecode; jc->pc < jc->group->bytecode_end; ) {
- int32_t instr = *jc->pc;
- opcode op = instr & 0xff;
- uint32_t arg = instr >> 8;
- int32_t longofs = arg;
-
- if (op != OP_SETDISPATCH) {
- /* Skipped for SETDISPATCH because it defines its own asmlabel for the
- * dispatch code it emits. */
- asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
-
- /* Skipped for SETDISPATCH because it should point at the function
- * prologue, not the dispatch function that is emitted first.
- * TODO: optimize this to only define pclabels that are actually used. */
- |=>define_jmptarget(jc, jc->pc):
- }
-
- jc->pc++;
-
- switch (op) {
- case OP_STARTMSG: {
- upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
- if (startmsg) {
- /* bool startmsg(void *closure, const void *hd) */
- |1:
- | mov ARG1_64, CLOSURE
- | load_handler_data h, UPB_STARTMSG_SELECTOR
- | callp startmsg
- if (!alwaysok(h, UPB_STARTMSG_SELECTOR)) {
- | test al, al
- | jnz >2
- | call ->suspend
- | jmp <1
- |2:
- }
- } else {
- | nop
- }
- break;
- }
- case OP_ENDMSG: {
- upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
- |9:
- if (endmsg) {
- /* bool endmsg(void *closure, const void *hd, upb_status *status) */
- | mov ARG1_64, CLOSURE
- | load_handler_data h, UPB_ENDMSG_SELECTOR
- | mov ARG3_64, DECODER->status
- | callp endmsg
- }
- break;
- }
- case OP_SETDISPATCH: {
- uint32_t *op_pc = jc->pc - 1;
- const char *msgname;
- upb_inttable *dispatch;
-
- /* Load info for new method. */
- memcpy(&dispatch, jc->pc, sizeof(void*));
- jc->pc += sizeof(void*) / sizeof(uint32_t);
- /* The OP_SETDISPATCH bytecode contains a pointer that is
- * &method->dispatch; we want to go backwards and recover method. */
- method =
- (void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch));
- /* May be NULL, in which case no handlers for this message will be found.
- * OPT: we should do better by completely skipping the message in this
- * case instead of parsing it field by field. We should also do the skip
- * in the containing message's code. NOTE(review): if h can really be NULL here, it is still passed to upb_handlers_msgdef() just below; confirm that call tolerates NULL. */
- h = method->dest_handlers_;
- msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
-
- /* Emit dispatch code for new method. */
- asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
- jitdispatch(jc, method);
-
- /* Emit function prologue for new method. */
- asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
- |=>define_jmptarget(jc, op_pc):
- |=>define_jmptarget(jc, method):
- | sub rsp, 8
-
- break;
- }
- case OP_PARSE_DOUBLE:
- case OP_PARSE_FLOAT:
- case OP_PARSE_INT64:
- case OP_PARSE_UINT64:
- case OP_PARSE_INT32:
- case OP_PARSE_FIXED64:
- case OP_PARSE_FIXED32:
- case OP_PARSE_BOOL:
- case OP_PARSE_UINT32:
- case OP_PARSE_SFIXED32:
- case OP_PARSE_SFIXED64:
- case OP_PARSE_SINT32:
- case OP_PARSE_SINT64:
- jitprimitive(jc, op, h, arg);
- break;
- case OP_STARTSEQ:
- case OP_STARTSUBMSG:
- case OP_STARTSTR: {
- upb_func *start = gethandler(h, arg);
- if (start) {
- /* void *startseq(void *closure, const void *hd)
- * void *startsubmsg(void *closure, const void *hd)
- * void *startstr(void *closure, const void *hd, size_t size_hint) */
- |1:
- | mov ARG1_64, CLOSURE
- | load_handler_data h, arg
- if (op == OP_STARTSTR) {
- | mov ARG3_64, DELIMEND
- | sub ARG3_64, PTR
- }
- | callp start
- if (!alwaysok(h, arg)) {
- | test rax, rax
- | jnz >2
- | call ->suspend
- | jmp <1
- |2:
- }
- | mov CLOSURE, rax
- } else {
- /* TODO: nop is only required because of asmlabel(). */
- | nop
- }
- break;
- }
- case OP_ENDSEQ:
- case OP_ENDSUBMSG:
- case OP_ENDSTR: {
- upb_func *end = gethandler(h, arg);
- if (end) {
- /* bool endseq(void *closure, const void *hd)
- * bool endsubmsg(void *closure, const void *hd)
- * bool endstr(void *closure, const void *hd) */
- |1:
- | mov ARG1_64, CLOSURE
- | load_handler_data h, arg
- | callp end
- if (!alwaysok(h, arg)) {
- | test al, al
- | jnz >2
- | call ->suspend
- | jmp <1
- |2:
- }
- } else {
- /* TODO: nop is only required because of asmlabel(). */
- | nop
- }
- break;
- }
- case OP_STRING: {
- upb_func *str = gethandler(h, arg);
- | cmp PTR, DELIMEND
- | je >4
- |1:
- | cmp PTR, DATAEND
- | jne >2
- | call ->suspend
- | jmp <1
- |2:
- if (str) {
- /* size_t str(void *closure, const void *hd, const char *str,
- * size_t n) */
- | mov ARG1_64, CLOSURE
- | load_handler_data h, arg
- | mov ARG3_64, PTR
- | mov ARG4_64, DATAEND
- | sub ARG4_64, PTR
- | mov ARG5_64, qword DECODER->handle
- | callp str
- | add PTR, rax
- if (!alwaysok(h, arg)) {
- | cmp PTR, DATAEND
- | je >3
- | call ->strret_fallback
- |3:
- }
- } else {
- | mov PTR, DATAEND
- }
- | cmp PTR, DELIMEND
- | jne <1
- |4:
- break;
- }
- case OP_PUSHTAGDELIM:
- | mov FRAME->sink.closure, CLOSURE
- | // This shouldn't need to be read, because tag-delimited fields
- | // shouldn't have an OP_SETDELIM after them. But for the moment
- | // non-packed repeated fields do OP_SETDELIM so they can share more
- | // code with the packed code-path. If this is changed later, this
- | // store can be removed.
- | mov qword FRAME->end_ofs, 0
- | cmp FRAME, DECODER->limit
- | je ->err
- | add FRAME, sizeof(upb_pbdecoder_frame)
- | mov dword FRAME->groupnum, arg
- break;
- case OP_PUSHLENDELIM:
- | call ->pushlendelim
- break;
- case OP_POP:
- | sub FRAME, sizeof(upb_pbdecoder_frame)
- | mov CLOSURE, FRAME->sink.closure
- break;
- case OP_SETDELIM:
- /* OPT: experiment with testing vs old offset to optimize away. */
- | mov DATAEND, DECODER->end
- | add DELIMEND, FRAME->end_ofs
- | cmp DELIMEND, DECODER->buf
- | jb >1
- | cmp DELIMEND, DATAEND
- | ja >1 // OPT: try cmov.
- | mov DATAEND, DELIMEND
- |1:
- break;
- case OP_SETBIGGROUPNUM:
- | mov dword FRAME->groupnum, *jc->pc++
- break;
- case OP_CHECKDELIM:
- | cmp DELIMEND, PTR
- | je =>jmptarget(jc, jc->pc + longofs)
- break;
- case OP_CALL:
- | call =>jmptarget(jc, jc->pc + longofs)
- break;
- case OP_BRANCH:
- | jmp =>jmptarget(jc, jc->pc + longofs);
- break;
- case OP_RET:
- |9:
- | add rsp, 8
- | ret
- break;
- case OP_TAG1:
- jittag(jc, (arg >> 8) & 0xff, 1, (int8_t)arg, method);
- break;
- case OP_TAG2:
- jittag(jc, (arg >> 8) & 0xffff, 2, (int8_t)arg, method);
- break;
- case OP_TAGN: {
- uint64_t tag;
- memcpy(&tag, jc->pc, 8);
- jittag(jc, tag, arg >> 8, (int8_t)arg, method);
- break;
- }
- case OP_DISPATCH:
- | call =>jmptarget(jc, &method->dispatch)
- break;
- case OP_HALT:
- UPB_ASSERT(false);
- }
- }
-
- asmlabel(jc, "eof");
- | nop
-}
diff --git a/upb/pb/compile_decoder_x64.h b/upb/pb/compile_decoder_x64.h
deleted file mode 100644
index 4a4dffc..0000000
--- a/upb/pb/compile_decoder_x64.h
+++ /dev/null
@@ -1,1737 +0,0 @@
-/*
-** This file has been pre-processed with DynASM.
-** http://luajit.org/dynasm.html
-** DynASM version 1.3.0, DynASM x64 version 1.3.0
-** DO NOT EDIT! The original file is in "upb/pb/compile_decoder_x64.dasc".
-*/
-
-#if DASM_VERSION != 10300
-#error "Version mismatch between DynASM and included encoding engine"
-#endif
-
-# 1 "upb/pb/compile_decoder_x64.dasc"
-/*|// */
-/*|// upb - a minimalist implementation of protocol buffers. */
-/*|// */
-/*|// Copyright (c) 2011-2013 Google Inc. See LICENSE for details. */
-/*|// Author: Josh Haberman <jhaberman@gmail.com> */
-/*|// */
-/*|// JIT compiler for upb_pbdecoder on x86-64. Generates machine code from the */
-/*|// bytecode generated in compile_decoder.c. */
-/*| */
-/*|.arch x64 */
-/*|.actionlist upb_jit_actionlist */
-static const unsigned char upb_jit_actionlist[2467] = {
- 249,255,248,10,248,1,85,65,87,65,86,65,85,65,84,83,72,137,252,243,73,137,
- 252,255,72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,208,76,137,
- 228,65,92,133,192,15,137,244,247,73,137,167,233,72,137,216,77,139,183,233,
- 73,139,159,233,77,139,167,233,77,139,174,233,73,139,174,233,73,43,175,233,
- 73,3,175,233,73,139,151,233,72,133,210,15,133,244,248,252,255,208,73,139,
- 135,233,73,199,135,233,0,0,0,0,248,1,255,91,65,92,65,93,65,94,65,95,93,195,
- 248,2,73,139,183,233,72,41,212,72,137,231,72,184,237,237,65,84,73,137,228,
- 72,129,228,239,252,255,208,76,137,228,65,92,195,255,248,11,73,139,191,233,
- 72,137,230,73,139,151,233,72,41,226,73,137,151,233,137,195,72,184,237,237,
- 65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,137,216,73,139,
- 167,233,91,65,92,65,93,65,94,65,95,93,195,255,248,12,73,57,159,233,15,132,
- 244,247,73,137,159,233,248,1,77,137,183,233,73,137,159,233,77,137,167,233,
- 73,137,175,233,73,43,175,233,73,3,175,233,73,137,174,233,77,137,174,233,76,
- 137,252,255,72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,208,76,
- 137,228,65,92,252,233,244,11,255,248,13,248,1,77,137,174,233,73,137,159,233,
- 255,76,57,227,15,132,244,253,255,76,137,225,72,41,217,72,131,252,249,1,15,
- 130,244,253,255,15,182,19,132,210,15,137,244,254,248,7,232,244,14,248,8,72,
- 131,195,1,72,137,252,233,72,41,217,72,41,209,15,130,244,250,73,137,142,233,
- 77,59,183,233,15,132,244,249,73,129,198,239,72,137,221,72,1,213,65,199,134,
- 233,0,0,0,0,72,133,201,15,132,244,248,77,139,167,233,72,57,252,235,15,135,
- 244,248,76,57,229,15,135,244,248,255,73,137,252,236,248,2,195,248,3,73,139,
- 159,233,76,137,252,255,255,72,190,237,237,255,190,237,255,49,252,246,255,
- 72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,
- 92,232,244,12,252,233,244,1,248,4,73,139,159,233,76,137,252,255,255,72,184,
- 237,237,65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,232,
- 244,12,252,233,244,1,255,248,15,76,137,252,255,137,214,15,182,209,77,137,
- 183,233,73,137,159,233,77,137,167,233,73,137,175,233,73,43,175,233,73,3,175,
- 233,73,137,174,233,77,137,174,233,72,184,237,237,65,84,73,137,228,72,129,
- 228,239,252,255,208,76,137,228,65,92,77,139,183,233,73,139,159,233,77,139,
- 167,233,77,139,174,233,73,139,174,233,73,43,175,233,73,3,175,233,129,252,
- 248,239,255,15,133,244,247,195,248,1,129,252,248,239,15,132,244,247,232,244,
- 11,248,1,49,192,195,255,248,16,248,17,72,131,252,236,8,248,1,72,199,4,36,
- 0,0,0,0,76,137,252,255,72,137,230,73,137,159,233,77,137,183,233,73,137,159,
- 233,77,137,167,233,73,137,175,233,73,43,175,233,73,3,175,233,73,137,174,233,
- 77,137,174,233,72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,208,
- 76,137,228,65,92,77,139,183,233,73,139,159,233,77,139,167,233,77,139,174,
- 233,73,139,174,233,255,73,43,175,233,73,3,175,233,133,192,15,137,244,248,
- 72,139,20,36,252,242,15,16,4,36,72,131,196,8,72,131,252,235,4,73,137,159,
- 233,195,248,2,232,244,11,252,233,244,1,255,248,18,248,19,72,131,252,236,8,
- 248,1,72,199,4,36,0,0,0,0,76,137,252,255,72,137,230,73,137,159,233,77,137,
- 183,233,73,137,159,233,77,137,167,233,73,137,175,233,73,43,175,233,73,3,175,
- 233,73,137,174,233,77,137,174,233,72,184,237,237,65,84,73,137,228,72,129,
- 228,239,252,255,208,76,137,228,65,92,77,139,183,233,73,139,159,233,77,139,
- 167,233,77,139,174,233,73,139,174,233,255,73,43,175,233,73,3,175,233,133,
- 192,15,137,244,248,72,139,20,36,252,242,15,16,4,36,72,131,196,8,72,131,252,
- 235,8,73,137,159,233,195,248,2,232,244,11,252,233,244,1,255,248,20,248,21,
- 255,76,57,227,15,132,244,247,255,76,137,225,72,41,217,72,131,252,249,16,15,
- 130,244,247,255,252,243,15,111,3,102,15,215,192,252,247,208,15,188,192,60,
- 10,15,131,244,22,72,1,195,195,248,1,72,141,139,233,72,137,216,76,57,225,73,
- 15,71,204,248,2,72,57,200,15,132,244,22,252,246,0,128,15,132,244,249,72,131,
- 192,1,252,233,244,2,248,3,72,137,195,195,255,248,23,72,131,252,236,16,248,
- 1,72,57,252,235,15,133,244,248,72,131,196,16,49,192,195,248,2,76,137,252,
- 255,72,137,230,77,137,183,233,73,137,159,233,77,137,167,233,73,137,175,233,
- 73,43,175,233,73,3,175,233,73,137,174,233,77,137,174,233,72,184,237,237,65,
- 84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,77,139,183,233,
- 73,139,159,233,77,139,167,233,77,139,174,233,255,73,139,174,233,73,43,175,
- 233,73,3,175,233,131,252,248,0,15,141,244,249,139,20,36,72,131,196,16,195,
- 248,3,232,244,11,252,233,244,1,255,248,14,248,24,255,76,57,227,15,132,244,
- 22,255,76,137,225,72,41,217,72,131,252,249,10,15,130,244,22,255,72,137,223,
- 72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,
- 92,72,133,192,15,132,244,22,72,137,195,72,131,252,235,1,73,137,159,233,195,
- 255,248,22,72,131,252,236,8,248,1,72,199,4,36,0,0,0,0,76,137,252,255,72,137,
- 230,73,137,159,233,77,137,183,233,73,137,159,233,77,137,167,233,73,137,175,
- 233,73,43,175,233,73,3,175,233,73,137,174,233,77,137,174,233,72,184,237,237,
- 65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,77,139,183,233,
- 73,139,159,233,77,139,167,233,77,139,174,233,73,139,174,233,73,43,175,233,
- 255,73,3,175,233,133,192,15,137,244,248,72,139,20,36,252,242,15,16,4,36,72,
- 131,196,8,72,131,252,235,1,73,137,159,233,195,248,2,232,244,11,252,233,244,
- 1,255,248,25,72,131,252,236,8,72,137,52,36,248,1,76,137,252,255,77,137,183,
- 233,73,137,159,233,77,137,167,233,73,137,175,233,73,43,175,233,73,3,175,233,
- 73,137,174,233,77,137,174,233,73,137,159,233,72,184,237,237,65,84,73,137,
- 228,72,129,228,239,252,255,208,76,137,228,65,92,77,139,183,233,73,139,159,
- 233,77,139,167,233,77,139,174,233,73,139,174,233,73,43,175,233,255,73,3,175,
- 233,131,252,248,0,15,141,244,248,72,131,196,8,195,248,2,232,244,11,72,139,
- 52,36,72,57,252,235,15,133,244,1,184,237,72,131,196,8,195,255,248,26,81,82,
- 72,131,252,236,16,72,137,252,247,72,137,214,72,137,226,72,184,237,237,65,
- 84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,72,131,196,16,90,
- 89,132,192,15,132,244,248,72,139,68,36,224,195,248,2,72,49,192,72,252,247,
- 208,195,255,76,57,227,15,133,244,249,255,76,137,225,72,41,217,72,129,252,
- 249,239,15,131,244,249,255,248,2,255,232,244,14,255,232,244,24,255,232,244,
- 17,255,232,244,19,255,252,233,244,250,255,248,3,255,139,19,255,72,139,19,
- 255,252,243,15,16,3,255,252,242,15,16,3,255,15,182,19,132,210,15,136,244,
- 2,255,248,4,255,137,208,209,252,234,131,224,1,252,247,216,49,194,255,72,137,
- 208,72,209,252,234,72,131,224,1,72,252,247,216,72,49,194,255,72,133,210,15,
- 149,210,255,73,137,149,233,255,65,137,149,233,255,252,242,65,15,17,133,233,
- 255,252,243,65,15,17,133,233,255,65,136,149,233,255,65,128,141,233,235,255,
- 76,137,252,239,255,72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,
- 208,76,137,228,65,92,255,132,192,15,133,244,251,232,244,12,252,233,244,1,
- 248,5,255,72,129,195,239,255,232,244,20,255,232,244,21,255,232,244,16,255,
- 232,244,18,255,252,246,3,128,15,133,244,2,255,249,248,1,255,76,57,227,15,
- 132,244,252,255,76,137,225,72,41,217,72,131,252,249,2,15,130,244,252,255,
- 15,182,19,132,210,15,137,244,253,15,182,139,233,132,201,15,136,244,252,193,
- 225,7,131,226,127,9,202,72,131,195,2,252,233,244,254,248,6,232,244,23,133,
- 192,15,133,244,254,195,248,7,72,131,195,1,248,8,137,209,193,252,234,3,128,
- 225,7,255,248,2,129,252,250,239,255,15,131,244,253,255,15,131,244,251,255,
- 72,184,237,237,72,139,4,208,255,72,139,4,213,237,255,248,3,56,200,255,15,
- 133,244,252,255,15,133,244,251,255,72,193,232,16,72,141,21,244,250,249,248,
- 4,72,1,208,195,248,5,72,184,237,237,73,137,134,233,232,244,15,133,192,15,
- 132,244,1,72,141,5,244,255,195,255,248,6,56,204,15,133,244,5,72,129,194,239,
- 255,252,233,244,26,255,248,7,255,232,244,26,252,233,244,3,255,76,57,227,15,
- 133,244,247,255,76,137,225,72,41,217,72,129,252,249,239,15,131,244,247,255,
- 232,244,25,129,252,248,239,15,132,244,249,129,252,248,239,15,132,245,252,
- 233,244,251,255,128,59,235,255,102,129,59,238,255,102,129,59,238,15,133,244,
- 248,128,187,233,235,248,2,255,129,59,239,255,129,59,239,15,133,244,249,128,
- 187,233,235,255,15,132,244,250,248,3,255,232,245,72,133,192,15,132,245,252,
- 255,224,255,252,233,245,255,248,4,72,129,195,239,248,5,255,248,1,76,137,252,
- 239,255,132,192,15,133,244,248,232,244,12,252,233,244,1,248,2,255,144,255,
- 248,9,255,73,139,151,233,72,184,237,237,65,84,73,137,228,72,129,228,239,252,
- 255,208,76,137,228,65,92,255,249,249,72,131,252,236,8,255,72,137,252,234,
- 72,41,218,255,72,133,192,15,133,244,248,232,244,12,252,233,244,1,248,2,255,
- 73,137,197,255,72,57,252,235,15,132,244,250,248,1,76,57,227,15,133,244,248,
- 232,244,12,252,233,244,1,248,2,255,72,137,218,76,137,225,72,41,217,77,139,
- 135,233,72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,208,76,137,
- 228,65,92,72,1,195,255,76,57,227,15,132,244,249,232,244,27,248,3,255,76,137,
- 227,255,72,57,252,235,15,133,244,1,248,4,255,77,137,174,233,73,199,134,233,
- 0,0,0,0,77,59,183,233,15,132,244,28,73,129,198,239,65,199,134,233,237,255,
- 232,244,13,255,73,129,252,238,239,77,139,174,233,255,77,139,167,233,73,3,
- 174,233,73,59,175,233,15,130,244,247,76,57,229,15,135,244,247,73,137,252,
- 236,248,1,255,72,57,221,15,132,245,255,232,245,255,248,9,72,131,196,8,195,
- 255
-};
-
-# 12 "upb/pb/compile_decoder_x64.dasc"
-/*|.globals UPB_JIT_GLOBAL_ */
-enum {
- UPB_JIT_GLOBAL_enterjit,
- UPB_JIT_GLOBAL_exitjit,
- UPB_JIT_GLOBAL_suspend,
- UPB_JIT_GLOBAL_pushlendelim,
- UPB_JIT_GLOBAL_decodev32_fallback,
- UPB_JIT_GLOBAL_parse_unknown,
- UPB_JIT_GLOBAL_skipf32_fallback,
- UPB_JIT_GLOBAL_decodef32_fallback,
- UPB_JIT_GLOBAL_skipf64_fallback,
- UPB_JIT_GLOBAL_decodef64_fallback,
- UPB_JIT_GLOBAL_skipv32_fallback,
- UPB_JIT_GLOBAL_skipv64_fallback,
- UPB_JIT_GLOBAL_decode_varint_slow,
- UPB_JIT_GLOBAL_decode_unknown_tag_fallback,
- UPB_JIT_GLOBAL_decodev64_fallback,
- UPB_JIT_GLOBAL_checktag_fallback,
- UPB_JIT_GLOBAL_hashlookup,
- UPB_JIT_GLOBAL_strret_fallback,
- UPB_JIT_GLOBAL_err,
- UPB_JIT_GLOBAL__MAX
-};
-# 13 "upb/pb/compile_decoder_x64.dasc"
-/*|.globalnames upb_jit_globalnames */
-static const char *const upb_jit_globalnames[] = {
- "enterjit",
- "exitjit",
- "suspend",
- "pushlendelim",
- "decodev32_fallback",
- "parse_unknown",
- "skipf32_fallback",
- "decodef32_fallback",
- "skipf64_fallback",
- "decodef64_fallback",
- "skipv32_fallback",
- "skipv64_fallback",
- "decode_varint_slow",
- "decode_unknown_tag_fallback",
- "decodev64_fallback",
- "checktag_fallback",
- "hashlookup",
- "strret_fallback",
- "err",
- (const char *)0
-};
-# 14 "upb/pb/compile_decoder_x64.dasc"
-/*| */
-/*|// Calling conventions. Note -- this will need to be changed for */
-/*|// Windows, which uses a different calling convention! */
-/*|.define ARG1_64, rdi */
-/*|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi. */
-/*|.define ARG2_32, esi */
-/*|.define ARG2_64, rsi */
-/*|.define ARG3_8, dl */
-/*|.define ARG3_32, edx */
-/*|.define ARG3_64, rdx */
-/*|.define ARG4_64, rcx */
-/*|.define ARG5_64, r8 */
-/*|.define XMMARG1, xmm0 */
-/*| */
-/*|// Register allocation / type map. */
-/*|// ALL of the code in this file uses these register allocations. */
-/*|// When we "call" within this file, we do not use regular calling */
-/*|// conventions, but of course when calling to user callbacks we must. */
-/*|.define PTR, rbx // DECODER->ptr (unsynced) */
-/*|.define DATAEND, r12 // DECODER->data_end (unsynced) */
-/*|.define CLOSURE, r13 // FRAME->closure (unsynced) */
-/*|.type FRAME, upb_pbdecoder_frame, r14 // DECODER->top (unsynced) */
-#define Dt1(_V) (int)(ptrdiff_t)&(((upb_pbdecoder_frame *)0)_V)
-# 36 "upb/pb/compile_decoder_x64.dasc"
-/*|.type DECODER, upb_pbdecoder, r15 // DECODER (immutable) */
-#define Dt2(_V) (int)(ptrdiff_t)&(((upb_pbdecoder *)0)_V)
-# 37 "upb/pb/compile_decoder_x64.dasc"
-/*|.define DELIMEND, rbp */
-/*| */
-/*| // Spills unsynced registers back to memory. */
-/*|.macro commit_regs */
-/*| mov DECODER->top, FRAME */
-/*| mov DECODER->ptr, PTR */
-/*| mov DECODER->data_end, DATAEND */
-/*| // We don't guarantee that delim_end is NULL when out of range like the */
-/*| // interpreter does. */
-/*| mov DECODER->delim_end, DELIMEND */
-/*| sub DELIMEND, DECODER->buf */
-/*| add DELIMEND, DECODER->bufstart_ofs */
-/*| mov FRAME->end_ofs, DELIMEND */
-/*| mov FRAME->sink.closure, CLOSURE */
-/*|.endmacro */
-/*| */
-/*| // Loads unsynced registers from memory back into registers. */
-/*|.macro load_regs */
-/*| mov FRAME, DECODER->top */
-/*| mov PTR, DECODER->ptr */
-/*| mov DATAEND, DECODER->data_end */
-/*| mov CLOSURE, FRAME->sink.closure */
-/*| mov DELIMEND, FRAME->end_ofs */
-/*| sub DELIMEND, DECODER->bufstart_ofs */
-/*| add DELIMEND, DECODER->buf */
-/*|.endmacro */
-/*| */
-/*| // Calls an external C function at address "addr". */
-/*|.macro callp, addr */
-/*| mov64 rax, (uintptr_t)addr */
-/*| */
-/*| // Stack must be 16-byte aligned (x86-64 ABI requires this). */
-/*| // */
-/*| // OPT: possibly remove this by statically ensuring correct alignment. */
-/*| // */
-/*| // OPT: use "call rel32" where possible. */
-/*| push r12 */
-/*| mov r12, rsp */
-/*| and rsp, 0xfffffffffffffff0UL // Align stack. */
-/*| call rax */
-/*| mov rsp, r12 */
-/*| pop r12 */
-/*|.endmacro */
-/*| */
-/*|.macro ld64, val */
-/*|| { */
-/*|| uintptr_t v = (uintptr_t)val; */
-/*|| if (v > 0xffffffff) { */
-/*| mov64 ARG2_64, v */
-/*|| } else if (v) { */
-/*| mov ARG2_32, v */
-/*|| } else { */
-/*| xor ARG2_32, ARG2_32 */
-/*|| } */
-/*|| } */
-/*|.endmacro */
-/*| */
-/*|.macro load_handler_data, h, arg */
-/*| ld64 gethandlerdata(h, arg) */
-/*|.endmacro */
-/*| */
-/*|.macro chkeob, bytes, target */
-/*|| if (bytes == 1) { */
-/*| cmp PTR, DATAEND */
-/*| je target */
-/*|| } else { */
-/*| mov rcx, DATAEND */
-/*| sub rcx, PTR */
-/*| cmp rcx, bytes */
-/*| jb target */
-/*|| } */
-/*|.endmacro */
-/*| */
-/*|.macro chkneob, bytes, target */
-/*|| if (bytes == 1) { */
-/*| cmp PTR, DATAEND */
-/*| jne target */
-/*|| } else { */
-/*| mov rcx, DATAEND */
-/*| sub rcx, PTR */
-/*| cmp rcx, bytes */
-/*| jae target */
-/*|| } */
-/*|.endmacro */
-
-/*|.macro sethas, reg, hasbit */
-/*|| if (hasbit >= 0) { */
-/*| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8)) */
-/*|| } */
-/*|.endmacro */
-/*| */
-/*| // Decodes 32-bit varint into rdx, inlining 1 byte. */
-/*|.macro dv32 */
-/*| chkeob 1, >7 */
-/*| movzx edx, byte [PTR] */
-/*| test dl, dl */
-/*| jns >8 */
-/*|7: */
-/*| call ->decodev32_fallback */
-/*|8: */
-/*| add PTR, 1 */
-/*|.endmacro */
-
-#define DECODE_EOF -3
-
-static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
- return h ? upb_handlers_gethandler(h, sel, NULL) : NULL;
-}
-
-/* Defines an "assembly label" for the current code generation offset.
- * This label exists *purely* for debugging purposes: it is emitted into
- * the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
- * defined.
- *
- * We would define this in the .c file except that it conditionally defines a
- * pclabel. */
-static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
-#ifndef NDEBUG
- int ofs = jc->dynasm->section->ofs;
- UPB_ASSERT(ofs != jc->lastlabelofs);
- jc->lastlabelofs = ofs;
-#endif
-
-#ifndef UPB_JIT_LOAD_SO
- UPB_UNUSED(jc);
- UPB_UNUSED(fmt);
-#else
- va_list args;
- va_start(args, fmt);
- char *str = upb_vasprintf(fmt, args);
- va_end(args);
-
- int pclabel = alloc_pclabel(jc);
- /* Normally we would prefer to allocate this inline with the codegen,
- * ie.
- * |=>asmlabel(...)
- * But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
- * we do it here instead. */
- /*|=>pclabel: */
- dasm_put(Dst, 0, pclabel);
-# 176 "upb/pb/compile_decoder_x64.dasc"
- upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
-#endif
-}
-
-/* Should only be called when the associated handler is known to exist. */
-static bool alwaysok(const upb_handlers *h, upb_selector_t sel) {
- upb_handlerattr attr = UPB_HANDLERATTR_INIT;
- bool ok = upb_handlers_getattr(h, sel, &attr);
-
- UPB_ASSERT(ok);
- return attr.alwaysok;
-}
-
-static const void *gethandlerdata(const upb_handlers *h, upb_selector_t sel) {
- upb_handlerattr attr = UPB_HANDLERATTR_INIT;
- bool ok = upb_handlers_getattr(h, sel, &attr);
-
- UPB_ASSERT(ok);
- return attr.handler_data;
-}
-
-/* Emit static assembly routines; code that does not vary based on the message
- * schema. Since it's not input-dependent, we only need one single copy of it.
- * For the moment we generate a single copy per generated handlers. Eventually
- * we should generate this code at compile time and link it into the binary so
- * we have one copy total. To do that we'll want to be sure that it is within
- * 2GB of our JIT code, so that branches between the two are near (rel32).
- *
- * We'd put this assembly in a .s file directly, but DynASM's ability to
- * calculate structure offsets automatically is too useful to pass up (it's way
- * more convenient to write DECODER->sink than [r15 + 0x96], especially since
- * the latter would have to be changed whenever the structure is updated). */
-static void emit_static_asm(jitcompiler *jc) {
- /*| // Trampolines for entering/exiting the JIT. These are a bit tricky to */
- /*| // support full resuming; when we suspend we copy the JIT's portion of */
- /*| // the call stack into the upb_pbdecoder and restore it when we resume. */
- asmlabel(jc, "enterjit");
- /*|->enterjit: */
- /*|1: */
- /*| push rbp */
- /*| push r15 */
- /*| push r14 */
- /*| push r13 */
- /*| push r12 */
- /*| push rbx */
- /*| */
- /*| mov rbx, ARG2_64 // Preserve JIT method. */
- /*| */
- /*| mov DECODER, rdi */
- /*| callp upb_pbdecoder_resume // Same args as us; reuse regs. */
- /*| test eax, eax */
- /*| jns >1 */
- /*| mov DECODER->saved_rsp, rsp */
- /*| mov rax, rbx */
- /*| load_regs */
- /*| */
- /*| // Test whether we have a saved stack to resume. */
- /*| mov ARG3_64, DECODER->call_len */
- /*| test ARG3_64, ARG3_64 */
- /*| jnz >2 */
- /*| */
- /*| call rax */
- /*| */
- /*| mov rax, DECODER->size_param */
- /*| mov qword DECODER->call_len, 0 */
- /*|1: */
- /*| pop rbx */
- dasm_put(Dst, 2, (unsigned int)((uintptr_t)upb_pbdecoder_resume), (unsigned int)(((uintptr_t)upb_pbdecoder_resume)>>32), 0xfffffffffffffff0UL, Dt2(->saved_rsp), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs), Dt2(->buf), Dt2(->call_len), Dt2(->size_param), Dt2(->call_len));
-# 243 "upb/pb/compile_decoder_x64.dasc"
- /*| pop r12 */
- /*| pop r13 */
- /*| pop r14 */
- /*| pop r15 */
- /*| pop rbp */
- /*| ret */
- /*| */
- /*|2: */
- /*| // Resume decoder. */
- /*| mov ARG2_64, DECODER->callstack */
- /*| sub rsp, ARG3_64 */
- /*| mov ARG1_64, rsp */
- /*| callp memcpy // Restore stack. */
- /*| ret // Return to resumed function (not ->enterjit caller). */
- /*| */
- /*| // Other code can call this to suspend the JIT. */
- /*| // To the calling code, it will appear that the function returns when */
- /*| // the JIT resumes, and more buffer space will be available. */
- /*| // Args: eax=the value that decode() should return. */
- dasm_put(Dst, 115, Dt2(->callstack), (unsigned int)((uintptr_t)memcpy), (unsigned int)(((uintptr_t)memcpy)>>32), 0xfffffffffffffff0UL);
-# 262 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "exitjit");
- /*|->exitjit: */
- /*| // Save the stack into DECODER->callstack. */
- /*| mov ARG1_64, DECODER->callstack */
- /*| mov ARG2_64, rsp */
- /*| mov ARG3_64, DECODER->saved_rsp */
- /*| sub ARG3_64, rsp */
- /*| mov DECODER->call_len, ARG3_64 // Preserve len for next resume. */
- /*| mov ebx, eax // Preserve return value across memcpy. */
- /*| callp memcpy // Copy stack into decoder. */
- /*| mov eax, ebx // This will be our return value. */
- /*| */
- /*| // Must NOT do this before the memcpy(), otherwise memcpy() will */
- /*| // clobber the stack we are trying to save! */
- /*| mov rsp, DECODER->saved_rsp */
- /*| pop rbx */
- /*| pop r12 */
- /*| pop r13 */
- /*| pop r14 */
- /*| pop r15 */
- /*| pop rbp */
- /*| ret */
- /*| */
- /*| // Like suspend() in the C decoder, except that the function appears */
- /*| // (from the caller's perspective) not to return until the decoder is */
- /*| // resumed. */
- dasm_put(Dst, 161, Dt2(->callstack), Dt2(->saved_rsp), Dt2(->call_len), (unsigned int)((uintptr_t)memcpy), (unsigned int)(((uintptr_t)memcpy)>>32), 0xfffffffffffffff0UL, Dt2(->saved_rsp));
-# 288 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "suspend");
- /*|->suspend: */
- /*| cmp DECODER->ptr, PTR */
- /*| je >1 */
- /*| mov DECODER->checkpoint, PTR */
- /*|1: */
- /*| commit_regs */
- /*| mov rdi, DECODER */
- /*| callp upb_pbdecoder_suspend */
- /*| jmp ->exitjit */
- /*| */
- dasm_put(Dst, 222, Dt2(->ptr), Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_suspend), (unsigned int)(((uintptr_t)upb_pbdecoder_suspend)>>32), 0xfffffffffffffff0UL);
-# 299 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "pushlendelim");
- /*|->pushlendelim: */
- /*|1: */
- /*| mov FRAME->sink.closure, CLOSURE */
- /*| mov DECODER->checkpoint, PTR */
- /*| dv32 */
- dasm_put(Dst, 300, Dt1(->sink.closure), Dt2(->checkpoint));
- if (1 == 1) {
- dasm_put(Dst, 313);
- } else {
- dasm_put(Dst, 321);
- }
-# 305 "upb/pb/compile_decoder_x64.dasc"
- /*| mov rcx, DELIMEND */
- /*| sub rcx, PTR */
- /*| sub rcx, rdx */
- /*| jb >4 // Len is greater than enclosing message. */
- /*| mov FRAME->end_ofs, rcx */
- /*| cmp FRAME, DECODER->limit */
- /*| je >3 // Stack overflow */
- /*| add FRAME, sizeof(upb_pbdecoder_frame) */
- /*| mov DELIMEND, PTR */
- /*| add DELIMEND, rdx */
- /*| mov dword FRAME->groupnum, 0 */
- /*| test rcx, rcx */
- /*| jz >2 */
- /*| mov DATAEND, DECODER->end */
- /*| cmp PTR, DELIMEND */
- /*| ja >2 */
- /*| cmp DELIMEND, DATAEND */
- /*| ja >2 */
- /*| mov DATAEND, DELIMEND // If DELIMEND >= PTR && DELIMEND <= DATAEND */
- dasm_put(Dst, 337, Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), Dt2(->end));
-# 324 "upb/pb/compile_decoder_x64.dasc"
- /*|2: */
- /*| ret */
- /*|3: */
- /*| // Stack overflow error. */
- /*| mov PTR, DECODER->checkpoint // Rollback to before the delim len. */
- /*| // Prepare seterr args. */
- /*| mov ARG1_64, DECODER */
- /*| ld64 kPbDecoderStackOverflow */
- dasm_put(Dst, 428, Dt2(->checkpoint));
- {
- uintptr_t v = (uintptr_t)kPbDecoderStackOverflow;
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 332 "upb/pb/compile_decoder_x64.dasc"
- /*| callp upb_pbdecoder_seterr */
- /*| call ->suspend */
- /*| jmp <1 */
- /*|4: */
- /*| // Overextended len. */
- /*| mov PTR, DECODER->checkpoint // Rollback to before the delim len. */
- /*| // Prepare seterr args. */
- /*| mov ARG1_64, DECODER */
- /*| ld64 kPbDecoderSubmessageTooLong */
- dasm_put(Dst, 458, (unsigned int)((uintptr_t)upb_pbdecoder_seterr), (unsigned int)(((uintptr_t)upb_pbdecoder_seterr)>>32), 0xfffffffffffffff0UL, Dt2(->checkpoint));
- {
- uintptr_t v = (uintptr_t)kPbDecoderSubmessageTooLong;
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 341 "upb/pb/compile_decoder_x64.dasc"
- /*| callp upb_pbdecoder_seterr */
- /*| call ->suspend */
- /*| jmp <1 */
- /*| */
- /*| // For getting a value that spans a buffer seam. Falls back to C. */
- /*|.macro getvalue_slow, func, bytes */
- /*| sub rsp, 8 // Need stack space for func to write value to. */
- /*|1: */
- /*| mov qword [rsp], 0 // For parsing routines that only parse 32 bits. */
- /*| mov ARG1_64, DECODER */
- /*| mov ARG2_64, rsp */
- /*| mov DECODER->checkpoint, PTR */
- /*| commit_regs */
- /*| callp func */
- /*| load_regs */
- /*| test eax, eax */
- /*| jns >2 */
- /*| // Success; return parsed data (in rdx AND xmm0). */
- /*| mov rdx, [rsp] */
- /*| movsd xmm0, qword [rsp] */
- /*| add rsp, 8 */
- /*| sub PTR, bytes // Bias our buffer pointer to rejoin the fast-path. */
- /*| mov DECODER->ptr, PTR */
- /*| ret */
- /*|2: */
- /*| call ->exitjit // Return eax from decode function. */
- /*| jmp <1 */
- /*|.endmacro */
- /*| */
- dasm_put(Dst, 497, (unsigned int)((uintptr_t)upb_pbdecoder_seterr), (unsigned int)(((uintptr_t)upb_pbdecoder_seterr)>>32), 0xfffffffffffffff0UL);
-# 370 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "parse_unknown");
- /*| // Args: edx=fieldnum, cl=wire type */
- /*|->parse_unknown: */
- /*| // OPT: handle directly instead of kicking to C. */
- /*| // Check for ENDGROUP. */
- /*| mov ARG1_64, DECODER */
- /*| mov ARG2_32, edx */
- /*| movzx ARG3_32, cl */
- /*| commit_regs */
- /*| callp upb_pbdecoder_skipunknown */
- /*| load_regs */
- /*| cmp eax, DECODE_ENDGROUP */
- /*| jne >1 */
- dasm_put(Dst, 526, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_skipunknown), (unsigned int)(((uintptr_t)upb_pbdecoder_skipunknown)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs), Dt2(->buf), DECODE_ENDGROUP);
-# 383 "upb/pb/compile_decoder_x64.dasc"
- /*| ret // Return eax=DECODE_ENDGROUP, not zero */
- /*|1: */
- /*| cmp eax, DECODE_OK */
- /*| je >1 */
- /*| call ->exitjit // Return eax from decode function. */
- /*|1: */
- /*| xor eax, eax */
- /*| ret */
- /*| */
- /*| // Fallback functions for parsing single values. These are used when the */
- /*| // buffer doesn't contain enough remaining data for the fast path. Each */
- /*| // primitive type (v32, v64, f32, f64) has two functions: decode & skip. */
- /*| // Decode functions return their value in rdx/edx (getvalue_slow also loads xmm0). */
- /*| // */
- /*| // These functions leave PTR = value_end - fast_path_bytes, so that we can */
- /*| // re-join the fast path which will add fast_path_bytes after the callback */
- /*| // completes. We also set DECODER->ptr to this value which is a signal to */
- /*| // ->suspend that DECODER->checkpoint is up to date. */
- dasm_put(Dst, 623, DECODE_OK);
-# 401 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "skip_decode_f32_fallback");
- /*|->skipf32_fallback: */
- /*|->decodef32_fallback: */
- /*| getvalue_slow upb_pbdecoder_decode_f32, 4 */
- dasm_put(Dst, 647, Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_f32), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_f32)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs));
-# 405 "upb/pb/compile_decoder_x64.dasc"
- /*| */
- dasm_put(Dst, 751, Dt2(->bufstart_ofs), Dt2(->buf), Dt2(->ptr));
-# 406 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "skip_decode_f64_fallback");
- /*|->skipf64_fallback: */
- /*|->decodef64_fallback: */
- /*| getvalue_slow upb_pbdecoder_decode_f64, 8 */
- dasm_put(Dst, 799, Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_f64), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_f64)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs));
-# 410 "upb/pb/compile_decoder_x64.dasc"
- /*| */
- /*| // Called for varints longer than 1 byte, or when too few bytes remain for the fast path. */
- dasm_put(Dst, 903, Dt2(->bufstart_ofs), Dt2(->buf), Dt2(->ptr));
-# 412 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "skip_decode_v32_fallback");
- /*|->skipv32_fallback: */
- /*|->skipv64_fallback: */
- /*| chkeob 16, >1 */
- dasm_put(Dst, 951);
- if (16 == 1) {
- dasm_put(Dst, 956);
- } else {
- dasm_put(Dst, 964);
- }
-# 416 "upb/pb/compile_decoder_x64.dasc"
- /*| // With at least 16 bytes left, we can do a branch-less SSE version. */
- /*| movdqu xmm0, [PTR] */
- /*| pmovmskb eax, xmm0 // bits 0-15 are continuation bits, 16-31 are 0. */
- /*| not eax */
- /*| bsf eax, eax */
- /*| cmp al, 10 */
- /*| jae ->decode_varint_slow // Error (>10 byte varint). */
- /*| add PTR, rax // bsf result is 0-based, so PTR=end-1, as desired. */
- /*| ret */
- /*| */
- /*|1: */
- /*| // With fewer than 16 bytes, we have to read byte by byte. */
- /*| lea rcx, [PTR + 10] */
- /*| mov rax, PTR // Preserve PTR in case of fallback to slow path. */
- /*| cmp rcx, DATAEND */
- /*| cmova rcx, DATAEND // rcx = MIN(DATAEND, PTR + 10) */
- /*|2: */
- /*| cmp rax, rcx */
- /*| je ->decode_varint_slow */
- /*| test byte [rax], 0x80 */
- /*| jz >3 */
- /*| add rax, 1 */
- /*| jmp <2 */
- /*|3: */
- /*| mov PTR, rax // PTR = varint_end - 1, as desired */
- /*| ret */
- /*| */
- /*| // Returns tag in edx */
- dasm_put(Dst, 980, 10);
-# 444 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "decode_unknown_tag_fallback");
- /*|->decode_unknown_tag_fallback: */
- /*| sub rsp, 16 */
- /*|1: */
- /*| cmp PTR, DELIMEND */
- /*| jne >2 */
- /*| add rsp, 16 */
- /*| xor eax, eax */
- /*| ret */
- /*|2: */
- /*| // OPT: Have a medium-fast path before falling back to _slow. */
- /*| mov ARG1_64, DECODER */
- /*| mov ARG2_64, rsp */
- /*| commit_regs */
- /*| callp upb_pbdecoder_decode_varint_slow */
- /*| load_regs */
- dasm_put(Dst, 1053, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_varint_slow), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_varint_slow)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure));
-# 460 "upb/pb/compile_decoder_x64.dasc"
- /*| cmp eax, 0 */
- /*| jge >3 */
- /*| mov edx, [rsp] // Success; return parsed data. */
- /*| add rsp, 16 */
- /*| ret */
- /*|3: */
- /*| call ->exitjit // Return eax from decode function. */
- /*| jmp <1 */
- /*| */
- /*| // Called for varints longer than 1 byte, or when too few bytes remain for the fast path. */
- dasm_put(Dst, 1156, Dt1(->end_ofs), Dt2(->bufstart_ofs), Dt2(->buf));
-# 470 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "decode_v32_v64_fallback");
- /*|->decodev32_fallback: */
- /*|->decodev64_fallback: */
- /*| chkeob 10, ->decode_varint_slow */
- dasm_put(Dst, 1194);
- if (10 == 1) {
- dasm_put(Dst, 1199);
- } else {
- dasm_put(Dst, 1207);
- }
-# 474 "upb/pb/compile_decoder_x64.dasc"
- /*| // OPT: do something faster than just calling the C version. */
- /*| mov rdi, PTR */
- /*| callp upb_vdecode_fast */
- /*| test rax, rax */
- /*| je ->decode_varint_slow // Unterminated varint. */
- /*| mov PTR, rax */
- /*| sub PTR, 1 */
- /*| mov DECODER->ptr, PTR */
- /*| ret */
- /*| */
- dasm_put(Dst, 1223, (unsigned int)((uintptr_t)upb_vdecode_fast), (unsigned int)(((uintptr_t)upb_vdecode_fast)>>32), 0xfffffffffffffff0UL, Dt2(->ptr));
-# 484 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "decode_varint_slow");
- /*|->decode_varint_slow: */
- /*| // Slow path: end of buffer or error (varint length >= 10). */
- /*| getvalue_slow upb_pbdecoder_decode_varint_slow, 1 */
- dasm_put(Dst, 1268, Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_varint_slow), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_varint_slow)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs));
-# 488 "upb/pb/compile_decoder_x64.dasc"
- /*| */
- /*| // Args: rsi=expected tag, return=rax (DECODE_{OK,MISMATCH,EOF}) */
- dasm_put(Dst, 1374, Dt2(->buf), Dt2(->ptr));
-# 490 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "checktag_fallback");
- /*|->checktag_fallback: */
- /*| sub rsp, 8 */
- /*| mov [rsp], rsi // Preserve expected tag. */
- /*|1: */
- /*| mov ARG1_64, DECODER */
- /*| commit_regs */
- /*| mov DECODER->checkpoint, PTR */
- /*| callp upb_pbdecoder_checktag_slow */
- /*| load_regs */
- dasm_put(Dst, 1418, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), Dt2(->checkpoint), (unsigned int)((uintptr_t)upb_pbdecoder_checktag_slow), (unsigned int)(((uintptr_t)upb_pbdecoder_checktag_slow)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs));
-# 500 "upb/pb/compile_decoder_x64.dasc"
- /*| cmp eax, 0 */
- /*| jge >2 */
- /*| add rsp, 8 */
- /*| ret */
- /*|2: */
- /*| call ->exitjit */
- /*| mov rsi, [rsp] */
- /*| cmp PTR, DELIMEND */
- /*| jne <1 */
- /*| mov eax, DECODE_EOF */
- /*| add rsp, 8 */
- /*| ret */
- /*| */
- /*| // Args: rsi=upb_inttable, rdx=key, return=rax (-1 if not found). */
- /*| // Preserves: rcx, rdx */
- /*| // OPT: Could write this in assembly if it's a hotspot. */
- dasm_put(Dst, 1517, Dt2(->buf), DECODE_EOF);
-# 516 "upb/pb/compile_decoder_x64.dasc"
- asmlabel(jc, "hashlookup");
- /*|->hashlookup: */
- /*| push rcx */
- /*| push rdx */
- /*| sub rsp, 16 */
- /*| mov rdi, rsi */
- /*| mov rsi, rdx */
- /*| mov rdx, rsp */
- /*| callp upb_inttable_lookup */
- /*| add rsp, 16 */
- /*| pop rdx */
- /*| pop rcx */
- /*| test al, al */
- /*| jz >2 // Unknown field. */
- /*| mov rax, [rsp-32] // Value from table. */
- /*| ret */
- /*|2: */
- /*| xor rax, rax */
- /*| not rax */
- /*| ret */
- dasm_put(Dst, 1559, (unsigned int)((uintptr_t)upb_inttable_lookup), (unsigned int)(((uintptr_t)upb_inttable_lookup)>>32), 0xfffffffffffffff0UL);
-# 536 "upb/pb/compile_decoder_x64.dasc"
-}
-
-static void jitprimitive(jitcompiler *jc, opcode op,
- const upb_handlers *h, upb_selector_t sel) {
- typedef enum { V32, V64, F32, F64, X } valtype_t;
- static valtype_t types[] = {
- X, F64, F32, V64, V64, V32, F64, F32, V64, X, X, X, X, V32, V32, F32, F64,
- V32, V64 };
- static char fastpath_bytes[] = { 1, 1, 4, 8 };
- const valtype_t vtype = types[op];
- const int fastbytes = fastpath_bytes[vtype];
- upb_func *handler = gethandler(h, sel);
- upb_fieldtype_t ftype;
- size_t offset;
- int32_t hasbit;
-
- if (handler) {
- /*|1: */
- /*| chkneob fastbytes, >3 */
- dasm_put(Dst, 112);
- if (fastbytes == 1) {
- dasm_put(Dst, 1628);
- } else {
- dasm_put(Dst, 1636, fastbytes);
- }
-# 555 "upb/pb/compile_decoder_x64.dasc"
- /*|2: */
- dasm_put(Dst, 1652);
-# 556 "upb/pb/compile_decoder_x64.dasc"
- switch (vtype) {
- case V32:
- /*| call ->decodev32_fallback */
- dasm_put(Dst, 1655);
-# 559 "upb/pb/compile_decoder_x64.dasc"
- break;
- case V64:
- /*| call ->decodev64_fallback */
- dasm_put(Dst, 1659);
-# 562 "upb/pb/compile_decoder_x64.dasc"
- break;
- case F32:
- /*| call ->decodef32_fallback */
- dasm_put(Dst, 1663);
-# 565 "upb/pb/compile_decoder_x64.dasc"
- break;
- case F64:
- /*| call ->decodef64_fallback */
- dasm_put(Dst, 1667);
-# 568 "upb/pb/compile_decoder_x64.dasc"
- break;
- case X: break;
- }
- /*| jmp >4 */
- dasm_put(Dst, 1671);
-# 572 "upb/pb/compile_decoder_x64.dasc"
-
- /* Fast path decode; for when fastbytes bytes are available. */
- /*|3: */
- dasm_put(Dst, 1676);
-# 575 "upb/pb/compile_decoder_x64.dasc"
- switch (op) {
- case OP_PARSE_SFIXED32:
- case OP_PARSE_FIXED32:
- /*| mov edx, dword [PTR] */
- dasm_put(Dst, 1679);
-# 579 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_PARSE_SFIXED64:
- case OP_PARSE_FIXED64:
- /*| mov rdx, qword [PTR] */
- dasm_put(Dst, 1682);
-# 583 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_PARSE_FLOAT:
- /*| movss xmm0, dword [PTR] */
- dasm_put(Dst, 1686);
-# 586 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_PARSE_DOUBLE:
- /*| movsd xmm0, qword [PTR] */
- dasm_put(Dst, 1692);
-# 589 "upb/pb/compile_decoder_x64.dasc"
- break;
- default:
- /* Inline one byte of varint decoding. */
- /*| movzx edx, byte [PTR] */
- /*| test dl, dl */
- /*| js <2 // Fallback to slow path for >1 byte varint. */
- dasm_put(Dst, 1698);
-# 595 "upb/pb/compile_decoder_x64.dasc"
- break;
- }
-
- /* Second-stage decode; used for both fast and slow paths */
- /* (only needed for a few types). */
- /*|4: */
- dasm_put(Dst, 1708);
-# 601 "upb/pb/compile_decoder_x64.dasc"
- switch (op) {
- case OP_PARSE_SINT32:
- /* 32-bit zig-zag decode. */
- /*| mov eax, edx */
- /*| shr edx, 1 */
- /*| and eax, 1 */
- /*| neg eax */
- /*| xor edx, eax */
- dasm_put(Dst, 1711);
-# 609 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_PARSE_SINT64:
- /* 64-bit zig-zag decode. */
- /*| mov rax, rdx */
- /*| shr rdx, 1 */
- /*| and rax, 1 */
- /*| neg rax */
- /*| xor rdx, rax */
- dasm_put(Dst, 1725);
-# 617 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_PARSE_BOOL:
- /*| test rdx, rdx */
- /*| setne dl */
- dasm_put(Dst, 1744);
-# 621 "upb/pb/compile_decoder_x64.dasc"
- break;
- default: break;
- }
-
- /* Call callback (or specialize if we can). */
- if (upb_msg_getscalarhandlerdata(h, sel, &ftype, &offset, &hasbit)) {
- switch (ftype) {
- case UPB_TYPE_INT64:
- case UPB_TYPE_UINT64:
- /*| mov [CLOSURE + offset], rdx */
- dasm_put(Dst, 1751, offset);
-# 631 "upb/pb/compile_decoder_x64.dasc"
- break;
- case UPB_TYPE_INT32:
- case UPB_TYPE_UINT32:
- case UPB_TYPE_ENUM:
- /*| mov [CLOSURE + offset], edx */
- dasm_put(Dst, 1756, offset);
-# 636 "upb/pb/compile_decoder_x64.dasc"
- break;
- case UPB_TYPE_DOUBLE:
- /*| movsd qword [CLOSURE + offset], XMMARG1 */
- dasm_put(Dst, 1761, offset);
-# 639 "upb/pb/compile_decoder_x64.dasc"
- break;
- case UPB_TYPE_FLOAT:
- /*| movss dword [CLOSURE + offset], XMMARG1 */
- dasm_put(Dst, 1769, offset);
-# 642 "upb/pb/compile_decoder_x64.dasc"
- break;
- case UPB_TYPE_BOOL:
- /*| mov [CLOSURE + offset], dl */
- dasm_put(Dst, 1777, offset);
-# 645 "upb/pb/compile_decoder_x64.dasc"
- break;
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES:
- case UPB_TYPE_MESSAGE:
- UPB_ASSERT(false); break;
- }
- /*| sethas CLOSURE, hasbit */
- if (hasbit >= 0) {
- dasm_put(Dst, 1782, ((uint32_t)hasbit / 8), (1 << ((uint32_t)hasbit % 8)));
- }
-# 652 "upb/pb/compile_decoder_x64.dasc"
- } else if (handler) {
- /*| mov ARG1_64, CLOSURE */
- /*| load_handler_data h, sel */
- dasm_put(Dst, 1788);
- {
- uintptr_t v = (uintptr_t)gethandlerdata(h, sel);
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 655 "upb/pb/compile_decoder_x64.dasc"
- /*| callp handler */
- dasm_put(Dst, 1793, (unsigned int)((uintptr_t)handler), (unsigned int)(((uintptr_t)handler)>>32), 0xfffffffffffffff0UL);
-# 656 "upb/pb/compile_decoder_x64.dasc"
- if (!alwaysok(h, sel)) {
- /*| test al, al */
- /*| jnz >5 */
- /*| call ->suspend */
- /*| jmp <1 */
- /*|5: */
- dasm_put(Dst, 1815);
-# 662 "upb/pb/compile_decoder_x64.dasc"
- }
- }
-
- /* We do this last so that the checkpoint is not advanced past the user's
- * data until the callback has returned success. */
- /*| add PTR, fastbytes */
- dasm_put(Dst, 1831, fastbytes);
-# 668 "upb/pb/compile_decoder_x64.dasc"
- } else {
- /* No handler registered for this value, just skip it. */
- /*| chkneob fastbytes, >3 */
- if (fastbytes == 1) {
- dasm_put(Dst, 1628);
- } else {
- dasm_put(Dst, 1636, fastbytes);
- }
-# 671 "upb/pb/compile_decoder_x64.dasc"
- /*|2: */
- dasm_put(Dst, 1652);
-# 672 "upb/pb/compile_decoder_x64.dasc"
- switch (vtype) {
- case V32:
- /*| call ->skipv32_fallback */
- dasm_put(Dst, 1836);
-# 675 "upb/pb/compile_decoder_x64.dasc"
- break;
- case V64:
- /*| call ->skipv64_fallback */
- dasm_put(Dst, 1840);
-# 678 "upb/pb/compile_decoder_x64.dasc"
- break;
- case F32:
- /*| call ->skipf32_fallback */
- dasm_put(Dst, 1844);
-# 681 "upb/pb/compile_decoder_x64.dasc"
- break;
- case F64:
- /*| call ->skipf64_fallback */
- dasm_put(Dst, 1848);
-# 684 "upb/pb/compile_decoder_x64.dasc"
- break;
- case X: break;
- }
-
- /* Fast-path skip. */
- /*|3: */
- dasm_put(Dst, 1676);
-# 690 "upb/pb/compile_decoder_x64.dasc"
- if (vtype == V32 || vtype == V64) {
- /*| test byte [PTR], 0x80 */
- /*| jnz <2 */
- dasm_put(Dst, 1852);
-# 693 "upb/pb/compile_decoder_x64.dasc"
- }
- /*| add PTR, fastbytes */
- dasm_put(Dst, 1831, fastbytes);
-# 695 "upb/pb/compile_decoder_x64.dasc"
- }
-}
-
-static void jitdispatch(jitcompiler *jc,
- const upb_pbdecodermethod *method) {
- /* Lots of room for tweaking/optimization here. */
-
- const upb_inttable *dispatch = &method->dispatch;
- bool has_hash_entries = (dispatch->t.count > 0);
-
- /* Whether any of the fields for this message can have two wire types which
- * are both valid (packed & non-packed).
- *
- * OPT: populate this more precisely; not all messages with hash entries have
- * this characteristic. */
- bool has_multi_wiretype = has_hash_entries;
-
- /*|=>define_jmptarget(jc, &method->dispatch): */
- /*|1: */
- dasm_put(Dst, 1861, define_jmptarget(jc, &method->dispatch));
-# 714 "upb/pb/compile_decoder_x64.dasc"
- /* Decode the field tag. */
- /*| mov aword DECODER->checkpoint, PTR */
- /*| chkeob 2, >6 */
- dasm_put(Dst, 308, Dt2(->checkpoint));
- if (2 == 1) {
- dasm_put(Dst, 1865);
- } else {
- dasm_put(Dst, 1873);
- }
-# 717 "upb/pb/compile_decoder_x64.dasc"
- /*| movzx edx, byte [PTR] */
- /*| test dl, dl */
- /*| jns >7 // Jump if first byte has no continuation bit. */
- /*| movzx ecx, byte [PTR + 1] */
- /*| test cl, cl */
- /*| js >6 // Jump if second byte has continuation bit. */
- /*| // Confirmed two-byte varint. */
- /*| shl ecx, 7 */
- /*| and edx, 0x7f */
- /*| or edx, ecx */
- /*| add PTR, 2 */
- /*| jmp >8 */
- /*|6: */
- /*| call ->decode_unknown_tag_fallback */
- /*| test eax, eax // Hit DELIMEND? */
- /*| jnz >8 */
- /*| ret */
- /*|7: */
- /*| add PTR, 1 */
- /*|8: */
- /*| mov ecx, edx */
- /*| shr edx, 3 */
- /*| and cl, 7 */
- dasm_put(Dst, 1889, 1);
-# 740 "upb/pb/compile_decoder_x64.dasc"
-
- /* See comment attached to upb_pbdecodermethod.dispatch for layout of the
- * dispatch table. */
- /*|2: */
- /*| cmp edx, dispatch->array_size */
- dasm_put(Dst, 1954, dispatch->array_size);
-# 745 "upb/pb/compile_decoder_x64.dasc"
- if (has_hash_entries) {
- /*| jae >7 */
- dasm_put(Dst, 1961);
-# 747 "upb/pb/compile_decoder_x64.dasc"
- } else {
- /*| jae >5 */
- dasm_put(Dst, 1966);
-# 749 "upb/pb/compile_decoder_x64.dasc"
- }
- /*| // OPT: Compact the lookup arr into 32-bit entries. */
- if ((uintptr_t)dispatch->array > 0x7fffffff) {
- /*| mov64 rax, (uintptr_t)dispatch->array */
- /*| mov rax, qword [rax + rdx * 8] */
- dasm_put(Dst, 1971, (unsigned int)((uintptr_t)dispatch->array), (unsigned int)(((uintptr_t)dispatch->array)>>32));
-# 754 "upb/pb/compile_decoder_x64.dasc"
- } else {
- /*| mov rax, qword [rdx * 8 + dispatch->array] */
- dasm_put(Dst, 1980, dispatch->array);
-# 756 "upb/pb/compile_decoder_x64.dasc"
- }
- /*|3: */
- /*| // We take advantage of the fact that non-present entries are stored */
- /*| // as -1, which will result in wire types that will never match. */
- /*| cmp al, cl */
- dasm_put(Dst, 1986);
-# 761 "upb/pb/compile_decoder_x64.dasc"
- if (has_multi_wiretype) {
- /*| jne >6 */
- dasm_put(Dst, 1991);
-# 763 "upb/pb/compile_decoder_x64.dasc"
- } else {
- /*| jne >5 */
- dasm_put(Dst, 1996);
-# 765 "upb/pb/compile_decoder_x64.dasc"
- }
- /*| shr rax, 16 */
- /*| */
- /*| // Load the machine code address from the table entry. */
- /*| // The table entry is relative to the dispatch->array jmptarget */
- /*| // (patchdispatch() took care of this) which is the same as */
- /*| // local label "4". The "lea" is really just trying to do */
- /*| // lea rax, [>4 + rax] */
- /*| // */
- /*| // But we can't write that directly for some reason, so we use */
- /*| // rdx as a temporary. */
- /*| lea rdx, [>4] */
- /*|=>define_jmptarget(jc, dispatch->array): */
- /*|4: */
- /*| add rax, rdx */
- /*| ret */
- /*| */
- /*|5: */
- /*| // Field isn't in our table. */
- /*| */
- /*| // For pushing unknown fields to the unknown field handler. */
- /*| mov64 rax, (uintptr_t)method->dest_handlers_ */
- /*| mov FRAME->sink.handlers, rax */
- /*| */
- /*| call ->parse_unknown */
- /*| test eax, eax // ENDGROUP? */
- /*| jz <1 */
- /*| lea rax, [>9] // ENDGROUP; Load address of OP_ENDMSG. */
- /*| ret */
- dasm_put(Dst, 2001, define_jmptarget(jc, dispatch->array), (unsigned int)((uintptr_t)method->dest_handlers_), (unsigned int)(((uintptr_t)method->dest_handlers_)>>32), Dt1(->sink.handlers));
-# 794 "upb/pb/compile_decoder_x64.dasc"
-
- if (has_multi_wiretype) {
- /*|6: */
- /*| // Primary wire type didn't match, check secondary wire type. */
- /*| cmp ah, cl */
- /*| jne <5 */
- /*| // Secondary wire type is a match, look up fn + UPB_MAX_FIELDNUMBER. */
- /*| add rdx, UPB_MAX_FIELDNUMBER */
- /*| // This key will never be in the array part, so do a hash lookup. */
- dasm_put(Dst, 2043, UPB_MAX_FIELDNUMBER);
-# 803 "upb/pb/compile_decoder_x64.dasc"
- UPB_ASSERT(has_hash_entries);
- /*| ld64 dispatch */
- {
- uintptr_t v = (uintptr_t)dispatch;
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 805 "upb/pb/compile_decoder_x64.dasc"
- /*| jmp ->hashlookup // Tail call. */
- dasm_put(Dst, 2056);
-# 806 "upb/pb/compile_decoder_x64.dasc"
- }
-
- if (has_hash_entries) {
- /*|7: */
- /*| // Hash table lookup. */
- /*| ld64 dispatch */
- dasm_put(Dst, 2061);
- {
- uintptr_t v = (uintptr_t)dispatch;
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 812 "upb/pb/compile_decoder_x64.dasc"
- /*| call ->hashlookup */
- /*| jmp <3 */
- dasm_put(Dst, 2064);
-# 814 "upb/pb/compile_decoder_x64.dasc"
- }
-}
-
-static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
- const upb_pbdecodermethod *method) {
- /* Emits the code that checks whether the next n bytes of input equal the
- * expected tag. On a match the emitted code advances PTR by n. On a
- * mismatch it jumps to the target for jc->pc + ofs or, when ofs == 0, calls
- * the dispatch code of method and jumps to the address that call returns
- * (or to the delimend target when it returns zero).
- *
- * Internally we parse unknown fields; if this runs us into DELIMEND we jump
- * to the corresponding DELIMEND target (either msg end or repeated field
- * end), which we find from the OP_CHECKDELIM which must have necessarily
- * preceded us. */
- uint32_t last_instruction = *(jc->pc - 2); /* Word before the tag opcode (the tag opcode itself is at jc->pc - 1). */
- int last_arg = (int32_t)last_instruction >> 8; /* Signed operand held in the bits above the opcode byte. */
- uint32_t *delimend = (jc->pc - 1) + last_arg;
- const size_t ptr_words = sizeof(void*) / sizeof(uint32_t); /* Number of 32-bit bytecode words per pointer. */
-
- UPB_ASSERT((last_instruction & 0xff) == OP_CHECKDELIM);
-
- if (getop(*(jc->pc - 1)) == OP_TAGN) {
- jc->pc += ptr_words; /* Steps over the words after the opcode; presumably the inline tag that jitbytecode() reads with memcpy -- TODO confirm for non-8-byte pointers. */
- }
-
- /*| chkneob n, >1 */
- if (n == 1) {
- dasm_put(Dst, 2072);
- } else {
- dasm_put(Dst, 2080, n);
- }
-# 835 "upb/pb/compile_decoder_x64.dasc"
-
- /*| // OPT: this is way too much fallback code to put here. */
- /*| // Reduce and/or move to a separate section to make better icache usage. */
- /*| ld64 tag */
- { /* Picks one of three load encodings depending on whether the value needs 64 bits, is non-zero, or is zero. */
- uintptr_t v = (uintptr_t)tag;
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 839 "upb/pb/compile_decoder_x64.dasc"
- /*| call ->checktag_fallback */
- /*| cmp eax, DECODE_MISMATCH */
- /*| je >3 */
- /*| cmp eax, DECODE_EOF */
- /*| je =>jmptarget(jc, delimend) */
- /*| jmp >5 */
- dasm_put(Dst, 2096, DECODE_MISMATCH, DECODE_EOF, jmptarget(jc, delimend));
-# 845 "upb/pb/compile_decoder_x64.dasc"
-
- /*|1: */
- dasm_put(Dst, 112);
-# 847 "upb/pb/compile_decoder_x64.dasc"
- switch (n) { /* n is the tag length in bytes; only 1..5 emit a compare and there is no default case. */
- case 1:
- /*| cmp byte [PTR], tag */
- dasm_put(Dst, 2119, tag);
-# 850 "upb/pb/compile_decoder_x64.dasc"
- break;
- case 2:
- /*| cmp word [PTR], tag */
- dasm_put(Dst, 2123, tag);
-# 853 "upb/pb/compile_decoder_x64.dasc"
- break;
- case 3:
- /*| // OPT: Slightly more efficient code, but depends on an extra byte. */
- /*| // mov eax, dword [PTR] */
- /*| // shl eax, 8 */
- /*| // cmp eax, tag << 8 */
- /*| cmp word [PTR], (tag & 0xffff) */
- /*| jne >2 */
- /*| cmp byte [PTR + 2], (tag >> 16) */
- /*|2: */
- dasm_put(Dst, 2128, (tag & 0xffff), 2, (tag >> 16));
-# 863 "upb/pb/compile_decoder_x64.dasc"
- break;
- case 4:
- /*| cmp dword [PTR], tag */
- dasm_put(Dst, 2143, tag);
-# 866 "upb/pb/compile_decoder_x64.dasc"
- break;
- case 5:
- /*| cmp dword [PTR], (tag & 0xffffffff) */
- /*| jne >3 */
- /*| cmp byte [PTR + 4], (tag >> 32) */
- dasm_put(Dst, 2147, (tag & 0xffffffff), 4, (tag >> 32));
-# 871 "upb/pb/compile_decoder_x64.dasc"
- }
- /*| je >4 */
- /*|3: */
- dasm_put(Dst, 2159);
-# 874 "upb/pb/compile_decoder_x64.dasc"
- if (ofs == 0) { /* No explicit mismatch target: fall back to the method's dispatch code. */
- /*| call =>jmptarget(jc, &method->dispatch) */
- /*| test rax, rax */
- /*| jz =>jmptarget(jc, delimend) */
- /*| jmp rax */
- dasm_put(Dst, 2166, jmptarget(jc, &method->dispatch), jmptarget(jc, delimend));
-# 879 "upb/pb/compile_decoder_x64.dasc"
- } else {
- /*| jmp =>jmptarget(jc, jc->pc + ofs) */
- dasm_put(Dst, 2178, jmptarget(jc, jc->pc + ofs));
-# 881 "upb/pb/compile_decoder_x64.dasc"
- }
- /*|4: */
- /*| add PTR, n */
- /*|5: */
- dasm_put(Dst, 2182, n);
-# 885 "upb/pb/compile_decoder_x64.dasc"
-}
-
-/* Compile the bytecode to x64.
- *
- * Walks every instruction word from jc->group->bytecode up to
- * jc->group->bytecode_end and emits code for it through dasm_put(). The
- * "|"-prefixed comments in front of each dasm_put() call show the assembly
- * that the call stands for; the "# <line>" markers refer back to
- * upb/pb/compile_decoder_x64.dasc.
- *
- * Each instruction word carries its opcode in the low 8 bits and an operand
- * in the bits above. Some opcodes (OP_SETDISPATCH, OP_TAGN,
- * OP_SETBIGGROUPNUM) are followed by extra inline words, which the matching
- * case consumes by advancing jc->pc.
- *
- * `method` and `h` start out NULL and are only assigned by the
- * OP_SETDISPATCH case; every other case uses whatever was set last. */
-static void jitbytecode(jitcompiler *jc) {
- upb_pbdecodermethod *method = NULL;
- const upb_handlers *h = NULL;
- for (jc->pc = jc->group->bytecode; jc->pc < jc->group->bytecode_end; ) {
- int32_t instr = *jc->pc;
- opcode op = instr & 0xff; /* Low 8 bits select the opcode. */
- uint32_t arg = instr >> 8; /* Bits above the opcode byte are the operand. */
- int32_t longofs = arg; /* Same operand as a signed value; used as a pc-relative offset below. */
-
- if (op != OP_SETDISPATCH) {
- /* Skipped for SETDISPATCH because it defines its own asmlabel for the
- * dispatch code it emits. */
- asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
-
- /* Skipped for SETDISPATCH because it should point at the function
- * prologue, not the dispatch function that is emitted first.
- * TODO: optimize this to only define pclabels that are actually used. */
- /*|=>define_jmptarget(jc, jc->pc): */
- dasm_put(Dst, 0, define_jmptarget(jc, jc->pc));
-# 906 "upb/pb/compile_decoder_x64.dasc"
- }
-
- jc->pc++; /* From here on jc->pc points past the opcode word, at any inline operand. */
-
- switch (op) {
- case OP_STARTMSG: {
- upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
- if (startmsg) {
- /* bool startmsg(void *closure, const void *hd) */
- /*|1: */
- /*| mov ARG1_64, CLOSURE */
- /*| load_handler_data h, UPB_STARTMSG_SELECTOR */
- dasm_put(Dst, 2191);
- {
- uintptr_t v = (uintptr_t)gethandlerdata(h, UPB_STARTMSG_SELECTOR);
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 918 "upb/pb/compile_decoder_x64.dasc"
- /*| callp startmsg */
- dasm_put(Dst, 1793, (unsigned int)((uintptr_t)startmsg), (unsigned int)(((uintptr_t)startmsg)>>32), 0xfffffffffffffff0UL);
-# 919 "upb/pb/compile_decoder_x64.dasc"
- if (!alwaysok(h, UPB_STARTMSG_SELECTOR)) {
- /*| test al, al */
- /*| jnz >2 */
- /*| call ->suspend */
- /*| jmp <1 */
- /*|2: */
- dasm_put(Dst, 2198);
-# 925 "upb/pb/compile_decoder_x64.dasc"
- }
- } else {
- /*| nop */
- dasm_put(Dst, 2214);
-# 928 "upb/pb/compile_decoder_x64.dasc"
- }
- break;
- }
- case OP_ENDMSG: {
- upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
- /*|9: */
- dasm_put(Dst, 2216);
-# 934 "upb/pb/compile_decoder_x64.dasc"
- if (endmsg) {
- /* bool endmsg(void *closure, const void *hd, upb_status *status) */
- /*| mov ARG1_64, CLOSURE */
- /*| load_handler_data h, UPB_ENDMSG_SELECTOR */
- dasm_put(Dst, 1788);
- {
- uintptr_t v = (uintptr_t)gethandlerdata(h, UPB_ENDMSG_SELECTOR);
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 938 "upb/pb/compile_decoder_x64.dasc"
- /*| mov ARG3_64, DECODER->status */
- /*| callp endmsg */
- dasm_put(Dst, 2219, Dt2(->status), (unsigned int)((uintptr_t)endmsg), (unsigned int)(((uintptr_t)endmsg)>>32), 0xfffffffffffffff0UL);
-# 940 "upb/pb/compile_decoder_x64.dasc"
- }
- break;
- }
- case OP_SETDISPATCH: {
- uint32_t *op_pc = jc->pc - 1; /* Address of the SETDISPATCH opcode word itself. */
- const char *msgname;
- upb_inttable *dispatch;
-
- /* Load info for new method. */
- memcpy(&dispatch, jc->pc, sizeof(void*));
- jc->pc += sizeof(void*) / sizeof(uint32_t); /* Step over the inline pointer operand. */
- /* The OP_SETDISPATCH bytecode contains a pointer that is
- * &method->dispatch; we want to go backwards and recover method. */
- method =
- (void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch));
- /* May be NULL, in which case no handlers for this message will be found.
- * OPT: we should do better by completely skipping the message in this
- * case instead of parsing it field by field. We should also do the skip
- * in the containing message's code. */
- h = method->dest_handlers_;
- msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
-
- /* Emit dispatch code for new method. */
- asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
- jitdispatch(jc, method);
-
- /* Emit function prologue for new method. */
- asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
- /*|=>define_jmptarget(jc, op_pc): */
- /*|=>define_jmptarget(jc, method): */
- /*| sub rsp, 8 */
- dasm_put(Dst, 2245, define_jmptarget(jc, op_pc), define_jmptarget(jc, method));
-# 971 "upb/pb/compile_decoder_x64.dasc"
-
- break;
- }
- case OP_PARSE_DOUBLE:
- case OP_PARSE_FLOAT:
- case OP_PARSE_INT64:
- case OP_PARSE_UINT64:
- case OP_PARSE_INT32:
- case OP_PARSE_FIXED64:
- case OP_PARSE_FIXED32:
- case OP_PARSE_BOOL:
- case OP_PARSE_UINT32:
- case OP_PARSE_SFIXED32:
- case OP_PARSE_SFIXED64:
- case OP_PARSE_SINT32:
- case OP_PARSE_SINT64:
- jitprimitive(jc, op, h, arg);
- break;
- case OP_STARTSEQ:
- case OP_STARTSUBMSG:
- case OP_STARTSTR: {
- upb_func *start = gethandler(h, arg);
- if (start) {
- /* void *startseq(void *closure, const void *hd)
- * void *startsubmsg(void *closure, const void *hd)
- * void *startstr(void *closure, const void *hd, size_t size_hint) */
- /*|1: */
- /*| mov ARG1_64, CLOSURE */
- /*| load_handler_data h, arg */
- dasm_put(Dst, 2191);
- {
- uintptr_t v = (uintptr_t)gethandlerdata(h, arg);
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 1000 "upb/pb/compile_decoder_x64.dasc"
- if (op == OP_STARTSTR) { /* Only startstr takes a third argument (size_hint): DELIMEND - PTR. */
- /*| mov ARG3_64, DELIMEND */
- /*| sub ARG3_64, PTR */
- dasm_put(Dst, 2253);
-# 1003 "upb/pb/compile_decoder_x64.dasc"
- }
- /*| callp start */
- dasm_put(Dst, 1793, (unsigned int)((uintptr_t)start), (unsigned int)(((uintptr_t)start)>>32), 0xfffffffffffffff0UL);
-# 1005 "upb/pb/compile_decoder_x64.dasc"
- if (!alwaysok(h, arg)) {
- /*| test rax, rax */
- /*| jnz >2 */
- /*| call ->suspend */
- /*| jmp <1 */
- /*|2: */
- dasm_put(Dst, 2261);
-# 1011 "upb/pb/compile_decoder_x64.dasc"
- }
- /*| mov CLOSURE, rax */
- dasm_put(Dst, 2278);
-# 1013 "upb/pb/compile_decoder_x64.dasc"
- } else {
- /* TODO: nop is only required because of asmlabel(). */
- /*| nop */
- dasm_put(Dst, 2214);
-# 1016 "upb/pb/compile_decoder_x64.dasc"
- }
- break;
- }
- case OP_ENDSEQ:
- case OP_ENDSUBMSG:
- case OP_ENDSTR: {
- upb_func *end = gethandler(h, arg);
- if (end) {
- /* bool endseq(void *closure, const void *hd)
- * bool endsubmsg(void *closure, const void *hd)
- * bool endstr(void *closure, const void *hd) */
- /*|1: */
- /*| mov ARG1_64, CLOSURE */
- /*| load_handler_data h, arg */
- dasm_put(Dst, 2191);
- {
- uintptr_t v = (uintptr_t)gethandlerdata(h, arg);
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 1030 "upb/pb/compile_decoder_x64.dasc"
- /*| callp end */
- dasm_put(Dst, 1793, (unsigned int)((uintptr_t)end), (unsigned int)(((uintptr_t)end)>>32), 0xfffffffffffffff0UL);
-# 1031 "upb/pb/compile_decoder_x64.dasc"
- if (!alwaysok(h, arg)) {
- /*| test al, al */
- /*| jnz >2 */
- /*| call ->suspend */
- /*| jmp <1 */
- /*|2: */
- dasm_put(Dst, 2198);
-# 1037 "upb/pb/compile_decoder_x64.dasc"
- }
- } else {
- /* TODO: nop is only required because of asmlabel(). */
- /*| nop */
- dasm_put(Dst, 2214);
-# 1041 "upb/pb/compile_decoder_x64.dasc"
- }
- break;
- }
- case OP_STRING: {
- upb_func *str = gethandler(h, arg);
- /*| cmp PTR, DELIMEND */
- /*| je >4 */
- /*|1: */
- /*| cmp PTR, DATAEND */
- /*| jne >2 */
- /*| call ->suspend */
- /*| jmp <1 */
- /*|2: */
- dasm_put(Dst, 2282);
-# 1054 "upb/pb/compile_decoder_x64.dasc"
- if (str) {
- /* size_t str(void *closure, const void *hd, const char *str,
- * size_t n) */
- /*| mov ARG1_64, CLOSURE */
- /*| load_handler_data h, arg */
- dasm_put(Dst, 1788);
- {
- uintptr_t v = (uintptr_t)gethandlerdata(h, arg);
- if (v > 0xffffffff) {
- dasm_put(Dst, 446, (unsigned int)(v), (unsigned int)((v)>>32));
- } else if (v) {
- dasm_put(Dst, 451, v);
- } else {
- dasm_put(Dst, 454);
- }
- }
-# 1059 "upb/pb/compile_decoder_x64.dasc"
- /*| mov ARG3_64, PTR */
- /*| mov ARG4_64, DATAEND */
- /*| sub ARG4_64, PTR */
- /*| mov ARG5_64, qword DECODER->handle */
- /*| callp str */
- /*| add PTR, rax */
- dasm_put(Dst, 2309, Dt2(->handle), (unsigned int)((uintptr_t)str), (unsigned int)(((uintptr_t)str)>>32), 0xfffffffffffffff0UL);
-# 1065 "upb/pb/compile_decoder_x64.dasc"
- if (!alwaysok(h, arg)) {
- /*| cmp PTR, DATAEND */
- /*| je >3 */
- /*| call ->strret_fallback */
- /*|3: */
- dasm_put(Dst, 2347);
-# 1070 "upb/pb/compile_decoder_x64.dasc"
- }
- } else { /* No string handler: the emitted code just moves PTR to DATAEND. */
- /*| mov PTR, DATAEND */
- dasm_put(Dst, 2360);
-# 1073 "upb/pb/compile_decoder_x64.dasc"
- }
- /*| cmp PTR, DELIMEND */
- /*| jne <1 */
- /*|4: */
- dasm_put(Dst, 2364);
-# 1077 "upb/pb/compile_decoder_x64.dasc"
- break;
- }
- case OP_PUSHTAGDELIM:
- /*| mov FRAME->sink.closure, CLOSURE */
- /*| // This shouldn't need to be read, because tag-delimited fields */
- /*| // shouldn't have an OP_SETDELIM after them. But for the moment */
- /*| // non-packed repeated fields do OP_SETDELIM so they can share more */
- /*| // code with the packed code-path. If this is changed later, this */
- /*| // store can be removed. */
- /*| mov qword FRAME->end_ofs, 0 */
- /*| cmp FRAME, DECODER->limit */
- /*| je ->err */
- /*| add FRAME, sizeof(upb_pbdecoder_frame) */
- /*| mov dword FRAME->groupnum, arg */
- dasm_put(Dst, 2375, Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), arg);
-# 1091 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_PUSHLENDELIM:
- /*| call ->pushlendelim */
- dasm_put(Dst, 2405);
-# 1094 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_POP:
- /*| sub FRAME, sizeof(upb_pbdecoder_frame) */
- /*| mov CLOSURE, FRAME->sink.closure */
- dasm_put(Dst, 2409, sizeof(upb_pbdecoder_frame), Dt1(->sink.closure));
-# 1098 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_SETDELIM:
- /* OPT: experiment with testing vs old offset to optimize away. */
- /*| mov DATAEND, DECODER->end */
- /*| add DELIMEND, FRAME->end_ofs */
- /*| cmp DELIMEND, DECODER->buf */
- /*| jb >1 */
- /*| cmp DELIMEND, DATAEND */
- /*| ja >1 // OPT: try cmov. */
- /*| mov DATAEND, DELIMEND */
- /*|1: */
- dasm_put(Dst, 2419, Dt2(->end), Dt1(->end_ofs), Dt2(->buf));
-# 1109 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_SETBIGGROUPNUM:
- /*| mov dword FRAME->groupnum, *jc->pc++ */
- dasm_put(Dst, 2399, Dt1(->groupnum), *jc->pc++); /* Consumes one inline operand word as the group number. */
-# 1112 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_CHECKDELIM:
- /*| cmp DELIMEND, PTR */
- /*| je =>jmptarget(jc, jc->pc + longofs) */
- dasm_put(Dst, 2449, jmptarget(jc, jc->pc + longofs));
-# 1116 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_CALL:
- /*| call =>jmptarget(jc, jc->pc + longofs) */
- dasm_put(Dst, 2456, jmptarget(jc, jc->pc + longofs));
-# 1119 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_BRANCH:
- /*| jmp =>jmptarget(jc, jc->pc + longofs); */
- dasm_put(Dst, 2178, jmptarget(jc, jc->pc + longofs));
-# 1122 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_RET:
- /*|9: */
- /*| add rsp, 8 */
- /*| ret */
- dasm_put(Dst, 2459);
-# 1127 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_TAG1:
- jittag(jc, (arg >> 8) & 0xff, 1, (int8_t)arg, method); /* Operand: low byte = signed mismatch offset, next byte = 1-byte tag. */
- break;
- case OP_TAG2:
- jittag(jc, (arg >> 8) & 0xffff, 2, (int8_t)arg, method); /* Operand: low byte = signed mismatch offset, next 16 bits = 2-byte tag. */
- break;
- case OP_TAGN: {
- uint64_t tag;
- memcpy(&tag, jc->pc, 8); /* The tag is stored inline in the words after the opcode. */
- jittag(jc, tag, arg >> 8, (int8_t)arg, method); /* arg >> 8 is passed as the tag length n; low byte is the signed mismatch offset. */
- break;
- }
- case OP_DISPATCH:
- /*| call =>jmptarget(jc, &method->dispatch) */
- dasm_put(Dst, 2456, jmptarget(jc, &method->dispatch));
-# 1142 "upb/pb/compile_decoder_x64.dasc"
- break;
- case OP_HALT:
- UPB_ASSERT(false); /* Asserts unconditionally: OP_HALT is not expected here. */
- }
- }
-
- asmlabel(jc, "eof");
- /*| nop */
- dasm_put(Dst, 2214);
-# 1150 "upb/pb/compile_decoder_x64.dasc"
-}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback