summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile33
-rw-r--r--benchmarks/parsestream.upb_table.c16
-rw-r--r--benchmarks/parsetostruct.upb_table.c1
-rw-r--r--dynasm/COPYRIGHT58
-rw-r--r--dynasm/dasm_arm.h440
-rw-r--r--dynasm/dasm_arm.lua933
-rw-r--r--dynasm/dasm_ppc.h408
-rw-r--r--dynasm/dasm_ppc.lua1225
-rw-r--r--dynasm/dasm_proto.h83
-rw-r--r--dynasm/dasm_x64.lua12
-rw-r--r--dynasm/dasm_x86.h470
-rw-r--r--dynasm/dasm_x86.lua1930
-rw-r--r--dynasm/dynasm.lua1076
-rw-r--r--src/upb.h1
-rw-r--r--src/upb_decoder.c95
-rw-r--r--src/upb_decoder.h19
-rw-r--r--src/upb_decoder_x64.asm228
-rw-r--r--src/upb_decoder_x86.dasc649
-rw-r--r--src/upb_def.c37
-rw-r--r--src/upb_glue.c3
-rw-r--r--src/upb_msg.c12
-rw-r--r--src/upb_stream.c63
-rw-r--r--src/upb_stream.h40
-rw-r--r--src/upb_string.c1
-rw-r--r--src/upb_string.h14
-rw-r--r--src/upb_table.c6
-rw-r--r--src/upb_varint_decoder.h74
-rw-r--r--tests/test.proto10
-rw-r--r--tests/test_varint.c19
-rw-r--r--tests/tests.c35
30 files changed, 7589 insertions, 402 deletions
diff --git a/Makefile b/Makefile
index 41e4c8f..4a2b80d 100644
--- a/Makefile
+++ b/Makefile
@@ -38,7 +38,7 @@ endif
# Basic compiler/flag setup.
CC=gcc
CXX=g++
-CFLAGS=-std=c99
+CFLAGS=-std=gnu99
INCLUDE=-Isrc -Itests -I.
CPPFLAGS=$(INCLUDE) -Wall -Wextra $(USER_CFLAGS)
LDLIBS=-lpthread src/libupb.a
@@ -86,9 +86,6 @@ STREAM= \
src/upb_strstream.c \
src/upb_glue.c \
-ASMCORE= \
- src/upb_decoder_x64.asm
-
# Parts of core that are yet to be converted.
OTHERSRC=src/upb_encoder.c
@@ -115,7 +112,10 @@ ALLSRC=$(CORE) $(STREAM) $(BENCHMARKS_SRC) $(TESTS_SRC)
clean_leave_profile:
rm -rf $(LIBUPB) $(LIBUPB_PIC)
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.dSYM)
+ rm -rf src/upb_decoder_x86.h
rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
+ rm -rf src/jit_debug_elf_file.o
+ rm -rf src/jit_debug_elf_file.h
rm -rf $(TESTS) tests/t.*
rm -rf src/descriptor.pb
rm -rf src/upbc deps
@@ -135,9 +135,11 @@ lib: $(LIBUPB)
OBJ=$(patsubst %.c,%.o,$(SRC))
PICOBJ=$(patsubst %.c,%.lo,$(SRC))
-ifneq (, $(findstring DUSE_X64_FASTPATH, $(USER_CFLAGS)))
- OBJ += src/upb_decoder_x64.o
- PICOBJ += src/upb_decoder_x64.o
+ifneq (, $(findstring DUPB_USE_JIT_X64, $(USER_CFLAGS)))
+src/upb_decoder.o: src/upb_decoder_x86.h
+ ifeq (, $(findstring DNDEBUG, $(USER_CFLAGS)))
+ $(error "JIT only works with -DNDEBUG enabled!")
+ endif
endif
$(LIBUPB): $(OBJ)
$(E) AR $(LIBUPB)
@@ -164,13 +166,18 @@ src/upb_def.lo: src/upb_def.c
$(E) 'CC -fPIC' $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) $(DEF_OPT) -c -o $@ $< -fPIC
-src/upb_decoder_x64.o: src/upb_decoder_x64.asm
- $(E) NASM $<
- $(Q) nasm -Ox src/upb_decoder_x64.asm -o src/upb_decoder_x64.o -f macho64
+src/upb_decoder_x86.h: src/jit_debug_elf_file.h
+src/upb_decoder_x86.h: src/upb_decoder_x86.dasc
+ $(E) DYNASM $<
+ $(Q) lua dynasm/dynasm.lua src/upb_decoder_x86.dasc > src/upb_decoder_x86.h
+
+# Assemble the JIT debug ELF stub with the configured compiler driver ($(CC))
+# so that overriding CC affects this object like every other one.
+src/jit_debug_elf_file.o: src/jit_debug_elf_file.s
+ $(E) GAS $<
+ $(Q) $(CC) -c src/jit_debug_elf_file.s -o src/jit_debug_elf_file.o
-src/upb_decoder_x64.lo: src/upb_decoder_x64.asm
- $(E) NASM $<
- $(Q) nasm -Ox src/upb_decoder_x64.asm -o src/upb_decoder_x64.lo -f macho64
+src/jit_debug_elf_file.h: src/jit_debug_elf_file.o
+ $(E) XXD $<
+ $(Q) xxd -i src/jit_debug_elf_file.o > src/jit_debug_elf_file.h
# Function to expand a wildcard pattern recursively.
rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $(subst *,%,$2),$d)))
diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c
index 89649e6..7659e1e 100644
--- a/benchmarks/parsestream.upb_table.c
+++ b/benchmarks/parsestream.upb_table.c
@@ -12,6 +12,19 @@ static upb_decoder decoder;
static upb_stringsrc stringsrc;
upb_handlers handlers;
+static upb_sflow_t startsubmsg(void *_m, upb_value fval) {  /* No-op start-submessage handler: ignores both arguments. */
+ (void)_m;
+ (void)fval;
+ return UPB_CONTINUE_WITH(NULL);  /* Continue, passing NULL along. */
+}
+
+static upb_flow_t value(void *closure, upb_value fval, upb_value val) {  /* No-op value handler: ignores all three arguments. */
+ (void)closure;
+ (void)fval;
+ (void)val;
+ return UPB_CONTINUE;  /* Always asks the caller to keep going. */
+}
+
static bool initialize()
{
// Initialize upb state, decode descriptor.
@@ -50,7 +63,7 @@ static bool initialize()
upb_handlers_init(&handlers, def);
// Cause all messages to be read, but do nothing when they are.
- upb_register_all(&handlers, NULL, NULL, NULL, NULL, NULL, NULL);
+ upb_register_all(&handlers, NULL, NULL, value, startsubmsg, NULL, NULL);
upb_decoder_init(&decoder, &handlers);
upb_stringsrc_init(&stringsrc);
return true;
@@ -62,7 +75,6 @@ static void cleanup()
upb_def_unref(UPB_UPCAST(def));
upb_decoder_uninit(&decoder);
upb_stringsrc_uninit(&stringsrc);
- upb_handlers_uninit(&handlers);
}
static size_t run(int i)
diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c
index e778bc5..397f964 100644
--- a/benchmarks/parsetostruct.upb_table.c
+++ b/benchmarks/parsetostruct.upb_table.c
@@ -78,7 +78,6 @@ static void cleanup()
upb_def_unref(UPB_UPCAST(def));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
- upb_handlers_uninit(&h);
}
static size_t run(int i)
diff --git a/dynasm/COPYRIGHT b/dynasm/COPYRIGHT
new file mode 100644
index 0000000..032f2de
--- /dev/null
+++ b/dynasm/COPYRIGHT
@@ -0,0 +1,58 @@
+DynASM is taken from LuaJIT 2, which carries the following license statement:
+
+===============================================================================
+LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
+
+Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+
+===============================================================================
+[ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ]
+
+Copyright (C) 1994-2011 Lua.org, PUC-Rio.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+[ LuaJIT includes code from dlmalloc, which has this license statement: ]
+
+This is a version (aka dlmalloc) of malloc/free/realloc written by
+Doug Lea and released to the public domain, as explained at
+http://creativecommons.org/licenses/publicdomain
+
+===============================================================================
diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
new file mode 100644
index 0000000..3fd795b
--- /dev/null
+++ b/dynasm/dasm_arm.h
@@ -0,0 +1,440 @@
+/*
+** DynASM ARM encoding engine.
+** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+** Released under the MIT/X license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "arm"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC, DASM_LABEL_PC,
+ DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12,
+ DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize; /* Size of lglabels in bytes. */
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize; /* Size of pclabels in bytes. */
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state.
+**
+** Clears Dst_REF and then requests DASM_PSZ(maxsection) bytes for the state
+** through DASM_M_GROW (defined outside this file; presumably it allocates and
+** updates psz), so the trailing sections[] array has room for maxsection
+** entries. The label arrays and the globals pointer start out NULL.
+** actionlist, status and the active section are not set here; dasm_setup()
+** assigns them.
+*/
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ int i;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+}
+
+/* Free DynASM state.
+**
+** Releases every non-NULL section buffer, then the PC label array and the
+** local/global label array if present, and finally the state structure
+** itself. Each release goes through DASM_M_FREE with the recorded size.
+*/
+void dasm_free(Dst_DECL)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ for (i = 0; i < D->maxsection; i++)
+ if (D->sections[i].buf)
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+ DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup().
+**
+** gl is stored with a bias of -10 entries. dasm_encode() writes global
+** label addresses to globals[ins-10] for label numbers >= 20, so label 20
+** ends up in gl[0]. The local/global label array is grown to hold 10+maxgl
+** ints.
+*/
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too.
+**
+** Requests room for maxpc ints through DASM_M_GROW and zero-fills the bytes
+** between the previous size and the resulting D->pcsize. Entries below the
+** previous size are not touched by this function.
+*/
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize; /* Byte size before growing. */
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder.
+**
+** Records the action list, resets the status to DASM_S_OK, makes section 0
+** the active section, zeroes the label arrays and rewinds every section to
+** its biased start position with a byte offset of zero.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ D->actionlist = (dasm_ActList)actionlist;
+ D->status = DASM_S_OK;
+ D->section = &D->sections[0];
+ memset((void *)D->lglabels, 0, D->lgsize); /* No NULL check here, unlike pclabels below. */
+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+ for (i = 0; i < D->maxsection; i++) {
+ D->sections[i].pos = DASM_SEC2POS(i);
+ D->sections[i].ofs = 0;
+ }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+static int dasm_imm12(unsigned int n)  /* Encode n as an 8 bit value plus rotate count, or return -1. */
+{
+ int i;
+ for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30))  /* Rotate n left by 2 bits per step. */
+ if (n <= 255) return (int)(n + (i << 8));  /* Step count i goes into bits 8 and up. */
+ return -1;  /* None of the 16 rotations fits into 8 bits. */
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
+**
+** start is an offset into the action list. The variadic arguments are read
+** as ints, one for each action numbered DASM_REL_PC or higher. The section
+** buffer is grown up front once pos has reached epos; epos is kept
+** DASM_MAXSECPOS entries below the buffer end. On return the section's pos
+** and ofs are updated. With DASM_CHECKS, a failed check stores the error in
+** D->status and returns early.
+*/
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start; /* Each put begins with its action list offset. */
+
+ va_start(ap, start);
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ if (action >= DASM__MAX) {
+ ofs += 4; /* Not an action: a plain 4 byte instruction word. */
+ } else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP: goto stop;
+ case DASM_SECTION:
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+ if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
+ pl += 10; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM:
+ case DASM_IMM16:
+#ifdef DASM_CHECKS
+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+ if ((ins & 0x8000))
+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+ else
+ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ case DASM_IMML8:
+ case DASM_IMML12:
+ CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
+ (((-n)>>((ins>>5)&31)) == 0), RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM12:
+ CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets.
+**
+** Walks the pass 1 buffers of all sections in order and adds the running
+** offset to each stored label offset, so labels become relative to the
+** start of the combined code. The total size is stored in D->codesize and
+** *szp. Returns DASM_S_OK; with DASM_CHECKS it may instead return a pending
+** error status or DASM_S_UNDEF_PC|pc for a PC label that is still undefined.
+*/
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM12: case DASM_IMM16:
+ case DASM_IMML8: case DASM_IMML12: pos++; break;
+ }
+ }
+ stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections.
+**
+** Writes the final instruction words to buffer, section after section.
+** Operand actions patch the word emitted just before them (cp[-1]). The
+** addresses of global labels (label numbers >= 20) are stored into
+** D->globals. Returns DASM_S_PHASE if the number of bytes written differs
+** from D->codesize, else DASM_S_OK; with DASM_CHECKS a failed check returns
+** its error code instead.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ char *base = (char *)buffer;
+ unsigned int *cp = (unsigned int *)buffer;
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: *cp++ = *p++; break;
+ case DASM_REL_EXT:
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
+ goto patchrel;
+ case DASM_ALIGN:
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; /* Pad with 0xe1a00000 (the mov base opcode with all-zero operands). */
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG); /* Falls through to DASM_REL_PC. */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+ patchrel:
+ CK((n & 3) == 0 && ((n-4+0x02000000) >> 26) == 0, RANGE_REL);
+ cp[-1] |= (((n-4) >> 2) & 0x00ffffff);
+ break;
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+ break;
+ case DASM_LABEL_PC: break;
+ case DASM_IMM:
+ cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31);
+ break;
+ case DASM_IMM12:
+ cp[-1] |= dasm_imm12((unsigned int)n);
+ break;
+ case DASM_IMM16:
+ cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff);
+ break;
+ case DASM_IMML8:
+ cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) :
+ ((-n & 0x0f) | ((-n & 0xf0) << 4));
+ break;
+ case DASM_IMML12:
+ cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n);
+ break;
+ default: *cp++ = ins; break;
+ }
+ }
+ stop: (void)0;
+ }
+ }
+
+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset.
+**
+** For a defined label, returns the offset stored at the label's buffer
+** position. Returns -1 for a label that has pending references but no
+** definition, and -2 if pc is out of range or the label is unused.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+ dasm_State *D = Dst_REF;
+ if (pc*sizeof(int) < D->pcsize) {
+ int pos = D->pclabels[pc];
+ if (pos < 0) return *DASM_POS2PTR(D, -pos);
+ if (pos > 0) return -1; /* Undefined. */
+ }
+ return -2; /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps.
+**
+** While the status is OK, scans local labels 1..9: a positive entry (a
+** still-open reference chain) sets DASM_S_UNDEF_LG|i and stops the scan,
+** any other entry is reset to 0. If secmatch >= 0, it also checks that the
+** active section is sections[secmatch]. Returns D->status.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+ dasm_State *D = Dst_REF;
+ if (D->status == DASM_S_OK) {
+ int i;
+ for (i = 1; i <= 9; i++) {
+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+ D->lglabels[i] = 0;
+ }
+ }
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
+ D->section != &D->sections[secmatch])
+ D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+ return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
new file mode 100644
index 0000000..37ba7fa
--- /dev/null
+++ b/dynasm/dasm_arm.lua
@@ -0,0 +1,933 @@
+------------------------------------------------------------------------------
+-- DynASM ARM module.
+--
+-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+ arch = "arm",
+ description = "DynASM ARM module",
+ version = "1.2.2",
+ vernum = 10202,
+ release = "2011-03-23",
+ author = "Mike Pall",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort = table.concat, table.sort
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+ "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+ map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Return 8 digit hex number.
+local function tohex(x)
+ return sub(format("%08x", x), -8) -- Avoid 64 bit portability problem in Lua.
+end
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+ out:write("DynASM encoding engine action codes:\n")
+ for n,name in ipairs(action_names) do
+ local num = map_action[name]
+ out:write(format(" %-10s %02X %d\n", name, num, num))
+ end
+ out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+-- out is the output file object, name the C identifier of the array.
+-- An empty action list is written as a single STOP action, so the generated
+-- array always has at least one element.
+local function writeactions(out, name)
+ local nn = #actlist
+ local last = actlist[nn]
+ -- actlist is 1-based: keep the STOP placeholder in a local rather than in
+ -- slot 0, which the final write below would never read.
+ if nn == 0 then nn = 1; last = map_action.STOP end
+ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+ for i = 1,nn-1 do
+ assert(out:write("0x", tohex(actlist[i]), ",\n"))
+ end
+ assert(out:write("0x", tohex(last), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+ local w = assert(map_action[action], "bad action name `"..action.."'")
+ wputxw(w * 0x10000 + (val or 0)) -- Action number goes above bit 15, val is added below it.
+ if a then actargs[#actargs+1] = a end -- Queue a as an extra argument for the next dasm_put().
+ if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+ if #actlist == actargs[1] then return end -- Nothing to flush.
+ if not term then waction("STOP") end -- Terminate action list.
+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+ if n <= 0x000fffff then waction("ESC") end -- The upper half of such a small word lies in the action number range, so mark it with ESC.
+ wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+ local pos = #actlist+1
+ actlist[pos] = ""
+ return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+ local n = next_global
+ if n > 2047 then werror("too many global labels") end -- The C encoder masks label numbers with 2047.
+ next_global = n + 1
+ t[name] = n -- Cache it, so __index only fires on the first lookup of a name.
+ return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("Global labels:\n")
+ for i=20,next_global-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("enum {\n")
+ for i=20,next_global-1 do
+ out:write(" ", prefix, t[i], ",\n")
+ end
+ out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=20,next_global-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {} -- Reverse map: extern label number -> name.
+local map_extern = setmetatable({}, { __index = function(t, name)
+ -- No restrictions on the name for now.
+ local n = next_extern
+ if n > 2047 then werror("too many extern labels") end -- The C encoder masks the extern index with 2047.
+ next_extern = n + 1
+ t[name] = n
+ map_extern_[n] = name
+ return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+ out:write("Extern labels:\n")
+ for i=0,next_extern-1 do
+ out:write(format(" %s\n", map_extern_[i]))
+ end
+ out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=0,next_extern-1 do
+ out:write(" \"", map_extern_[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { sp = "r13", lr = "r14", pc = "r15", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", }
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+ return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, }
+
+local map_cond = {
+ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+ hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+-- Template strings for ARM instructions.
+local map_op = {
+ -- Basic data processing instructions.
+ and_3 = "e0000000DNPs",
+ eor_3 = "e0200000DNPs",
+ sub_3 = "e0400000DNPs",
+ rsb_3 = "e0600000DNPs",
+ add_3 = "e0800000DNPs",
+ adc_3 = "e0a00000DNPs",
+ sbc_3 = "e0c00000DNPs",
+ rsc_3 = "e0e00000DNPs",
+ tst_2 = "e1100000NP",
+ teq_2 = "e1300000NP",
+ cmp_2 = "e1500000NP",
+ cmn_2 = "e1700000NP",
+ orr_3 = "e1800000DNPs",
+ mov_2 = "e1a00000DPs",
+ bic_3 = "e1c00000DNPs",
+ mvn_2 = "e1e00000DPs",
+
+ and_4 = "e0000000DNMps",
+ eor_4 = "e0200000DNMps",
+ sub_4 = "e0400000DNMps",
+ rsb_4 = "e0600000DNMps",
+ add_4 = "e0800000DNMps",
+ adc_4 = "e0a00000DNMps",
+ sbc_4 = "e0c00000DNMps",
+ rsc_4 = "e0e00000DNMps",
+ tst_3 = "e1100000NMp",
+ teq_3 = "e1300000NMp",
+ cmp_3 = "e1500000NMp",
+ cmn_3 = "e1700000NMp",
+ orr_4 = "e1800000DNMps",
+ mov_3 = "e1a00000DMps",
+ bic_4 = "e1c00000DNMps",
+ mvn_3 = "e1e00000DMps",
+
+ lsl_3 = "e1a00000DMvs",
+ lsr_3 = "e1a00020DMvs",
+ asr_3 = "e1a00040DMvs",
+ ror_3 = "e1a00060DMvs",
+ rrx_2 = "e1a00060DMs",
+
+ -- Multiply and multiply-accumulate.
+ mul_3 = "e0000090NMSs",
+ mla_4 = "e0200090NMSDs",
+ umaal_4 = "e0400090DNMSs", -- v6
+ mls_4 = "e0600090DNMSs", -- v6T2
+ umull_4 = "e0800090DNMSs",
+ umlal_4 = "e0a00090DNMSs",
+ smull_4 = "e0c00090DNMSs",
+ smlal_4 = "e0e00090DNMSs",
+
+ -- Halfword multiply and multiply-accumulate.
+ smlabb_4 = "e1000080NMSD", -- v5TE
+ smlatb_4 = "e10000a0NMSD", -- v5TE
+ smlabt_4 = "e10000c0NMSD", -- v5TE
+ smlatt_4 = "e10000e0NMSD", -- v5TE
+ smlawb_4 = "e1200080NMSD", -- v5TE
+ smulwb_3 = "e12000a0NMS", -- v5TE
+ smlawt_4 = "e12000c0NMSD", -- v5TE
+ smulwt_3 = "e12000e0NMS", -- v5TE
+ smlalbb_4 = "e1400080NMSD", -- v5TE
+ smlaltb_4 = "e14000a0NMSD", -- v5TE
+ smlalbt_4 = "e14000c0NMSD", -- v5TE
+ smlaltt_4 = "e14000e0NMSD", -- v5TE
+ smulbb_3 = "e1600080NMS", -- v5TE
+ smultb_3 = "e16000a0NMS", -- v5TE
+ smulbt_3 = "e16000c0NMS", -- v5TE
+ smultt_3 = "e16000e0NMS", -- v5TE
+
+ -- Miscellaneous data processing instructions.
+ clz_2 = "e16f0f10DM", -- v5T
+ rev_2 = "e6bf0f30DM", -- v6
+ rev16_2 = "e6bf0fb0DM", -- v6
+ revsh_2 = "e6ff0fb0DM", -- v6
+ sel_3 = "e6800fb0DNM", -- v6
+ usad8_3 = "e780f010NMS", -- v6
+ usada8_4 = "e7800010NMSD", -- v6
+ rbit_2 = "e6ff0f30DM", -- v6T2
+ movw_2 = "e3000000DW", -- v6T2
+ movt_2 = "e3400000DW", -- v6T2
+ -- Note: the X encodes width-1, not width.
+ sbfx_4 = "e7a00050DMvX", -- v6T2
+ ubfx_4 = "e7e00050DMvX", -- v6T2
+ -- Note: the X encodes the msb field, not the width.
+ bfc_3 = "e7c0001fDvX", -- v6T2
+ bfi_4 = "e7c00010DMvX", -- v6T2
+
+ -- Packing and unpacking instructions.
+ pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6
+ pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6
+ sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6
+ sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6
+ sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6
+ sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6
+ sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6
+ sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6
+ uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6
+ uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6
+ uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6
+ uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6
+ uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6
+ uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6
+
+ -- Saturating instructions.
+ qadd_3 = "e1000050DMN", -- v5TE
+ qsub_3 = "e1200050DMN", -- v5TE
+ qdadd_3 = "e1400050DMN", -- v5TE
+ qdsub_3 = "e1600050DMN", -- v5TE
+ -- Note: the X for ssat* encodes sat_imm-1, not sat_imm.
+ ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6
+ usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6
+ ssat16_3 = "e6a00f30DXM", -- v6
+ usat16_3 = "e6e00f30DXM", -- v6
+
+ -- Parallel addition and subtraction.
+ sadd16_3 = "e6100f10DNM", -- v6
+ sasx_3 = "e6100f30DNM", -- v6
+ ssax_3 = "e6100f50DNM", -- v6
+ ssub16_3 = "e6100f70DNM", -- v6
+ sadd8_3 = "e6100f90DNM", -- v6
+ ssub8_3 = "e6100ff0DNM", -- v6
+ qadd16_3 = "e6200f10DNM", -- v6
+ qasx_3 = "e6200f30DNM", -- v6
+ qsax_3 = "e6200f50DNM", -- v6
+ qsub16_3 = "e6200f70DNM", -- v6
+ qadd8_3 = "e6200f90DNM", -- v6
+ qsub8_3 = "e6200ff0DNM", -- v6
+ shadd16_3 = "e6300f10DNM", -- v6
+ shasx_3 = "e6300f30DNM", -- v6
+ shsax_3 = "e6300f50DNM", -- v6
+ shsub16_3 = "e6300f70DNM", -- v6
+ shadd8_3 = "e6300f90DNM", -- v6
+ shsub8_3 = "e6300ff0DNM", -- v6
+ uadd16_3 = "e6500f10DNM", -- v6
+ uasx_3 = "e6500f30DNM", -- v6
+ usax_3 = "e6500f50DNM", -- v6
+ usub16_3 = "e6500f70DNM", -- v6
+ uadd8_3 = "e6500f90DNM", -- v6
+ usub8_3 = "e6500ff0DNM", -- v6
+ uqadd16_3 = "e6600f10DNM", -- v6
+ uqasx_3 = "e6600f30DNM", -- v6
+ uqsax_3 = "e6600f50DNM", -- v6
+ uqsub16_3 = "e6600f70DNM", -- v6
+ uqadd8_3 = "e6600f90DNM", -- v6
+ uqsub8_3 = "e6600ff0DNM", -- v6
+ uhadd16_3 = "e6700f10DNM", -- v6
+ uhasx_3 = "e6700f30DNM", -- v6
+ uhsax_3 = "e6700f50DNM", -- v6
+ uhsub16_3 = "e6700f70DNM", -- v6
+ uhadd8_3 = "e6700f90DNM", -- v6
+ uhsub8_3 = "e6700ff0DNM", -- v6
+
+ -- Load/store instructions.
+ str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL",
+ strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL",
+ ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL",
+ ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL",
+ strh_2 = "e00000b0DL", strh_3 = "e00000b0DL",
+ ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL",
+ ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE
+ ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL",
+ strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE
+ ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL",
+
+ ldm_2 = "e8900000nR", ldmia_2 = "e8900000nR", ldmfd_2 = "e8900000nR",
+ ldmda_2 = "e8100000nR", ldmfa_2 = "e8100000nR",
+ ldmdb_2 = "e9100000nR", ldmea_2 = "e9100000nR",
+ ldmib_2 = "e9900000nR", ldmed_2 = "e9900000nR",
+ stm_2 = "e8800000nR", stmia_2 = "e8800000nR", stmfd_2 = "e8800000nR",
+ stmda_2 = "e8000000nR", stmfa_2 = "e8000000nR",
+ stmdb_2 = "e9000000nR", stmea_2 = "e9000000nR",
+ stmib_2 = "e9800000nR", stmed_2 = "e9800000nR",
+ pop_1 = "e8bd0000R", push_1 = "e92d0000R",
+
+ -- Branch instructions.
+ b_1 = "ea000000B",
+ bl_1 = "eb000000B",
+ blx_1 = "e12fff30C",
+ bx_1 = "e12fff10M",
+
+ -- Miscellaneous instructions.
+ nop_0 = "e1a00000",
+ mrs_1 = "e10f0000D",
+ bkpt_1 = "e1200070K", -- v5T
+ svc_1 = "ef000000T", swi_1 = "ef000000T",
+ ud_0 = "e7f001f0",
+
+ -- NYI: Advanced SIMD and VFP instructions.
+
+ -- NYI instructions, since I have no need for them right now:
+ -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh
+ -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe
+ -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb
+ -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2
+}
+
+-- Derive an extra "<op>s_N" mnemonic for every opcode whose template ends in
+-- the "s" marker character.
+do
+  -- New entries are collected separately first: adding keys to map_op while
+  -- pairs() is still traversing it is not permitted.
+  local svariants = {}
+  for opname, template in pairs(map_op) do
+    if sub(template, -1) == "s" then
+      -- Increment the character code of the third hex digit of the opcode and
+      -- strip the trailing "s" marker from the template.
+      local digit3 = char(byte(template, 3) + 1)
+      local stemplate = sub(template, 1, 2)..digit3..sub(template, 4, -2)
+      -- Insert "s" in front of the two character "_N" suffix of the name.
+      local sname = sub(opname, 1, -3).."s"..sub(opname, -2)
+      svariants[sname] = stemplate
+    end
+  end
+  for sname, stemplate in pairs(svariants) do
+    map_op[sname] = stemplate
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Parse a general purpose register operand.
+-- Accepts "rN" with N in 0..15, the name of a type registered in map_type
+-- (which then supplies its bound register), or "type:rN" to override that
+-- register. Returns the register number and the matched map_type entry
+-- (nil for a plain register name).
+local function parse_gpr(expr)
+  local typename, override = match(expr, "^([%w_]+):(r1?[0-9])$")
+  local key = typename or expr
+  local tp = map_type[key]
+  if tp then
+    -- Substitute the register bound to the type, preferring the override.
+    expr = override or tp.reg
+    if not expr then
+      werror("type `"..key.."' needs a register override")
+    end
+  end
+  local digits = match(expr, "^r(1?[0-9])$")
+  local regnum = digits and tonumber(digits)
+  if regnum and regnum <= 15 then return regnum, tp end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a register operand with an optional leading "+" or "-".
+-- Returns the register number and true if the sign was "-".
+local function parse_gpr_pm(expr)
+  local sign, regname = match(expr, "^([+-]?)(.*)$")
+  local regnum = parse_gpr(regname)  -- Only the register number is kept.
+  local negative = (sign == "-")
+  return regnum, negative
+end
+
+-- Parse a register list of the form "{r0, r1, ...}".
+-- Returns a bit mask with bit N set for every register rN in the list.
+-- Listing the same register twice is an error.
+local function parse_reglist(reglist)
+  local inner = match(reglist, "^{%s*([^}]*)}$")
+  if not inner then werror("register list expected") end
+  local mask = 0
+  -- A trailing comma is appended so that every item ends in a separator.
+  for item in gmatch(inner..",", "%s*([^,]*),") do
+    local name = gsub(item, "%s+$", "")
+    local regbit = 2^parse_gpr(name)
+    -- Extract the bit for this register from the mask accumulated so far.
+    local seen = ((mask - (mask % regbit)) / regbit) % 2
+    if seen ~= 0 then
+      werror("duplicate register `"..item.."'")
+    end
+    mask = mask + regbit
+  end
+  return mask
+end
+
+-- Parse an immediate operand of the form "#value".
+--   bits    width of the target field in bits
+--   shift   bit position of the field within the instruction word
+--   scale   log2 of the factor the value must be a multiple of
+--   signed  true to accept a two's complement value
+-- A numeric literal is range-checked here and returned already scaled and
+-- shifted into place. Any other expression is deferred with an IMM action
+-- and 0 is returned.
+local function parse_imm(imm, bits, shift, scale, signed)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local n = tonumber(imm)
+  if n then
+    if n % 2^scale == 0 then
+      n = n / 2^scale
+      if signed then
+        if n >= 0 then
+          if n < 2^(bits-1) then return n*2^shift end
+        else
+          -- The smallest representable value is -2^(bits-1). Accepting one
+          -- less would wrap around to 2^(bits-1)-1 in the expression below
+          -- and silently encode a positive value.
+          if n >= -(2^(bits-1)) then return (n+2^bits)*2^shift end
+        end
+      else
+        if n >= 0 and n <= 2^bits-1 then return n*2^shift end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  else
+    -- The action argument packs: signed flag (32768), scale*1024, bits*32
+    -- and the shift in the low bits.
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+-- Encode a numeric immediate as an 8 bit value plus a rotation count.
+-- The argument is the operand text after the leading "#" (the caller strips
+-- it). Returns the value in bits 0-7 combined with the rotation count times
+-- 256, or raises an error if none of the 16 rotations yields a value that
+-- fits into 8 bits. A non-numeric expression is deferred with an IMM12 action
+-- and 0 is returned.
+local function parse_imm12(imm)
+ local n = tonumber(imm)
+ if n then
+ local m = n
+ for i=0,-15,-1 do
+ -- (i%16) runs through 0, 15, 14, ..., 1 as i counts down.
+ if m >= 0 and m <= 255 and n % 1 == 0 then return m + (i%16) * 256 end
+ -- Rotate m right by two bits within a 32 bit word.
+ local t = m % 4
+ m = (m - t) / 4 + t * 2^30
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM12", 0, imm)
+ return 0
+ end
+end
+
+-- Parse a "#value" operand holding an unsigned 16 bit integer.
+-- The low 12 bits stay in bits 0-11 of the result and the upper 4 bits are
+-- moved up to bits 16-19. A non-numeric expression is deferred with an IMM16
+-- action and 0 is returned.
+local function parse_imm16(imm)
+  imm = match(imm, "^#(.*)$")
+  if not imm then werror("expected immediate operand") end
+  local value = tonumber(imm)
+  if not value then
+    waction("IMM16", 32*16, imm)
+    return 0
+  end
+  local is_u16 = value >= 0 and value <= 65535 and value % 1 == 0
+  if is_u16 then
+    local low12 = value % 4096
+    local high4 = value - low12
+    return high4 * 16 + low12
+  end
+  werror("out of range immediate `"..imm.."'")
+end
+
+-- Encode the immediate offset of a load/store instruction.
+--   imm  operand text after the leading "#"
+--   ext  true for the split 8 bit offset form, false for the 12 bit form
+-- The magnitude goes into the offset field and 0x00800000 is added for
+-- non-negative offsets. A non-numeric expression is deferred with an
+-- IMML8/IMML12 action and 0 is returned.
+local function parse_imm_load(imm, ext)
+  local ofs = tonumber(imm)
+  if not ofs then
+    waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12), imm)
+    return 0
+  end
+  if ext then
+    if ofs >= -255 and ofs <= 255 then
+      local upbit = 0x00800000
+      if ofs < 0 then
+        ofs = -ofs
+        upbit = 0
+      end
+      -- Split the magnitude: low nibble stays, high nibble moves to bits 8-11.
+      local low4 = ofs % 16
+      return (ofs-low4)*16+low4 + upbit
+    end
+  elseif ofs >= -4095 and ofs <= 4095 then
+    if ofs < 0 then return -ofs end
+    return ofs+0x00800000
+  end
+  werror("out of range immediate `"..imm.."'")
+end
+
+-- Parse a shift operand: "rrx", "<shift> #imm" or "<shift> rN".
+--   shift  operand text
+--   gprok  true if a register-specified shift amount is acceptable
+-- Returns the shift bits to be added to the opcode.
+local function parse_shift(shift, gprok)
+  if shift == "rrx" then return 3 * 32 end
+  local kind, amount = match(shift, "^(%S+)%s*(.*)$")
+  local code = map_shift[kind]
+  if not code then werror("expected shift operand") end
+  if sub(amount, 1, 1) == "#" then
+    -- Immediate amount: 5 bit field at bit 7.
+    return parse_imm(amount, 5, 7, 0, false) + code * 32
+  end
+  if not gprok then werror("expected immediate shift operand") end
+  -- Register amount: register number at bit 8, plus bit 4 as the marker.
+  return parse_gpr(amount) * 256 + code * 32 + 16
+end
+
+-- Parse the address operand(s) of a load/store instruction and merge them
+-- into the opcode.
+--   params   operand list of the instruction
+--   nparams  number of operands (not used here)
+--   n        index of the first address operand in params
+--   op       opcode built so far (already contains the destination register)
+-- Returns the completed opcode.
+-- Accepted forms: "[rn]", "[rn, #imm]" and "[rn, +-rm{, shift}]", the latter
+-- two optionally followed by "!"; the two-operand forms "[rn], #imm" and
+-- "[rn], +-rm{, shift}"; and a typed operand (a .type name followed by a
+-- member expression) whose offset is resolved through the type's Dt macro.
+local function parse_load(params, nparams, n, op)
+  local oplo = op % 256
+  local ext = (oplo ~= 0)
+  -- ldrd/ldrsb share the low opcode byte 0xd0 and strd/ldrsh share 0xf0.
+  -- Only bit 20 tells them apart: it is clear for the doubleword forms.
+  -- Without this test ldrsb/ldrsh were subjected to the doubleword checks.
+  local bit20 = ((op - (op % 0x00100000)) / 0x00100000) % 2
+  local dword = (bit20 == 0) and (oplo == 208 or oplo == 240)
+  local ldrd = dword and (oplo == 208)
+  local d
+  if dword then
+    d = ((op - (op % 4096)) / 4096) % 16
+    if d % 2 ~= 0 then werror("odd destination register") end
+  end
+  local p1, wb = match(params[n], "^%[%s*(.-)%s*%](!?)$")
+  local p2 = params[n+1]
+  if not p1 then
+    if not p2 then
+      -- Typed operand: a register or type name followed by a tail that is
+      -- passed to the type's offset macro.
+      local reg, tailr = match(params[n], "^([%w_:]+)%s*(.*)$")
+      if reg and tailr ~= "" then
+        local base, tp = parse_gpr(reg)
+        if tp then
+          waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12),
+                  format(tp.ctypefmt, tailr))
+          return op + base * 65536 + 0x01000000 + (ext and 0x00400000 or 0)
+        end
+      end
+    end
+    werror("expected address operand")
+  end
+  if wb == "!" then op = op + 0x00200000 end
+  if p2 then
+    -- Offset given as separate operand(s) after the closing bracket.
+    if wb == "!" then werror("bad use of '!'") end
+    local p3 = params[n+2]
+    op = op + parse_gpr(p1) * 65536
+    local imm = match(p2, "^#(.*)$")
+    if imm then
+      local m = parse_imm_load(imm, ext)
+      if p3 then werror("too many parameters") end
+      op = op + m + (ext and 0x00400000 or 0)
+    else
+      local m, neg = parse_gpr_pm(p2)
+      if ldrd and (m == d or m-1 == d) then werror("register conflict") end
+      op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
+      if p3 then
+        -- Same rule as for the bracketed form below: the extended
+        -- load/store forms take no shift.
+        if ext then werror("too many parameters") end
+        op = op + parse_shift(p3)
+      end
+    end
+  else
+    -- Everything is inside the brackets.
+    local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$")
+    op = op + parse_gpr(p1a) * 65536 + 0x01000000
+    if p2 ~= "" then
+      local imm = match(p2, "^,%s*#(.*)$")
+      if imm then
+        local m = parse_imm_load(imm, ext)
+        op = op + m + (ext and 0x00400000 or 0)
+      else
+        local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$")
+        local m, neg = parse_gpr_pm(p2a)
+        if ldrd and (m == d or m-1 == d) then werror("register conflict") end
+        op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
+        if p3 ~= "" then
+          if ext then werror("too many parameters") end
+          op = op + parse_shift(p3)
+        end
+      end
+    else
+      -- Plain "[rn]": encoded as a zero immediate offset.
+      if wb == "!" then werror("bad use of '!'") end
+      op = op + (ext and 0x00c00000 or 0x00800000)
+    end
+  end
+  return op
+end
+
+-- Parse a label operand.
+--   label  operand text
+--   def    true when parsing a label definition, false for a reference
+-- Returns the label mode ("PC", "LG" or "EXT"), a number, and for PC labels
+-- the expression text as a third value.
+local function parse_label(label, def)
+  local lead = sub(label, 1, 2)
+  if lead == "=>" then
+    -- =>expr: PC label; the expression is passed on as text.
+    return "PC", 0, sub(label, 3)
+  elseif lead == "->" then
+    -- ->name: global label, numbered through map_global.
+    return "LG", map_global[sub(label, 3)]
+  end
+  if def then
+    -- Definition of a local label: a single digit 1-9, mapped to 11-19.
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- Reference to a local label: ">N" gives 1-9, "<N" gives 11-19.
+    local direction, digit = match(label, "^([<>])([1-9])$")
+    if direction then
+      local bias = (direction == ">") and 0 or 10
+      return "LG", digit + bias
+    end
+    -- Reference to an external symbol: "extern name".
+    local symbol = match(label, "^extern%s+(%S+)$")
+    if symbol then
+      return "EXT", map_extern[symbol]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+-- A template is 8 hex digits (the base opcode) followed by one character per
+-- operand kind. Called without params it returns the operand part of the
+-- template; otherwise it parses the operands in order, merges them into the
+-- opcode and emits the resulting instruction word.
+map_op[".template__"] = function(params, template, nparams)
+ if not params then return sub(template, 9) end
+ local op = tonumber(sub(template, 1, 8), 16)
+ local n = 1
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 3 positions (rlwinm).
+ -- NOTE(review): rlwinm is a PowerPC mnemonic; this remark looks carried over
+ -- from the PPC module -- confirm.
+ if secpos+3 > maxsecpos then wflush() end
+ -- Reserve the output position now; actions emitted while parsing the
+ -- operands come after it.
+ local pos = wpos()
+
+ -- Process each character.
+ for p in gmatch(sub(template, 9), ".") do
+ if p == "D" then
+ -- Register placed at bit 12.
+ op = op + parse_gpr(params[n]) * 4096; n = n + 1
+ elseif p == "N" then
+ -- Register placed at bit 16.
+ op = op + parse_gpr(params[n]) * 65536; n = n + 1
+ elseif p == "S" then
+ -- Register placed at bit 8.
+ op = op + parse_gpr(params[n]) * 256; n = n + 1
+ elseif p == "M" then
+ -- Register placed at bit 0.
+ op = op + parse_gpr(params[n]); n = n + 1
+ elseif p == "P" then
+ -- Either "#imm" (sets 0x02000000) or a register at bit 0.
+ local imm = match(params[n], "^#(.*)$")
+ if imm then
+ op = op + parse_imm12(imm) + 0x02000000
+ else
+ op = op + parse_gpr(params[n])
+ end
+ n = n + 1
+ elseif p == "p" then
+ -- Shift operand; register-specified shift amounts are allowed.
+ op = op + parse_shift(params[n], true); n = n + 1
+ elseif p == "L" then
+ -- Load/store address; consumes all remaining operands.
+ op = parse_load(params, nparams, n, op)
+ elseif p == "B" then
+ -- Branch target. Note: the locals n and s shadow the operand index.
+ local mode, n, s = parse_label(params[n], false)
+ waction("REL_"..mode, n, s, 1)
+ elseif p == "C" then -- blx gpr vs. blx label.
+ local p = params[n]
+ if match(p, "^([%w_]+):(r1?[0-9])$") or match(p, "^r(1?[0-9])$") then
+ op = op + parse_gpr(p)
+ else
+ -- The label form replaces the whole opcode, so a conditional variant
+ -- (first hex digit below 0xe) is rejected.
+ if op < 0xe0000000 then werror("unconditional instruction") end
+ local mode, n, s = parse_label(params[n], false)
+ waction("REL_"..mode, n, s, 1)
+ op = 0xfa000000
+ end
+ elseif p == "n" then
+ -- Base register at bit 16 with optional "!" suffix (sets 0x00200000).
+ local r, wb = match(params[n], "^([^!]*)(!?)$")
+ op = op + parse_gpr(r) * 65536 + (wb == "!" and 0x00200000 or 0)
+ n = n + 1
+ elseif p == "R" then
+ -- Register list bit mask in the low bits.
+ op = op + parse_reglist(params[n]); n = n + 1
+ elseif p == "W" then
+ -- 16 bit immediate, split by parse_imm16.
+ op = op + parse_imm16(params[n]); n = n + 1
+ elseif p == "v" then
+ -- Unsigned 5 bit immediate at bit 7.
+ op = op + parse_imm(params[n], 5, 7, 0, false); n = n + 1
+ elseif p == "X" then
+ -- Unsigned 5 bit immediate at bit 16.
+ op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1
+ elseif p == "K" then
+ -- Unsigned 16 bit literal: low 4 bits stay at bit 0, the upper 12 bits
+ -- are moved to bit 8.
+ local imm = tonumber(match(params[n], "^#(.*)$")); n = n + 1
+ if not imm or imm % 1 ~= 0 or imm < 0 or imm > 0xffff then
+ werror("bad immediate operand")
+ end
+ local t = imm % 16
+ op = op + (imm - t) * 16 + t
+ elseif p == "T" then
+ -- Unsigned 24 bit immediate at bit 0.
+ op = op + parse_imm(params[n], 24, 0, 0, false); n = n + 1
+ elseif p == "s" then
+ -- Ignored.
+ else
+ assert(false)
+ end
+ end
+ wputpos(pos, op)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode marking where the action list gets written to the output.
+-- Without params it returns the operand description.
+map_op[".actionlist_1"] = function(params)
+  if params then
+    local cvar = params[1]  -- No syntax check. You get to keep the pieces.
+    local function emit(out) writeactions(out, cvar) end
+    wline(emit)
+    return
+  end
+  return "cvar"
+end
+
+-- Pseudo-opcode marking where the enum of globals gets written to the output.
+-- Without params it returns the operand description.
+map_op[".globals_1"] = function(params)
+  if params then
+    local enumprefix = params[1]  -- No syntax check. You get to keep the pieces.
+    local function emit(out) writeglobals(out, enumprefix) end
+    wline(emit)
+    return
+  end
+  return "prefix"
+end
+
+-- Pseudo-opcode marking where the names of the globals get written to the
+-- output. Without params it returns the operand description.
+map_op[".globalnames_1"] = function(params)
+  if params then
+    local cvar = params[1]  -- No syntax check. You get to keep the pieces.
+    local function emit(out) writeglobalnames(out, cvar) end
+    wline(emit)
+    return
+  end
+  return "cvar"
+end
+
+-- Pseudo-opcode marking where the names of the externs get written to the
+-- output. Without params it returns the operand description.
+map_op[".externnames_1"] = function(params)
+  if params then
+    local cvar = params[1]  -- No syntax check. You get to keep the pieces.
+    local function emit(out) writeexternnames(out, cvar) end
+    wline(emit)
+    return
+  end
+  return "cvar"
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for label definitions (the trailing colon form is converted
+-- to this). Extern labels cannot be defined.
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  -- A label needs one section buffer position.
+  if secpos+1 > maxsecpos then wflush() end
+  local kind, num, expr = parse_label(params[1], true)
+  if kind == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..kind, num, expr, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for data storage: emits every operand as one word.
+-- Only numeric literals are accepted; negative values have 2^32 added.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _, text in ipairs(params) do
+    local word = tonumber(text)
+    if not word then werror("bad immediate `"..text.."'") end
+    if word < 0 then
+      word = word + 2^32
+    end
+    wputw(word)
+    -- Flush before the section buffer position limit is reached.
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+-- The operand must be a power of two in the range 2 to 256.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1])
+  -- Compare against 2, 4, ..., 256 in turn; a non-numeric operand matches
+  -- none of them.
+  local pow2 = 2
+  for _ = 1, 8 do
+    if align == pow2 then
+      waction("ALIGN", align-1, nil, 1)  -- Action byte is 2**n-1.
+      return
+    end
+    pow2 = pow2 * 2
+  end
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions, mapping a name to a C type
+-- and optionally binding a register to it. Also serves as ".type_2".
+local function op_type(params, nparams)
+  if not params then
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name = params[1]
+  local ctype = params[2]
+  local reg = params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Make "#name" expand to the size of the C type. It is a bit unclean to
+  -- keep this in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Register the type under the next free number and emit its offset macro.
+  local num = ctypenum + 1
+  local entry = {}
+  entry.ctype = ctype
+  entry.ctypefmt = format("Dt%X(%%s)", num)
+  entry.reg = reg
+  map_type[name] = entry
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_3"] = op_type
+map_op[".type_2"] = op_type
+
+-- Write all defined types, sorted by name, to the given output.
+-- Each line shows the name, the C type and the bound register (if any).
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do
+    names[#names+1] = name
+  end
+  sort(names)
+  out:write("Type definitions:\n")
+  for idx = 1, #names do
+    local name = names[idx]
+    local tp = map_type[name]
+    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+-- Emits a SECTION action for section number num and then flushes the pending
+-- actions, since nothing may follow SECTION in the same dasm_put() call.
+function _M.section(num)
+ waction("SECTION", num)
+ wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Write the module banner (arch, version, release date) followed by the
+-- action list description to the given output.
+function _M.dumparch(out)
+  local banner = format("DynASM %s version %s, released %s\n\n",
+                        _info.arch, _info.version, _info.release)
+  out:write(banner)
+  dumpactions(out)
+end
+
+-- Write all user defined elements to the given output, in this order:
+-- types, globals, externs.
+function _M.dumpdef(out, lvl)
+  local dumpers = { dumptypes, dumpglobals, dumpexterns }
+  for idx = 1, #dumpers do
+    dumpers[idx](out, lvl)
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Exchange callbacks with the DynASM core: store the line writer and the
+-- error, fatal error and warning reporters, and hand back the flush function.
+function _M.passcb(wl, we, wf, ww)
+  wline = wl
+  werror = we
+  wfatal = wf
+  wwarn = ww
+  return wflush
+end
+
+-- Remember the architecture name and the option table for this module.
+function _M.setup(arch, opt)
+  g_arch = arch
+  g_opt = opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+-- Lookups that miss in map_op fall back to the core opcode map and then to
+-- conditional variants of existing opcodes; map_def falls back to
+-- map_archdef. Returns map_op and map_def.
+function _M.mergemaps(map_coreop, map_def)
+  local function lookup(t, k)
+    local coreop = map_coreop[k]
+    if coreop then return coreop end
+    -- Treat the two characters in front of the "_N" suffix as a condition
+    -- code and look up the opcode name without them.
+    local condnum = map_cond[sub(k, -4, -3)]
+    if condnum then
+      local base = rawget(t, sub(k, 1, -5)..sub(k, -2))
+      -- The first hex digit of the template is replaced by the condition.
+      if base then return format("%x%s", condnum, sub(base, 2)) end
+    end
+  end
+  setmetatable(map_op, { __index = lookup })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
new file mode 100644
index 0000000..e928ffe
--- /dev/null
+++ b/dynasm/dasm_ppc.h
@@ -0,0 +1,408 @@
+/*
+** DynASM PPC encoding engine.
+** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+** Released under the MIT/X license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "ppc"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+/* The order is significant: the encoder passes compare action numbers
+** against DASM_ALIGN and DASM_REL_PC to decide whether an action owns a
+** section buffer slot and whether it consumes a dasm_put() argument. Action
+** list words whose upper 16 bits are >= DASM__MAX are instruction words.
+*/
+enum {
+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+ DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+/* Positions stored in pos/epos carry the section number in their upper 8
+** bits (see DASM_SEC2POS); rbuf is offset accordingly so that it can be
+** indexed with such a position directly.
+*/
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+/* The trailing sections array is declared with one element and the structure
+** is allocated with DASM_PSZ(maxsection) bytes to make room for the rest.
+*/
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections.
+** One dasm_Section is already part of dasm_State, hence the -1. The argument
+** is parenthesized so that an expression argument expands correctly.
+*/
+#define DASM_PSZ(ms) (sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
+
+
+/* Allocate a DynASM state with room for maxsection sections and reset it.
+** No label arrays or section buffers are allocated yet.
+*/
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int secidx;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->maxsection = maxsection;
+  D->globals = NULL;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  for (secidx = 0; secidx < maxsection; secidx++) {
+    dasm_Section *sec = &D->sections[secidx];
+    sec->buf = NULL;  /* Need this for pass3. */
+    sec->rbuf = sec->buf - DASM_SEC2POS(secidx);
+    sec->bsize = 0;
+    sec->epos = 0;  /* Wrong, but is recalculated after resize. */
+  }
+}
+
+/* Release all section buffers, both label arrays and the state itself. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int secidx;
+  for (secidx = 0; secidx < D->maxsection; secidx++) {
+    dasm_Section *sec = &D->sections[secidx];
+    if (sec->buf) {
+      DASM_M_FREE(Dst, sec->buf, sec->bsize);
+    }
+  }
+  if (D->pclabels) {
+    DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  }
+  if (D->lglabels) {
+    DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  }
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Register the caller's array for global label addresses and size the
+** local/global label table for 10 local slots plus maxgl globals.
+** Must be called before dasm_setup().
+*/
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  size_t need = (10+maxgl)*sizeof(int);
+  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, need);
+}
+
+/* Make room for maxpc PC labels and zero the newly added part of the array.
+** Can be called after dasm_setup(), too.
+*/
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t oldsize = D->pcsize;
+  unsigned char *fresh;
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  fresh = ((unsigned char *)D->pclabels) + oldsize;
+  memset((void *)fresh, 0, D->pcsize - oldsize);
+}
+
+/* Prepare the state for a new encoding run with the given action list:
+** clear the status and all labels, select section 0 and rewind every section.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int secidx;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) {
+    memset((void *)D->pclabels, 0, D->pcsize);
+  }
+  for (secidx = 0; secidx < D->maxsection; secidx++) {
+    dasm_Section *sec = &D->sections[secidx];
+    sec->pos = DASM_SEC2POS(secidx);
+    sec->ofs = 0;
+  }
+}
+
+
+/* Checking macros for pass 1 (dasm_put). With DASM_CHECKS defined a failed
+** check stores the status code, or'ed with the action list offset, in
+** D->status and returns from the calling function. CKPL verifies that the
+** label pointer pl lies inside the lg/pc label array. Without DASM_CHECKS
+** both macros expand to nothing.
+*/
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+/* start is the offset into the action list; the variable arguments supply one
+** int for every action from DASM_REL_PC on. The section buffer receives
+** start followed by one slot for each action that needs a buffer position.
+** Label table entries are negative once a label is defined (negated buffer
+** position) and positive while they head a chain of pending references.
+*/
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ /* Grow the section buffer if fewer than DASM_MAXSECPOS slots may be left. */
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start;
+
+ va_start(ap, start);
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ if (action >= DASM__MAX) {
+ /* Plain instruction word: only accounts for 4 bytes of output. */
+ ofs += 4;
+ } else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP: goto stop;
+ case DASM_SECTION:
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ /* Escaped word: skip the following action list entry. */
+ case DASM_ESC: p++; ofs += 4; break;
+ case DASM_REL_EXT: break;
+ /* Assume maximum padding for now; the estimate is stored for pass 2. */
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+ if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
+ pl += 10; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM:
+#ifdef DASM_CHECKS
+ /* The low (ins>>10)&31 bits of n must be zero and the value must fit
+ ** the field of (ins>>5)&31 bits; bit 15 of ins selects the signed check. */
+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+ if (ins & 0x8000)
+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+ else
+ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+/* Stores the total code size in *szp and returns DASM_S_OK. With DASM_CHECKS
+** defined it returns the pending error status instead, or DASM_S_UNDEF_PC
+** or'ed with the label number for a PC label that is referenced but never
+** defined; *szp is 0 in these cases.
+*/
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ /* Entries from index 20 on are the global labels. */
+ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ /* Replay the recorded dasm_put() calls. ofs starts as the offset of this
+ ** section and is reduced by every byte of padding an align does not need. */
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ /* Turn the pass 1 estimate into the final offset of the label. */
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: pos++; break;
+ }
+ }
+ stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+/* Checking macro for pass 3 (dasm_encode). With DASM_CHECKS defined a failed
+** check makes the calling function return the status code or'ed with the
+** action list offset. Without DASM_CHECKS it expands to nothing.
+*/
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+/* Writes the machine code of all sections, one after the other, into buffer.
+** Returns DASM_S_OK, or DASM_S_PHASE if the number of bytes written differs
+** from the code size computed by dasm_link(). With DASM_CHECKS defined it may
+** also return an UNDEF_LG, UNDEF_PC or RANGE_REL status.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ char *base = (char *)buffer;
+ unsigned int *cp = (unsigned int *)buffer;
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ /* Actions from DASM_ALIGN on own one slot in the section buffer. */
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: *cp++ = *p++; break;
+ case DASM_REL_EXT:
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
+ goto patchrel;
+ case DASM_ALIGN:
+ /* Pad with the word 0x60000000 up to the requested alignment. */
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG);
+ /* Intentional fallthrough: both label kinds are resolved the same way. */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+ patchrel:
+ /* Bit 11 of ins selects the 16 bit displacement field, otherwise the
+ ** 26 bit field is used. The displacement is merged into the instruction
+ ** word emitted just before. */
+ CK((n & 3) == 0 &&
+ (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
+ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
+ cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
+ break;
+ case DASM_LABEL_LG:
+ /* Only global labels (numbers >= 20) are exported to the globals array. */
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+ break;
+ case DASM_LABEL_PC: break;
+ case DASM_IMM:
+ /* ins packs scale (bits 10-14), field width (bits 5-9) and shift
+ ** (bits 0-4) of the immediate. */
+ cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31);
+ break;
+ default: *cp++ = ins; break;
+ }
+ }
+ stop: (void)0;
+ }
+ }
+
+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+#undef CK
+
+/* Look up the offset stored for PC label pc.
+** Returns the offset for a defined label, -1 for a label that is referenced
+** but not defined, and -2 for an unused or out of range label number.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  int pos;
+  if (pc*sizeof(int) >= D->pcsize)
+    return -2;  /* Out of range. */
+  pos = D->pclabels[pc];
+  if (pos < 0)
+    return *DASM_POS2PTR(D, -pos);
+  return pos > 0 ? -1 /* Undefined. */ : -2 /* Unused. */;
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps.
+** Flags local labels 1-9 that are still referenced but undefined and resets
+** the others. If secmatch is >= 0 it also checks that the active section is
+** the one with that index. Returns the resulting status.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  int lbl;
+  if (D->status != DASM_S_OK)
+    return D->status;
+  for (lbl = 1; lbl <= 9; lbl++) {
+    if (D->lglabels[lbl] > 0) {
+      D->status = DASM_S_UNDEF_LG|lbl;
+      return D->status;
+    }
+    D->lglabels[lbl] = 0;
+  }
+  if (secmatch >= 0 && D->section != &D->sections[secmatch])
+    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+  return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
new file mode 100644
index 0000000..01376f7
--- /dev/null
+++ b/dynasm/dasm_ppc.lua
@@ -0,0 +1,1225 @@
+------------------------------------------------------------------------------
+-- DynASM PPC module.
+--
+-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+ arch = "ppc",
+ description = "DynASM PPC module",
+ version = "1.2.2",
+ vernum = 10202,
+ release = "2011-01-09",
+ author = "Mike Pall",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable = assert, setmetatable
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch = _s.match, _s.gmatch
+local concat, sort = table.concat, table.sort
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Names of the encoding engine actions, listed in action code order.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM",
+}
+
+-- Upper bound on the section buffer positions used by one dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Reverse lookup: action name -> zero-based action number.
+local map_action = {}
+for idx = 1, #action_names do
+  map_action[action_names[idx]] = idx - 1
+end
+
+-- Action words accumulated so far.
+local actlist = {}
+
+-- Arguments for the next dasm_put(). The first one is the offset into the
+-- action list, which starts out as 0.
+local actargs = { 0 }
+
+-- Section buffer positions consumed by the pending dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Format a number as hexadecimal text that is exactly 8 digits long.
+local function tohex(x)
+  local hex = format("%08x", x)
+  -- Keep only the last 8 digits: avoids a 64 bit portability problem in Lua.
+  return sub(hex, -8)
+end
+
+-- Print every action name together with its code in hex and in decimal.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for idx = 1, #action_names do
+    local name = action_names[idx]
+    local code = map_action[name]
+    out:write(format(" %-10s %02X %d\n", name, code, code))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+--   out   output stream (anything with a :write method)
+--   name  C identifier of the emitted array
+-- An empty action list is emitted as a one-element array holding STOP.
+local function writeactions(out, name)
+  local nn = #actlist
+  if nn == 0 then
+    -- Lua arrays are 1-based, so a placeholder stored at index 0 would never
+    -- be seen by the code below. Emit the lone STOP word directly instead.
+    out:write("static const unsigned int ", name, "[", 1, "] = {\n")
+    assert(out:write("0x", tohex(map_action.STOP), "\n};\n\n"))
+    return
+  end
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  -- All words but the last are followed by a comma.
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(actlist[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Append one word to the action list. The word must be an integer in the
+-- range 0 .. 0xffffffff.
+local function wputxw(n)
+  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
+  assert(in_range, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Append an action word: the action code goes into the upper 16 bits and
+-- `val` (default 0) is added on top. If `a` is given it is queued as an
+-- argument for the next dasm_put(). The buffer position counter advances by
+-- `num` (default 1) whenever `a` or `num` is given.
+local function waction(action, val, a, num)
+  local code = map_action[action]
+  assert(code, "bad action name `"..action.."'")
+  wputxw(code * 0x10000 + (val or 0))
+  if a then
+    actargs[#actargs+1] = a
+  end
+  if a or num then
+    secpos = secpos + (num or 1)
+  end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+-- Emits a dasm_put() line for everything queued since the last flush. Unless
+-- `term` is set, the action list is terminated with STOP first.
+local function wflush(term)
+  local pending = #actlist ~= actargs[1]
+  if pending then
+    if not term then waction("STOP") end
+    local args = concat(actargs, ", ")
+    wline(format("dasm_put(Dst, %s);", args), true)
+    -- The next dasm_put() starts at the current end of the action list ...
+    actargs = { #actlist }
+    -- ... and that offset argument occupies a buffer position, too.
+    secpos = 1
+  end
+end
+
+-- Append a data word. Values up to 0xffffff are preceded by an ESC action.
+local function wputw(n)
+  local needs_escape = n <= 0xffffff
+  if needs_escape then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve the next action list slot and return its index. The slot holds an
+-- empty string until wputpos() fills it in.
+local function wpos()
+  local slot = #actlist + 1
+  actlist[slot] = ""
+  return slot
+end
+
+-- Store a word into a previously reserved action list slot. The word must be
+-- an integer in the range 0 .. 0xffffffff.
+local function wputpos(pos, n)
+  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
+  assert(in_range, "word out of range")
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. Unknown names are assigned the
+-- next free number (starting at 20) on first lookup.
+local next_global = 20
+local map_global = setmetatable({}, {
+  __index = function(tbl, label)
+    if not match(label, "^[%a_][%w_]*$") then
+      werror("bad global label")
+    end
+    local num = next_global
+    if num > 2047 then
+      werror("too many global labels")
+    end
+    next_global = num + 1
+    tbl[label] = num
+    return num
+  end,
+})
+
+-- List all global label names, ordered by label number.
+local function dumpglobals(out, lvl)
+  local names = {}
+  for label, num in pairs(map_global) do
+    names[num] = label
+  end
+  out:write("Global labels:\n")
+  local last = next_global - 1
+  for num = 20, last do
+    out:write(format(" %s\n", names[num]))
+  end
+  out:write("\n")
+end
+
+-- Emit a C enum with one prefixed member per global label (ordered by label
+-- number), closed by a `<prefix>_MAX` member.
+local function writeglobals(out, prefix)
+  local names = {}
+  for label, num in pairs(map_global) do
+    names[num] = label
+  end
+  out:write("enum {\n")
+  local last = next_global - 1
+  for num = 20, last do
+    out:write(" ", prefix, names[num], ",\n")
+  end
+  out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Emit a C array of strings called `name` that holds the global label names
+-- (ordered by label number) and ends with a null pointer.
+local function writeglobalnames(out, name)
+  local names = {}
+  for label, num in pairs(map_global) do
+    names[num] = label
+  end
+  out:write("static const char *const ", name, "[] = {\n")
+  local last = next_global - 1
+  for num = 20, last do
+    out:write(" \"", names[num], "\",\n")
+  end
+  out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. Unknown names are assigned the
+-- next free number (starting at 0) on first lookup; map_extern_ keeps the
+-- reverse mapping number -> name.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, {
+  __index = function(tbl, label)
+    -- No restrictions on the name for now.
+    local num = next_extern
+    if num > 2047 then
+      werror("too many extern labels")
+    end
+    next_extern = num + 1
+    tbl[label] = num
+    map_extern_[num] = label
+    return num
+  end,
+})
+
+-- List all extern label names, ordered by label number.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  local num = 0
+  while num < next_extern do
+    out:write(format(" %s\n", map_extern_[num]))
+    num = num + 1
+  end
+  out:write("\n")
+end
+
+-- Emit a C array of strings called `name` that holds the extern label names
+-- (ordered by label number) and ends with a null pointer.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  local num = 0
+  while num < next_extern do
+    out:write(" \"", map_extern_[num], "\",\n")
+    num = num + 1
+  end
+  out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+local map_archdef = { sp = "r1" } -- Ext. register name -> int. name.
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for Dt... macros).
+
+-- Reverse defines for registers: "r1" is reported as "sp", any other name is
+-- returned unchanged.
+function _M.revdef(s)
+  return s == "r1" and "sp" or s
+end
+
+-- Condition name -> condition number.
+local map_cond = {
+  lt = 0, gt = 1, eq = 2, so = 3,
+  ge = 4, le = 5, ne = 6, ns = 7,
+}
+
+------------------------------------------------------------------------------
+
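+-- Template strings for PPC instructions.
+-- Each value is the 8 hex digit base opcode followed by one character per
+-- operand or encoding step; the ".template__" handler further down in this
+-- file interprets those characters.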
+local map_op = {
+ tdi_3 = "08000000ARI",
+ twi_3 = "0c000000ARI",
+ mulli_3 = "1c000000RRI",
+ subfic_3 = "20000000RRI",
+ cmplwi_3 = "28000000XRU",
+ cmplwi_2 = "28000000-RU",
+ cmpldi_3 = "28200000XRU",
+ cmpldi_2 = "28200000-RU",
+ cmpwi_3 = "2c000000XRI",
+ cmpwi_2 = "2c000000-RI",
+ cmpdi_3 = "2c200000XRI",
+ cmpdi_2 = "2c200000-RI",
+ addic_3 = "30000000RRI",
+ ["addic._3"] = "34000000RRI",
+ addi_3 = "38000000RR0I",
+ li_2 = "38000000RI",
+ la_2 = "38000000RD",
+ addis_3 = "3c000000RR0I",
+ lis_2 = "3c000000RI",
+ lus_2 = "3c000000RU",
+ bc_3 = "40000000AAK",
+ bcl_3 = "40000001AAK",
+ bdnz_1 = "42000000K",
+ bdz_1 = "42400000K",
+ sc_0 = "44000000",
+ b_1 = "48000000J",
+ bl_1 = "48000001J",
+ rlwimi_5 = "50000000RR~AAA.",
+ rlwinm_5 = "54000000RR~AAA.",
+ rlwnm_5 = "5c000000RR~RAA.",
+ ori_3 = "60000000RR~U",
+ nop_0 = "60000000",
+ oris_3 = "64000000RR~U",
+ xori_3 = "68000000RR~U",
+ xoris_3 = "6c000000RR~U",
+ ["andi._3"] = "70000000RR~U",
+ ["andis._3"] = "74000000RR~U",
+ lwz_2 = "80000000RD",
+ lwzu_2 = "84000000RD",
+ lbz_2 = "88000000RD",
+ lbzu_2 = "8c000000RD",
+ stw_2 = "90000000RD",
+ stwu_2 = "94000000RD",
+ stb_2 = "98000000RD",
+ stbu_2 = "9c000000RD",
+ lhz_2 = "a0000000RD",
+ lhzu_2 = "a4000000RD",
+ lha_2 = "a8000000RD",
+ lhau_2 = "ac000000RD",
+ sth_2 = "b0000000RD",
+ sthu_2 = "b4000000RD",
+ lmw_2 = "b8000000RD",
+ stmw_2 = "bc000000RD",
+ lfs_2 = "c0000000FD",
+ lfsu_2 = "c4000000FD",
+ lfd_2 = "c8000000FD",
+ lfdu_2 = "cc000000FD",
+ stfs_2 = "d0000000FD",
+ stfsu_2 = "d4000000FD",
+ stfd_2 = "d8000000FD",
+ stfdu_2 = "dc000000FD",
+ ld_2 = "e8000000RD", -- NYI: displacement must be divisible by 4.
+ ldu_2 = "e8000001RD",
+ lwa_2 = "e8000002RD",
+ std_2 = "f8000000RD",
+ stdu_2 = "f8000001RD",
+
+ -- Primary opcode 19:
+ mcrf_2 = "4c000000XX",
+ isync_0 = "4c00012c",
+ crnor_3 = "4c000042CCC",
+ crnot_2 = "4c000042CC=",
+ crandc_3 = "4c000102CCC",
+ crxor_3 = "4c000182CCC",
+ crclr_1 = "4c000182C==",
+ crnand_3 = "4c0001c2CCC",
+ crand_3 = "4c000202CCC",
+ creqv_3 = "4c000242CCC",
+ crset_1 = "4c000242C==",
+ crorc_3 = "4c000342CCC",
+ cror_3 = "4c000382CCC",
+ crmove_2 = "4c000382CC=",
+ bclr_2 = "4c000020AA",
+ bclrl_2 = "4c000021AA",
+ bcctr_2 = "4c000420AA",
+ bcctrl_2 = "4c000421AA",
+ blr_0 = "4e800020",
+ blrl_0 = "4e800021",
+ bctr_0 = "4e800420",
+ bctrl_0 = "4e800421",
+
+ -- Primary opcode 31:
+ cmpw_3 = "7c000000XRR",
+ cmpw_2 = "7c000000-RR",
+ cmpd_3 = "7c200000XRR",
+ cmpd_2 = "7c200000-RR",
+ tw_3 = "7c000008ARR",
+ subfc_3 = "7c000010RRR.",
+ subc_3 = "7c000010RRR~.",
+ mulhdu_3 = "7c000012RRR.",
+ addc_3 = "7c000014RRR.",
+ mulhwu_3 = "7c000016RRR.",
+ isel_4 = "7c00001eRRRC",
+ isellt_3 = "7c00001eRRR",
+ iselgt_3 = "7c00005eRRR",
+ iseleq_3 = "7c00009eRRR",
+ mfcr_1 = "7c000026R",
+ -- NYI: mtcrf, mtocrf, mfocrf
+ lwarx_3 = "7c000028RR0R",
+ ldx_3 = "7c00002aRR0R",
+ lwzx_3 = "7c00002eRR0R",
+ slw_3 = "7c000030RR~R.",
+ cntlzw_2 = "7c000034RR~",
+ sld_3 = "7c000036RR~R.",
+ and_3 = "7c000038RR~R.",
+ cmplw_3 = "7c000040XRR",
+ cmplw_2 = "7c000040-RR",
+ cmpld_3 = "7c200040XRR",
+ cmpld_2 = "7c200040-RR",
+ subf_3 = "7c000050RRR.",
+ sub_3 = "7c000050RRR~.",
+ ldux_3 = "7c00006aRR0R",
+ dcbst_2 = "7c00006c-RR",
+ lwzux_3 = "7c00006eRR0R",
+ cntlzd_2 = "7c000074RR~",
+ andc_3 = "7c000078RR~R.",
+ td_3 = "7c000088ARR",
+ mulhd_3 = "7c000092RRR.",
+ mulhw_3 = "7c000096RRR.",
+ ldarx_3 = "7c0000a8RR0R",
+ dcbf_2 = "7c0000ac-RR",
+ lbzx_3 = "7c0000aeRR0R",
+ neg_2 = "7c0000d0RR.",
+ lbzux_3 = "7c0000eeRR0R",
+ popcntb_2 = "7c0000f4RR~",
+ not_2 = "7c0000f8RR~%.",
+ nor_3 = "7c0000f8RR~R.",
+ subfe_3 = "7c000110RRR.",
+ sube_3 = "7c000110RRR~.",
+ adde_3 = "7c000114RRR.",
+ stdx_3 = "7c00012aRR0R",
+ stwcx_3 = "7c00012cRR0R.",
+ stwx_3 = "7c00012eRR0R",
+ prtyw_2 = "7c000134RR~",
+ stdux_3 = "7c00016aRR0R",
+ stwux_3 = "7c00016eRR0R",
+ prtyd_2 = "7c000174RR~",
+ subfze_2 = "7c000190RR.",
+ addze_2 = "7c000194RR.",
+ stdcx_3 = "7c0001acRR0R.",
+ stbx_3 = "7c0001aeRR0R",
+ subfme_2 = "7c0001d0RR.",
+ mulld_3 = "7c0001d2RRR.",
+ addme_2 = "7c0001d4RR.",
+ mullw_3 = "7c0001d6RRR.",
+ dcbtst_2 = "7c0001ec-RR",
+ stbux_3 = "7c0001eeRR0R",
+ add_3 = "7c000214RRR.",
+ dcbt_2 = "7c00022c-RR",
+ lhzx_3 = "7c00022eRR0R",
+ eqv_3 = "7c000238RR~R.",
+ eciwx_3 = "7c00026cRR0R",
+ lhzux_3 = "7c00026eRR0R",
+ xor_3 = "7c000278RR~R.",
+ mfspefscr_1 = "7c0082a6R",
+ mfxer_1 = "7c0102a6R",
+ mflr_1 = "7c0802a6R",
+ mfctr_1 = "7c0902a6R",
+ lwax_3 = "7c0002aaRR0R",
+ lhax_3 = "7c0002aeRR0R",
+ mftb_1 = "7c0c42e6R",
+ mftbu_1 = "7c0d42e6R",
+ lwaux_3 = "7c0002eaRR0R",
+ lhaux_3 = "7c0002eeRR0R",
+ sthx_3 = "7c00032eRR0R",
+ orc_3 = "7c000338RR~R.",
+ ecowx_3 = "7c00036cRR0R",
+ sthux_3 = "7c00036eRR0R",
+ or_3 = "7c000378RR~R.",
+ mr_2 = "7c000378RR~%.",
+ divdu_3 = "7c000392RRR.",
+ divwu_3 = "7c000396RRR.",
+ mtspefscr_1 = "7c0083a6R",
+ mtxer_1 = "7c0103a6R",
+ mtlr_1 = "7c0803a6R",
+ mtctr_1 = "7c0903a6R",
+ dcbi_2 = "7c0003ac-RR",
+ nand_3 = "7c0003b8RR~R.",
+ divd_3 = "7c0003d2RRR.",
+ divw_3 = "7c0003d6RRR.",
+ cmpb_3 = "7c0003f8RR~R.",
+ mcrxr_1 = "7c000400X",
+ subfco_3 = "7c000410RRR.",
+ subco_3 = "7c000410RRR~.",
+ addco_3 = "7c000414RRR.",
+ ldbrx_3 = "7c000428RR0R",
+ lswx_3 = "7c00042aRR0R",
+ lwbrx_3 = "7c00042cRR0R",
+ lfsx_3 = "7c00042eFR0R",
+ srw_3 = "7c000430RR~R.",
+ srd_3 = "7c000436RR~R.",
+ subfo_3 = "7c000450RRR.",
+ subo_3 = "7c000450RRR~.",
+ lfsux_3 = "7c00046eFR0R",
+ lswi_3 = "7c0004aaRR0A",
+ sync_0 = "7c0004ac",
+ lwsync_0 = "7c2004ac",
+ ptesync_0 = "7c4004ac",
+ lfdx_3 = "7c0004aeFR0R",
+ nego_2 = "7c0004d0RR.",
+ lfdux_3 = "7c0004eeFR0R",
+ subfeo_3 = "7c000510RRR.",
+ subeo_3 = "7c000510RRR~.",
+ addeo_3 = "7c000514RRR.",
+ stdbrx_3 = "7c000528RR0R",
+ stswx_3 = "7c00052aRR0R",
+ stwbrx_3 = "7c00052cRR0R",
+ stfsx_3 = "7c00052eFR0R",
+ stfsux_3 = "7c00056eFR0R",
+ subfzeo_2 = "7c000590RR.",
+ addzeo_2 = "7c000594RR.",
+ stswi_3 = "7c0005aaRR0A",
+ stfdx_3 = "7c0005aeFR0R",
+ subfmeo_2 = "7c0005d0RR.",
+ mulldo_3 = "7c0005d2RRR.",
+ addmeo_2 = "7c0005d4RR.",
+ mullwo_3 = "7c0005d6RRR.",
+ dcba_2 = "7c0005ec-RR",
+ stfdux_3 = "7c0005eeFR0R",
+ addo_3 = "7c000614RRR.",
+ lhbrx_3 = "7c00062cRR0R",
+ sraw_3 = "7c000630RR~R.",
+ srad_3 = "7c000634RR~R.",
+ srawi_3 = "7c000670RR~A.",
+ eieio_0 = "7c0006ac",
+ lfiwax_3 = "7c0006aeFR0R",
+ sthbrx_3 = "7c00072cRR0R",
+ extsh_2 = "7c000734RR~.",
+ extsb_2 = "7c000774RR~.",
+ divduo_3 = "7c000792RRR.",
+ divwou_3 = "7c000796RRR.",
+ icbi_2 = "7c0007ac-RR",
+ stfiwx_3 = "7c0007aeFR0R",
+ extsw_2 = "7c0007b4RR~.",
+ divdo_3 = "7c0007d2RRR.",
+ divwo_3 = "7c0007d6RRR.",
+ dcbz_2 = "7c0007ec-RR",
+
+ -- Primary opcode 59:
+ fdivs_3 = "ec000024FFF.",
+ fsubs_3 = "ec000028FFF.",
+ fadds_3 = "ec00002aFFF.",
+ fsqrts_2 = "ec00002cF-F.",
+ fres_2 = "ec000030F-F.",
+ fmuls_3 = "ec000032FF-F.",
+ frsqrtes_2 = "ec000034F-F.",
+ fmsubs_4 = "ec000038FFFF~.",
+ fmadds_4 = "ec00003aFFFF~.",
+ fnmsubs_4 = "ec00003cFFFF~.",
+ fnmadds_4 = "ec00003eFFFF~.",
+
+ -- Primary opcode 63:
+ fdiv_3 = "fc000024FFF.",
+ fsub_3 = "fc000028FFF.",
+ fadd_3 = "fc00002aFFF.",
+ fsqrt_2 = "fc00002cF-F.",
+ fsel_4 = "fc00002eFFFF~.",
+ fre_2 = "fc000030F-F.",
+ fmul_3 = "fc000032FF-F.",
+ frsqrte_2 = "fc000034F-F.",
+ fmsub_4 = "fc000038FFFF~.",
+ fmadd_4 = "fc00003aFFFF~.",
+ fnmsub_4 = "fc00003cFFFF~.",
+ fnmadd_4 = "fc00003eFFFF~.",
+ fcmpu_3 = "fc000000XFF",
+ fcpsgn_3 = "fc000010FFF.",
+ fcmpo_3 = "fc000040XFF",
+ mtfsb1_1 = "fc00004cA",
+ fneg_2 = "fc000050F-F.",
+ mcrfs_2 = "fc000080XX",
+ mtfsb0_1 = "fc00008cA",
+ fmr_2 = "fc000090F-F.",
+ frsp_2 = "fc000018F-F.",
+ fctiw_2 = "fc00001cF-F.",
+ fctiwz_2 = "fc00001eF-F.",
+ mtfsfi_2 = "fc00010cAA", -- NYI: upshift.
+ fnabs_2 = "fc000110F-F.",
+ fabs_2 = "fc000210F-F.",
+ frin_2 = "fc000310F-F.",
+ friz_2 = "fc000350F-F.",
+ frip_2 = "fc000390F-F.",
+ frim_2 = "fc0003d0F-F.",
+ mffs_1 = "fc00048eF.",
+ mtfsf_1 = "fc00058eF.",
+ fctid_2 = "fc00065cF-F.",
+ fctidz_2 = "fc00065eF-F.",
+ fcfid_2 = "fc00069cF-F.",
+
+ -- Primary opcode 4, SPE APU extension:
+ evaddw_3 = "10000200RRR",
+ evaddiw_3 = "10000202RAR~",
+ evsubw_3 = "10000204RRR~",
+ evsubiw_3 = "10000206RAR~",
+ evabs_2 = "10000208RR",
+ evneg_2 = "10000209RR",
+ evextsb_2 = "1000020aRR",
+ evextsh_2 = "1000020bRR",
+ evrndw_2 = "1000020cRR",
+ evcntlzw_2 = "1000020dRR",
+ evcntlsw_2 = "1000020eRR",
+ brinc_3 = "1000020fRRR",
+ evand_3 = "10000211RRR",
+ evandc_3 = "10000212RRR",
+ evxor_3 = "10000216RRR",
+ evor_3 = "10000217RRR",
+ evmr_2 = "10000217RR=",
+ evnor_3 = "10000218RRR",
+ evnot_2 = "10000218RR=",
+ eveqv_3 = "10000219RRR",
+ evorc_3 = "1000021bRRR",
+ evnand_3 = "1000021eRRR",
+ evsrwu_3 = "10000220RRR",
+ evsrws_3 = "10000221RRR",
+ evsrwiu_3 = "10000222RRA",
+ evsrwis_3 = "10000223RRA",
+ evslw_3 = "10000224RRR",
+ evslwi_3 = "10000226RRA",
+ evrlw_3 = "10000228RRR",
+ evsplati_2 = "10000229RS",
+ evrlwi_3 = "1000022aRRA",
+ evsplatfi_2 = "1000022bRS",
+ evmergehi_3 = "1000022cRRR",
+ evmergelo_3 = "1000022dRRR",
+ evcmpgtu_3 = "10000230XRR",
+ evcmpgtu_2 = "10000230-RR",
+ evcmpgts_3 = "10000231XRR",
+ evcmpgts_2 = "10000231-RR",
+ evcmpltu_3 = "10000232XRR",
+ evcmpltu_2 = "10000232-RR",
+ evcmplts_3 = "10000233XRR",
+ evcmplts_2 = "10000233-RR",
+ evcmpeq_3 = "10000234XRR",
+ evcmpeq_2 = "10000234-RR",
+ evsel_4 = "10000278RRRW",
+ evsel_3 = "10000278RRR",
+ evfsadd_3 = "10000280RRR",
+ evfssub_3 = "10000281RRR",
+ evfsabs_2 = "10000284RR",
+ evfsnabs_2 = "10000285RR",
+ evfsneg_2 = "10000286RR",
+ evfsmul_3 = "10000288RRR",
+ evfsdiv_3 = "10000289RRR",
+ evfscmpgt_3 = "1000028cXRR",
+ evfscmpgt_2 = "1000028c-RR",
+ evfscmplt_3 = "1000028dXRR",
+ evfscmplt_2 = "1000028d-RR",
+ evfscmpeq_3 = "1000028eXRR",
+ evfscmpeq_2 = "1000028e-RR",
+ evfscfui_2 = "10000290R-R",
+ evfscfsi_2 = "10000291R-R",
+ evfscfuf_2 = "10000292R-R",
+ evfscfsf_2 = "10000293R-R",
+ evfsctui_2 = "10000294R-R",
+ evfsctsi_2 = "10000295R-R",
+ evfsctuf_2 = "10000296R-R",
+ evfsctsf_2 = "10000297R-R",
+ evfsctuiz_2 = "10000298R-R",
+ evfsctsiz_2 = "1000029aR-R",
+ evfststgt_3 = "1000029cXRR",
+ evfststgt_2 = "1000029c-RR",
+ evfststlt_3 = "1000029dXRR",
+ evfststlt_2 = "1000029d-RR",
+ evfststeq_3 = "1000029eXRR",
+ evfststeq_2 = "1000029e-RR",
+ efsadd_3 = "100002c0RRR",
+ efssub_3 = "100002c1RRR",
+ efsabs_2 = "100002c4RR",
+ efsnabs_2 = "100002c5RR",
+ efsneg_2 = "100002c6RR",
+ efsmul_3 = "100002c8RRR",
+ efsdiv_3 = "100002c9RRR",
+ efscmpgt_3 = "100002ccXRR",
+ efscmpgt_2 = "100002cc-RR",
+ efscmplt_3 = "100002cdXRR",
+ efscmplt_2 = "100002cd-RR",
+ efscmpeq_3 = "100002ceXRR",
+ efscmpeq_2 = "100002ce-RR",
+ efscfd_2 = "100002cfR-R",
+ efscfui_2 = "100002d0R-R",
+ efscfsi_2 = "100002d1R-R",
+ efscfuf_2 = "100002d2R-R",
+ efscfsf_2 = "100002d3R-R",
+ efsctui_2 = "100002d4R-R",
+ efsctsi_2 = "100002d5R-R",
+ efsctuf_2 = "100002d6R-R",
+ efsctsf_2 = "100002d7R-R",
+ efsctuiz_2 = "100002d8R-R",
+ efsctsiz_2 = "100002daR-R",
+ efststgt_3 = "100002dcXRR",
+ efststgt_2 = "100002dc-RR",
+ efststlt_3 = "100002ddXRR",
+ efststlt_2 = "100002dd-RR",
+ efststeq_3 = "100002deXRR",
+ efststeq_2 = "100002de-RR",
+ efdadd_3 = "100002e0RRR",
+ efdsub_3 = "100002e1RRR",
+ efdcfuid_2 = "100002e2R-R",
+ efdcfsid_2 = "100002e3R-R",
+ efdabs_2 = "100002e4RR",
+ efdnabs_2 = "100002e5RR",
+ efdneg_2 = "100002e6RR",
+ efdmul_3 = "100002e8RRR",
+ efddiv_3 = "100002e9RRR",
+ efdctuidz_2 = "100002eaR-R",
+ efdctsidz_2 = "100002ebR-R",
+ efdcmpgt_3 = "100002ecXRR",
+ efdcmpgt_2 = "100002ec-RR",
+ efdcmplt_3 = "100002edXRR",
+ efdcmplt_2 = "100002ed-RR",
+ efdcmpeq_3 = "100002eeXRR",
+ efdcmpeq_2 = "100002ee-RR",
+ efdcfs_2 = "100002efR-R",
+ efdcfui_2 = "100002f0R-R",
+ efdcfsi_2 = "100002f1R-R",
+ efdcfuf_2 = "100002f2R-R",
+ efdcfsf_2 = "100002f3R-R",
+ efdctui_2 = "100002f4R-R",
+ efdctsi_2 = "100002f5R-R",
+ efdctuf_2 = "100002f6R-R",
+ efdctsf_2 = "100002f7R-R",
+ efdctuiz_2 = "100002f8R-R",
+ efdctsiz_2 = "100002faR-R",
+ efdtstgt_3 = "100002fcXRR",
+ efdtstgt_2 = "100002fc-RR",
+ efdtstlt_3 = "100002fdXRR",
+ efdtstlt_2 = "100002fd-RR",
+ efdtsteq_3 = "100002feXRR",
+ efdtsteq_2 = "100002fe-RR",
+ evlddx_3 = "10000300RR0R",
+ evldd_2 = "10000301R8",
+ evldwx_3 = "10000302RR0R",
+ evldw_2 = "10000303R8",
+ evldhx_3 = "10000304RR0R",
+ evldh_2 = "10000305R8",
+ evlwhex_3 = "10000310RR0R",
+ evlwhe_2 = "10000311R4",
+ evlwhoux_3 = "10000314RR0R",
+ evlwhou_2 = "10000315R4",
+ evlwhosx_3 = "10000316RR0R",
+ evlwhos_2 = "10000317R4",
+ evstddx_3 = "10000320RR0R",
+ evstdd_2 = "10000321R8",
+ evstdwx_3 = "10000322RR0R",
+ evstdw_2 = "10000323R8",
+ evstdhx_3 = "10000324RR0R",
+ evstdh_2 = "10000325R8",
+ evstwhex_3 = "10000330RR0R",
+ evstwhe_2 = "10000331R4",
+ evstwhox_3 = "10000334RR0R",
+ evstwho_2 = "10000335R4",
+ evstwwex_3 = "10000338RR0R",
+ evstwwe_2 = "10000339R4",
+ evstwwox_3 = "1000033cRR0R",
+ evstwwo_2 = "1000033dR4",
+ evmhessf_3 = "10000403RRR",
+ evmhossf_3 = "10000407RRR",
+ evmheumi_3 = "10000408RRR",
+ evmhesmi_3 = "10000409RRR",
+ evmhesmf_3 = "1000040bRRR",
+ evmhoumi_3 = "1000040cRRR",
+ evmhosmi_3 = "1000040dRRR",
+ evmhosmf_3 = "1000040fRRR",
+ evmhessfa_3 = "10000423RRR",
+ evmhossfa_3 = "10000427RRR",
+ evmheumia_3 = "10000428RRR",
+ evmhesmia_3 = "10000429RRR",
+ evmhesmfa_3 = "1000042bRRR",
+ evmhoumia_3 = "1000042cRRR",
+ evmhosmia_3 = "1000042dRRR",
+ evmhosmfa_3 = "1000042fRRR",
+ evmwhssf_3 = "10000447RRR",
+ evmwlumi_3 = "10000448RRR",
+ evmwhumi_3 = "1000044cRRR",
+ evmwhsmi_3 = "1000044dRRR",
+ evmwhsmf_3 = "1000044fRRR",
+ evmwssf_3 = "10000453RRR",
+ evmwumi_3 = "10000458RRR",
+ evmwsmi_3 = "10000459RRR",
+ evmwsmf_3 = "1000045bRRR",
+ evmwhssfa_3 = "10000467RRR",
+ evmwlumia_3 = "10000468RRR",
+ evmwhumia_3 = "1000046cRRR",
+ evmwhsmia_3 = "1000046dRRR",
+ evmwhsmfa_3 = "1000046fRRR",
+ evmwssfa_3 = "10000473RRR",
+ evmwumia_3 = "10000478RRR",
+ evmwsmia_3 = "10000479RRR",
+ evmwsmfa_3 = "1000047bRRR",
+ evmra_2 = "100004c4RR",
+ evdivws_3 = "100004c6RRR",
+ evdivwu_3 = "100004c7RRR",
+ evmwssfaa_3 = "10000553RRR",
+ evmwumiaa_3 = "10000558RRR",
+ evmwsmiaa_3 = "10000559RRR",
+ evmwsmfaa_3 = "1000055bRRR",
+ evmwssfan_3 = "100005d3RRR",
+ evmwumian_3 = "100005d8RRR",
+ evmwsmian_3 = "100005d9RRR",
+ evmwsmfan_3 = "100005dbRRR",
+ evmergehilo_3 = "1000022eRRR",
+ evmergelohi_3 = "1000022fRRR",
+ evlhhesplatx_3 = "10000308RR0R",
+ evlhhesplat_2 = "10000309R2",
+ evlhhousplatx_3 = "1000030cRR0R",
+ evlhhousplat_2 = "1000030dR2",
+ evlhhossplatx_3 = "1000030eRR0R",
+ evlhhossplat_2 = "1000030fR2",
+ evlwwsplatx_3 = "10000318RR0R",
+ evlwwsplat_2 = "10000319R4",
+ evlwhsplatx_3 = "1000031cRR0R",
+ evlwhsplat_2 = "1000031dR4",
+ evaddusiaaw_2 = "100004c0RR",
+ evaddssiaaw_2 = "100004c1RR",
+ evsubfusiaaw_2 = "100004c2RR",
+ evsubfssiaaw_2 = "100004c3RR",
+ evaddumiaaw_2 = "100004c8RR",
+ evaddsmiaaw_2 = "100004c9RR",
+ evsubfumiaaw_2 = "100004caRR",
+ evsubfsmiaaw_2 = "100004cbRR",
+ evmheusiaaw_3 = "10000500RRR",
+ evmhessiaaw_3 = "10000501RRR",
+ evmhessfaaw_3 = "10000503RRR",
+ evmhousiaaw_3 = "10000504RRR",
+ evmhossiaaw_3 = "10000505RRR",
+ evmhossfaaw_3 = "10000507RRR",
+ evmheumiaaw_3 = "10000508RRR",
+ evmhesmiaaw_3 = "10000509RRR",
+ evmhesmfaaw_3 = "1000050bRRR",
+ evmhoumiaaw_3 = "1000050cRRR",
+ evmhosmiaaw_3 = "1000050dRRR",
+ evmhosmfaaw_3 = "1000050fRRR",
+ evmhegumiaa_3 = "10000528RRR",
+ evmhegsmiaa_3 = "10000529RRR",
+ evmhegsmfaa_3 = "1000052bRRR",
+ evmhogumiaa_3 = "1000052cRRR",
+ evmhogsmiaa_3 = "1000052dRRR",
+ evmhogsmfaa_3 = "1000052fRRR",
+ evmwlusiaaw_3 = "10000540RRR",
+ evmwlssiaaw_3 = "10000541RRR",
+ evmwlumiaaw_3 = "10000548RRR",
+ evmwlsmiaaw_3 = "10000549RRR",
+ evmheusianw_3 = "10000580RRR",
+ evmhessianw_3 = "10000581RRR",
+ evmhessfanw_3 = "10000583RRR",
+ evmhousianw_3 = "10000584RRR",
+ evmhossianw_3 = "10000585RRR",
+ evmhossfanw_3 = "10000587RRR",
+ evmheumianw_3 = "10000588RRR",
+ evmhesmianw_3 = "10000589RRR",
+ evmhesmfanw_3 = "1000058bRRR",
+ evmhoumianw_3 = "1000058cRRR",
+ evmhosmianw_3 = "1000058dRRR",
+ evmhosmfanw_3 = "1000058fRRR",
+ evmhegumian_3 = "100005a8RRR",
+ evmhegsmian_3 = "100005a9RRR",
+ evmhegsmfan_3 = "100005abRRR",
+ evmhogumian_3 = "100005acRRR",
+ evmhogsmian_3 = "100005adRRR",
+ evmhogsmfan_3 = "100005afRRR",
+ evmwlusianw_3 = "100005c0RRR",
+ evmwlssianw_3 = "100005c1RRR",
+ evmwlumianw_3 = "100005c8RRR",
+ evmwlsmianw_3 = "100005c9RRR",
+
+ -- NYI: some 64 bit PowerPC and Book E instructions:
+ -- rldicl, rldicr, rldic, rldimi, rldcl, rldcr, sradi, 64 bit ext. add/sub,
+ -- extended addressing branches, cache management, loads and stores
+}
+
+-- Add mnemonics for "." variants.
+-- Every template ending in "." gets a twin whose name has a "." inserted in
+-- front of the two character "_N" suffix. In the twin's template the 8th hex
+-- digit is advanced by one character code and the trailing "." is dropped.
+do
+  local dotted = {}
+  for opname, tmpl in pairs(map_op) do
+    if sub(tmpl, -1) == "." then
+      local head = sub(tmpl, 1, 7)
+      local bumped = char(byte(tmpl, 8) + 1)
+      local operands = sub(tmpl, 9, -2)
+      local dotname = sub(opname, 1, -3).."."..sub(opname, -2)
+      dotted[dotname] = head..bumped..operands
+    end
+  end
+  -- Merge afterwards, so map_op is not modified while it is being traversed.
+  for dotname, tmpl in pairs(dotted) do
+    map_op[dotname] = tmpl
+  end
+end
+
+-- Add more branch mnemonics: for every condition name X in map_cond this
+-- defines bX, bXl, bXlr, bXlrl, bXctr and bXctrl.
+for cond, num in pairs(map_cond) do
+  local stem = "b"..cond
+  -- Condition dependent opcode bits, shared by all variants below.
+  local bits = (num % 4) * 0x00010000
+  if num < 4 then bits = bits + 0x01000000 end
+  local rel, rell = tohex(0x40800000 + bits), tohex(0x40800001 + bits)
+  local lr, lrl = tohex(0x4c800020 + bits), tohex(0x4c800021 + bits)
+  local ctr, ctrl = tohex(0x4c800420 + bits), tohex(0x4c800421 + bits)
+  -- bX[l] with a label operand, optionally preceded by a cr operand.
+  map_op[stem.."_1"] = rel.."K"
+  map_op[stem.."l_1"] = rell.."K"
+  map_op[stem.."_2"] = rel.."-XK"
+  map_op[stem.."l_2"] = rell.."-XK"
+  -- bXlr[l] and bXctr[l] without operands.
+  map_op[stem.."lr_0"] = lr
+  map_op[stem.."lrl_0"] = lrl
+  map_op[stem.."ctr_0"] = ctr
+  map_op[stem.."ctrl_0"] = ctrl
+  -- bXlr[l] and bXctr[l] with a cr operand.
+  map_op[stem.."lr_1"] = lr.."-X"
+  map_op[stem.."lrl_1"] = lrl.."-X"
+  map_op[stem.."ctr_1"] = ctr.."-X"
+  map_op[stem.."ctrl_1"] = ctrl.."-X"
+end
+
+------------------------------------------------------------------------------
+
+-- Parse a general purpose register operand: "rN" with N in 0..31, a type
+-- name that carries a default register, or "type:rN". Returns the register
+-- number plus the map_type entry (nil if no type was involved).
+local function parse_gpr(expr)
+  local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
+  local key = tname or expr
+  local tp = map_type[key]
+  if tp then
+    local reg = ovreg or tp.reg
+    if not reg then
+      werror("type `"..key.."' needs a register override")
+    end
+    expr = reg
+  end
+  local digits = match(expr, "^r([1-3]?[0-9])$")
+  local num = digits and tonumber(digits)
+  if num and num <= 31 then return num, tp end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a floating point register operand "fN" with N in 0..31 and return N.
+local function parse_fpr(expr)
+  local digits = match(expr, "^f([1-3]?[0-9])$")
+  local num = digits and tonumber(digits)
+  if num and num <= 31 then return num end
+  werror("bad register name `"..expr.."'")
+end
+
+-- Parse a condition register operand "crN" with N in 0..7 and return N.
+local function parse_cr(expr)
+  local digit = match(expr, "^cr([0-7])$")
+  if not digit then
+    werror("bad condition register name `"..expr.."'")
+  else
+    return tonumber(digit)
+  end
+end
+
+-- Parse a condition bit operand of the form "4*crN+xx", where xx is one of
+-- the first four map_cond names (lt, gt, eq, so). Returns N*4 + bit number.
+local function parse_cond(expr)
+  local crnum, cond = match(expr, "^4%*cr([0-7])%+(%w%w)$")
+  local bit = crnum and map_cond[cond]
+  if bit and bit < 4 then
+    return tonumber(crnum)*4 + bit
+  end
+  werror("bad condition bit name `"..expr.."'")
+end
+
+-- Parse an immediate operand and return its contribution to the opcode.
+--   imm     operand text
+--   bits    width of the immediate field
+--   shift   bit position of the field within the opcode
+--   scale   the value must be a multiple of 2^scale and is divided by it
+--   signed  true for a two's complement field
+-- A numeric operand is range checked and encoded right away. A register
+-- name is rejected. Anything else is handed to the encoding engine as an
+-- expression via an IMM action, in which case 0 is returned.
+local function parse_imm(imm, bits, shift, scale, signed)
+  local n = tonumber(imm)
+  if n then
+    if n % 2^scale == 0 then
+      n = n / 2^scale
+      if signed then
+        if n >= 0 then
+          if n < 2^(bits-1) then return n*2^shift end
+        else
+          -- The smallest encodable value is -2^(bits-1). Anything below it
+          -- would silently wrap around to a positive field value.
+          if n >= -(2^(bits-1)) then return (n+2^bits)*2^shift end
+        end
+      else
+        if n >= 0 and n <= 2^bits-1 then return n*2^shift end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif match(imm, "^r([1-3]?[0-9])$") or
+         match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    -- Action argument layout: signed flag, scale, bits and shift packed
+    -- into one value.
+    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+    return 0
+  end
+end
+
+-- Parse a displacement operand: either "imm(reg)" with a signed 16 bit
+-- immediate, or a typed register followed by a field expression. Returns the
+-- base register (shifted up by 16 bits) plus the immediate field. r0 is not
+-- accepted as base register.
+local function parse_disp(disp)
+  local imm, basereg = match(disp, "^(.*)%(([%w_:]+)%)$")
+  if imm then
+    local base = parse_gpr(basereg)
+    if base == 0 then werror("cannot use r0 in displacement") end
+    local field = parse_imm(imm, 16, 0, 0, true)
+    return base*65536 + field
+  end
+  local typed, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
+  if typed and tailr ~= "" then
+    local base, tp = parse_gpr(typed)
+    if base == 0 then werror("cannot use r0 in displacement") end
+    if tp then
+      -- The offset is only known to the C compiler: pass it as an expression.
+      waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr))
+      return base*65536
+    end
+  end
+  werror("bad displacement `"..disp.."'")
+end
+
+-- Parse a displacement operand with an unsigned 5 bit immediate placed at
+-- bit 11: either "imm(reg)", or a typed register followed by a field
+-- expression. `scale` is passed on to the immediate parser. Returns the base
+-- register (shifted up by 16 bits) plus the immediate field. r0 is not
+-- accepted as base register.
+local function parse_u5disp(disp, scale)
+  local imm, basereg = match(disp, "^(.*)%(([%w_:]+)%)$")
+  if imm then
+    local base = parse_gpr(basereg)
+    if base == 0 then werror("cannot use r0 in displacement") end
+    local field = parse_imm(imm, 5, 11, scale, false)
+    return base*65536 + field
+  end
+  local typed, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
+  if typed and tailr ~= "" then
+    local base, tp = parse_gpr(typed)
+    if base == 0 then werror("cannot use r0 in displacement") end
+    if tp then
+      -- The offset is only known to the C compiler: pass it as an expression.
+      waction("IMM", scale*1024+5*32+11, format(tp.ctypefmt, tailr))
+      return base*65536
+    end
+  end
+  werror("bad displacement `"..disp.."'")
+end
+
+-- Parse a label operand. `def` is true for a label definition and false for
+-- a label reference. Returns a mode ("PC", "LG" or "EXT"), a label number
+-- and, for PC labels only, the pc expression text.
+local function parse_label(label, def)
+  local prefix = sub(label, 1, 2)
+  if prefix == "=>" then
+    -- =>label (pc label reference)
+    return "PC", 0, sub(label, 3)
+  elseif prefix == "->" then
+    -- ->name (global label reference)
+    return "LG", map_global[sub(label, 3)]
+  elseif def then
+    -- [1-9] (local label definition)
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9] (local label reference)
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir then -- Fwd: 1-9, Bkwd: 11-19.
+      local offset = 10
+      if dir == ">" then offset = 0 end
+      return "LG", lnum + offset
+    end
+    -- extern label (extern label reference)
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+-- Without `params` the operand part of the template (everything after the
+-- 8 hex digit opcode) is returned. Otherwise one instruction word is
+-- assembled: a slot is reserved in the action list, every template character
+-- consumes an operand and/or adjusts the opcode, and the finished opcode is
+-- stored into the reserved slot.
+map_op[".template__"] = function(params, template, nparams)
+  if not params then return sub(template, 9) end
+  local op = tonumber(sub(template, 1, 8), 16)
+  -- n: index of the next operand. rs: bit position of the current 5 bit field.
+  local n, rs = 1, 26
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 3 positions (rlwinm).
+  if secpos+3 > maxsecpos then wflush() end
+  local pos = wpos()
+
+  -- Process each character.
+  for p in gmatch(sub(template, 9), ".") do
+    if p == "R" then -- General purpose register in the next field.
+      rs = rs - 5; op = op + parse_gpr(params[n]) * 2^rs; n = n + 1
+    elseif p == "F" then -- Floating point register in the next field.
+      rs = rs - 5; op = op + parse_fpr(params[n]) * 2^rs; n = n + 1
+    elseif p == "A" then -- Unsigned 5 bit immediate in the next field.
+      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
+    elseif p == "S" then -- Signed 5 bit immediate in the next field.
+      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, true); n = n + 1
+    elseif p == "I" then -- Signed 16 bit immediate in the low half.
+      op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1
+    elseif p == "U" then -- Unsigned 16 bit immediate in the low half.
+      op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1
+    elseif p == "D" then -- Displacement with base register.
+      op = op + parse_disp(params[n]); n = n + 1
+    elseif p == "2" then -- 5 bit displacement, scale 1.
+      op = op + parse_u5disp(params[n], 1); n = n + 1
+    elseif p == "4" then -- 5 bit displacement, scale 2.
+      op = op + parse_u5disp(params[n], 2); n = n + 1
+    elseif p == "8" then -- 5 bit displacement, scale 3.
+      op = op + parse_u5disp(params[n], 3); n = n + 1
+    elseif p == "C" then -- Condition bit in the next field.
+      rs = rs - 5; op = op + parse_cond(params[n]) * 2^rs; n = n + 1
+    elseif p == "X" then -- Condition register in the top 3 bits of the field.
+      rs = rs - 5; op = op + parse_cr(params[n]) * 2^(rs+2); n = n + 1
+    elseif p == "W" then -- Condition register in the lowest bits.
+      op = op + parse_cr(params[n]); n = n + 1
+    elseif p == "J" or p == "K" then -- Branch target label.
+      -- Keep the label number in its own local: reusing `n` here would hide
+      -- the operand index and leave it stuck on the label operand.
+      local mode, lnum, s = parse_label(params[n], false)
+      if p == "K" then lnum = lnum + 2048 end
+      waction("REL_"..mode, lnum, s, 1)
+      n = n + 1
+    elseif p == "0" then -- The field just parsed must not be r0.
+      local mm = 2^rs
+      local t = op % mm
+      if ((op - t) / mm) % 32 == 0 then werror("cannot use r0") end
+    elseif p == "=" or p == "%" then
+      -- Copy an earlier field into the next field: "=" copies the current
+      -- field, "%" the one before it.
+      local mm = 2^(rs + (p == "%" and 5 or 0))
+      local t = ((op - op % mm) / mm) % 32
+      rs = rs - 5
+      op = op + t * 2^rs
+    elseif p == "~" then -- Swap the current field with the one before it.
+      local mm = 2^rs
+      local t1l = op % mm
+      local t1h = (op - t1l) / mm
+      local t2l = t1h % 32
+      local t2h = (t1h - t2l) / 32
+      local t3l = t2h % 32
+      op = ((t2h - t3l + t2l)*32 + t3l)*mm + t1l
+    elseif p == "-" then -- Skip a field.
+      rs = rs - 5
+    elseif p == "." then
+      -- Ignored.
+    else
+      assert(false)
+    end
+  end
+  wputpos(pos, op)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+-- Called without params it returns the operand description.
+map_op[".actionlist_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeactions(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+-- Called without params it returns the operand description.
+map_op[".globals_1"] = function(params)
+  if params then
+    local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobals(out, enumprefix) end)
+  else
+    return "prefix"
+  end
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+-- Called without params it returns the operand description.
+map_op[".globalnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobalnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+-- Called without params it returns the operand description.
+map_op[".externnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeexternnames(out, cvar) end)
+  else
+    return "cvar"
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Label definition pseudo-opcode (the trailing colon form is converted to it).
+map_op[".label_1"] = function(params)
+  if not params then
+    return "[1-9] | ->global | =>pcexpr"
+  end
+  -- Flush the pending actions first if one more buffer position won't fit.
+  if maxsecpos < secpos + 1 then wflush() end
+  local kind, num, str = parse_label(params[1], true)
+  -- Extern labels can only be referenced, never defined.
+  if kind == "EXT" then
+    werror("bad label definition")
+  end
+  waction("LABEL_"..kind, num, str, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Data storage pseudo-opcode: `.long imm...` emits one word per operand.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _, text in ipairs(params) do
+    local value = tonumber(text)
+    if not value then
+      werror("bad immediate `"..text.."'")
+    end
+    -- Negative values wrap around to their unsigned 32 bit equivalent.
+    if value < 0 then
+      value = value + 2^32
+    end
+    wputw(value)
+    -- Flush once fewer than two further buffer positions would fit.
+    if maxsecpos < secpos + 2 then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode: `.align numpow2`.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if maxsecpos < secpos + 1 then wflush() end
+  local align = tonumber(params[1])
+  if align then
+    -- Halve up to 8 times: only the powers of 2 from 2 to 256 ever reach 1.
+    local rest = align
+    for _ = 1, 8 do
+      rest = rest / 2
+      if rest == 1 then
+        -- The action argument is the alignment mask, 2**n-1.
+        waction("ALIGN", align - 1, nil, 1)
+        return
+      end
+    end
+  end
+  -- Not a number, or not an accepted power of 2.
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode `.type name, ctype [, reg]`: defines a type that maps to a
+-- C type, optionally tied to a register.
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    -- Template query: the 2-operand form has no register.
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name = params[1]
+  local ctype = params[2]
+  local reg = params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- `#name` expands to the size of the C type. Storing this in map_archdef
+  -- is a bit unclean.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Register the type under the next type number and emit its DtXX() macro.
+  local num = ctypenum + 1
+  local fmt = format("Dt%X(%%s)", num)
+  map_type[name] = { ctype = ctype, ctypefmt = fmt, reg = reg }
+  local define = format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)",
+                        num, ctype)
+  wline(define)
+  ctypenum = num
+end
+-- The 2-operand form shares the same handler.
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump all type definitions, sorted by type name.
+local function dumptypes(out, lvl)
+  local names = {}
+  for tname in pairs(map_type) do
+    names[#names + 1] = tname
+  end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i = 1, #names do
+    local tname = names[i]
+    local info = map_type[tname]
+    -- Types without a register show an empty last column.
+    out:write(format(" %-20s %-20s %s\n", tname, info.ctype, info.reg or ""))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section: queue a SECTION action for section `num`, then flush.
+function _M.section(num)
+ waction("SECTION", num)
+ wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump the architecture description: a version banner, then the action codes.
+function _M.dumparch(out)
+  local banner = format("DynASM %s version %s, released %s\n\n",
+                        _info.arch, _info.version, _info.release)
+  out:write(banner)
+  dumpactions(out)
+end
+
+-- Dump all user defined elements: types, global labels and extern labels, in that order.
+function _M.dumpdef(out, lvl)
+ dumptypes(out, lvl)
+ dumpglobals(out, lvl)
+ dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Exchange callbacks with the DynASM core: store the four callbacks passed
+-- in and hand back this module's flush function.
+function _M.passcb(wl, we, wf, ww)
+  wline = wl
+  werror = we
+  wfatal = wf
+  wwarn = ww
+  return wflush
+end
+
+-- Setup the arch-specific module: remember the arch and the options.
+function _M.setup(arch, opt)
+  g_arch = arch
+  g_opt = opt
+end
+
+-- Merge the core maps and the arch-specific maps via __index fallbacks:
+-- ops missing from map_op are looked up in map_coreop, defines missing from
+-- map_def are looked up in map_archdef.
+function _M.mergemaps(map_coreop, map_def)
+  local op_fallback = { __index = map_coreop }
+  setmetatable(map_op, op_fallback)
+  local def_fallback = { __index = map_archdef }
+  setmetatable(map_def, def_fallback)
+  return map_op, map_def
+end
+
+return _M -- Module result: the table holding the _M.* glue functions.
+
+------------------------------------------------------------------------------
+
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
new file mode 100644
index 0000000..7fac22a
--- /dev/null
+++ b/dynasm/dasm_proto.h
@@ -0,0 +1,83 @@
+/*
+** DynASM encoding engine prototypes.
+** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+** Released under the MIT/X license. See dynasm.lua for full copyright notice.
+*/
+
+#ifndef _DASM_PROTO_H
+#define _DASM_PROTO_H
+
+#include <stddef.h>
+#include <stdarg.h>
+
+#define DASM_IDENT "DynASM 1.2.2"
+#define DASM_VERSION 10202 /* 1.2.2 */
+
+#ifndef Dst_DECL /* First parameter of every dasm_* function; may be predefined by the embedder. */
+#define Dst_DECL dasm_State **Dst
+#endif
+
+#ifndef Dst_REF /* Expression that yields the dasm_State pointer from that parameter. */
+#define Dst_REF (*Dst)
+#endif
+
+#ifndef DASM_FDEF /* Prefix put in front of each prototype below. */
+#define DASM_FDEF extern
+#endif
+
+#ifndef DASM_M_GROW /* Default grow hook: make buffer p (current size sz, in bytes) hold `need` bytes.
+** Nothing happens if sz already suffices. Otherwise the size starts at 16 at
+** least and doubles until it fits; p and sz are updated in place. The process
+** exits with status 1 if realloc() fails. ctx is unused by this default.
+*/
+#define DASM_M_GROW(ctx, t, p, sz, need) \
+ do { \
+ size_t _sz = (sz), _need = (need); \
+ if (_sz < _need) { \
+ if (_sz < 16) _sz = 16; \
+ while (_sz < _need) _sz += _sz; \
+ (p) = (t *)realloc((p), _sz); \
+ if ((p) == NULL) exit(1); \
+ (sz) = _sz; \
+ } \
+ } while(0)
+#endif
+
+#ifndef DASM_M_FREE /* Default free hook: plain free(); ctx and sz are ignored. */
+#define DASM_M_FREE(ctx, p, sz) free(p)
+#endif
+
+/* Internal DynASM encoder state. Opaque here; the layout is defined by the
+** arch-specific encoding engine.
+*/
+typedef struct dasm_State dasm_State;
+
+
+/* Initialize and free DynASM state. */
+DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
+DASM_FDEF void dasm_free(Dst_DECL);
+
+/* Setup global array. Must be called before dasm_setup(). */
+DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);
+
+/* Setup encoder. */
+DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);
+
+/* Feed encoder with actions. Calls are generated by pre-processor. */
+DASM_FDEF void dasm_put(Dst_DECL, int start, ...);
+
+/* Link sections. The resulting size is stored in *szp; the int result is a
+** status code (the x86 engine returns DASM_S_OK on success).
+*/
+DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);
+
+/* Encode sections into buffer. The int result is a status code (the x86
+** engine returns DASM_S_OK or DASM_S_PHASE).
+*/
+DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);
+
+/* Get PC label offset. The x86 engine returns -1 for an undefined label and
+** -2 for an unused or out of range one.
+*/
+DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
+#else
+#define dasm_checkstep(a, b) 0 /* Checks compiled out: always evaluates to 0. */
+#endif
+
+
+#endif /* _DASM_PROTO_H */
diff --git a/dynasm/dasm_x64.lua b/dynasm/dasm_x64.lua
new file mode 100644
index 0000000..73e01e9
--- /dev/null
+++ b/dynasm/dasm_x64.lua
@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM x64 module.
+--
+-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined x86/x64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+x64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_x86") -- dasm_x86.lua reads the flag via `local x64 = x64`.
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
new file mode 100644
index 0000000..23e213c
--- /dev/null
+++ b/dynasm/dasm_x86.h
@@ -0,0 +1,470 @@
+/*
+** DynASM x86 encoding engine.
+** Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+** Released under the MIT/X license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "x86"
+
+#ifndef DASM_EXTERN /* Default hook used by dasm_encode() for DASM_EXTERN actions: always yields 0. */
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. DASM_STOP must be 255.
+** The order has to match action_names in dasm_x86.lua. The enumerator
+** DASM_EXTERN coexists with the function-like macro of the same name: the
+** macro is only expanded where the name is followed by '('.
+*/
+enum {
+ DASM_DISP = 233,
+ DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
+ DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
+ DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
+ DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call.
+** Has to agree with maxsecpos in dasm_x86.lua.
+*/
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_VREG 0x15000000
+#define DASM_S_UNDEF_L 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) /* rbuf is biased, so the full pos indexes it. */
+
+/* Action list type. */
+typedef const unsigned char *dasm_ActList;
+
+/* Per-section structure.
+** Holds the int slots recorded by dasm_put() for one section. Positions carry
+** the section number in their top 8 bits (see DASM_SEC2POS), which is why
+** rbuf is kept biased.
+*/
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize; /* Size of lglabels in bytes. */
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize; /* Size of pclabels in bytes. */
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections.
+** One dasm_Section is already part of dasm_State, hence the -1. The argument
+** is parenthesized so that expression arguments expand as intended.
+*/
+#define DASM_PSZ(ms) (sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state.
+** Allocates the state with room for maxsection sections and stores it in
+** Dst_REF. Label tables and section buffers start out empty. The status, the
+** active section and the action list are not set here but by dasm_setup().
+*/
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ int i;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); /* Biased NULL until dasm_put() grows the buffer. */
+ D->sections[i].bsize = 0;
+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+}
+
+/* Free DynASM state: every allocated section buffer, both label tables and
+** finally the state structure itself.
+*/
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int secnum = 0;
+  while (secnum < D->maxsection) {
+    dasm_Section *sec = &D->sections[secnum++];
+    if (sec->buf != NULL) {
+      DASM_M_FREE(Dst, sec->buf, sec->bsize);
+    }
+  }
+  if (D->pclabels != NULL) {
+    DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  }
+  if (D->lglabels != NULL) {
+    DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  }
+  DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup().
+** gl is the caller's array that dasm_encode() fills with the addresses of
+** global labels. Also sizes the local/global label table for 10 reserved
+** slots plus maxgl globals; the table is cleared later by dasm_setup().
+*/
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too.
+** Ensures room for maxpc labels and zeroes only the newly added part, so
+** existing entries are left untouched.
+*/
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize; /* Old size in bytes, i.e. where the new part starts. */
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder.
+** Binds the action list, clears the status, makes section 0 the active
+** section, zeroes the label tables and rewinds every section to its start.
+*/
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];
+  /* lglabels is still NULL if dasm_setupglobal() was never called. Passing a
+  ** null pointer to memset() is undefined, even with a size of 0.
+  */
+  if (D->lglabels) memset((void *)D->lglabels, 0, D->lgsize);
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS /* CK/CKPL are meant for dasm_put(): they use its locals D, p and pl and return from it on failure. */
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#else /* Checks disabled: both macros expand to no-ops. */
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
+** Walks the action list from offset `start` up to a terminal action
+** (DASM_SECTION or DASM_STOP). It records `start` and the consumed variadic
+** int arguments in the active section's buffer and advances the section's
+** byte offset estimate.
+** NOTE(review): with DASM_CHECKS a failed CK/CKPL stores the error in
+** D->status and returns directly, without reaching the va_end() at `stop`.
+*/
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+ int *b;
+
+ if (pos >= sec->epos) { /* Too little headroom left: grow the section buffer. */
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start; /* Every put record begins with its action list offset. */
+
+ va_start(ap, start);
+ while (1) {
+ int action = *p++;
+ if (action < DASM_DISP) { /* Plain code byte: only counts towards the offset. */
+ ofs++;
+ } else if (action <= DASM_REL_A) { /* Actions that consume one int argument. */
+ int n = va_arg(ap, int);
+ b[pos++] = n;
+ switch (action) {
+ case DASM_DISP:
+ if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
+ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
+ case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
+ case DASM_IMM_D: ofs += 4; break;
+ case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
+ case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
+ case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
+ case DASM_SPACE: p++; ofs += n; break;
+ case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
+ case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
+ if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
+ }
+ mrm = 4;
+ } else { /* Label, alignment, escape and terminal actions. */
+ int *pl, n;
+ switch (action) {
+ case DASM_REL_LG:
+ case DASM_IMM_LG:
+ n = *p++; pl = D->lglabels + n;
+ if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
+ pl -= 246; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ ofs += 4; /* Maximum offset needed. */
+ if (action == DASM_REL_LG || action == DASM_REL_PC)
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_ALIGN:
+ ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_EXTERN: p += 2; ofs += 4; break;
+ case DASM_ESC: p++; ofs++; break;
+ case DASM_MARK: mrm = p[-2]; break;
+ case DASM_SECTION:
+ n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n];
+ case DASM_STOP: goto stop;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos; /* Results go back to the section that was active on entry. */
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink branches/aligns, fix label offsets.
+** Stores the combined size of all sections in *szp (and in D->codesize) and
+** returns DASM_S_OK. With DASM_CHECKS, a pending error status or a PC label
+** that was referenced but never defined is returned instead and *szp is 0.
+*/
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0; /* Running byte offset over all sections, after shrinking. */
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) { /* One iteration per recorded dasm_put() call. */
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ int op, action = *p++;
+ switch (action) {
+ case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
+ case DASM_REL_PC: op = p[-2]; rel_pc: {
+ int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); /* Bytes a short form would save. */
+ if (shrink) { /* Shrinkable branch opcode? */
+ int lofs, lpos = b[pos];
+ if (lpos < 0) goto noshrink; /* Ext global? */
+ lofs = *DASM_POS2PTR(D, lpos);
+ if (lpos > pos) { /* Fwd label: add cumulative section offsets. */
+ int i;
+ for (i = secnum; i < DASM_POS2SEC(lpos); i++)
+ lofs += D->sections[i].ofs;
+ } else {
+ lofs -= ofs; /* Bkwd label: unfix offset. */
+ }
+ lofs -= b[pos+1]; /* Short branch ok? */
+ if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */
+ else { noshrink: shrink = 0; } /* No, cannot shrink op. */
+ }
+ b[pos+1] = shrink; /* The offset estimate slot now holds the shrink amount for pass 3. */
+ pos += 2;
+ break;
+ }
+ case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
+ case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
+ case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
+ case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
+ case DASM_LABEL_LG: p++;
+ case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
+ case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
+ case DASM_EXTERN: p += 2; break;
+ case DASM_ESC: p++; break;
+ case DASM_MARK: break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
+ }
+ }
+ stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#define dasmb(x) *cp++ = (unsigned char)(x) /* Emit one byte at cp and advance. */
+#ifndef DASM_ALIGNED_WRITES /* Default: store 16/32 bit values with a single write at cp, whatever its alignment. */
+#define dasmw(x) \
+ do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
+#define dasmd(x) \
+ do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
+#else /* Otherwise emit them bytewise, low byte first. */
+#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
+#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
+#endif
+
+/* Pass 3: Encode sections.
+** Writes the machine code of all sections back to back into `buffer` and
+** stores the addresses of global labels (index >= 10) in D->globals.
+** Returns DASM_S_PHASE if the number of bytes written differs from the
+** size computed by dasm_link(), else DASM_S_OK.
+*/
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ unsigned char *base = (unsigned char *)buffer;
+ unsigned char *cp = base; /* Current output position. */
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) { /* One iteration per recorded dasm_put() call. */
+ dasm_ActList p = D->actionlist + *b++;
+ unsigned char *mark = NULL;
+ while (1) {
+ int action = *p++;
+ int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0; /* Actions in this range own a buffer slot. */
+ switch (action) {
+ case DASM_DISP: if (!mark) mark = cp; {
+ unsigned char *mm = mark;
+ if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL;
+ if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7;
+ if (mrm != 5) { mm[-1] -= 0x80; break; } }
+ if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
+ }
+ case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
+ case DASM_IMM_DB: if (((n+128)&-256) == 0) {
+ db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
+ } else mark = NULL;
+ case DASM_IMM_D: wd: dasmd(n); break;
+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
+ case DASM_IMM_W: dasmw(n); break;
+ case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+ case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
+ b++; n = (int)(ptrdiff_t)D->globals[-n];
+ case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
+ case DASM_REL_PC: rel_pc: {
+ int shrink = *b++; /* Shrink amount stored by dasm_link(). */
+ int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
+ n = *pb - ((int)(cp-base) + 4-shrink);
+ if (shrink == 0) goto wd;
+ if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb;
+ goto wb;
+ }
+ case DASM_IMM_LG:
+ p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+ case DASM_IMM_PC: {
+ int *pb = DASM_POS2PTR(D, n);
+ n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
+ goto wd;
+ }
+ case DASM_LABEL_LG: {
+ int idx = *p++;
+ if (idx >= 10)
+ D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
+ break;
+ }
+ case DASM_LABEL_PC: case DASM_SETLABEL: break;
+ case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; }
+ case DASM_ALIGN:
+ n = *p++;
+ while (((cp-base) & n)) *cp++ = 0x90; /* nop */
+ break;
+ case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
+ case DASM_MARK: mark = cp; break;
+ case DASM_ESC: action = *p++;
+ default: *cp++ = action; break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
+ }
+ }
+ stop: (void)0;
+ }
+ }
+
+ if (base + D->codesize != cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+
+/* Get PC label offset.
+** Returns the value stored at the label's position for a defined label,
+** -1 for a label that is referenced but undefined and -2 for a label that
+** is unused or out of range.
+*/
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  int pos;
+  if (pc*sizeof(int) >= D->pcsize)
+    return -2;  /* Out of range. */
+  pos = D->pclabels[pc];
+  if (pos < 0)
+    return *DASM_POS2PTR(D, -pos);  /* Defined: negated entry is its position. */
+  if (pos > 0)
+    return -1;  /* Undefined. */
+  return -2;  /* Unused. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps.
+** Flags the first local label (1..9) that still has unresolved references
+** and resets the ones before it. If secmatch >= 0, the active section has
+** to be section secmatch. Returns the resulting status.
+*/
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int lbl = 1;
+    while (lbl <= 9) {
+      if (D->lglabels[lbl] > 0) {
+        D->status = DASM_S_UNDEF_L|lbl;
+        break;
+      }
+      D->lglabels[lbl] = 0;
+      lbl++;
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0) {
+    if (D->section != &D->sections[secmatch])
+      D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
+  }
+  return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
new file mode 100644
index 0000000..0d4bd4c
--- /dev/null
+++ b/dynasm/dasm_x86.lua
@@ -0,0 +1,1930 @@
+------------------------------------------------------------------------------
+-- DynASM x86/x64 module.
+--
+-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+local x64 = x64 -- Local copy of the global flag that dasm_x64.lua sets before loading this module.
+
+-- Module information:
+local _info = {
+ arch = x64 and "x64" or "x86", -- Follows the x64 flag captured above.
+ description = "DynASM x86/x64 module",
+ version = "1.2.2",
+ vernum = 10202,
+ release = "2011-01-09",
+ author = "Mike Pall",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, unpack, setmetatable = assert, unpack, setmetatable
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
+local concat, sort = table.concat, table.sort
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list. The comment above each group describes the arguments of its actions.
+-- CHECK: Keep this in sync with the C code (the DASM_* action enum in dasm_x86.h)!
+local action_names = {
+ -- int arg, 1 buffer pos:
+ "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
+ -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
+ "VREG", "SPACE", -- !x64: VREG support NYI.
+ -- ptrdiff_t arg, 1 buffer pos (address): !x64
+ "SETLABEL", "REL_A",
+ -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
+ "REL_LG", "REL_PC",
+ -- action arg (1 byte) or int arg, 1 buffer pos (link):
+ "IMM_LG", "IMM_PC",
+ -- action arg (1 byte) or int arg, 1 buffer pos (offset):
+ "LABEL_LG", "LABEL_PC",
+ -- action arg (1 byte), 1 buffer pos (offset):
+ "ALIGN",
+ -- action args (2 bytes), no buffer pos.
+ "EXTERN",
+ -- action arg (1 byte), no buffer pos.
+ "ESC",
+ -- no action arg, no buffer pos.
+ "MARK",
+ -- action arg (1 byte), no buffer pos, terminal action:
+ "SECTION",
+ -- no args, no buffer pos, terminal action:
+ "STOP"
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code (DASM_MAXSECPOS in dasm_x86.h)!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number (dynamically generated below).
+local map_action = {}
+-- First action number. Everything below does not need to be escaped.
+local actfirst = 256-#action_names -- 233 for the 23 actions, i.e. DASM_DISP in dasm_x86.h.
+
+-- Action list buffer and string (only used to remove dupes).
+local actlist = {}
+local actstr = ""
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Number the actions consecutively, starting at actfirst.
+for idx, aname in ipairs(action_names) do
+  map_action[aname] = actfirst + idx - 1
+end
+
+-- Dump every action name with its number in hex and decimal.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for i = 1, #action_names do
+    local aname = action_names[i]
+    local code = map_action[aname]
+    out:write(format(" %-10s %02X %d\n", aname, code, code))
+  end
+  out:write("\n")
+end
+
+-- Write the action list buffer as one static C array called `name`.
+-- The last byte is taken off actlist and written separately, so that it
+-- gets no trailing comma. An empty list is written as the single byte 255.
+local function writeactions(out, name)
+  local count = #actlist
+  local tail = actlist[count] or 255
+  actlist[count] = nil
+  if count == 0 then count = 1 end
+  out:write("static const unsigned char ", name, "[", count, "] = {\n")
+  local line = " "
+  for _, code in ipairs(actlist) do
+    line = line..code..","
+    -- Start a new output line once this one is long enough.
+    if 75 <= #line then
+      assert(out:write(line, "\n"))
+      line = " "
+    end
+  end
+  out:write(line, tail, "\n};\n\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Append one raw byte (an integer from 0 to 255) to the action list.
+local function wputxb(n)
+  local valid = n >= 0 and n <= 255 and n % 1 == 0
+  assert(valid, "byte out of range")
+  actlist[#actlist + 1] = n
+end
+
+-- Add an action to the list. An optional arg `a` is queued for the next
+-- dasm_put(). The buffer position advances by `num` (default 1) whenever
+-- `a` or `num` is given.
+local function waction(action, a, num)
+  local code = assert(map_action[action], "bad action name `"..action.."'")
+  wputxb(code)
+  if a then
+    actargs[#actargs + 1] = a
+  end
+  if a or num then
+    secpos = secpos + (num or 1)
+  end
+end
+
+-- Emit a line calling dasm_<func>(Dst, ...) with the given argument list.
+local function wcall(func, args)
+  local call = format("dasm_%s(Dst, %s);", func, concat(args, ", "))
+  wline(call, true)
+end
+
+-- Delete duplicate action list chunks. A tad slow, but so what.
+-- The chunk is everything in actlist past `offset`. If the same byte string
+-- already occurs in actstr, the chunk is dropped and the pending dasm_put()
+-- is pointed at the earlier copy.
+local function dedupechunk(offset)
+  local top = #actlist
+  local chunk = char(unpack(actlist, offset + 1, top))
+  local found = find(actstr, chunk, 1, true)
+  if not found then
+    -- First occurrence: remember it for later lookups.
+    actstr = actstr..chunk
+    return
+  end
+  actargs[1] = found - 1
+  for i = offset + 1, top do
+    actlist[i] = nil
+  end
+end
+
+-- Flush the pending part of the action list as one dasm_put() call
+-- (needed before intervening C code or on buffer position overflow).
+-- Pass a true `term` if the list already ends in a terminal action.
+local function wflush(term)
+  local start = actargs[1]
+  if #actlist ~= start then
+    -- Terminate the chunk unless the caller already did.
+    if not term then waction("STOP") end
+    dedupechunk(start)
+    wcall("put", actargs)
+    -- The next dasm_put() starts at the current end of the action list;
+    -- that offset occupies the first buffer position.
+    actargs = { #actlist }
+    secpos = 1
+  end
+end
+
+-- Put a code byte, preceded by an ESC action if it collides with the
+-- action number range.
+local function wputb(n)
+  if actfirst <= n then
+    waction("ESC")
+  end
+  wputxb(n)
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. Unknown names get the next free
+-- number (starting at 10) on first use.
+local next_global = 10
+local map_global = setmetatable({}, {
+  __index = function(tbl, label)
+    if not match(label, "^[%a_][%w_@]*$") then
+      werror("bad global label")
+    end
+    local num = next_global
+    if num > 246 then
+      werror("too many global labels")
+    end
+    next_global = num + 1
+    tbl[label] = num
+    return num
+  end,
+})
+
+-- Dump the global label names in the order of their numbers.
+local function dumpglobals(out, lvl)
+  local by_num = {}
+  for label, num in pairs(map_global) do
+    by_num[num] = label
+  end
+  out:write("Global labels:\n")
+  for num = 10, next_global - 1 do
+    out:write(format(" %s\n", by_num[num]))
+  end
+  out:write("\n")
+end
+
+-- Write the C enum of global labels, each identifier preceded by `prefix`
+-- and followed by a final <prefix>_MAX entry.
+local function writeglobals(out, prefix)
+  local by_num = {}
+  for label, num in pairs(map_global) do
+    by_num[num] = label
+  end
+  out:write("enum {\n")
+  for num = 10, next_global - 1 do
+    -- Anything from the first `@` on is dropped from the enum identifier.
+    local ident = gsub(by_num[num], "@.*", "")
+    out:write(" ", prefix, ident, ",\n")
+  end
+  out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write the global label names as a null-terminated C string array `name`.
+local function writeglobalnames(out, name)
+  local by_num = {}
+  for label, num in pairs(map_global) do
+    by_num[num] = label
+  end
+  out:write("static const char *const ", name, "[] = {\n")
+  for num = 10, next_global - 1 do
+    out:write(" \"", by_num[num], "\",\n")
+  end
+  out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. Unknown names get the next free
+-- number on first use, counting down from -1.
+local next_extern = -1
+local map_extern = setmetatable({}, {
+  __index = function(tbl, label)
+    -- No restrictions on the name for now.
+    local num = next_extern
+    if num < -256 then
+      werror("too many extern labels")
+    end
+    next_extern = num - 1
+    tbl[label] = num
+    return num
+  end,
+})
+
+-- Dump the extern label names in the order of their assignment.
+local function dumpexterns(out, lvl)
+  local by_idx = {}
+  for label, num in pairs(map_extern) do
+    by_idx[-num] = label
+  end
+  out:write("Extern labels:\n")
+  for idx = 1, -next_extern - 1 do
+    out:write(format(" %s\n", by_idx[idx]))
+  end
+  out:write("\n")
+end
+
+-- Write the extern label names as a null-terminated C string array `name`.
+local function writeexternnames(out, name)
+  local by_idx = {}
+  for label, num in pairs(map_extern) do
+    by_idx[-num] = label
+  end
+  out:write("static const char *const ", name, "[] = {\n")
+  for idx = 1, -next_extern - 1 do
+    out:write(" \"", by_idx[idx], "\",\n")
+  end
+  out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps. Internal register names have the form @<size><hexnum>, see mkrmap below.
+local map_archdef = {} -- Ext. register name -> int. name.
+local map_reg_rev = {} -- Int. register name -> ext. name.
+local map_reg_num = {} -- Int. register name -> register number.
+local map_reg_opsize = {} -- Int. register name -> operand size.
+local map_reg_valid_base = {} -- Int. register name -> valid base register?
+local map_reg_valid_index = {} -- Int. register name -> valid index register?
+local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex.
+local reg_list = {} -- Canonical list of int. register names.
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for _PTx macros).
+
+local addrsize = x64 and "q" or "d" -- Size for address operands: "q" in x64 mode, else "d".
+
+-- Helper to fill the register maps for one operand size `sz`: `cl` names the register class, `names` optionally lists named registers (numbered from 0).
+local function mkrmap(sz, cl, names)
+ local cname = format("@%s", sz) -- Internal name of the register class itself.
+ reg_list[#reg_list+1] = cname
+ map_archdef[cl] = cname
+ map_reg_rev[cname] = cl
+ map_reg_num[cname] = -1 -- The class entry has no register number.
+ map_reg_opsize[cname] = sz
+ if sz == addrsize or sz == "d" then -- Only address sized and dword registers may serve as base/index.
+ map_reg_valid_base[cname] = true
+ map_reg_valid_index[cname] = true
+ end
+ if names then
+ for n,name in ipairs(names) do
+ local iname = format("@%s%x", sz, n-1)
+ reg_list[#reg_list+1] = iname
+ map_archdef[name] = iname
+ map_reg_rev[iname] = name
+ map_reg_num[iname] = n-1
+ map_reg_opsize[iname] = sz
+ if sz == "b" and n > 4 then map_reg_needrex[iname] = false end -- Named byte registers 5..8 (ah, ch, dh, bh): marked as "no rex".
+ if sz == addrsize or sz == "d" then
+ map_reg_valid_base[iname] = true
+ map_reg_valid_index[iname] = true
+ end
+ end
+ end
+ for i=0,(x64 and sz ~= "f") and 15 or 7 do -- Generic numbered names: 16 registers in x64 mode (except size "f"), else 8.
+ local needrex = sz == "b" and i > 3 -- Numbered byte registers above 3 get their own "R" suffixed internal name.
+ local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
+ if needrex then map_reg_needrex[iname] = true end
+ local name
+ if sz == "o" then name = format("xmm%d", i)
+ elseif sz == "f" then name = format("st%d", i)
+ else name = format("r%d%s", i, sz == addrsize and "" or sz) end
+ map_archdef[name] = iname
+ if not map_reg_rev[iname] then -- Keep the entries of a named register; only add what is new.
+ reg_list[#reg_list+1] = iname
+ map_reg_rev[iname] = name
+ map_reg_num[iname] = i
+ map_reg_opsize[iname] = sz
+ if sz == addrsize or sz == "d" then
+ map_reg_valid_base[iname] = true
+ map_reg_valid_index[iname] = true
+ end
+ end
+ end
+ reg_list[#reg_list+1] = "" -- Append an empty entry after this group.
+end
+
+-- Integer registers (qword, dword, word and byte sized).
+-- The qword register class is only defined when assembling for x64.
+if x64 then
+ mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
+end
+mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
+mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
+mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
+-- esp (and rsp on x64) are not accepted as index registers.
+map_reg_valid_index[map_archdef.esp] = false
+if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+-- "Ra" is the variable register class of address size.
+map_archdef["Ra"] = "@"..addrsize
+
+-- FP registers (internally tword sized, but use "f" as operand size).
+mkrmap("f", "Rf")
+
+-- SSE registers (oword sized, but qword and dword accessible).
+mkrmap("o", "xmm")
+
+-- Operand size prefixes to codes.
+-- "aword" maps to the address size of the target ("q" or "d").
+local map_opsize = {
+ byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
+ aword = addrsize,
+}
+
+-- Operand size code to number.
+-- Used as the multiplier for "opsize*num" displacements and immediates.
+local map_opsizenum = {
+ b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+}
+
+-- Operand size code to name.
+local map_opsizename = {
+ b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
+ f = "fpword",
+}
+
+-- Valid index register scale factors.
+-- Maps the scale written in the source to the value stored in t.xsc.
+local map_xsc = {
+ ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
+}
+
+-- Condition codes.
+-- The first two rows hold the 16 base names; the last two rows are aliases
+-- that share a number with one of the base names.
+local map_cc = {
+ o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
+ s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
+ c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
+ pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
+}
+
+
+-- Reverse defines for registers.
+-- Every "@name" token in s is looked up in map_reg_rev and replaced by the
+-- external register name found there. The return values of gsub are passed
+-- through unchanged (presumably string.gsub: new string plus match count --
+-- the alias is defined outside this section).
+function _M.revdef(s)
+ return gsub(s, "@%w+", map_reg_rev)
+end
+
+-- Dump register names, operand sizes and internal numbers to `out`.
+-- Empty strings in reg_list separate register groups and are written as
+-- blank lines. Registers with a negative number are shown as "(variable)".
+local function dumpregs(out)
+  out:write("Register names, sizes and internal numbers:\n")
+  for _,intname in ipairs(reg_list) do
+    if intname ~= "" then
+      local regnum = map_reg_num[intname]
+      local shown = regnum
+      if regnum < 0 then shown = "(variable)" end
+      local sizename = map_opsizename[map_reg_opsize[intname]]
+      out:write(format(" %-5s %-8s %s\n", map_reg_rev[intname], sizename,
+                       shown))
+    else
+      out:write("\n")
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
+--   aprefix: "IMM_" or "REL_".
+--   imm:     a number selects a local/global label (>= 0) or an extern
+--            (< 0, stored as -index-1); anything else is a pc label
+--            expression.
+--   num:     passed through to waction.
+local function wputlabel(aprefix, imm, num)
+  if type(imm) ~= "number" then
+    -- PC label: the expression becomes the action argument.
+    waction(aprefix.."PC", imm, num)
+    return
+  end
+  if imm < 0 then
+    -- Extern: a flag byte (0 for IMM_, 1 otherwise) precedes the index.
+    waction("EXTERN")
+    wputxb(aprefix == "IMM_" and 0 or 1)
+    imm = -imm-1
+  else
+    waction(aprefix.."LG", nil, num)
+  end
+  wputxb(imm)
+end
+
+-- Put signed byte or arg.
+-- A number must lie in -128..127 and is emitted in two's complement form;
+-- any other value is deferred to an IMM_S action.
+local function wputsbarg(n)
+  if type(n) ~= "number" then
+    waction("IMM_S", n)
+    return
+  end
+  if n < -128 or n > 127 then
+    werror("signed immediate byte out of range")
+  end
+  wputb(n < 0 and n + 256 or n)
+end
+
+-- Put unsigned byte or arg.
+-- A number must lie in 0..255; any other value is deferred to an IMM_B
+-- action.
+local function wputbarg(n)
+  if type(n) ~= "number" then
+    waction("IMM_B", n)
+    return
+  end
+  if n < 0 or n > 255 then
+    werror("unsigned immediate byte out of range")
+  end
+  wputb(n)
+end
+
+-- Put unsigned word or arg.
+-- A number must lie in 0..65535 and is emitted low byte first; any other
+-- value is deferred to an IMM_W action.
+local function wputwarg(n)
+  if type(n) ~= "number" then
+    waction("IMM_W", n)
+    return
+  end
+  if n < 0 or n > 65535 then
+    werror("unsigned immediate word out of range")
+  end
+  local lo = n % 256
+  wputb(lo)
+  wputb((n - lo) / 256)
+end
+
+-- Put signed or unsigned dword or arg.
+--   number: emitted as four bytes, low byte first (negative values are
+--           biased by 2^32 first).
+--   table:  label reference, its first element goes to wputlabel.
+--   other:  deferred to an IMM_D action.
+local function wputdarg(n)
+  local kind = type(n)
+  if kind == "table" then
+    wputlabel("IMM_", n[1], 1)
+    return
+  elseif kind ~= "number" then
+    waction("IMM_D", n)
+    return
+  end
+  if n < 0 then n = n + 4294967296 end
+  -- Peel off the three low bytes, the remainder is the top byte.
+  for _=1,3 do
+    local lo = n % 256
+    wputb(lo)
+    n = (n - lo) / 256
+  end
+  wputb(n)
+end
+
+-- Put operand-size dependent number or arg (defaults to dword).
+-- Size "q" is emitted as a dword, too. Size "s" selects a signed byte.
+local function wputszarg(sz, n)
+  if sz == "w" then
+    wputwarg(n)
+  elseif sz == "b" then
+    wputbarg(n)
+  elseif sz == "s" then
+    wputsbarg(n)
+  elseif not sz or sz == "d" or sz == "q" then
+    wputdarg(n)
+  else
+    werror("bad operand size")
+  end
+end
+
+-- Put multi-byte opcode with operand-size dependent modifications.
+-- sz: operand size code; "w" emits a 0x66 prefix first and "b" decrements
+-- the last opcode byte by one.
+-- op: the opcode as a number, most significant byte emitted first.
+-- rex: REX bits; a non-zero value is rejected unless assembling for x64.
+-- The REX byte (64 + rex % 16) is emitted at most once: directly before a
+-- 0F 38 / 0F 3A / 0F escape if there is one, else before the last byte.
+local function wputop(sz, op, rex)
+ local r
+ if rex ~= 0 and not x64 then werror("bad operand size") end
+ if sz == "w" then wputb(102) end -- 102 == 0x66.
+ -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
+ if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
+ if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end
+ if op >= 65536 then
+ if rex ~= 0 then
+ -- Three-byte opcode map: REX goes in front of the 0F 3A / 0F 38 bytes.
+ local opc3 = op - op % 256
+ if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
+ wputb(64 + rex % 16); rex = 0
+ end
+ end
+ r = op % 65536 wputb((op-r) / 65536) op = r
+ end
+ if op >= 256 then
+ r = op % 256
+ local b = (op-r) / 256
+ -- Two-byte opcode map: REX goes in front of the 0F byte.
+ if b == 15 and rex ~= 0 then wputb(64 + rex % 16); rex = 0 end
+ wputb(b)
+ op = r
+ end
+ -- REX not emitted yet: put it directly before the final opcode byte.
+ if rex ~= 0 then wputb(64 + rex % 16) end
+ if sz == "b" then op = op - 1 end
+ wputb(op)
+end
+
+-- Put ModRM or SIB formatted byte.
+-- Layout: m in bits 6-7, the low 3 bits of s in bits 3-5 and the low 3 bits
+-- of rm in bits 0-2. The vs and vrm parameters are accepted but unused.
+local function wputmodrm(m, s, rm, vs, vrm)
+  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
+  local modbits = 64*m
+  local sparebits = 8*(s%8)
+  local rmbits = rm%8
+  wputb(modbits + sparebits + rmbits)
+end
+
+-- Put ModRM/SIB plus optional displacement.
+-- t: parsed operand table; the fields mode, reg, xreg, vreg, vxreg, xsc and
+-- disp are read here.
+-- imark: pattern character of a following immediate; "I" requests a MARK
+-- action after the ModRM byte.
+-- s: value for the spare (middle) 3 bit field, negative values count as 0.
+-- vsreg: variable register expression for the spare field, if any.
+-- Negative register numbers denote variable registers: the byte is emitted
+-- with a 0 placeholder, followed by a VREG action.
+local function wputmrmsib(t, imark, s, vsreg)
+ local vreg, vxreg
+ local reg, xreg = t.reg, t.xreg
+ if reg and reg < 0 then reg = 0; vreg = t.vreg end
+ if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
+ if s < 0 then s = 0 end
+
+ -- Register mode.
+ if sub(t.mode, 1, 1) == "r" then
+ wputmodrm(3, s, reg)
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ if vreg then waction("VREG", vreg); wputxb(0) end
+ return
+ end
+
+ local disp = t.disp
+ local tdisp = type(disp)
+ -- No base register?
+ if not reg then
+ local riprel = false
+ if xreg then
+ -- Indexed mode with index register only.
+ -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
+ wputmodrm(0, s, 4)
+ if imark == "I" then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wputmodrm(t.xsc, xreg, 5)
+ if vxreg then waction("VREG", vxreg); wputxb(3) end
+ else
+ -- Pure 32 bit displacement.
+ if x64 and tdisp ~= "table" then
+ wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+ wputmodrm(0, 4, 5)
+ else
+ -- On x64 a label (table) displacement is emitted rip-relative.
+ riprel = x64
+ wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+ end
+ if imark == "I" then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ end
+ if riprel then -- Emit rip-relative displacement.
+ if match("UWSiI", imark) then
+ werror("NYI: rip-relative displacement followed by immediate")
+ end
+ -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
+ wputlabel("REL_", disp[1], 2)
+ else
+ wputdarg(disp)
+ end
+ return
+ end
+
+ -- m is the displacement mode: 0 = none, 1 = signed byte, 2 = dword.
+ -- It stays nil if the size is left to be decided by a DISP action.
+ local m
+ if tdisp == "number" then -- Check displacement size at assembly time.
+ if disp == 0 and (reg%8) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
+ if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
+ elseif disp >= -128 and disp <= 127 then m = 1
+ else m = 2 end
+ elseif tdisp == "table" then
+ m = 2
+ end
+
+ -- Index register present or esp as base register: need SIB encoding.
+ if xreg or (reg%8) == 4 then
+ wputmodrm(m or 2, s, 4) -- ModRM.
+ if m == nil or imark == "I" then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
+ if vxreg then waction("VREG", vxreg); wputxb(3) end
+ if vreg then waction("VREG", vreg); wputxb(1) end
+ else
+ wputmodrm(m or 2, s, reg) -- ModRM.
+ if (imark == "I" and (m == 1 or m == 2)) or
+ (m == nil and (vsreg or vreg)) then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ if vreg then waction("VREG", vreg); wputxb(1) end
+ end
+
+ -- Put displacement.
+ -- Nothing is emitted for m == 0.
+ if m == 1 then wputsbarg(disp)
+ elseif m == 2 then wputdarg(disp)
+ elseif m == nil then waction("DISP", disp) end
+end
+
+------------------------------------------------------------------------------
+
+-- Return human-readable operand mode string.
+-- Each operand is shown as the first character of its mode followed by its
+-- operand size ("?" if unknown), e.g. "mov rd,x?".
+local function opmodestr(op, args)
+  local parts = {}
+  for i=1,#args do
+    local arg = args[i]
+    local size = arg.opsize or "?"
+    parts[i] = sub(arg.mode, 1, 1)..size
+  end
+  return op.." "..concat(parts, ",")
+end
+
+-- Convert number to valid integer or nil.
+-- Returns nothing if expr is not numeric. Numeric values must be integral
+-- and lie in -2^31 .. 2^32-1, otherwise an error is raised.
+local function toint(expr)
+  local n = tonumber(expr)
+  if not n then return end
+  local integral = (n % 1 == 0)
+  local inrange = (n >= -2147483648 and n <= 4294967295)
+  if not (integral and inrange) then
+    werror("bad integer number `"..expr.."'")
+  end
+  return n
+end
+
+-- Parse immediate expression.
+-- Returns the operand mode string and the immediate value/expression:
+--   &expr        -> "iPJ", expr cast to ptrdiff_t
+--   =>expr       -> "iJ", pc label expression
+--   ->name       -> "iJ", global label looked up in map_global
+--   <N or >N     -> "iJ", local label number (backward 1-9, forward 247-255)
+--   extern name  -> "iJ", extern looked up in map_extern
+--   anything else-> "iI", the unchanged expression
+local function immexpr(expr)
+  -- &expr (pointer)
+  if sub(expr, 1, 1) == "&" then
+    local target = sub(expr, 2)
+    return "iPJ", format("(ptrdiff_t)(%s)", target)
+  end
+
+  local lead = sub(expr, 1, 2)
+  if lead == "=>" then
+    -- =>expr (pc label reference)
+    return "iJ", sub(expr, 3)
+  elseif lead == "->" then
+    -- ->name (global label reference)
+    return "iJ", map_global[sub(expr, 3)]
+  end
+
+  -- [<>][1-9] (local label reference)
+  local direction, digit = match(expr, "^([<>])([1-9])$")
+  if direction then -- Fwd: 247-255, Bkwd: 1-9.
+    local bias = 0
+    if direction == ">" then bias = 246 end
+    return "iJ", digit + bias
+  end
+
+  -- extern name (external symbol reference)
+  local symbol = match(expr, "^extern%s+(%S+)$")
+  if symbol then
+    return "iJ", map_extern[symbol]
+  end
+
+  -- expr (interpreted as immediate)
+  return "iI", expr
+end
+
+-- Parse displacement expression: +-num, +-expr, +-opsize*num
+-- Returns one of:
+--   number   constant displacement (0 for an empty expression),
+--   table    { label } for a label reference (see immexpr),
+--   string   an expression to be evaluated later.
+-- Raises an error if a non-numeric expression lacks a leading sign.
+local function dispexpr(expr)
+ local disp = expr == "" and 0 or toint(expr)
+ if disp then return disp end
+ local c, dispt = match(expr, "^([+-])%s*(.+)$")
+ if c == "+" then
+ expr = dispt -- A leading plus sign is dropped from the expression.
+ elseif not c then
+ werror("bad displacement expression `"..expr.."'")
+ end
+ -- opsize*num: multiply the number by the value of the operand size name.
+ local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
+ local ops, imm = map_opsize[opsize], toint(tailops)
+ if ops and imm then
+ if c == "-" then imm = -imm end
+ return imm*map_opsizenum[ops]
+ end
+ -- Label references are returned wrapped in a table.
+ local mode, iexpr = immexpr(dispt)
+ if mode == "iJ" then
+ if c == "-" then werror("cannot invert label reference") end
+ return { iexpr }
+ end
+ return expr -- Need to return original signed expression.
+end
+
+-- Parse register or type expression.
+-- Accepts a register name, a type name or "type:@reg" (type with a register
+-- override). Returns nothing for a nil expr. For a known type it returns
+-- the register name, its number and the type entry; otherwise the
+-- expression itself and its register number (nil if it is no register).
+local function rtexpr(expr)
+  if not expr then return end
+  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
+  local typename = tname or expr
+  local tp = map_type[typename]
+  if not tp then
+    -- Not a type: treat the expression as a plain register name.
+    return expr, map_reg_num[expr]
+  end
+  local reg = ovreg or tp.reg
+  local rnum = map_reg_num[reg]
+  if not rnum then
+    werror("type `"..typename.."' needs a register override")
+  end
+  if not map_reg_valid_base[reg] then
+    werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
+  end
+  return reg, rnum, tp
+end
+
+-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
+-- param is the operand text, optionally starting with an operand size
+-- override (byte, word, dword, ...). Depending on the operand kind the
+-- result may also carry vreg/vxreg (variable register expressions) and
+-- needrex. The resulting mode strings are:
+--   "xm" / "xmO"       memory operand ("O": pure offset, non-x64 only)
+--   "rm", "rmR", "rmC" integer or SSE register (number 0 / "@b1")
+--   "f", "fF"          FP register (number 0)
+--   "i" + "1" / "S"    constant immediate (value 1 / fits a signed byte)
+--   other              whatever immexpr() returns
+-- The repeat ... until true loop only exists so that "break" can be used
+-- to leave the parser early.
+local function parseoperand(param)
+ local t = {}
+
+ local expr = param
+ local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
+ if opsize then
+ -- Only strip the first word if it is a known operand size name.
+ t.opsize = map_opsize[opsize]
+ if t.opsize then expr = tailops end
+ end
+
+ local br = match(expr, "^%[%s*(.-)%s*%]$")
+ repeat
+ if br then
+ t.mode = "xm"
+
+ -- [disp]
+ t.disp = toint(br)
+ if t.disp then
+ t.mode = x64 and "xm" or "xmO"
+ break
+ end
+
+ -- [reg...]
+ local tp
+ local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
+ reg, t.reg, tp = rtexpr(reg)
+ if not t.reg then
+ -- [expr]
+ t.mode = x64 and "xm" or "xmO"
+ t.disp = dispexpr("+"..br)
+ break
+ end
+
+ -- Variable register: the register expression follows in parentheses.
+ if t.reg == -1 then
+ t.vreg, tailr = match(tailr, "^(%b())(.*)$")
+ if not t.vreg then werror("bad variable register expression") end
+ end
+
+ -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
+ local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
+ if xsc then
+ if not map_reg_valid_index[reg] then
+ werror("bad index register `"..map_reg_rev[reg].."'")
+ end
+ -- The register parsed so far is the index, there is no base register.
+ t.xsc = map_xsc[xsc]
+ t.xreg = t.reg
+ t.vxreg = t.vreg
+ t.reg = nil
+ t.vreg = nil
+ t.disp = dispexpr(tailsc)
+ break
+ end
+ if not map_reg_valid_base[reg] then
+ werror("bad base register `"..map_reg_rev[reg].."'")
+ end
+
+ -- [reg] or [reg+-disp]
+ t.disp = toint(tailr) or (tailr == "" and 0)
+ if t.disp then break end
+
+ -- [reg+xreg...]
+ local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
+ xreg, t.xreg, tp = rtexpr(xreg)
+ if not t.xreg then
+ -- [reg+-expr]
+ t.disp = dispexpr(tailr)
+ break
+ end
+ if not map_reg_valid_index[xreg] then
+ werror("bad index register `"..map_reg_rev[xreg].."'")
+ end
+
+ if t.xreg == -1 then
+ t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
+ if not t.vxreg then werror("bad variable register expression") end
+ end
+
+ -- [reg+xreg*xsc...]
+ local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
+ if xsc then
+ t.xsc = map_xsc[xsc]
+ tailx = tailsc
+ end
+
+ -- [...] or [...+-disp] or [...+-expr]
+ t.disp = dispexpr(tailx)
+ else
+ -- imm or opsize*imm
+ local imm = toint(expr)
+ if not imm and sub(expr, 1, 1) == "*" and t.opsize then
+ imm = toint(sub(expr, 2))
+ if imm then
+ imm = imm * map_opsizenum[t.opsize]
+ t.opsize = nil
+ end
+ end
+ if imm then
+ if t.opsize then werror("bad operand size override") end
+ local m = "i"
+ if imm == 1 then m = m.."1" end
+ -- Map 2^32-128 .. 2^32-1 to the equivalent negative byte values.
+ if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
+ if imm >= -128 and imm <= 127 then m = m.."S" end
+ t.imm = imm
+ t.mode = m
+ break
+ end
+
+ local tp
+ local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
+ reg, t.reg, tp = rtexpr(reg)
+ if t.reg then
+ if t.reg == -1 then
+ t.vreg, tailr = match(tailr, "^(%b())(.*)$")
+ if not t.vreg then werror("bad variable register expression") end
+ end
+ -- reg
+ if tailr == "" then
+ if t.opsize then werror("bad operand size override") end
+ t.opsize = map_reg_opsize[reg]
+ if t.opsize == "f" then
+ t.mode = t.reg == 0 and "fF" or "f"
+ else
+ -- Warn about the word-sized (and, on x64, dword-sized) register 4.
+ if reg == "@w4" or (x64 and reg == "@d4") then
+ wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
+ end
+ t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
+ end
+ t.needrex = map_reg_needrex[reg]
+ break
+ end
+
+ -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
+ if not tp then werror("bad operand `"..param.."'") end
+ t.mode = "xm"
+ t.disp = format(tp.ctypefmt, tailr)
+ else
+ t.mode, t.imm = immexpr(expr)
+ -- Jump targets are always address sized.
+ if sub(t.mode, -1) == "J" then
+ if t.opsize and t.opsize ~= addrsize then
+ werror("bad operand size override")
+ end
+ t.opsize = addrsize
+ end
+ end
+ end
+ until true
+ return t
+end
+
+------------------------------------------------------------------------------
+-- x86 Template String Description
+-- ===============================
+--
+-- Each template string is a list of [match:]pattern pairs,
+-- separated by "|". The first match wins. No match means a
+-- bad or unsupported combination of operand modes or sizes.
+--
+-- The match part and the ":" is omitted if the operation has
+-- no operands. Otherwise the first N characters are matched
+-- against the mode strings of each of the N operands.
+--
+-- The mode string for each operand type is (see parseoperand()):
+-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
+-- FP register: "f", +"F" for st0
+-- Index operand: "xm", +"O" for [disp] (pure offset)
+-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
+-- +"I" for arg, +"P" for pointer
+-- Any: +"J" for valid jump targets
+--
+-- So a match character "m" (mixed) matches both an integer register
+-- and an index operand (to be encoded with the ModRM/SIB scheme).
+-- But "r" matches only a register and "x" only an index operand
+-- (e.g. for FP memory access operations).
+--
+-- The operand size match string starts right after the mode match
+-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
+-- The effective data size of the operation is matched against this list.
+--
+-- If only the regular "b", "w", "d", "q", "t" operand sizes are
+-- present, then all operands must be the same size. Unspecified sizes
+-- are ignored, but at least one operand must have a size or the pattern
+-- won't match (use the "byte", "word", "dword", "qword", "tword"
+-- operand size overrides. E.g.: mov dword [eax], 1).
+--
+-- If the list has a "1" or "2" prefix, the operand size is taken
+-- from the respective operand and any other operand sizes are ignored.
+-- If the list contains only ".", all operand sizes are ignored.
+-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
+-- are compared to the match.
+--
+-- E.g. "rrdw" matches for either two dword registers or two word
+-- registers. "Fx2dq" matches an st0 operand plus an index operand
+-- pointing to a dword (float) or qword (double).
+--
+-- Every character after the ":" is part of the pattern string:
+-- Hex chars are accumulated to form the opcode (left to right).
+-- "n" disables the standard opcode mods
+-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
+-- "X" Force REX.W.
+-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
+-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
+-- The spare 3 bits are either filled with the last hex digit or
+-- the result from a previous "r"/"R". The opcode is restored.
+--
+-- All of the following characters force a flush of the opcode:
+-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+-- "S" stores a signed 8 bit immediate from the last operand.
+-- "U" stores an unsigned 8 bit immediate from the last operand.
+-- "W" stores an unsigned 16 bit immediate from the last operand.
+-- "i" stores an operand sized immediate from the last operand.
+-- "I" ditto, but generates an action code to optionally modify
+-- the opcode (+2) for a signed 8 bit immediate.
+-- "J" generates one of the REL action codes from the last operand.
+--
+------------------------------------------------------------------------------
+
+-- Template strings for x86 instructions. Ordered by first opcode byte.
+-- Unimplemented opcodes (deliberate omissions) are marked with *.
+-- Keys have the form "<mnemonic>_<number of operands>". Entries that
+-- evaluate to false or nil (e.g. `x64 and "..."` on x86) leave the
+-- instruction undefined for the current target. An alternative with an
+-- empty pattern (e.g. "rx/oq:") only supplies another operand match.
+local map_op = {
+ -- 00-05: add...
+ -- 06: *push es
+ -- 07: *pop es
+ -- 08-0D: or...
+ -- 0E: *push cs
+ -- 0F: two byte opcode prefix
+ -- 10-15: adc...
+ -- 16: *push ss
+ -- 17: *pop ss
+ -- 18-1D: sbb...
+ -- 1E: *push ds
+ -- 1F: *pop ds
+ -- 20-25: and...
+ es_0 = "26",
+ -- 27: *daa
+ -- 28-2D: sub...
+ cs_0 = "2E",
+ -- 2F: *das
+ -- 30-35: xor...
+ ss_0 = "36",
+ -- 37: *aaa
+ -- 38-3D: cmp...
+ ds_0 = "3E",
+ -- 3F: *aas
+ inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
+ dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
+ push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
+ "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
+ pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
+ -- 60: *pusha, *pushad, *pushaw
+ -- 61: *popa, *popad, *popaw
+ -- 62: *bound rdw,x
+ -- 63: x86: *arpl mw,rw
+ movsxd_2 = x64 and "rm/qd:63rM",
+ fs_0 = "64",
+ gs_0 = "65",
+ o16_0 = "66",
+ a16_0 = not x64 and "67" or nil,
+ a32_0 = x64 and "67",
+ -- 68: push idw
+ -- 69: imul rdw,mdw,idw
+ -- 6A: push ib
+ -- 6B: imul rdw,mdw,S
+ -- 6C: *insb
+ -- 6D: *insd, *insw
+ -- 6E: *outsb
+ -- 6F: *outsd, *outsw
+ -- 70-7F: jcc lb
+ -- 80: add... mb,i
+ -- 81: add... mdw,i
+ -- 82: *undefined
+ -- 83: add... mdw,S
+ test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
+ -- 86: xchg rb,mb
+ -- 87: xchg rdw,mdw
+ -- 88: mov mb,r
+ -- 89: mov mdw,r
+ -- 8A: mov r,mb
+ -- 8B: mov r,mdw
+ -- 8C: *mov mdw,seg
+ lea_2 = "rx1dq:8DrM",
+ -- 8E: *mov seg,mdw
+ -- 8F: pop mdw
+ nop_0 = "90",
+ xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
+ cbw_0 = "6698",
+ cwde_0 = "98",
+ cdqe_0 = "4898",
+ cwd_0 = "6699",
+ cdq_0 = "99",
+ cqo_0 = "4899",
+ -- 9A: *call iw:idw
+ wait_0 = "9B",
+ fwait_0 = "9B",
+ pushf_0 = "9C",
+ pushfd_0 = not x64 and "9C",
+ pushfq_0 = x64 and "9C",
+ popf_0 = "9D",
+ popfd_0 = not x64 and "9D",
+ popfq_0 = x64 and "9D",
+ sahf_0 = "9E",
+ lahf_0 = "9F",
+ mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
+ movsb_0 = "A4",
+ movsw_0 = "66A5",
+ movsd_0 = "A5",
+ cmpsb_0 = "A6",
+ cmpsw_0 = "66A7",
+ cmpsd_0 = "A7",
+ -- A8: test Rb,i
+ -- A9: test Rdw,i
+ stosb_0 = "AA",
+ stosw_0 = "66AB",
+ stosd_0 = "AB",
+ lodsb_0 = "AC",
+ lodsw_0 = "66AD",
+ lodsd_0 = "AD",
+ scasb_0 = "AE",
+ scasw_0 = "66AF",
+ scasd_0 = "AF",
+ -- B0-B7: mov rb,i
+ -- B8-BF: mov rdw,i
+ -- C0: rol... mb,i
+ -- C1: rol... mdw,i
+ ret_1 = "i.:nC2W",
+ ret_0 = "C3",
+ -- C4: *les rdw,mq
+ -- C5: *lds rdw,mq
+ -- C6: mov mb,i
+ -- C7: mov mdw,i
+ -- C8: *enter iw,ib
+ leave_0 = "C9",
+ -- CA: *retf iw
+ -- CB: *retf
+ int3_0 = "CC",
+ int_1 = "i.:nCDU",
+ into_0 = "CE",
+ -- CF: *iret
+ -- D0: rol... mb,1
+ -- D1: rol... mdw,1
+ -- D2: rol... mb,cl
+ -- D3: rol... mdw,cl
+ -- D4: *aam ib
+ -- D5: *aad ib
+ -- D6: *salc
+ -- D7: *xlat
+ -- D8-DF: floating point ops
+ -- E0: *loopne
+ -- E1: *loope
+ -- E2: *loop
+ -- E3: *jcxz, *jecxz
+ -- E4: *in Rb,ib
+ -- E5: *in Rdw,ib
+ -- E6: *out ib,Rb
+ -- E7: *out ib,Rdw
+ call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
+ jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
+ -- EA: *jmp iw:idw
+ -- EB: jmp ib
+ -- EC: *in Rb,dx
+ -- ED: *in Rdw,dx
+ -- EE: *out dx,Rb
+ -- EF: *out dx,Rdw
+ -- F0: *lock
+ int1_0 = "F1",
+ repne_0 = "F2",
+ repnz_0 = "F2",
+ rep_0 = "F3",
+ repe_0 = "F3",
+ repz_0 = "F3",
+ -- F4: *hlt
+ cmc_0 = "F5",
+ -- F6: test... mb,i; div... mb
+ -- F7: test... mdw,i; div... mdw
+ clc_0 = "F8",
+ stc_0 = "F9",
+ -- FA: *cli
+ -- FB: *sti
+ cld_0 = "FC",
+ std_0 = "FD",
+ -- FE: inc... mb
+ -- FF: inc... mdw
+
+ -- misc ops
+ not_1 = "m:F72m",
+ neg_1 = "m:F73m",
+ mul_1 = "m:F74m",
+ imul_1 = "m:F75m",
+ div_1 = "m:F76m",
+ idiv_1 = "m:F77m",
+
+ imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
+ imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
+
+ movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
+ movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
+
+ bswap_1 = "rqd:0FC8r",
+ bsf_2 = "rmqdw:0FBCrM",
+ bsr_2 = "rmqdw:0FBDrM",
+ bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
+ btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
+ btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
+ bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
+
+ rdtsc_0 = "0F31", -- P1+
+ cpuid_0 = "0FA2", -- P1+
+
+ -- floating point ops
+ fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
+ fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
+ fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
+
+ fpop_0 = "DDD8", -- Alias for fstp st0.
+
+ fist_1 = "xw:nDF2m|xd:DB2m",
+ fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
+ fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",
+
+ fxch_0 = "D9C9",
+ fxch_1 = "ff:D9C8r",
+ fxch_2 = "fFf:D9C8r|Fff:D9C8R",
+
+ fucom_1 = "ff:DDE0r",
+ fucom_2 = "Fff:DDE0R",
+ fucomp_1 = "ff:DDE8r",
+ fucomp_2 = "Fff:DDE8R",
+ fucomi_1 = "ff:DBE8r", -- P6+
+ fucomi_2 = "Fff:DBE8R", -- P6+
+ fucomip_1 = "ff:DFE8r", -- P6+
+ fucomip_2 = "Fff:DFE8R", -- P6+
+ fcomi_1 = "ff:DBF0r", -- P6+
+ fcomi_2 = "Fff:DBF0R", -- P6+
+ fcomip_1 = "ff:DFF0r", -- P6+
+ fcomip_2 = "Fff:DFF0R", -- P6+
+ fucompp_0 = "DAE9",
+ fcompp_0 = "DED9",
+
+ fldcw_1 = "xw:nD95m",
+ fstcw_1 = "xw:n9BD97m",
+ fnstcw_1 = "xw:nD97m",
+ fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
+ fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
+ fclex_0 = "9BDBE2",
+ fnclex_0 = "DBE2",
+
+ fnop_0 = "D9D0",
+ -- D9D1-D9DF: unassigned
+
+ fchs_0 = "D9E0",
+ fabs_0 = "D9E1",
+ -- D9E2: unassigned
+ -- D9E3: unassigned
+ ftst_0 = "D9E4",
+ fxam_0 = "D9E5",
+ -- D9E6: unassigned
+ -- D9E7: unassigned
+ fld1_0 = "D9E8",
+ fldl2t_0 = "D9E9",
+ fldl2e_0 = "D9EA",
+ fldpi_0 = "D9EB",
+ fldlg2_0 = "D9EC",
+ fldln2_0 = "D9ED",
+ fldz_0 = "D9EE",
+ -- D9EF: unassigned
+
+ f2xm1_0 = "D9F0",
+ fyl2x_0 = "D9F1",
+ fptan_0 = "D9F2",
+ fpatan_0 = "D9F3",
+ fxtract_0 = "D9F4",
+ fprem1_0 = "D9F5",
+ fdecstp_0 = "D9F6",
+ fincstp_0 = "D9F7",
+ fprem_0 = "D9F8",
+ fyl2xp1_0 = "D9F9",
+ fsqrt_0 = "D9FA",
+ fsincos_0 = "D9FB",
+ frndint_0 = "D9FC",
+ fscale_0 = "D9FD",
+ fsin_0 = "D9FE",
+ fcos_0 = "D9FF",
+
+ -- SSE, SSE2
+ andnpd_2 = "rmo:660F55rM",
+ andnps_2 = "rmo:0F55rM",
+ andpd_2 = "rmo:660F54rM",
+ andps_2 = "rmo:0F54rM",
+ clflush_1 = "x.:0FAE7m",
+ cmppd_3 = "rmio:660FC2rMU",
+ cmpps_3 = "rmio:0FC2rMU",
+ cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
+ cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
+ comisd_2 = "rro:660F2FrM|rx/oq:",
+ comiss_2 = "rro:0F2FrM|rx/od:",
+ cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
+ cvtdq2ps_2 = "rmo:0F5BrM",
+ cvtpd2dq_2 = "rmo:F20FE6rM",
+ cvtpd2ps_2 = "rmo:660F5ArM",
+ cvtpi2pd_2 = "rx/oq:660F2ArM",
+ cvtpi2ps_2 = "rx/oq:0F2ArM",
+ cvtps2dq_2 = "rmo:660F5BrM",
+ cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
+ cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
+ cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
+ cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
+ cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
+ cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
+ -- cvtss2si is F3 0F 2D (F2 0F 2C would be cvttsd2si).
+ cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
+ cvttpd2dq_2 = "rmo:660FE6rM",
+ cvttps2dq_2 = "rmo:F30F5BrM",
+ cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
+ cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
+ ldmxcsr_1 = "xd:0FAE2m",
+ lfence_0 = "0FAEE8",
+ maskmovdqu_2 = "rro:660FF7rM",
+ mfence_0 = "0FAEF0",
+ movapd_2 = "rmo:660F28rM|mro:660F29Rm",
+ movaps_2 = "rmo:0F28rM|mro:0F29Rm",
+ movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
+ movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
+ movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
+ movhlps_2 = "rro:0F12rM",
+ movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
+ movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
+ movlhps_2 = "rro:0F16rM",
+ movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
+ movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
+ movmskpd_2 = "rr/do:660F50rM",
+ movmskps_2 = "rr/do:0F50rM",
+ movntdq_2 = "xro:660FE7Rm",
+ movnti_2 = "xrqd:0FC3Rm",
+ movntpd_2 = "xro:660F2BRm",
+ movntps_2 = "xro:0F2BRm",
+ movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
+ movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
+ movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
+ movupd_2 = "rmo:660F10rM|mro:660F11Rm",
+ movups_2 = "rmo:0F10rM|mro:0F11Rm",
+ orpd_2 = "rmo:660F56rM",
+ orps_2 = "rmo:0F56rM",
+ packssdw_2 = "rmo:660F6BrM",
+ packsswb_2 = "rmo:660F63rM",
+ packuswb_2 = "rmo:660F67rM",
+ paddb_2 = "rmo:660FFCrM",
+ paddd_2 = "rmo:660FFErM",
+ paddq_2 = "rmo:660FD4rM",
+ paddsb_2 = "rmo:660FECrM",
+ paddsw_2 = "rmo:660FEDrM",
+ paddusb_2 = "rmo:660FDCrM",
+ paddusw_2 = "rmo:660FDDrM",
+ paddw_2 = "rmo:660FFDrM",
+ pand_2 = "rmo:660FDBrM",
+ pandn_2 = "rmo:660FDFrM",
+ pause_0 = "F390",
+ pavgb_2 = "rmo:660FE0rM",
+ pavgw_2 = "rmo:660FE3rM",
+ pcmpeqb_2 = "rmo:660F74rM",
+ pcmpeqd_2 = "rmo:660F76rM",
+ pcmpeqw_2 = "rmo:660F75rM",
+ pcmpgtb_2 = "rmo:660F64rM",
+ pcmpgtd_2 = "rmo:660F66rM",
+ pcmpgtw_2 = "rmo:660F65rM",
+ -- Mem form: the xmm source (2nd operand) goes into the spare field and
+ -- the memory destination (1st operand) into ModRM, hence "Rm".
+ pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
+ pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
+ pmaddwd_2 = "rmo:660FF5rM",
+ pmaxsw_2 = "rmo:660FEErM",
+ pmaxub_2 = "rmo:660FDErM",
+ pminsw_2 = "rmo:660FEArM",
+ pminub_2 = "rmo:660FDArM",
+ pmovmskb_2 = "rr/do:660FD7rM",
+ pmulhuw_2 = "rmo:660FE4rM",
+ pmulhw_2 = "rmo:660FE5rM",
+ pmullw_2 = "rmo:660FD5rM",
+ pmuludq_2 = "rmo:660FF4rM",
+ por_2 = "rmo:660FEBrM",
+ prefetchnta_1 = "xb:n0F180m",
+ prefetcht0_1 = "xb:n0F181m",
+ prefetcht1_1 = "xb:n0F182m",
+ prefetcht2_1 = "xb:n0F183m",
+ psadbw_2 = "rmo:660FF6rM",
+ pshufd_3 = "rmio:660F70rMU",
+ pshufhw_3 = "rmio:F30F70rMU",
+ pshuflw_3 = "rmio:F20F70rMU",
+ pslld_2 = "rmo:660FF2rM|rio:660F726mU",
+ pslldq_2 = "rio:660F737mU",
+ psllq_2 = "rmo:660FF3rM|rio:660F736mU",
+ psllw_2 = "rmo:660FF1rM|rio:660F716mU",
+ psrad_2 = "rmo:660FE2rM|rio:660F724mU",
+ psraw_2 = "rmo:660FE1rM|rio:660F714mU",
+ psrld_2 = "rmo:660FD2rM|rio:660F722mU",
+ psrldq_2 = "rio:660F733mU",
+ psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
+ psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
+ psubb_2 = "rmo:660FF8rM",
+ psubd_2 = "rmo:660FFArM",
+ psubq_2 = "rmo:660FFBrM",
+ psubsb_2 = "rmo:660FE8rM",
+ psubsw_2 = "rmo:660FE9rM",
+ psubusb_2 = "rmo:660FD8rM",
+ psubusw_2 = "rmo:660FD9rM",
+ psubw_2 = "rmo:660FF9rM",
+ punpckhbw_2 = "rmo:660F68rM",
+ punpckhdq_2 = "rmo:660F6ArM",
+ punpckhqdq_2 = "rmo:660F6DrM",
+ punpckhwd_2 = "rmo:660F69rM",
+ punpcklbw_2 = "rmo:660F60rM",
+ punpckldq_2 = "rmo:660F62rM",
+ punpcklqdq_2 = "rmo:660F6CrM",
+ punpcklwd_2 = "rmo:660F61rM",
+ pxor_2 = "rmo:660FEFrM",
+ rcpps_2 = "rmo:0F53rM",
+ rcpss_2 = "rro:F30F53rM|rx/od:",
+ rsqrtps_2 = "rmo:0F52rM",
+ rsqrtss_2 = "rmo:F30F52rM",
+ sfence_0 = "0FAEF8",
+ shufpd_3 = "rmio:660FC6rMU",
+ shufps_3 = "rmio:0FC6rMU",
+ stmxcsr_1 = "xd:0FAE3m",
+ ucomisd_2 = "rro:660F2ErM|rx/oq:",
+ ucomiss_2 = "rro:0F2ErM|rx/od:",
+ unpckhpd_2 = "rmo:660F15rM",
+ unpckhps_2 = "rmo:0F15rM",
+ unpcklpd_2 = "rmo:660F14rM",
+ unpcklps_2 = "rmo:0F14rM",
+ xorpd_2 = "rmo:660F57rM",
+ xorps_2 = "rmo:0F57rM",
+
+ -- SSE3 ops
+ fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
+ addsubpd_2 = "rmo:660FD0rM",
+ addsubps_2 = "rmo:F20FD0rM",
+ haddpd_2 = "rmo:660F7CrM",
+ haddps_2 = "rmo:F20F7CrM",
+ hsubpd_2 = "rmo:660F7DrM",
+ hsubps_2 = "rmo:F20F7DrM",
+ lddqu_2 = "rxo:F20FF0rM",
+ movddup_2 = "rmo:F20F12rM",
+ movshdup_2 = "rmo:F30F16rM",
+ movsldup_2 = "rmo:F30F12rM",
+
+ -- SSSE3 ops
+ pabsb_2 = "rmo:660F381CrM",
+ pabsd_2 = "rmo:660F381ErM",
+ pabsw_2 = "rmo:660F381DrM",
+ palignr_3 = "rmio:660F3A0FrMU",
+ phaddd_2 = "rmo:660F3802rM",
+ phaddsw_2 = "rmo:660F3803rM",
+ phaddw_2 = "rmo:660F3801rM",
+ phsubd_2 = "rmo:660F3806rM",
+ phsubsw_2 = "rmo:660F3807rM",
+ phsubw_2 = "rmo:660F3805rM",
+ pmaddubsw_2 = "rmo:660F3804rM",
+ pmulhrsw_2 = "rmo:660F380BrM",
+ pshufb_2 = "rmo:660F3800rM",
+ psignb_2 = "rmo:660F3808rM",
+ psignd_2 = "rmo:660F380ArM",
+ psignw_2 = "rmo:660F3809rM",
+
+ -- SSE4.1 ops
+ blendpd_3 = "rmio:660F3A0DrMU",
+ blendps_3 = "rmio:660F3A0CrMU",
+ blendvpd_3 = "rmRo:660F3815rM",
+ blendvps_3 = "rmRo:660F3814rM",
+ dppd_3 = "rmio:660F3A41rMU",
+ dpps_3 = "rmio:660F3A40rMU",
+ extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
+ -- insertps is 66 0F 3A 21 (66 0F 3A 41 is dppd).
+ insertps_3 = "rrio:660F3A21rMU|rxi/od:",
+ movntdqa_2 = "rmo:660F382ArM",
+ mpsadbw_3 = "rmio:660F3A42rMU",
+ packusdw_2 = "rmo:660F382BrM",
+ pblendvb_3 = "rmRo:660F3810rM",
+ pblendw_3 = "rmio:660F3A0ErMU",
+ pcmpeqq_2 = "rmo:660F3829rM",
+ pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
+ pextrd_3 = "mri/do:660F3A16RmU",
+ pextrq_3 = "mri/qo:660F3A16RmU",
+ -- pextrw is SSE2, mem operand is SSE4.1 only
+ phminposuw_2 = "rmo:660F3841rM",
+ pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
+ pinsrd_3 = "rmi/od:660F3A22rMU",
+ pinsrq_3 = "rmi/oq:660F3A22rXMU",
+ pmaxsb_2 = "rmo:660F383CrM",
+ pmaxsd_2 = "rmo:660F383DrM",
+ pmaxud_2 = "rmo:660F383FrM",
+ pmaxuw_2 = "rmo:660F383ErM",
+ pminsb_2 = "rmo:660F3838rM",
+ pminsd_2 = "rmo:660F3839rM",
+ pminud_2 = "rmo:660F383BrM",
+ pminuw_2 = "rmo:660F383ArM",
+ pmovsxbd_2 = "rro:660F3821rM|rx/od:",
+ pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
+ pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
+ pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
+ pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
+ pmovsxwq_2 = "rro:660F3824rM|rx/od:",
+ pmovzxbd_2 = "rro:660F3831rM|rx/od:",
+ pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
+ pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
+ pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
+ pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
+ pmovzxwq_2 = "rro:660F3834rM|rx/od:",
+ pmuldq_2 = "rmo:660F3828rM",
+ pmulld_2 = "rmo:660F3840rM",
+ ptest_2 = "rmo:660F3817rM",
+ roundpd_3 = "rmio:660F3A09rMU",
+ roundps_3 = "rmio:660F3A08rMU",
+ roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
+ roundss_3 = "rrio:660F3A0ArMU|rxi/od:",
+
+ -- SSE4.2 ops
+ crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
+ pcmpestri_3 = "rmio:660F3A61rMU",
+ pcmpestrm_3 = "rmio:660F3A60rMU",
+ pcmpgtq_2 = "rmo:660F3837rM",
+ pcmpistri_3 = "rmio:660F3A63rMU",
+ pcmpistrm_3 = "rmio:660F3A62rMU",
+ popcnt_2 = "rmqdw:F30FB8rM",
+
+ -- SSE4a
+ extrq_2 = "rro:660F79rM",
+ extrq_3 = "riio:660F780mUU",
+ insertq_2 = "rro:F20F79rM",
+ insertq_4 = "rriio:F20F78rMUU",
+ lzcnt_2 = "rmqdw:F30FBDrM",
+ movntsd_2 = "xr/qo:nF20F2BRm",
+ movntss_2 = "xr/do:F30F2BRm",
+ -- popcnt is also in SSE4.2
+}
+
+------------------------------------------------------------------------------
+
+-- Arithmetic ops.
+-- n is the operation number: it selects the opcode row (n*8 + 1/3/5) and
+-- is also the spare field value for the 81/83 immediate forms.
+for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
+                     ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
+  local row = n * 8
+  local template =
+    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi"
+  map_op[name.."_2"] = format(template, row+1, row+3, n, n, row+5, n)
+end
+
+-- Shift ops. Note that shl and sal map to the same number.
+for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
+                     shl = 4, shr = 5, sar = 7, sal = 4 } do
+  local tmpl = "m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU"
+  map_op[name.."_2"] = format(tmpl, n, n, n)
+end
+
+-- Conditional ops. The condition code number becomes one hex digit of
+-- the opcode in each of the three instruction families.
+for cc,n in pairs(map_cc) do
+  local x = format("%X", n)
+  map_op["j"..cc.."_1"] = "J.:n0F8"..x.."J" -- short: 7%X
+  map_op["set"..cc.."_1"] = "mb:n0F9"..x.."2m"
+  map_op["cmov"..cc.."_2"] = "rmqdw:0F4"..x.."rM" -- P6+
+end
+
+-- FP arithmetic ops.
+-- nc is the opcode byte for the plain register form. nr equals nc for
+-- n < 4 and otherwise swaps each even/odd pair (sub/subr, div/divr).
+for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
+                     sub = 4, subr = 5, div = 6, divr = 7 } do
+  local nc = 192 + n * 8
+  local nr = nc
+  if n >= 4 then
+    if n % 2 == 0 then nr = nc + 8 else nr = nc - 8 end
+  end
+  local fn = "f"..name
+  local reg2 = format("Fff:D8%02XR", nc)
+  local mem2 = format("Fx2d:D8%XM|Fx2q:nDC%XM", n, n)
+  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
+  if n == 2 or n == 3 then
+    -- com/comp: no reversed-operand and no popping variants are defined.
+    map_op[fn.."_2"] = reg2.."|"..mem2
+  else
+    map_op[fn.."_2"] = reg2..format("|fFf:DC%02Xr|", nr)..mem2
+    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
+    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
+  end
+  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
+end
+
+-- FP conditional moves.
+-- The two-byte opcode starts at 0xDAC0 (56000). The low two bits of n
+-- step it by 8, the remaining bit of n steps it by 256.
+for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
+  local low = n % 4
+  local nc = 0xDAC0 + low * 8 + (n - low) * 64
+  local hex = format("%04X", nc)
+  map_op["fcmov"..cc.."_1"] = "ff:"..hex.."r" -- P6+
+  map_op["fcmov"..cc.."_2"] = "Fff:"..hex.."R" -- P6+
+end
+
+-- SSE FP arithmetic ops. All four variants (ps/ss/pd/sd) share the
+-- 0F5x opcode tail and differ only in the prefix byte and match part.
+for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
+                     sub = 12, min = 13, div = 14, max = 15 } do
+  local tail = format("0F5%XrM", n)
+  map_op[name.."ps_2"] = "rmo:"..tail
+  map_op[name.."ss_2"] = "rro:F3"..tail.."|rx/od:"
+  map_op[name.."pd_2"] = "rmo:66"..tail
+  map_op[name.."sd_2"] = "rro:F2"..tail.."|rx/oq:"
+end
+
+------------------------------------------------------------------------------
+
+-- Process pattern string.
+-- Walks `pat` one character at a time and emits the encoding of one opcode.
+--   pat      pattern string to interpret.
+--   args     table of parsed operands, indexed by operand position.
+--   sz       operand size; copied to szov, which an 'n' in the pattern clears.
+--   op       opcode name, only used in the "bad char" error message.
+--   needrex  if true, 16 is added to the rex value passed to wputop().
+-- Hex digits accumulate into `opcode`; 'r'/'R' merge a register number into
+-- it; 'm'/'M' flush it together with a ModRM/SIB; any other character first
+-- flushes a pending opcode and is then treated as an offset or immediate.
+local function dopattern(pat, args, sz, op, needrex)
+ local digit, addin
+ local opcode = 0
+ local szov = sz
+ local narg = 1
+ local rex = 0
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 5 positions.
+ if secpos+5 > maxsecpos then wflush() end
+
+ -- Process each character.
+ -- A "|" is appended so that the final opcode is always flushed.
+ for c in gmatch(pat.."|", ".") do
+ if match(c, "%x") then -- Hex digit.
+ -- byte(c)-48 maps '0'-'9' to 0-9; the adjustments below map
+ -- 'a'-'f' and 'A'-'F' to 10-15.
+ digit = byte(c) - 48
+ if digit > 48 then digit = digit - 39
+ elseif digit > 16 then digit = digit - 7 end
+ opcode = opcode*16 + digit
+ addin = nil
+ elseif c == "n" then -- Disable operand size mods for opcode.
+ szov = nil
+ elseif c == "X" then -- Force REX.W.
+ rex = 8
+ elseif c == "r" then -- Merge 1st operand regno. into opcode.
+ addin = args[1]; opcode = opcode + (addin.reg % 8)
+ if narg < 2 then narg = 2 end
+ elseif c == "R" then -- Merge 2nd operand regno. into opcode.
+ addin = args[2]; opcode = opcode + (addin.reg % 8)
+ narg = 3
+ elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
+ -- s becomes the spare value handed to wputmrmsib(): either the register
+ -- merged by a preceding 'r'/'R', or the last hex digit of the opcode.
+ local s
+ if addin then
+ s = addin.reg
+ opcode = opcode - (s%8) -- Undo regno opcode merge.
+ else
+ s = opcode % 16 -- Undo last digit.
+ opcode = (opcode - s) / 16
+ end
+ -- 'm' takes the ModRM operand from args[1], 'M' from args[2].
+ local nn = c == "m" and 1 or 2
+ local t = args[nn]
+ if narg <= nn then narg = nn + 1 end
+ -- Accumulate the rex value: 8 for size "q" (unless already set), 1 and 2
+ -- for t.reg/t.xreg above 7, 4 for a spare above 7, 16 for needrex.
+ if szov == "q" and rex == 0 then rex = rex + 8 end
+ if t.reg and t.reg > 7 then rex = rex + 1 end
+ if t.xreg and t.xreg > 7 then rex = rex + 2 end
+ if s > 7 then rex = rex + 4 end
+ if needrex then rex = rex + 16 end
+ -- opcode = nil marks the opcode as already emitted.
+ wputop(szov, opcode, rex); opcode = nil
+ local imark = sub(pat, -1) -- Force a mark (ugly).
+ -- Put ModRM/SIB with regno/last digit as spare.
+ wputmrmsib(t, imark, s, addin and addin.vreg)
+ addin = nil
+ else
+ if opcode then -- Flush opcode.
+ if szov == "q" and rex == 0 then rex = rex + 8 end
+ if needrex then rex = rex + 16 end
+ if addin and addin.reg == -1 then
+ -- Register number -1: emit opcode+1 followed by a VREG action.
+ wputop(szov, opcode + 1, rex)
+ waction("VREG", addin.vreg); wputxb(0)
+ else
+ if addin and addin.reg > 7 then rex = rex + 1 end
+ wputop(szov, opcode, rex)
+ end
+ opcode = nil
+ end
+ if c == "|" then break end
+ if c == "o" then -- Offset (pure 32 bit displacement).
+ wputdarg(args[1].disp); if narg < 2 then narg = 2 end
+ elseif c == "O" then
+ -- Same as 'o', but takes the displacement from the 2nd operand.
+ wputdarg(args[2].disp); narg = 3
+ else
+ -- Anything else is an immediate operand.
+ -- It consumes the next unused operand (args[narg]).
+ local a = args[narg]
+ narg = narg + 1
+ local mode, imm = a.mode, a.imm
+ -- Label operands are only accepted by the 'i', 'I' and 'J' characters.
+ if mode == "iJ" and not match("iIJ", c) then
+ werror("bad operand size for label")
+ end
+ if c == "S" then
+ wputsbarg(imm)
+ elseif c == "U" then
+ wputbarg(imm)
+ elseif c == "W" then
+ wputwarg(imm)
+ elseif c == "i" or c == "I" then
+ if mode == "iJ" then
+ wputlabel("IMM_", imm, 1)
+ elseif mode == "iI" and c == "I" then
+ waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
+ else
+ wputszarg(sz, imm)
+ end
+ elseif c == "J" then
+ if mode == "iPJ" then
+ waction("REL_A", imm) -- !x64 (secpos)
+ else
+ wputlabel("REL_", imm, 2)
+ end
+ else
+ werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
+ end
+ end
+ end
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Mapping of operand modes to short names. Suppress output with '#'.
+local map_modename = {
+  -- Modes that are shown in help output.
+  r = "reg",
+  R = "eax",
+  C = "cl",
+  x = "mem",
+  m = "mrm",
+  i = "imm",
+  f = "stx",
+  F = "st0",
+  J = "lbl",
+  ["1"] = "1",
+  -- Modes that suppress the whole template alternative.
+  I = "#",
+  S = "#",
+  O = "#",
+}
+
+-- Return a table/string showing all possible operand modes.
+-- Returns "" for zero-parameter opcodes, otherwise a table with one
+-- "mode, mode, ..." string per template alternative. Alternatives that
+-- contain a suppressed ('#') mode are left out.
+local function templatehelp(template, nparams)
+  if nparams == 0 then return "" end
+  local help = {}
+  for tm in gmatch(template, "[^%|]+") do
+    local names = map_modename[sub(tm, 1, 1)]
+    for c in gmatch(sub(tm, 2, nparams), ".") do
+      names = names..", "..map_modename[c]
+    end
+    if not match(names, "#") then help[#help+1] = names end
+  end
+  return help
+end
+
+-- Match operand modes against mode match part of template.
+-- Returns true if the i-th character of tm occurs in the mode string of
+-- the i-th operand for every operand, and nothing otherwise.
+local function matchtm(tm, args)
+  local i, n = 1, #args
+  while i <= n do
+    local modechar = sub(tm, i, i)
+    if match(args[i].mode, modechar) == nil then return end
+    i = i + 1
+  end
+  return true
+end
+
+-- Handle opcodes defined with template strings.
+-- With params == nil this only returns the help produced by templatehelp().
+-- Otherwise every operand is parsed, the template alternatives (separated
+-- by '|') are tried in order and the pattern of the first alternative whose
+-- operand modes and sizes match is handed to dopattern(). If no alternative
+-- matches, werror() is called with a message describing the mismatch.
+map_op[".template__"] = function(params, template, nparams)
+ if not params then return templatehelp(template, nparams) end
+ local args = {}
+
+ -- Zero-operand opcodes have no match part.
+ if #params == 0 then
+ dopattern(template, args, "d", params.op, nil)
+ return
+ end
+
+ -- Determine common operand size (coerce undefined size) or flag as mixed.
+ -- needrex collects the operands' needrex flags; they must all agree.
+ local sz, szmix, needrex
+ for i,p in ipairs(params) do
+ args[i] = parseoperand(p)
+ local nsz = args[i].opsize
+ if nsz then
+ if sz and sz ~= nsz then szmix = true else sz = nsz end
+ end
+ local nrex = args[i].needrex
+ if nrex ~= nil then
+ if needrex == nil then
+ needrex = nrex
+ elseif needrex ~= nrex then
+ werror("bad mix of byte-addressable registers")
+ end
+ end
+ end
+
+ -- Try all match:pattern pairs (separated by '|').
+ -- gotmatch records that at least one alternative matched the operand modes,
+ -- which selects a size-related error message below.
+ local gotmatch, lastpat
+ for tm in gmatch(template, "[^%|]+") do
+ -- Split off size match (starts after mode match) and pattern string.
+ local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
+ -- An empty pattern reuses the pattern of the previous alternative.
+ if pat == "" then pat = lastpat else lastpat = pat end
+ if matchtm(tm, args) then
+ local prefix = sub(szm, 1, 1)
+ if prefix == "/" then -- Match both operand sizes.
+ if args[1].opsize == sub(szm, 2, 2) and
+ args[2].opsize == sub(szm, 3, 3) then
+ dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+ return
+ end
+ else -- Match common operand size.
+ local szp = sz
+ if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
+ -- Prefix "1"/"2": use the size of that operand and ignore mixed sizes.
+ if prefix == "1" then szp = args[1].opsize; szmix = nil
+ elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
+ -- Prefix ".": accept any size. An undefined size is matched as "#".
+ if not szmix and (prefix == "." or match(szm, szp or "#")) then
+ dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
+ return
+ end
+ end
+ gotmatch = true
+ end
+ end
+
+ -- Nothing matched: pick the most specific error message.
+ local msg = "bad operand mode"
+ if gotmatch then
+ if szmix then
+ msg = "mixed operand size"
+ else
+ msg = sz and "bad operand size" or "missing operand size"
+ end
+ end
+
+ werror(msg.." in `"..opmodestr(params.op, args).."'")
+end
+
+------------------------------------------------------------------------------
+
+-- x64-specific opcode for 64 bit immediates and displacements.
+-- Accepts three forms (see the help table returned for params == nil):
+--   mov64 [disp], reg   opcode 0xa3
+--   mov64 reg, [disp]   opcode 0xa1
+--   mov64 reg, imm      opcode 0xb8 + low 3 bits of the register number
+-- The two memory forms require an operand of mode "rmR". The immediate
+-- form requires a 64 bit register. The 64 bit value is always emitted as
+-- two IMM_D actions (low 32 bits first, then the high 32 bits).
+if x64 then
+  function map_op.mov64_2(params)
+    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
+    if secpos+2 > maxsecpos then wflush() end
+    -- op64 is declared exactly once here (the original declared it twice,
+    -- leaving a dead shadowed local).
+    local opcode, sz, rex
+    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
+    if op64 then
+      -- Store form: [disp], reg.
+      local a = parseoperand(params[2])
+      if a.mode ~= "rmR" then werror("bad operand mode") end
+      sz = a.opsize
+      rex = sz == "q" and 8 or 0
+      opcode = 0xa3
+    else
+      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
+      local a = parseoperand(params[1])
+      if op64 then
+        -- Load form: reg, [disp].
+        if a.mode ~= "rmR" then werror("bad operand mode") end
+        sz = a.opsize
+        rex = sz == "q" and 8 or 0
+        opcode = 0xa1
+      else
+        -- Immediate form: reg, imm. sz stays nil here, rex carries the 8.
+        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
+          werror("bad operand mode")
+        end
+        op64 = params[2]
+        opcode = 0xb8 + (a.reg%8) -- !x64: no VREG support.
+        rex = a.reg > 7 and 9 or 8
+      end
+    end
+    wputop(sz, opcode, rex)
+    waction("IMM_D", format("(unsigned int)(%s)", op64))
+    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+-- The element size is the second character of the opcode name ("a" is
+-- replaced by addrsize). Every parameter must be an immediate whose size,
+-- if it has one, agrees with the element size.
+local function op_data(params)
+  if not params then return "imm..." end
+  local elemsz = sub(params.op, 2, 2)
+  if elemsz == "a" then elemsz = addrsize end
+  for _,p in ipairs(params) do
+    local opnd = parseoperand(p)
+    local is_imm = sub(opnd.mode, 1, 1) == "i"
+    local size_ok = not opnd.opsize or opnd.opsize == elemsz
+    if not (is_imm and size_ok) then
+      werror("bad mode or size in `"..p.."'")
+    end
+    if opnd.mode ~= "iJ" then
+      wputszarg(elemsz, opnd.imm)
+    else
+      wputlabel("IMM_", opnd.imm, 1)
+    end
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- All data pseudo-opcodes share the same handler.
+for _,opname in ipairs{ ".byte_*", ".sbyte_*", ".word_*",
+                        ".dword_*", ".aword_*" } do
+  map_op[opname] = op_data
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+-- Queues a callback line; writeactions() runs when the output is dumped.
+map_op[".actionlist_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeactions(out, cvar) end)
+    return
+  end
+  return "cvar"
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+-- Queues a callback line; writeglobals() runs when the output is dumped.
+map_op[".globals_1"] = function(params)
+  if params then
+    local pfx = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobals(out, pfx) end)
+    return
+  end
+  return "prefix"
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+-- Queues a callback line; writeglobalnames() runs when the output is dumped.
+map_op[".globalnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeglobalnames(out, cvar) end)
+    return
+  end
+  return "cvar"
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+-- Queues a callback line; writeexternnames() runs when the output is dumped.
+map_op[".externnames_1"] = function(params)
+  if params then
+    local cvar = params[1] -- No syntax check. You get to keep the pieces.
+    wline(function(out) writeexternnames(out, cvar) end)
+    return
+  end
+  return "cvar"
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+-- The first parameter names the label, an optional second parameter
+-- assigns an address to it. Also registered as .label_1.
+map_op[".label_2"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
+  if secpos+2 > maxsecpos then wflush() end
+  local label = parseoperand(params[1])
+  local mode, imm = label.mode, label.imm
+  local is_lbl = (mode == "iJ")
+  if type(imm) == "number" and (is_lbl or (imm >= 1 and imm <= 9)) then
+    -- Local label (1: ... 9:) or global label (->global:).
+    waction("LABEL_LG", nil, 1)
+    wputxb(imm)
+  elseif is_lbl then
+    -- PC label (=>pcexpr:).
+    waction("LABEL_PC", imm)
+  else
+    werror("bad label definition")
+  end
+  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
+  local addr = params[2]
+  if not addr then return end
+  local target = parseoperand(addr)
+  if target.mode ~= "iPJ" then
+    werror("bad label assignment")
+  else
+    waction("SETLABEL", target.imm)
+  end
+end
+map_op[".label_1"] = map_op[".label_2"]
+
+------------------------------------------------------------------------------
+
+-- Alignment pseudo-opcode.
+-- The parameter is either a number or an operand size name. It must be a
+-- power of 2 in the range (2 ... 256), otherwise "bad alignment" is raised.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
+  if align then
+    -- Halve up to 8 times; reaching exactly 1 proves a valid power of 2.
+    local rest, steps = align, 0
+    repeat
+      rest = rest / 2
+      steps = steps + 1
+    until rest == 1 or steps == 8
+    if rest == 1 then
+      waction("ALIGN", nil, 1)
+      wputxb(align-1) -- Action byte is 2**n-1.
+      return
+    end
+  end
+  werror("bad alignment")
+end
+
+-- Spacing pseudo-opcode.
+-- Emits a SPACE action for params[1] followed by the filler byte. The
+-- filler defaults to 0 and must be a number in 0..255. Also registered
+-- as .space_1.
+map_op[".space_2"] = function(params)
+  if not params then return "num [, filler]" end
+  if secpos+1 > maxsecpos then wflush() end
+  waction("SPACE", params[1])
+  local filler = params[2]
+  if filler then
+    filler = tonumber(filler)
+    local valid = filler and filler >= 0 and filler <= 255
+    if not valid then werror("bad filler") end
+  end
+  wputxb(filler or 0)
+end
+map_op[".space_1"] = map_op[".space_2"]
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+-- Registers `name` in map_type, adds a "#name" define that expands to
+-- sizeof(ctype) and emits a DtN() shortcut macro. Also registered as
+-- .type_2 (no base register).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  if reg and not map_reg_valid_base[reg] then
+    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  local entry = { ctype = ctype, reg = reg }
+  entry.ctypefmt = format("Dt%X(%%s)", num)
+  map_type[name] = entry
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+-- Writes one line per type (name, C type, base register), sorted by name.
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do names[#names+1] = name end
+  sort(names)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(names) do
+    local tp = map_type[name]
+    local reg = ""
+    if tp.reg then reg = map_reg_rev[tp.reg] or "" end
+    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+-- Emits a SECTION action followed by the section number as a byte.
+function _M.section(num)
+  waction("SECTION")
+  wputxb(num)
+  -- SECTION is a terminal action.
+  local terminal = true
+  wflush(terminal)
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+-- Writes a version banner, then the register and action dumps.
+function _M.dumparch(out)
+  local banner = format("DynASM %s version %s, released %s\n\n",
+                        _info.arch, _info.version, _info.release)
+  out:write(banner)
+  dumpregs(out)
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+-- Order of output: types, globals, externs.
+function _M.dumpdef(out, lvl)
+  for _,dump in ipairs{ dumptypes, dumpglobals, dumpexterns } do
+    dump(out, lvl)
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+-- Stores the four core callbacks and hands back this module's wflush.
+function _M.passcb(wl, we, wf, ww)
+  wline = wl
+  werror = we
+  wfatal = wf
+  wwarn = ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+-- Remembers the architecture name and the option table.
+function _M.setup(arch, opt)
+  g_arch = arch
+  g_opt = opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+-- Lookups missing in map_op fall back to map_coreop, lookups missing in
+-- map_def fall back to map_archdef. Returns the two front tables.
+function _M.mergemaps(map_coreop, map_def)
+  local op_fallback = { __index = map_coreop }
+  local def_fallback = { __index = map_archdef }
+  setmetatable(map_op, op_fallback)
+  setmetatable(map_def, def_fallback)
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
new file mode 100644
index 0000000..da94744
--- /dev/null
+++ b/dynasm/dynasm.lua
@@ -0,0 +1,1076 @@
+------------------------------------------------------------------------------
+-- DynASM. A dynamic assembler for code generation engines.
+-- Originally designed and implemented for LuaJIT.
+--
+-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+-- See below for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Application information.
+-- Static metadata table. The `copyright` field holds the complete license
+-- text as a long string.
+local _info = {
+ name = "DynASM",
+ description = "A dynamic assembler for code generation engines",
+ version = "1.2.2",
+ -- NOTE(review): presumably major*10000 + minor*100 + patch; confirm.
+ vernum = 10202,
+ release = "2011-01-09",
+ author = "Mike Pall",
+ url = "http://luajit.org/dynasm.html",
+ license = "MIT",
+ copyright = [[
+Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+]],
+}
+
+-- Cache library functions.
+local type, pairs, ipairs = type, pairs, ipairs
+local pcall, error, assert = pcall, error, assert
+local _s = string
+local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub
+local format, rep, upper = _s.format, _s.rep, _s.upper
+local _t = table
+local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort
+local exit = os.exit
+local io = io
+local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr
+
+------------------------------------------------------------------------------
+
+-- Program options.
+local g_opt = {}
+
+-- Global state for current file.
+-- g_synclineno is advanced by every wline() call and compared against
+-- g_lineno by wsync().
+local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch
+-- Either the number of errors so far or the string "fatal" (see wfatal).
+local g_errcount = 0
+
+-- Write buffer for output file.
+-- While g_capbuffer is set, wline() appends to it instead of g_wbuffer.
+local g_wbuffer, g_capbuffer
+
+------------------------------------------------------------------------------
+
+-- Write an output line (or callback function) to the buffer.
+-- The active capture buffer takes precedence over the main write buffer.
+-- With needindent set, the current indent is prepended to the line.
+local function wline(line, needindent)
+  local target = g_capbuffer or g_wbuffer
+  if needindent then line = g_indent..line end
+  target[#target+1] = line
+  g_synclineno = g_synclineno + 1
+end
+
+-- Write assembler line as a comment, if requested.
+-- Does nothing unless the comment option is set; otherwise the line is
+-- wrapped in the configured comment start/end strings and indented.
+local function wcomment(aline)
+  local open = g_opt.comment
+  if not open then return end
+  wline(open..aline..g_opt.endcomment, true)
+end
+
+-- Resync CPP line numbers.
+-- Emits a `# <line> "<file>"` marker when the output line counter has
+-- drifted from the input line number and the cpp option is enabled.
+local function wsync()
+  if g_synclineno == g_lineno or not g_opt.cpp then return end
+  local marker = "# "..g_lineno..' "'..g_fname..'"'
+  wline(marker)
+  g_synclineno = g_lineno
+end
+
+-- Dummy action flush function. Replaced with arch-specific function later.
+-- This placeholder accepts one argument (term) and does nothing, so the
+-- core pseudo-opcodes can call wflush() unconditionally.
+local function wflush(term)
+end
+
+-- Dump all buffered output lines.
+-- String entries are written followed by a newline (write failures raise
+-- via assert); any other entry is called with the output file.
+local function wdumplines(out, buf)
+  for _,entry in ipairs(buf) do
+    if type(entry) ~= "string" then
+      -- Special callback to dynamically insert lines after end of processing.
+      entry(out)
+    else
+      assert(out:write(entry, "\n"))
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Emit an error. Processing continues with next statement.
+-- Raises a Lua error (level 0, so no position is added) whose message
+-- carries file name, line number, the message and the current line.
+local function werror(msg)
+  local text = format("%s:%s: error: %s:\n%s",
+                      g_fname, g_lineno, msg, g_curline)
+  error(text, 0)
+end
+
+-- Emit a fatal error. Processing stops.
+-- Marks the error counter as "fatal" before raising through werror().
+local function wfatal(msg)
+  g_errcount = "fatal"
+  return werror(msg)
+end
+
+-- Print a warning. Processing continues.
+-- The warning goes to stderr together with the current source line.
+local function wwarn(msg)
+  local text = format("%s:%s: warning: %s:\n%s\n",
+                      g_fname, g_lineno, msg, g_curline)
+  stderr:write(text)
+end
+
+-- Print caught error message. But suppress excessive errors.
+-- Returns true (stop processing) only after a fatal error. Regular errors
+-- are counted; the first 20 are printed and the 21st prints a notice.
+local function wprinterr(...)
+  if type(g_errcount) ~= "number" then
+    -- Fatal error.
+    stderr:write(...)
+    return true -- Stop processing.
+  end
+  -- Regular error.
+  g_errcount = g_errcount + 1
+  if g_errcount < 21 then -- Seems to be a reasonable limit.
+    stderr:write(...)
+  elseif g_errcount == 21 then
+    stderr:write(g_fname,
+      ":*: warning: too many errors (suppressed further messages).\n")
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Map holding all option handlers.
+local opt_map = {}
+local opt_current
+
+-- Print error and exit with error status.
+-- All arguments are written to stderr after a fixed prefix.
+local function opterror(...)
+  local err = stderr
+  err:write("dynasm.lua: ERROR: ", ...)
+  err:write("\n")
+  exit(1)
+end
+
+-- Get option parameter.
+-- Returns the argument at args.argn and advances args.argn past it.
+-- A missing argument is reported through opterror().
+local function optparam(args)
+  local pos = args.argn
+  local value = args[pos]
+  if not value then
+    opterror("missing parameter for option `", opt_current, "'.")
+  end
+  args.argn = pos + 1
+  return value
+end
+
+------------------------------------------------------------------------------
+
+-- Core pseudo-opcodes.
+local map_coreop = {}
+-- Dummy opcode map. Replaced by arch-specific map.
+local map_op = {}
+
+-- Forward declarations.
+local dostmt
+local readfile
+
+------------------------------------------------------------------------------
+
+-- Map for defines (initially empty, chains to arch-specific map).
+local map_def = {}
+
+-- Pseudo-opcode to define a substitution.
+-- The substitution text defaults to "1" when omitted. Also registered as
+-- .define_1.
+map_coreop[".define_2"] = function(params, nparams)
+  if not params then
+    if nparams == 1 then return "name" end
+    return "name, subst"
+  end
+  local name = params[1]
+  if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
+  map_def[name] = params[2] or "1"
+end
+map_coreop[".define_1"] = map_coreop[".define_2"]
+
+-- Define a substitution on the command line.
+-- Accepts "name=subst" or a bare "name" (which is defined as "1").
+function opt_map.D(args)
+  local spec = optparam(args)
+  local name, subst = match(spec, "^([%a_][%w_]*)=(.*)$")
+  if name then
+    map_def[name] = subst
+    return
+  end
+  if match(spec, "^[%a_][%w_]*$") then
+    map_def[spec] = "1"
+    return
+  end
+  opterror("bad define")
+end
+
+-- Undefine a substitution on the command line.
+-- The name must be a plain identifier.
+function opt_map.U(args)
+  local name = optparam(args)
+  if not match(name, "^[%a_][%w_]*$") then
+    opterror("bad define")
+  else
+    map_def[name] = nil
+  end
+end
+
+-- Helper for definesubst.
+-- gotsubst remembers the last word that was replaced (false if none).
+local gotsubst
+
+-- gsub callback: returns the definition of `word`, or `word` itself if it
+-- has none.
+local function definesubst_one(word)
+  local subst = map_def[word]
+  if not subst then return word end
+  gotsubst = word
+  return subst
+end
+
+-- Iteratively substitute defines.
+-- Repeats the substitution pass until a pass replaces nothing. If the
+-- 100th pass still replaced something, the define is reported as recursive.
+local function definesubst(stmt)
+  local passes = 0
+  repeat
+    gotsubst = false
+    stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
+    passes = passes + 1
+  until not gotsubst or passes == 100 -- Limit number of iterations.
+  if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
+  return stmt
+end
+
+-- Dump all defines.
+-- Writes the defines sorted by name. When an arch module is loaded, each
+-- substitution is passed through its revdef() first.
+local function dumpdefines(out, lvl)
+  local names = {}
+  for name in pairs(map_def) do names[#names+1] = name end
+  sort(names)
+  out:write("Defines:\n")
+  for _,name in ipairs(names) do
+    local subst = map_def[name]
+    if g_arch then subst = g_arch.revdef(subst) end
+    out:write(format(" %-20s %s\n", name, subst))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for conditional assembly.
+local condlevel = 0
+local condstack = {}
+
+-- Evaluate condition with a Lua expression. Substitutions already performed.
+-- Returns a boolean; the number 0 counts as false. Compile or runtime
+-- errors in the expression are reported as a fatal "bad condition".
+local function cond_eval(cond)
+  local chunk, err = loadstring("return "..cond)
+  if chunk then
+    setfenv(chunk, {}) -- No globals. All unknown identifiers evaluate to nil.
+    local ok, value = pcall(chunk)
+    if not ok then
+      err = value
+    elseif value == 0 then -- Oh well.
+      return false
+    else
+      return not not value
+    end
+  end
+  wfatal("bad condition: "..err)
+end
+
+-- Skip statements until next conditional pseudo-opcode at the same level.
+-- Temporarily replaces dostmt with a filter that only tracks nested
+-- .if/.endif pairs. The first .elif/.else/.endif at nesting depth 0
+-- restores the saved dostmt and is passed on to it.
+local function stmtskip()
+  local saved = dostmt
+  local depth = 0
+  dostmt = function(stmt)
+    local op = match(stmt, "^%s*(%S+)")
+    if op == ".if" then
+      depth = depth + 1
+      return
+    end
+    if depth ~= 0 then
+      if op == ".endif" then depth = depth - 1 end
+      return
+    end
+    if op == ".elif" or op == ".else" or op == ".endif" then
+      dostmt = saved
+      dostmt(stmt)
+    end
+  end
+end
+
+-- Pseudo-opcodes for conditional assembly.
+-- .if: evaluate the condition, push the result onto the conditional stack
+-- and skip the following statements if it is false.
+map_coreop[".if_1"] = function(params)
+  if not params then return "condition" end
+  -- Evaluate first: a failing condition must not change the nesting level.
+  local taken = cond_eval(params[1])
+  condlevel = condlevel + 1
+  condstack[condlevel] = taken
+  if not taken then stmtskip() end
+end
+
+-- .elif: only evaluated if no earlier branch of this conditional was
+-- taken. Statements are skipped unless this branch becomes the taken one.
+map_coreop[".elif_1"] = function(params)
+  if not params then return "condition" end
+  if condlevel == 0 then wfatal(".elif without .if") end
+  local lvl = condlevel
+  local state = condstack[lvl]
+  if not state then
+    local taken = cond_eval(params[1])
+    if taken then
+      condstack[lvl] = taken
+      return
+    end
+  elseif state == "else" then
+    wfatal(".elif after .else")
+  end
+  stmtskip()
+end
+
+-- .else: marks the current level as "else". Statements are skipped if an
+-- earlier branch of this conditional was already taken.
+map_coreop[".else_0"] = function(params)
+  if condlevel == 0 then wfatal(".else without .if") end
+  local lvl = condlevel
+  local previous = condstack[lvl]
+  condstack[lvl] = "else"
+  if not previous then return end
+  if previous == "else" then wfatal(".else after .else") end
+  stmtskip()
+end
+
+-- .endif: leaves the innermost conditional level.
+map_coreop[".endif_0"] = function(params)
+  if condlevel == 0 then wfatal(".endif without .if") end
+  condlevel = condlevel - 1
+end
+
+-- Check for unfinished conditionals.
+-- Reports an unbalanced conditional unless a fatal error already occurred.
+local function checkconds()
+  if g_errcount == "fatal" or condlevel == 0 then return end
+  wprinterr(g_fname, ":*: error: unbalanced conditional\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Search for a file in the given path and open it for reading.
+-- Returns the open file handle of the first hit and stores its full name
+-- in g_fname. Returns nothing if the file is not found in any directory.
+local function pathopen(path, name)
+  -- A backslash anywhere in package.path selects "\" as separator.
+  local dirsep = "/"
+  if match(package.path, "\\") then dirsep = "\\" end
+  for _,dir in ipairs(path) do
+    local fullname = name
+    if dir ~= "" then fullname = dir..dirsep..name end
+    local fin = io.open(fullname, "r")
+    if fin then
+      g_fname = fullname
+      return fin
+    end
+  end
+end
+
+-- Include a file.
+-- The file is searched in the include path and processed by readfile().
+-- The per-file state of the including file is saved and restored around it.
+map_coreop[".include_1"] = function(params)
+  if not params then return "filename" end
+  local name = params[1]
+  -- Save state. Ugly, but upvalues are fast.
+  local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent
+  -- Read the included file.
+  local fin = pathopen(g_opt.include, name)
+  if not fin then wfatal("include file `"..name.."' not found") end
+  local fatal = readfile(fin)
+  -- Restore state.
+  g_synclineno = -1
+  g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi
+  if fatal then wfatal("in include file") end
+end
+
+-- Make .include and conditionals initially available, too.
+for _,opname in ipairs{ ".include_1", ".if_1", ".elif_1",
+                        ".else_0", ".endif_0" } do
+  map_op[opname] = map_coreop[opname]
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for macros.
+local mac_capture, mac_lineno, mac_name
+local mac_active = {}
+local mac_list = {}
+
+-- Pseudo-opcode to define a macro.
+-- The first parameter is the macro name, the rest are its parameter names.
+-- All following statements are captured (not assembled) until a statement
+-- consisting only of .endmacro is seen. The macro is then registered in
+-- map_op as "name_<nparams>". Invoking it replays the captured statements
+-- with parameters and defines substituted.
+map_coreop[".macro_*"] = function(mparams)
+  if not mparams then return "name [, params...]" end
+  -- Split off and validate macro name.
+  local name = remove(mparams, 1)
+  if not name then werror("missing macro name") end
+  if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]+$")) then
+    wfatal("bad macro name `"..name.."'")
+  end
+  -- Validate macro parameter names.
+  local mdup = {}
+  for _,mp in ipairs(mparams) do
+    if not match(mp, "^[%a_][%w_]*$") then
+      wfatal("bad macro parameter name `"..mp.."'")
+    end
+    if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
+    mdup[mp] = true
+  end
+  -- Check for duplicate or recursive macro definitions.
+  local opname = name.."_"..#mparams
+  if map_op[opname] or map_op[name.."_*"] then
+    wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
+  end
+  if mac_capture then wfatal("recursive macro definition") end
+
+  -- Enable statement capture.
+  local lines = {}
+  mac_lineno = g_lineno
+  mac_name = name
+  mac_capture = function(stmt) -- Statement capture function.
+    -- Stop macro definition with .endmacro pseudo-opcode.
+    -- The dot is escaped so that only a literal ".endmacro" terminates the
+    -- capture (an unescaped "." would accept any character there).
+    if not match(stmt, "^%s*%.endmacro%s*$") then
+      lines[#lines+1] = stmt
+      return
+    end
+    mac_capture = nil
+    mac_lineno = nil
+    mac_name = nil
+    mac_list[#mac_list+1] = opname
+    -- Add macro-op definition.
+    map_op[opname] = function(params)
+      -- Help/dump mode: hand back the parameter names and captured lines.
+      if not params then return mparams, lines end
+      -- Protect against recursive macro invocation.
+      if mac_active[opname] then wfatal("recursive macro invocation") end
+      mac_active[opname] = true
+      -- Setup substitution map.
+      local subst = {}
+      for i,mp in ipairs(mparams) do subst[mp] = params[i] end
+      local mcom
+      if g_opt.maccomment and g_opt.comment then
+        mcom = " MACRO "..name.." ("..#mparams..")"
+        wcomment("{"..mcom)
+      end
+      -- Loop through all captured statements
+      for _,stmt in ipairs(lines) do
+        -- Substitute macro parameters.
+        local st = gsub(stmt, "[%w_]+", subst)
+        st = definesubst(st)
+        st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
+        if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
+        -- Emit statement. Use a protected call for better diagnostics.
+        local ok, err = pcall(dostmt, st)
+        if not ok then
+          -- Add the captured statement to the error.
+          wprinterr(err, "\n", g_indent, "| ", stmt,
+                    "\t[MACRO ", name, " (", #mparams, ")]\n")
+        end
+      end
+      if mcom then wcomment("}"..mcom) end
+      mac_active[opname] = nil
+    end
+  end
+end
+
+-- An .endmacro pseudo-opcode outside of a macro definition is an error.
+-- (Inside a definition it is consumed by the statement capture function.)
+map_coreop[".endmacro_0"] = function(params)
+  local msg = ".endmacro without .macro"
+  wfatal(msg)
+end
+
+-- Dump all macros and their contents (with -PP only).
+-- Writes one line per macro (name and parameter names), sorted by op name.
+-- With lvl > 1 the captured statements are listed, too.
+local function dumpmacros(out, lvl)
+  sort(mac_list)
+  out:write("Macros:\n")
+  for _,opname in ipairs(mac_list) do
+    -- Strip the "_<nparams>" suffix. The count may have more than one
+    -- digit, so a fixed-width cut would corrupt the name for macros with
+    -- ten or more parameters.
+    local name = match(opname, "^(.*)_%d+$") or opname
+    -- Calling a macro op without params returns its definition.
+    local params, lines = map_op[opname]()
+    out:write(format(" %-20s %s\n", name, concat(params, ", ")))
+    if lvl > 1 then
+      for _,line in ipairs(lines) do
+        out:write(" |", line, "\n")
+      end
+      out:write("\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Check for unfinished macro definitions.
+-- A capture function that is still set means the .endmacro is missing.
+local function checkmacros()
+  if not mac_capture then return end
+  wprinterr(g_fname, ":", mac_lineno,
+            ": error: unfinished .macro `", mac_name ,"'\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for captures.
+local cap_lineno, cap_name
+local cap_buffers = {}
+local cap_used = {}
+
+-- Start a capture.
+-- From here on output lines go to the named capture buffer. A buffer that
+-- already exists is continued. Captures cannot be nested.
+map_coreop[".capture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local name = params[1]
+  if not match(name, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..name.."'")
+  end
+  if cap_name then
+    wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
+  end
+  cap_name, cap_lineno = name, g_lineno
+  -- Create or continue a capture buffer and start the output line capture.
+  if not cap_buffers[name] then cap_buffers[name] = {} end
+  g_capbuffer = cap_buffers[name]
+  g_synclineno = 0
+end
+
+-- Stop a capture.
+-- Output lines go to the main write buffer again.
+map_coreop[".endcapture_0"] = function(params)
+  wflush()
+  if not cap_name then wfatal(".endcapture without a valid .capture") end
+  cap_name, cap_lineno, g_capbuffer = nil, nil, nil
+  g_synclineno = 0
+end
+
+-- Dump a capture buffer.
+-- Queues a callback line. The buffer is looked up when the output is
+-- dumped, so it may still be filled after this statement.
+map_coreop[".dumpcapture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local name = params[1]
+  if not match(name, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..name.."'")
+  end
+  cap_used[name] = true
+  local function emit(out)
+    local buf = cap_buffers[name]
+    if buf then wdumplines(out, buf) end
+  end
+  wline(emit)
+  g_synclineno = 0
+end
+
+-- Dump all captures and the number of lines in their buffers; with lvl > 1
+-- the buffer contents are printed too.
+-- Capture names are sorted first so the dump order is deterministic (plain
+-- pairs() iteration order is unspecified), matching what dumpmacros does.
+local function dumpcaptures(out, lvl)
+  out:write("Captures:\n")
+  local names = {}
+  for name in pairs(cap_buffers) do names[#names+1] = name end
+  sort(names)
+  -- The separator line is the same for every capture; build it once.
+  local bar = rep("=", 76)
+  for _,name in ipairs(names) do
+    local buf = cap_buffers[name]
+    out:write(format(" %-20s %4s)\n", name, "("..#buf))
+    if lvl > 1 then
+      out:write(" ", bar, "\n")
+      for _,line in ipairs(buf) do
+        out:write(" ", line, "\n")
+      end
+      out:write(" ", bar, "\n\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Report a capture that is still open or, if none is open, every capture
+-- buffer that no .dumpcapture has referenced.
+local function checkcaptures()
+  if not cap_name then
+    for bufname in pairs(cap_buffers) do
+      if not cap_used[bufname] then
+        wprinterr(g_fname, ":*: error: missing .dumpcapture ", bufname ,"\n")
+      end
+    end
+    return
+  end
+  wprinterr(g_fname, ":", cap_lineno,
+    ": error: unfinished .capture `", cap_name,"'\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Section names, in the order they were given to .section.
+local map_sections = {}
+
+-- Pseudo-opcode to define code sections.
+-- For each name this emits "#define DASM_SECTION_<NAME> <index>" (index
+-- starting at 0) and registers a ".<name>" pseudo-opcode that calls
+-- g_arch.section(index). Finally DASM_MAXSECTION is defined as the number of
+-- sections.
+-- TODO: Data sections, BSS sections. Needs extra C code and API.
+map_coreop[".section_*"] = function(params)
+  if not params then return "name..." end
+  if #map_sections > 0 then werror("duplicate section definition") end
+  wflush()
+  for sn,secname in ipairs(params) do
+    local index = sn - 1
+    local opname = "."..secname.."_0"
+    -- Reject malformed names and names whose op key is already taken.
+    if not match(secname, "^[%a][%w_]*$") or
+       map_op[opname] or map_op["."..secname.."_*"] then
+      werror("bad section name `"..secname.."'")
+    end
+    map_sections[#map_sections+1] = secname
+    wline(format("#define DASM_SECTION_%s\t%d", upper(secname), index))
+    map_op[opname] = function(params) g_arch.section(index) end
+  end
+  wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections))
+end
+
+-- Write the names of all defined sections to `out`, one per line.
+-- The lvl argument is accepted for symmetry with the other dump helpers but
+-- is not used here.
+local function dumpsections(out, lvl)
+  out:write("Sections:\n")
+  for i = 1, #map_sections do
+    out:write(format(" %s\n", map_sections[i]))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Load the architecture module "dasm_<arch>" and install it as g_arch.
+-- Returns an error message string on failure and nothing on success.
+local function loadarch(arch)
+  if not match(arch, "^[%w_]+$") then
+    return "bad arch name"
+  end
+  local loaded, mod = pcall(require, "dasm_"..arch)
+  if not loaded then
+    -- On failure pcall's second result is the error message.
+    return "cannot load module: "..mod
+  end
+  g_arch = mod
+  -- The module receives the writer/error callbacks and returns the flush
+  -- function; afterwards it supplies the merged op and define maps.
+  wflush = mod.passcb(wline, werror, wfatal, wwarn)
+  mod.setup(arch, g_opt)
+  map_op, map_def = mod.mergemaps(map_coreop, map_def)
+end
+
+-- Dump architecture description.
+-- Loads the architecture named by the option parameter (unless one is already
+-- loaded), prints the DynASM banner, the arch module's own description and
+-- every pseudo-opcode/opcode with its parameter templates, then calls exit(0).
+function opt_map.dumparch(args)
+  local archname = optparam(args)
+  if not g_arch then
+    local err = loadarch(archname)
+    if err then opterror(err) end
+  end
+
+  -- Collect the keys of both op maps and sort them.
+  local t = {}
+  for name in pairs(map_coreop) do t[#t+1] = name end
+  for name in pairs(map_op) do t[#t+1] = name end
+  sort(t)
+
+  local out = stdout
+  out:write(format("%s version %s, released %s, %s\n",
+    _info.name, _info.version, _info.release, _info.url))
+  g_arch.dumparch(out)
+
+  local pseudo = true
+  out:write("Pseudo-Opcodes:\n")
+  for _,sname in ipairs(t) do
+    -- Keys of interest look like "<name>_<digit>" or "<name>_*".
+    local name, nparam = match(sname, "^(.+)_([0-9%*])$")
+    if name then
+      -- The first name that does not start with "." begins the opcode list;
+      -- this relies on the sorted order placing "."-prefixed names first.
+      if pseudo and sub(name, 1, 1) ~= "." then
+        out:write("\nOpcodes:\n")
+        pseudo = false
+      end
+      local f = map_op[sname]
+      local s
+      -- Convert a digit suffix to a number; "*" stays a string.
+      if nparam ~= "*" then nparam = nparam + 0 end
+      if nparam == 0 then
+        s = ""
+      elseif type(f) == "string" then
+        -- String entries are described by the ".template__" handler.
+        s = map_op[".template__"](nil, f, nparam)
+      else
+        -- Handlers called with nil params return their parameter description.
+        s = f(nil, nparam)
+      end
+      if type(s) == "table" then
+        for _,s2 in ipairs(s) do
+          out:write(format(" %-12s %s\n", name, s2))
+        end
+      else
+        out:write(format(" %-12s %s\n", name, s))
+      end
+    end
+  end
+  out:write("\n")
+  exit(0)
+end
+
+-- Pseudo-opcode to set the architecture.
+-- Only initially available (map_op is replaced when called).
+map_op[".arch_1"] = function(params)
+  if not params then return "name" end
+  local failure = loadarch(params[1])
+  if failure then
+    wfatal(failure)
+  end
+end
+
+-- Core-map .arch entry: it only exists to give a clear error report when a
+-- second .arch statement is encountered.
+map_coreop[".arch_1"] = function(params)
+  if params then
+    wfatal("duplicate .arch statement")
+  end
+  return "name"
+end
+
+------------------------------------------------------------------------------
+
+-- Dummy pseudo-opcode that accepts any parameters and does nothing.
+-- Don't confuse '.nop' with 'nop'.
+map_coreop[".nop_*"] = function(params)
+  if params then return end
+  return "[ignored...]"
+end
+
+-- Pseudo-opcodes that raise an error with a user-supplied message:
+-- .error reports through werror, .fatal through wfatal.
+map_coreop[".error_1"] = function(params)
+  if not params then return "message" end
+  local message = params[1]
+  werror(message)
+end
+
+map_coreop[".fatal_1"] = function(params)
+  if not params then return "message" end
+  local message = params[1]
+  wfatal(message)
+end
+
+-- Dump all user defined elements (sections, defines, arch-specific items,
+-- macros, captures) to `out`. Does nothing when the dump level is 0.
+local function dumpdef(out)
+  local lvl = g_opt.dumpdef
+  if lvl ~= 0 then
+    dumpsections(out, lvl)
+    dumpdefines(out, lvl)
+    if g_arch then g_arch.dumpdef(out, lvl) end
+    dumpmacros(out, lvl)
+    dumpcaptures(out, lvl)
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Helper state for splitstmt: the closing delimiters still expected,
+-- innermost first.
+local splitlvl
+
+-- gsub callback for splitstmt. Tracks delimiter nesting in splitlvl and
+-- replaces any other matched character by " \0 " when no delimiter is open.
+local function splitstmt_one(c)
+  if c == "(" or c == "[" or c == "{" then
+    -- Push the matching closer.
+    local closer = (c == "(" and ")") or (c == "[" and "]") or "}"
+    splitlvl = closer..splitlvl
+    return c
+  end
+  if c == ")" or c == "]" or c == "}" then
+    -- The closer must match the innermost open delimiter.
+    if sub(splitlvl, 1, 1) ~= c then werror("unbalanced (), [] or {}") end
+    splitlvl = sub(splitlvl, 2)
+    return c
+  end
+  if splitlvl == "" then
+    return " \0 "
+  end
+  return c
+end
+
+-- Split statement into (pseudo-)opcode and params.
+-- Returns the opcode string and a table holding the parameters in order; the
+-- table's `op` field repeats the opcode. A statement of the form "name:" is
+-- returned as a ".label" statement with the label as its only parameter.
+local function splitstmt(stmt)
+  -- Convert label with trailing-colon into .label statement.
+  local label = match(stmt, "^%s*(.+):%s*$")
+  if label then return ".label", {label} end
+
+  -- Split at top-level commas, but obey parentheses, brackets and braces.
+  -- splitstmt_one marks each split point with a "\0" separator.
+  splitlvl = ""
+  stmt = gsub(stmt, "[,%(%)%[%]{}]", splitstmt_one)
+  -- Anything left in splitlvl is an opener that was never closed; braces are
+  -- tracked as well, so the message names all three delimiter kinds.
+  if splitlvl ~= "" then werror("unbalanced (), [] or {}") end
+
+  -- Split off opcode.
+  local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
+  if not op then werror("bad statement syntax") end
+
+  -- Split parameters at the "\0" separators and trim trailing whitespace.
+  local params = {}
+  for p in gmatch(other, "%s*(%Z+)%z?") do
+    params[#params+1] = gsub(p, "%s+$", "")
+  end
+  if #params > 16 then werror("too many parameters") end
+
+  params.op = op
+  return op, params
+end
+
+-- Process a single statement.
+-- Handling order: blank statements are dropped, an active macro capture
+-- receives the raw statement, defines are substituted, statements starting
+-- with "|" are emitted as C code, and everything else is split into opcode
+-- and parameters and dispatched through map_op.
+dostmt = function(stmt)
+ -- Ignore empty statements.
+ if match(stmt, "^%s*$") then return end
+
+ -- Capture macro defs before substitution.
+ if mac_capture then return mac_capture(stmt) end
+ stmt = definesubst(stmt)
+
+ -- Emit C code without parsing the line.
+ if sub(stmt, 1, 1) == "|" then
+ local tail = sub(stmt, 2)
+ wflush()
+ -- A tail starting with "//" goes through wcomment, anything else through wline.
+ if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end
+ return
+ end
+
+ -- Split into (pseudo-)opcode and params.
+ local op, params = splitstmt(stmt)
+
+ -- Get opcode handler (matching # of parameters or generic handler).
+ -- Keys have the form "<op>_<nparams>"; "<op>_*" is the generic fallback.
+ local f = map_op[op.."_"..#params] or map_op[op.."_*"]
+ if not f then
+ if not g_arch then wfatal("first statement must be .arch") end
+ -- Improve error report.
+ -- Only parameter counts 0..9 are probed to tell a wrong parameter count
+ -- apart from an unknown statement.
+ for i=0,9 do
+ if map_op[op.."_"..i] then
+ werror("wrong number of parameters for `"..op.."'")
+ end
+ end
+ werror("unknown statement `"..op.."'")
+ end
+
+ -- Call opcode handler or special handler for template strings.
+ -- String entries are passed to the ".template__" entry of map_op.
+ if type(f) == "string" then
+ map_op[".template__"](params, f)
+ else
+ f(params)
+ end
+end
+
+-- Process a single line.
+-- A line whose first non-whitespace character is "|" is an assembler line;
+-- every other line is passed through as plain C code. Assembler lines are
+-- optionally echoed as comments, stripped of "//" comments and split into
+-- statements at semicolons.
+local function doline(line)
+ if g_opt.flushline then wflush() end
+
+ -- Assembler line?
+ -- indent is the leading whitespace, aline the text after the first "|".
+ local indent, aline = match(line, "^(%s*)%|(.*)$")
+ if not aline then
+ -- No, plain C code line, need to flush first.
+ wflush()
+ wsync()
+ wline(line, false)
+ return
+ end
+
+ g_indent = indent -- Remember current line indentation.
+
+ -- Emit C code (even from macros). Avoids echo and line parsing.
+ -- This is the "||" form: aline itself still starts with "|".
+ if sub(aline, 1, 1) == "|" then
+ if not mac_capture then
+ wsync()
+ elseif g_opt.comment then
+ -- While a macro is being captured the line is only echoed as a comment.
+ wsync()
+ wcomment(aline)
+ end
+ dostmt(aline)
+ return
+ end
+
+ -- Echo assembler line as a comment.
+ if g_opt.comment then
+ wsync()
+ wcomment(aline)
+ end
+
+ -- Strip assembler comments.
+ aline = gsub(aline, "//.*$", "")
+
+ -- Split line into statements at semicolons.
+ if match(aline, ";") then
+ for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end
+ else
+ dostmt(aline)
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Write DynASM header.
+-- Emits a C comment naming the DynASM URL, the DynASM version, the arch
+-- module's name and version and the input file name, followed by a
+-- preprocessor check that DASM_VERSION equals _info.vernum.
+local function dasmhead(out)
+ out:write(format([[
+/*
+** This file has been pre-processed with DynASM.
+** %s
+** DynASM version %s, DynASM %s version %s
+** DO NOT EDIT! The original file is in "%s".
+*/
+
+#if DASM_VERSION != %d
+#error "Version mismatch between DynASM and included encoding engine"
+#endif
+
+]], _info.url,
+ _info.version, g_arch._info.arch, g_arch._info.version,
+ g_fname, _info.vernum))
+end
+
+-- Read input file.
+-- Feeds every line of `fin` to doline, counting lines in g_lineno. Errors
+-- raised by doline are caught and reported through wprinterr; when wprinterr
+-- returns a true value, reading stops and readfile returns true. Otherwise
+-- pending output is flushed, the file is closed and nothing is returned.
+readfile = function(fin)
+  g_indent = ""
+  g_lineno = 0
+  g_synclineno = -1
+
+  -- Process all lines.
+  for line in fin:lines() do
+    g_lineno = g_lineno + 1
+    g_curline = line
+    local ok, err = pcall(doline, line)
+    if not ok and wprinterr(err, "\n") then
+      -- Don't leave the input file open when bailing out early.
+      if fin ~= stdin then fin:close() end
+      return true
+    end
+  end
+  wflush()
+
+  -- Close input file.
+  assert(fin == stdin or fin:close())
+end
+
+-- Write all buffered output lines to `outfile` (stdout when it is nil or
+-- "-"), then optionally dump the definitions.
+local function writefile(outfile)
+  local to_stdout = (outfile == nil or outfile == "-")
+
+  -- Open output file.
+  local fout = to_stdout and stdout or assert(io.open(outfile, "w"))
+
+  -- Write all buffered lines
+  wdumplines(fout, g_wbuffer)
+
+  -- Close output file (stdout is left open).
+  if not to_stdout then assert(fout:close()) end
+
+  -- Optionally dump definitions; they go to stderr when the generated code
+  -- went to stdout, and to stdout otherwise.
+  dumpdef(to_stdout and stderr or stdout)
+end
+
+-- Translate an input file to an output file.
+-- `infile` may be "-" for stdin. If any error was counted, a summary is
+-- written to stderr and exit(1) is called instead of writing output.
+local function translate(infile, outfile)
+  g_wbuffer = {}
+  g_indent = ""
+  g_lineno = 0
+  g_synclineno = -1
+
+  -- Put header.
+  wline(dasmhead)
+
+  -- Read input file.
+  local from_stdin = (infile == "-")
+  g_fname = from_stdin and "(stdin)" or infile
+  local fin = from_stdin and stdin or assert(io.open(infile, "r"))
+  readfile(fin)
+
+  -- Check for errors.
+  if not g_arch then
+    wprinterr(g_fname, ":*: error: missing .arch directive\n")
+  end
+  checkconds()
+  checkmacros()
+  checkcaptures()
+
+  if g_errcount ~= 0 then
+    -- g_errcount is only pluralized when it is a number greater than one.
+    local plural = ""
+    if type(g_errcount) == "number" and g_errcount > 1 then plural = "s" end
+    stderr:write(g_fname, ":*: info: ", g_errcount, " error", plural,
+      " in input file -- no output file generated.\n")
+    dumpdef(stderr)
+    exit(1)
+  end
+
+  -- Write output file.
+  writefile(outfile)
+end
+
+------------------------------------------------------------------------------
+
+-- Print help text.
+-- Writes the program description, version line and usage text to stdout and
+-- then calls exit(0).
+function opt_map.help()
+ stdout:write("DynASM -- ", _info.description, ".\n")
+ stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n")
+ stdout:write[[
+
+Usage: dynasm [OPTION]... INFILE.dasc|-
+
+ -h, --help Display this help text.
+ -V, --version Display version and copyright information.
+
+ -o, --outfile FILE Output file name (default is stdout).
+ -I, --include DIR Add directory to the include search path.
+
+ -c, --ccomment Use /* */ comments for assembler lines.
+ -C, --cppcomment Use // comments for assembler lines (default).
+ -N, --nocomment Suppress assembler lines in output.
+ -M, --maccomment Show macro expansions as comments (default off).
+
+ -L, --nolineno Suppress CPP line number information in output.
+ -F, --flushline Flush action list for every line.
+
+ -D NAME[=SUBST] Define a substitution.
+ -U NAME Undefine a substitution.
+
+ -P, --dumpdef Dump defines, macros, etc. Repeat for more output.
+ -A, --dumparch ARCH Load architecture ARCH and dump description.
+]]
+ exit(0)
+end
+
+-- Print name, version, release, URL and copyright text to stdout, then call
+-- exit(0).
+function opt_map.version()
+  local text = format("%s version %s, released %s\n%s\n\n%s",
+    _info.name, _info.version, _info.release, _info.url, _info.copyright)
+  stdout:write(text)
+  exit(0)
+end
+
+-- Misc. options. Each handler records its setting in g_opt.
+
+-- Output file name, taken from the option parameter.
+function opt_map.outfile(args)
+  g_opt.outfile = optparam(args)
+end
+
+-- Put the directory given as option parameter at the front of the include list.
+function opt_map.include(args)
+  insert(g_opt.include, 1, optparam(args))
+end
+
+-- Comment style used for echoed assembler lines.
+function opt_map.ccomment()
+  g_opt.comment = "/*|"
+  g_opt.endcomment = " */"
+end
+
+function opt_map.cppcomment()
+  g_opt.comment = "//|"
+  g_opt.endcomment = ""
+end
+
+function opt_map.nocomment()
+  g_opt.comment = false
+end
+
+function opt_map.maccomment()
+  g_opt.maccomment = true
+end
+
+function opt_map.nolineno()
+  g_opt.cpp = false
+end
+
+function opt_map.flushline()
+  g_opt.flushline = true
+end
+
+-- Each occurrence raises the dump level by one.
+function opt_map.dumpdef()
+  g_opt.dumpdef = g_opt.dumpdef + 1
+end
+
+------------------------------------------------------------------------------
+
+-- Maps each single-character option to the name of its long option.
+local opt_alias = {
+  h = "help",
+  ["?"] = "help",
+  V = "version",
+  o = "outfile",
+  I = "include",
+  c = "ccomment",
+  C = "cppcomment",
+  N = "nocomment",
+  M = "maccomment",
+  L = "nolineno",
+  F = "flushline",
+  P = "dumpdef",
+  A = "dumparch",
+}
+
+-- Parse a single option: look up its handler (directly or through its short
+-- alias) and call it with the argument list.
+local function parseopt(opt, args)
+  -- Record how the option was spelled: one dash for single-character
+  -- options, two dashes otherwise.
+  if #opt == 1 then
+    opt_current = "-"..opt
+  else
+    opt_current = "--"..opt
+  end
+  local handler = opt_map[opt] or opt_map[opt_alias[opt]]
+  if not handler then
+    opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
+  end
+  handler(args)
+end
+
+-- Parse the command line: set the default options, process the leading
+-- option arguments and translate the single remaining input file.
+local function parseargs(args)
+  -- Default options.
+  g_opt.comment = "//|"
+  g_opt.endcomment = ""
+  g_opt.cpp = true
+  g_opt.dumpdef = 0
+  g_opt.include = { "" }
+
+  -- Process all option arguments. args.argn is the index of the next
+  -- unprocessed argument.
+  args.argn = 1
+  while true do
+    local a = args[args.argn]
+    if not a then break end
+    -- lopt is "-" for a long option and "" for a group of short options.
+    local lopt, opt = match(a, "^%-(%-?)(.+)")
+    if not opt then break end
+    args.argn = args.argn + 1
+    if lopt == "" then
+      -- Loop through short options.
+      for o in gmatch(opt, ".") do parseopt(o, args) end
+    else
+      -- Long option.
+      parseopt(opt, args)
+    end
+  end
+
+  -- Check for proper number of arguments: exactly one input file is expected.
+  local nargs = #args - args.argn + 1
+  if nargs ~= 1 then
+    if nargs == 0 and g_opt.dumpdef > 0 then return dumpdef(stdout) end
+    opt_map.help()
+  end
+
+  -- Translate a single input file to a single output file
+  -- TODO: Handle multiple files?
+  translate(args[args.argn], g_opt.outfile)
+end
+
+------------------------------------------------------------------------------
+
+-- Prepend the directory part of arg[0] (everything up to the last "/" or
+-- "\") to the Lua module search path, so modules next to this script are
+-- found by require.
+local arg = arg
+if arg and arg[0] then
+  local dir = match(arg[0], "^(.*[/\\])")
+  if dir then
+    package.path = dir.."?.lua;"..package.path
+  end
+end
+
+-- Start DynASM with the chunk's arguments.
+parseargs{...}
+
+------------------------------------------------------------------------------
+
diff --git a/src/upb.h b/src/upb.h
index 5dfd65e..0dfcd5e 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -165,6 +165,7 @@ typedef uint8_t upb_valuetype_t;
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
+#define UPB_TYPE_ENDGROUP 35
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 1b9b5f8..4b71ccd 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -11,16 +11,13 @@
#include "upb_decoder.h"
#include "upb_varint_decoder.h"
-// If the return value is other than UPB_CONTINUE, that is what the last
-// callback returned.
-typedef struct {
- upb_flow_t flow;
- const char *ptr;
-} fastdecode_ret;
-extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
- upb_value_handler_t value_cb, void *closure,
- void *table, int table_size);
-
+#ifdef UPB_USE_JIT_X64
+#define Dst_DECL upb_decoder *d
+#define Dst_REF (d->dynasm)
+#define Dst (d)
+#include "dynasm/dasm_proto.h"
+#include "upb_decoder_x86.h"
+#endif
/* Decoding/Buffering of individual values ************************************/
@@ -28,10 +25,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
-// Constant used to signal that the submessage is a group and therefore we
-// don't know its end offset. This cannot be the offset of a real submessage
-// end because it takes at least one byte to begin a submessage.
-#define UPB_GROUP_END_OFFSET 0
#define UPB_MAX_VARINT_ENCODED_SIZE 10
INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
@@ -54,6 +47,32 @@ INLINE void upb_dstate_setmsgend(upb_decoder *d) {
(void*)UINTPTR_MAX : d->buf + end_offset;
}
+// Pulls the next buffer from the bytesrc. Should be called only when the
+// current buffer is completely empty.
+// Returns true when a new buffer was obtained. On failure d->buf and d->end
+// are set to NULL and false is returned.
+static bool upb_pullbuf(upb_decoder *d) {
+ assert(upb_decoder_bufleft(d) == 0);
+ // -1 marks that there was no previous buffer (d->buf == NULL).
+ int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
+ upb_string_recycle(&d->bufstr);
+ if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
+ d->buf = NULL;
+ d->end = NULL;
+ return false;
+ }
+ if (last_buf_len != -1) {
+ // Account for the consumed buffer in the stream offset and shift the
+ // end_offset of every frame (except those holding the UINT32_MAX
+ // sentinel) by the same amount.
+ d->buf_stream_offset += last_buf_len;
+ for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
+ if (f->end_offset != UINT32_MAX)
+ f->end_offset -= last_buf_len;
+ }
+ // Point buf/ptr at the start of the new buffer and end just past it.
+ d->buf = upb_string_getrobuf(d->bufstr);
+ d->ptr = upb_string_getrobuf(d->bufstr);
+ d->end = d->buf + upb_string_len(d->bufstr);
+ // NOTE(review): jit_end currently equals end; the commented-out variant
+ // would stop 12 bytes earlier -- confirm which is intended.
+ d->jit_end = d->end; //d->end - 12;
+ // Presumably re-targets d->tmp at the new buffer as an empty substring --
+ // confirm against upb_string_substr.
+ upb_string_substr(d->tmp, d->bufstr, 0, 0);
+ upb_dstate_setmsgend(d);
+ return true;
+}
+
// Called only from the slow path, this function copies the next "len" bytes
// from the stream to "data", adjusting the dstate appropriately.
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
@@ -62,27 +81,8 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
memcpy(data, d->ptr, to_copy);
upb_decoder_advance(d, to_copy);
bytes_wanted -= to_copy;
- if (bytes_wanted == 0) {
- upb_dstate_setmsgend(d);
- return true;
- }
-
- // Get next buffer.
- int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
- upb_string_recycle(&d->bufstr);
- if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
- d->buf = NULL;
- return false;
- }
- if (last_buf_len != -1) {
- d->buf_stream_offset += last_buf_len;
- for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
- if (f->end_offset != UINT32_MAX)
- f->end_offset -= last_buf_len;
- }
- d->buf = upb_string_getrobuf(d->bufstr);
- d->ptr = upb_string_getrobuf(d->bufstr);
- d->end = d->buf + upb_string_len(d->bufstr);
+ if (bytes_wanted == 0) return true;
+ if (!upb_pullbuf(d)) return false;
}
}
@@ -143,7 +143,7 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
- upb_decoderet r = upb_decode_varint_fast(d->ptr);
+ upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
return false;
@@ -229,6 +229,7 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
}
#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; }
+ CHECK(upb_pullbuf(d));
if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err;
// Main loop: executed once per tag/field pair.
@@ -244,14 +245,13 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
-#ifdef USE_X64_FASTPATH
- const char *end = UPB_MIN(d->end, d->submsg_end);
- fastdecode_ret ret = upb_fastdecode(d->ptr, end,
- d->dispatcher.top->handlers.set->value,
- d->dispatcher.top->handlers.closure,
- d->msgdef->itof.array,
- d->msgdef->itof.array_size);
- CHECK_FLOW(ret.flow);
+#ifdef UPB_USE_JIT_X64
+ void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
+ if (d->dispatcher.handlers->should_jit && d->buf) {
+ //fprintf(stderr, "Entering JIT, ptr: %p\n", d->ptr);
+ upb_jit_decode(d);
+ //fprintf(stderr, "Exiting JIT, ptr: %p\n", d->ptr);
+ }
#endif
// Parse/handle tag.
@@ -354,9 +354,13 @@ err:
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(&d->dispatcher, handlers);
+#ifdef UPB_USE_JIT_X64
+ upb_decoder_makejit(d);
+#endif
d->bufstr = NULL;
d->buf = NULL;
d->tmp = NULL;
+ upb_string_recycle(&d->tmp);
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
@@ -373,4 +377,7 @@ void upb_decoder_uninit(upb_decoder *d) {
upb_dispatcher_uninit(&d->dispatcher);
upb_string_unref(d->bufstr);
upb_string_unref(d->tmp);
+#ifdef UPB_USE_JIT_X64
+ upb_decoder_freejit(d);
+#endif
}
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
index bb54930..1be31c4 100644
--- a/src/upb_decoder.h
+++ b/src/upb_decoder.h
@@ -27,13 +27,12 @@ extern "C" {
/* upb_decoder *****************************************************************/
+struct dasm_State;
+
struct _upb_decoder {
// Bytesrc from which we pull serialized data.
upb_bytesrc *bytesrc;
- // Dispatcher to which we push parsed data.
- upb_dispatcher dispatcher;
-
// String to hold our input buffer; is only active if d->buf != NULL.
upb_string *bufstr;
@@ -48,6 +47,7 @@ struct _upb_decoder {
// End of this buffer, relative to *ptr.
const char *end;
+ const char *jit_end;
// Members which may also be written by the JIT:
@@ -57,8 +57,21 @@ struct _upb_decoder {
// End of this submessage, relative to *ptr.
const char *submsg_end;
+ // MIN(end, submsg_end)
+ const char *effective_end;
+
// Where we will store any errors that occur.
upb_status *status;
+
+ // Dispatcher to which we push parsed data.
+ upb_dispatcher dispatcher;
+
+ // JIT-generated machine code (else NULL).
+ char *jit_code;
+ size_t jit_size;
+ char *debug_info;
+
+ struct dasm_State *dynasm;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
deleted file mode 100644
index c417644..0000000
--- a/src/upb_decoder_x64.asm
+++ /dev/null
@@ -1,228 +0,0 @@
-DEFAULT REL ; Default to RIP-relative addressing instead of absolute.
-
-extern _upb_decode_varint_fast64
-
-SECTION .data
-
-; Our dispatch table; used to jump to the right handler, keyed on the field's
-; type.
-dispatch_table:
- dq _upb_fastdecode.cant_fast_path ; field not in table (type == 0). (check_4).
- dq _upb_fastdecode.fixed64 ; double
- dq _upb_fastdecode.fixed32 ; float
- dq _upb_fastdecode.varint ; int64
- dq _upb_fastdecode.varint ; uint64
- dq _upb_fastdecode.varint ; int32
- dq _upb_fastdecode.fixed64 ; fixed64
- dq _upb_fastdecode.fixed32 ; fixed32
- dq _upb_fastdecode.varint ; bool
- dq _upb_fastdecode.string ; string
- dq _upb_fastdecode.cant_fast_path ; group (check_6)
- dq _upb_fastdecode.cant_fast_path ; message
- dq _upb_fastdecode.string ; bytes
- dq _upb_fastdecode.varint ; uint32
- dq _upb_fastdecode.varint ; enum
- dq _upb_fastdecode.fixed32 ; sfixed32
- dq _upb_fastdecode.fixed64 ; sfixed64
- dq _upb_fastdecode.varint_sint32 ; sint32
- dq _upb_fastdecode.varint_sint64 ; sint64
-
- GLOBAL _upb_decode_fast
-
-SECTION .text
-; Register allocation.
-%define BUF rbx ; const char *p, current buf position.
-%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
-%define STRING r12 ; unused
-%define FVAL r13 ; upb_value fval, needs to be preserved across varint decoding call.
-%define UNUSED r14
-%define CLOSURE r15
-
-; Stack layout: *tableptr, uint32_t maxfield_times_8
-%define STACK_SPACE 24 ; this value + 8 must be a multiple of 16.
-%define TABLE_SPILL [rsp] ; our lookup table, indexed by field number.
-%define COMMITTED_BUF_SPILL [rsp+8]
-%define MAXFIELD_TIMES_8_SPILL [rsp+16]
-
-
-; Executing the fast path requires the following conditions:
-; - check_1: there are >=12 bytes left (<=2 byte tag and <=10 byte varint).
-; - check_2: the tag is <= 2 bytes.
-; - check_3: the field number is <= the table size
-; (ie. it must be an array lookup, not a hash lookup).
-; - check_4: the field is known (found in the table).
-; - check_5: the wire type we read is correct for the field number,
-; ("packed" fields are not accepted, yet. this could be handled
-; efficiently by doing an extra check on the "type check failed"
-; path that goes into a tight loop if the encoding was packed).
-; - check_6: the field is not a group or a message (or string, TODO)
-; (this could be relaxed, but due to delegation it's a bit tricky).
-; - check_7: if the value is a string, the entire string is available in
-; the buffer, and our cached string object can be recycled, and
-; our string object already references the source buffer, so
-; absolutely no refcount twiddling is required.
-
-
-%macro decode_and_dispatch_ 0
-align 16
-.decode_and_dispatch:
- ; Load a few values we'll need in a sec.
- mov r8, TABLE_SPILL
- mov r9d, MAXFIELD_TIMES_8_SPILL
-
- mov rax, END
- sub rax, BUF
- cmp rax, 12
- jb _upb_fastdecode.cant_fast_path ; check_1 (<12 bytes left).
-
- ; Decode a 1 or 2-byte varint -> eax.
- mov cl, byte [BUF]
- lea rdi, [BUF+1]
- movzx eax, cl
- and eax, 0x7f
- test cl, cl
- jns .one_byte_tag ; Should be predictable if fields are in order.
- movzx ecx, byte [BUF+1]
- lea rdi, [BUF+2]
- mov edx, ecx
- and edx, 0x7f
- shl edx, 7
- or eax, edx
- test al, al
- js _upb_fastdecode.cant_fast_path ; check_2 (tag was >2 bytes).
-.one_byte_tag:
- mov BUF, rdi
-
- ; Decode tag and dispatch.
- mov ecx, eax
- and eax, 0x3ff8 ; eax now contains field number * 8
- lea r11, [r8+rax*2] ; *2 is really *16, since rax is already *8.
- and ecx, 0x7 ; ecx now contains wire type.
- cmp eax, r9d
- jae _upb_fastdecode.cant_fast_path ; check_3 (field number > table size)
- mov FIELDDEF, [r11+8] ; Lookup fielddef (upb_itof_ent.f)
- movzx rdx, BYTE [r11+1] ; Lookup field type.
- mov rax, qword dispatch_table
- jmp [rax+rdx*8]
-%endmacro
-
-%macro decode_and_dispatch 0
- jmp .decode_and_dispatch
-%endmacro
-
-%macro call_callback 0
- ; Value arg must already be in rdx when macro is called.
- mov rdi, CLOSURE
- mov rsi, FIELDDEF
- mov rcx, 33 ; RAW; we could pass the correct type, or only do this in non-debug modes.
- call CALLBACK
- mov COMMITTED_BUF_SPILL, BUF
- cmp eax, 0
- jne .done ; Caller requested BREAK or SKIPSUBMSG.
-%endmacro
-
-%macro check_type 1
- cmp ecx, %1
- jne _upb_fastdecode.cant_fast_path ; check_5 (wire type check failed).
-%endmacro
-
-; extern upb_flow_t upb_fastdecode(const char **p, const char *end,
-; upb_value_handler_t value_cb, void *closure,
-; void *table, int table_size);
-align 16
-global _upb_fastdecode
-_upb_fastdecode:
- ; We use all callee-save regs.
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- sub rsp, STACK_SPACE
-
- ; Parse arguments into reg vals and stack.
- mov BUF, rdi
- mov COMMITTED_BUF_SPILL, rdi
- mov END, rsi
- mov CALLBACK, rdx
- mov CLOSURE, rcx
- mov TABLE_SPILL, r8
- shl r9, 3
- mov MAXFIELD_TIMES_8_SPILL, r9
-
- decode_and_dispatch
-
-align 16
-.varint:
- call _upb_decode_varint_fast64 ; BUF is already in rdi.
- test rax, rax
- jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
- mov BUF, rax
- call_callback ; rdx already holds value.
- decode_and_dispatch_
-
-align 16
-.fixed32:
- mov edx, DWORD [BUF] ; Might be unaligned, but that's ok.
- add BUF, 4
- call_callback
- decode_and_dispatch
-
-align 16
-.fixed64:
- mov rdx, QWORD [BUF] ; Might be unaligned, but that's ok.
- add BUF, 8
- call_callback
- decode_and_dispatch
-
-align 16
-.varint_sint32:
- call _upb_decode_varint_fast64 ; BUF is already in rdi.
- test rax, rax
- jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
- mov BUF, rax
-
- ; Perform 32-bit zig-zag decoding.
- mov ecx, edx
- shr edx, 1
- and ecx, 0x1
- neg ecx
- xor edx, ecx
- call_callback
- decode_and_dispatch
-
-align 16
-.varint_sint64:
- call _upb_decode_varint_fast64 ; BUF is already in rdi.
- test rax, rax
- jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
- mov BUF, rax
-
- ; Perform 64-bit zig-zag decoding.
- mov rcx, rdx
- shr rdx, 1
- and ecx, 0x1
- neg rcx
- xor rdx, rcx
- call_callback
- decode_and_dispatch
-
-align 16
-.string:
-
-.cant_fast_path:
- mov rax, 0 ; UPB_CONTINUE -- continue as before.
-.done:
- ; If coming via done, preserve the user callback's return in rax.
-
- ; Return committed buf pointer as second parameter.
- mov rdx, COMMITTED_BUF_SPILL
- add rsp, STACK_SPACE
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
- ret
diff --git a/src/upb_decoder_x86.dasc b/src/upb_decoder_x86.dasc
new file mode 100644
index 0000000..71df08f
--- /dev/null
+++ b/src/upb_decoder_x86.dasc
@@ -0,0 +1,649 @@
+|//
+|// upb - a minimalist implementation of protocol buffers.
+|//
+|// Copyright (c) 2011 Google Inc. See LICENSE for details.
+|// Author: Josh Haberman <jhaberman@gmail.com>
+|//
+|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
+|// generates code specialized to parsing the specific message and
+|// calling specific handlers.
+
+// Sentinel values for upb_handlers_msgent.jit_parent_field_done_pclabel:
+//   UPB_NONE         - no parent field has been linked to the message yet.
+//   UPB_MULTIPLE     - more than one field refers to the message, so its
+//                      code is entered with "call" and finishes with "ret".
+//   UPB_TOPLEVEL_ONE - top-level message with no parent field; finishing it
+//                      exits the JIT.
+// Parenthesized so the negative constants are safe inside any expression.
+#define UPB_NONE (-1)
+#define UPB_MULTIPLE (-2)
+#define UPB_TOPLEVEL_ONE (-3)
+
+#include <sys/mman.h>
+#include "dynasm/dasm_proto.h"
+#include "dynasm/dasm_x86.h"
+
+// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
+// at runtime. GDB 7.x+ has defined an interface for doing this, and these
+// structure/function definitions are copied out of gdb/jit.h
+//
+// We need to give GDB an ELF file at runtime describing the symbols we have
+// generated. To avoid implementing the ELF format, we generate an ELF file
+// at compile-time and compile it in as a character string. We can replace
+// a few key constants (address of JIT-ted function and its size) by looking
+// for a few magic numbers and doing a dumb string replacement.
+#include "jit_debug_elf_file.h"
+
+// Values stored in gdb_jit_descriptor.action_flag to tell the debugger what
+// happened to the entry named by relevant_entry.  The numeric values belong
+// to the interface copied from gdb/jit.h and must not be changed.
+typedef enum
+{
+ GDB_JIT_NOACTION = 0,
+ GDB_JIT_REGISTER,
+ GDB_JIT_UNREGISTER
+} jit_actions_t;
+
+// One node of the doubly-linked list of registered symbol files.
+// symfile_addr/symfile_size describe an in-memory ELF image; in this file
+// that is the patched copy of the compiled-in jit_debug_elf_file object.
+// Field order and types follow gdb/jit.h.
+typedef struct gdb_jit_entry {
+ struct gdb_jit_entry *next_entry;
+ struct gdb_jit_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+} gdb_jit_entry;
+
+// Root descriptor read by the debugger.  action_flag holds a jit_actions_t
+// value, relevant_entry is the entry that action applies to, and first_entry
+// is the head of the gdb_jit_entry list.  Layout follows gdb/jit.h.
+typedef struct {
+ uint32_t version;
+ uint32_t action_flag;
+ gdb_jit_entry *relevant_entry;
+ gdb_jit_entry *first_entry;
+} gdb_jit_descriptor;
+
+// The single descriptor instance (interface version 1, initially empty).
+// Both symbol names below are fixed by the GDB JIT interface, which is why
+// they use otherwise-reserved double-underscore identifiers.
+gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
+
+// Called after __jit_debug_descriptor has been updated.  The function is
+// deliberately empty: it is marked noinline and contains an empty volatile
+// asm so that the call is not optimized away and stays available as a hook
+// for the debugger.
+void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
+
+|.arch x64
+|.actionlist upb_jit_actionlist
+|.globals UPB_JIT_GLOBAL_
+|.globalnames upb_jit_globalnames
+|
+|// Calling conventions.
+|.define ARG1_64, rdi
+|.define ARG2_8, sil
+|.define ARG2_32, esi
+|.define ARG2_64, rsi
+|.define ARG3_8, dl
+|.define ARG3_32, edx
+|.define ARG3_64, rdx
+|
+|// Register allocation / type map.
+|// ALL of the code in this file uses these register allocations.
+|// When we "call" within this file, we do not use regular calling
+|// conventions, but of course when calling to user callbacks we must.
+|.define PTR, rbx
+|.define CLOSURE, r12
+|.type FRAME, upb_dispatcher_frame, r13
+|.type STRING, upb_string, r14
+|.type DECODER, upb_decoder, r15
+|
+|// Calls the C function at "addr".
+|// A direct call encodes a signed 32-bit displacement relative to the call
+|// site.  The JIT code is mapped with MAP_32BIT (the low 2GB), so a direct
+|// call is only guaranteed to reach targets that also lie below 2GB;
+|// anything else is called indirectly through rax (which is clobbered).
+|.macro callp, addr
+|| if ((uintptr_t)addr < 0x7fffffff) {
+ | call &addr
+|| } else {
+ | mov64 rax, (uintptr_t)addr
+ | call rax
+|| }
+|.endmacro
+|
+|// Checks PTR for end-of-buffer.
+|// When PTR >= DECODER->effective_end, control leaves the field-parsing
+|// path: for a group message the JIT is exited directly, for any other
+|// message we branch to its end-of-buffer label (emitted by
+|// upb_decoder_jit_msg), which tells end-of-submessage apart from a real
+|// end of buffer.
+|.macro check_eob, m
+| cmp PTR, DECODER->effective_end
+|| if (m->is_group) {
+ | jae ->exit_jit
+|| } else {
+ | jae =>m->jit_endofbuf_pclabel
+|| }
+|.endmacro
+|
+|// Decodes varint from [PTR + offset] -> ARG3.
+|// Saves new pointer as rax.
+|// Expects ecx to already hold the bytes loaded from [PTR + offset]; only
+|// its low two bytes (cl, ch) are inspected here.  Clobbers esi, and on the
+|// out-of-line path also ARG1 (rdi).  PTR itself is not modified.
+|.macro decode_loaded_varint, offset
+| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
+| lea rax, [PTR + offset + 1]
+| mov ARG3_32, ecx
+| and ARG3_32, 0x7f
+| test cl, cl
+| jns >9
+| lea rax, [PTR + offset + 2]
+| movzx esi, ch
+| and esi, 0x7f
+| shl esi, 7
+| or ARG3_32, esi
+| test cx, cx
+| jns >9
+| // Three or more bytes: hand the pointer past the first two bytes and the
+| // 14 bits decoded so far to the C helper.
+| // NOTE(review): presumably the helper returns the new pointer in rax and
+| // the completed value in rdx -- confirm against upb_varint_decoder.h.
+| mov ARG1_64, rax
+| mov ARG2_32, ARG3_32
+| callp upb_vdecode_max8_fast
+| test rax, rax
+| jz ->exit_jit // >10-byte varint.
+|9:
+|.endmacro
+|
+|// Loads four bytes from [PTR + offset] into ecx, decodes the varint found
+|// there into ARG3 (see decode_loaded_varint) and advances PTR to the byte
+|// following the varint, i.e. past both the "offset" bytes and the value.
+|.macro decode_varint, offset
+| mov ecx, dword [PTR + offset]
+| decode_loaded_varint offset
+| mov PTR, rax
+|.endmacro
+|
+|// Decode the tag -> edx.
+|// Could specialize this by avoiding the value masking: could just key the
+|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
+|// Currently only support tables where all entries are in the array part.
+|//
+|// Expects ecx to hold the bytes at [PTR] (callers load them first).  PTR
+|// is not advanced; the field code that is jumped to skips the tag itself.
+|// At the jump, edx holds the wire type (low three bits of the tag) and the
+|// field number has been used to index m->tablearray.
+|// NOTE(review): m->tablearray is embedded as an address displacement, which
+|// presumably requires the array to live at a 32-bit address -- confirm.
+|.macro dyndispatch, m
+| decode_loaded_varint, 0
+| mov ecx, edx
+| shr ecx, 3
+| and edx, 0x7
+| cmp ecx, m->max_field_number // Bounds-check the field.
+| ja ->exit_jit // In the future; could be unknown label
+| mov rcx, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
+| jmp rcx // Dispatch: unpredictable jump.
+|.endmacro
+|
+|// Recomputes DECODER->submsg_end and DECODER->effective_end for message "m",
+|// whose frame must be the current FRAME.  Clobbers rax and rsi.
+|//  - Groups: submsg_end is set to all-ones and effective_end is simply
+|//    jit_end.
+|//  - Other messages: submsg_end = buf + FRAME->end_offset, and
+|//    effective_end is the smaller of submsg_end and jit_end.
+|.macro setmsgend, m
+| mov rsi, DECODER->jit_end
+|| if (m->is_group) {
+| mov64 rax, 0xffffffffffffffff
+| mov qword DECODER->submsg_end, rax
+| mov DECODER->effective_end, rsi
+|| } else {
+| // Could store a correctly-biased version in the frame, at the cost of
+| // a larger stack.
+| mov eax, dword FRAME->end_offset
+| add rax, qword DECODER->buf
+| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset
+| cmp rax, rsi
+| jb >1
+| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end)
+|1:
+| mov DECODER->effective_end, rax
+|| }
+|.endmacro
+|
+|// rcx contains the bytes loaded from PTR; compare them against "tag", but
+|// since the tag is a varint we must only compare as many bytes as actually
+|// have data.  Sets the flags for a following je/jne.
+|// Side effects: the 3-byte case masks ecx in place; the 5-byte case masks
+|// rcx in place and clobbers rdx.
+|.macro checktag, tag
+|| switch (upb_value_size(tag)) {
+|| case 1:
+| cmp cl, tag
+|| break;
+|| case 2:
+| cmp cx, tag
+|| break;
+|| case 3:
+| and ecx, 0xffffff // 3 bytes
+| cmp rcx, tag
+|| break;
+|| case 4:
+| cmp ecx, tag
+|| break;
+|| case 5:
+| mov64 rdx, 0xffffffffff // 5 bytes
+| and rcx, rdx
+| cmp rcx, tag
+|| break;
+|| default: abort();
+|| }
+|.endmacro
+|
+|// Loads f->fval into ARG2 for a handler call.
+|// Picks the shortest encoding that preserves the full 64-bit value: xor for
+|// zero, a 32-bit mov (which zero-extends into the full register) when the
+|// value fits in 32 bits, and mov64 otherwise.  A plain 64-bit mov can only
+|// carry a 32-bit immediate, so it must not be used for wide values such as
+|// pointers.
+|.macro loadfval, f
+|| if (f->fval.val.uint64 == 0) {
+| xor ARG2_32, ARG2_32
+|| } else if (f->fval.val.uint64 <= 0xffffffff) {
+| mov ARG2_32, f->fval.val.uint64
+|| } else {
+| mov64 ARG2_64, f->fval.val.uint64
+|| }
+|.endmacro
+
+#include <stdlib.h>
+#include "upb_varint_decoder.h"
+
+// Returns the number of bytes (1-8) needed to hold "val", i.e. the index of
+// its highest non-zero byte plus one.  Zero is reported as one byte.
+static size_t upb_value_size(uint64_t val) {
+ // Handle zero up front: __builtin_clzll() has an undefined result for 0.
+ if (val == 0) return 1;
+#ifdef __GNUC__
+ int high_bit = 63 - __builtin_clzll(val); // 0-based; val != 0 here.
+#else
+ int high_bit = 0;
+ uint64_t tmp = val;
+ while(tmp >>= 1) high_bit++;
+#endif
+ return high_bit / 8 + 1;
+}
+
+// Returns the varint encoding of "val" packed into an integer, first wire
+// byte in the least-significant byte (so it can be compared directly with
+// bytes loaded from the buffer on a little-endian machine).  Each output
+// byte carries seven payload bits; every byte except the last has its top
+// (continuation) bit set.  Zero encodes as 0.
+//
+// Only the first eight encoded bytes fit in the result; values that need
+// more (val >= 2^56) are truncated rather than shifted out of range.
+static uint64_t upb_encode_varint(uint64_t val)
+{
+ uint64_t ret = 0;
+ for (int bitpos = 0; val && bitpos < 64; bitpos+=8, val >>=7) {
+ // Set the continuation bit of the previous byte.  The constant must be
+ // 64 bits wide: an int shifted to bit 31 overflows and sign-extends.
+ if (bitpos > 0) ret |= ((uint64_t)1 << (bitpos-1));
+ ret |= (val & 0x7f) << bitpos;
+ }
+ return ret;
+}
+
+// Emits the code that parses one field of message "m" and then tries to
+// predict the field that follows it.
+//
+//   tag      - encoded (varint-packed) tag of field "f".
+//   next_tag - encoded tag of the field expected next, or 0 for none.
+//   f        - the field to emit code for.
+//   next_f   - fieldent belonging to next_tag (only used if next_tag != 0).
+//
+// At runtime PTR should point to the beginning of the tag.  The emitted code
+// decodes the value, commits DECODER->ptr, invokes the registered handlers
+// and finally dispatches to the next field.
+static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
+ upb_handlers_msgent *m,
+ upb_handlers_fieldent *f, upb_handlers_fieldent *next_f) {
+ int tag_size = upb_value_size(tag);
+
+ // PC-label for the dispatch table.
+ // We check the wire type (which must be loaded in edx) because the
+ // table is keyed on field number, not type.
+ |=>f->jit_pclabel:
+ | cmp edx, upb_types[f->type].native_wire_type
+ | jne ->exit_jit // In the future: could be an unknown field.
+ |=>f->jit_pclabel_notypecheck:
+ |1: // Label for repeating this field.
+
+ // Decode the value into arg 3 for the callback.
+ switch (f->type) {
+ case UPB_TYPE(DOUBLE):
+ case UPB_TYPE(FIXED64):
+ case UPB_TYPE(SFIXED64):
+ | mov ARG3_64, qword [PTR + tag_size]
+ | add PTR, 8 + tag_size
+ break;
+
+ case UPB_TYPE(FLOAT):
+ case UPB_TYPE(FIXED32):
+ case UPB_TYPE(SFIXED32):
+ | mov ARG3_32, dword [PTR + tag_size]
+ | add PTR, 4 + tag_size
+ break;
+
+ case UPB_TYPE(BOOL):
+ // Can't assume it's one byte long, because bool must be wire-compatible
+ // with all of the varint integer types.
+ | decode_varint tag_size
+ | test ARG3_64, ARG3_64
+ | setne ARG3_8 // Other bytes left with val, should be ok.
+ break;
+
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(ENUM):
+ | decode_varint tag_size
+ break;
+
+ case UPB_TYPE(SINT64):
+ // 64-bit zig-zag decoding.
+ | decode_varint tag_size
+ | mov rax, ARG3_64
+ | shr ARG3_64, 1
+ | and rax, 1
+ | neg rax
+ | xor ARG3_64, rax
+ break;
+
+ case UPB_TYPE(SINT32):
+ // 32-bit zig-zag decoding.
+ | decode_varint tag_size
+ | mov eax, ARG3_32
+ | shr ARG3_32, 1
+ | and eax, 1
+ | neg eax
+ | xor ARG3_32, eax
+ break;
+
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES):
+ // We only handle the case where the entire string is in our current
+ // buf, which sidesteps any security problems. The C path has more
+ // robust checks.
+ | decode_varint tag_size
+ | mov STRING->len, ARG3_32
+ | mov STRING->ptr, PTR
+ | add PTR, ARG3_64
+ | mov ARG3_64, STRING
+ | cmp PTR, DECODER->effective_end
+ | ja ->exit_jit // Can't deliver, whole string not in buf.
+ break;
+
+ case UPB_TYPE_ENDGROUP: // A pseudo-type.
+ | add PTR, tag_size
+ | mov DECODER->ptr, PTR
+ | jmp =>m->jit_endofmsg_pclabel
+ return;
+
+ case UPB_TYPE(MESSAGE):
+ // Decode the delimited length into ARG3.
+ | decode_varint tag_size
+ // Fall through.
+ case UPB_TYPE(GROUP):
+ // Will dispatch callbacks and call submessage in a second.
+ break;
+
+ default: abort();
+ }
+ // Commit our work by advancing ptr.
+ // (If in the future we wanted to support a UPB_SUSPEND_AGAIN that
+ // suspends the decoder and redelivers the value later, we would
+ // need to adjust this to happen perhaps after the callback ran).
+ | mov DECODER->ptr, PTR
+
+ // Load closure and fval into arg registers.
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+
+ // Call callbacks.
+ if (upb_issubmsgtype(f->type)) {
+ // In both branches below we end up with the sub-message closure in rdx
+ // and the delimited length (ARG3) parked in r12d.  r12 is free to reuse
+ // because the old CLOSURE has already been copied to ARG1 and is also
+ // stored in the parent frame.
+ // Call startsubmsg handler (if any).
+ if (f->cb.startsubmsg != upb_startsubmsg_nop) {
+ // upb_sflow_t startsubmsg(void *closure, upb_value fval)
+ | mov r12d, ARG3_32
+ | callp f->cb.startsubmsg
+ } else {
+ // No handler: behave like upb_startsubmsg_nop and continue with the
+ // current closure.  The length in edx must be saved *before* rdx is
+ // overwritten with the closure, and the closure must be read before
+ // r12 is overwritten with the length, hence the detour through rax.
+ | mov rax, CLOSURE
+ | mov r12d, ARG3_32
+ | mov rdx, rax
+ }
+ // Push a stack frame (not the CPU stack, the upb_decoder stack).
+ | lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
+ | cmp rax, qword DECODER->dispatcher.limit
+ | jae ->exit_jit // Frame stack overflow.
+ | mov qword FRAME:rax->f, f
+ | mov qword FRAME:rax->closure, rdx
+ | mov rsi, PTR
+ | sub rsi, DECODER->buf
+ | add r12d, esi
+ | mov dword FRAME:rax->end_offset, r12d // = (d->ptr - d->buf) + delim_len
+ | mov CLOSURE, rdx
+ | mov DECODER->dispatcher.top, rax
+ | mov FRAME, rax
+
+ // A sub-message reachable from several fields is entered with "call" so
+ // that it can "ret" to whichever field invoked it; otherwise we jump and
+ // the message jumps straight back to our submsg_done label.
+ upb_handlers_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f);
+ if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
+ | jmp =>sub_m->jit_startmsg_pclabel;
+ } else {
+ | call =>sub_m->jit_startmsg_pclabel;
+ }
+
+ |=>f->jit_submsg_done_pclabel:
+ // Pop a stack frame.
+ | sub FRAME, sizeof(upb_dispatcher_frame)
+ | mov DECODER->dispatcher.top, FRAME
+ | setmsgend m
+ | mov CLOSURE, FRAME->closure
+
+ // Call endsubmsg handler (if any).
+ if (f->endsubmsg != upb_endsubmsg_nop) {
+ // upb_flow_t endsubmsg(void *closure, upb_value fval);
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+ | callp f->endsubmsg
+ }
+ } else {
+ | callp f->cb.value
+ }
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+
+ // Epilogue: load next tag, check for repeated field.
+ | check_eob m
+ | mov rcx, qword [PTR]
+ if (f->repeated) {
+ | checktag tag
+ | je <1
+ }
+ if (next_tag != 0) {
+ | checktag next_tag
+ | je =>next_f->jit_pclabel_notypecheck
+ }
+
+ // Fall back to dynamic dispatch. Replicate the dispatch
+ // here so we can learn what fields generally follow others.
+ | dyndispatch m
+ |1:
+}
+
+// qsort() comparator ordering uint32_t values ascending.
+// Compares explicitly instead of subtracting: the difference of two
+// uint32_t values does not fit in an int, so a subtraction yields the wrong
+// sign whenever the operands are more than INT_MAX apart.
+static int upb_compare_uint32(const void *a, const void *b) {
+ uint32_t lhs = *(const uint32_t*)a;
+ uint32_t rhs = *(const uint32_t*)b;
+ return (lhs > rhs) - (lhs < rhs);
+}
+
+// Emits all code for message "m": the start-of-message entry point, one
+// section per field (in ascending key order, each chained to its successor
+// as the predicted next field), and the end-of-buffer / end-of-message
+// tail that returns control to the parent field or exits the JIT.
+static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
+ |=>m->jit_startmsg_pclabel:
+ // Call startmsg handler (if any):
+ if (m->startmsg != upb_startmsg_nop) {
+ // upb_flow_t startmsg(void *closure);
+ | mov ARG1_64, FRAME->closure
+ | callp m->startmsg
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ }
+
+ | setmsgend m
+ | check_eob m
+ | mov ecx, dword [PTR]
+ | dyndispatch m
+
+ // --------- New code section (does not fall through) ------------------------
+
+ // Emit code for parsing each field (dynamic dispatch contains pointers to
+ // all of these).
+
+ // Create an ordering over the fields (inttable ordering is undefined).
+ int num_keys = upb_inttable_count(&m->fieldtab);
+ uint32_t *keys = malloc(num_keys * sizeof(*keys));
+ // malloc(0) may legitimately return NULL, so only treat NULL as failure
+ // when we actually asked for memory.
+ if (!keys && num_keys > 0) abort();
+ int idx = 0;
+ for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+ i = upb_inttable_next(&m->fieldtab, i)) {
+ keys[idx++] = upb_inttable_iter_key(i);
+ }
+ qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
+
+
+ // Each field is emitted one iteration late, once its successor (the
+ // predicted next tag) is known; the final field is emitted after the loop.
+ upb_handlers_fieldent *last_f = NULL;
+ uint32_t last_tag = 0;
+ for(int i = 0; i < num_keys; i++) {
+ uint32_t key = keys[i];
+ upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
+ uint32_t tag = upb_encode_varint(key);
+ if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
+ last_tag = tag;
+ last_f = f;
+ }
+
+ free(keys);
+
+ // last_f is NULL when the message has no registered fields; there is then
+ // no trailing field to emit (and upb_decoder_jit_field() would dereference
+ // a NULL fieldent).
+ if (m->is_group) {
+ // Create a fake fieldent for handling "end group."
+ upb_handlers_fieldent f = {0, UPB_TYPE_ENDGROUP, 0, UPB_NO_VALUE, {NULL}, NULL, 0, 0, 0, false};
+ if (last_f) upb_decoder_jit_field(d, last_tag, m->groupnum, m, last_f, &f);
+ upb_decoder_jit_field(d, m->groupnum, 0, m, &f, NULL);
+ } else if (last_f) {
+ upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
+ }
+
+ // --------- New code section (does not fall through) ------------------------
+
+ // End-of-buf / end-of-message.
+ if (!m->is_group) {
+ // This case doesn't exist for groups, because there eob really means
+ // eob, so that case just exits the jit directly.
+ |=>m->jit_endofbuf_pclabel:
+ | cmp PTR, DECODER->submsg_end
+ | jb ->exit_jit // We are at eob, but not end-of-submsg.
+ }
+
+ |=>m->jit_endofmsg_pclabel:
+ // We are at end-of-submsg: call endmsg handler (if any):
+ if (m->endmsg != upb_endmsg_nop) {
+ // void endmsg(void *closure, upb_status *status) {
+ | mov ARG1_64, FRAME->closure
+ | lea ARG2_64, DECODER->dispatcher.status
+ | callp m->endmsg
+ }
+
+ // Return to whoever started this message (see the UPB_* sentinels).
+ if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
+ | ret
+ } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
+ | jmp ->exit_jit
+ } else {
+ | jmp =>m->jit_parent_field_done_pclabel
+ }
+
+}
+
+// Emits the whole JIT-ted function: a prologue that saves the callee-saved
+// registers and loads the fixed register allocation from the decoder passed
+// in ARG1, the code for every message in the handlers, the shared exit_jit
+// epilogue, and finally pc-label 0, a stub that calls abort().
+static void upb_decoder_jit(upb_decoder *d) {
+ // Prologue.  The pops at exit_jit must mirror these pushes in reverse.
+ // NOTE(review): six pushes after the return address appear to leave rsp
+ // 8 bytes off 16-byte alignment at the callp sites reached without an
+ // intervening "call" -- confirm against the ABI requirements of handlers.
+ | push rbp
+ | mov rbp, rsp
+ | push r15
+ | push r14
+ | push r13
+ | push r12
+ | push rbx
+ | mov DECODER, ARG1_64
+ | mov FRAME, DECODER:ARG1_64->dispatcher.top
+ | mov STRING, DECODER:ARG1_64->tmp
+ | mov CLOSURE, FRAME->closure
+ | mov PTR, DECODER->ptr
+
+ upb_handlers *h = d->dispatcher.handlers;
+ // A top-level message that is also used as a sub-message finishes with
+ // "ret", so it has to be entered with "call".  In every other case
+ // execution simply falls through into the code of msgs[0] emitted below.
+ if (h->msgs[0].jit_parent_field_done_pclabel == UPB_MULTIPLE) {
+ | call =>h->msgs[0].jit_startmsg_pclabel
+ | jmp ->exit_jit
+ }
+
+ // TODO: push return addresses for re-entry (will be necessary for multiple
+ // buffer support).
+ for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, &h->msgs[i]);
+
+ // Common exit: restore callee-saved registers and return to the C caller.
+ |->exit_jit:
+ | pop rbx
+ | pop r12
+ | pop r13
+ | pop r14
+ | pop r15
+ | leave
+ | ret
+ // pc-label 0: target of dispatch-table slots that have no field.
+ |=>0:
+ | callp &abort
+}
+
+// Reserves three consecutive pc-labels for field "f" -- its type-checked
+// entry, its entry without a type check, and its "sub-message done"
+// continuation -- and advances *pclabel_count past them.
+void upb_decoder_jit_assignfieldlabs(upb_handlers_fieldent *f,
+ uint32_t *pclabel_count) {
+  uint32_t next_label = *pclabel_count;
+  f->jit_pclabel = next_label;
+  f->jit_pclabel_notypecheck = next_label + 1;
+  f->jit_submsg_done_pclabel = next_label + 2;
+  *pclabel_count = next_label + 3;
+}
+
+// First labelling pass for message "m": reserves its four message-level
+// pc-labels, resets its parent link to UPB_NONE, assigns labels to each of
+// its fields, records the largest key in its field table and allocates the
+// dispatch array (max_field_number + 1 slots) that upb_decoder_makejit()
+// fills in later.  Aborts if the array cannot be allocated.
+void upb_decoder_jit_assignmsglabs(upb_handlers_msgent *m,
+ uint32_t *pclabel_count) {
+ m->jit_startmsg_pclabel = (*pclabel_count)++;
+ m->jit_endofbuf_pclabel = (*pclabel_count)++;
+ m->jit_endofmsg_pclabel = (*pclabel_count)++;
+ m->jit_unknownfield_pclabel = (*pclabel_count)++;
+ m->jit_parent_field_done_pclabel = UPB_NONE;
+ m->max_field_number = 0;
+ upb_inttable_iter i;
+ for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+ i = upb_inttable_next(&m->fieldtab, i)) {
+ uint32_t key = upb_inttable_iter_key(i);
+ m->max_field_number = UPB_MAX(m->max_field_number, key);
+ upb_handlers_fieldent *f = upb_inttable_iter_value(i);
+ upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+ }
+ // XXX: Won't work for large field numbers; will need to use a upb_table.
+ m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
+ // The array is written unconditionally when the dispatch tables are built,
+ // so fail loudly here rather than through a NULL pointer later.
+ if (!m->tablearray) abort();
+}
+
+// Second labelling pass.  Walks the sub-message fields of "m" and links each
+// target message back to the field that invokes it: a message seen from a
+// single field continues at that field's submsg_done label, a message seen
+// from several fields is marked UPB_MULTIPLE.  Targets of GROUP fields are
+// additionally flagged as groups and remember the field's table key.
+void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_handlers_msgent *m) {
+  upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
+  while (!upb_inttable_done(it)) {
+    upb_handlers_fieldent *field = upb_inttable_iter_value(it);
+    if (upb_issubmsgtype(field->type)) {
+      upb_handlers_msgent *child = upb_handlers_getmsgent(h, field);
+      if (field->type == UPB_TYPE(GROUP)) {
+        child->is_group = true;
+        child->groupnum = upb_inttable_iter_key(it);
+      }
+      if (child->jit_parent_field_done_pclabel != UPB_NONE) {
+        // Already claimed by another field.
+        child->jit_parent_field_done_pclabel = UPB_MULTIPLE;
+      } else {
+        child->jit_parent_field_done_pclabel = field->jit_submsg_done_pclabel;
+      }
+    }
+    it = upb_inttable_next(&m->fieldtab, it);
+  }
+}
+
+// Builds the JIT-compiled decoder for d->dispatcher.handlers:
+//   1. assigns pc-labels to every message and field,
+//   2. assembles the code and copies it into a fresh anonymous mapping,
+//   3. fills each message's dispatch array with code addresses,
+//   4. patches the compiled-in ELF image and registers it with GDB,
+//   5. makes the mapping read+execute.
+// Resources are released by upb_decoder_freejit().  Allocation, mapping and
+// protection failures abort(), matching how this file treats other
+// unrecoverable conditions.
+void upb_decoder_makejit(upb_decoder *d) {
+ // Assign pclabels.  Label 0 is reserved for the abort stub.
+ uint32_t pclabel_count = 1;
+ upb_handlers *h = d->dispatcher.handlers;
+ for (int i = 0; i < h->msgs_len; i++)
+ upb_decoder_jit_assignmsglabs(&h->msgs[i], &pclabel_count);
+ for (int i = 0; i < h->msgs_len; i++)
+ upb_decoder_jit_assignmsglabs2(h, &h->msgs[i]);
+
+ if (h->msgs[0].jit_parent_field_done_pclabel == UPB_NONE) {
+ h->msgs[0].jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
+ }
+
+ void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
+ if (!globals) abort();
+ dasm_init(d, 1);
+ dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
+ dasm_growpc(d, pclabel_count);
+ dasm_setup(d, upb_jit_actionlist);
+
+ upb_decoder_jit(d);
+
+ dasm_link(d, &d->jit_size);
+
+ // Anonymous mappings take no file descriptor; -1 is the portable value.
+ d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (d->jit_code == MAP_FAILED) abort();
+
+ dasm_encode(d, d->jit_code);
+
+ // Create dispatch tables.
+ for (int i = 0; i < h->msgs_len; i++) {
+ upb_handlers_msgent *m = &h->msgs[i];
+ for (uint32_t j = 0; j <= m->max_field_number; j++) {
+ // The field table is keyed on (number << 3 | wire type); probe every
+ // wire type to find the field registered under number j, if any.
+ upb_handlers_fieldent *f = NULL;
+ for (int k = 0; k < 8; k++) {
+ f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
+ if (f) break;
+ }
+ if (f) {
+ m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+ } else {
+ // Don't handle unknown fields yet.
+ m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+ }
+ }
+ }
+
+ // Create debug info: copy the template ELF image and replace the magic
+ // placeholder words with the real code address and size.
+ size_t elf_len = src_jit_debug_elf_file_o_len;
+ d->debug_info = malloc(elf_len);
+ if (!d->debug_info) abort();
+ memcpy(d->debug_info, src_jit_debug_elf_file_o, elf_len);
+ uint64_t *p = (void*)d->debug_info;
+ for (; (char*)(p+1) <= (char*)d->debug_info + elf_len; ++p) {
+ if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
+ if (*p == 0x321) { *p = d->jit_size; }
+ }
+
+ // Register the JIT-ted code with GDB.
+ gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry));
+ if (!e) abort();
+ e->next_entry = __jit_debug_descriptor.first_entry;
+ e->prev_entry = NULL;
+ if (e->next_entry) e->next_entry->prev_entry = e;
+ e->symfile_addr = d->debug_info;
+ e->symfile_size = elf_len;
+ __jit_debug_descriptor.first_entry = e;
+ __jit_debug_descriptor.relevant_entry = e;
+ __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
+ __jit_debug_register_code();
+
+ dasm_free(d);
+ free(globals);
+
+ // Without execute permission the first call into the code would fault.
+ if (mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ) != 0) abort();
+
+ // Debugging aid: dump the generated machine code.  This is best-effort;
+ // failing to open the dump file must not take the decoder down.
+ // NOTE(review): writing to a fixed path under /tmp on every call looks
+ // like a development leftover -- consider removing or gating it.
+ FILE *f = fopen("/tmp/machine-code", "wb");
+ if (f) {
+ fwrite(d->jit_code, d->jit_size, 1, f);
+ fclose(f);
+ }
+}
+
+// Releases what upb_decoder_makejit() created for "d": the patched ELF
+// debug image and the executable code mapping.  The gdb_jit_entry that
+// refers to the debug image is neither unlinked nor freed here.
+void upb_decoder_freejit(upb_decoder *d) {
+  free(d->debug_info);
+  munmap(d->jit_code, d->jit_size);
+  // TODO: unregister
+}
diff --git a/src/upb_def.c b/src/upb_def.c
index 059edd6..338bd3d 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -349,18 +349,18 @@ static void upb_defbuilder_register_FileDescriptorProto(upb_handlers *h) {
upb_defbuilder_FileDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE,
+ GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE, false,
&upb_defbuilder_FileDescriptorProto_package, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE);
+ GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE, true);
upb_msgdef_register_DescriptorProto(h);
upb_handlers_typed_pop(h);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE);
+ GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true);
upb_enumdef_register_EnumDescriptorProto(h);
upb_handlers_typed_pop(h);
@@ -383,12 +383,13 @@ static void upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) {
upb_register_startend(h, NULL, upb_defbuilder_FileDescriptorSet_onendmsg);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE);
+ GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE, true);
upb_defbuilder_register_FileDescriptorProto(h);
upb_handlers_typed_pop(h);
}
void upb_defbuilder_reghandlers(upb_handlers *h) {
upb_defbuilder_register_FileDescriptorSet(h);
+ h->should_jit = false;
}
@@ -492,11 +493,11 @@ static void upb_enumdef_register_EnumValueDescriptorProto(upb_handlers *h) {
upb_enumdef_EnumValueDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_enumdef_EnumValueDescriptorProto_name, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE,
+ GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE, false,
&upb_enumdef_EnumValueDescriptorProto_number, UPB_NO_VALUE);
}
@@ -540,12 +541,12 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
&upb_enumdef_EnumDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_enumdef_EnumDescriptorProto_name, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE);
+ GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE, true);
upb_enumdef_register_EnumValueDescriptorProto(h);
upb_handlers_typed_pop(h);
}
@@ -813,27 +814,27 @@ static void upb_fielddef_register_FieldDescriptorProto(upb_handlers *h) {
upb_register_startend(h, upb_fielddef_startmsg, upb_fielddef_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE, false,
&upb_fielddef_ontype, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE, false,
&upb_fielddef_onlabel, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE, false,
&upb_fielddef_onnumber, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_fielddef_onname, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE, false,
&upb_fielddef_ontypename, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE, false,
&upb_fielddef_ondefaultval, UPB_NO_VALUE);
}
@@ -954,23 +955,23 @@ static void upb_msgdef_register_DescriptorProto(upb_handlers *h) {
upb_register_startend(h, &upb_msgdef_startmsg, &upb_msgdef_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_msgdef_onname, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE);
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE, true);
upb_fielddef_register_FieldDescriptorProto(h);
upb_handlers_typed_pop(h);
// DescriptorProto is self-recursive, so we must link the definition.
upb_handlers_typed_link(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, 0);
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, true, 0);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE);
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true);
upb_enumdef_register_EnumDescriptorProto(h);
upb_handlers_typed_pop(h);
diff --git a/src/upb_glue.c b/src/upb_glue.c
index 41f974b..b6a0273 100644
--- a/src/upb_glue.c
+++ b/src/upb_glue.c
@@ -29,7 +29,6 @@ void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
- upb_handlers_uninit(&h);
}
void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
@@ -53,7 +52,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_stringsink_uninit(&strsink);
upb_textprinter_free(p);
- upb_handlers_uninit(&h);
}
void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
@@ -72,7 +70,6 @@ void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
upb_decoder_decode(&d, status);
- upb_handlers_uninit(&h);
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 6fc321e..aac2c91 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -299,6 +299,13 @@ upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) {
static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) {
upb_msg *m = _m;
upb_fielddef *f = upb_value_getfielddef(fval);
+ if (upb_isstring(f)) {
+ //fprintf(stderr, "dmsg_value! this=%p f=%p name=" UPB_STRFMT ",
+ // " UPB_STRFMT " %p\n", m, f, UPB_STRARG(f->name), UPB_STRARG(val.val.str));
+ } else {
+ //fprintf(stderr, "dmsg_value! this=%p f=%p name=" UPB_STRFMT ",
+ // %llu\n", m, f, UPB_STRARG(f->name), val.val.uint64);
+ }
upb_msg_appendval(m, f, val);
return UPB_CONTINUE;
}
@@ -306,8 +313,11 @@ static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) {
static upb_sflow_t upb_dmsgsink_startsubmsg(void *_m, upb_value fval) {
upb_msg *m = _m;
upb_fielddef *f = upb_value_getfielddef(fval);
+ //fprintf(stderr, "dmsg_startsubmsg! " UPB_STRFMT " %p\n", UPB_STRARG(fval.val.fielddef->name), f);
upb_msgdef *msgdef = upb_downcast_msgdef(f->def);
- return UPB_CONTINUE_WITH(upb_msg_appendmsg(m, f, msgdef));
+ void *p = upb_msg_appendmsg(m, f, msgdef);
+ //printf("Continuing with: %p\n", p);
+ return UPB_CONTINUE_WITH(p);
}
void upb_msg_regdhandlers(upb_handlers *h) {
diff --git a/src/upb_stream.c b/src/upb_stream.c
index aebdb42..982c8a3 100644
--- a/src/upb_stream.c
+++ b/src/upb_stream.c
@@ -11,36 +11,36 @@
/* upb_handlers ***************************************************************/
-static upb_flow_t upb_startmsg_nop(void *closure) {
+upb_flow_t upb_startmsg_nop(void *closure) {
(void)closure;
return UPB_CONTINUE;
}
-static void upb_endmsg_nop(void *closure, upb_status *status) {
+void upb_endmsg_nop(void *closure, upb_status *status) {
(void)closure;
(void)status;
}
-static upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) {
+upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) {
(void)closure;
(void)fval;
(void)val;
return UPB_CONTINUE;
}
-static upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) {
+upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) {
(void)fval;
return UPB_CONTINUE_WITH(closure);
}
-static upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) {
+upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) {
(void)closure;
(void)fval;
return UPB_CONTINUE;
}
-static upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
- upb_value val) {
+upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
+ upb_value val) {
(void)closure;
(void)fieldnum;
(void)val;
@@ -52,6 +52,8 @@ static void upb_msgent_init(upb_handlers_msgent *e) {
e->startmsg = &upb_startmsg_nop;
e->endmsg = &upb_endmsg_nop;
e->unknownval = &upb_unknownval_nop;
+ e->is_group = false;
+ e->tablearray = NULL;
}
void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
@@ -61,6 +63,7 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
h->top = &h->stack[0];
h->limit = &h->stack[UPB_MAX_TYPE_DEPTH];
h->toplevel_msgdef = md;
+ h->should_jit = true;
if (md) upb_msgdef_ref(md);
h->top->msgent_index = 0;
@@ -70,19 +73,22 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
}
void upb_handlers_uninit(upb_handlers *h) {
- for (int i = 0; i < h->msgs_len; i++) upb_inttable_free(&h->msgs[i].fieldtab);
+ for (int i = 0; i < h->msgs_len; i++) {
+ upb_inttable_free(&h->msgs[i].fieldtab);
+ free(h->msgs[i].tablearray);
+ }
free(h->msgs);
upb_msgdef_unref(h->toplevel_msgdef);
}
static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
- upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type) {
+ upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) {
uint32_t tag = fieldnum << 3 | upb_types[type].native_wire_type;
upb_handlers_fieldent *f =
upb_inttable_lookup(&h->msgent->fieldtab, tag);
if (!f) {
upb_handlers_fieldent new_f = {false, type, -1, UPB_NO_VALUE,
- {&upb_value_nop}, &upb_endsubmsg_nop};
+ {&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, repeated};
if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop;
upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f);
@@ -95,9 +101,9 @@ static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
static upb_handlers_fieldent *upb_handlers_getorcreate(
upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type, upb_value fval) {
+ upb_fieldtype_t type, bool repeated, upb_value fval) {
upb_handlers_fieldent *f =
- upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+ upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->fval = fval;
return f;
}
@@ -140,42 +146,40 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start,
}
void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type, upb_value_handler_t value,
- upb_value fval) {
- upb_handlers_getorcreate(h, fieldnum, type, fval)->cb.value =
+ upb_fieldtype_t type, bool repeated,
+ upb_value_handler_t value, upb_value fval) {
+ upb_handlers_getorcreate(h, fieldnum, type, repeated, fval)->cb.value =
value ? value : &upb_value_nop;
}
void upb_register_value(upb_handlers *h, upb_fielddef *f,
upb_value_handler_t value, upb_value fval) {
assert(f->msgdef == h->top->msgdef);
- upb_register_typed_value(h, f->number, f->type, value, fval);
+ upb_register_typed_value(h, f->number, f->type, upb_isarray(f), value, fval);
}
void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type,
+ upb_fieldtype_t type, bool repeated,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval) {
- upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, fval);
+ upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval);
f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop;
f->endsubmsg = end ? end : &upb_endsubmsg_nop;
}
-void upb_handlers_typed_link(upb_handlers *h,
- upb_field_number_t fieldnum,
- upb_fieldtype_t type,
- int frames) {
+void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
+ upb_fieldtype_t type, bool repeated, int frames) {
assert(frames <= (h->top - h->stack));
upb_handlers_fieldent *f =
- upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+ upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->msgent_index = (h->top - frames)->msgent_index;
}
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type) {
+ upb_fieldtype_t type, bool repeated) {
upb_handlers_fieldent *f =
- upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+ upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
if (h->top == h->limit) abort(); // TODO: make growable.
++h->top;
if (f->msgent_index == -1) {
@@ -204,8 +208,8 @@ void upb_handlers_push(upb_handlers *h, upb_fielddef *f,
bool delegate) {
assert(f->msgdef == h->top->msgdef);
(void)delegate; // TODO
- upb_register_typed_submsg(h, f->number, f->type, start, end, fval);
- upb_handlers_typed_push(h, f->number, f->type);
+ upb_register_typed_submsg(h, f->number, f->type, upb_isarray(f), start, end, fval);
+ upb_handlers_typed_push(h, f->number, f->type, upb_isarray(f));
}
void upb_handlers_typed_pop(upb_handlers *h) {
@@ -229,13 +233,14 @@ static upb_handlers_fieldent toplevel_f = {
#else
{{0}, UPB_VALUETYPE_RAW},
#endif
- {NULL}, NULL};
+ {NULL}, NULL, 0, 0, 0, false};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) {
d->handlers = h;
for (int i = 0; i < h->msgs_len; i++)
upb_inttable_compact(&h->msgs[i].fieldtab);
d->stack[0].f = &toplevel_f;
+ d->limit = &d->stack[UPB_MAX_NESTING];
upb_status_init(&d->status);
}
@@ -249,10 +254,10 @@ void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end
d->top = d->stack;
d->top->closure = top_closure;
d->top->end_offset = top_end_offset;
- d->limit = &d->stack[UPB_MAX_NESTING];
}
void upb_dispatcher_uninit(upb_dispatcher *d) {
+ upb_handlers_uninit(d->handlers);
upb_status_uninit(&d->status);
}
diff --git a/src/upb_stream.h b/src/upb_stream.h
index 0c75acd..7ae9b8d 100644
--- a/src/upb_stream.h
+++ b/src/upb_stream.h
@@ -81,6 +81,14 @@ typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval);
typedef upb_flow_t (*upb_unknownval_handler_t)(
void *closure, upb_field_number_t fieldnum, upb_value val);
+upb_flow_t upb_startmsg_nop(void *closure);
+void upb_endmsg_nop(void *closure, upb_status *status);
+upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val);
+upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval);
+upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval);
+upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
+ upb_value val);
+
typedef struct {
bool junk;
upb_fieldtype_t type;
@@ -93,14 +101,27 @@ typedef struct {
upb_startsubmsg_handler_t startsubmsg;
} cb;
upb_endsubmsg_handler_t endsubmsg;
+ uint32_t jit_pclabel;
+ uint32_t jit_pclabel_notypecheck;
+ uint32_t jit_submsg_done_pclabel;
+ bool repeated;
} upb_handlers_fieldent;
-typedef struct {
+typedef struct _upb_handlers_msgent {
upb_startmsg_handler_t startmsg;
upb_endmsg_handler_t endmsg;
upb_unknownval_handler_t unknownval;
// Maps field number -> upb_handlers_fieldent.
upb_inttable fieldtab;
+ uint32_t jit_startmsg_pclabel;
+ uint32_t jit_endofbuf_pclabel;
+ uint32_t jit_endofmsg_pclabel;
+ uint32_t jit_unknownfield_pclabel;
+ uint32_t groupnum;
+ bool is_group;
+ int32_t jit_parent_field_done_pclabel;
+ uint32_t max_field_number;
+ void **tablearray;
} upb_handlers_msgent;
typedef struct {
@@ -115,6 +136,7 @@ struct _upb_handlers {
upb_msgdef *toplevel_msgdef; // We own a ref.
upb_handlers_msgent *msgent;
upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit;
+ bool should_jit;
};
typedef struct _upb_handlers upb_handlers;
@@ -237,19 +259,17 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start,
// Low-level functions -- internal-only.
void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type, upb_value_handler_t value,
- upb_value fval);
+ upb_fieldtype_t type, bool repeated,
+ upb_value_handler_t value, upb_value fval);
void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type,
+ upb_fieldtype_t type, bool repeated,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval);
-void upb_handlers_typed_link(upb_handlers *h,
- upb_field_number_t fieldnum,
- upb_fieldtype_t type,
- int frames);
+void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
+ upb_fieldtype_t type, bool repeated, int frames);
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type);
+ upb_fieldtype_t type, bool repeated);
void upb_handlers_typed_pop(upb_handlers *h);
INLINE upb_handlers_msgent *upb_handlers_getmsgent(upb_handlers *h,
@@ -308,8 +328,8 @@ typedef struct {
int delegated_depth;
// Stack.
- upb_dispatcher_frame stack[UPB_MAX_NESTING];
upb_status status;
+ upb_dispatcher_frame stack[UPB_MAX_NESTING];
} upb_dispatcher;
INLINE bool upb_dispatcher_skipping(upb_dispatcher *d) {
diff --git a/src/upb_string.c b/src/upb_string.c
index de633bc..8625f76 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -72,7 +72,6 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len) {
- if(str->ptr) *(char*)0 = 0;
assert(str->ptr == NULL);
assert(start + len <= upb_string_len(target_str));
if (target_str->src) {
diff --git a/src/upb_string.h b/src/upb_string.h
index 88a513f..5aa5f3b 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -155,9 +155,13 @@ INLINE const char *upb_string_getbufend(upb_string *str) {
}
// Attempts to recycle the string "str" so it may be reused and have different
-// data written to it. After the function returns, "str" points to a writable
-// string, which is either the original string if it had no other references
-// or a newly created string if it did have other references.
+// data written to it. The caller MUST own a reference on the given string
+// prior to making this call (i.e. the caller must have either created the
+// string or obtained a reference with upb_string_getref()).
+//
+// After the function returns, "str" points to a writable string, which is
+// either the original string if it had no other references or a newly created
+// string if it did have other references.
//
// As a special case, passing a pointer to NULL will allocate a new string.
// This is convenient for the pattern:
@@ -171,7 +175,9 @@ INLINE const char *upb_string_getbufend(upb_string *str) {
// }
INLINE void upb_string_recycle(upb_string **_str) {
upb_string *str = *_str;
- if(str && upb_atomic_only(&str->refcount)) {
+ int r;
+ if(str && ((r = upb_atomic_read(&str->refcount)) == 1 ||
+ (r == _UPB_STRING_REFCOUNT_STACK))) {
str->ptr = NULL;
str->len = 0;
_upb_string_release(str);
diff --git a/src/upb_table.c b/src/upb_table.c
index b9b9824..a754097 100644
--- a/src/upb_table.c
+++ b/src/upb_table.c
@@ -102,6 +102,7 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
upb_inttable_value *table_val;
if (_upb_inttable_isarrkey(t, key)) {
table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
+ t->array_count++;
//printf("Inserting key %d to Array part! %p\n", key, table_val);
} else {
t->t.count++;
@@ -152,8 +153,8 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i);
i = upb_inttable_next(src, i)) {
- //printf("load check: %d %d\n", upb_inttable_count(dst), upb_inttable_hashtablesize(dst));
- assert((double)(upb_inttable_count(dst)) /
+ //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst));
+ assert((double)(upb_table_count(&dst->t)) /
upb_inttable_hashtablesize(dst) <= MAX_LOAD);
intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i));
}
@@ -209,6 +210,7 @@ void upb_inttable_compact(upb_inttable *t) {
}
upb_inttable new_table;
int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
+ //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
upb_inttable_sizedinit(&new_table, array_size, hash_size,
upb_table_valuesize(&t->t));
//printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
diff --git a/src/upb_varint_decoder.h b/src/upb_varint_decoder.h
index 7297f43..d7af90a 100644
--- a/src/upb_varint_decoder.h
+++ b/src/upb_varint_decoder.h
@@ -30,7 +30,7 @@ typedef struct {
// A basic branch-based decoder, uses 32-bit values to get good performance
// on 32-bit architectures (but performs well on 64-bits also).
-INLINE upb_decoderet upb_decode_varint_branch32(const char *p) {
+INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
upb_decoderet r = {NULL, 0};
uint32_t low, high = 0;
uint32_t b;
@@ -54,7 +54,7 @@ done:
}
// Like the previous, but uses 64-bit values.
-INLINE upb_decoderet upb_decode_varint_branch64(const char *p) {
+INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
uint64_t val;
uint64_t b;
upb_decoderet r = {(void*)0, 0};
@@ -76,17 +76,9 @@ done:
return r;
}
-// Avoids branches for values >2-bytes.
-INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
- uint64_t b = 0;
- upb_decoderet r = {p, 0};
- memcpy(&b, r.p, 2);
- if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
- r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
- r.p = p + 2;
- if ((b & 0x8000) == 0) return r;
-
- // >2-byte varint.
+// Decodes a varint of at most 8 bytes without branching (except for error).
+INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+ uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
uint64_t stop_bit = ~cbits & (cbits+1);
@@ -94,27 +86,19 @@ INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
- r.val |= b << 14;
- r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
- return r;
+ upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+ r.val | (b << 14)};
+ return my_r;
}
-// Avoids branches for values >2-bytes.
-INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
- uint64_t b = 0;
- upb_decoderet r = {p, 0};
- memcpy(&b, r.p, 2);
- if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
- r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
- r.p = p + 2;
- if ((b & 0x8000) == 0) return r;
-
- // >2-byte varint.
+// Another implementation of the previous.
+INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+ uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
uint64_t stop_bit = ~cbits & (cbits + 1);
@@ -122,22 +106,46 @@ INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
b += b & 0x007f007f007f007fULL;
b += 3 * (b & 0x0000ffff0000ffffULL);
b += 15 * (b & 0x00000000ffffffffULL);
- r.val |= b << 7;
- r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
- return r;
+ upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+ r.val | (b << 7)};
+ return my_r;
}
-INLINE upb_decoderet upb_decode_varint_fast(const char *p) {
+// Template for a function that checks the first two bytes with branching
+// and dispatches longer varints (3-10 bytes) to a separate function.
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \
+ uint64_t b = 0; \
+ upb_decoderet r = {p, 0}; \
+ memcpy(&b, r.p, 2); \
+ if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \
+ r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \
+ r.p = p + 2; \
+ if ((b & 0x8000) == 0) return r; \
+ return decode_max8_function(r); \
+}
+
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
+#undef UPB_VARINT_DECODER_CHECK2
+
+// Our canonical functions for decoding varints, based on the currently
+// favored best-performing implementations.
+INLINE upb_decoderet upb_vdecode_fast(const char *p) {
// Use nobranch2 on 64-bit, branch32 on 32-bit.
if (sizeof(long) == 8)
- return upb_decode_varint_nobranch2(p);
+ return upb_vdecode_check2_massimino(p);
else
- return upb_decode_varint_branch32(p);
+ return upb_vdecode_branch32(p);
+}
+
+INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
+ return upb_vdecode_max8_massimino(r);
}
#ifdef __cplusplus
diff --git a/tests/test.proto b/tests/test.proto
index b51bd6b..f3dde24 100644
--- a/tests/test.proto
+++ b/tests/test.proto
@@ -29,3 +29,13 @@ message D {
optional A a = 1;
optional D d = 2;
}
+
+// A proto with a bunch of simple primitives.
+message SimplePrimitives {
+ optional fixed64 a = 1;
+ optional fixed32 b = 2;
+ optional double c = 3;
+ optional float d = 5;
+ //optional sint64 e = 6;
+ //optional sint32 f = 7;
+}
diff --git a/tests/test_varint.c b/tests/test_varint.c
index efe9418..f0a8993 100644
--- a/tests/test_varint.c
+++ b/tests/test_varint.c
@@ -33,7 +33,7 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
const char *twelvebyte_buf = twelvebyte;
// A varint that terminates before hitting the end of the provided buffer,
// but in too many bytes (11 instead of 10).
- upb_decoderet r = upb_decode_varint_fast(twelvebyte_buf);
+ upb_decoderet r = decoder(twelvebyte_buf);
ASSERT(r.p == NULL);
}
@@ -41,23 +41,26 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
#define TEST_VARINT_DECODER(decoder) \
/* Create non-inline versions for convenient inspection of assembly language \
* output. */ \
- upb_decoderet _upb_decode_varint_ ## decoder(const char *p) { \
- return upb_decode_varint_ ## decoder(p); \
+ upb_decoderet _upb_vdecode_ ## decoder(const char *p) { \
+ return upb_vdecode_ ## decoder(p); \
} \
void test_ ## decoder() { \
- test_varint_decoder(&_upb_decode_varint_ ## decoder); \
+ printf("Testing varint decoder: " #decoder "..."); \
+ fflush(stdout); \
+ test_varint_decoder(&_upb_vdecode_ ## decoder); \
+ printf("ok.\n"); \
} \
TEST_VARINT_DECODER(branch32);
TEST_VARINT_DECODER(branch64);
-TEST_VARINT_DECODER(nobranch1);
-TEST_VARINT_DECODER(nobranch2);
+TEST_VARINT_DECODER(check2_wright);
+TEST_VARINT_DECODER(check2_massimino);
int main() {
test_branch32();
test_branch64();
- test_nobranch1();
- test_nobranch2();
+ test_check2_wright();
+ test_check2_massimino();
}
#if 0
diff --git a/tests/tests.c b/tests/tests.c
index a04b1da..2fe8b8d 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -5,8 +5,10 @@
#include "upb_def.h"
#include "upb_glue.h"
#include "upb_test.h"
+#include "upb_stream.h"
+#include "upb_decoder.h"
-static void test_upb_symtab() {
+static upb_symtab *load_test_proto() {
upb_symtab *s = upb_symtab_new();
ASSERT(s);
upb_string *descriptor = upb_strreadfile("tests/test.proto.pb");
@@ -16,10 +18,38 @@ static void test_upb_symtab() {
}
upb_status status = UPB_STATUS_INIT;
upb_parsedesc(s, descriptor, &status);
- upb_printerr(&status);
ASSERT(upb_ok(&status));
upb_status_uninit(&status);
upb_string_unref(descriptor);
+ return s;
+}
+
+static upb_flow_t upb_test_onvalue(void *closure, upb_value fval, upb_value val) {
+ (void)closure;
+ (void)fval;
+ (void)val;
+ return UPB_CONTINUE;
+}
+
+static void test_upb_jit() {
+ upb_symtab *s = load_test_proto();
+ upb_string *symname = upb_strdupc("SimplePrimitives");
+ upb_def *def = upb_symtab_lookup(s, symname);
+ upb_string_unref(symname);
+ ASSERT(def);
+
+ upb_handlers h;
+ upb_handlers_init(&h, upb_downcast_msgdef(def));
+ upb_register_all(&h, NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL);
+ upb_decoder d;
+ upb_decoder_init(&d, &h);
+ upb_decoder_uninit(&d);
+ upb_symtab_unref(s);
+ upb_def_unref(def);
+}
+
+static void test_upb_symtab() {
+ upb_symtab *s = load_test_proto();
// Test cycle detection by making a cyclic def's main refcount go to zero
// and then be incremented to one again.
@@ -53,6 +83,7 @@ int main()
} while (0)
TEST(test_upb_symtab);
+ TEST(test_upb_jit);
printf("All tests passed (%d assertions).\n", num_assertions);
return 0;
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback