summaryrefslogtreecommitdiff
path: root/upb/pb/decoder_x64.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb/decoder_x64.dasc')
-rw-r--r--upb/pb/decoder_x64.dasc1086
1 files changed, 0 insertions, 1086 deletions
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
deleted file mode 100644
index dee063a..0000000
--- a/upb/pb/decoder_x64.dasc
+++ /dev/null
@@ -1,1086 +0,0 @@
-|//
-|// upb - a minimalist implementation of protocol buffers.
-|//
-|// Copyright (c) 2011 Google Inc. See LICENSE for details.
-|// Author: Josh Haberman <jhaberman@gmail.com>
-|//
-|// JIT compiler for upb_pbdecoder on x86. Given a decoderplan object (which
-|// contains an embedded set of upb_handlers), generates code specialized to
-|// parsing the specific message and calling specific handlers.
-|//
-|// Since the JIT can call other functions (the JIT'ted code is not a leaf
-|// function) we must respect alignment rules. All x86-64 systems require
-|// 16-byte stack alignment.
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <sys/mman.h>
-#include "dynasm/dasm_x86.h"
-#include "upb/shim/shim.h"
-
-#ifndef MAP_ANONYMOUS
-# define MAP_ANONYMOUS MAP_ANON
-#endif
-
-// We map into the low 32 bits when we can, but if this is not available
-// (like on OS X) we take what we can get. It's not required for correctness,
-// it's just a performance thing that makes it more likely that our jumps
-// can be rel32 (i.e. within 32-bits of our pc) instead of the longer
-// sequence required for other jumps (see callp).
-#ifndef MAP_32BIT
-#define MAP_32BIT 0
-#endif
-
-// Per-message pc-label offsets: upb_getpclabel(plan, h, X) adds X to the base label assigned to handlers h.
-enum {
- STARTMSG = 0,  // Message entry that first calls the startmsg handler (if any).
- AFTER_STARTMSG = 1,  // Message entry that jumps past the startmsg handler call.
- ENDOFBUF = 2,  // Target of "checkpoint" when PTR >= DECODER->effective_end.
- ENDOFMSG = 3,  // Calls the endmsg handler (if any) and returns.
- DYNDISPATCH = 4,  // Table-based dispatch on the decoded tag.
- TOTAL_MSG_PCLABELS = 5,  // Number of pc-labels reserved per message.
-};
-
-enum {  // Per-field pc-label offsets, used as upb_getpclabel(plan, f, X).
- FIELD = 0,  // Field entry stored in the dispatch table; checks the wire type first.
- FIELD_NO_TYPECHECK = 1,  // Field entry just after the wire-type check.
- TOTAL_FIELD_PCLABELS = 2,  // Number of pc-labels reserved per field.
-};
-
-typedef struct {  // Per-message JIT bookkeeping, stored in plan->msginfo keyed by the upb_handlers pointer.
- uint32_t max_field_number;  // Largest field number covered by tablearray (capped when pc-labels are assigned).
- // Currently keyed on field number. Could also try keying it
- // on encoded or decoded tag, or on encoded field number.
- void **tablearray;  // max_field_number + 1 code addresses, indexed by field number.
- // Pointer to the JIT code for parsing this message.
- void *jit_func;
-} upb_jitmsginfo;
-
-// Returns pc-label number n of obj (a upb_handlers or upb_fielddef pointer):
-// the base label recorded for obj in plan->pclabels, plus n.  obj must
-// already have been given labels.
-static uint32_t upb_getpclabel(decoderplan *plan, const void *obj, int n) {
-  upb_value base;
-  bool ok = upb_inttable_lookupptr(&plan->pclabels, obj, &base);
-  UPB_ASSERT_VAR(ok, ok);
-  return upb_value_getuint32(base) + n;
-}
-
-// Returns the upb_jitmsginfo recorded in plan->msginfo for handlers h.
-// h must be one of the handlers that were assigned pc-labels for this plan.
-static upb_jitmsginfo *upb_getmsginfo(const decoderplan *plan,
-                                      const upb_handlers *h) {
-  upb_value entry;
-  bool ok = upb_inttable_lookupptr(&plan->msginfo, h, &entry);
-  UPB_ASSERT_VAR(ok, ok);
-  return upb_value_getptr(entry);
-}
-
-// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
-// at runtime. GDB 7.x+ has defined an interface for doing this, and these
-// structure/function definitions are copied out of gdb/jit.h.
-//
-// We need to give GDB an ELF file at runtime describing the symbols we have
-// generated. To avoid implementing the ELF format, we generate an ELF file
-// at compile-time and compile it in as a character string. We can replace
-// a few key constants (address of JIT-ted function and its size) by looking
-// for a few magic numbers and doing a dumb string replacement.
-
-#ifndef __APPLE__
-const unsigned char upb_jit_debug_elf_file[] = {
-#include "upb/pb/jit_debug_elf_file.h"
-};
-
-typedef enum  // Values for gdb_jit_descriptor.action_flag.
-{
- GDB_JIT_NOACTION = 0,  // Initial value of __jit_debug_descriptor.action_flag.
- GDB_JIT_REGISTER,  // Set by upb_reg_jit_gdb() before it calls __jit_debug_register_code().
- GDB_JIT_UNREGISTER  // Not used by the code shown here (unregistering is still a TODO).
-} jit_actions_t;
-
-typedef struct gdb_jit_entry {  // One registered symbol file; entries form a doubly linked list.
- struct gdb_jit_entry *next_entry;  // Next (older) entry, or NULL.
- struct gdb_jit_entry *prev_entry;  // Previous (newer) entry, or NULL for the list head.
- const char *symfile_addr;  // Start of the in-memory ELF image.
- uint64_t symfile_size;  // Size of that image in bytes.
-} gdb_jit_entry;
-
-typedef struct {  // Layout of the __jit_debug_descriptor global.
- uint32_t version;  // Initialized to 1 below.
- uint32_t action_flag;  // One of jit_actions_t.
- gdb_jit_entry *relevant_entry;  // Entry the current action refers to.
- gdb_jit_entry *first_entry;  // Head of the list of registered entries.
-} gdb_jit_descriptor;
-
-gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};  // Version 1, no pending action, empty entry list.
-
-void __attribute__((noinline)) __jit_debug_register_code() {  // Called by upb_reg_jit_gdb() after it has updated __jit_debug_descriptor.
- __asm__ __volatile__("");  // Empty asm statement; presumably keeps calls to this function from being optimized away -- TODO confirm.
-}
-
-// Registers the JIT-ted code of "plan" with GDB.
-//
-// Copies the precompiled ELF image into plan->debug_info, replaces the magic
-// placeholder words in the copy with the address and size of the generated
-// code, then links a new entry into __jit_debug_descriptor and notifies GDB.
-//
-// This is best-effort debugging support: if memory cannot be allocated the
-// function returns without registering anything.  plan->debug_info (possibly
-// NULL) stays owned by the plan.
-void upb_reg_jit_gdb(decoderplan *plan) {
-  // Create debug info.
-  size_t elf_len = sizeof(upb_jit_debug_elf_file);
-  plan->debug_info = malloc(elf_len);
-  if (!plan->debug_info) return;
-  memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);
-
-  // Scan the image one 8-byte word at a time and patch the placeholders.
-  // Words are read and written with memcpy() so that the byte buffer is never
-  // accessed through a uint64_t lvalue, and the bounds are computed on a
-  // char pointer rather than with void* arithmetic.
-  char *image = (char*)plan->debug_info;
-  for (size_t ofs = 0; ofs + sizeof(uint64_t) <= elf_len;
-       ofs += sizeof(uint64_t)) {
-    uint64_t word;
-    memcpy(&word, image + ofs, sizeof(word));
-    if (word == 0x12345678) {
-      word = (uintptr_t)plan->jit_code;  // Address of the JIT-ted code.
-    } else if (word == 0x321) {
-      word = plan->jit_size;             // Size of the JIT-ted code.
-    } else {
-      continue;
-    }
-    memcpy(image + ofs, &word, sizeof(word));
-  }
-
-  // Register the JIT-ted code with GDB.
-  gdb_jit_entry *e = malloc(sizeof(*e));
-  if (!e) return;  // The image is still freed together with the plan.
-  e->next_entry = __jit_debug_descriptor.first_entry;
-  e->prev_entry = NULL;
-  if (e->next_entry) e->next_entry->prev_entry = e;
-  e->symfile_addr = plan->debug_info;
-  e->symfile_size = elf_len;
-  __jit_debug_descriptor.first_entry = e;
-  __jit_debug_descriptor.relevant_entry = e;
-  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
-  __jit_debug_register_code();
-}
-
-#else
-
-void upb_reg_jit_gdb(decoderplan *plan) {  // __APPLE__ build: registering with GDB is a no-op.
- (void)plan;
-}
-
-#endif
-
-// Asserts that addr is not NULL.
-//
-// This lives in its own function because GCC complains about expressions
-// such as (&foo != NULL) written inline: they can never evaluate to false.
-static void upb_assert_notnull(void *addr) {
-  (void)addr;  // Keeps addr "used" when assert() is compiled out.
-  assert(addr != NULL);
-}
-
-|.arch x64
-|.actionlist upb_jit_actionlist
-|.globals UPB_JIT_GLOBAL_
-|.globalnames upb_jit_globalnames
-|
-|// Calling conventions: registers used to pass arguments to C functions. Note -- this will need to be changed for
-|// Windows, which uses a different calling convention!
-|.define ARG1_64, rdi  // First integer/pointer argument.
-|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi.
-|.define ARG2_32, esi  // Second argument, 32-bit view.
-|.define ARG2_64, rsi  // Second argument, 64-bit view.
-|.define ARG3_32, edx  // Third argument, 32-bit view.
-|.define ARG3_64, rdx  // Third argument, 64-bit view.
-|.define ARG4_32, ecx  // Fourth argument, 32-bit view.
-|.define ARG4_64, rcx  // Fourth argument, 64-bit view.
-|.define XMMARG1, xmm0  // First floating-point argument.
-
-|
-|// Register allocation / type map.
-|// ALL of the code in this file uses these register allocations.
-|// When we "call" within this file, we do not use regular calling
-|// conventions, but of course when calling to user callbacks we must.
-|.define PTR, rbx // Writing this to DECODER->ptr commits our progress.
-|.define CLOSURE, r12  // Closure passed as the first argument to handlers.
-|.type SINKFRAME, upb_sinkframe, r13  // Top frame of the sink's frame stack.
-|.type FRAME, frame, r14  // Top frame of the decoder's frame stack (DECODER->top).
-|.type DECODER, upb_pbdecoder, r15  // The upb_pbdecoder passed to the trampoline.
-|.type SINK, upb_sink  // No fixed register; used with an explicit one, e.g. SINK:rax.
-|// callp: asserts (at code-generation time) that addr is non-NULL, then emits a call to it.
-|.macro callp, addr
-|| upb_assert_notnull(addr);
-|// TODO(haberman): fix this. I believe the predicate we should actually be
-|// testing is whether the jump distance is greater than INT32_MAX, not the
-|// absolute address of the target.
-|| if ((uintptr_t)addr < 0xffffffff) {
- | call &addr  // Target address is below 0xffffffff: call it directly.
-|| } else {
- | mov64 rax, (uintptr_t)addr  // Otherwise load the full 64-bit address (clobbers rax)...
- | call rax  // ...and call through the register.
-|| }
-|.endmacro
-|// loadarg2: loads the constant val into ARG2, picking the instruction by the size of the value.
-|.macro loadarg2, val
-||{
-|| uintptr_t data = (uintptr_t)val;
-|| if (data > 0xffffffff) {
-| mov64 ARG2_64, data  // Needs a full 64-bit immediate.
-|| } else if (data) {
-| mov ARG2_32, data  // Nonzero value that fits in 32 bits.
-|| } else {
-| xor ARG2_32, ARG2_32  // Zero.
-|| }
-|| }
-|.endmacro
-|// load_handler_data: loads the handler data registered for (h, f, type) into ARG2.
-|.macro load_handler_data, h, f, type
-| loadarg2 gethandlerdata(h, f, type)
-|.endmacro
-|
-|// Checkpoints our progress by writing PTR to DECODER->ptr, and checks for
-|// end-of-buffer: jumps to the ENDOFBUF label of h when PTR >= DECODER->effective_end.
-|.macro checkpoint, h
-| mov DECODER->ptr, PTR  // Commit progress.
-| cmp PTR, DECODER->effective_end
-| jae =>upb_getpclabel(plan, h, ENDOFBUF)
-|.endmacro
-|// check_bool_ret: jumps to exit_jit if the function just called returned false (al == 0).
-|.macro check_bool_ret
-| test al, al
-| jz ->exit_jit
-|.endmacro
-|// check_ptr_ret: jumps to exit_jit if the function just called returned NULL (rax == 0).
-|.macro check_ptr_ret
-| test rax, rax
-| jz ->exit_jit
-|.endmacro
-|
-|// Decodes varint into ARG2. PTR itself is not modified.
-|// Inputs:
-|// - ecx: first 4 bytes of varint
-|// - offset: offset from PTR where varint begins
-|// Outputs:
-|// - ARG2: contains decoded varint
-|// - rax: new PTR (address just past the varint). Also clobbers edx; the slow path calls a C helper and may jump to exit_jit.
-|.macro decode_loaded_varint, offset
-| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
-| lea rax, [PTR + offset + 1]  // Result pointer if the varint is one byte long.
-| mov ARG2_32, ecx
-| and ARG2_32, 0x7f  // Low 7 bits of the first byte.
-| test cl, cl
-| jns >9  // Continuation bit of byte 0 clear: done.
-| lea rax, [PTR + offset + 2]  // Result pointer if the varint is two bytes long.
-| movzx edx, ch  // Second byte.
-| and edx, 0x7f
-| shl edx, 7
-| or ARG2_32, edx  // Merge bits 7..13.
-| test cx, cx
-| jns >9  // Continuation bit of byte 1 clear: done.
-| mov ARG1_64, rax  // Slow path: pass the address of the third byte to the helper.
-|// XXX: I don't think this handles 64-bit values correctly.
-|// Test with UINT64_MAX
-| callp upb_vdecode_max8_fast
-|// rax return from function will contain new pointer
-| mov ARG2_64, rdx  // The decoded value is taken from rdx.
-| check_ptr_ret // Check for unterminated, >10-byte varint.
-|9:
-|.endmacro
-|// decode_varint: loads and decodes the varint at [PTR + offset] into ARG2 and advances PTR past it.
-|.macro decode_varint, offset
-| mov ecx, dword [PTR + offset]  // First 4 bytes of the varint.
-| decode_loaded_varint offset
-| mov PTR, rax  // rax is the address just past the varint.
-|.endmacro
-|
-|// Table-based field dispatch. Decodes the tag, then jumps through the message's tablearray.
-|// Inputs:
-|// - ecx: first 4 bytes of tag
-|// Outputs:
-|// - ecx: field number
-|// - esi: wire type
-|// Could specialize this by avoiding the value masking: could just key the
-|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
-|// Currently only support tables where all entries are in the array part.
-|.macro dyndispatch_, h
-|| asmlabel(plan, "_UPB_MCODE_DISPATCH_%s.%d",
-|| upb_msgdef_fullname(upb_handlers_msgdef(h)), rand());
-|=>upb_getpclabel(plan, h, DYNDISPATCH):
-| decode_loaded_varint, 0
-| mov ecx, esi
-| shr ecx, 3  // ecx = field number.
-| and esi, 0x7 // Note: this value is used in the FIELD pclabel below.
-| cmp esi, UPB_WIRE_TYPE_END_GROUP
-| je >1
-|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
-| cmp ecx, mi->max_field_number // Bounds-check the field.
-| ja ->exit_jit // In the future; could be unknown label
-|| if ((uintptr_t)mi->tablearray < 0xffffffff) {
-| // TODO: support hybrid array/hash tables.
-| mov rax, qword [rcx*8 + mi->tablearray]  // Table address is used directly in the address expression.
-|| } else {
-| mov64 rax, (uintptr_t)mi->tablearray  // Table address needs a 64-bit immediate.
-| mov rax, qword [rax + rcx*8]
-|| }
-| jmp rax // Dispatch: unpredictable jump.
-|1:
-|// End group.
-| cmp ecx, FRAME->group_fieldnum
-| jne ->exit_jit // Unexpected END_GROUP tag.
-| mov PTR, rax // rax came from decode_loaded_varint
-| mov DECODER->ptr, PTR  // Commit: the END_GROUP tag has been consumed.
-| jmp =>upb_getpclabel(plan, h, ENDOFMSG)
-|.endmacro
-|// dyndispatch: chosen when DynASM processes this file; the ".if 1" branch below is the active one.
-|.if 1
-| // Replicated dispatch: larger code, but better branch prediction.
-| .define dyndispatch, dyndispatch_
-|.else
-| // Single dispatch: smaller code, could be faster because of reduced
-| // icache usage. We keep this around to allow for easy comparison between
-| // the two.
-| .macro dyndispatch, h
-| jmp =>upb_getpclabel(plan, h, DYNDISPATCH)
-| .endmacro
-|.endif
-|// pushsinkframe: pushes a frame on the sink's frame stack, or jumps to exit_jit if the stack is full. Clobbers rax, rcx and r9.
-|.macro pushsinkframe, handlers, field, endtype
-| mov rax, DECODER->sink
-| mov dword SINKFRAME->selector, getselector(field, endtype)  // Stored in the current frame, before the new one is pushed.
-| lea rcx, [SINKFRAME + sizeof(upb_sinkframe)] // rcx for short addressing
-| cmp rcx, SINK:rax->limit
-| jae ->exit_jit // Frame stack overflow.
-| mov64 r9, (uintptr_t)handlers
-| mov SINKFRAME:rcx->h, r9  // The new frame records its handlers...
-| mov SINKFRAME:rcx->closure, CLOSURE  // ...and the current closure.
-| mov SINK:rax->top, rcx
-| mov SINKFRAME, rcx  // The new frame becomes the current one.
-|.endmacro
-|// popsinkframe: pops the top sink frame and reloads CLOSURE from the frame below it. Clobbers rax.
-|.macro popsinkframe
-| sub SINKFRAME, sizeof(upb_sinkframe)
-| mov rax, DECODER->sink
-| mov SINK:rax->top, SINKFRAME
-| mov CLOSURE, SINKFRAME->closure
-|.endmacro
-|
-|// Push a stack frame (not the CPU stack, the upb_pbdecoder stack), then the matching sink frame. Jumps to exit_jit on overflow.
-|.macro pushframe, handlers, field, end_offset_, endtype
-|// Decoder Frame.
-| lea rax, [FRAME + sizeof(frame)] // rax for short addressing
-| cmp rax, DECODER->limit
-| jae ->exit_jit // Frame stack overflow.
-| mov64 r10, (uintptr_t)field
-| mov FRAME:rax->f, r10
-| mov qword FRAME:rax->end_ofs, end_offset_
-| mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ)
-| mov byte FRAME:rax->is_packed, 0
-|| if (upb_fielddef_istagdelim(field) && endtype == UPB_HANDLER_ENDSUBMSG) {
-| mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field)  // Group: field number its END_GROUP tag must carry.
-|| } else {
-| mov dword FRAME:rax->group_fieldnum, 0xffffffff  // Not a group frame.
-|| }
-| mov DECODER->top, rax
-| mov FRAME, rax  // The new frame becomes the current one.
-| pushsinkframe handlers, field, endtype
-|.endmacro
-|// popframe: pops the decoder frame and the sink frame, then recomputes DECODER->effective_end for the frame now on top.
-|.macro popframe
-| sub FRAME, sizeof(frame)
-| mov DECODER->top, FRAME
-| popsinkframe
-| setmsgend
-|.endmacro
-|// setmsgend: sets DECODER->effective_end to the earlier of DECODER->jit_end and the end of the current frame. Clobbers rax and rsi.
-|.macro setmsgend
-| mov rsi, DECODER->jit_end
-| mov rax, qword FRAME->end_ofs // Will be UINT64_MAX for groups.
-| sub rax, qword DECODER->bufstart_ofs
-| add rax, qword DECODER->buf // rax = d->buf + f->end_ofs - d->bufstart_ofs
-| jc >8 // If the addition overflowed, use jit_end
-| cmp rax, rsi
-| ja >8 // If jit_end is less, use jit_end
-| mov rsi, rax // Use frame end.
-|8:
-| mov DECODER->effective_end, rsi
-|.endmacro
-|
-|// checktag: compares the tag loaded in rcx against the constant "tag" and
-|// sets the flags for a following je/jne.  Since the tag is a varint we must
-|// only compare as many bytes as actually have data, so the width of the
-|// comparison is chosen from upb_value_size(tag).  The 3- and 5-byte cases
-|// mask rcx in place; the 5-byte case also clobbers rdx.
-|.macro checktag, tag
-|| switch (upb_value_size(tag)) {
-||   case 1:
-|      cmp cl, tag
-||     break;
-||   case 2:
-|      cmp cx, tag
-||     break;
-||   case 3:
-|      and ecx, 0xffffff  // 3 bytes
-|      cmp rcx, tag
-||     // This break used to be missing, so the 3-byte case fell through and
-||     // emitted a second, redundant compare.
-||     break;
-||   case 4:
-|      cmp ecx, tag
-||     break;
-||   case 5:
-|      mov64 rdx, 0xffffffffff  // 5 bytes
-|      and rcx, rdx
-|      cmp rcx, tag
-||     break;
-||   default: abort();
-|| }
-|.endmacro
-|// sethas: sets bit number hasbit in the bit array starting at [reg]; emits nothing when hasbit is negative.
-|.macro sethas, reg, hasbit
-|| if (hasbit >= 0) {
-| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
-|| }
-|.endmacro
-
-
-#include <stdlib.h>
-#include "upb/pb/varint.h"
-
-// Returns the handler of the given type registered in h for field f, as
-// reported by upb_handlers_gethandler() for the corresponding selector.
-static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f,
-                            upb_handlertype_t type) {
-  upb_selector_t sel = getselector(f, type);
-  return upb_handlers_gethandler(h, sel);
-}
-
-// Returns, as an integer, the handler data registered in h for the handler
-// of the given type on field f.
-static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f,
-                                upb_handlertype_t type) {
-  upb_selector_t sel = getselector(f, type);
-  return (uintptr_t)upb_handlers_gethandlerdata(h, sel);
-}
-
-// Defines a fresh pc-label at the current position in the generated code and
-// records a printf-style formatted name for it in plan->asmlabels.  The name
-// string is owned by that table.
-static void asmlabel(decoderplan *plan, const char *fmt, ...) {
-  char *name = NULL;
-  size_t name_size = 0;
-  va_list args;
-  va_start(args, fmt);
-  upb_vrprintf(&name, &name_size, 0, fmt, args);
-  va_end(args);
-
-  // Take the next label number and make sure DynASM has room for it.
-  uint32_t lbl = plan->pclabel_count++;
-  dasm_growpc(plan, plan->pclabel_count);
-  |=>lbl:
-  upb_inttable_insert(&plan->asmlabels, lbl, upb_value_ptr(name));
-}
-// Emits code that decodes the value of field f, which starts tag_size bytes past PTR, and advances PTR past what it consumed
-// (for submessages only the length prefix, for groups only the tag). Integers end up in ARG2, float/double in XMMARG1; strings are delivered to their handlers here.
-static void upb_decoderplan_jit_decodefield(decoderplan *plan,
- size_t tag_size,
- const upb_handlers *h,
- const upb_fielddef *f) {
- // Decode the value into ARG2 / XMMARG1; upb_decoderplan_jit_callcb() passes it on to the callback.
- asmlabel(plan, "UPB_MCODE_DECODE_FIELD_%s.%s",
- upb_msgdef_fullname(upb_handlers_msgdef(h)),
- upb_fielddef_name(f));
- switch (upb_fielddef_descriptortype(f)) {
- case UPB_DESCRIPTOR_TYPE_DOUBLE:
- | movsd XMMARG1, qword [PTR + tag_size]
- | add PTR, 8 + tag_size  // Skip the tag and the 8-byte value.
- break;
-
- case UPB_DESCRIPTOR_TYPE_FIXED64:
- case UPB_DESCRIPTOR_TYPE_SFIXED64:
- | mov ARG2_64, qword [PTR + tag_size]
- | add PTR, 8 + tag_size
- break;
-
- case UPB_DESCRIPTOR_TYPE_FLOAT:
- | movss XMMARG1, dword [PTR + tag_size]
- | add PTR, 4 + tag_size  // Skip the tag and the 4-byte value.
- break;
-
- case UPB_DESCRIPTOR_TYPE_FIXED32:
- case UPB_DESCRIPTOR_TYPE_SFIXED32:
- | mov ARG2_32, dword [PTR + tag_size]
- | add PTR, 4 + tag_size
- break;
-
- case UPB_DESCRIPTOR_TYPE_BOOL:
- // Can't assume it's one byte long, because bool must be wire-compatible
- // with all of the varint integer types.
- | decode_varint tag_size
- | test ARG2_64, ARG2_64
- | setne al  // al = (value != 0)
- | movzx ARG2_32, al  // Normalize to 0 or 1.
- break;
-
- case UPB_DESCRIPTOR_TYPE_INT64:
- case UPB_DESCRIPTOR_TYPE_UINT64:
- case UPB_DESCRIPTOR_TYPE_INT32:
- case UPB_DESCRIPTOR_TYPE_UINT32:
- case UPB_DESCRIPTOR_TYPE_ENUM:
- | decode_varint tag_size
- break;
-
- case UPB_DESCRIPTOR_TYPE_SINT64:
- // 64-bit zig-zag decoding.
- | decode_varint tag_size
- | mov rax, ARG2_64
- | shr ARG2_64, 1
- | and rax, 1
- | neg rax  // rax = -(n & 1): all ones for odd n, zero for even n.
- | xor ARG2_64, rax  // result = (n >> 1) ^ -(n & 1)
- break;
-
- case UPB_DESCRIPTOR_TYPE_SINT32:
- // 32-bit zig-zag decoding.
- | decode_varint tag_size
- | mov eax, ARG2_32
- | shr ARG2_32, 1
- | and eax, 1
- | neg eax  // eax = -(n & 1)
- | xor ARG2_32, eax  // result = (n >> 1) ^ -(n & 1)
- break;
-
- case UPB_DESCRIPTOR_TYPE_STRING:
- case UPB_DESCRIPTOR_TYPE_BYTES: {
- // We only handle the case where the entire string is in our current
- // buf, which sidesteps any security problems. The C path has more
- // robust checks.
- | mov ecx, dword [PTR + tag_size]
- | decode_loaded_varint tag_size  // ARG2 = string length, rax = start of the string data.
- | mov rdi, DECODER->end
- | sub rdi, rax  // rdi = bytes left in the buffer after the length prefix.
- | cmp ARG2_64, rdi // if (len > d->end - str)
- | ja ->exit_jit // Can't deliver, whole string not in buf.
- | mov PTR, rax
-
- upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR);
- if (handler) {
- // void* startstr(void *c, const void *hd, size_t hint)
- | mov DECODER->tmp_len, ARG2_32  // Save the length across the call.
- | mov ARG1_64, CLOSURE
- | mov ARG3_64, ARG2_64  // hint = string length (must be copied before ARG2 is overwritten).
- | load_handler_data h, f, UPB_HANDLER_STARTSTR
- | callp handler
- | check_ptr_ret
- | mov ARG1_64, rax // sub-closure
- | mov ARG4_32, DECODER->tmp_len  // Reload the length as the 4th argument of the string handler.
- } else {
- | mov ARG1_64, CLOSURE
- | mov ARG4_64, ARG2_64
- }
-
- handler = gethandler(h, f, UPB_HANDLER_STRING);
- if (handler) {
- // size_t str(void *c, const void *hd, const char *buf, size_t len)
- | load_handler_data h, f, UPB_HANDLER_STRING
- | mov ARG3_64, PTR
- | callp handler
- // TODO: properly handle returns other than "n" (the whole string).
- | add PTR, rax  // Advance by the handler's return value.
- } else {
- | add PTR, ARG4_64  // No handler: skip the whole string.
- }
-
- handler = gethandler(h, f, UPB_HANDLER_ENDSTR);
- if (handler) {
- // bool endstr(const upb_sinkframe *frame);
- | mov ARG1_64, CLOSURE  // NOTE(review): passes CLOSURE, not the sub-closure returned by startstr -- confirm this is intended.
- | load_handler_data h, f, UPB_HANDLER_ENDSTR
- | callp handler
- | check_bool_ret
- }
- break;
- }
-
- // Will dispatch callbacks and call submessage in a second.
- case UPB_DESCRIPTOR_TYPE_MESSAGE:
- | decode_varint tag_size  // ARG2 = length of the submessage.
- break;
- case UPB_DESCRIPTOR_TYPE_GROUP:
- | add PTR, tag_size  // Groups have no length prefix: just skip the tag.
- break;
-
- default: abort();
- }
-}
-
-// Emits the code that delivers the just-decoded value of field f.
-//
-// - Submessage/group fields: calls the startsubmsg handler (if any), pushes a
-//   frame and calls the JIT code of the sub-handlers (or, when there are
-//   none, skips the delimited region), then calls the endsubmsg handler (if
-//   any).
-// - Other non-string fields: stores the value straight into the closure when
-//   upb_shim_getdata() returns shim data for the handler, otherwise calls
-//   the handler.
-// - String fields: nothing is emitted here.
-//
-// Expects integer values (and submessage lengths) in ARG2 and float/double
-// values in XMMARG1.
-static void upb_decoderplan_jit_callcb(decoderplan *plan,
-                                       const upb_handlers *h,
-                                       const upb_fielddef *f) {
-  // Call callbacks.  Specializing the append accessors didn't yield a speed
-  // increase in benchmarks.
-  asmlabel(plan, "UPB_MCODE_CALLCB_%s.%s",
-           upb_msgdef_fullname(upb_handlers_msgdef(h)),
-           upb_fielddef_name(f));
-  if (upb_fielddef_issubmsg(f)) {
-    // Save the decoded length before anything can clobber ARG2.  This has to
-    // happen even when there is no startsubmsg handler, because the code
-    // below always reloads the length from DECODER->tmp_len; storing it only
-    // inside the handler branch left a stale value there otherwise.  (For
-    // groups the stored value is never read.)
-    | mov DECODER->tmp_len, ARG2_32
-
-    // Call startsubmsg handler (if any).
-    upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG);
-    if (startsubmsg) {
-      // upb_sflow_t startsubmsg(const upb_sinkframe *frame)
-      | mov ARG1_64, CLOSURE
-      | load_handler_data h, f, UPB_HANDLER_STARTSUBMSG
-      | callp startsubmsg
-      | check_ptr_ret
-      | mov CLOSURE, rax
-    }
-
-    const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f);
-    if (sub_h) {
-      // rdx = end offset of the submessage, recorded in the new frame.
-      if (upb_fielddef_istagdelim(f)) {
-        | mov rdx, UPB_NONDELIMITED
-      } else {
-        | mov esi, DECODER->tmp_len
-        | mov rdx, PTR
-        | sub rdx, DECODER->buf
-        | add rdx, DECODER->bufstart_ofs
-        | add rdx, rsi  // = d->bufstart_ofs + (d->ptr - d->buf) + delim_len
-      }
-      | pushframe sub_h, f, rdx, UPB_HANDLER_ENDSUBMSG
-      | call =>upb_getpclabel(plan, sub_h, STARTMSG)
-      | popframe
-    } else {
-      if (upb_fielddef_istagdelim(f)) {
-        // Groups with no handlers not supported yet.
-        assert(false);
-      } else {
-        // No sub-handlers: skip over the submessage bytes.
-        | mov esi, DECODER->tmp_len
-        | add PTR, rsi
-      }
-    }
-
-    // Call endsubmsg handler (if any).
-    upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG);
-    if (endsubmsg) {
-      // upb_flow_t endsubmsg(void *closure, upb_value fval);
-      | mov ARG1_64, CLOSURE
-      | load_handler_data h, f, UPB_HANDLER_ENDSUBMSG
-      | callp endsubmsg
-      | check_bool_ret
-    }
-  } else if (!upb_fielddef_isstring(f)) {
-    upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f);
-    upb_selector_t sel = getselector(f, handlertype);
-    upb_func *handler = gethandler(h, f, handlertype);
-    const upb_shim_data *data = upb_shim_getdata(h, sel);
-    if (data) {
-      // Shim handler: write the value at data->offset inside the closure
-      // instead of calling out, then set its has-bit.
-      switch (upb_fielddef_type(f)) {
-        case UPB_TYPE_INT64:
-        case UPB_TYPE_UINT64:
-          | mov [CLOSURE + data->offset], ARG2_64
-          break;
-        case UPB_TYPE_INT32:
-        case UPB_TYPE_UINT32:
-        case UPB_TYPE_ENUM:
-          | mov [CLOSURE + data->offset], ARG2_32
-          break;
-        case UPB_TYPE_DOUBLE:
-          | movsd qword [CLOSURE + data->offset], XMMARG1
-          break;
-        case UPB_TYPE_FLOAT:
-          | movss dword [CLOSURE + data->offset], XMMARG1
-          break;
-        case UPB_TYPE_BOOL:
-          | mov [CLOSURE + data->offset], ARG2_8
-          break;
-        case UPB_TYPE_STRING:
-        case UPB_TYPE_BYTES:
-        case UPB_TYPE_MESSAGE:
-          assert(false); break;
-      }
-      | sethas CLOSURE, data->hasbit
-    } else if (handler) {
-      // bool value(const upb_sinkframe* frame, ctype val)
-      | mov ARG1_64, CLOSURE
-      | mov ARG3_64, ARG2_64  // The value moves to ARG3; ARG2 gets the handler data.
-      | load_handler_data h, f, handlertype
-      | callp handler
-      | check_bool_ret
-    }
-  }
-}
-
-// Returns the varint-encoded tag of field f as produced by upb_vencode32():
-// the field number shifted left by 3, OR-ed with the native wire type of the
-// field's descriptor type.
-static uint64_t upb_get_encoded_tag(const upb_fielddef *f) {
-  uint32_t raw_tag =
-      (upb_fielddef_number(f) << 3) |
-      upb_decoder_types[upb_fielddef_descriptortype(f)].native_wire_type;
-  uint64_t wire_bytes = upb_vencode32(raw_tag);
-  // No tag should be greater than 5 bytes.
-  assert(wire_bytes <= 0xffffffffff);
-  return wire_bytes;
-}
-// Emits the code that ends the sequence of repeated field f: pops the sequence frame, then calls the endseq handler (if any).
-static void upb_decoderplan_jit_endseq(decoderplan *plan,
- const upb_handlers *h,
- const upb_fielddef *f) {
- | popframe
- upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ);
- if (endseq) {
- | mov ARG1_64, CLOSURE
- | load_handler_data h, f, UPB_HANDLER_ENDSEQ
- | callp endseq  // NOTE(review): unlike the other handler calls, the return value is not checked -- confirm this is intended.
- }
-}
-// Emits the code for field f of handlers h. next_f (may be NULL) is the field whose tag is tried before falling back to dynamic dispatch.
-// PTR should point to the beginning of the tag.
-static void upb_decoderplan_jit_field(decoderplan *plan,
- const upb_handlers *h,
- const upb_fielddef *f,
- const upb_fielddef *next_f) {
- asmlabel(plan, "UPB_MCODE_FIELD_%s.%s",
- upb_msgdef_fullname(upb_handlers_msgdef(h)),
- upb_fielddef_name(f));
- uint64_t tag = upb_get_encoded_tag(f);
- uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0;
- int tag_size = upb_value_size(tag);
-
- // PC-label for the dispatch table.
- // We check the wire type (which must be loaded in esi) because the
- // table is keyed on field number, not type.
- |=>upb_getpclabel(plan, f, FIELD):
- | cmp esi, (tag & 0x7)
- | jne ->exit_jit // In the future: could be an unknown field or packed.
- |=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK):
- if (upb_fielddef_isseq(f)) {
- upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ);
- if (startseq) {
- | mov ARG1_64, CLOSURE
- | load_handler_data h, f, UPB_HANDLER_STARTSEQ
- | callp startseq
- | check_ptr_ret
- | mov CLOSURE, rax  // The sequence's closure is the value returned by startseq.
- }
- | mov rsi, FRAME->end_ofs  // The sequence frame inherits the end offset of the enclosing frame.
- | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ
- }
-
- |1: // Label for repeating this field.
-
- upb_decoderplan_jit_decodefield(plan, tag_size, h, f);
- upb_decoderplan_jit_callcb(plan, h, f);
-
- // This is kind of gross; future redesign should take into account how to
- // make this work nicely. The difficult part is that the sequence can be
- // broken either by end-of-message or by seeing a different field; in both
- // cases we need to call the endseq handler, but what we do after that
- // depends on which case triggered the end-of-sequence.
- | mov DECODER->ptr, PTR  // Commit progress.
- | cmp PTR, DECODER->jit_end
- | jae ->exit_jit  // Reached jit_end: leave the JIT.
- | cmp PTR, DECODER->effective_end
- | jb >2  // More data in this message: look at the next tag.
- if (upb_fielddef_isseq(f)) {
- upb_decoderplan_jit_endseq(plan, h, f);
- }
- | jmp =>upb_getpclabel(plan, h, ENDOFMSG)
- |2:
- | mov rcx, qword [PTR]  // Load the next 8 bytes, which start with the next tag.
- if (upb_fielddef_isseq(f)) {
- | checktag tag
- | je <1  // Same field again: decode another element.
- upb_decoderplan_jit_endseq(plan, h, f);
- // Load next tag again (popframe/endseq clobbered it).
- | mov rcx, qword [PTR]
- }
-
- if (next_tag != 0) {
- | checktag next_tag
- | je =>upb_getpclabel(plan, next_f, FIELD_NO_TYPECHECK)  // Fast path: the expected next field follows.
- }
-
- // Fall back to dynamic dispatch.
- | dyndispatch h
-}
-
-// qsort() comparator that orders uint32_t values ascending.
-//
-// Compares explicitly instead of returning the difference: an unsigned
-// difference does not fit in an int when the operands are more than INT_MAX
-// apart, which would give the result the wrong sign.
-static int upb_compare_uint32(const void *a, const void *b) {
-  const uint32_t x = *(const uint32_t*)a;
-  const uint32_t y = *(const uint32_t*)b;
-  return (x > y) - (x < y);
-}
-// Emits the code for one message type (handlers h): the two entry points, the initial dispatch, the code of every field, and the end-of-buffer / end-of-message tail.
-static void upb_decoderplan_jit_msg(decoderplan *plan,
- const upb_handlers *h) {
- asmlabel(plan, "UPB_MCODE_DECODEMSG_%s",
- upb_msgdef_fullname(upb_handlers_msgdef(h)));
- |=>upb_getpclabel(plan, h, AFTER_STARTMSG):
- | push rbp
- | mov rbp, rsp
- | jmp >1  // Skip the startmsg handler call below.
-
- |=>upb_getpclabel(plan, h, STARTMSG):
- | push rbp
- | mov rbp, rsp
-
- // Call startmsg handler (if any):
- upb_func *startmsg = upb_handlers_gethandler(h, UPB_STARTMSG_SELECTOR);
- if (startmsg) {
- // upb_flow_t startmsg(void *closure, const void *hd);
- | mov ARG1_64, CLOSURE
- | loadarg2 upb_handlers_gethandlerdata(h, UPB_STARTMSG_SELECTOR)
- | callp startmsg
- | check_bool_ret
- }
-
- |1:
- | setmsgend
- | checkpoint h
- | mov ecx, dword [PTR]  // First 4 bytes of the first tag, as dyndispatch_ expects.
- | dyndispatch_ h
-
- // --------- New code section (does not fall through) ------------------------
-
- // Emit code for parsing each field (dynamic dispatch contains pointers to
- // all of these).
-
- // Create an ordering over the fields in field number order.
- // Parsing will theoretically be fastest if we emit code in the same
- // order as field numbers are seen on-the-wire because of an optimization
- // in the generated code that skips dynamic dispatch if the next field is
- // as expected.
- const upb_msgdef *md = upb_handlers_msgdef(h);
- int num_keys = upb_msgdef_numfields(md);
- uint32_t *keys = malloc(num_keys * sizeof(*keys));  // NOTE(review): the malloc result is not checked.
- int idx = 0;
- upb_msg_iter i;
- for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
- keys[idx++] = upb_fielddef_number(upb_msg_iter_field(&i));
- }
- qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
-
- for(int i = 0; i < num_keys; i++) {  // This int i shadows the upb_msg_iter i declared above.
- const upb_fielddef *f = upb_msgdef_itof(md, keys[i]);
- const upb_fielddef *next_f =
- (i + 1 < num_keys) ? upb_msgdef_itof(md, keys[i + 1]) : NULL;
- upb_decoderplan_jit_field(plan, h, f, next_f);
- }
-
- free(keys);
-
- // --------- New code section (does not fall through) ------------------------
-
- // End-of-buf / end-of-message.
- // We hit a buffer limit; either we hit jit_end or end-of-submessage.
- |=>upb_getpclabel(plan, h, ENDOFBUF):
- | cmp PTR, DECODER->jit_end
- | jae ->exit_jit  // Hit jit_end: leave the JIT. Otherwise fall through to end-of-message.
-
- |=>upb_getpclabel(plan, h, ENDOFMSG):
- // We are at end-of-submsg: call endmsg handler (if any):
- upb_func *endmsg = upb_handlers_gethandler(h, UPB_ENDMSG_SELECTOR);
- if (endmsg) {
- // void endmsg(void *closure, const void *hd, upb_status *status) {
- | mov ARG1_64, CLOSURE
- | loadarg2 upb_handlers_gethandlerdata(h, UPB_ENDMSG_SELECTOR)
- | mov ARG3_64, DECODER->sink
- | mov ARG3_64, SINK:ARG3_64->pipeline_
- | add ARG3_64, offsetof(upb_pipeline, status_)  // ARG3 = &sink->pipeline_->status_
- | callp endmsg
- }
-
- | leave  // Undo the push rbp / mov rbp, rsp of the entry point...
- | ret  // ...and return to whoever called this message's code.
-}
-// Emits all the code of the plan: the entry/exit trampoline first, then the code of every message in plan->msginfo.
-static void upb_decoderplan_jit(decoderplan *plan) {
- // The JIT prologue/epilogue trampoline that is generated in this function
- // does not depend on the handlers, so it will never vary. Ideally we would
- // put it in an object file and just link it into upb so we could have only a
- // single copy of it instead of one copy for each decoderplan. But our
- // options for doing that are undesirable: GCC inline assembly is
- // complicated, not portable to other compilers, and comes with subtle
- // caveats about incorrect things what the optimizer might do if you eg.
- // execute non-local jumps. Putting this code in a .s file would force us to
- // calculate the structure offsets ourself instead of symbolically
- // (ie. [r15 + 0xcd] instead of DECODER->ptr). So we tolerate a bit of
- // unnecessary duplication/redundancy.
- asmlabel(plan, "upb_jit_trampoline");
- | push rbp
- | mov rbp, rsp
- | push r15  // Save the registers that the JIT code uses for DECODER, FRAME, SINKFRAME, CLOSURE and PTR.
- | push r14
- | push r13
- | push r12
- | push rbx
- // Align stack.
- | sub rsp, 8
- | mov DECODER, ARG1_64
- | mov DECODER->saved_rbp, rbp  // Lets exit_jit restore the stack from anywhere inside the JIT code.
- | mov FRAME, DECODER:ARG1_64->top
- | mov rax, DECODER:ARG1_64->sink
- | mov SINKFRAME, SINK:rax->top
- | mov CLOSURE, SINKFRAME->closure
- | mov PTR, DECODER->ptr
-
- // TODO: push return addresses for re-entry (will be necessary for multiple
- // buffer support).
- | call ARG2_64  // Second argument: address of the message code to run.
- asmlabel(plan, "exitjit");
- |->exit_jit:
- | mov rbp, DECODER->saved_rbp
- | lea rsp, [rbp - 48]  // 5 saved registers (40 bytes) plus the 8 bytes of alignment padding.
- // Counter previous alignment.
- | add rsp, 8
- | pop rbx
- | pop r12
- | pop r13
- | pop r14
- | pop r15
- | leave
- | ret
-
- upb_inttable_iter i;
- upb_inttable_begin(&i, &plan->msginfo);
- for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
- const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
- upb_decoderplan_jit_msg(plan, h);
- }
-}
-
-// Assigns pc-labels to handlers h and to each of its fields, and creates the
-// upb_jitmsginfo for h (its dispatch table is allocated here but filled in
-// later).  Recurses into the sub-handlers of every submessage field, so a
-// single call labels every handlers object reachable from h.
-static void upb_decoderplan_jit_assignpclabels(decoderplan *plan,
-                                               const upb_handlers *h) {
-  // Already labelled: nothing to do.  This is what limits the DFS.
-  if (upb_inttable_lookupptr(&plan->pclabels, h, NULL)) return;
-
-  // Reserve the block of per-message labels.
-  upb_inttable_insertptr(&plan->pclabels, h,
-                         upb_value_uint32(plan->pclabel_count));
-  plan->pclabel_count += TOTAL_MSG_PCLABELS;
-
-  upb_jitmsginfo *mi = malloc(sizeof(*mi));
-  mi->max_field_number = 0;
-  upb_inttable_insertptr(&plan->msginfo, h, upb_value_ptr(mi));
-
-  upb_msg_iter it;
-  upb_msg_begin(&it, upb_handlers_msgdef(h));
-  while (!upb_msg_done(&it)) {
-    const upb_fielddef *field = upb_msg_iter_field(&it);
-    mi->max_field_number =
-        UPB_MAX(mi->max_field_number, upb_fielddef_number(field));
-
-    // Reserve the block of per-field labels.
-    upb_inttable_insertptr(&plan->pclabels, field,
-                           upb_value_uint32(plan->pclabel_count));
-    plan->pclabel_count += TOTAL_FIELD_PCLABELS;
-
-    // Discover the whole graph of handlers depth-first.  We will probably
-    // revise this later to be more explicit about the list of handlers that
-    // the plan should include.
-    if (upb_fielddef_issubmsg(field)) {
-      const upb_handlers *sub = upb_handlers_getsubhandlers(h, field);
-      if (sub) upb_decoderplan_jit_assignpclabels(plan, sub);
-    }
-
-    upb_msg_next(&it);
-  }
-
-  // TODO: support large field numbers by either using a hash table or
-  // generating code for a binary search.  For now large field numbers
-  // will just fall back to the table decoder.
-  mi->max_field_number = UPB_MIN(mi->max_field_number, 16000);
-  mi->tablearray = malloc((mi->max_field_number + 1) * sizeof(void*));
-}
-
-// Generates the JIT code for "plan".
-//
-// Assigns pc-labels, runs the code generator, maps memory for the result,
-// registers it with GDB, encodes the machine code, fills in the per-message
-// dispatch tables and finally makes the code executable.  In builds without
-// NDEBUG the code is also dumped to /tmp/upb-jit-code.s.
-//
-// If no memory can be mapped for the code, plan->jit_code is left NULL (and
-// plan->jit_size 0), which upb_decoder_enterjit() treats as "no JIT code".
-static void upb_decoderplan_makejit(decoderplan *plan) {
-  upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR);
-  plan->debug_info = NULL;
-
-  // Assign pclabels.
-  plan->pclabel_count = 0;
-  upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32);
-  upb_decoderplan_jit_assignpclabels(plan, plan->dest_handlers);
-
-  upb_inttable_init(&plan->asmlabels, UPB_CTYPE_PTR);
-
-  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
-  dasm_init(plan, 1);
-  dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
-  dasm_growpc(plan, plan->pclabel_count);
-  dasm_setup(plan, upb_jit_actionlist);
-
-  upb_decoderplan_jit(plan);
-
-  int dasm_status = dasm_link(plan, &plan->jit_size);
-  (void)dasm_status;
-  assert(dasm_status == DASM_S_OK);
-
-  // Declared before the first "goto cleanup" because the cleanup code uses it.
-  upb_inttable_iter i;
-
-  // Anonymous mappings take fd -1: some systems require it, and it is what
-  // portable code is expected to pass.
-  plan->jit_code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE,
-                        MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-  if (plan->jit_code == MAP_FAILED) {
-    // mmap() reports failure with MAP_FAILED, not NULL.  Without memory for
-    // the code there is nothing to encode: leave the plan without JIT code
-    // instead of writing through an invalid pointer.
-    plan->jit_code = NULL;
-    plan->jit_size = 0;
-    upb_inttable_uninit(&plan->pclabels);
-    goto cleanup;
-  }
-
-  upb_reg_jit_gdb(plan);
-
-  dasm_encode(plan, plan->jit_code);
-
-  // Create dispatch tables.
-  upb_inttable_begin(&i, &plan->msginfo);
-  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
-    const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i);
-    upb_jitmsginfo *mi = upb_getmsginfo(plan, h);
-    // We jump to after the startmsg handler since it is called before entering
-    // the JIT (either by upb_pbdecoder or by a previous call to the JIT).
-    mi->jit_func = plan->jit_code +
-        dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG));
-    for (uint32_t j = 0; j <= mi->max_field_number; j++) {
-      const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), j);
-      if (f) {
-        mi->tablearray[j] = plan->jit_code +
-            dasm_getpclabel(plan, upb_getpclabel(plan, f, FIELD));
-      } else {
-        // TODO: extend the JIT to handle unknown fields.
-        // For the moment we exit the JIT for any unknown field.
-        mi->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
-      }
-    }
-  }
-
-  upb_inttable_uninit(&plan->pclabels);
-
-  // NOTE(review): the result of mprotect() is not checked.
-  mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ);
-
-#ifndef NDEBUG
-  {
-    // Dump to a .o file in /tmp, for easy inspection.
-
-    // Convert all asm labels from pclabel offsets to machine code offsets.
-    upb_inttable mclabels;
-    upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
-    upb_inttable_begin(&i, &plan->asmlabels);
-    for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
-      upb_inttable_insert(
-          &mclabels,
-          dasm_getpclabel(plan, upb_inttable_iter_key(&i)),
-          upb_inttable_iter_value(&i));
-    }
-
-    FILE *f = fopen("/tmp/upb-jit-code.s", "w");
-    if (f) {
-      fputs(" .text", f);
-      size_t linelen = 0;
-      for (size_t ofs = 0; ofs < plan->jit_size; ofs++) {
-        upb_value v;
-        if (upb_inttable_lookup(&mclabels, ofs, &v)) {
-          const char *label = upb_value_getptr(v);
-          fprintf(f, "\n\n_%s:\n", label);
-          fprintf(f, " .globl _%s", label);
-          linelen = 1000;  // Forces a fresh ".byte" line after a label.
-        }
-        if (linelen >= 77) {
-          linelen = fprintf(f, "\n .byte %u", plan->jit_code[ofs]);
-        } else {
-          linelen += fprintf(f, ",%u", plan->jit_code[ofs]);
-        }
-      }
-      fputs("\n", f);
-      fclose(f);
-    } else {
-      fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing.\n");
-    }
-
-    upb_inttable_uninit(&mclabels);
-  }
-#endif
-
-cleanup:
-  // The label names were allocated by asmlabel(); free them with the table.
-  upb_inttable_begin(&i, &plan->asmlabels);
-  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
-    free(upb_value_getptr(upb_inttable_iter_value(&i)));
-  }
-  upb_inttable_uninit(&plan->asmlabels);
-
-  dasm_free(plan);
-  free(globals);
-}
-
-// Releases what upb_decoderplan_makejit() created for "plan": every
-// upb_jitmsginfo with its dispatch table, the msginfo table itself, the
-// mapped JIT code and the GDB debug image.
-static void upb_decoderplan_freejit(decoderplan *plan) {
-  upb_inttable_iter it;
-  upb_inttable_begin(&it, &plan->msginfo);
-  while (!upb_inttable_done(&it)) {
-    upb_jitmsginfo *info = upb_value_getptr(upb_inttable_iter_value(&it));
-    free(info->tablearray);
-    free(info);
-    upb_inttable_next(&it);
-  }
-  upb_inttable_uninit(&plan->msginfo);
-
-  munmap(plan->jit_code, plan->jit_size);
-  free(plan->debug_info);
-  // TODO: unregister
-}
-// Runs the JIT code of "plan" on decoder d, but only if the plan has JIT code, both frame stacks are at their base, and d->ptr is set and below d->jit_end.
-static void upb_decoder_enterjit(upb_pbdecoder *d, const decoderplan *plan) {
- if (plan->jit_code &&
- d->top == d->stack &&
- d->sink->top == d->sink->stack &&
- d->ptr && d->ptr < d->jit_end) {
-#ifndef NDEBUG
- register uint64_t rbx asm ("rbx") = 11;  // Known values in the registers the trampoline saves; checked after the call.
- register uint64_t r12 asm ("r12") = 12;
- register uint64_t r13 asm ("r13") = 13;
- register uint64_t r14 asm ("r14") = 14;
- register uint64_t r15 asm ("r15") = 15;
-#endif
- // Decodes as many fields as possible, updating d->ptr appropriately,
- // before falling through to the slow(er) path.
- void (*upb_jit_decode)(upb_pbdecoder *d, void*) = (void*)plan->jit_code;  // The trampoline is at the start of the generated code.
- upb_jitmsginfo *mi = upb_getmsginfo(plan, plan->dest_handlers);
- assert(mi);
- upb_jit_decode(d, mi->jit_func);  // Second argument: the AFTER_STARTMSG entry of the top-level message.
- assert(d->ptr <= d->end);
-
- // Test that callee-save registers were properly restored.
- assert(rbx == 11);
- assert(r12 == 12);
- assert(r13 == 13);
- assert(r14 == 14);
- assert(r15 == 15);
- }
-}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback