diff options
Diffstat (limited to 'upb/pb/decoder_x64.dasc')
-rw-r--r-- | upb/pb/decoder_x64.dasc | 1086 |
1 files changed, 0 insertions, 1086 deletions
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc deleted file mode 100644 index dee063a..0000000 --- a/upb/pb/decoder_x64.dasc +++ /dev/null @@ -1,1086 +0,0 @@ -|// -|// upb - a minimalist implementation of protocol buffers. -|// -|// Copyright (c) 2011 Google Inc. See LICENSE for details. -|// Author: Josh Haberman <jhaberman@gmail.com> -|// -|// JIT compiler for upb_pbdecoder on x86. Given a decoderplan object (which -|// contains an embedded set of upb_handlers), generates code specialized to -|// parsing the specific message and calling specific handlers. -|// -|// Since the JIT can call other functions (the JIT'ted code is not a leaf -|// function) we must respect alignment rules. All x86-64 systems require -|// 16-byte stack alignment. - -#define _GNU_SOURCE -#include <stdio.h> -#include <sys/mman.h> -#include "dynasm/dasm_x86.h" -#include "upb/shim/shim.h" - -#ifndef MAP_ANONYMOUS -# define MAP_ANONYMOUS MAP_ANON -#endif - -// We map into the low 32 bits when we can, but if this is not available -// (like on OS X) we take what we can get. It's not required for correctness, -// it's just a performance thing that makes it more likely that our jumps -// can be rel32 (i.e. within 32-bits of our pc) instead of the longer -// sequence required for other jumps (see callp). -#ifndef MAP_32BIT -#define MAP_32BIT 0 -#endif - -// These are used to track jump targets for messages and fields. -enum { - STARTMSG = 0, - AFTER_STARTMSG = 1, - ENDOFBUF = 2, - ENDOFMSG = 3, - DYNDISPATCH = 4, - TOTAL_MSG_PCLABELS = 5, -}; - -enum { - FIELD = 0, - FIELD_NO_TYPECHECK = 1, - TOTAL_FIELD_PCLABELS = 2, -}; - -typedef struct { - uint32_t max_field_number; - // Currently keyed on field number. Could also try keying it - // on encoded or decoded tag, or on encoded field number. - void **tablearray; - // Pointer to the JIT code for parsing this message. 
- void *jit_func; -} upb_jitmsginfo; - -static uint32_t upb_getpclabel(decoderplan *plan, const void *obj, int n) { - upb_value v; - bool found = upb_inttable_lookupptr(&plan->pclabels, obj, &v); - UPB_ASSERT_VAR(found, found); - return upb_value_getuint32(v) + n; -} - -static upb_jitmsginfo *upb_getmsginfo(const decoderplan *plan, - const upb_handlers *h) { - upb_value v; - bool found = upb_inttable_lookupptr(&plan->msginfo, h, &v); - UPB_ASSERT_VAR(found, found); - return upb_value_getptr(v); -} - -// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code -// at runtime. GDB 7.x+ has defined an interface for doing this, and these -// structure/function definitions are copied out of gdb/jit.h -// -// We need to give GDB an ELF file at runtime describing the symbols we have -// generated. To avoid implementing the ELF format, we generate an ELF file -// at compile-time and compile it in as a character string. We can replace -// a few key constants (address of JIT-ted function and its size) by looking -// for a few magic numbers and doing a dumb string replacement. - -#ifndef __APPLE__ -const unsigned char upb_jit_debug_elf_file[] = { -#include "upb/pb/jit_debug_elf_file.h" -}; - -typedef enum -{ - GDB_JIT_NOACTION = 0, - GDB_JIT_REGISTER, - GDB_JIT_UNREGISTER -} jit_actions_t; - -typedef struct gdb_jit_entry { - struct gdb_jit_entry *next_entry; - struct gdb_jit_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -} gdb_jit_entry; - -typedef struct { - uint32_t version; - uint32_t action_flag; - gdb_jit_entry *relevant_entry; - gdb_jit_entry *first_entry; -} gdb_jit_descriptor; - -gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; - -void __attribute__((noinline)) __jit_debug_register_code() { - __asm__ __volatile__(""); -} - -void upb_reg_jit_gdb(decoderplan *plan) { - // Create debug info. 
- size_t elf_len = sizeof(upb_jit_debug_elf_file); - plan->debug_info = malloc(elf_len); - memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len); - uint64_t *p = (void*)plan->debug_info; - for (; (void*)(p+1) <= (void*)plan->debug_info + elf_len; ++p) { - if (*p == 0x12345678) { *p = (uintptr_t)plan->jit_code; } - if (*p == 0x321) { *p = plan->jit_size; } - } - - // Register the JIT-ted code with GDB. - gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry)); - e->next_entry = __jit_debug_descriptor.first_entry; - e->prev_entry = NULL; - if (e->next_entry) e->next_entry->prev_entry = e; - e->symfile_addr = plan->debug_info; - e->symfile_size = elf_len; - __jit_debug_descriptor.first_entry = e; - __jit_debug_descriptor.relevant_entry = e; - __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER; - __jit_debug_register_code(); -} - -#else - -void upb_reg_jit_gdb(decoderplan *plan) { - (void)plan; -} - -#endif - -// Has to be a separate function, otherwise GCC will complain about -// expressions like (&foo != NULL) because they will never evaluate -// to false. -static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } - -|.arch x64 -|.actionlist upb_jit_actionlist -|.globals UPB_JIT_GLOBAL_ -|.globalnames upb_jit_globalnames -| -|// Calling conventions. Note -- this will need to be changed for -|// Windows, which uses a different calling convention! -|.define ARG1_64, rdi -|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi. -|.define ARG2_32, esi -|.define ARG2_64, rsi -|.define ARG3_32, edx -|.define ARG3_64, rdx -|.define ARG4_32, ecx -|.define ARG4_64, rcx -|.define XMMARG1, xmm0 - -| -|// Register allocation / type map. -|// ALL of the code in this file uses these register allocations. -|// When we "call" within this file, we do not use regular calling -|// conventions, but of course when calling to user callbacks we must. -|.define PTR, rbx // Writing this to DECODER->ptr commits our progress. 
-|.define CLOSURE, r12 -|.type SINKFRAME, upb_sinkframe, r13 -|.type FRAME, frame, r14 -|.type DECODER, upb_pbdecoder, r15 -|.type SINK, upb_sink -| -|.macro callp, addr -|| upb_assert_notnull(addr); -|// TODO(haberman): fix this. I believe the predicate we should actually be -|// testing is whether the jump distance is greater than INT32_MAX, not the -|// absolute address of the target. -|| if ((uintptr_t)addr < 0xffffffff) { - | call &addr -|| } else { - | mov64 rax, (uintptr_t)addr - | call rax -|| } -|.endmacro -| -|.macro loadarg2, val -||{ -|| uintptr_t data = (uintptr_t)val; -|| if (data > 0xffffffff) { -| mov64 ARG2_64, data -|| } else if (data) { -| mov ARG2_32, data -|| } else { -| xor ARG2_32, ARG2_32 -|| } -|| } -|.endmacro -| -|.macro load_handler_data, h, f, type -| loadarg2 gethandlerdata(h, f, type) -|.endmacro -| -|// Checkpoints our progress by writing PTR to DECODER, and -|// checks for end-of-buffer. -|.macro checkpoint, h -| mov DECODER->ptr, PTR -| cmp PTR, DECODER->effective_end -| jae =>upb_getpclabel(plan, h, ENDOFBUF) -|.endmacro -| -|.macro check_bool_ret -| test al, al -| jz ->exit_jit -|.endmacro -| -|.macro check_ptr_ret -| test rax, rax -| jz ->exit_jit -|.endmacro -| -|// Decodes varint into ARG2. -|// Inputs: -|// - ecx: first 4 bytes of varint -|// - offset: offset from PTR where varint begins -|// Outputs: -|// - ARG2: contains decoded varint -|// - rax: new PTR -|.macro decode_loaded_varint, offset -| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder. -| lea rax, [PTR + offset + 1] -| mov ARG2_32, ecx -| and ARG2_32, 0x7f -| test cl, cl -| jns >9 -| lea rax, [PTR + offset + 2] -| movzx edx, ch -| and edx, 0x7f -| shl edx, 7 -| or ARG2_32, edx -| test cx, cx -| jns >9 -| mov ARG1_64, rax -|// XXX: I don't think this handles 64-bit values correctly. 
-|// Test with UINT64_MAX -| callp upb_vdecode_max8_fast -|// rax return from function will contain new pointer -| mov ARG2_64, rdx -| check_ptr_ret // Check for unterminated, >10-byte varint. -|9: -|.endmacro -| -|.macro decode_varint, offset -| mov ecx, dword [PTR + offset] -| decode_loaded_varint offset -| mov PTR, rax -|.endmacro -| -|// Table-based field dispatch. -|// Inputs: -|// - ecx: first 4 bytes of tag -|// Outputs: -|// - ecx: field number -|// - esi: wire type -|// Could specialize this by avoiding the value masking: could just key the -|// table on the raw (length-masked) varint to save 3-4 cycles of latency. -|// Currently only support tables where all entries are in the array part. -|.macro dyndispatch_, h -|| asmlabel(plan, "_UPB_MCODE_DISPATCH_%s.%d", -|| upb_msgdef_fullname(upb_handlers_msgdef(h)), rand()); -|=>upb_getpclabel(plan, h, DYNDISPATCH): -| decode_loaded_varint, 0 -| mov ecx, esi -| shr ecx, 3 -| and esi, 0x7 // Note: this value is used in the FIELD pclabel below. -| cmp esi, UPB_WIRE_TYPE_END_GROUP -| je >1 -|| upb_jitmsginfo *mi = upb_getmsginfo(plan, h); -| cmp ecx, mi->max_field_number // Bounds-check the field. -| ja ->exit_jit // In the future; could be unknown label -|| if ((uintptr_t)mi->tablearray < 0xffffffff) { -| // TODO: support hybrid array/hash tables. -| mov rax, qword [rcx*8 + mi->tablearray] -|| } else { -| mov64 rax, (uintptr_t)mi->tablearray -| mov rax, qword [rax + rcx*8] -|| } -| jmp rax // Dispatch: unpredictable jump. -|1: -|// End group. -| cmp ecx, FRAME->group_fieldnum -| jne ->exit_jit // Unexpected END_GROUP tag. -| mov PTR, rax // rax came from decode_loaded_varint -| mov DECODER->ptr, PTR -| jmp =>upb_getpclabel(plan, h, ENDOFMSG) -|.endmacro -| -|.if 1 -| // Replicated dispatch: larger code, but better branch prediction. -| .define dyndispatch, dyndispatch_ -|.else -| // Single dispatch: smaller code, could be faster because of reduced -| // icache usage. 
We keep this around to allow for easy comparison between -| // the two. -| .macro dyndispatch, h -| jmp =>upb_getpclabel(plan, h, DYNDISPATCH) -| .endmacro -|.endif -| -|.macro pushsinkframe, handlers, field, endtype -| mov rax, DECODER->sink -| mov dword SINKFRAME->selector, getselector(field, endtype) -| lea rcx, [SINKFRAME + sizeof(upb_sinkframe)] // rcx for short addressing -| cmp rcx, SINK:rax->limit -| jae ->exit_jit // Frame stack overflow. -| mov64 r9, (uintptr_t)handlers -| mov SINKFRAME:rcx->h, r9 -| mov SINKFRAME:rcx->closure, CLOSURE -| mov SINK:rax->top, rcx -| mov SINKFRAME, rcx -|.endmacro -| -|.macro popsinkframe -| sub SINKFRAME, sizeof(upb_sinkframe) -| mov rax, DECODER->sink -| mov SINK:rax->top, SINKFRAME -| mov CLOSURE, SINKFRAME->closure -|.endmacro -| -|// Push a stack frame (not the CPU stack, the upb_pbdecoder stack). -|.macro pushframe, handlers, field, end_offset_, endtype -|// Decoder Frame. -| lea rax, [FRAME + sizeof(frame)] // rax for short addressing -| cmp rax, DECODER->limit -| jae ->exit_jit // Frame stack overflow. -| mov64 r10, (uintptr_t)field -| mov FRAME:rax->f, r10 -| mov qword FRAME:rax->end_ofs, end_offset_ -| mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ) -| mov byte FRAME:rax->is_packed, 0 -|| if (upb_fielddef_istagdelim(field) && endtype == UPB_HANDLER_ENDSUBMSG) { -| mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field) -|| } else { -| mov dword FRAME:rax->group_fieldnum, 0xffffffff -|| } -| mov DECODER->top, rax -| mov FRAME, rax -| pushsinkframe handlers, field, endtype -|.endmacro -| -|.macro popframe -| sub FRAME, sizeof(frame) -| mov DECODER->top, FRAME -| popsinkframe -| setmsgend -|.endmacro -| -|.macro setmsgend -| mov rsi, DECODER->jit_end -| mov rax, qword FRAME->end_ofs // Will be UINT64_MAX for groups. 
-| sub rax, qword DECODER->bufstart_ofs -| add rax, qword DECODER->buf // rax = d->buf + f->end_ofs - d->bufstart_ofs -| jc >8 // If the addition overflowed, use jit_end -| cmp rax, rsi -| ja >8 // If jit_end is less, use jit_end -| mov rsi, rax // Use frame end. -|8: -| mov DECODER->effective_end, rsi -|.endmacro -| -|// rcx contains the tag, compare it against "tag", but since it is a varint -|// we must only compare as many bytes as actually have data. -|.macro checktag, tag -|| switch (upb_value_size(tag)) { -|| case 1: -| cmp cl, tag -|| break; -|| case 2: -| cmp cx, tag -|| break; -|| case 3: -| and ecx, 0xffffff // 3 bytes -| cmp rcx, tag -|| case 4: -| cmp ecx, tag -|| break; -|| case 5: -| mov64 rdx, 0xffffffffff // 5 bytes -| and rcx, rdx -| cmp rcx, tag -|| break; -|| default: abort(); -|| } -|.endmacro -| -|.macro sethas, reg, hasbit -|| if (hasbit >= 0) { -| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8)) -|| } -|.endmacro - - -#include <stdlib.h> -#include "upb/pb/varint.h" - -static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - return upb_handlers_gethandler(h, getselector(f, type)); -} - -static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type)); -} - -static void asmlabel(decoderplan *plan, const char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - char *str = NULL; - size_t size = 0; - upb_vrprintf(&str, &size, 0, fmt, ap); - va_end(ap); - uint32_t label = plan->pclabel_count++; - dasm_growpc(plan, plan->pclabel_count); - |=>label: - upb_inttable_insert(&plan->asmlabels, label, upb_value_ptr(str)); -} - -// Decodes the next val into ARG2, advances PTR. -static void upb_decoderplan_jit_decodefield(decoderplan *plan, - size_t tag_size, - const upb_handlers *h, - const upb_fielddef *f) { - // Decode the value into arg 3 for the callback. 
- asmlabel(plan, "UPB_MCODE_DECODE_FIELD_%s.%s", - upb_msgdef_fullname(upb_handlers_msgdef(h)), - upb_fielddef_name(f)); - switch (upb_fielddef_descriptortype(f)) { - case UPB_DESCRIPTOR_TYPE_DOUBLE: - | movsd XMMARG1, qword [PTR + tag_size] - | add PTR, 8 + tag_size - break; - - case UPB_DESCRIPTOR_TYPE_FIXED64: - case UPB_DESCRIPTOR_TYPE_SFIXED64: - | mov ARG2_64, qword [PTR + tag_size] - | add PTR, 8 + tag_size - break; - - case UPB_DESCRIPTOR_TYPE_FLOAT: - | movss XMMARG1, dword [PTR + tag_size] - | add PTR, 4 + tag_size - break; - - case UPB_DESCRIPTOR_TYPE_FIXED32: - case UPB_DESCRIPTOR_TYPE_SFIXED32: - | mov ARG2_32, dword [PTR + tag_size] - | add PTR, 4 + tag_size - break; - - case UPB_DESCRIPTOR_TYPE_BOOL: - // Can't assume it's one byte long, because bool must be wire-compatible - // with all of the varint integer types. - | decode_varint tag_size - | test ARG2_64, ARG2_64 - | setne al - | movzx ARG2_32, al - break; - - case UPB_DESCRIPTOR_TYPE_INT64: - case UPB_DESCRIPTOR_TYPE_UINT64: - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_UINT32: - case UPB_DESCRIPTOR_TYPE_ENUM: - | decode_varint tag_size - break; - - case UPB_DESCRIPTOR_TYPE_SINT64: - // 64-bit zig-zag decoding. - | decode_varint tag_size - | mov rax, ARG2_64 - | shr ARG2_64, 1 - | and rax, 1 - | neg rax - | xor ARG2_64, rax - break; - - case UPB_DESCRIPTOR_TYPE_SINT32: - // 32-bit zig-zag decoding. - | decode_varint tag_size - | mov eax, ARG2_32 - | shr ARG2_32, 1 - | and eax, 1 - | neg eax - | xor ARG2_32, eax - break; - - case UPB_DESCRIPTOR_TYPE_STRING: - case UPB_DESCRIPTOR_TYPE_BYTES: { - // We only handle the case where the entire string is in our current - // buf, which sidesteps any security problems. The C path has more - // robust checks. - | mov ecx, dword [PTR + tag_size] - | decode_loaded_varint tag_size - | mov rdi, DECODER->end - | sub rdi, rax - | cmp ARG2_64, rdi // if (len > d->end - str) - | ja ->exit_jit // Can't deliver, whole string not in buf. 
- | mov PTR, rax - - upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR); - if (handler) { - // void* startstr(void *c, const void *hd, size_t hint) - | mov DECODER->tmp_len, ARG2_32 - | mov ARG1_64, CLOSURE - | mov ARG3_64, ARG2_64 - | load_handler_data h, f, UPB_HANDLER_STARTSTR - | callp handler - | check_ptr_ret - | mov ARG1_64, rax // sub-closure - | mov ARG4_32, DECODER->tmp_len - } else { - | mov ARG1_64, CLOSURE - | mov ARG4_64, ARG2_64 - } - - handler = gethandler(h, f, UPB_HANDLER_STRING); - if (handler) { - // size_t str(void *c, const void *hd, const char *buf, size_t len) - | load_handler_data h, f, UPB_HANDLER_STRING - | mov ARG3_64, PTR - | callp handler - // TODO: properly handle returns other than "n" (the whole string). - | add PTR, rax - } else { - | add PTR, ARG4_64 - } - - handler = gethandler(h, f, UPB_HANDLER_ENDSTR); - if (handler) { - // bool endstr(const upb_sinkframe *frame); - | mov ARG1_64, CLOSURE - | load_handler_data h, f, UPB_HANDLER_ENDSTR - | callp handler - | check_bool_ret - } - break; - } - - // Will dispatch callbacks and call submessage in a second. - case UPB_DESCRIPTOR_TYPE_MESSAGE: - | decode_varint tag_size - break; - case UPB_DESCRIPTOR_TYPE_GROUP: - | add PTR, tag_size - break; - - default: abort(); - } -} - -static void upb_decoderplan_jit_callcb(decoderplan *plan, - const upb_handlers *h, - const upb_fielddef *f) { - // Call callbacks. Specializing the append accessors didn't yield a speed - // increase in benchmarks. - asmlabel(plan, "UPB_MCODE_CALLCB_%s.%s", - upb_msgdef_fullname(upb_handlers_msgdef(h)), - upb_fielddef_name(f)); - if (upb_fielddef_issubmsg(f)) { - // Call startsubmsg handler (if any). 
- upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG); - if (startsubmsg) { - // upb_sflow_t startsubmsg(const upb_sinkframe *frame) - | mov DECODER->tmp_len, ARG2_32 - | mov ARG1_64, CLOSURE - | load_handler_data h, f, UPB_HANDLER_STARTSUBMSG - | callp startsubmsg - | check_ptr_ret - | mov CLOSURE, rax - } - - const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f); - if (sub_h) { - if (upb_fielddef_istagdelim(f)) { - | mov rdx, UPB_NONDELIMITED - } else { - | mov esi, DECODER->tmp_len - | mov rdx, PTR - | sub rdx, DECODER->buf - | add rdx, DECODER->bufstart_ofs - | add rdx, rsi // = d->bufstart_ofs + (d->ptr - d->buf) + delim_len - } - | pushframe sub_h, f, rdx, UPB_HANDLER_ENDSUBMSG - | call =>upb_getpclabel(plan, sub_h, STARTMSG) - | popframe - } else { - if (upb_fielddef_istagdelim(f)) { - // Groups with no handlers not supported yet. - assert(false); - } else { - | mov esi, DECODER->tmp_len - | add PTR, rsi - } - } - - // Call endsubmsg handler (if any). - upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG); - if (endsubmsg) { - // upb_flow_t endsubmsg(void *closure, upb_value fval); - | mov ARG1_64, CLOSURE - | load_handler_data h, f, UPB_HANDLER_ENDSUBMSG - | callp endsubmsg - | check_bool_ret - } - } else if (!upb_fielddef_isstring(f)) { - upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f); - upb_selector_t sel = getselector(f, handlertype); - upb_func *handler = gethandler(h, f, handlertype); - const upb_shim_data *data = upb_shim_getdata(h, sel); - if (data) { - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT64: - case UPB_TYPE_UINT64: - | mov [CLOSURE + data->offset], ARG2_64 - break; - case UPB_TYPE_INT32: - case UPB_TYPE_UINT32: - case UPB_TYPE_ENUM: - | mov [CLOSURE + data->offset], ARG2_32 - break; - case UPB_TYPE_DOUBLE: - | movsd qword [CLOSURE + data->offset], XMMARG1 - break; - case UPB_TYPE_FLOAT: - | movss dword [CLOSURE + data->offset], XMMARG1 - break; - case UPB_TYPE_BOOL: - | mov 
[CLOSURE + data->offset], ARG2_8 - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: - assert(false); break; - } - | sethas CLOSURE, data->hasbit - } else if (handler) { - // bool value(const upb_sinkframe* frame, ctype val) - | mov ARG1_64, CLOSURE - | mov ARG3_64, ARG2_64 - | load_handler_data h, f, handlertype - | callp handler - | check_bool_ret - } - } -} - -static uint64_t upb_get_encoded_tag(const upb_fielddef *f) { - uint32_t tag = (upb_fielddef_number(f) << 3) | - upb_decoder_types[upb_fielddef_descriptortype(f)].native_wire_type; - uint64_t encoded_tag = upb_vencode32(tag); - // No tag should be greater than 5 bytes. - assert(encoded_tag <= 0xffffffffff); - return encoded_tag; -} - -static void upb_decoderplan_jit_endseq(decoderplan *plan, - const upb_handlers *h, - const upb_fielddef *f) { - | popframe - upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); - if (endseq) { - | mov ARG1_64, CLOSURE - | load_handler_data h, f, UPB_HANDLER_ENDSEQ - | callp endseq - } -} - -// PTR should point to the beginning of the tag. -static void upb_decoderplan_jit_field(decoderplan *plan, - const upb_handlers *h, - const upb_fielddef *f, - const upb_fielddef *next_f) { - asmlabel(plan, "UPB_MCODE_FIELD_%s.%s", - upb_msgdef_fullname(upb_handlers_msgdef(h)), - upb_fielddef_name(f)); - uint64_t tag = upb_get_encoded_tag(f); - uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0; - int tag_size = upb_value_size(tag); - - // PC-label for the dispatch table. - // We check the wire type (which must be loaded in esi) because the - // table is keyed on field number, not type. - |=>upb_getpclabel(plan, f, FIELD): - | cmp esi, (tag & 0x7) - | jne ->exit_jit // In the future: could be an unknown field or packed. 
- |=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK): - if (upb_fielddef_isseq(f)) { - upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ); - if (startseq) { - | mov ARG1_64, CLOSURE - | load_handler_data h, f, UPB_HANDLER_STARTSEQ - | callp startseq - | check_ptr_ret - | mov CLOSURE, rax - } - | mov rsi, FRAME->end_ofs - | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ - } - - |1: // Label for repeating this field. - - upb_decoderplan_jit_decodefield(plan, tag_size, h, f); - upb_decoderplan_jit_callcb(plan, h, f); - - // This is kind of gross; future redesign should take into account how to - // make this work nicely. The difficult part is that the sequence can be - // broken either by end-of-message or by seeing a different field; in both - // cases we need to call the endseq handler, but what we do after that - // depends on which case triggered the end-of-sequence. - | mov DECODER->ptr, PTR - | cmp PTR, DECODER->jit_end - | jae ->exit_jit - | cmp PTR, DECODER->effective_end - | jb >2 - if (upb_fielddef_isseq(f)) { - upb_decoderplan_jit_endseq(plan, h, f); - } - | jmp =>upb_getpclabel(plan, h, ENDOFMSG) - |2: - | mov rcx, qword [PTR] - if (upb_fielddef_isseq(f)) { - | checktag tag - | je <1 - upb_decoderplan_jit_endseq(plan, h, f); - // Load next tag again (popframe/endseq clobbered it). - | mov rcx, qword [PTR] - } - - if (next_tag != 0) { - | checktag next_tag - | je =>upb_getpclabel(plan, next_f, FIELD_NO_TYPECHECK) - } - - // Fall back to dynamic dispatch. 
- | dyndispatch h -} - -static int upb_compare_uint32(const void *a, const void *b) { - return *(uint32_t*)a - *(uint32_t*)b; -} - -static void upb_decoderplan_jit_msg(decoderplan *plan, - const upb_handlers *h) { - asmlabel(plan, "UPB_MCODE_DECODEMSG_%s", - upb_msgdef_fullname(upb_handlers_msgdef(h))); - |=>upb_getpclabel(plan, h, AFTER_STARTMSG): - | push rbp - | mov rbp, rsp - | jmp >1 - - |=>upb_getpclabel(plan, h, STARTMSG): - | push rbp - | mov rbp, rsp - - // Call startmsg handler (if any): - upb_func *startmsg = upb_handlers_gethandler(h, UPB_STARTMSG_SELECTOR); - if (startmsg) { - // upb_flow_t startmsg(void *closure, const void *hd); - | mov ARG1_64, CLOSURE - | loadarg2 upb_handlers_gethandlerdata(h, UPB_STARTMSG_SELECTOR) - | callp startmsg - | check_bool_ret - } - - |1: - | setmsgend - | checkpoint h - | mov ecx, dword [PTR] - | dyndispatch_ h - - // --------- New code section (does not fall through) ------------------------ - - // Emit code for parsing each field (dynamic dispatch contains pointers to - // all of these). - - // Create an ordering over the fields in field number order. - // Parsing will theoretically be fastest if we emit code in the same - // order as field numbers are seen on-the-wire because of an optimization - // in the generated code that skips dynamic dispatch if the next field is - // as expected. - const upb_msgdef *md = upb_handlers_msgdef(h); - int num_keys = upb_msgdef_numfields(md); - uint32_t *keys = malloc(num_keys * sizeof(*keys)); - int idx = 0; - upb_msg_iter i; - for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { - keys[idx++] = upb_fielddef_number(upb_msg_iter_field(&i)); - } - qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); - - for(int i = 0; i < num_keys; i++) { - const upb_fielddef *f = upb_msgdef_itof(md, keys[i]); - const upb_fielddef *next_f = - (i + 1 < num_keys) ? 
upb_msgdef_itof(md, keys[i + 1]) : NULL; - upb_decoderplan_jit_field(plan, h, f, next_f); - } - - free(keys); - - // --------- New code section (does not fall through) ------------------------ - - // End-of-buf / end-of-message. - // We hit a buffer limit; either we hit jit_end or end-of-submessage. - |=>upb_getpclabel(plan, h, ENDOFBUF): - | cmp PTR, DECODER->jit_end - | jae ->exit_jit - - |=>upb_getpclabel(plan, h, ENDOFMSG): - // We are at end-of-submsg: call endmsg handler (if any): - upb_func *endmsg = upb_handlers_gethandler(h, UPB_ENDMSG_SELECTOR); - if (endmsg) { - // void endmsg(void *closure, const void *hd, upb_status *status) { - | mov ARG1_64, CLOSURE - | loadarg2 upb_handlers_gethandlerdata(h, UPB_ENDMSG_SELECTOR) - | mov ARG3_64, DECODER->sink - | mov ARG3_64, SINK:ARG3_64->pipeline_ - | add ARG3_64, offsetof(upb_pipeline, status_) - | callp endmsg - } - - | leave - | ret -} - -static void upb_decoderplan_jit(decoderplan *plan) { - // The JIT prologue/epilogue trampoline that is generated in this function - // does not depend on the handlers, so it will never vary. Ideally we would - // put it in an object file and just link it into upb so we could have only a - // single copy of it instead of one copy for each decoderplan. But our - // options for doing that are undesirable: GCC inline assembly is - // complicated, not portable to other compilers, and comes with subtle - // caveats about incorrect things that the optimizer might do if you, e.g., - // execute non-local jumps. Putting this code in a .s file would force us to - // calculate the structure offsets ourselves instead of symbolically - // (i.e. [r15 + 0xcd] instead of DECODER->ptr). So we tolerate a bit of - // unnecessary duplication/redundancy. - asmlabel(plan, "upb_jit_trampoline"); - | push rbp - | mov rbp, rsp - | push r15 - | push r14 - | push r13 - | push r12 - | push rbx - // Align stack. 
- | sub rsp, 8 - | mov DECODER, ARG1_64 - | mov DECODER->saved_rbp, rbp - | mov FRAME, DECODER:ARG1_64->top - | mov rax, DECODER:ARG1_64->sink - | mov SINKFRAME, SINK:rax->top - | mov CLOSURE, SINKFRAME->closure - | mov PTR, DECODER->ptr - - // TODO: push return addresses for re-entry (will be necessary for multiple - // buffer support). - | call ARG2_64 - asmlabel(plan, "exitjit"); - |->exit_jit: - | mov rbp, DECODER->saved_rbp - | lea rsp, [rbp - 48] - // Counter previous alignment. - | add rsp, 8 - | pop rbx - | pop r12 - | pop r13 - | pop r14 - | pop r15 - | leave - | ret - - upb_inttable_iter i; - upb_inttable_begin(&i, &plan->msginfo); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); - upb_decoderplan_jit_msg(plan, h); - } -} - -static void upb_decoderplan_jit_assignpclabels(decoderplan *plan, - const upb_handlers *h) { - // Limit the DFS. - if (upb_inttable_lookupptr(&plan->pclabels, h, NULL)) return; - - upb_inttable_insertptr(&plan->pclabels, h, - upb_value_uint32(plan->pclabel_count)); - plan->pclabel_count += TOTAL_MSG_PCLABELS; - - upb_jitmsginfo *info = malloc(sizeof(*info)); - info->max_field_number = 0; - upb_inttable_insertptr(&plan->msginfo, h, upb_value_ptr(info)); - - upb_msg_iter i; - upb_msg_begin(&i, upb_handlers_msgdef(h)); - for(; !upb_msg_done(&i); upb_msg_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - info->max_field_number = - UPB_MAX(info->max_field_number, upb_fielddef_number(f)); - upb_inttable_insertptr(&plan->pclabels, f, - upb_value_uint32(plan->pclabel_count)); - plan->pclabel_count += TOTAL_FIELD_PCLABELS; - - // Discover the whole graph of handlers depth-first. We will probably - // revise this later to be more explicit about the list of handlers that - // the plan should include. 
- if (upb_fielddef_issubmsg(f)) { - const upb_handlers *subh = upb_handlers_getsubhandlers(h, f); - if (subh) upb_decoderplan_jit_assignpclabels(plan, subh); - } - } - // TODO: support large field numbers by either using a hash table or - // generating code for a binary search. For now large field numbers - // will just fall back to the table decoder. - info->max_field_number = UPB_MIN(info->max_field_number, 16000); - info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*)); -} - -static void upb_decoderplan_makejit(decoderplan *plan) { - upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR); - plan->debug_info = NULL; - - // Assign pclabels. - plan->pclabel_count = 0; - upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32); - upb_decoderplan_jit_assignpclabels(plan, plan->dest_handlers); - - upb_inttable_init(&plan->asmlabels, UPB_CTYPE_PTR); - - void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals)); - dasm_init(plan, 1); - dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX); - dasm_growpc(plan, plan->pclabel_count); - dasm_setup(plan, upb_jit_actionlist); - - upb_decoderplan_jit(plan); - - int dasm_status = dasm_link(plan, &plan->jit_size); - (void)dasm_status; - assert(dasm_status == DASM_S_OK); - - plan->jit_code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE, - MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); - - upb_reg_jit_gdb(plan); - - dasm_encode(plan, plan->jit_code); - - // Create dispatch tables. - upb_inttable_iter i; - upb_inttable_begin(&i, &plan->msginfo); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); - upb_jitmsginfo *mi = upb_getmsginfo(plan, h); - // We jump to after the startmsg handler since it is called before entering - // the JIT (either by upb_pbdecoder or by a previous call to the JIT). 
- mi->jit_func = plan->jit_code + - dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG)); - for (uint32_t j = 0; j <= mi->max_field_number; j++) { - const upb_fielddef *f = upb_msgdef_itof(upb_handlers_msgdef(h), j); - if (f) { - mi->tablearray[j] = plan->jit_code + - dasm_getpclabel(plan, upb_getpclabel(plan, f, FIELD)); - } else { - // TODO: extend the JIT to handle unknown fields. - // For the moment we exit the JIT for any unknown field. - mi->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit]; - } - } - } - - upb_inttable_uninit(&plan->pclabels); - - mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ); - -#ifndef NDEBUG - // Dump to a .o file in /tmp, for easy inspection. - - // Convert all asm labels from pclabel offsets to machine code offsets. - upb_inttable mclabels; - upb_inttable_init(&mclabels, UPB_CTYPE_PTR); - upb_inttable_begin(&i, &plan->asmlabels); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_inttable_insert( - &mclabels, - dasm_getpclabel(plan, upb_inttable_iter_key(&i)), - upb_inttable_iter_value(&i)); - } - - FILE *f = fopen("/tmp/upb-jit-code.s", "w"); - if (f) { - fputs(" .text", f); - size_t linelen = 0; - for (size_t i = 0; i < plan->jit_size; i++) { - upb_value v; - if (upb_inttable_lookup(&mclabels, i, &v)) { - const char *label = upb_value_getptr(v); - fprintf(f, "\n\n_%s:\n", label); - fprintf(f, " .globl _%s", label); - linelen = 1000; - } - if (linelen >= 77) { - linelen = fprintf(f, "\n .byte %u", plan->jit_code[i]); - } else { - linelen += fprintf(f, ",%u", plan->jit_code[i]); - } - } - fputs("\n", f); - fclose(f); - } else { - fprintf(stderr, "Couldn't open /tmp/upb-jit-code.s for writing/\n"); - } - - upb_inttable_uninit(&mclabels); -#endif - - upb_inttable_begin(&i, &plan->asmlabels); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - free(upb_value_getptr(upb_inttable_iter_value(&i))); - } - upb_inttable_uninit(&plan->asmlabels); - - dasm_free(plan); - free(globals); -} - -static void 
upb_decoderplan_freejit(decoderplan *plan) { - upb_inttable_iter i; - upb_inttable_begin(&i, &plan->msginfo); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_jitmsginfo *mi = upb_value_getptr(upb_inttable_iter_value(&i)); - free(mi->tablearray); - free(mi); - } - upb_inttable_uninit(&plan->msginfo); - munmap(plan->jit_code, plan->jit_size); - free(plan->debug_info); - // TODO: unregister -} - -static void upb_decoder_enterjit(upb_pbdecoder *d, const decoderplan *plan) { - if (plan->jit_code && - d->top == d->stack && - d->sink->top == d->sink->stack && - d->ptr && d->ptr < d->jit_end) { -#ifndef NDEBUG - register uint64_t rbx asm ("rbx") = 11; - register uint64_t r12 asm ("r12") = 12; - register uint64_t r13 asm ("r13") = 13; - register uint64_t r14 asm ("r14") = 14; - register uint64_t r15 asm ("r15") = 15; -#endif - // Decodes as many fields as possible, updating d->ptr appropriately, - // before falling through to the slow(er) path. - void (*upb_jit_decode)(upb_pbdecoder *d, void*) = (void*)plan->jit_code; - upb_jitmsginfo *mi = upb_getmsginfo(plan, plan->dest_handlers); - assert(mi); - upb_jit_decode(d, mi->jit_func); - assert(d->ptr <= d->end); - - // Test that callee-save registers were properly restored. - assert(rbx == 11); - assert(r12 == 12); - assert(r13 == 13); - assert(r14 == 14); - assert(r15 == 15); - } -} |