|// |// upb - a minimalist implementation of protocol buffers. |// |// Copyright (c) 2011 Google Inc. See LICENSE for details. |// Author: Josh Haberman |// |// JIT compiler for upb_decoder on x86. Given a upb_handlers object, |// generates code specialized to parsing the specific message and |// calling specific handlers. |// |// Since the JIT can call other functions (the JIT'ted code is not a leaf |// function) we must respect alignment rules. On OS X, this means aligning |// the stack to 16 bytes. #define UPB_NONE -1 #define UPB_MULTIPLE -2 #define UPB_TOPLEVEL_ONE -3 #include #include "dynasm/dasm_proto.h" #include "dynasm/dasm_x86.h" #ifndef MAP_ANONYMOUS # define MAP_ANONYMOUS MAP_ANON #endif // We map into the low 32 bits when we can, but if this is not available // (like on OS X) we take what we can get. It's not required for correctness, // it's just a performance thing that makes it more likely that our jumps // can be rel32 (i.e. within 32-bits of our pc) instead of the longer // sequence required for other jumps (see callp). #ifndef MAP_32BIT #define MAP_32BIT 0 #endif // To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code // at runtime. GDB 7.x+ has defined an interface for doing this, and these // structure/function defintions are copied out of gdb/jit.h // // We need to give GDB an ELF file at runtime describing the symbols we have // generated. To avoid implementing the ELF format, we generate an ELF file // at compile-time and compile it in as a character string. We can replace // a few key constants (address of JIT-ted function and its size) by looking // for a few magic numbers and doing a dumb string replacement. 
#ifndef __APPLE__ const unsigned char upb_jit_debug_elf_file[] = { #include "upb/pb/jit_debug_elf_file.h" }; typedef enum { GDB_JIT_NOACTION = 0, GDB_JIT_REGISTER, GDB_JIT_UNREGISTER } jit_actions_t; typedef struct gdb_jit_entry { struct gdb_jit_entry *next_entry; struct gdb_jit_entry *prev_entry; const char *symfile_addr; uint64_t symfile_size; } gdb_jit_entry; typedef struct { uint32_t version; uint32_t action_flag; gdb_jit_entry *relevant_entry; gdb_jit_entry *first_entry; } gdb_jit_descriptor; gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); } void upb_reg_jit_gdb(upb_decoder *d) { // Create debug info. size_t elf_len = sizeof(upb_jit_debug_elf_file); d->debug_info = malloc(elf_len); memcpy(d->debug_info, upb_jit_debug_elf_file, elf_len); uint64_t *p = (void*)d->debug_info; for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) { if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; } if (*p == 0x321) { *p = d->jit_size; } } // Register the JIT-ted code with GDB. gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry)); e->next_entry = __jit_debug_descriptor.first_entry; e->prev_entry = NULL; if (e->next_entry) e->next_entry->prev_entry = e; e->symfile_addr = d->debug_info; e->symfile_size = elf_len; __jit_debug_descriptor.first_entry = e; __jit_debug_descriptor.relevant_entry = e; __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER; __jit_debug_register_code(); } #else void upb_reg_jit_gdb(upb_decoder *d) { (void)d; } #endif |.arch x64 |.actionlist upb_jit_actionlist |.globals UPB_JIT_GLOBAL_ |.globalnames upb_jit_globalnames | |// Calling conventions. Note -- this will need to be changed for |// Windows, which uses a different calling convention! 
|// Argument registers used when calling out to C functions, in the widths
|// this file needs.
|.define ARG1_64, rdi
|.define ARG2_8, sil
|.define ARG2_32, esi
|.define ARG2_64, rsi
|.define ARG3_8, dl
|.define ARG3_32, edx
|.define ARG3_64, rdx
|.define ARG4_64, rcx
|.define ARG5_32, r8d
|
|// Register allocation / type map.
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
|.define PTR, rbx
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type BYTEREGION,upb_byteregion, r14
|.type DECODER, upb_decoder, r15
|.type STDARRAY, upb_stdarray
|
|// Calls the C function at "addr".  Uses a direct call when the address is
|// below 0xffffffff, otherwise loads the address into rax and calls through
|// it (so rax is clobbered on that path).
|// NOTE(review): the direct form depends on the target being reachable from
|// the generated code with a 32-bit displacement -- confirm this holds when
|// the code buffer is not mapped low.
|.macro callp, addr
|| if ((uintptr_t)addr < 0xffffffff) {
| call &addr
|| } else {
| mov64 rax, (uintptr_t)addr
| call rax
|| }
|.endmacro
|
|// Checks PTR for end-of-buffer.
|// For groups this exits the JIT; for other messages it jumps to the
|// message's end-of-buffer label.
|.macro check_eob, m
| cmp PTR, DECODER->effective_end
|| if (m->is_group) {
| jae ->exit_jit
|| } else {
| jae =>m->jit_endofbuf_pclabel
|| }
|.endmacro
|
|// Decodes varint from [PTR + offset] -> ARG3.
|// Saves new pointer as rax.
|// Expects ecx to already hold the dword at [PTR + offset] (decode_varint
|// loads it).  Clobbers esi, and ARG1/ARG2 when the out-of-line decoder is
|// called.
|.macro decode_loaded_varint, offset
| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
| lea rax, [PTR + offset + 1]
| mov ARG3_32, ecx
| and ARG3_32, 0x7f
| test cl, cl      // Sign bit of cl is the first byte's continuation bit.
| jns >9
| lea rax, [PTR + offset + 2]
| movzx esi, ch
| and esi, 0x7f
| shl esi, 7
| or ARG3_32, esi
| test cx, cx      // Sign bit of cx is the second byte's continuation bit.
| jns >9
| // Slow path: pass the pointer past the first two bytes and the 14 bits
| // decoded so far.  Presumably the helper hands the finished value back in
| // ARG3 (rdx) and the new pointer in rax -- verify against its definition.
| mov ARG1_64, rax
| mov ARG2_32, ARG3_32
| callp upb_vdecode_max8_fast
| test rax, rax
| jz ->exit_jit // >10-byte varint.
|9:
|.endmacro
|
|// Loads the dword at [PTR + offset], decodes a varint from it into ARG3 and
|// advances PTR past the varint.
|.macro decode_varint, offset
| mov ecx, dword [PTR + offset]
| decode_loaded_varint offset
| mov PTR, rax
|.endmacro
|
|// Decode the tag -> edx.
|// Could specialize this by avoiding the value masking: could just key the
|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
|// Currently only support tables where all entries are in the array part.
|.macro dyndispatch_, m |=>m->jit_dyndispatch_pclabel: | decode_loaded_varint, 0 | mov ecx, edx | shr ecx, 3 | and edx, 0x7 | cmp ecx, m->max_field_number // Bounds-check the field. | ja ->exit_jit // In the future; could be unknown label || if ((uintptr_t)m->tablearray < 0xffffffff) { | mov rax, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables. || } else { | mov64 rax, (uintptr_t)m->tablearray | mov rax, qword [rax + rcx*8] || } | jmp rax // Dispatch: unpredictable jump. |.endmacro | |.if 1 | // Replicated dispatch: larger code, but better branch prediction. | .define dyndispatch, dyndispatch_ |.else | .macro dyndispatch, m | jmp =>m->jit_dyndispatch_pclabel | .endmacro |.endif | |// Push a stack frame (not the CPU stack, the upb_decoder stack). |.macro pushframe, f, end_offset_, is_sequence_ | lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing. | cmp rax, qword DECODER->dispatcher.limit | jae ->exit_jit // Frame stack overflow. | mov qword FRAME:rax->f, f | mov dword FRAME:rax->end_ofs, end_offset_ | mov byte FRAME:rax->is_sequence, is_sequence_ | mov DECODER->dispatcher.top, rax | mov FRAME, rax |.endmacro | |.macro popframe, m | sub FRAME, sizeof(upb_dispatcher_frame) | mov DECODER->dispatcher.top, FRAME | setmsgend m | mov CLOSURE, FRAME->closure |.endmacro | |.macro setmsgend, m | mov rsi, DECODER->jit_end || if (m->is_group) { | mov64 rax, 0xffffffffffffffff | mov qword DECODER->delim_end, rax | mov DECODER->effective_end, rsi || } else { | // Could store a correctly-biased version in the frame, at the cost of | // a larger stack. 
| mov eax, dword FRAME->end_ofs | add rax, qword DECODER->buf | mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs | cmp rax, rsi | jb >8 | mov rax, rsi // effective_end = min(d->delim_end, d->jit_end) |8: | mov DECODER->effective_end, rax || } |.endmacro | |// rax contains the tag, compare it against "tag", but since it is a varint |// we must only compare as many bytes as actually have data. |.macro checktag, tag || switch (upb_value_size(tag)) { || case 1: | cmp cl, tag || break; || case 2: | cmp cx, tag || break; || case 3: | and ecx, 0xffffff // 3 bytes | cmp rcx, tag || case 4: | cmp ecx, tag || break; || case 5: | mov64 rdx, 0xffffffffff // 5 bytes | and rcx, rdx | cmp rcx, tag || break; || default: abort(); || } |.endmacro | |// TODO: optimize for 0 (xor) and 32-bits. |.macro loadfval, f ||#ifndef NDEBUG ||// Since upb_value carries type information in debug mode ||// only, we need to pass the arguments slightly differently. | mov ARG3_32, f->fval.type ||#endif || if (f->fval.val.uint64 == 0) { | xor ARG2_32, ARG2_32 || } else if (f->fval.val.uint64 < 0xffffffff) { | mov ARG2_32, f->fval.val.uint64 || } else { | mov64 ARG2_64, f->fval.val.uint64 || } |.endmacro | |.macro sethas, reg, hasbit || if (hasbit >= 0) { | or byte [reg + (hasbit / 8)], (1 << (hasbit % 8)) || } |.endmacro #include #include "upb/pb/varint.h" #include "upb/msg.h" // Decodes the next val into ARG3, advances PTR. static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m, uint8_t type, size_t tag_size) { // Decode the value into arg 3 for the callback. 
switch (type) { case UPB_TYPE(DOUBLE): case UPB_TYPE(FIXED64): case UPB_TYPE(SFIXED64): | mov ARG3_64, qword [PTR + tag_size] | add PTR, 8 + tag_size break; case UPB_TYPE(FLOAT): case UPB_TYPE(FIXED32): case UPB_TYPE(SFIXED32): | mov ARG3_32, dword [PTR + tag_size] | add PTR, 4 + tag_size break; case UPB_TYPE(BOOL): // Can't assume it's one byte long, because bool must be wire-compatible // with all of the varint integer types. | decode_varint tag_size | test ARG3_64, ARG3_64 | setne ARG3_8 // Other bytes left with val, should be ok. break; case UPB_TYPE(INT64): case UPB_TYPE(UINT64): case UPB_TYPE(INT32): case UPB_TYPE(UINT32): case UPB_TYPE(ENUM): | decode_varint tag_size break; case UPB_TYPE(SINT64): // 64-bit zig-zag decoding. | decode_varint tag_size | mov rax, ARG3_64 | shr ARG3_64, 1 | and rax, 1 | neg rax | xor ARG3_64, rax break; case UPB_TYPE(SINT32): // 32-bit zig-zag decoding. | decode_varint tag_size | mov eax, ARG3_32 | shr ARG3_32, 1 | and eax, 1 | neg eax | xor ARG3_32, eax break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): // We only handle the case where the entire string is in our current // buf, which sidesteps any security problems. The C path has more // robust checks. | mov ecx, dword [PTR + tag_size] | decode_loaded_varint tag_size | mov rdi, DECODER->effective_end | sub rdi, rax | cmp ARG3_64, rdi // if (len > d->effective_end - str) | ja ->exit_jit // Can't deliver, whole string not in buf. // Update PTR to point past end of string. | mov rdi, rax | add rdi, ARG3_64 | mov PTR, rdi // Populate BYTEREGION appropriately. | sub rax, DECODER->buf | add rax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs | mov BYTEREGION->start, rax | mov BYTEREGION->discard, rax | add rax, ARG3_64 | mov BYTEREGION->end, rax | mov BYTEREGION->fetch, rax // Fast path ensures whole string is loaded | mov ARG3_64, BYTEREGION break; // Will dispatch callbacks and call submessage in a second. 
case UPB_TYPE(MESSAGE): | decode_varint tag_size break; case UPB_TYPE(GROUP): | add PTR, tag_size break; default: abort(); } } #if 0 // These appear not to speed things up, but keeping around for // further experimentation. static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size, upb_fhandlers *f) { | mov eax, STDARRAY:ARG1_64->len | cmp eax, STDARRAY:ARG1_64->size | jne >2 // If array is full, fall back to actual function. | loadfval f | callp f->value | jmp >3 |2: | mov rcx, STDARRAY:ARG1_64->ptr | mov esi, eax | add eax, 1 switch (size) { case 8: | mov [rcx + rsi * 8], ARG3_64 break; case 4: | mov [rcx + rsi * 4], ARG3_32 break; case 1: | mov [rcx + rsi * 4], ARG3_8 break; } | mov STDARRAY:ARG1_64->len, eax |3: } #endif static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) { // Call callbacks. if (upb_issubmsgtype(f->type)) { if (f->type == UPB_TYPE(MESSAGE)) { | mov rsi, PTR | sub rsi, DECODER->buf | add esi, ARG3_32 // = (d->ptr - d->buf) + delim_len } else { assert(f->type == UPB_TYPE(GROUP)); | mov esi, UPB_NONDELIMITED } | pushframe f, esi, false // Call startsubmsg handler (if any). if (f->startsubmsg) { // upb_sflow_t startsubmsg(void *closure, upb_value fval) | mov ARG1_64, CLOSURE | loadfval f | callp f->startsubmsg | mov CLOSURE, rdx } | mov qword FRAME->closure, CLOSURE const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f); if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) { | jmp =>sub_m->jit_startmsg_pclabel; } else { | call =>sub_m->jit_startmsg_pclabel; } |=>f->jit_submsg_done_pclabel: // Call endsubmsg handler (if any). if (f->endsubmsg) { // upb_flow_t endsubmsg(void *closure, upb_value fval); | mov ARG1_64, CLOSURE | loadfval f | callp f->endsubmsg } | popframe upb_fhandlers_getmsg(f) } else { | mov ARG1_64, CLOSURE // Test for callbacks we can specialize. // Can't switch() on function pointers. 
if (f->value == &upb_stdmsg_setint64 || f->value == &upb_stdmsg_setuint64 || f->value == &upb_stdmsg_setptr || f->value == &upb_stdmsg_setdouble) { const upb_fielddef *fd = upb_value_getfielddef(f->fval); | mov [ARG1_64 + fd->offset], ARG3_64 } else if (f->value == &upb_stdmsg_setint32 || f->value == &upb_stdmsg_setuint32 || f->value == &upb_stdmsg_setfloat) { const upb_fielddef *fd = upb_value_getfielddef(f->fval); | mov [ARG1_64 + fd->offset], ARG3_32 } else if (f->value == &upb_stdmsg_setbool) { const upb_fielddef *fd = upb_value_getfielddef(f->fval); | mov [ARG1_64 + fd->offset], ARG3_8 #if 0 // These appear not to speed things up, but keeping around for // further experimentation. } else if (f->value == &upb_stdmsg_setint64_r || f->value == &upb_stdmsg_setuint64_r || f->value == &upb_stdmsg_setptr_r || f->value == &upb_stdmsg_setdouble_r) { upb_decoder_jit_doappend(d, 8, f); } else if (f->value == &upb_stdmsg_setint32_r || f->value == &upb_stdmsg_setuint32_r || f->value == &upb_stdmsg_setfloat_r) { upb_decoder_jit_doappend(d, 4, f); } else if (f->value == &upb_stdmsg_setbool_r) { upb_decoder_jit_doappend(d, 1, f); #endif } else { // Load closure and fval into arg registers. ||#ifndef NDEBUG ||// Since upb_value carries type information in debug mode ||// only, we need to pass the arguments slightly differently. | mov ARG4_64, ARG3_64 | mov ARG5_32, upb_types[f->type].inmemory_type ||#endif | loadfval f | callp f->value } | sethas CLOSURE, f->valuehasbit } // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK } // PTR should point to the beginning of the tag. static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, upb_mhandlers *m, upb_fhandlers *f, upb_fhandlers *next_f) { // PC-label for the dispatch table. // We check the wire type (which must be loaded in edx) because the // table is keyed on field number, not type. |=>f->jit_pclabel: | cmp edx, (tag & 0x7) | jne ->exit_jit // In the future: could be an unknown field or packed. 
|=>f->jit_pclabel_notypecheck: if (f->repeated) { | mov esi, FRAME->end_ofs | pushframe f, esi, true if (f->startseq) { | mov ARG1_64, CLOSURE | loadfval f | callp f->startseq | mov CLOSURE, rdx } | mov qword FRAME->closure, CLOSURE } |1: // Label for repeating this field. int tag_size = upb_value_size(tag); if (f->type == UPB_TYPE_ENDGROUP) { | add PTR, tag_size | jmp =>m->jit_endofmsg_pclabel return; } upb_decoder_jit_decodefield(d, m, f->type, tag_size); upb_decoder_jit_callcb(d, f); // Epilogue: load next tag, check for repeated field. | check_eob m | mov rcx, qword [PTR] if (f->repeated) { | checktag tag | je <1 if (f->endseq) { | mov ARG1_64, CLOSURE | loadfval f | callp f->endseq } | popframe m } if (next_tag != 0) { | checktag next_tag | je =>next_f->jit_pclabel_notypecheck } // Fall back to dynamic dispatch. | dyndispatch m |1: } static int upb_compare_uint32(const void *a, const void *b) { // TODO: always put ENDGROUP at the end. return *(uint32_t*)a - *(uint32_t*)b; } static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) { |=>m->jit_startmsg_pclabel: if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) { // There was a call to get here, so we need to align the stack. | sub rsp, 8 } // Call startmsg handler (if any): if (m->startmsg) { // upb_flow_t startmsg(void *closure); | mov ARG1_64, FRAME->closure | callp m->startmsg // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK } | setmsgend m | check_eob m | mov ecx, dword [PTR] | dyndispatch_ m // --------- New code section (does not fall through) ------------------------ // Emit code for parsing each field (dynamic dispatch contains pointers to // all of these). // Create an ordering over the fields (inttable ordering is undefined). 
int num_keys = upb_inttable_count(&m->fieldtab); uint32_t *keys = malloc(num_keys * sizeof(*keys)); int idx = 0; for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); i = upb_inttable_next(&m->fieldtab, i)) { keys[idx++] = upb_inttable_iter_key(i); } qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); upb_fhandlers *last_f = NULL; uint32_t last_tag = 0; for(int i = 0; i < num_keys; i++) { uint32_t key = keys[i]; upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key); uint32_t tag = upb_vencode32(key); if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f); last_tag = tag; last_f = f; } upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL); free(keys); // --------- New code section (does not fall through) ------------------------ // End-of-buf / end-of-message. if (!m->is_group) { // This case doesn't exist for groups, because there eob really means // eob, so that case just exits the jit directly. |=>m->jit_endofbuf_pclabel: | cmp PTR, DECODER->delim_end | jb ->exit_jit // We are at eob, but not end-of-submsg. } |=>m->jit_endofmsg_pclabel: // We are at end-of-submsg: call endmsg handler (if any): if (m->endmsg) { // void endmsg(void *closure, upb_status *status) { | mov ARG1_64, FRAME->closure | lea ARG2_64, DECODER->dispatcher.status | callp m->endmsg } if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) { // Counter previous alignment. | add rsp, 8 | ret } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) { | jmp ->exit_jit } else { | jmp =>m->jit_parent_field_done_pclabel } } static const char *dbgfmt = "JIT encountered unknown field! wt=%d, fn=%d\n"; static void upb_decoder_jit(upb_decoder *d) { | push rbp | mov rbp, rsp | push r15 | push r14 | push r13 | push r12 | push rbx // Align stack. 
| sub rsp, 8 | mov DECODER, ARG1_64 | mov FRAME, DECODER:ARG1_64->dispatcher.top | lea BYTEREGION, DECODER:ARG1_64->str_byteregion | mov CLOSURE, FRAME->closure | mov PTR, DECODER->ptr upb_handlers *h = d->dispatcher.handlers; if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) { | call =>h->msgs[0]->jit_startmsg_pclabel | jmp ->exit_jit } // TODO: push return addresses for re-entry (will be necessary for multiple // buffer support). for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]); |->exit_jit: | mov DECODER->ptr, PTR // Counter previous alignment. | add rsp, 8 | pop rbx | pop r12 | pop r13 | pop r14 | pop r15 | leave | ret |=>0: | mov rdi, stderr | mov rsi, dbgfmt | callp fprintf | callp abort } void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f, uint32_t *pclabel_count) { f->jit_pclabel = (*pclabel_count)++; f->jit_pclabel_notypecheck = (*pclabel_count)++; f->jit_submsg_done_pclabel = (*pclabel_count)++; } void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) { m->jit_startmsg_pclabel = (*pclabel_count)++; m->jit_endofbuf_pclabel = (*pclabel_count)++; m->jit_endofmsg_pclabel = (*pclabel_count)++; m->jit_dyndispatch_pclabel = (*pclabel_count)++; m->jit_unknownfield_pclabel = (*pclabel_count)++; m->jit_parent_field_done_pclabel = UPB_NONE; m->max_field_number = 0; upb_inttable_iter i; for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); i = upb_inttable_next(&m->fieldtab, i)) { uint32_t key = upb_inttable_iter_key(i); m->max_field_number = UPB_MAX(m->max_field_number, key); upb_fhandlers *f = upb_inttable_iter_value(i); upb_decoder_jit_assignfieldlabs(f, pclabel_count); } // XXX: Won't work for large field numbers; will need to use a upb_table. m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*)); } // Second pass: for messages that have only one parent, link them to the field // from which they are called. 
void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
  upb_inttable_iter i;
  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
      i = upb_inttable_next(&m->fieldtab, i)) {
    upb_fhandlers *f = upb_inttable_iter_value(i);
    if (upb_issubmsgtype(f->type)) {
      upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
      if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
        // First parent seen: the submessage can jump straight back to it.
        sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
      } else {
        // Any further parent means no single return target exists.
        sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE;
      }
    }
  }
}

// Generates the JIT-ted decoder for d's handlers.
//
// On return d->jit_code / d->jit_size describe a read+execute mapping holding
// the generated code, every message's dispatch table points into it, and
// d->debug_info holds whatever upb_reg_jit_gdb() stored there.  Release with
// upb_decoder_freejit().  Failure to obtain memory for the code is fatal
// (abort()), since there is no way to report it to the caller.
void upb_decoder_makejit(upb_decoder *d) {
  d->debug_info = NULL;

  // Assign pclabels.  Label 0 is reserved for the unknown-field stub.
  uint32_t pclabel_count = 1;
  upb_handlers *h = d->dispatcher.handlers;
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
  for (int i = 0; i < h->msgs_len; i++)
    upb_decoder_jit_assignmsglabs2(h->msgs[i]);

  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
    h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
  }

  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
  if (!globals) abort();
  dasm_init(d, 1);
  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
  dasm_growpc(d, pclabel_count);
  dasm_setup(d, upb_jit_actionlist);

  upb_decoder_jit(d);

  dasm_link(d, &d->jit_size);

  // Anonymous mappings take fd -1: the fd is ignored where MAP_ANONYMOUS is
  // native, but some implementations require -1.
  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (d->jit_code == MAP_FAILED) {
    perror("upb_decoder_makejit: mmap");
    abort();
  }

  upb_reg_jit_gdb(d);

  dasm_encode(d, d->jit_code);

  // Create dispatch tables.
  for (int i = 0; i < h->msgs_len; i++) {
    upb_mhandlers *m = h->msgs[i];
    for (uint32_t j = 0; j <= m->max_field_number; j++) {
      // The field table is keyed on (field number << 3) | wire type, so try
      // every wire type for field number j.
      upb_fhandlers *f = NULL;
      for (int k = 0; k < 8; k++) {
        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
        if (f) break;
      }
      if (f) {
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
      } else {
        // Don't handle unknown fields yet.
        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
      }
    }
  }

  dasm_free(d);
  free(globals);

  // Flip the mapping from writable to executable.
  if (mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ) != 0) {
    perror("upb_decoder_makejit: mprotect");
    abort();
  }

  // Best-effort dump of the generated code for offline disassembly.
  // View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
  // Or: ndisasm -b 64 /tmp/machine-code
  // NOTE(review): this writes to a fixed, predictable /tmp path on every
  // call; consider putting it behind a debug switch.
  FILE *f = fopen("/tmp/machine-code", "wb");
  if (f) {
    fwrite(d->jit_code, d->jit_size, 1, f);
    fclose(f);
  }
}

// Releases the code mapping and debug info created by upb_decoder_makejit().
void upb_decoder_freejit(upb_decoder *d) {
  munmap(d->jit_code, d->jit_size);
  d->jit_code = NULL;
  free(d->debug_info);
  d->debug_info = NULL;
  // TODO: unregister
}