From 9eb4d695c49a85f7f72ad68c3c31affd61fef984 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 1 Apr 2011 15:40:06 -0700 Subject: First rough version of the JIT. It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in. --- src/upb.h | 1 + src/upb_decoder.c | 95 +++---- src/upb_decoder.h | 19 +- src/upb_decoder_x64.asm | 228 ----------------- src/upb_decoder_x86.dasc | 649 +++++++++++++++++++++++++++++++++++++++++++++++ src/upb_def.c | 37 +-- src/upb_glue.c | 3 - src/upb_msg.c | 12 +- src/upb_stream.c | 63 ++--- src/upb_stream.h | 40 ++- src/upb_string.c | 1 - src/upb_string.h | 14 +- src/upb_table.c | 6 +- src/upb_varint_decoder.h | 74 +++--- 14 files changed, 866 insertions(+), 376 deletions(-) delete mode 100644 src/upb_decoder_x64.asm create mode 100644 src/upb_decoder_x86.dasc (limited to 'src') diff --git a/src/upb.h b/src/upb.h index 5dfd65e..0dfcd5e 100644 --- a/src/upb.h +++ b/src/upb.h @@ -165,6 +165,7 @@ typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_BYTESRC 32 #define UPB_VALUETYPE_RAW 33 #define UPB_VALUETYPE_FIELDDEF 34 +#define UPB_TYPE_ENDGROUP 35 // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 1b9b5f8..4b71ccd 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -11,16 +11,13 @@ #include "upb_decoder.h" #include "upb_varint_decoder.h" -// If the return value is other than UPB_CONTINUE, that is what the last -// callback returned. -typedef struct { - upb_flow_t flow; - const char *ptr; -} fastdecode_ret; -extern fastdecode_ret upb_fastdecode(const char *p, const char *end, - upb_value_handler_t value_cb, void *closure, - void *table, int table_size); - +#ifdef UPB_USE_JIT_X64 +#define Dst_DECL upb_decoder *d +#define Dst_REF (d->dynasm) +#define Dst (d) +#include "dynasm/dasm_proto.h" +#include "upb_decoder_x86.h" +#endif /* Decoding/Buffering of individual values ************************************/ @@ -28,10 +25,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end, INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Constant used to signal that the submessage is a group and therefore we -// don't know its end offset. This cannot be the offset of a real submessage -// end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET 0 #define UPB_MAX_VARINT_ENCODED_SIZE 10 INLINE void upb_decoder_advance(upb_decoder *d, size_t len) { @@ -54,6 +47,32 @@ INLINE void upb_dstate_setmsgend(upb_decoder *d) { (void*)UINTPTR_MAX : d->buf + end_offset; } +// Pulls the next buffer from the bytesrc. Should be called only when the +// current buffer is completely empty. +static bool upb_pullbuf(upb_decoder *d) { + assert(upb_decoder_bufleft(d) == 0); + int32_t last_buf_len = d->buf ? 
upb_string_len(d->bufstr) : -1; + upb_string_recycle(&d->bufstr); + if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) { + d->buf = NULL; + d->end = NULL; + return false; + } + if (last_buf_len != -1) { + d->buf_stream_offset += last_buf_len; + for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) + if (f->end_offset != UINT32_MAX) + f->end_offset -= last_buf_len; + } + d->buf = upb_string_getrobuf(d->bufstr); + d->ptr = upb_string_getrobuf(d->bufstr); + d->end = d->buf + upb_string_len(d->bufstr); + d->jit_end = d->end; //d->end - 12; + upb_string_substr(d->tmp, d->bufstr, 0, 0); + upb_dstate_setmsgend(d); + return true; +} + // Called only from the slow path, this function copies the next "len" bytes // from the stream to "data", adjusting the dstate appropriately. static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) { @@ -62,27 +81,8 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) { memcpy(data, d->ptr, to_copy); upb_decoder_advance(d, to_copy); bytes_wanted -= to_copy; - if (bytes_wanted == 0) { - upb_dstate_setmsgend(d); - return true; - } - - // Get next buffer. - int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1; - upb_string_recycle(&d->bufstr); - if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) { - d->buf = NULL; - return false; - } - if (last_buf_len != -1) { - d->buf_stream_offset += last_buf_len; - for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) - if (f->end_offset != UINT32_MAX) - f->end_offset -= last_buf_len; - } - d->buf = upb_string_getrobuf(d->bufstr); - d->ptr = upb_string_getrobuf(d->bufstr); - d->end = d->buf + upb_string_len(d->bufstr); + if (bytes_wanted == 0) return true; + if (!upb_pullbuf(d)) return false; } } @@ -143,7 +143,7 @@ done: INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) { if (upb_decoder_bufleft(d) >= 16) { // Common (fast) case. 
- upb_decoderet r = upb_decode_varint_fast(d->ptr); + upb_decoderet r = upb_vdecode_fast(d->ptr); if (r.p == NULL) { upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); return false; @@ -229,6 +229,7 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { } #define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; } + CHECK(upb_pullbuf(d)); if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err; // Main loop: executed once per tag/field pair. @@ -244,14 +245,13 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { // Decodes as many fields as possible, updating d->ptr appropriately, // before falling through to the slow(er) path. -#ifdef USE_X64_FASTPATH - const char *end = UPB_MIN(d->end, d->submsg_end); - fastdecode_ret ret = upb_fastdecode(d->ptr, end, - d->dispatcher.top->handlers.set->value, - d->dispatcher.top->handlers.closure, - d->msgdef->itof.array, - d->msgdef->itof.array_size); - CHECK_FLOW(ret.flow); +#ifdef UPB_USE_JIT_X64 + void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code; + if (d->dispatcher.handlers->should_jit && d->buf) { + //fprintf(stderr, "Entering JIT, ptr: %p\n", d->ptr); + upb_jit_decode(d); + //fprintf(stderr, "Exiting JIT, ptr: %p\n", d->ptr); + } #endif // Parse/handle tag. 
@@ -354,9 +354,13 @@ err: void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { upb_dispatcher_init(&d->dispatcher, handlers); +#ifdef UPB_USE_JIT_X64 + upb_decoder_makejit(d); +#endif d->bufstr = NULL; d->buf = NULL; d->tmp = NULL; + upb_string_recycle(&d->tmp); } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { @@ -373,4 +377,7 @@ void upb_decoder_uninit(upb_decoder *d) { upb_dispatcher_uninit(&d->dispatcher); upb_string_unref(d->bufstr); upb_string_unref(d->tmp); +#ifdef UPB_USE_JIT_X64 + upb_decoder_freejit(d); +#endif } diff --git a/src/upb_decoder.h b/src/upb_decoder.h index bb54930..1be31c4 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -27,13 +27,12 @@ extern "C" { /* upb_decoder *****************************************************************/ +struct dasm_State; + struct _upb_decoder { // Bytesrc from which we pull serialized data. upb_bytesrc *bytesrc; - // Dispatcher to which we push parsed data. - upb_dispatcher dispatcher; - // String to hold our input buffer; is only active if d->buf != NULL. upb_string *bufstr; @@ -48,6 +47,7 @@ struct _upb_decoder { // End of this buffer, relative to *ptr. const char *end; + const char *jit_end; // Members which may also be written by the JIT: @@ -57,8 +57,21 @@ struct _upb_decoder { // End of this submessage, relative to *ptr. const char *submsg_end; + // MIN(end, submsg_end) + const char *effective_end; + // Where we will store any errors that occur. upb_status *status; + + // Dispatcher to which we push parsed data. + upb_dispatcher dispatcher; + + // JIT-generated machine code (else NULL). 
+ char *jit_code; + size_t jit_size; + char *debug_info; + + struct dasm_State *dynasm; }; // A upb_decoder decodes the binary protocol buffer format, writing the data it diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm deleted file mode 100644 index c417644..0000000 --- a/src/upb_decoder_x64.asm +++ /dev/null @@ -1,228 +0,0 @@ -DEFAULT REL ; Default to RIP-relative addressing instead of absolute. - -extern _upb_decode_varint_fast64 - -SECTION .data - -; Our dispatch table; used to jump to the right handler, keyed on the field's -; type. -dispatch_table: - dq _upb_fastdecode.cant_fast_path ; field not in table (type == 0). (check_4). - dq _upb_fastdecode.fixed64 ; double - dq _upb_fastdecode.fixed32 ; float - dq _upb_fastdecode.varint ; int64 - dq _upb_fastdecode.varint ; uint64 - dq _upb_fastdecode.varint ; int32 - dq _upb_fastdecode.fixed64 ; fixed64 - dq _upb_fastdecode.fixed32 ; fixed32 - dq _upb_fastdecode.varint ; bool - dq _upb_fastdecode.string ; string - dq _upb_fastdecode.cant_fast_path ; group (check_6) - dq _upb_fastdecode.cant_fast_path ; message - dq _upb_fastdecode.string ; bytes - dq _upb_fastdecode.varint ; uint32 - dq _upb_fastdecode.varint ; enum - dq _upb_fastdecode.fixed32 ; sfixed32 - dq _upb_fastdecode.fixed64 ; sfixed64 - dq _upb_fastdecode.varint_sint32 ; sint32 - dq _upb_fastdecode.varint_sint64 ; sint64 - - GLOBAL _upb_decode_fast - -SECTION .text -; Register allocation. -%define BUF rbx ; const char *p, current buf position. -%define END rbp ; const char *end, where the buf ends (either submsg end or buf end) -%define STRING r12 ; unused -%define FVAL r13 ; upb_value fval, needs to be preserved across varint decoding call. -%define UNUSED r14 -%define CLOSURE r15 - -; Stack layout: *tableptr, uint32_t maxfield_times_8 -%define STACK_SPACE 24 ; this value + 8 must be a multiple of 16. -%define TABLE_SPILL [rsp] ; our lookup table, indexed by field number. 
-%define COMMITTED_BUF_SPILL [rsp+8] -%define MAXFIELD_TIMES_8_SPILL [rsp+16] - - -; Executing the fast path requires the following conditions: -; - check_1: there are >=12 bytes left (<=2 byte tag and <=10 byte varint). -; - check_2: the tag is <= 2 bytes. -; - check_3: the field number is <= the table size -; (ie. it must be an array lookup, not a hash lookup). -; - check_4: the field is known (found in the table). -; - check_5: the wire type we read is correct for the field number, -; ("packed" fields are not accepted, yet. this could be handled -; efficiently by doing an extra check on the "type check failed" -; path that goes into a tight loop if the encoding was packed). -; - check_6: the field is not a group or a message (or string, TODO) -; (this could be relaxed, but due to delegation it's a bit tricky). -; - check_7: if the value is a string, the entire string is available in -; the buffer, and our cached string object can be recycled, and -; our string object already references the source buffer, so -; absolutely no refcount twiddling is required. - - -%macro decode_and_dispatch_ 0 -align 16 -.decode_and_dispatch: - ; Load a few values we'll need in a sec. - mov r8, TABLE_SPILL - mov r9d, MAXFIELD_TIMES_8_SPILL - - mov rax, END - sub rax, BUF - cmp rax, 12 - jb _upb_fastdecode.cant_fast_path ; check_1 (<12 bytes left). - - ; Decode a 1 or 2-byte varint -> eax. - mov cl, byte [BUF] - lea rdi, [BUF+1] - movzx eax, cl - and eax, 0x7f - test cl, cl - jns .one_byte_tag ; Should be predictable if fields are in order. - movzx ecx, byte [BUF+1] - lea rdi, [BUF+2] - mov edx, ecx - and edx, 0x7f - shl edx, 7 - or eax, edx - test al, al - js _upb_fastdecode.cant_fast_path ; check_2 (tag was >2 bytes). -.one_byte_tag: - mov BUF, rdi - - ; Decode tag and dispatch. - mov ecx, eax - and eax, 0x3ff8 ; eax now contains field number * 8 - lea r11, [r8+rax*2] ; *2 is really *16, since rax is already *8. - and ecx, 0x7 ; ecx now contains wire type. 
- cmp eax, r9d - jae _upb_fastdecode.cant_fast_path ; check_3 (field number > table size) - mov FIELDDEF, [r11+8] ; Lookup fielddef (upb_itof_ent.f) - movzx rdx, BYTE [r11+1] ; Lookup field type. - mov rax, qword dispatch_table - jmp [rax+rdx*8] -%endmacro - -%macro decode_and_dispatch 0 - jmp .decode_and_dispatch -%endmacro - -%macro call_callback 0 - ; Value arg must already be in rdx when macro is called. - mov rdi, CLOSURE - mov rsi, FIELDDEF - mov rcx, 33 ; RAW; we could pass the correct type, or only do this in non-debug modes. - call CALLBACK - mov COMMITTED_BUF_SPILL, BUF - cmp eax, 0 - jne .done ; Caller requested BREAK or SKIPSUBMSG. -%endmacro - -%macro check_type 1 - cmp ecx, %1 - jne _upb_fastdecode.cant_fast_path ; check_5 (wire type check failed). -%endmacro - -; extern upb_flow_t upb_fastdecode(const char **p, const char *end, -; upb_value_handler_t value_cb, void *closure, -; void *table, int table_size); -align 16 -global _upb_fastdecode -_upb_fastdecode: - ; We use all callee-save regs. - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - sub rsp, STACK_SPACE - - ; Parse arguments into reg vals and stack. - mov BUF, rdi - mov COMMITTED_BUF_SPILL, rdi - mov END, rsi - mov CALLBACK, rdx - mov CLOSURE, rcx - mov TABLE_SPILL, r8 - shl r9, 3 - mov MAXFIELD_TIMES_8_SPILL, r9 - - decode_and_dispatch - -align 16 -.varint: - call _upb_decode_varint_fast64 ; BUF is already in rdi. - test rax, rax - jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error. - mov BUF, rax - call_callback ; rdx already holds value. - decode_and_dispatch_ - -align 16 -.fixed32: - mov edx, DWORD [BUF] ; Might be unaligned, but that's ok. - add BUF, 4 - call_callback - decode_and_dispatch - -align 16 -.fixed64: - mov rdx, QWORD [BUF] ; Might be unaligned, but that's ok. - add BUF, 8 - call_callback - decode_and_dispatch - -align 16 -.varint_sint32: - call _upb_decode_varint_fast64 ; BUF is already in rdi. 
- test rax, rax - jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error. - mov BUF, rax - - ; Perform 32-bit zig-zag decoding. - mov ecx, edx - shr edx, 1 - and ecx, 0x1 - neg ecx - xor edx, ecx - call_callback - decode_and_dispatch - -align 16 -.varint_sint64: - call _upb_decode_varint_fast64 ; BUF is already in rdi. - test rax, rax - jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error. - mov BUF, rax - - ; Perform 64-bit zig-zag decoding. - mov rcx, rdx - shr rdx, 1 - and ecx, 0x1 - neg rcx - xor rdx, rcx - call_callback - decode_and_dispatch - -align 16 -.string: - -.cant_fast_path: - mov rax, 0 ; UPB_CONTINUE -- continue as before. -.done: - ; If coming via done, preserve the user callback's return in rax. - - ; Return committed buf pointer as second parameter. - mov rdx, COMMITTED_BUF_SPILL - add rsp, STACK_SPACE - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - ret diff --git a/src/upb_decoder_x86.dasc b/src/upb_decoder_x86.dasc new file mode 100644 index 0000000..71df08f --- /dev/null +++ b/src/upb_decoder_x86.dasc @@ -0,0 +1,649 @@ +|// +|// upb - a minimalist implementation of protocol buffers. +|// +|// Copyright (c) 2011 Google Inc. See LICENSE for details. +|// Author: Josh Haberman +|// +|// JIT compiler for upb_decoder on x86. Given a upb_handlers object, +|// generates code specialized to parsing the specific message and +|// calling specific handlers. + +#define UPB_NONE -1 +#define UPB_MULTIPLE -2 +#define UPB_TOPLEVEL_ONE -3 + +#include <sys/mman.h> +#include "dynasm/dasm_proto.h" +#include "dynasm/dasm_x86.h" + +// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code +// at runtime. GDB 7.x+ has defined an interface for doing this, and these +// structure/function definitions are copied out of gdb/jit.h +// +// We need to give GDB an ELF file at runtime describing the symbols we have +// generated.
To avoid implementing the ELF format, we generate an ELF file +// at compile-time and compile it in as a character string. We can replace +// a few key constants (address of JIT-ted function and its size) by looking +// for a few magic numbers and doing a dumb string replacement. +#include "jit_debug_elf_file.h" + +typedef enum +{ + GDB_JIT_NOACTION = 0, + GDB_JIT_REGISTER, + GDB_JIT_UNREGISTER +} jit_actions_t; + +typedef struct gdb_jit_entry { + struct gdb_jit_entry *next_entry; + struct gdb_jit_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +} gdb_jit_entry; + +typedef struct { + uint32_t version; + uint32_t action_flag; + gdb_jit_entry *relevant_entry; + gdb_jit_entry *first_entry; +} gdb_jit_descriptor; + +gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL}; + +void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); } + +|.arch x64 +|.actionlist upb_jit_actionlist +|.globals UPB_JIT_GLOBAL_ +|.globalnames upb_jit_globalnames +| +|// Calling conventions. +|.define ARG1_64, rdi +|.define ARG2_8, sil +|.define ARG2_32, esi +|.define ARG2_64, rsi +|.define ARG3_8, dl +|.define ARG3_32, edx +|.define ARG3_64, rdx +| +|// Register allocation / type map. +|// ALL of the code in this file uses these register allocations. +|// When we "call" within this file, we do not use regular calling +|// conventions, but of course when calling to user callbacks we must. +|.define PTR, rbx +|.define CLOSURE, r12 +|.type FRAME, upb_dispatcher_frame, r13 +|.type STRING, upb_string, r14 +|.type DECODER, upb_decoder, r15 +| +|.macro callp, addr +|| if ((uintptr_t)addr < 0xffffffff) { + | call &addr +|| } else { + | mov64 rax, (uintptr_t)addr + | call rax +|| } +|.endmacro +| +|// Checks PTR for end-of-buffer. 
+|.macro check_eob, m +| cmp PTR, DECODER->effective_end +|| if (m->is_group) { + | jae ->exit_jit +|| } else { + | jae =>m->jit_endofbuf_pclabel +|| } +|.endmacro +| +|// Decodes varint from [PTR + offset] -> ARG3. +|// Saves new pointer as rax. +|.macro decode_loaded_varint, offset +| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder. +| lea rax, [PTR + offset + 1] +| mov ARG3_32, ecx +| and ARG3_32, 0x7f +| test cl, cl +| jns >9 +| lea rax, [PTR + offset + 2] +| movzx esi, ch +| and esi, 0x7f +| shl esi, 7 +| or ARG3_32, esi +| test cx, cx +| jns >9 +| mov ARG1_64, rax +| mov ARG2_32, ARG3_32 +| callp upb_vdecode_max8_fast +| test rax, rax +| jz ->exit_jit // >10-byte varint. +|9: +|.endmacro +| +|.macro decode_varint, offset +| mov ecx, dword [PTR + offset] +| decode_loaded_varint offset +| mov PTR, rax +|.endmacro +| +|// Decode the tag -> edx. +|// Could specialize this by avoiding the value masking: could just key the +|// table on the raw (length-masked) varint to save 3-4 cycles of latency. +|// Currently only support tables where all entries are in the array part. +|.macro dyndispatch, m +| decode_loaded_varint, 0 +| mov ecx, edx +| shr ecx, 3 +| and edx, 0x7 +| cmp ecx, m->max_field_number // Bounds-check the field. +| ja ->exit_jit // In the future; could be unknown label +| mov rcx, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables. +| jmp rcx // Dispatch: unpredictable jump. +|.endmacro +| +|.macro setmsgend, m +| mov rsi, DECODER->jit_end +|| if (m->is_group) { +| mov64 rax, 0xffffffffffffffff +| mov qword DECODER->submsg_end, rax +| mov DECODER->effective_end, rsi +|| } else { +| // Could store a correctly-biased version in the frame, at the cost of +| // a larger stack. 
+| mov eax, dword FRAME->end_offset +| add rax, qword DECODER->buf +| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset +| cmp rax, rsi +| jb >1 +| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end) +|1: +| mov DECODER->effective_end, rax +|| } +|.endmacro +| +|// rcx contains the tag, compare it against "tag", but since it is a varint +|// we must only compare as many bytes as actually have data. +|.macro checktag, tag +|| switch (upb_value_size(tag)) { +|| case 1: +| cmp cl, tag +|| break; +|| case 2: +| cmp cx, tag +|| break; +|| case 3: +| and ecx, 0xffffff // 3 bytes +| cmp rcx, tag +|| break; +|| case 4: +| cmp ecx, tag +|| break; +|| case 5: +| mov64 rdx, 0xffffffffff // 5 bytes +| and rcx, rdx +| cmp rcx, tag +|| break; +|| default: abort(); +|| } +|.endmacro +| +|// TODO: optimize for 0 (xor) and 32-bits. +|.macro loadfval, f +|| if (f->fval.val.uint64 == 0) { +| xor ARG2_32, ARG2_32 +|| } else { +| mov ARG2_64, f->fval.val.uint64 +|| } +|.endmacro + +#include <stdlib.h> +#include "upb_varint_decoder.h" + +// Returns the number of bytes needed to hold "val" (at least 1). +static size_t upb_value_size(uint64_t val) { + if (val == 0) return 1; // Also keeps __builtin_clzll() away from 0 (undefined). +#ifdef __GNUC__ + int high_bit = 63 - __builtin_clzll(val); // 0-based. +#else + int high_bit = 0; + uint64_t tmp = val; + while(tmp >>= 1) high_bit++; +#endif + return high_bit / 8 + 1; +} + +// Packs "val" as varint bytes into a uint64_t, first byte in the low 8 bits. +// Only values whose encoding fits in 8 bytes are supported. +static uint64_t upb_encode_varint(uint64_t val) +{ + uint64_t ret = 0; + for (int bitpos = 0; val; bitpos+=8, val >>=7) { + // Set the continuation bit of the previous byte. The shift must be done in + // 64 bits: "1 << 31" overflows int and corrupts 5-byte tags. + if (bitpos > 0) ret |= ((uint64_t)1 << (bitpos-1)); + ret |= (val & 0x7f) << bitpos; + } + return ret; +} + +// PTR should point to the beginning of the tag. +static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, + upb_handlers_msgent *m, + upb_handlers_fieldent *f, upb_handlers_fieldent *next_f) { + int tag_size = upb_value_size(tag); + + // PC-label for the dispatch table. + // We check the wire type (which must be loaded in edx) because the + // table is keyed on field number, not type.
+ |=>f->jit_pclabel: + | cmp edx, upb_types[f->type].native_wire_type + | jne ->exit_jit // In the future: could be an unknown field. + |=>f->jit_pclabel_notypecheck: + |1: // Label for repeating this field. + + // Decode the value into arg 3 for the callback. + switch (f->type) { + case UPB_TYPE(DOUBLE): + case UPB_TYPE(FIXED64): + case UPB_TYPE(SFIXED64): + | mov ARG3_64, qword [PTR + tag_size] + | add PTR, 8 + tag_size + break; + + case UPB_TYPE(FLOAT): + case UPB_TYPE(FIXED32): + case UPB_TYPE(SFIXED32): + | mov ARG3_32, dword [PTR + tag_size] + | add PTR, 4 + tag_size + break; + + case UPB_TYPE(BOOL): + // Can't assume it's one byte long, because bool must be wire-compatible + // with all of the varint integer types. + | decode_varint tag_size + | test ARG3_64, ARG3_64 + | setne ARG3_8 // Other bytes left with val, should be ok. + break; + + case UPB_TYPE(INT64): + case UPB_TYPE(UINT64): + case UPB_TYPE(INT32): + case UPB_TYPE(UINT32): + case UPB_TYPE(ENUM): + | decode_varint tag_size + break; + + case UPB_TYPE(SINT64): + // 64-bit zig-zag decoding. + | decode_varint tag_size + | mov rax, ARG3_64 + | shr ARG3_64, 1 + | and rax, 1 + | neg rax + | xor ARG3_64, rax + break; + + case UPB_TYPE(SINT32): + // 32-bit zig-zag decoding. + | decode_varint tag_size + | mov eax, ARG3_32 + | shr ARG3_32, 1 + | and eax, 1 + | neg eax + | xor ARG3_32, eax + break; + + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + // We only handle the case where the entire string is in our current + // buf, which sidesteps any security problems. The C path has more + // robust checks. + | decode_varint tag_size + | mov STRING->len, ARG3_32 + | mov STRING->ptr, PTR + | add PTR, ARG3_64 + | mov ARG3_64, STRING + | cmp PTR, DECODER->effective_end + | ja ->exit_jit // Can't deliver, whole string not in buf. + break; + + case UPB_TYPE_ENDGROUP: // A pseudo-type. 
+ | add PTR, tag_size + | mov DECODER->ptr, PTR + | jmp =>m->jit_endofmsg_pclabel + return; + + case UPB_TYPE(MESSAGE): + | decode_varint tag_size + case UPB_TYPE(GROUP): + // Will dispatch callbacks and call submessage in a second. + break; + + default: abort(); + } + // Commit our work by advancing ptr. + // (If in the future we wanted to support a UPB_SUSPEND_AGAIN that + // suspends the decoder and redelivers the value later, we would + // need to adjust this to happen perhaps after the callback ran). + | mov DECODER->ptr, PTR + + // Load closure and fval into arg registers. + | mov ARG1_64, CLOSURE + | loadfval f + + // Call callbacks. + if (upb_issubmsgtype(f->type)) { + // Call startsubmsg handler (if any). + if (f->cb.startsubmsg != upb_startsubmsg_nop) { + // upb_sflow_t startsubmsg(void *closure, upb_value fval) + | mov r12d, ARG3_32 + | callp f->cb.startsubmsg + } else { + | mov rdx, CLOSURE + | mov r12d, ARG3_32 + } + // Push a stack frame (not the CPU stack, the upb_decoder stack). + | lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing. + | cmp rax, qword DECODER->dispatcher.limit + | jae ->exit_jit // Frame stack overflow. + | mov qword FRAME:rax->f, f + | mov qword FRAME:rax->closure, rdx + | mov rsi, PTR + | sub rsi, DECODER->buf + | add r12d, esi + | mov dword FRAME:rax->end_offset, r12d // = (d->ptr - d->buf) + delim_len + | mov CLOSURE, rdx + | mov DECODER->dispatcher.top, rax + | mov FRAME, rax + + upb_handlers_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f); + if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) { + | jmp =>sub_m->jit_startmsg_pclabel; + } else { + | call =>sub_m->jit_startmsg_pclabel; + } + + |=>f->jit_submsg_done_pclabel: + // Pop a stack frame. + | sub FRAME, sizeof(upb_dispatcher_frame) + | mov DECODER->dispatcher.top, FRAME + | setmsgend m + | mov CLOSURE, FRAME->closure + + // Call endsubmsg handler (if any). 
+ if (f->endsubmsg != upb_endsubmsg_nop) { + // upb_flow_t endsubmsg(void *closure, upb_value fval); + | mov ARG1_64, CLOSURE + | loadfval f + | callp f->endsubmsg + } + } else { + | callp f->cb.value + } + // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK + + // Epilogue: load next tag, check for repeated field. + | check_eob m + | mov rcx, qword [PTR] + if (f->repeated) { + | checktag tag + | je <1 + } + if (next_tag != 0) { + | checktag next_tag + | je =>next_f->jit_pclabel_notypecheck + } + + // Fall back to dynamic dispatch. Replicate the dispatch + // here so we can learn what fields generally follow others. + | dyndispatch m + |1: +} + +// qsort() comparator ordering uint32_t values ascending. Compares instead of +// subtracting: the unsigned difference wraps and, converted to int, reports +// the wrong sign for keys more than INT_MAX apart. +static int upb_compare_uint32(const void *a, const void *b) { + uint32_t x = *(const uint32_t*)a; + uint32_t y = *(const uint32_t*)b; + return (x > y) - (x < y); +} + +static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) { + |=>m->jit_startmsg_pclabel: + // Call startmsg handler (if any): + if (m->startmsg != upb_startmsg_nop) { + // upb_flow_t startmsg(void *closure); + | mov ARG1_64, FRAME->closure + | callp m->startmsg + // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK + } + + | setmsgend m + | check_eob m + | mov ecx, dword [PTR] + | dyndispatch m + + // --------- New code section (does not fall through) ------------------------ + + // Emit code for parsing each field (dynamic dispatch contains pointers to + // all of these). + + // Create an ordering over the fields (inttable ordering is undefined).
+ int num_keys = upb_inttable_count(&m->fieldtab); + uint32_t *keys = malloc(num_keys * sizeof(*keys)); + if (num_keys > 0 && !keys) abort(); // Out of memory. + int idx = 0; + for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); + i = upb_inttable_next(&m->fieldtab, i)) { + keys[idx++] = upb_inttable_iter_key(i); + } + qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); + + + upb_handlers_fieldent *last_f = NULL; + uint32_t last_tag = 0; + for(int i = 0; i < num_keys; i++) { + uint32_t key = keys[i]; + upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key); + uint32_t tag = upb_encode_varint(key); + if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f); + last_tag = tag; + last_f = f; + } + + free(keys); + + if (m->is_group) { + // Create a fake fieldent for handling "end group." + upb_handlers_fieldent f = {0, UPB_TYPE_ENDGROUP, 0, UPB_NO_VALUE, {NULL}, NULL, 0, 0, 0, false}; + // last_f is still NULL when the message has no fields; there is then no + // pending field to emit and upb_decoder_jit_field() must not be handed NULL. + if (last_f) upb_decoder_jit_field(d, last_tag, m->groupnum, m, last_f, &f); + upb_decoder_jit_field(d, m->groupnum, 0, m, &f, NULL); + } else if (last_f) { + upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL); + } + + // --------- New code section (does not fall through) ------------------------ + + // End-of-buf / end-of-message. + if (!m->is_group) { + // This case doesn't exist for groups, because there eob really means + // eob, so that case just exits the jit directly. + |=>m->jit_endofbuf_pclabel: + | cmp PTR, DECODER->submsg_end + | jb ->exit_jit // We are at eob, but not end-of-submsg.
+ } + + |=>m->jit_endofmsg_pclabel: + // We are at end-of-submsg: call endmsg handler (if any): + if (m->endmsg != upb_endmsg_nop) { + // void endmsg(void *closure, upb_status *status) { + | mov ARG1_64, FRAME->closure + | lea ARG2_64, DECODER->dispatcher.status + | callp m->endmsg + } + + if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) { + | ret + } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) { + | jmp ->exit_jit + } else { + | jmp =>m->jit_parent_field_done_pclabel + } + +} + +static void upb_decoder_jit(upb_decoder *d) { + | push rbp + | mov rbp, rsp + | push r15 + | push r14 + | push r13 + | push r12 + | push rbx + | mov DECODER, ARG1_64 + | mov FRAME, DECODER:ARG1_64->dispatcher.top + | mov STRING, DECODER:ARG1_64->tmp + | mov CLOSURE, FRAME->closure + | mov PTR, DECODER->ptr + + upb_handlers *h = d->dispatcher.handlers; + if (h->msgs[0].jit_parent_field_done_pclabel == UPB_MULTIPLE) { + | call =>h->msgs[0].jit_startmsg_pclabel + | jmp ->exit_jit + } + + // TODO: push return addresses for re-entry (will be necessary for multiple + // buffer support). 
+ for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, &h->msgs[i]); + + |->exit_jit: + | pop rbx + | pop r12 + | pop r13 + | pop r14 + | pop r15 + | leave + | ret + |=>0: + | callp &abort +} + +void upb_decoder_jit_assignfieldlabs(upb_handlers_fieldent *f, + uint32_t *pclabel_count) { + f->jit_pclabel = (*pclabel_count)++; + f->jit_pclabel_notypecheck = (*pclabel_count)++; + f->jit_submsg_done_pclabel = (*pclabel_count)++; +} + +void upb_decoder_jit_assignmsglabs(upb_handlers_msgent *m, + uint32_t *pclabel_count) { + m->jit_startmsg_pclabel = (*pclabel_count)++; + m->jit_endofbuf_pclabel = (*pclabel_count)++; + m->jit_endofmsg_pclabel = (*pclabel_count)++; + m->jit_unknownfield_pclabel = (*pclabel_count)++; + m->jit_parent_field_done_pclabel = UPB_NONE; + m->max_field_number = 0; + upb_inttable_iter i; + for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); + i = upb_inttable_next(&m->fieldtab, i)) { + uint32_t key = upb_inttable_iter_key(i); + m->max_field_number = UPB_MAX(m->max_field_number, key); + upb_handlers_fieldent *f = upb_inttable_iter_value(i); + upb_decoder_jit_assignfieldlabs(f, pclabel_count); + } + // XXX: Won't work for large field numbers; will need to use a upb_table. + m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*)); +} + +// Second pass: for messages that have only one parent, link them to the field +// from which they are called. 
+void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_handlers_msgent *m) { + upb_inttable_iter i; + for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); + i = upb_inttable_next(&m->fieldtab, i)) { + upb_handlers_fieldent *f = upb_inttable_iter_value(i); + if (upb_issubmsgtype(f->type)) { + upb_handlers_msgent *sub_m = upb_handlers_getmsgent(h, f); + if (f->type == UPB_TYPE(GROUP)) { + sub_m->is_group = true; + sub_m->groupnum = upb_inttable_iter_key(i); + } + if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) { + sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel; + } else { + sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE; + } + } + } +} + +// Generates machine code for d's handlers into a fresh mapping (d->jit_code, +// d->jit_size), fills each message's dispatch table, registers the code with +// GDB and finally makes the mapping read+execute. Aborts if memory for the +// code or its bookkeeping cannot be obtained. +void upb_decoder_makejit(upb_decoder *d) { + // Assign pclabels. + uint32_t pclabel_count = 1; + upb_handlers *h = d->dispatcher.handlers; + for (int i = 0; i < h->msgs_len; i++) + upb_decoder_jit_assignmsglabs(&h->msgs[i], &pclabel_count); + for (int i = 0; i < h->msgs_len; i++) + upb_decoder_jit_assignmsglabs2(h, &h->msgs[i]); + + if (h->msgs[0].jit_parent_field_done_pclabel == UPB_NONE) { + h->msgs[0].jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE; + } + + void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals)); + if (!globals) abort(); // Out of memory. + dasm_init(d, 1); + dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX); + dasm_growpc(d, pclabel_count); + dasm_setup(d, upb_jit_actionlist); + + upb_decoder_jit(d); + + dasm_link(d, &d->jit_size); + + // Anonymous mappings take fd -1; mmap() reports failure as MAP_FAILED, not NULL. + d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (d->jit_code == MAP_FAILED) abort(); + + dasm_encode(d, d->jit_code); + + // Create dispatch tables.
+ for (int i = 0; i < h->msgs_len; i++) { + upb_handlers_msgent *m = &h->msgs[i]; + for (uint32_t j = 0; j <= m->max_field_number; j++) { + upb_handlers_fieldent *f = NULL; + for (int k = 0; k < 8; k++) { + f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k); + if (f) break; + } + if (f) { + m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel); + } else { + // Don't handle unknown fields yet. + m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0); + } + } + } + + // Create debug info. + size_t elf_len = src_jit_debug_elf_file_o_len; + d->debug_info = malloc(elf_len); + if (!d->debug_info) abort(); // Out of memory. + memcpy(d->debug_info, src_jit_debug_elf_file_o, elf_len); + // Patch the magic placeholders with the real code address and size. + uint64_t *p = (void*)d->debug_info; + for (; (char*)(p+1) <= d->debug_info + elf_len; ++p) { + if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; } + else if (*p == 0x321) { *p = d->jit_size; } + } + + // Register the JIT-ted code with GDB. + gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry)); + if (!e) abort(); // Out of memory. + e->next_entry = __jit_debug_descriptor.first_entry; + e->prev_entry = NULL; + if (e->next_entry) e->next_entry->prev_entry = e; + e->symfile_addr = d->debug_info; + e->symfile_size = elf_len; + __jit_debug_descriptor.first_entry = e; + __jit_debug_descriptor.relevant_entry = e; + __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER; + __jit_debug_register_code(); + + dasm_free(d); + free(globals); + + // The code cannot be entered unless the mapping really became executable. + if (mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ) != 0) abort(); + + // Best-effort debugging dump of the generated code; skipped if the file + // cannot be opened. + FILE *f = fopen("/tmp/machine-code", "wb"); + if (f) { + fwrite(d->jit_code, d->jit_size, 1, f); + fclose(f); + } +} + +void upb_decoder_freejit(upb_decoder *d) { + munmap(d->jit_code, d->jit_size); + free(d->debug_info); + // TODO: unregister +} diff --git a/src/upb_def.c b/src/upb_def.c index 059edd6..338bd3d 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -349,18 +349,18 @@ static void upb_defbuilder_register_FileDescriptorProto(upb_handlers *h) { upb_defbuilder_FileDescriptorProto_endmsg); upb_register_typed_value(h, GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM, -
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE, + GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE, false, &upb_defbuilder_FileDescriptorProto_package, UPB_NO_VALUE); upb_handlers_typed_push(h, GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM, - GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE); + GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE, true); upb_msgdef_register_DescriptorProto(h); upb_handlers_typed_pop(h); upb_handlers_typed_push(h, GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM, - GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE); + GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true); upb_enumdef_register_EnumDescriptorProto(h); upb_handlers_typed_pop(h); @@ -383,12 +383,13 @@ static void upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) { upb_register_startend(h, NULL, upb_defbuilder_FileDescriptorSet_onendmsg); upb_handlers_typed_push(h, GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM, - GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE); + GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE, true); upb_defbuilder_register_FileDescriptorProto(h); upb_handlers_typed_pop(h); } void upb_defbuilder_reghandlers(upb_handlers *h) { upb_defbuilder_register_FileDescriptorSet(h); + h->should_jit = false; } @@ -492,11 +493,11 @@ static void upb_enumdef_register_EnumValueDescriptorProto(upb_handlers *h) { upb_enumdef_EnumValueDescriptorProto_endmsg); upb_register_typed_value(h, GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM, - GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE, + GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE, false, &upb_enumdef_EnumValueDescriptorProto_name, UPB_NO_VALUE); upb_register_typed_value(h, GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM, - GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE, + GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE, false, &upb_enumdef_EnumValueDescriptorProto_number, 
UPB_NO_VALUE); } @@ -540,12 +541,12 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) { &upb_enumdef_EnumDescriptorProto_endmsg); upb_register_typed_value(h, GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM, - GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE, + GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE, false, &upb_enumdef_EnumDescriptorProto_name, UPB_NO_VALUE); upb_handlers_typed_push(h, GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM, - GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE); + GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE, true); upb_enumdef_register_EnumValueDescriptorProto(h); upb_handlers_typed_pop(h); } @@ -813,27 +814,27 @@ static void upb_fielddef_register_FieldDescriptorProto(upb_handlers *h) { upb_register_startend(h, upb_fielddef_startmsg, upb_fielddef_endmsg); upb_register_typed_value(h, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE, false, &upb_fielddef_ontype, UPB_NO_VALUE); upb_register_typed_value(h, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE, false, &upb_fielddef_onlabel, UPB_NO_VALUE); upb_register_typed_value(h, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE, false, &upb_fielddef_onnumber, UPB_NO_VALUE); upb_register_typed_value(h, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE, false, &upb_fielddef_onname, UPB_NO_VALUE); upb_register_typed_value(h, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE, + 
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE, false, &upb_fielddef_ontypename, UPB_NO_VALUE); upb_register_typed_value(h, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE, false, &upb_fielddef_ondefaultval, UPB_NO_VALUE); } @@ -954,23 +955,23 @@ static void upb_msgdef_register_DescriptorProto(upb_handlers *h) { upb_register_startend(h, &upb_msgdef_startmsg, &upb_msgdef_endmsg); upb_register_typed_value(h, GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM, - GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE, + GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE, false, &upb_msgdef_onname, UPB_NO_VALUE); upb_handlers_typed_push(h, GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM, - GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE); + GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE, true); upb_fielddef_register_FieldDescriptorProto(h); upb_handlers_typed_pop(h); // DescriptorProto is self-recursive, so we must link the definition. 
upb_handlers_typed_link(h, GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM, - GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, 0); + GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, true, 0); upb_handlers_typed_push(h, GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM, - GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE); + GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true); upb_enumdef_register_EnumDescriptorProto(h); upb_handlers_typed_pop(h); diff --git a/src/upb_glue.c b/src/upb_glue.c index 41f974b..b6a0273 100644 --- a/src/upb_glue.c +++ b/src/upb_glue.c @@ -29,7 +29,6 @@ void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, upb_stringsrc_uninit(&strsrc); upb_decoder_uninit(&d); - upb_handlers_uninit(&h); } void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, @@ -53,7 +52,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, upb_stringsink_uninit(&strsink); upb_textprinter_free(p); - upb_handlers_uninit(&h); } void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { @@ -72,7 +70,6 @@ void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { upb_decoder_decode(&d, status); - upb_handlers_uninit(&h); upb_stringsrc_uninit(&strsrc); upb_decoder_uninit(&d); } diff --git a/src/upb_msg.c b/src/upb_msg.c index 6fc321e..aac2c91 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -299,6 +299,13 @@ upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) { static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) { upb_msg *m = _m; upb_fielddef *f = upb_value_getfielddef(fval); + if (upb_isstring(f)) { + //fprintf(stderr, "dmsg_value! this=%p f=%p name=" UPB_STRFMT ", + // " UPB_STRFMT " %p\n", m, f, UPB_STRARG(f->name), UPB_STRARG(val.val.str)); + } else { + //fprintf(stderr, "dmsg_value! 
this=%p f=%p name=" UPB_STRFMT ", + // %llu\n", m, f, UPB_STRARG(f->name), val.val.uint64); + } upb_msg_appendval(m, f, val); return UPB_CONTINUE; } @@ -306,8 +313,11 @@ static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) { static upb_sflow_t upb_dmsgsink_startsubmsg(void *_m, upb_value fval) { upb_msg *m = _m; upb_fielddef *f = upb_value_getfielddef(fval); + //fprintf(stderr, "dmsg_startsubmsg! " UPB_STRFMT " %p\n", UPB_STRARG(fval.val.fielddef->name), f); upb_msgdef *msgdef = upb_downcast_msgdef(f->def); - return UPB_CONTINUE_WITH(upb_msg_appendmsg(m, f, msgdef)); + void *p = upb_msg_appendmsg(m, f, msgdef); + //printf("Continuing with: %p\n", p); + return UPB_CONTINUE_WITH(p); } void upb_msg_regdhandlers(upb_handlers *h) { diff --git a/src/upb_stream.c b/src/upb_stream.c index aebdb42..982c8a3 100644 --- a/src/upb_stream.c +++ b/src/upb_stream.c @@ -11,36 +11,36 @@ /* upb_handlers ***************************************************************/ -static upb_flow_t upb_startmsg_nop(void *closure) { +upb_flow_t upb_startmsg_nop(void *closure) { (void)closure; return UPB_CONTINUE; } -static void upb_endmsg_nop(void *closure, upb_status *status) { +void upb_endmsg_nop(void *closure, upb_status *status) { (void)closure; (void)status; } -static upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) { +upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) { (void)closure; (void)fval; (void)val; return UPB_CONTINUE; } -static upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) { +upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) { (void)fval; return UPB_CONTINUE_WITH(closure); } -static upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) { +upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) { (void)closure; (void)fval; return UPB_CONTINUE; } -static upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, - upb_value val) { +upb_flow_t 
upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { (void)closure; (void)fieldnum; (void)val; @@ -52,6 +52,8 @@ static void upb_msgent_init(upb_handlers_msgent *e) { e->startmsg = &upb_startmsg_nop; e->endmsg = &upb_endmsg_nop; e->unknownval = &upb_unknownval_nop; + e->is_group = false; + e->tablearray = NULL; } void upb_handlers_init(upb_handlers *h, upb_msgdef *md) { @@ -61,6 +63,7 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) { h->top = &h->stack[0]; h->limit = &h->stack[UPB_MAX_TYPE_DEPTH]; h->toplevel_msgdef = md; + h->should_jit = true; if (md) upb_msgdef_ref(md); h->top->msgent_index = 0; @@ -70,19 +73,22 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) { } void upb_handlers_uninit(upb_handlers *h) { - for (int i = 0; i < h->msgs_len; i++) upb_inttable_free(&h->msgs[i].fieldtab); + for (int i = 0; i < h->msgs_len; i++) { + upb_inttable_free(&h->msgs[i].fieldtab); + free(h->msgs[i].tablearray); + } free(h->msgs); upb_msgdef_unref(h->toplevel_msgdef); } static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval( - upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type) { + upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) { uint32_t tag = fieldnum << 3 | upb_types[type].native_wire_type; upb_handlers_fieldent *f = upb_inttable_lookup(&h->msgent->fieldtab, tag); if (!f) { upb_handlers_fieldent new_f = {false, type, -1, UPB_NO_VALUE, - {&upb_value_nop}, &upb_endsubmsg_nop}; + {&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, repeated}; if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop; upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f); @@ -95,9 +101,9 @@ static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval( static upb_handlers_fieldent *upb_handlers_getorcreate( upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, upb_value fval) { + upb_fieldtype_t type, bool repeated, upb_value fval) { 
upb_handlers_fieldent *f = - upb_handlers_getorcreate_without_fval(h, fieldnum, type); + upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); f->fval = fval; return f; } @@ -140,42 +146,40 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, } void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, upb_value_handler_t value, - upb_value fval) { - upb_handlers_getorcreate(h, fieldnum, type, fval)->cb.value = + upb_fieldtype_t type, bool repeated, + upb_value_handler_t value, upb_value fval) { + upb_handlers_getorcreate(h, fieldnum, type, repeated, fval)->cb.value = value ? value : &upb_value_nop; } void upb_register_value(upb_handlers *h, upb_fielddef *f, upb_value_handler_t value, upb_value fval) { assert(f->msgdef == h->top->msgdef); - upb_register_typed_value(h, f->number, f->type, value, fval); + upb_register_typed_value(h, f->number, f->type, upb_isarray(f), value, fval); } void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, + upb_fieldtype_t type, bool repeated, upb_startsubmsg_handler_t start, upb_endsubmsg_handler_t end, upb_value fval) { - upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, fval); + upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval); f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop; f->endsubmsg = end ? 
end : &upb_endsubmsg_nop; } -void upb_handlers_typed_link(upb_handlers *h, - upb_field_number_t fieldnum, - upb_fieldtype_t type, - int frames) { +void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, int frames) { assert(frames <= (h->top - h->stack)); upb_handlers_fieldent *f = - upb_handlers_getorcreate_without_fval(h, fieldnum, type); + upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); f->msgent_index = (h->top - frames)->msgent_index; } void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type) { + upb_fieldtype_t type, bool repeated) { upb_handlers_fieldent *f = - upb_handlers_getorcreate_without_fval(h, fieldnum, type); + upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated); if (h->top == h->limit) abort(); // TODO: make growable. ++h->top; if (f->msgent_index == -1) { @@ -204,8 +208,8 @@ void upb_handlers_push(upb_handlers *h, upb_fielddef *f, bool delegate) { assert(f->msgdef == h->top->msgdef); (void)delegate; // TODO - upb_register_typed_submsg(h, f->number, f->type, start, end, fval); - upb_handlers_typed_push(h, f->number, f->type); + upb_register_typed_submsg(h, f->number, f->type, upb_isarray(f), start, end, fval); + upb_handlers_typed_push(h, f->number, f->type, upb_isarray(f)); } void upb_handlers_typed_pop(upb_handlers *h) { @@ -229,13 +233,14 @@ static upb_handlers_fieldent toplevel_f = { #else {{0}, UPB_VALUETYPE_RAW}, #endif - {NULL}, NULL}; + {NULL}, NULL, 0, 0, 0, false}; void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) { d->handlers = h; for (int i = 0; i < h->msgs_len; i++) upb_inttable_compact(&h->msgs[i].fieldtab); d->stack[0].f = &toplevel_f; + d->limit = &d->stack[UPB_MAX_NESTING]; upb_status_init(&d->status); } @@ -249,10 +254,10 @@ void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end d->top = d->stack; d->top->closure = top_closure; d->top->end_offset = 
top_end_offset; - d->limit = &d->stack[UPB_MAX_NESTING]; } void upb_dispatcher_uninit(upb_dispatcher *d) { + upb_handlers_uninit(d->handlers); upb_status_uninit(&d->status); } diff --git a/src/upb_stream.h b/src/upb_stream.h index 0c75acd..7ae9b8d 100644 --- a/src/upb_stream.h +++ b/src/upb_stream.h @@ -81,6 +81,14 @@ typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval); typedef upb_flow_t (*upb_unknownval_handler_t)( void *closure, upb_field_number_t fieldnum, upb_value val); +upb_flow_t upb_startmsg_nop(void *closure); +void upb_endmsg_nop(void *closure, upb_status *status); +upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val); +upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval); +upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval); +upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val); + typedef struct { bool junk; upb_fieldtype_t type; @@ -93,14 +101,27 @@ typedef struct { upb_startsubmsg_handler_t startsubmsg; } cb; upb_endsubmsg_handler_t endsubmsg; + uint32_t jit_pclabel; + uint32_t jit_pclabel_notypecheck; + uint32_t jit_submsg_done_pclabel; + bool repeated; } upb_handlers_fieldent; -typedef struct { +typedef struct _upb_handlers_msgent { upb_startmsg_handler_t startmsg; upb_endmsg_handler_t endmsg; upb_unknownval_handler_t unknownval; // Maps field number -> upb_handlers_fieldent. upb_inttable fieldtab; + uint32_t jit_startmsg_pclabel; + uint32_t jit_endofbuf_pclabel; + uint32_t jit_endofmsg_pclabel; + uint32_t jit_unknownfield_pclabel; + uint32_t groupnum; + bool is_group; + int32_t jit_parent_field_done_pclabel; + uint32_t max_field_number; + void **tablearray; } upb_handlers_msgent; typedef struct { @@ -115,6 +136,7 @@ struct _upb_handlers { upb_msgdef *toplevel_msgdef; // We own a ref. 
upb_handlers_msgent *msgent; upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit; + bool should_jit; }; typedef struct _upb_handlers upb_handlers; @@ -237,19 +259,17 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, // Low-level functions -- internal-only. void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, upb_value_handler_t value, - upb_value fval); + upb_fieldtype_t type, bool repeated, + upb_value_handler_t value, upb_value fval); void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type, + upb_fieldtype_t type, bool repeated, upb_startsubmsg_handler_t start, upb_endsubmsg_handler_t end, upb_value fval); -void upb_handlers_typed_link(upb_handlers *h, - upb_field_number_t fieldnum, - upb_fieldtype_t type, - int frames); +void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, + upb_fieldtype_t type, bool repeated, int frames); void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, - upb_fieldtype_t type); + upb_fieldtype_t type, bool repeated); void upb_handlers_typed_pop(upb_handlers *h); INLINE upb_handlers_msgent *upb_handlers_getmsgent(upb_handlers *h, @@ -308,8 +328,8 @@ typedef struct { int delegated_depth; // Stack. 
- upb_dispatcher_frame stack[UPB_MAX_NESTING]; upb_status status; + upb_dispatcher_frame stack[UPB_MAX_NESTING]; } upb_dispatcher; INLINE bool upb_dispatcher_skipping(upb_dispatcher *d) { diff --git a/src/upb_string.c b/src/upb_string.c index de633bc..8625f76 100644 --- a/src/upb_string.c +++ b/src/upb_string.c @@ -72,7 +72,6 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len) { - if(str->ptr) *(char*)0 = 0; assert(str->ptr == NULL); assert(start + len <= upb_string_len(target_str)); if (target_str->src) { diff --git a/src/upb_string.h b/src/upb_string.h index 88a513f..5aa5f3b 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -155,9 +155,13 @@ INLINE const char *upb_string_getbufend(upb_string *str) { } // Attempts to recycle the string "str" so it may be reused and have different -// data written to it. After the function returns, "str" points to a writable -// string, which is either the original string if it had no other references -// or a newly created string if it did have other references. +// data written to it. The caller MUST own a reference on the given string +// prior to making this call (ie. the caller must have either created the +// string or obtained a reference with upb_string_getref()). +// +// After the function returns, "str" points to a writable string, which is +// either the original string if it had no other references or a newly created +// string if it did have other references. // // As a special case, passing a pointer to NULL will allocate a new string. 
// This is convenient for the pattern: @@ -171,7 +175,9 @@ INLINE const char *upb_string_getbufend(upb_string *str) { // } INLINE void upb_string_recycle(upb_string **_str) { upb_string *str = *_str; - if(str && upb_atomic_only(&str->refcount)) { + int r; + if(str && ((r = upb_atomic_read(&str->refcount)) == 1 || + (r == _UPB_STRING_REFCOUNT_STACK))) { str->ptr = NULL; str->len = 0; _upb_string_release(str); diff --git a/src/upb_table.c b/src/upb_table.c index b9b9824..a754097 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -102,6 +102,7 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) { upb_inttable_value *table_val; if (_upb_inttable_isarrkey(t, key)) { table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t)); + t->array_count++; //printf("Inserting key %d to Array part! %p\n", key, table_val); } else { t->t.count++; @@ -152,8 +153,8 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) { static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) { for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i); i = upb_inttable_next(src, i)) { - //printf("load check: %d %d\n", upb_inttable_count(dst), upb_inttable_hashtablesize(dst)); - assert((double)(upb_inttable_count(dst)) / + //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst)); + assert((double)(upb_table_count(&dst->t)) / upb_inttable_hashtablesize(dst) <= MAX_LOAD); intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i)); } @@ -209,6 +210,7 @@ void upb_inttable_compact(upb_inttable *t) { } upb_inttable new_table; int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD; + //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t)); upb_inttable_sizedinit(&new_table, array_size, hash_size, upb_table_valuesize(&t->t)); //printf("For %d things, using array size=%d, hash_size = %d\n", 
upb_inttable_count(t), array_size, hash_size); diff --git a/src/upb_varint_decoder.h b/src/upb_varint_decoder.h index 7297f43..d7af90a 100644 --- a/src/upb_varint_decoder.h +++ b/src/upb_varint_decoder.h @@ -30,7 +30,7 @@ typedef struct { // A basic branch-based decoder, uses 32-bit values to get good performance // on 32-bit architectures (but performs well on 64-bits also). -INLINE upb_decoderet upb_decode_varint_branch32(const char *p) { +INLINE upb_decoderet upb_vdecode_branch32(const char *p) { upb_decoderet r = {NULL, 0}; uint32_t low, high = 0; uint32_t b; @@ -54,7 +54,7 @@ done: } // Like the previous, but uses 64-bit values. -INLINE upb_decoderet upb_decode_varint_branch64(const char *p) { +INLINE upb_decoderet upb_vdecode_branch64(const char *p) { uint64_t val; uint64_t b; upb_decoderet r = {(void*)0, 0}; @@ -76,17 +76,9 @@ done: return r; } -// Avoids branches for values >2-bytes. -INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) { - uint64_t b = 0; - upb_decoderet r = {p, 0}; - memcpy(&b, r.p, 2); - if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } - r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); - r.p = p + 2; - if ((b & 0x8000) == 0) return r; - - // >2-byte varint. +// Decodes a varint of at most 8 bytes without branching (except for error). +INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { + uint64_t b; memcpy(&b, r.p, sizeof(b)); uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL; uint64_t stop_bit = ~cbits & (cbits+1); @@ -94,27 +86,19 @@ INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) { b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f); b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff); b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff); - r.val |= b << 14; - r.p += (__builtin_ctzll(stop_bit) + 1) / 8; if (stop_bit == 0) { // Error: unterminated varint. 
upb_decoderet err_r = {(void*)0, 0}; return err_r; } - return r; + upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8), + r.val | (b << 14)}; + return my_r; } -// Avoids branches for values >2-bytes. -INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) { - uint64_t b = 0; - upb_decoderet r = {p, 0}; - memcpy(&b, r.p, 2); - if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } - r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); - r.p = p + 2; - if ((b & 0x8000) == 0) return r; - - // >2-byte varint. +// Another implementation of the previous. +INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { + uint64_t b; memcpy(&b, r.p, sizeof(b)); uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL; uint64_t stop_bit = ~cbits & (cbits + 1); @@ -122,22 +106,46 @@ INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) { b += b & 0x007f007f007f007fULL; b += 3 * (b & 0x0000ffff0000ffffULL); b += 15 * (b & 0x00000000ffffffffULL); - r.val |= b << 7; - r.p += (__builtin_ctzll(stop_bit) + 1) / 8; if (stop_bit == 0) { // Error: unterminated varint. upb_decoderet err_r = {(void*)0, 0}; return err_r; } - return r; + upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8), + r.val | (b << 7)}; + return my_r; } -INLINE upb_decoderet upb_decode_varint_fast(const char *p) { +// Template for a function that checks the first two bytes with branching +// and dispatches 2-10 bytes with a separate function. 
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ +INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \ + uint64_t b = 0; \ + upb_decoderet r = {p, 0}; \ + memcpy(&b, r.p, 2); \ + if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \ + r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \ + r.p = p + 2; \ + if ((b & 0x8000) == 0) return r; \ + return decode_max8_function(r); \ +} + +UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright); +UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); +#undef UPB_VARINT_DECODER_CHECK2 + +// Our canonical functions for decoding varints, based on the currently +// favored best-performing implementations. +INLINE upb_decoderet upb_vdecode_fast(const char *p) { // Use nobranch2 on 64-bit, branch32 on 32-bit. if (sizeof(long) == 8) - return upb_decode_varint_nobranch2(p); + return upb_vdecode_check2_massimino(p); else - return upb_decode_varint_branch32(p); + return upb_vdecode_branch32(p); +} + +INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { + return upb_vdecode_max8_massimino(r); } #ifdef __cplusplus -- cgit v1.2.3