summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2011-04-01 15:40:06 -0700
committerJoshua Haberman <joshua@reverberate.org>2011-04-01 15:40:06 -0700
commit9eb4d695c49a85f7f72ad68c3c31affd61fef984 (patch)
tree79b7fde57e6f31a19405688a5f9e29e3f9cf7ab2 /src
parent19517cc6f39871abf4a0705b49cfed9049ca6033 (diff)
First rough version of the JIT.
It can successfully parse SpeedMessage1. Preliminary results: 750MB/s on Core2 2.4GHz. This number is 2.5x proto2. This isn't apples-to-apples, because proto2 is parsing to a struct and we are just doing stream parsing, but for apps that are currently using proto2, this is the improvement they would see if they could move to stream-based processing. Unfortunately perf-regression-test.py is broken, and I'm not 100% sure why. It would be nice to fix it first (to ensure that there are no performance regressions for the table-based decoder) but I'm really impatient to get the JIT checked in.
Diffstat (limited to 'src')
-rw-r--r--src/upb.h1
-rw-r--r--src/upb_decoder.c95
-rw-r--r--src/upb_decoder.h19
-rw-r--r--src/upb_decoder_x64.asm228
-rw-r--r--src/upb_decoder_x86.dasc649
-rw-r--r--src/upb_def.c37
-rw-r--r--src/upb_glue.c3
-rw-r--r--src/upb_msg.c12
-rw-r--r--src/upb_stream.c63
-rw-r--r--src/upb_stream.h40
-rw-r--r--src/upb_string.c1
-rw-r--r--src/upb_string.h14
-rw-r--r--src/upb_table.c6
-rw-r--r--src/upb_varint_decoder.h74
14 files changed, 866 insertions, 376 deletions
diff --git a/src/upb.h b/src/upb.h
index 5dfd65e..0dfcd5e 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -165,6 +165,7 @@ typedef uint8_t upb_valuetype_t;
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
+#define UPB_TYPE_ENDGROUP 35
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 1b9b5f8..4b71ccd 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -11,16 +11,13 @@
#include "upb_decoder.h"
#include "upb_varint_decoder.h"
-// If the return value is other than UPB_CONTINUE, that is what the last
-// callback returned.
-typedef struct {
- upb_flow_t flow;
- const char *ptr;
-} fastdecode_ret;
-extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
- upb_value_handler_t value_cb, void *closure,
- void *table, int table_size);
-
+#ifdef UPB_USE_JIT_X64
+#define Dst_DECL upb_decoder *d
+#define Dst_REF (d->dynasm)
+#define Dst (d)
+#include "dynasm/dasm_proto.h"
+#include "upb_decoder_x86.h"
+#endif
/* Decoding/Buffering of individual values ************************************/
@@ -28,10 +25,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
-// Constant used to signal that the submessage is a group and therefore we
-// don't know its end offset. This cannot be the offset of a real submessage
-// end because it takes at least one byte to begin a submessage.
-#define UPB_GROUP_END_OFFSET 0
#define UPB_MAX_VARINT_ENCODED_SIZE 10
INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
@@ -54,6 +47,32 @@ INLINE void upb_dstate_setmsgend(upb_decoder *d) {
(void*)UINTPTR_MAX : d->buf + end_offset;
}
+// Pulls the next buffer from the bytesrc. Should be called only when the
+// current buffer is completely empty. Returns true once d->buf/d->ptr/d->end describe the new buffer; returns false (with d->buf and d->end set to NULL) if the bytesrc call fails.
+static bool upb_pullbuf(upb_decoder *d) {
+ assert(upb_decoder_bufleft(d) == 0);
+ int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1; // -1: there was no previous buffer.
+ upb_string_recycle(&d->bufstr);
+ if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
+ d->buf = NULL;
+ d->end = NULL;
+ return false;
+ }
+ if (last_buf_len != -1) {
+ d->buf_stream_offset += last_buf_len; // Account for the buffer we just finished.
+ for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) // Every frame from the stack base up to and including top.
+ if (f->end_offset != UINT32_MAX) // UINT32_MAX entries are left untouched (presumably "no known end" -- confirm).
+ f->end_offset -= last_buf_len; // Rebase so the offset is relative to the start of the new buffer.
+ }
+ d->buf = upb_string_getrobuf(d->bufstr);
+ d->ptr = upb_string_getrobuf(d->bufstr);
+ d->end = d->buf + upb_string_len(d->bufstr);
+ d->jit_end = d->end; //d->end - 12;  (for now the JIT may run all the way to the end of the buffer)
+ upb_string_substr(d->tmp, d->bufstr, 0, 0); // Presumably re-points d->tmp at the new buffer as an empty substring -- confirm.
+ upb_dstate_setmsgend(d);
+ return true;
+}
+}
+
// Called only from the slow path, this function copies the next "len" bytes
// from the stream to "data", adjusting the dstate appropriately.
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
@@ -62,27 +81,8 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
memcpy(data, d->ptr, to_copy);
upb_decoder_advance(d, to_copy);
bytes_wanted -= to_copy;
- if (bytes_wanted == 0) {
- upb_dstate_setmsgend(d);
- return true;
- }
-
- // Get next buffer.
- int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
- upb_string_recycle(&d->bufstr);
- if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
- d->buf = NULL;
- return false;
- }
- if (last_buf_len != -1) {
- d->buf_stream_offset += last_buf_len;
- for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
- if (f->end_offset != UINT32_MAX)
- f->end_offset -= last_buf_len;
- }
- d->buf = upb_string_getrobuf(d->bufstr);
- d->ptr = upb_string_getrobuf(d->bufstr);
- d->end = d->buf + upb_string_len(d->bufstr);
+ if (bytes_wanted == 0) return true;
+ if (!upb_pullbuf(d)) return false;
}
}
@@ -143,7 +143,7 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
- upb_decoderet r = upb_decode_varint_fast(d->ptr);
+ upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
return false;
@@ -229,6 +229,7 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
}
#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; }
+ CHECK(upb_pullbuf(d));
if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err;
// Main loop: executed once per tag/field pair.
@@ -244,14 +245,13 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
-#ifdef USE_X64_FASTPATH
- const char *end = UPB_MIN(d->end, d->submsg_end);
- fastdecode_ret ret = upb_fastdecode(d->ptr, end,
- d->dispatcher.top->handlers.set->value,
- d->dispatcher.top->handlers.closure,
- d->msgdef->itof.array,
- d->msgdef->itof.array_size);
- CHECK_FLOW(ret.flow);
+#ifdef UPB_USE_JIT_X64
+ void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
+ if (d->dispatcher.handlers->should_jit && d->buf) {
+ //fprintf(stderr, "Entering JIT, ptr: %p\n", d->ptr);
+ upb_jit_decode(d);
+ //fprintf(stderr, "Exiting JIT, ptr: %p\n", d->ptr);
+ }
#endif
// Parse/handle tag.
@@ -354,9 +354,13 @@ err:
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(&d->dispatcher, handlers);
+#ifdef UPB_USE_JIT_X64
+ upb_decoder_makejit(d);
+#endif
d->bufstr = NULL;
d->buf = NULL;
d->tmp = NULL;
+ upb_string_recycle(&d->tmp);
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
@@ -373,4 +377,7 @@ void upb_decoder_uninit(upb_decoder *d) {
upb_dispatcher_uninit(&d->dispatcher);
upb_string_unref(d->bufstr);
upb_string_unref(d->tmp);
+#ifdef UPB_USE_JIT_X64
+ upb_decoder_freejit(d);
+#endif
}
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
index bb54930..1be31c4 100644
--- a/src/upb_decoder.h
+++ b/src/upb_decoder.h
@@ -27,13 +27,12 @@ extern "C" {
/* upb_decoder *****************************************************************/
+struct dasm_State;
+
struct _upb_decoder {
// Bytesrc from which we pull serialized data.
upb_bytesrc *bytesrc;
- // Dispatcher to which we push parsed data.
- upb_dispatcher dispatcher;
-
// String to hold our input buffer; is only active if d->buf != NULL.
upb_string *bufstr;
@@ -48,6 +47,7 @@ struct _upb_decoder {
// End of this buffer, relative to *ptr.
const char *end;
+ const char *jit_end;
// Members which may also be written by the JIT:
@@ -57,8 +57,21 @@ struct _upb_decoder {
// End of this submessage, relative to *ptr.
const char *submsg_end;
+ // MIN(end, submsg_end)
+ const char *effective_end;
+
// Where we will store any errors that occur.
upb_status *status;
+
+ // Dispatcher to which we push parsed data.
+ upb_dispatcher dispatcher;
+
+ // JIT-generated machine code (else NULL).
+ char *jit_code;
+ size_t jit_size;
+ char *debug_info;
+
+ struct dasm_State *dynasm;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
deleted file mode 100644
index c417644..0000000
--- a/src/upb_decoder_x64.asm
+++ /dev/null
@@ -1,228 +0,0 @@
-DEFAULT REL ; Default to RIP-relative addressing instead of absolute.
-
-extern _upb_decode_varint_fast64
-
-SECTION .data
-
-; Our dispatch table; used to jump to the right handler, keyed on the field's
-; type.
-dispatch_table:
- dq _upb_fastdecode.cant_fast_path ; field not in table (type == 0). (check_4).
- dq _upb_fastdecode.fixed64 ; double
- dq _upb_fastdecode.fixed32 ; float
- dq _upb_fastdecode.varint ; int64
- dq _upb_fastdecode.varint ; uint64
- dq _upb_fastdecode.varint ; int32
- dq _upb_fastdecode.fixed64 ; fixed64
- dq _upb_fastdecode.fixed32 ; fixed32
- dq _upb_fastdecode.varint ; bool
- dq _upb_fastdecode.string ; string
- dq _upb_fastdecode.cant_fast_path ; group (check_6)
- dq _upb_fastdecode.cant_fast_path ; message
- dq _upb_fastdecode.string ; bytes
- dq _upb_fastdecode.varint ; uint32
- dq _upb_fastdecode.varint ; enum
- dq _upb_fastdecode.fixed32 ; sfixed32
- dq _upb_fastdecode.fixed64 ; sfixed64
- dq _upb_fastdecode.varint_sint32 ; sint32
- dq _upb_fastdecode.varint_sint64 ; sint64
-
- GLOBAL _upb_decode_fast
-
-SECTION .text
-; Register allocation.
-%define BUF rbx ; const char *p, current buf position.
-%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
-%define STRING r12 ; unused
-%define FVAL r13 ; upb_value fval, needs to be preserved across varint decoding call.
-%define UNUSED r14
-%define CLOSURE r15
-
-; Stack layout: *tableptr, uint32_t maxfield_times_8
-%define STACK_SPACE 24 ; this value + 8 must be a multiple of 16.
-%define TABLE_SPILL [rsp] ; our lookup table, indexed by field number.
-%define COMMITTED_BUF_SPILL [rsp+8]
-%define MAXFIELD_TIMES_8_SPILL [rsp+16]
-
-
-; Executing the fast path requires the following conditions:
-; - check_1: there are >=12 bytes left (<=2 byte tag and <=10 byte varint).
-; - check_2: the tag is <= 2 bytes.
-; - check_3: the field number is <= the table size
-; (ie. it must be an array lookup, not a hash lookup).
-; - check_4: the field is known (found in the table).
-; - check_5: the wire type we read is correct for the field number,
-; ("packed" fields are not accepted, yet. this could be handled
-; efficiently by doing an extra check on the "type check failed"
-; path that goes into a tight loop if the encoding was packed).
-; - check_6: the field is not a group or a message (or string, TODO)
-; (this could be relaxed, but due to delegation it's a bit tricky).
-; - check_7: if the value is a string, the entire string is available in
-; the buffer, and our cached string object can be recycled, and
-; our string object already references the source buffer, so
-; absolutely no refcount twiddling is required.
-
-
-%macro decode_and_dispatch_ 0
-align 16
-.decode_and_dispatch:
- ; Load a few values we'll need in a sec.
- mov r8, TABLE_SPILL
- mov r9d, MAXFIELD_TIMES_8_SPILL
-
- mov rax, END
- sub rax, BUF
- cmp rax, 12
- jb _upb_fastdecode.cant_fast_path ; check_1 (<12 bytes left).
-
- ; Decode a 1 or 2-byte varint -> eax.
- mov cl, byte [BUF]
- lea rdi, [BUF+1]
- movzx eax, cl
- and eax, 0x7f
- test cl, cl
- jns .one_byte_tag ; Should be predictable if fields are in order.
- movzx ecx, byte [BUF+1]
- lea rdi, [BUF+2]
- mov edx, ecx
- and edx, 0x7f
- shl edx, 7
- or eax, edx
- test al, al
- js _upb_fastdecode.cant_fast_path ; check_2 (tag was >2 bytes).
-.one_byte_tag:
- mov BUF, rdi
-
- ; Decode tag and dispatch.
- mov ecx, eax
- and eax, 0x3ff8 ; eax now contains field number * 8
- lea r11, [r8+rax*2] ; *2 is really *16, since rax is already *8.
- and ecx, 0x7 ; ecx now contains wire type.
- cmp eax, r9d
- jae _upb_fastdecode.cant_fast_path ; check_3 (field number > table size)
- mov FIELDDEF, [r11+8] ; Lookup fielddef (upb_itof_ent.f)
- movzx rdx, BYTE [r11+1] ; Lookup field type.
- mov rax, qword dispatch_table
- jmp [rax+rdx*8]
-%endmacro
-
-%macro decode_and_dispatch 0
- jmp .decode_and_dispatch
-%endmacro
-
-%macro call_callback 0
- ; Value arg must already be in rdx when macro is called.
- mov rdi, CLOSURE
- mov rsi, FIELDDEF
- mov rcx, 33 ; RAW; we could pass the correct type, or only do this in non-debug modes.
- call CALLBACK
- mov COMMITTED_BUF_SPILL, BUF
- cmp eax, 0
- jne .done ; Caller requested BREAK or SKIPSUBMSG.
-%endmacro
-
-%macro check_type 1
- cmp ecx, %1
- jne _upb_fastdecode.cant_fast_path ; check_5 (wire type check failed).
-%endmacro
-
-; extern upb_flow_t upb_fastdecode(const char **p, const char *end,
-; upb_value_handler_t value_cb, void *closure,
-; void *table, int table_size);
-align 16
-global _upb_fastdecode
-_upb_fastdecode:
- ; We use all callee-save regs.
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- sub rsp, STACK_SPACE
-
- ; Parse arguments into reg vals and stack.
- mov BUF, rdi
- mov COMMITTED_BUF_SPILL, rdi
- mov END, rsi
- mov CALLBACK, rdx
- mov CLOSURE, rcx
- mov TABLE_SPILL, r8
- shl r9, 3
- mov MAXFIELD_TIMES_8_SPILL, r9
-
- decode_and_dispatch
-
-align 16
-.varint:
- call _upb_decode_varint_fast64 ; BUF is already in rdi.
- test rax, rax
- jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
- mov BUF, rax
- call_callback ; rdx already holds value.
- decode_and_dispatch_
-
-align 16
-.fixed32:
- mov edx, DWORD [BUF] ; Might be unaligned, but that's ok.
- add BUF, 4
- call_callback
- decode_and_dispatch
-
-align 16
-.fixed64:
- mov rdx, QWORD [BUF] ; Might be unaligned, but that's ok.
- add BUF, 8
- call_callback
- decode_and_dispatch
-
-align 16
-.varint_sint32:
- call _upb_decode_varint_fast64 ; BUF is already in rdi.
- test rax, rax
- jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
- mov BUF, rax
-
- ; Perform 32-bit zig-zag decoding.
- mov ecx, edx
- shr edx, 1
- and ecx, 0x1
- neg ecx
- xor edx, ecx
- call_callback
- decode_and_dispatch
-
-align 16
-.varint_sint64:
- call _upb_decode_varint_fast64 ; BUF is already in rdi.
- test rax, rax
- jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
- mov BUF, rax
-
- ; Perform 64-bit zig-zag decoding.
- mov rcx, rdx
- shr rdx, 1
- and ecx, 0x1
- neg rcx
- xor rdx, rcx
- call_callback
- decode_and_dispatch
-
-align 16
-.string:
-
-.cant_fast_path:
- mov rax, 0 ; UPB_CONTINUE -- continue as before.
-.done:
- ; If coming via done, preserve the user callback's return in rax.
-
- ; Return committed buf pointer as second parameter.
- mov rdx, COMMITTED_BUF_SPILL
- add rsp, STACK_SPACE
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
- ret
diff --git a/src/upb_decoder_x86.dasc b/src/upb_decoder_x86.dasc
new file mode 100644
index 0000000..71df08f
--- /dev/null
+++ b/src/upb_decoder_x86.dasc
@@ -0,0 +1,649 @@
+|//
+|// upb - a minimalist implementation of protocol buffers.
+|//
+|// Copyright (c) 2011 Google Inc. See LICENSE for details.
+|// Author: Josh Haberman <jhaberman@gmail.com>
+|//
+|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
+|// generates code specialized to parsing the specific message and
+|// calling specific handlers.
+
+#define UPB_NONE -1
+#define UPB_MULTIPLE -2
+#define UPB_TOPLEVEL_ONE -3
+
+#include <sys/mman.h>
+#include "dynasm/dasm_proto.h"
+#include "dynasm/dasm_x86.h"
+
+// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
+// at runtime. GDB 7.x+ has defined an interface for doing this, and these
+// structure/function definitions are copied out of gdb/jit.h
+//
+// We need to give GDB an ELF file at runtime describing the symbols we have
+// generated. To avoid implementing the ELF format, we generate an ELF file
+// at compile-time and compile it in as a character string. We can replace
+// a few key constants (address of JIT-ted function and its size) by looking
+// for a few magic numbers and doing a dumb string replacement.
+#include "jit_debug_elf_file.h"
+
+typedef enum
+{
+ GDB_JIT_NOACTION = 0,
+ GDB_JIT_REGISTER,
+ GDB_JIT_UNREGISTER
+} jit_actions_t;
+
+typedef struct gdb_jit_entry {
+ struct gdb_jit_entry *next_entry;
+ struct gdb_jit_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+} gdb_jit_entry;
+
+typedef struct {
+ uint32_t version;
+ uint32_t action_flag;
+ gdb_jit_entry *relevant_entry;
+ gdb_jit_entry *first_entry;
+} gdb_jit_descriptor;
+
+gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
+
+void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
+
+|.arch x64
+|.actionlist upb_jit_actionlist
+|.globals UPB_JIT_GLOBAL_
+|.globalnames upb_jit_globalnames
+|
+|// Calling conventions.
+|.define ARG1_64, rdi
+|.define ARG2_8, sil
+|.define ARG2_32, esi
+|.define ARG2_64, rsi
+|.define ARG3_8, dl
+|.define ARG3_32, edx
+|.define ARG3_64, rdx
+|
+|// Register allocation / type map.
+|// ALL of the code in this file uses these register allocations.
+|// When we "call" within this file, we do not use regular calling
+|// conventions, but of course when calling to user callbacks we must.
+|.define PTR, rbx
+|.define CLOSURE, r12
+|.type FRAME, upb_dispatcher_frame, r13
+|.type STRING, upb_string, r14
+|.type DECODER, upb_decoder, r15
+|
+|.macro callp, addr
+|| if ((uintptr_t)addr < 0xffffffff) { // Address fits in 32 bits: emit a direct call.
+ | call &addr
+|| } else { // Otherwise load the full 64-bit address and call through rax.
+ | mov64 rax, (uintptr_t)addr
+ | call rax // Note: this path overwrites rax before the call.
+|| }
+|.endmacro
+|
+|// Checks PTR for end-of-buffer: when PTR >= DECODER->effective_end, jumps to ->exit_jit for groups and to the message's jit_endofbuf_pclabel otherwise.
+|.macro check_eob, m
+| cmp PTR, DECODER->effective_end
+|| if (m->is_group) { // The branch target is chosen at code-generation time, not at run time.
+ | jae ->exit_jit
+|| } else {
+ | jae =>m->jit_endofbuf_pclabel
+|| }
+|.endmacro
+|
+|// Decodes varint from [PTR + offset] -> ARG3. The first bytes of the varint must already be loaded in ecx (the code tests cl, ch and cx).
+|// Saves new pointer as rax. Uses esi as scratch; the long path also writes ARG1/ARG2 and makes a call.
+|.macro decode_loaded_varint, offset
+| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
+| lea rax, [PTR + offset + 1]
+| mov ARG3_32, ecx
+| and ARG3_32, 0x7f
+| test cl, cl // Sign flag = continuation bit of the first byte.
+| jns >9 // One-byte varint: done.
+| lea rax, [PTR + offset + 2]
+| movzx esi, ch
+| and esi, 0x7f
+| shl esi, 7
+| or ARG3_32, esi
+| test cx, cx // Sign flag = continuation bit of the second byte.
+| jns >9 // Two-byte varint: done.
+| mov ARG1_64, rax // Pointer just past the two bytes already consumed.
+| mov ARG2_32, ARG3_32 // Low 14 bits decoded so far.
+| callp upb_vdecode_max8_fast // Presumably returns the new pointer in rax and the value in rdx -- confirm against upb_varint_decoder.h.
+| test rax, rax
+| jz ->exit_jit // >10-byte varint.
+|9:
+|.endmacro
+|
+|.macro decode_varint, offset
+| mov ecx, dword [PTR + offset] // Always loads 4 bytes, regardless of how long the varint actually is.
+| decode_loaded_varint offset // Value -> ARG3, new pointer -> rax.
+| mov PTR, rax // Advance PTR past the varint.
+|.endmacro
+|
+|// Decode the tag -> edx, then jump through the message's tablearray. The tag bytes must already be loaded in ecx.
+|// Could specialize this by avoiding the value masking: could just key the
+|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
+|// Currently only support tables where all entries are in the array part.
+|.macro dyndispatch, m
+| decode_loaded_varint, 0
+| mov ecx, edx
+| shr ecx, 3 // ecx = tag >> 3 (table index).
+| and edx, 0x7 // edx = low three bits of the tag (the wire type checked at each field label).
+| cmp ecx, m->max_field_number // Bounds-check the field.
+| ja ->exit_jit // In the future; could be unknown label
+| mov rcx, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
+| jmp rcx // Dispatch: unpredictable jump.
+|.endmacro
+|
+|.macro setmsgend, m
+| mov rsi, DECODER->jit_end // Recomputes DECODER->submsg_end and DECODER->effective_end for message m; uses rax and rsi as scratch.
+|| if (m->is_group) {
+| mov64 rax, 0xffffffffffffffff // Groups: submsg_end is set to all-ones, so only jit_end bounds the parse.
+| mov qword DECODER->submsg_end, rax
+| mov DECODER->effective_end, rsi
+|| } else {
+| // Could store a correctly-biased version in the frame, at the cost of
+| // a larger stack.
+| mov eax, dword FRAME->end_offset
+| add rax, qword DECODER->buf
+| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset
+| cmp rax, rsi
+| jb >1
+| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end)
+|1:
+| mov DECODER->effective_end, rax
+|| }
+|.endmacro
+|
+|// rcx contains the next input bytes (loaded by the caller); compare them
+|// against "tag", but since the tag is a varint we must only compare as many
+|// bytes as actually have data. Leaves the flags set for a following je/jne.
+|// Note: the 3-byte and 5-byte cases mask rcx in place, so the bytes above
+|// the tag are no longer available in rcx afterwards.
+|.macro checktag, tag
+|| switch (upb_value_size(tag)) {
+|| case 1:
+| cmp cl, tag
+|| break;
+|| case 2:
+| cmp cx, tag
+|| break;
+|| case 3:
+| and ecx, 0xffffff // 3 bytes
+| cmp rcx, tag
+|| break; // Previously missing: fell through and emitted a second compare.
+|| case 4:
+| cmp ecx, tag
+|| break;
+|| case 5:
+| mov64 rdx, 0xffffffffff // 5 bytes
+| and rcx, rdx
+| cmp rcx, tag
+|| break;
+|| default: abort();
+|| }
+|.endmacro
+|
+|// Loads the field's fval into ARG2 for a handler call, picking the shortest
+|// encoding that can represent it:
+|//   0           -> xor (zeroes the full register)
+|//   < 2^32 - 1  -> 32-bit mov (zero-extends into the full register)
+|//   otherwise   -> mov64; a plain "mov r64, imm" only carries a 32-bit
+|//                  immediate and would truncate larger values (e.g. pointers).
+|.macro loadfval, f
+|| if (f->fval.val.uint64 == 0) {
+| xor ARG2_32, ARG2_32
+|| } else if (f->fval.val.uint64 < 0xffffffff) {
+| mov ARG2_32, f->fval.val.uint64
+|| } else {
+| mov64 ARG2_64, f->fval.val.uint64
+|| }
+|.endmacro
+
+#include <stdlib.h>
+#include "upb_varint_decoder.h"
+
+// Returns the number of bytes needed to hold "val" (1 for val == 0).
+static size_t upb_value_size(uint64_t val) {
+ if (val == 0) return 1;
+#ifdef __GNUC__
+ int top_bit = 63 - __builtin_clzll(val); // 0-based index of the highest set bit.
+#else
+ int top_bit = 0;
+ for (uint64_t rest = val >> 1; rest != 0; rest >>= 1) top_bit++;
+#endif
+ return top_bit / 8 + 1;
+}
+
+// Returns the varint encoding of "val" packed into a uint64_t, first encoded
+// byte in the least-significant byte. Each encoded byte carries 7 bits of
+// "val"; every byte except the last has its top (continuation) bit set.
+// Returns 0 for val == 0. Only encodings of up to 8 bytes fit in the result;
+// the visible callers pass 32-bit keys, which need at most 5.
+static uint64_t upb_encode_varint(uint64_t val)
+{
+ uint64_t ret = 0;
+ for (int bitpos = 0; val; bitpos+=8, val >>=7) {
+ // Set the continuation bit of the previous byte. The constant must be
+ // 64 bits wide: with a plain int, "1 << 31" (reached at bitpos == 32) is
+ // undefined and in practice sign-extends into the upper 32 bits.
+ if (bitpos > 0) ret |= ((uint64_t)1 << (bitpos-1));
+ ret |= (val & 0x7f) << bitpos;
+ }
+ return ret;
+}
+
+// PTR should point to the beginning of the tag.
+// Emits the code that parses one field of message "m" and delivers it to the
+// field's handler(s).
+//   tag, f:           encoded tag and handler entry of the field being emitted.
+//   next_tag, next_f: encoded tag and handler entry of the field that is
+//                     checked for first after this one (next_tag == 0: none),
+//                     before falling back to dynamic dispatch.
+// At run time, PTR should point to the beginning of the tag when the emitted
+// code is entered.
+static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
+ upb_handlers_msgent *m,
+ upb_handlers_fieldent *f, upb_handlers_fieldent *next_f) {
+ int tag_size = upb_value_size(tag);
+
+ // PC-label for the dispatch table.
+ // We check the wire type (which must be loaded in edx) because the
+ // table is keyed on field number, not type.
+ |=>f->jit_pclabel:
+ | cmp edx, upb_types[f->type].native_wire_type
+ | jne ->exit_jit // In the future: could be an unknown field.
+ |=>f->jit_pclabel_notypecheck:
+ |1: // Label for repeating this field.
+
+ // Decode the value into arg 3 for the callback.
+ switch (f->type) {
+ case UPB_TYPE(DOUBLE):
+ case UPB_TYPE(FIXED64):
+ case UPB_TYPE(SFIXED64):
+ | mov ARG3_64, qword [PTR + tag_size]
+ | add PTR, 8 + tag_size
+ break;
+
+ case UPB_TYPE(FLOAT):
+ case UPB_TYPE(FIXED32):
+ case UPB_TYPE(SFIXED32):
+ | mov ARG3_32, dword [PTR + tag_size]
+ | add PTR, 4 + tag_size
+ break;
+
+ case UPB_TYPE(BOOL):
+ // Can't assume it's one byte long, because bool must be wire-compatible
+ // with all of the varint integer types.
+ | decode_varint tag_size
+ | test ARG3_64, ARG3_64
+ | setne ARG3_8 // Other bytes left with val, should be ok.
+ break;
+
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(ENUM):
+ | decode_varint tag_size
+ break;
+
+ case UPB_TYPE(SINT64):
+ // 64-bit zig-zag decoding.
+ | decode_varint tag_size
+ | mov rax, ARG3_64
+ | shr ARG3_64, 1
+ | and rax, 1
+ | neg rax
+ | xor ARG3_64, rax
+ break;
+
+ case UPB_TYPE(SINT32):
+ // 32-bit zig-zag decoding.
+ | decode_varint tag_size
+ | mov eax, ARG3_32
+ | shr ARG3_32, 1
+ | and eax, 1
+ | neg eax
+ | xor ARG3_32, eax
+ break;
+
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES):
+ // We only handle the case where the entire string is in our current
+ // buf, which sidesteps any security problems. The C path has more
+ // robust checks.
+ | decode_varint tag_size
+ | mov STRING->len, ARG3_32
+ | mov STRING->ptr, PTR
+ | add PTR, ARG3_64
+ | mov ARG3_64, STRING
+ | cmp PTR, DECODER->effective_end
+ | ja ->exit_jit // Can't deliver, whole string not in buf.
+ break;
+
+ case UPB_TYPE_ENDGROUP: // A pseudo-type.
+ | add PTR, tag_size
+ | mov DECODER->ptr, PTR
+ | jmp =>m->jit_endofmsg_pclabel
+ return;
+
+ case UPB_TYPE(MESSAGE):
+ // Decode the delimited length into ARG3.
+ | decode_varint tag_size
+ // Fall through (intentional): groups have no length prefix.
+ case UPB_TYPE(GROUP):
+ // Will dispatch callbacks and call submessage in a second.
+ break;
+
+ default: abort();
+ }
+ // Commit our work by advancing ptr.
+ // (If in the future we wanted to support a UPB_SUSPEND_AGAIN that
+ // suspends the decoder and redelivers the value later, we would
+ // need to adjust this to happen perhaps after the callback ran).
+ | mov DECODER->ptr, PTR
+
+ // Load closure and fval into arg registers.
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+
+ // Call callbacks.
+ if (upb_issubmsgtype(f->type)) {
+ // Call startsubmsg handler (if any).
+ // Either way we must end up with the delimited length in r12d and the
+ // closure for the submessage in rdx.
+ if (f->cb.startsubmsg != upb_startsubmsg_nop) {
+ // upb_sflow_t startsubmsg(void *closure, upb_value fval)
+ | mov r12d, ARG3_32
+ | callp f->cb.startsubmsg
+ } else {
+ // The length lives in edx (ARG3) and the closure in r12 (CLOSURE), so
+ // the two moves clobber each other's source: save the length first.
+ // (rax is free here; it is reloaded just below.)
+ | mov eax, ARG3_32
+ | mov rdx, CLOSURE
+ | mov r12d, eax
+ }
+ // Push a stack frame (not the CPU stack, the upb_decoder stack).
+ | lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
+ | cmp rax, qword DECODER->dispatcher.limit
+ | jae ->exit_jit // Frame stack overflow.
+ | mov qword FRAME:rax->f, f
+ | mov qword FRAME:rax->closure, rdx
+ | mov rsi, PTR
+ | sub rsi, DECODER->buf
+ | add r12d, esi
+ | mov dword FRAME:rax->end_offset, r12d // = (d->ptr - d->buf) + delim_len
+ | mov CLOSURE, rdx
+ | mov DECODER->dispatcher.top, rax
+ | mov FRAME, rax
+
+ upb_handlers_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f);
+ if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
+ | jmp =>sub_m->jit_startmsg_pclabel;
+ } else {
+ | call =>sub_m->jit_startmsg_pclabel;
+ }
+
+ |=>f->jit_submsg_done_pclabel:
+ // Pop a stack frame.
+ | sub FRAME, sizeof(upb_dispatcher_frame)
+ | mov DECODER->dispatcher.top, FRAME
+ | setmsgend m
+ | mov CLOSURE, FRAME->closure
+
+ // Call endsubmsg handler (if any).
+ if (f->endsubmsg != upb_endsubmsg_nop) {
+ // upb_flow_t endsubmsg(void *closure, upb_value fval);
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+ | callp f->endsubmsg
+ }
+ } else {
+ | callp f->cb.value
+ }
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+
+ // Epilogue: load next tag, check for repeated field.
+ | check_eob m
+ | mov rcx, qword [PTR]
+ if (f->repeated) {
+ | checktag tag
+ | je <1
+ }
+ if (next_tag != 0) {
+ | checktag next_tag
+ | je =>next_f->jit_pclabel_notypecheck
+ }
+
+ // Fall back to dynamic dispatch. Replicate the dispatch
+ // here so we can learn what fields generally follow others.
+ | dyndispatch m
+ |1:
+}
+
+// qsort() comparator for uint32_t values, ascending. Returns <0, 0 or >0.
+// Compares instead of subtracting: an unsigned difference converted to int
+// has the wrong sign whenever the operands differ by more than INT_MAX.
+static int upb_compare_uint32(const void *a, const void *b) {
+ uint32_t x = *(const uint32_t*)a;
+ uint32_t y = *(const uint32_t*)b;
+ return (x > y) - (x < y);
+}
+
+// Emits the code for one message type "m": the start-of-message entry point,
+// one parsing stub per registered field (in ascending key order), and the
+// end-of-buffer / end-of-message handling.
+static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
+ |=>m->jit_startmsg_pclabel:
+ // Call startmsg handler (if any):
+ if (m->startmsg != upb_startmsg_nop) {
+ // upb_flow_t startmsg(void *closure);
+ | mov ARG1_64, FRAME->closure
+ | callp m->startmsg
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ }
+
+ | setmsgend m
+ | check_eob m
+ | mov ecx, dword [PTR]
+ | dyndispatch m
+
+ // --------- New code section (does not fall through) ------------------------
+
+ // Emit code for parsing each field (dynamic dispatch contains pointers to
+ // all of these).
+
+ // Create an ordering over the fields (inttable ordering is undefined).
+ int num_keys = upb_inttable_count(&m->fieldtab);
+ uint32_t *keys = malloc(num_keys * sizeof(*keys));
+ // malloc(0) may legitimately return NULL; anything else is out-of-memory,
+ // which we cannot recover from here.
+ if (keys == NULL && num_keys > 0) abort();
+ int idx = 0;
+ for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+ i = upb_inttable_next(&m->fieldtab, i)) {
+ keys[idx++] = upb_inttable_iter_key(i);
+ }
+ qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
+
+
+ // Each field is emitted one iteration late, once the field that follows it
+ // is known; the last field is emitted after the loop.
+ upb_handlers_fieldent *last_f = NULL;
+ uint32_t last_tag = 0;
+ for(int i = 0; i < num_keys; i++) {
+ uint32_t key = keys[i];
+ upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
+ uint32_t tag = upb_encode_varint(key);
+ if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
+ last_tag = tag;
+ last_f = f;
+ }
+
+ free(keys);
+
+ // last_f is NULL when the message has no registered fields; in that case
+ // there is no pending field to emit (and emitting one would dereference
+ // NULL).
+ if (m->is_group) {
+ // Create a fake fieldent for handling "end group."
+ upb_handlers_fieldent f = {0, UPB_TYPE_ENDGROUP, 0, UPB_NO_VALUE, {NULL}, NULL, 0, 0, 0, false};
+ if (last_f) upb_decoder_jit_field(d, last_tag, m->groupnum, m, last_f, &f);
+ upb_decoder_jit_field(d, m->groupnum, 0, m, &f, NULL);
+ } else {
+ if (last_f) upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
+ }
+
+ // --------- New code section (does not fall through) ------------------------
+
+ // End-of-buf / end-of-message.
+ if (!m->is_group) {
+ // This case doesn't exist for groups, because there eob really means
+ // eob, so that case just exits the jit directly.
+ |=>m->jit_endofbuf_pclabel:
+ | cmp PTR, DECODER->submsg_end
+ | jb ->exit_jit // We are at eob, but not end-of-submsg.
+ }
+
+ |=>m->jit_endofmsg_pclabel:
+ // We are at end-of-submsg: call endmsg handler (if any):
+ if (m->endmsg != upb_endmsg_nop) {
+ // void endmsg(void *closure, upb_status *status) {
+ | mov ARG1_64, FRAME->closure
+ | lea ARG2_64, DECODER->dispatcher.status
+ | callp m->endmsg
+ }
+
+ // Return to whoever entered this message: via "ret" when several parent
+ // fields share it (it was entered with "call"), otherwise with a direct jump.
+ if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
+ | ret
+ } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
+ | jmp ->exit_jit
+ } else {
+ | jmp =>m->jit_parent_field_done_pclabel
+ }
+
+}
+
+static void upb_decoder_jit(upb_decoder *d) { // Emits the whole JIT function: prologue, code for every message in the handlers, and the shared exit path.
+ | push rbp
+ | mov rbp, rsp
+ | push r15 // Save the five registers this file uses for DECODER, STRING, FRAME, CLOSURE and PTR.
+ | push r14
+ | push r13
+ | push r12
+ | push rbx
+ | mov DECODER, ARG1_64 // The generated function receives the upb_decoder* as its first argument.
+ | mov FRAME, DECODER:ARG1_64->dispatcher.top
+ | mov STRING, DECODER:ARG1_64->tmp
+ | mov CLOSURE, FRAME->closure
+ | mov PTR, DECODER->ptr // NOTE(review): six 8-byte pushes after entry appear to leave rsp off a 16-byte boundary at handler call sites -- confirm against the x86-64 SysV ABI.
+
+ upb_handlers *h = d->dispatcher.handlers;
+ if (h->msgs[0].jit_parent_field_done_pclabel == UPB_MULTIPLE) { // Top-level message is also used as a submessage, so its code ends in "ret" and must be entered with "call".
+ | call =>h->msgs[0].jit_startmsg_pclabel
+ | jmp ->exit_jit
+ }
+
+ // TODO: push return addresses for re-entry (will be necessary for multiple
+ // buffer support).
+ for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, &h->msgs[i]);
+
+ |->exit_jit:
+ | pop rbx // Restore in reverse push order. NOTE(review): code that jumps here from inside a "call"-ed message would still have return addresses on the stack -- confirm.
+ | pop r12
+ | pop r13
+ | pop r14
+ | pop r15
+ | leave
+ | ret
+ |=>0:
+ | callp &abort // pclabel 0 is where upb_decoder_makejit() points dispatch-table slots that have no field.
+}
+
+// Reserves three consecutive pclabels for field "f" (type-checked entry,
+// entry without type check, submessage-done) and advances *pclabel_count
+// past them.
+void upb_decoder_jit_assignfieldlabs(upb_handlers_fieldent *f,
+ uint32_t *pclabel_count) {
+ uint32_t next_label = *pclabel_count;
+ f->jit_pclabel = next_label++;
+ f->jit_pclabel_notypecheck = next_label++;
+ f->jit_submsg_done_pclabel = next_label++;
+ *pclabel_count = next_label;
+}
+
+void upb_decoder_jit_assignmsglabs(upb_handlers_msgent *m, // First pass: assigns pclabels to message "m" and all of its fields, and allocates its dispatch table.
+ uint32_t *pclabel_count) {
+ m->jit_startmsg_pclabel = (*pclabel_count)++;
+ m->jit_endofbuf_pclabel = (*pclabel_count)++;
+ m->jit_endofmsg_pclabel = (*pclabel_count)++;
+ m->jit_unknownfield_pclabel = (*pclabel_count)++;
+ m->jit_parent_field_done_pclabel = UPB_NONE; // No parent field known yet; filled in by upb_decoder_jit_assignmsglabs2().
+ m->max_field_number = 0;
+ upb_inttable_iter i;
+ for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+ i = upb_inttable_next(&m->fieldtab, i)) {
+ uint32_t key = upb_inttable_iter_key(i);
+ m->max_field_number = UPB_MAX(m->max_field_number, key); // NOTE(review): this is the largest table key; upb_decoder_makejit() looks keys up as (j << 3) | k, so the key may include wire-type bits -- confirm.
+ upb_handlers_fieldent *f = upb_inttable_iter_value(i);
+ upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+ }
+ // XXX: Won't work for large field numbers; will need to use a upb_table.
+ m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*)); // One code pointer per slot, filled in by upb_decoder_makejit(). NOTE(review): the malloc result is not checked.
+}
+
+// Second pass: for messages that have only one parent, link them to the field
+// from which they are called.
+void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_handlers_msgent *m) {
+ // Walk every field of m; for each submessage-typed field, record on the
+ // child message whether it is a group and which field(s) it is reached from.
+ upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
+ while (!upb_inttable_done(it)) {
+ upb_handlers_fieldent *field = upb_inttable_iter_value(it);
+ if (upb_issubmsgtype(field->type)) {
+ upb_handlers_msgent *child = upb_handlers_getmsgent(h, field);
+ if (field->type == UPB_TYPE(GROUP)) {
+ child->is_group = true;
+ child->groupnum = upb_inttable_iter_key(it);
+ }
+ // A second (or later) parent demotes the link to UPB_MULTIPLE.
+ if (child->jit_parent_field_done_pclabel != UPB_NONE) {
+ child->jit_parent_field_done_pclabel = UPB_MULTIPLE;
+ } else {
+ child->jit_parent_field_done_pclabel = field->jit_submsg_done_pclabel;
+ }
+ }
+ it = upb_inttable_next(&m->fieldtab, it);
+ }
+}
+
+// Generates the machine code for d's handlers: assigns pclabels, runs the
+// DynASM passes, maps the code (stored in d->jit_code / d->jit_size), fills
+// in each message's dispatch table, registers the code with GDB and finally
+// makes the mapping executable. Aborts if the code mapping cannot be created.
+void upb_decoder_makejit(upb_decoder *d) {
+ // Assign pclabels.
+ uint32_t pclabel_count = 1;
+ upb_handlers *h = d->dispatcher.handlers;
+ for (int i = 0; i < h->msgs_len; i++)
+ upb_decoder_jit_assignmsglabs(&h->msgs[i], &pclabel_count);
+ for (int i = 0; i < h->msgs_len; i++)
+ upb_decoder_jit_assignmsglabs2(h, &h->msgs[i]);
+
+ if (h->msgs[0].jit_parent_field_done_pclabel == UPB_NONE) {
+ h->msgs[0].jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
+ }
+
+ void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
+ dasm_init(d, 1);
+ dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
+ dasm_growpc(d, pclabel_count);
+ dasm_setup(d, upb_jit_actionlist);
+
+ upb_decoder_jit(d);
+
+ dasm_link(d, &d->jit_size);
+
+ // Anonymous mappings take no file; portable code passes fd == -1.
+ d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ // This function has no way to report failure, and encoding into
+ // MAP_FAILED would write through an invalid pointer, so stop here.
+ if ((void*)d->jit_code == MAP_FAILED) abort();
+
+ dasm_encode(d, d->jit_code);
+
+ // Create dispatch tables.
+ for (int i = 0; i < h->msgs_len; i++) {
+ upb_handlers_msgent *m = &h->msgs[i];
+ for (uint32_t j = 0; j <= m->max_field_number; j++) {
+ // Try every possible low-three-bits value for slot j.
+ upb_handlers_fieldent *f = NULL;
+ for (int k = 0; k < 8; k++) {
+ f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
+ if (f) break;
+ }
+ if (f) {
+ m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+ } else {
+ // Don't handle unknown fields yet.
+ m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+ }
+ }
+ }
+
+ // Create debug info: copy the template ELF file and patch the magic
+ // numbers with the real code address and size.
+ size_t elf_len = src_jit_debug_elf_file_o_len;
+ d->debug_info = malloc(elf_len);
+ memcpy(d->debug_info, src_jit_debug_elf_file_o, elf_len);
+ uint64_t *p = (void*)d->debug_info;
+ for (; (void*)(p+1) <= (void*)(d->debug_info + elf_len); ++p) {
+ if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
+ if (*p == 0x321) { *p = d->jit_size; }
+ }
+
+ // Register the JIT-ted code with GDB.
+ gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry));
+ e->next_entry = __jit_debug_descriptor.first_entry;
+ e->prev_entry = NULL;
+ if (e->next_entry) e->next_entry->prev_entry = e;
+ e->symfile_addr = d->debug_info;
+ e->symfile_size = elf_len;
+ __jit_debug_descriptor.first_entry = e;
+ __jit_debug_descriptor.relevant_entry = e;
+ __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
+ __jit_debug_register_code();
+
+ dasm_free(d);
+ free(globals);
+
+ mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);
+
+ // Best-effort dump of the generated code for offline inspection; skipped
+ // if the file cannot be opened.
+ FILE *f = fopen("/tmp/machine-code", "wb");
+ if (f) {
+ fwrite(d->jit_code, d->jit_size, 1, f);
+ fclose(f);
+ }
+}
+
+// Releases everything upb_decoder_makejit() created for "d": unregisters the
+// code from GDB, unmaps the machine code and frees the debug ELF image.
+void upb_decoder_freejit(upb_decoder *d) {
+ // Unregister from GDB before freeing the symbol file it points at. The
+ // entry is found by its symfile address, which is unique per decoder.
+ gdb_jit_entry *e = __jit_debug_descriptor.first_entry;
+ while (e && e->symfile_addr != d->debug_info) e = e->next_entry;
+ if (e) {
+ // Unlink the entry from the doubly-linked list.
+ if (e->prev_entry) {
+ e->prev_entry->next_entry = e->next_entry;
+ } else {
+ __jit_debug_descriptor.first_entry = e->next_entry;
+ }
+ if (e->next_entry) e->next_entry->prev_entry = e->prev_entry;
+ // Tell GDB which entry went away.
+ __jit_debug_descriptor.relevant_entry = e;
+ __jit_debug_descriptor.action_flag = GDB_JIT_UNREGISTER;
+ __jit_debug_register_code();
+ // Don't leave the descriptor pointing at freed memory.
+ __jit_debug_descriptor.relevant_entry = NULL;
+ __jit_debug_descriptor.action_flag = GDB_JIT_NOACTION;
+ free(e);
+ }
+ munmap(d->jit_code, d->jit_size);
+ free(d->debug_info);
+}
diff --git a/src/upb_def.c b/src/upb_def.c
index 059edd6..338bd3d 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -349,18 +349,18 @@ static void upb_defbuilder_register_FileDescriptorProto(upb_handlers *h) {
upb_defbuilder_FileDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE,
+ GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE, false,
&upb_defbuilder_FileDescriptorProto_package, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE);
+ GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE, true);
upb_msgdef_register_DescriptorProto(h);
upb_handlers_typed_pop(h);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE);
+ GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true);
upb_enumdef_register_EnumDescriptorProto(h);
upb_handlers_typed_pop(h);
@@ -383,12 +383,13 @@ static void upb_defbuilder_register_FileDescriptorSet(upb_handlers *h) {
upb_register_startend(h, NULL, upb_defbuilder_FileDescriptorSet_onendmsg);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM,
- GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE);
+ GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE, true);
upb_defbuilder_register_FileDescriptorProto(h);
upb_handlers_typed_pop(h);
}
void upb_defbuilder_reghandlers(upb_handlers *h) {  // Registers the FileDescriptorSet handler tree on h.
upb_defbuilder_register_FileDescriptorSet(h);
+ h->should_jit = false;  // Overrides the `true` default from upb_handlers_init(); presumably keeps the decoder from JIT-compiling these handlers (confirm where should_jit is read).
}
@@ -492,11 +493,11 @@ static void upb_enumdef_register_EnumValueDescriptorProto(upb_handlers *h) {
upb_enumdef_EnumValueDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_enumdef_EnumValueDescriptorProto_name, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE,
+ GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE, false,
&upb_enumdef_EnumValueDescriptorProto_number, UPB_NO_VALUE);
}
@@ -540,12 +541,12 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
&upb_enumdef_EnumDescriptorProto_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_enumdef_EnumDescriptorProto_name, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM,
- GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE);
+ GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE, true);
upb_enumdef_register_EnumValueDescriptorProto(h);
upb_handlers_typed_pop(h);
}
@@ -813,27 +814,27 @@ static void upb_fielddef_register_FieldDescriptorProto(upb_handlers *h) {
upb_register_startend(h, upb_fielddef_startmsg, upb_fielddef_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE, false,
&upb_fielddef_ontype, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE, false,
&upb_fielddef_onlabel, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE, false,
&upb_fielddef_onnumber, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_fielddef_onname, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE, false,
&upb_fielddef_ontypename, UPB_NO_VALUE);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM,
- GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE, false,
&upb_fielddef_ondefaultval, UPB_NO_VALUE);
}
@@ -954,23 +955,23 @@ static void upb_msgdef_register_DescriptorProto(upb_handlers *h) {
upb_register_startend(h, &upb_msgdef_startmsg, &upb_msgdef_endmsg);
upb_register_typed_value(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE,
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE, false,
&upb_msgdef_onname, UPB_NO_VALUE);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE);
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE, true);
upb_fielddef_register_FieldDescriptorProto(h);
upb_handlers_typed_pop(h);
// DescriptorProto is self-recursive, so we must link the definition.
upb_handlers_typed_link(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, 0);
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE, true, 0);
upb_handlers_typed_push(h,
GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM,
- GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE);
+ GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE, true);
upb_enumdef_register_EnumDescriptorProto(h);
upb_handlers_typed_pop(h);
diff --git a/src/upb_glue.c b/src/upb_glue.c
index 41f974b..b6a0273 100644
--- a/src/upb_glue.c
+++ b/src/upb_glue.c
@@ -29,7 +29,6 @@ void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
- upb_handlers_uninit(&h);
}
void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
@@ -53,7 +52,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_stringsink_uninit(&strsink);
upb_textprinter_free(p);
- upb_handlers_uninit(&h);
}
void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
@@ -72,7 +70,6 @@ void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
upb_decoder_decode(&d, status);
- upb_handlers_uninit(&h);
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 6fc321e..aac2c91 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -299,6 +299,13 @@ upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) {
static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) {  // Passes val to upb_msg_appendval() for the field carried in fval.
upb_msg *m = _m;
upb_fielddef *f = upb_value_getfielddef(fval);
+ if (upb_isstring(f)) {  // Debug tracing only: both branches hold nothing but disabled fprintf calls.
+ //fprintf(stderr, "dmsg_value! this=%p f=%p name=" UPB_STRFMT ",
+ // " UPB_STRFMT " %p\n", m, f, UPB_STRARG(f->name), UPB_STRARG(val.val.str));
+ } else {
+ //fprintf(stderr, "dmsg_value! this=%p f=%p name=" UPB_STRFMT ",
+ // %llu\n", m, f, UPB_STRARG(f->name), val.val.uint64);
+ }
upb_msg_appendval(m, f, val);
return UPB_CONTINUE;
}
@@ -306,8 +313,11 @@ static upb_flow_t upb_dmsgsink_value(void *_m, upb_value fval, upb_value val) {
static upb_sflow_t upb_dmsgsink_startsubmsg(void *_m, upb_value fval) {
upb_msg *m = _m;
upb_fielddef *f = upb_value_getfielddef(fval);
+ //fprintf(stderr, "dmsg_startsubmsg! " UPB_STRFMT " %p\n", UPB_STRARG(fval.val.fielddef->name), f);
upb_msgdef *msgdef = upb_downcast_msgdef(f->def);
- return UPB_CONTINUE_WITH(upb_msg_appendmsg(m, f, msgdef));
+ void *p = upb_msg_appendmsg(m, f, msgdef);
+ //printf("Continuing with: %p\n", p);
+ return UPB_CONTINUE_WITH(p);
}
void upb_msg_regdhandlers(upb_handlers *h) {
diff --git a/src/upb_stream.c b/src/upb_stream.c
index aebdb42..982c8a3 100644
--- a/src/upb_stream.c
+++ b/src/upb_stream.c
@@ -11,36 +11,36 @@
/* upb_handlers ***************************************************************/
-static upb_flow_t upb_startmsg_nop(void *closure) {
+upb_flow_t upb_startmsg_nop(void *closure) {
(void)closure;
return UPB_CONTINUE;
}
-static void upb_endmsg_nop(void *closure, upb_status *status) {
+void upb_endmsg_nop(void *closure, upb_status *status) {
(void)closure;
(void)status;
}
-static upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) {
+upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val) {
(void)closure;
(void)fval;
(void)val;
return UPB_CONTINUE;
}
-static upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) {
+upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval) {
(void)fval;
return UPB_CONTINUE_WITH(closure);
}
-static upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) {
+upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval) {
(void)closure;
(void)fval;
return UPB_CONTINUE;
}
-static upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
- upb_value val) {
+upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
+ upb_value val) {
(void)closure;
(void)fieldnum;
(void)val;
@@ -52,6 +52,8 @@ static void upb_msgent_init(upb_handlers_msgent *e) {
e->startmsg = &upb_startmsg_nop;
e->endmsg = &upb_endmsg_nop;
e->unknownval = &upb_unknownval_nop;
+ e->is_group = false;
+ e->tablearray = NULL;
}
void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
@@ -61,6 +63,7 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
h->top = &h->stack[0];
h->limit = &h->stack[UPB_MAX_TYPE_DEPTH];
h->toplevel_msgdef = md;
+ h->should_jit = true;
if (md) upb_msgdef_ref(md);
h->top->msgent_index = 0;
@@ -70,19 +73,22 @@ void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
}
void upb_handlers_uninit(upb_handlers *h) {
- for (int i = 0; i < h->msgs_len; i++) upb_inttable_free(&h->msgs[i].fieldtab);
+ for (int i = 0; i < h->msgs_len; i++) {
+ upb_inttable_free(&h->msgs[i].fieldtab);
+ free(h->msgs[i].tablearray);
+ }
free(h->msgs);
upb_msgdef_unref(h->toplevel_msgdef);
}
static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
- upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type) {
+ upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) {
uint32_t tag = fieldnum << 3 | upb_types[type].native_wire_type;
upb_handlers_fieldent *f =
upb_inttable_lookup(&h->msgent->fieldtab, tag);
if (!f) {
upb_handlers_fieldent new_f = {false, type, -1, UPB_NO_VALUE,
- {&upb_value_nop}, &upb_endsubmsg_nop};
+ {&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, repeated};
if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop;
upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f);
@@ -95,9 +101,9 @@ static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
static upb_handlers_fieldent *upb_handlers_getorcreate(
upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type, upb_value fval) {
+ upb_fieldtype_t type, bool repeated, upb_value fval) {
upb_handlers_fieldent *f =
- upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+ upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->fval = fval;
return f;
}
@@ -140,42 +146,40 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start,
}
void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type, upb_value_handler_t value,
- upb_value fval) {
- upb_handlers_getorcreate(h, fieldnum, type, fval)->cb.value =
+ upb_fieldtype_t type, bool repeated,
+ upb_value_handler_t value, upb_value fval) {
+ upb_handlers_getorcreate(h, fieldnum, type, repeated, fval)->cb.value =
value ? value : &upb_value_nop;
}
void upb_register_value(upb_handlers *h, upb_fielddef *f,
upb_value_handler_t value, upb_value fval) {
assert(f->msgdef == h->top->msgdef);
- upb_register_typed_value(h, f->number, f->type, value, fval);
+ upb_register_typed_value(h, f->number, f->type, upb_isarray(f), value, fval);
}
void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type,
+ upb_fieldtype_t type, bool repeated,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval) {
- upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, fval);
+ upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval);
f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop;
f->endsubmsg = end ? end : &upb_endsubmsg_nop;
}
-void upb_handlers_typed_link(upb_handlers *h,
- upb_field_number_t fieldnum,
- upb_fieldtype_t type,
- int frames) {
+void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
+ upb_fieldtype_t type, bool repeated, int frames) {
assert(frames <= (h->top - h->stack));
upb_handlers_fieldent *f =
- upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+ upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->msgent_index = (h->top - frames)->msgent_index;
}
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type) {
+ upb_fieldtype_t type, bool repeated) {
upb_handlers_fieldent *f =
- upb_handlers_getorcreate_without_fval(h, fieldnum, type);
+ upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
if (h->top == h->limit) abort(); // TODO: make growable.
++h->top;
if (f->msgent_index == -1) {
@@ -204,8 +208,8 @@ void upb_handlers_push(upb_handlers *h, upb_fielddef *f,
bool delegate) {
assert(f->msgdef == h->top->msgdef);
(void)delegate; // TODO
- upb_register_typed_submsg(h, f->number, f->type, start, end, fval);
- upb_handlers_typed_push(h, f->number, f->type);
+ upb_register_typed_submsg(h, f->number, f->type, upb_isarray(f), start, end, fval);
+ upb_handlers_typed_push(h, f->number, f->type, upb_isarray(f));
}
void upb_handlers_typed_pop(upb_handlers *h) {
@@ -229,13 +233,14 @@ static upb_handlers_fieldent toplevel_f = {
#else
{{0}, UPB_VALUETYPE_RAW},
#endif
- {NULL}, NULL};
+ {NULL}, NULL, 0, 0, 0, false};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) {
d->handlers = h;
for (int i = 0; i < h->msgs_len; i++)
upb_inttable_compact(&h->msgs[i].fieldtab);
d->stack[0].f = &toplevel_f;
+ d->limit = &d->stack[UPB_MAX_NESTING];
upb_status_init(&d->status);
}
@@ -249,10 +254,10 @@ void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end
d->top = d->stack;
d->top->closure = top_closure;
d->top->end_offset = top_end_offset;
- d->limit = &d->stack[UPB_MAX_NESTING];
}
void upb_dispatcher_uninit(upb_dispatcher *d) {  // Tears down the dispatcher's status and the handlers it was initialized with.
+ upb_handlers_uninit(d->handlers);  // d->handlers is the pointer given to upb_dispatcher_init(); callers must not uninit it again.
upb_status_uninit(&d->status);
}
diff --git a/src/upb_stream.h b/src/upb_stream.h
index 0c75acd..7ae9b8d 100644
--- a/src/upb_stream.h
+++ b/src/upb_stream.h
@@ -81,6 +81,14 @@ typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval);
typedef upb_flow_t (*upb_unknownval_handler_t)(
void *closure, upb_field_number_t fieldnum, upb_value val);
+upb_flow_t upb_startmsg_nop(void *closure);
+void upb_endmsg_nop(void *closure, upb_status *status);
+upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val);
+upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval);
+upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval);
+upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
+ upb_value val);
+
typedef struct {
bool junk;
upb_fieldtype_t type;
@@ -93,14 +101,27 @@ typedef struct {
upb_startsubmsg_handler_t startsubmsg;
} cb;
upb_endsubmsg_handler_t endsubmsg;
+ uint32_t jit_pclabel;
+ uint32_t jit_pclabel_notypecheck;
+ uint32_t jit_submsg_done_pclabel;
+ bool repeated;
} upb_handlers_fieldent;
-typedef struct {
+typedef struct _upb_handlers_msgent {
upb_startmsg_handler_t startmsg;
upb_endmsg_handler_t endmsg;
upb_unknownval_handler_t unknownval;
// Maps field number -> upb_handlers_fieldent.
upb_inttable fieldtab;
+ uint32_t jit_startmsg_pclabel;
+ uint32_t jit_endofbuf_pclabel;
+ uint32_t jit_endofmsg_pclabel;
+ uint32_t jit_unknownfield_pclabel;
+ uint32_t groupnum;
+ bool is_group;
+ int32_t jit_parent_field_done_pclabel;
+ uint32_t max_field_number;
+ void **tablearray;
} upb_handlers_msgent;
typedef struct {
@@ -115,6 +136,7 @@ struct _upb_handlers {
upb_msgdef *toplevel_msgdef; // We own a ref.
upb_handlers_msgent *msgent;
upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit;
+ bool should_jit;
};
typedef struct _upb_handlers upb_handlers;
@@ -237,19 +259,17 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start,
// Low-level functions -- internal-only.
void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type, upb_value_handler_t value,
- upb_value fval);
+ upb_fieldtype_t type, bool repeated,
+ upb_value_handler_t value, upb_value fval);
void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type,
+ upb_fieldtype_t type, bool repeated,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval);
-void upb_handlers_typed_link(upb_handlers *h,
- upb_field_number_t fieldnum,
- upb_fieldtype_t type,
- int frames);
+void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
+ upb_fieldtype_t type, bool repeated, int frames);
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
- upb_fieldtype_t type);
+ upb_fieldtype_t type, bool repeated);
void upb_handlers_typed_pop(upb_handlers *h);
INLINE upb_handlers_msgent *upb_handlers_getmsgent(upb_handlers *h,
@@ -308,8 +328,8 @@ typedef struct {
int delegated_depth;
// Stack.
- upb_dispatcher_frame stack[UPB_MAX_NESTING];
upb_status status;
+ upb_dispatcher_frame stack[UPB_MAX_NESTING];
} upb_dispatcher;
INLINE bool upb_dispatcher_skipping(upb_dispatcher *d) {
diff --git a/src/upb_string.c b/src/upb_string.c
index de633bc..8625f76 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -72,7 +72,6 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len) {
- if(str->ptr) *(char*)0 = 0;
assert(str->ptr == NULL);
assert(start + len <= upb_string_len(target_str));
if (target_str->src) {
diff --git a/src/upb_string.h b/src/upb_string.h
index 88a513f..5aa5f3b 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -155,9 +155,13 @@ INLINE const char *upb_string_getbufend(upb_string *str) {
}
// Attempts to recycle the string "str" so it may be reused and have different
-// data written to it. After the function returns, "str" points to a writable
-// string, which is either the original string if it had no other references
-// or a newly created string if it did have other references.
+// data written to it. The caller MUST own a reference on the given string
+// prior to making this call (i.e. the caller must have either created the
+// string or obtained a reference with upb_string_getref()).
+//
+// After the function returns, "str" points to a writable string, which is
+// either the original string if it had no other references or a newly created
+// string if it did have other references.
//
// As a special case, passing a pointer to NULL will allocate a new string.
// This is convenient for the pattern:
@@ -171,7 +175,9 @@ INLINE const char *upb_string_getbufend(upb_string *str) {
// }
INLINE void upb_string_recycle(upb_string **_str) {
upb_string *str = *_str;
- if(str && upb_atomic_only(&str->refcount)) {
+ int r;
+ if(str && ((r = upb_atomic_read(&str->refcount)) == 1 ||
+ (r == _UPB_STRING_REFCOUNT_STACK))) {
str->ptr = NULL;
str->len = 0;
_upb_string_release(str);
diff --git a/src/upb_table.c b/src/upb_table.c
index b9b9824..a754097 100644
--- a/src/upb_table.c
+++ b/src/upb_table.c
@@ -102,6 +102,7 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
upb_inttable_value *table_val;
if (_upb_inttable_isarrkey(t, key)) {
table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
+ t->array_count++;
//printf("Inserting key %d to Array part! %p\n", key, table_val);
} else {
t->t.count++;
@@ -152,8 +153,8 @@ static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i);
i = upb_inttable_next(src, i)) {
- //printf("load check: %d %d\n", upb_inttable_count(dst), upb_inttable_hashtablesize(dst));
- assert((double)(upb_inttable_count(dst)) /
+ //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst));
+ assert((double)(upb_table_count(&dst->t)) /
upb_inttable_hashtablesize(dst) <= MAX_LOAD);
intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i));
}
@@ -209,6 +210,7 @@ void upb_inttable_compact(upb_inttable *t) {
}
upb_inttable new_table;
int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
+ //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
upb_inttable_sizedinit(&new_table, array_size, hash_size,
upb_table_valuesize(&t->t));
//printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
diff --git a/src/upb_varint_decoder.h b/src/upb_varint_decoder.h
index 7297f43..d7af90a 100644
--- a/src/upb_varint_decoder.h
+++ b/src/upb_varint_decoder.h
@@ -30,7 +30,7 @@ typedef struct {
// A basic branch-based decoder, uses 32-bit values to get good performance
// on 32-bit architectures (but performs well on 64-bits also).
-INLINE upb_decoderet upb_decode_varint_branch32(const char *p) {
+INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
upb_decoderet r = {NULL, 0};
uint32_t low, high = 0;
uint32_t b;
@@ -54,7 +54,7 @@ done:
}
// Like the previous, but uses 64-bit values.
-INLINE upb_decoderet upb_decode_varint_branch64(const char *p) {
+INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
uint64_t val;
uint64_t b;
upb_decoderet r = {(void*)0, 0};
@@ -76,17 +76,9 @@ done:
return r;
}
-// Avoids branches for values >2-bytes.
-INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
- uint64_t b = 0;
- upb_decoderet r = {p, 0};
- memcpy(&b, r.p, 2);
- if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
- r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
- r.p = p + 2;
- if ((b & 0x8000) == 0) return r;
-
- // >2-byte varint.
+// Decodes a varint of at most 8 bytes without branching (except for error).
+INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+ uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
uint64_t stop_bit = ~cbits & (cbits+1);
@@ -94,27 +86,19 @@ INLINE upb_decoderet upb_decode_varint_nobranch1(const char *p) {
b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
- r.val |= b << 14;
- r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
- return r;
+ upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+ r.val | (b << 14)};
+ return my_r;
}
-// Avoids branches for values >2-bytes.
-INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
- uint64_t b = 0;
- upb_decoderet r = {p, 0};
- memcpy(&b, r.p, 2);
- if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; }
- r.val = (b & 0x7f) | ((b & 0x7f00) >> 1);
- r.p = p + 2;
- if ((b & 0x8000) == 0) return r;
-
- // >2-byte varint.
+// Another implementation of the previous.
+INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+ uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t cbits = b | 0x7f7f7f7f7f7f7f7fULL;
uint64_t stop_bit = ~cbits & (cbits + 1);
@@ -122,22 +106,46 @@ INLINE upb_decoderet upb_decode_varint_nobranch2(const char *p) {
b += b & 0x007f007f007f007fULL;
b += 3 * (b & 0x0000ffff0000ffffULL);
b += 15 * (b & 0x00000000ffffffffULL);
- r.val |= b << 7;
- r.p += (__builtin_ctzll(stop_bit) + 1) / 8;
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
- return r;
+ upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+ r.val | (b << 7)};
+ return my_r;
}
-INLINE upb_decoderet upb_decode_varint_fast(const char *p) {
+// Template for a function that decodes 1- and 2-byte varints inline with
+// branches and hands 3-10 byte varints to decode_max8_function.
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \
+ uint64_t b = 0; \
+ upb_decoderet r = {p, 0}; \
+ memcpy(&b, r.p, 2); \
+ if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \
+ r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \
+ r.p = p + 2; \
+ if ((b & 0x8000) == 0) return r; \
+ return decode_max8_function(r); \
+}
+
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);  // Defines upb_vdecode_check2_wright().
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);  // Defines upb_vdecode_check2_massimino().
+#undef UPB_VARINT_DECODER_CHECK2
+
+// Our canonical functions for decoding varints, based on the currently
+// favored best-performing implementations.
+INLINE upb_decoderet upb_vdecode_fast(const char *p) {
// Use check2_massimino on 64-bit, branch32 on 32-bit.
if (sizeof(long) == 8)
- return upb_decode_varint_nobranch2(p);
+ return upb_vdecode_check2_massimino(p);
else
- return upb_decode_varint_branch32(p);
+ return upb_vdecode_branch32(p);
+}
+
+INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {  // max8 counterpart of upb_vdecode_fast().
+ return upb_vdecode_max8_massimino(r);  // Same implementation family that upb_vdecode_fast() selects on 64-bit.
+}
#ifdef __cplusplus
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback