summaryrefslogtreecommitdiff
path: root/upb/pb/decoder_x64.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'upb/pb/decoder_x64.dasc')
-rw-r--r--upb/pb/decoder_x64.dasc141
1 files changed, 52 insertions, 89 deletions
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index fa984ef..f58e403 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -9,8 +9,8 @@
|// parsing the specific message and calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
-|// function) we must respect alignment rules. On OS X, this means aligning
-|// the stack to 16 bytes.
+|// function) we must respect alignment rules. All x86-64 systems require
+|// 16-byte stack alignment.
#include <sys/mman.h>
#include "dynasm/dasm_x86.h"
@@ -103,7 +103,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) {
// Has to be a separate function, otherwise GCC will complain about
// expressions like (&foo != NULL) because they will never evaluate
// to false.
-static void upb_assert_notnull(void *addr) { assert(addr != NULL); }
+static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
|.arch x64
|.actionlist upb_jit_actionlist
@@ -401,45 +401,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
}
}
-#if 0
-// These appear not to speed things up, but keeping around for
-// further experimentation.
-static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
- upb_fhandlers *f) {
- | mov eax, STDARRAY:ARG1_64->len
- | cmp eax, STDARRAY:ARG1_64->size
- | jne >2
- // If array is full, fall back to actual function.
- | loadfval f
- | callp f->value
- | jmp >3
- |2:
- | mov rcx, STDARRAY:ARG1_64->ptr
- | mov esi, eax
- | add eax, 1
-
- switch (size) {
- case 8:
- | mov [rcx + rsi * 8], ARG3_64
- break;
-
- case 4:
- | mov [rcx + rsi * 4], ARG3_32
- break;
-
- case 1:
- | mov [rcx + rsi * 4], ARG3_8
- break;
- }
-
- | mov STDARRAY:ARG1_64->len, eax
- |3:
-}
-#endif
-
static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
upb_fhandlers *f) {
- // Call callbacks.
+ // Call callbacks. Specializing the append accessors didn't yield a speed
+ // increase in benchmarks.
if (upb_issubmsgtype(f->type)) {
if (f->type == UPB_TYPE(MESSAGE)) {
| mov rsi, PTR
@@ -457,7 +422,10 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
| mov ARG1_64, CLOSURE
| loadfval f
| callp f->startsubmsg
+ | sethas CLOSURE, f->hasbit
| mov CLOSURE, rdx
+ } else {
+ | sethas CLOSURE, f->hasbit
}
| mov qword FRAME->closure, CLOSURE
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
@@ -465,6 +433,7 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
| call =>sub_m->jit_startmsg_pclabel;
+ | popframe upb_fhandlers_getmsg(f)
// Call endsubmsg handler (if any).
if (f->endsubmsg) {
@@ -473,7 +442,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
| loadfval f
| callp f->endsubmsg
}
- | popframe upb_fhandlers_getmsg(f)
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
| mov DECODER->ptr, PTR
} else {
@@ -494,21 +462,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
} else if (f->value == &upb_stdmsg_setbool) {
const upb_fielddef *fd = upb_value_getfielddef(f->fval);
| mov [ARG1_64 + fd->offset], ARG3_8
-#if 0
- // These appear not to speed things up, but keeping around for
- // further experimentation.
- } else if (f->value == &upb_stdmsg_setint64_r ||
- f->value == &upb_stdmsg_setuint64_r ||
- f->value == &upb_stdmsg_setptr_r ||
- f->value == &upb_stdmsg_setdouble_r) {
- upb_decoderplan_jit_doappend(plan, 8, f);
- } else if (f->value == &upb_stdmsg_setint32_r ||
- f->value == &upb_stdmsg_setuint32_r ||
- f->value == &upb_stdmsg_setfloat_r) {
- upb_decoderplan_jit_doappend(plan, 4, f);
- } else if (f->value == &upb_stdmsg_setbool_r) {
- upb_decoderplan_jit_doappend(plan, 1, f);
-#endif
} else if (f->value) {
// Load closure and fval into arg registers.
||#ifndef NDEBUG
@@ -520,16 +473,26 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
| loadfval f
| callp f->value
}
- | sethas CLOSURE, f->valuehasbit
+ | sethas CLOSURE, f->hasbit
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
| mov DECODER->ptr, PTR
}
}
+static uint64_t upb_get_encoded_tag(upb_fhandlers *f) {
+ uint32_t tag = (f->number << 3) | upb_decoder_types[f->type].native_wire_type;
+ uint64_t encoded_tag = upb_vencode32(tag);
+ // No tag should be greater than 5 bytes.
+ assert(encoded_tag <= 0xffffffffff);
+ return encoded_tag;
+}
+
// PTR should point to the beginning of the tag.
-static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
- uint64_t next_tag, upb_mhandlers *m,
+static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m,
upb_fhandlers *f, upb_fhandlers *next_f) {
+ uint64_t tag = upb_get_encoded_tag(f);
+ uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0;
+
// PC-label for the dispatch table.
// We check the wire type (which must be loaded in edx) because the
// table is keyed on field number, not type.
@@ -541,10 +504,13 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
| mov rsi, FRAME->end_ofs
| pushframe f, rsi, true
if (f->startseq) {
- | mov ARG1_64, CLOSURE
+ | mov ARG1_64, CLOSURE
| loadfval f
- | callp f->startseq
- | mov CLOSURE, rdx
+ | callp f->startseq
+ | sethas CLOSURE, f->hasbit
+ | mov CLOSURE, rdx
+ } else {
+ | sethas CLOSURE, f->hasbit
}
| mov qword FRAME->closure, CLOSURE
}
@@ -590,6 +556,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
}
static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
+ |=>m->jit_afterstartmsg_pclabel:
+ // There was a call to get here, so we need to align the stack.
+ | sub rsp, 8
+ | jmp >1
+
|=>m->jit_startmsg_pclabel:
// There was a call to get here, so we need to align the stack.
| sub rsp, 8
@@ -602,6 +573,7 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
}
+ |1:
| setmsgend m
| check_eob m
| mov ecx, dword [PTR]
@@ -616,30 +588,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
int num_keys = upb_inttable_count(&m->fieldtab);
uint32_t *keys = malloc(num_keys * sizeof(*keys));
int idx = 0;
- for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
- !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- keys[idx++] = upb_inttable_iter_key(i);
+ upb_inttable_iter i;
+ upb_inttable_begin(&i, &m->fieldtab);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ keys[idx++] = upb_inttable_iter_key(&i);
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
- upb_fhandlers *last_f = NULL;
- uint64_t last_encoded_tag = 0;
for(int i = 0; i < num_keys; i++) {
- uint32_t fieldnum = keys[i];
- upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
- upb_fhandlers *f = e->f;
- assert(f->number == fieldnum);
- uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
- uint64_t encoded_tag = upb_vencode32(tag);
- // No tag should be greater than 5 bytes.
- assert(encoded_tag <= 0xffffffffff);
- if (last_f) upb_decoderplan_jit_field(
- plan, last_encoded_tag, encoded_tag, m, last_f, f);
- last_encoded_tag = encoded_tag;
- last_f = f;
+ upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]);
+ upb_fhandlers *next_f =
+ (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL;
+ upb_decoderplan_jit_field(plan, m, f, next_f);
}
- upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);
free(keys);
@@ -733,18 +694,19 @@ static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
uint32_t *pclabel_count) {
m->jit_startmsg_pclabel = (*pclabel_count)++;
+ m->jit_afterstartmsg_pclabel = (*pclabel_count)++;
m->jit_endofbuf_pclabel = (*pclabel_count)++;
m->jit_endofmsg_pclabel = (*pclabel_count)++;
m->jit_dyndispatch_pclabel = (*pclabel_count)++;
m->jit_unknownfield_pclabel = (*pclabel_count)++;
m->max_field_number = 0;
upb_inttable_iter i;
- for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- uint32_t key = upb_inttable_iter_key(i);
+ upb_inttable_begin(&i, &m->fieldtab);
+ for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ uint32_t key = upb_inttable_iter_key(&i);
m->max_field_number = UPB_MAX(m->max_field_number, key);
- upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
- upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
+ upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i));
+ upb_decoderplan_jit_assignfieldlabs(f, pclabel_count);
}
// TODO: support large field numbers by either using a hash table or
// generating code for a binary search. For now large field numbers
@@ -784,11 +746,12 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) {
// Create dispatch tables.
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *m = h->msgs[i];
+ // We jump to after the startmsg handler since it is called before entering
+ // the JIT (either by upb_decoder or by a previous call to the JIT).
m->jit_func =
- plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
+ plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel);
for (uint32_t j = 0; j <= m->max_field_number; j++) {
- upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
- upb_fhandlers *f = e ? e->f : NULL;
+ upb_fhandlers *f = upb_mhandlers_lookup(m, j);
if (f) {
m->tablearray[j] =
plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback