author     Joshua Haberman <joshua@reverberate.org>   2011-02-17 23:07:17 -0800
committer  Joshua Haberman <joshua@reverberate.org>   2011-02-17 23:07:17 -0800
commit     d8b215486245e84e33283b6047fb253bbb418e00 (patch)
tree       4c07a4d3162a0390be0b55d619ddab0e7a6acb23 /src
parent     f1e1cc4695b34b292454e903adbf09e66cf2e9d5 (diff)
First version of an assembly language decoder.
It is slower than the C decoder for now because it falls off the fast path too often, but it can successfully decode varints, fixed32, and fixed64.
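
For context, the wire formats the fast path handles are simple: fixed32 and fixed64 are plain little-endian loads, and everything else here is a base-128 varint. The following standalone C sketch (illustrative only, not upb's actual decoder) shows how such a varint is decoded:

/* Illustrative sketch, not upb's code: decode a protobuf base-128 varint.
 * Each byte contributes its low 7 bits; the high bit marks continuation,
 * so a 64-bit value occupies at most 10 bytes. */
#include <stdbool.h>
#include <stdint.h>

static bool demo_decode_varint64(const char **p, const char *end, uint64_t *out) {
  const uint8_t *ptr = (const uint8_t*)*p;
  uint64_t val = 0;
  int shift = 0;
  while (ptr < (const uint8_t*)end && shift < 64) {
    uint8_t b = *ptr++;
    val |= (uint64_t)(b & 0x7f) << shift;
    shift += 7;
    if ((b & 0x80) == 0) {        /* High bit clear: this was the last byte. */
      *p = (const char*)ptr;
      *out = val;
      return true;
    }
  }
  return false;  /* Buffer ended or the varint ran past 10 bytes. */
}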
Diffstat (limited to 'src')
-rw-r--r--  src/upb.h                 10
-rw-r--r--  src/upb_decoder.c         33
-rw-r--r--  src/upb_decoder.h          2
-rw-r--r--  src/upb_decoder_x64.asm  219
-rw-r--r--  src/upb_def.c              2
-rw-r--r--  src/upb_def.h              2
-rw-r--r--  src/upb_msg.h              4
-rw-r--r--  src/upb_table.c            2
-rw-r--r--  src/upb_table.h           13
9 files changed, 274 insertions(+), 13 deletions(-)
diff --git a/src/upb.h b/src/upb.h
index c5b4310..0698748 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -29,6 +29,16 @@ extern "C" {
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
+INLINE void nop_printf(const char *fmt, ...) {
+ (void)fmt;
+}
+
+#ifdef NDEBUG
+#define DEBUGPRINTF nop_printf
+#else
+#define DEBUGPRINTF printf
+#endif
+
// Rounds val up to the next multiple of align.
INLINE size_t upb_align_up(size_t val, size_t align) {
return val % align == 0 ? val : val + align - (val % align);
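
A quick standalone check of the upb_align_up formula shown above (not part of upb itself): exact multiples are left alone, everything else rounds up to the next multiple of align.

#include <assert.h>
#include <stddef.h>

/* Same formula as upb_align_up, reproduced here for a worked example. */
static size_t demo_align_up(size_t val, size_t align) {
  return val % align == 0 ? val : val + align - (val % align);
}

int main(void) {
  assert(demo_align_up(13, 8) == 16);  /* 13 rounds up to the next multiple of 8. */
  assert(demo_align_up(16, 8) == 16);  /* Exact multiples are unchanged. */
  assert(demo_align_up(1, 4)  == 4);
  return 0;
}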
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 4467440..0e6866b 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -14,7 +14,7 @@
// If the return value is other than UPB_CONTINUE, that is what the last
// callback returned.
extern upb_flow_t upb_fastdecode(const char **p, const char *end,
- upb_value_handler_t *value_cb, void *closure,
+ upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
/* Pure Decoding **************************************************************/
@@ -131,6 +131,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
d->ptr += len;
+ //d->bytes_parsed_slow += len;
}
INLINE size_t upb_decoder_bufleft(upb_decoder *d) {
@@ -214,6 +215,7 @@ INLINE bool upb_decode_tag(upb_decoder *d, upb_tag *tag) {
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
// Decode a full varint starting over from ptr.
+ DEBUGPRINTF("tag was >2 bytes.\n");
if (!upb_decode_varint_slow(d, &val)) return false;
tag_int = upb_value_getint64(val);
p = d->ptr; // Trick the next line into not overwriting us.
@@ -317,6 +319,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
d->end = NULL; // Force a buffer pull.
d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered.
d->msgdef = d->top->msgdef;
+ //d->bytes_parsed_fast = 0;
+ //d->bytes_parsed_slow = 0;
// TODO: handle UPB_SKIPSUBMSG
#define CHECK_FLOW(expr) if ((expr) == UPB_BREAK) { /*assert(!upb_ok(status));*/ goto err; }
@@ -335,11 +339,25 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
CHECK_FLOW(upb_pop(d));
}
+ //const char *ptr = d->ptr;
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
- //CHECK_FLOW(upb_fastdecode(&d->ptr, d->end,
- // d->dispatcher->top->handlers.set->value,
- // d->top->handlers.closure));
+ const char *end = UPB_MIN(d->end, d->submsg_end);
+#ifdef USE_ASSEMBLY_FASTPATH
+ CHECK_FLOW(upb_fastdecode(&d->ptr, end,
+ d->dispatcher.top->handlers.set->value,
+ d->dispatcher.top->handlers.closure,
+ d->top->msgdef->itof.array,
+ d->top->msgdef->itof.array_size));
+ if (end - d->ptr < 12) {
+ DEBUGPRINTF("Off the fast path because <12 bytes of data\n");
+ } else {
+ DEBUGPRINTF("Off the fast path for some other reason.\n");
+ }
+#endif
+ //if (d->ptr > ptr) {
+ // d->bytes_parsed_fast += (d->ptr - ptr);
+ //}
// Parse/handle tag.
upb_tag tag;
@@ -348,6 +366,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
// Normal end-of-file.
upb_clearerr(status);
CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher));
+ //DEBUGPRINTF("bytes parsed fast: %d, bytes parsed slow; %d\n", d->bytes_parsed_fast, d->bytes_parsed_slow);
return;
} else {
if (status->code == UPB_EOF) {
@@ -363,8 +382,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
upb_value val;
switch (tag.wire_type) {
case UPB_WIRE_TYPE_START_GROUP:
+ DEBUGPRINTF("parsing start group\n");
break; // Nothing to do now, below we will push appropriately.
case UPB_WIRE_TYPE_END_GROUP:
+ DEBUGPRINTF("parsing end group\n");
if(d->top->end_offset != UPB_GROUP_END_OFFSET) {
upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag.");
goto err;
@@ -395,6 +416,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
}
upb_fielddef *f = e->f;
+ assert(e->field_type == f->type);
+ assert(e->native_wire_type == upb_types[f->type].native_wire_type);
if (tag.wire_type != e->native_wire_type) {
// TODO: Support packed fields.
@@ -416,11 +439,13 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
// doesn't check this, and it would slow us down, so pass for now.
switch (e->field_type) {
case UPB_TYPE(MESSAGE):
+ DEBUGPRINTF("parsing submessage\n");
case UPB_TYPE(GROUP):
CHECK_FLOW(upb_push(d, f, val, e->field_type));
continue; // We have no value to dispatch.
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
+ DEBUGPRINTF("parsing string\n");
CHECK(upb_decode_string(d, &val, &d->tmp));
break;
case UPB_TYPE(SINT32):
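
The SINT32/SINT64 cases rely on zig-zag decoding (the upb_zzdec_* helpers visible in the context above, and the same transform the new assembly performs inline). A standalone sketch with placeholder names: zig-zag maps 0, 1, 2, 3, ... back to 0, -1, 1, -2, ... so small negative numbers stay small on the wire.

#include <assert.h>
#include <stdint.h>

static int32_t demo_zzdec_32(uint32_t n) { return (int32_t)(n >> 1) ^ -(int32_t)(n & 1); }
static int64_t demo_zzdec_64(uint64_t n) { return (int64_t)(n >> 1) ^ -(int64_t)(n & 1); }

int main(void) {
  assert(demo_zzdec_32(0) ==  0);
  assert(demo_zzdec_32(1) == -1);
  assert(demo_zzdec_32(2) ==  1);
  assert(demo_zzdec_64(4294967294ULL) ==  2147483647);
  assert(demo_zzdec_64(4294967295ULL) == -2147483648LL);
  return 0;
}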
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
index 5662c9c..d0848f8 100644
--- a/src/upb_decoder.h
+++ b/src/upb_decoder.h
@@ -70,6 +70,8 @@ struct _upb_decoder {
// Msgdef for the current level.
upb_msgdef *msgdef;
+
+ size_t bytes_parsed_fast, bytes_parsed_slow;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
new file mode 100644
index 0000000..17d1ef7
--- /dev/null
+++ b/src/upb_decoder_x64.asm
@@ -0,0 +1,219 @@
+DEFAULT REL ; Default to RIP-relative addressing instead of absolute.
+
+extern _upb_decode_varint_fast64
+
+SECTION .data
+
+; Our dispatch table; used to jump to the right handler, keyed on the field's
+; type.
+dispatch_table:
+ dq _upb_fastdecode.cant_fast_path ; field not in table (type == 0). (check_4).
+ dq _upb_fastdecode.fixed64 ; double
+ dq _upb_fastdecode.fixed32 ; float
+ dq _upb_fastdecode.varint ; int64
+ dq _upb_fastdecode.varint ; uint64
+ dq _upb_fastdecode.varint ; int32
+ dq _upb_fastdecode.fixed64 ; fixed64
+ dq _upb_fastdecode.fixed32 ; fixed32
+ dq _upb_fastdecode.varint ; bool
+ dq _upb_fastdecode.cant_fast_path ; string (TODO)
+ dq _upb_fastdecode.cant_fast_path ; group (check_6)
+ dq _upb_fastdecode.cant_fast_path ; message
+ dq _upb_fastdecode.cant_fast_path ; bytes (TODO)
+ dq _upb_fastdecode.varint ; uint32
+ dq _upb_fastdecode.varint ; enum
+ dq _upb_fastdecode.fixed32 ; sfixed32
+ dq _upb_fastdecode.fixed64 ; sfixed64
+ dq _upb_fastdecode.varint_sint32 ; sint32
+ dq _upb_fastdecode.varint_sint64 ; sint64
+
+ GLOBAL _upb_decode_fast
+
+SECTION .text
+; Register allocation.
+%define BUF rbx ; const char *p, current buf position.
+%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
+%define BUF_ADDR r12 ; upb_decoder *d.
+%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
+%define CALLBACK r14
+%define CLOSURE r15
+
+; Stack layout: *tableptr, uint32_t maxfield_times_8
+%define STACK_SPACE 24 ; this value + 8 must be a multiple of 16.
+%define TABLE_SPILL [rsp] ; our lookup table, indexed by field number.
+%define MAXFIELD_TIMES_8_SPILL [rsp+8]
+
+
+; Executing the fast path requires the following conditions:
+; - check_1: there are >=12 bytes left (<=2 byte tag and <=10 byte varint).
+; - check_2: the tag is <= 2 bytes.
+; - check_3: the field number is <= the table size
+; (ie. it must be an array lookup, not a hash lookup).
+; - check_4: the field is known (found in the table).
+; - check_5: the wire type we read is correct for the field number,
+; ("packed" fields are not accepted, yet. this could be handled
+; efficiently by doing an extra check on the "type check failed"
+; path that goes into a tight loop if the encoding was packed).
+; - check_6: the field is not a group or a message (or string, TODO)
+; (this could be relaxed, but due to delegation it's a bit tricky).
+; - if the value is a string, the entire string is available in
+; the buffer, and our cached string object can be recycled.
+
+
+%macro decode_and_dispatch_ 0
+align 16
+.decode_and_dispatch:
+ ; Load a few values we'll need in a sec.
+ mov r8, TABLE_SPILL
+ mov r9d, MAXFIELD_TIMES_8_SPILL
+
+ mov rax, END
+ sub rax, BUF
+ cmp rax, 12
+ jb _upb_fastdecode.cant_fast_path ; check_1 (<12 bytes left).
+
+ ; Decode a 1 or 2-byte varint -> eax.
+ mov cl, byte [BUF]
+ lea rdi, [BUF+1]
+ movzx rax, cl ; Need all of rax since we're doing a 64-bit lea later.
+ and eax, 0x7f
+ test cl, cl
+ jns .one_byte_tag ; Should be predictable if fields are in order.
+ movzx ecx, byte [BUF+1]
+ lea rdi, [BUF+2]
+ mov edx, ecx
+ and edx, 0x7f
+ shl edx, 7
+ or eax, edx
+ test al, al
+ js _upb_fastdecode.cant_fast_path ; check_2 (tag was >2 bytes).
+.one_byte_tag:
+ mov BUF, rdi
+
+ ; Decode tag and dispatch.
+ mov ecx, eax
+ and eax, 0x3ff8 ; eax now contains field number * 8
+ lea r11, [r8+rax*2] ; *2 is really *16, since rax is already *8.
+ and ecx, 0x7 ; ecx now contains wire type.
+ cmp eax, r9d
+ jae _upb_fastdecode.cant_fast_path ; check_3 (field number > table size)
+ mov FIELDDEF, [r11+8] ; Lookup fielddef (upb_itof_ent.f)
+ movzx rdx, BYTE [r11+1] ; Lookup field type.
+ mov rax, qword dispatch_table
+ jmp [rax+rdx*8]
+%endmacro
+
+%macro decode_and_dispatch 0
+ jmp .decode_and_dispatch
+%endmacro
+
+%macro call_callback 0
+ ; Value arg must already be in rdx when macro is called.
+ mov rdi, CLOSURE
+ mov rsi, FIELDDEF
+ mov rcx, 33 ; RAW; we could pass the correct type, or only do this in non-debug modes.
+ call CALLBACK
+ mov [BUF_ADDR], BUF
+ cmp eax, 0
+ jne .done ; Caller requested BREAK or SKIPSUBMSG.
+%endmacro
+
+%macro check_type 1
+ cmp ecx, %1
+ jne _upb_fastdecode.cant_fast_path ; check_5 (wire type check failed).
+%endmacro
+
+; extern upb_flow_t upb_fastdecode(const char **p, const char *end,
+; upb_value_handler_t value_cb, void *closure,
+; void *table, int table_size);
+align 16
+global _upb_fastdecode
+_upb_fastdecode:
+ ; We use all callee-save regs.
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, STACK_SPACE
+
+ ; Parse arguments into reg vals and stack.
+ mov BUF_ADDR, rdi
+ mov BUF, [rdi]
+ mov END, rsi
+ mov CALLBACK, rdx
+ mov CLOSURE, rcx
+ mov TABLE_SPILL, r8
+ shl r9, 3
+ mov MAXFIELD_TIMES_8_SPILL, r9
+
+ decode_and_dispatch
+
+align 16
+.varint:
+ call _upb_decode_varint_fast64 ; BUF is already in rdi.
+ test rax, rax
+ jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
+ mov BUF, rax
+ call_callback ; rdx already holds value.
+ decode_and_dispatch_
+
+align 16
+.fixed32:
+ mov edx, DWORD [BUF] ; Might be unaligned, but that's ok.
+ add BUF, 4
+ call_callback
+ decode_and_dispatch
+
+align 16
+.fixed64:
+ mov rdx, QWORD [BUF] ; Might be unaligned, but that's ok.
+ add BUF, 8
+ call_callback
+ decode_and_dispatch
+
+align 16
+.varint_sint32:
+ call _upb_decode_varint_fast64 ; BUF is already in rdi.
+ test rax, rax
+ jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
+ mov BUF, rax
+
+ ; Perform 32-bit zig-zag decoding.
+ mov ecx, edx
+ shr edx, 1
+ and ecx, 0x1
+ neg ecx
+ xor edx, ecx
+ call_callback
+ decode_and_dispatch
+
+align 16
+.varint_sint64:
+ call _upb_decode_varint_fast64 ; BUF is already in rdi.
+ test rax, rax
+ jz _upb_fastdecode.cant_fast_path ; Varint was unterminated, slow path will handle error.
+ mov BUF, rax
+
+ ; Perform 64-bit zig-zag decoding.
+ mov rcx, rdx
+ shr rdx, 1
+ and ecx, 0x1
+ neg rcx
+ xor rdx, rcx
+ call_callback
+ decode_and_dispatch
+
+.cant_fast_path:
+ mov rax, 0 ; UPB_CONTINUE -- continue as before.
+.done:
+ ; If coming via done, preserve the user callback's return in rax.
+ add rsp, STACK_SPACE
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ ret
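
For readers who don't speak NASM, here is a rough, illustrative C rendering of the dispatch step the assembly performs per field. Names such as demo_itof_ent and demo_dispatch_one are placeholders rather than upb's real definitions, and the value callbacks, string handling, and per-type value decoding are omitted; only the bail-out checks numbered in the comments above are shown.

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

typedef struct {
  uint8_t field_type;        /* 0 means "no field with this number".    */
  uint8_t native_wire_type;
  void   *f;                 /* Stands in for the upb_fielddef pointer. */
} demo_itof_ent;

/* Returns false when the fast path must bail out to the slow decoder. */
static bool demo_dispatch_one(const uint8_t **p, const uint8_t *end,
                              const demo_itof_ent *table, int table_size) {
  if (end - *p < 12) return false;               /* check_1: need headroom.   */

  const uint8_t *ptr = *p;
  uint32_t tag = ptr[0] & 0x7f;
  if (ptr[0] & 0x80) {                           /* Two-byte tag.             */
    tag |= (uint32_t)(ptr[1] & 0x7f) << 7;
    if (ptr[1] & 0x80) return false;             /* check_2: tag > 2 bytes.   */
    ptr += 2;
  } else {
    ptr += 1;
  }

  uint32_t fieldnum  = tag >> 3;
  uint32_t wire_type = tag & 0x7;
  if (fieldnum >= (uint32_t)table_size) return false;    /* check_3 */
  const demo_itof_ent *e = &table[fieldnum];
  if (e->field_type == 0) return false;                  /* check_4 */
  if (wire_type != e->native_wire_type) return false;    /* check_5 */

  /* ...here the real code dispatches on e->field_type to the varint,
   * fixed32, fixed64, or zig-zag handler and invokes the value callback. */
  *p = ptr;
  return true;
}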
diff --git a/src/upb_def.c b/src/upb_def.c
index 01143da..6a1e543 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -615,7 +615,7 @@ static upb_flow_t upb_fielddef_endmsg(void *_b) {
// Field was successfully read, add it as a field of the msgdef.
upb_msgdef *m = upb_defbuilder_top(b);
- upb_itof_ent itof_ent = {0, upb_types[f->type].native_wire_type, f->type, f};
+ upb_itof_ent itof_ent = {0, f->type, upb_types[f->type].native_wire_type, f};
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);
diff --git a/src/upb_def.h b/src/upb_def.h
index cca908b..db33fbb 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -163,8 +163,8 @@ typedef struct _upb_msgdef {
// Hash table entries for looking up fields by name or number.
typedef struct {
bool junk;
- upb_wire_type_t native_wire_type;
upb_fieldtype_t field_type;
+ upb_wire_type_t native_wire_type;
upb_fielddef *f;
} upb_itof_ent;
typedef struct {
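
The reordering of field_type and native_wire_type matters because (on my reading of the new assembly, not anything upb asserts itself) the fast path loads the field type from byte offset 1 of a upb_itof_ent and the fielddef pointer from offset 8. A compile-time layout check along these lines, using placeholder single-byte typedefs and assuming a typical 64-bit ABI, would catch a mismatch:

#include <stdbool.h>
#include <stddef.h>
#include <assert.h>

typedef unsigned char demo_fieldtype_t;   /* Placeholder for upb_fieldtype_t.  */
typedef unsigned char demo_wiretype_t;    /* Placeholder for upb_wire_type_t.  */

typedef struct {
  bool junk;
  demo_fieldtype_t field_type;      /* Expected at offset 1 by the asm.        */
  demo_wiretype_t  native_wire_type;
  void *f;                          /* Expected at offset 8 on x86-64.         */
} demo_itof_ent;

int main(void) {
  assert(offsetof(demo_itof_ent, field_type) == 1);
  assert(offsetof(demo_itof_ent, f) == 8);
  return 0;
}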
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 8a3c63f..3246971 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -97,11 +97,13 @@ INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
upb_fieldtype_t ft) {
+#ifndef NDEBUG
if (ft == UPB_VALUETYPE_ARRAY) {
assert(val.type == UPB_VALUETYPE_ARRAY);
- } else {
+ } else if (val.type != UPB_VALUETYPE_RAW) {
assert(val.type == upb_types[ft].inmemory_type);
}
+#endif
#define CASE(t, member_name) \
case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;
diff --git a/src/upb_table.c b/src/upb_table.c
index 9a6cf7a..d2cb5da 100644
--- a/src/upb_table.c
+++ b/src/upb_table.c
@@ -188,7 +188,7 @@ void upb_inttable_compact(upb_inttable *t) {
while ((1UL << lg2_array) < largest_key) ++lg2_array;
++lg2_array; // Undo the first iteration.
size_t array_size;
- int array_count;
+ int array_count = 0;
while (lg2_array > 0) {
array_size = (1 << --lg2_array);
//printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));
diff --git a/src/upb_table.h b/src/upb_table.h
index 3903c75..c658a6e 100644
--- a/src/upb_table.h
+++ b/src/upb_table.h
@@ -123,18 +123,21 @@ INLINE void *_upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
upb_inttable_value *arrval =
(upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
if (_upb_inttable_isarrkey(t, key)) {
- //printf("array lookup for key %d, &val=%p, has_entry=%d\n", key, val, val->has_entry);
+ //DEBUGPRINTF("array lookup for key %d, &val=%p, has_entry=%d\n", key, val, val->has_entry);
return (arrval->has_entry) ? arrval : NULL;
}
uint32_t bucket = _upb_inttable_bucket(t, key);
upb_inttable_entry *e =
(upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
- //printf("looking in first bucket %d, entry size=%zd, addr=%p\n", bucket, entry_size, e);
+ //DEBUGPRINTF("looking in first bucket %d, entry size=%zd, addr=%p\n", bucket, entry_size, e);
while (1) {
- //printf("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key);
- if (e->hdr.key == key) return &e->val;
+ //DEBUGPRINTF("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key);
+ if (e->hdr.key == key) {
+ DEBUGPRINTF("returning val from hash part\n");
+ return &e->val;
+ }
if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
- //printf("looking in bucket %d\n", bucket);
+ //DEBUGPRINTF("looking in bucket %d\n", bucket);
e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
}
}
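
The lookup strategy above -- a dense array part for small keys, a chained hash part for everything else -- can be summarized with this illustrative sketch (placeholder names and a simple mask-based bucket function, not upb's real structures):

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#define DEMO_END_OF_CHAIN  UINT32_MAX

typedef struct {
  bool     has_entry;
  uint32_t key;
  uint32_t next;       /* Bucket index of the next entry, or DEMO_END_OF_CHAIN. */
  void    *val;
} demo_entry;

typedef struct {
  demo_entry *array;        /* Dense array part, indexed directly by key.  */
  uint32_t    array_size;
  demo_entry *entries;      /* Hash part.                                  */
  uint32_t    mask;         /* hash_size - 1, hash_size a power of two.    */
} demo_inttable;

static void *demo_lookup(const demo_inttable *t, uint32_t key) {
  if (key < t->array_size) {                       /* Array part: O(1) index.  */
    return t->array[key].has_entry ? t->array[key].val : NULL;
  }
  uint32_t bucket = key & t->mask;                 /* Hash part: walk chain.   */
  for (;;) {
    const demo_entry *e = &t->entries[bucket];
    if (e->key == key) return e->val;
    if ((bucket = e->next) == DEMO_END_OF_CHAIN) return NULL;
  }
}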