summary | refs | log | tree | commit | diff
path: root/src/upb_decoder_x64.asm
diff options
context:
space:
mode:
author: Josh Haberman <jhaberman@gmail.com> 2011-03-20 13:13:51 -0700
committer: Josh Haberman <jhaberman@gmail.com> 2011-03-20 13:13:51 -0700
commit: 8ef6873e0e14309a1715a252a650bab0ae1a33ef (patch)
tree: a9f81f9fa3ee24b923310cef964c1cbe1bf47a19 /src/upb_decoder_x64.asm
parent: 37e1c3102be15f1e57805e828993156e3492d764 (diff)
upb_stream: all callbacks registered ahead-of-time.
This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT).

Other impacts of this change:

- You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb.
- A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder.

This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/

On the other hand, once we have a JIT this should no longer matter.

Performance numbers:

plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88)
plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11)
plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37)
plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12)
plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47)
plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42)
omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07)
omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87)
omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74)
omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10)
omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40)
omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
Diffstat (limited to 'src/upb_decoder_x64.asm')
-rw-r--r-- src/upb_decoder_x64.asm 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/upb_decoder_x64.asm b/src/upb_decoder_x64.asm
index 032ea86..c417644 100644
--- a/src/upb_decoder_x64.asm
+++ b/src/upb_decoder_x64.asm
@@ -34,8 +34,8 @@ SECTION .text
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
%define STRING r12 ; unused
-%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
-%define CALLBACK r14
+%define FVAL r13 ; upb_value fval, needs to be preserved across varint decoding call.
+%define UNUSED r14
%define CLOSURE r15
; Stack layout: *tableptr, uint32_t maxfield_times_8
@@ -57,10 +57,10 @@ SECTION .text
; path that goes into a tight loop if the encoding was packed).
; - check_6: the field is not a group or a message (or string, TODO)
; (this could be relaxed, but due to delegation it's a bit tricky).
-; - if the value is a string, the entire string is available in
+; - check_7: if the value is a string, the entire string is available in
; the buffer, and our cached string object can be recycled, and
; our string object already references the source buffer, so
-; absolutely no refcount twiddling is required. (check_7)
+; absolutely no refcount twiddling is required.
%macro decode_and_dispatch_ 0
@@ -78,7 +78,7 @@ align 16
; Decode a 1 or 2-byte varint -> eax.
mov cl, byte [BUF]
lea rdi, [BUF+1]
- movzx rax, cl ; Need all of rax since we're doing a 64-bit lea later.
+ movzx eax, cl
and eax, 0x7f
test cl, cl
jns .one_byte_tag ; Should be predictable if fields are in order.
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback