From 5edfe9a4c9badf0095f65c88611b107ee28dc9ad Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 15 Feb 2011 14:38:01 -0800 Subject: Remove upb_dstate and specialize upb_decode_fixed for perf improvement. The compiler wasn't keeping upb_dstate in registers anyway (which was the original goal). This simplifies the decoder. upb_decode_fixed was intended to minimize the number of branches, but since it was calling out to memcpy as a function, this turned out to be a pessimization. Performance is encouraging: plain32.parsestream_googlemessage1.upb_table: 254 -> 242 (-4.72) plain32.parsestream_googlemessage2.upb_table: 357 -> 400 (12.04) plain32.parsetostruct_googlemessage1.upb_table_byref: 143 -> 144 (0.70) plain32.parsetostruct_googlemessage1.upb_table_byval: 122 -> 118 (-3.28) plain32.parsetostruct_googlemessage2.upb_table_byref: 189 -> 200 (5.82) plain32.parsetostruct_googlemessage2.upb_table_byval: 198 -> 200 (1.01) omitfp32.parsestream_googlemessage1.upb_table: 267 -> 265 (-0.75) omitfp32.parsestream_googlemessage2.upb_table: 377 -> 465 (23.34) omitfp32.parsetostruct_googlemessage1.upb_table_byref: 140 -> 151 (7.86) omitfp32.parsetostruct_googlemessage1.upb_table_byval: 131 -> 131 (0.00) omitfp32.parsetostruct_googlemessage2.upb_table_byref: 204 -> 214 (4.90) omitfp32.parsetostruct_googlemessage2.upb_table_byval: 200 -> 206 (3.00) plain.parsestream_googlemessage1.upb_table: 313 -> 317 (1.28) plain.parsestream_googlemessage2.upb_table: 476 -> 541 (13.66) plain.parsetostruct_googlemessage1.upb_table_byref: 189 -> 189 (0.00) plain.parsetostruct_googlemessage1.upb_table_byval: 165 -> 165 (0.00) plain.parsetostruct_googlemessage2.upb_table_byref: 263 -> 270 (2.66) plain.parsetostruct_googlemessage2.upb_table_byval: 248 -> 255 (2.82) omitfp.parsestream_googlemessage1.upb_table: 306 -> 305 (-0.33) omitfp.parsestream_googlemessage2.upb_table: 471 -> 531 (12.74) omitfp.parsetostruct_googlemessage1.upb_table_byref: 189 -> 190 (0.53) 
omitfp.parsetostruct_googlemessage1.upb_table_byval: 166 -> 172 (3.61) omitfp.parsetostruct_googlemessage2.upb_table_byref: 258 -> 270 (4.65) omitfp.parsetostruct_googlemessage2.upb_table_byval: 248 -> 265 (6.85) --- perf-tests.sh | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'perf-tests.sh') diff --git a/perf-tests.sh b/perf-tests.sh index 3833572..ca7e4ff 100755 --- a/perf-tests.sh +++ b/perf-tests.sh @@ -11,25 +11,24 @@ fi rm -f perf-tests.out -make clean -echo "-O3 -DNDEBUG -msse3 -DUPB_THREAD_UNSAFE" > perf-cppflags -make $MAKETARGET -make benchmark | sed -e 's/^/plain./g' | tee -a perf-tests.out - -make clean -echo "-O3 -DNDEBUG -fomit-frame-pointer -msse3 -DUPB_THREAD_UNSAFE" > perf-cppflags -make $MAKETARGET -make benchmark | sed -e 's/^/omitfp./g' | tee -a perf-tests.out - if [ x`uname -m` == xx86_64 ]; then make clean - echo "-O3 -DNDEBUG -msse3 -m32 -DUPB_THREAD_UNSAFE" > perf-cppflags + echo "-O3 -DNDEBUG -msse3 -m32" > perf-cppflags make upb_benchmarks make benchmark | sed -e 's/^/plain32./g' | tee -a perf-tests.out make clean - echo "-O3 -DNDEBUG -fomit-frame-pointer -msse3 -m32 -DUPB_THREAD_UNSAFE" > perf-cppflags + echo "-O3 -DNDEBUG -fomit-frame-pointer -msse3 -m32" > perf-cppflags make upb_benchmarks make benchmark | sed -e 's/^/omitfp32./g' | tee -a perf-tests.out fi +make clean +echo "-O3 -DNDEBUG -msse3" > perf-cppflags +make $MAKETARGET +make benchmark | sed -e 's/^/plain./g' | tee -a perf-tests.out + +make clean +echo "-O3 -DNDEBUG -fomit-frame-pointer -msse3" > perf-cppflags +make $MAKETARGET +make benchmark | sed -e 's/^/omitfp./g' | tee -a perf-tests.out -- cgit v1.2.3