From 678799082b9775e601a09af9aa68e59fc1c64f6f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:23:52 -0700 Subject: Stream decoding benchmark. --- Makefile | 24 ++++++-- benchmarks/parsestream.upb_table.c | 113 +++++++++++++++++++++++++++++++++++++ core/upb_stream.h | 3 +- core/upb_string.c | 18 ++++++ stream/upb_byteio.h | 43 -------------- 5 files changed, 152 insertions(+), 49 deletions(-) create mode 100644 benchmarks/parsestream.upb_table.c delete mode 100644 stream/upb_byteio.h diff --git a/Makefile b/Makefile index 749c5a7..203bed6 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -154,10 +154,10 @@ tests/tests: core/libupb.a tools/upbc: core/libupb.a # Benchmarks -UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byref +#UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table \ +# benchmarks/b.parsetostruct_googlemessage2.upb_table +UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \ + benchmarks/b.parsestream_googlemessage2.upb_table BENCHMARKS=$(UPB_BENCHMARKS) \ benchmarks/b.parsetostruct_googlemessage1.proto2_table \ @@ -204,6 +204,20 @@ benchmarks/b.parsetostruct_googlemessage2.upb_table_byref: \ -DMESSAGE_FILE=\"google_message2.dat\" \ -DBYREF=true $(LIBUPB) +benchmarks/b.parsestream_googlemessage1.upb_table \ +benchmarks/b.parsestream_googlemessage2.upb_table: \ + benchmarks/parsestream.upb_table.c $(LIBUPB) 
benchmarks/google_messages.proto.pb
+	$(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage1.upb_table $< \
+	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
+	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
+	  -DMESSAGE_FILE=\"google_message1.dat\" \
+	  $(LIBUPB)
+	$(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage2.upb_table $< \
+	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
+	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
+	  -DMESSAGE_FILE=\"google_message2.dat\" \
+	  $(LIBUPB)
+
 benchmarks/b.parsetostruct_googlemessage1.proto2_table \
 benchmarks/b.parsetostruct_googlemessage2.proto2_table: \
   benchmarks/parsetostruct.proto2_table.cc benchmarks/google_messages.pb.cc
diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c
new file mode 100644
index 0000000..c6acad9
--- /dev/null
+++ b/benchmarks/parsestream.upb_table.c
@@ -0,0 +1,124 @@
+
+#include "main.c"
+
+#include "upb_def.h"
+#include "upb_decoder.h"
+#include "upb_strstream.h"
+
+static upb_stringsrc *stringsrc;
+static upb_string *input_str;
+static upb_string *tmp_str;
+static upb_msgdef *def;
+static upb_decoder *decoder;
+
+// Loads the descriptor set from MESSAGE_DESCRIPTOR_FILE into a new symtab,
+// looks up MESSAGE_NAME, reads MESSAGE_FILE into input_str, and creates the
+// decoder and stringsrc that run() reuses. Prints to stderr and returns false
+// if any of these steps fails.
+static bool initialize()
+{
+  // Initialize upb state, decode descriptor.
+  upb_status status = UPB_STATUS_INIT;
+  upb_symtab *s = upb_symtab_new();
+  upb_symtab_add_descriptorproto(s);
+  upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
+  if(fds_str == NULL) {
+    fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":");
+    upb_printerr(&status);
+    return false;
+  }
+
+  upb_stringsrc *ssrc = upb_stringsrc_new();
+  upb_stringsrc_reset(ssrc, fds_str);
+  upb_def *fds_def = upb_symtab_lookup(
+      s, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
+  upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def));
+  upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc));
+
+  upb_symtab_addfds(s, upb_decoder_src(d), &status);
+
+  if(!upb_ok(&status)) {
+    fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":");
+    upb_printerr(&status);
+    return false;
+  }
+
+  upb_string_unref(fds_str);
+  upb_decoder_free(d);
+  upb_stringsrc_free(ssrc);
+  upb_def_unref(fds_def);
+
+  def = upb_downcast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME)));
+  if(!def) {
+    fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
+            UPB_STRARG(UPB_STRLIT(MESSAGE_NAME)));
+    return false;
+  }
+  upb_symtab_unref(s);
+
+  // Read the message data itself.
+  input_str = upb_strreadfile(MESSAGE_FILE);
+  if(input_str == NULL) {
+    fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
+    return false;
+  }
+  tmp_str = NULL;
+  decoder = upb_decoder_new(def);
+  stringsrc = upb_stringsrc_new();
+  return true;
+}
+
+// Releases every global that initialize() and run() populated.
+static void cleanup()
+{
+  upb_string_unref(input_str);
+  upb_string_unref(tmp_str);
+  upb_def_unref(UPB_UPCAST(def));
+  upb_decoder_free(decoder);
+  upb_stringsrc_free(stringsrc);
+}
+
+// Streams input_str through the decoder once, visiting every field without
+// building a message. Returns the input length in bytes on success, or 0
+// after printing to stderr on failure.
+static size_t run(int i)
+{
+  (void)i;
+  // NOTE(review): status is never passed to the decoder in this function, so
+  // the upb_ok() check below cannot observe a decode failure -- TODO confirm.
+  upb_status status = UPB_STATUS_INIT;
+  upb_stringsrc_reset(stringsrc, input_str);
+  upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc));
+  upb_src *src = upb_decoder_src(decoder);
+  upb_fielddef *f;
+  int depth = 0;
+  while(1) {
+    while((f = upb_src_getdef(src)) != NULL) {
+      if(upb_issubmsg(f)) {
+        upb_src_startmsg(src);
+        ++depth;
+      } else if(upb_isstring(f)) {
+        // Hand back the string from the previous field; recycling anything
+        // else would drop our only pointer to the old tmp_str.
+        tmp_str = upb_string_tryrecycle(tmp_str);
+        upb_src_getstr(src, tmp_str);
+      } else {
+        // Primitive type.
+        upb_value val;
+        upb_src_getval(src, upb_value_addrof(&val));
+      }
+    }
+    // If we're not EOF now, the loop terminated due to an error.
+    if (!upb_src_eof(src)) goto err;
+    if (depth == 0) break;
+    --depth;
+    upb_src_endmsg(src);
+  }
+  if(!upb_ok(&status)) goto err;
+  return upb_string_len(input_str);
+
+err:
+  fprintf(stderr, "Decode error");
+  upb_printerr(&status);
+  return 0;
+}
diff --git a/core/upb_stream.h b/core/upb_stream.h
index b7400c5..861bd1c 100644
--- a/core/upb_stream.h
+++ b/core/upb_stream.h
@@ -128,7 +128,8 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen);
 
 // Appends the next "len" bytes in the stream in-place to "str". This should
 // be used when the caller needs to build a contiguous string of the existing
-// data in "str" with more data.
+// data in "str" with more data. The call fails if fewer than len bytes are
+// available in the stream.
bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len);
 
 // Returns the current error status for the stream.
diff --git a/core/upb_string.c b/core/upb_string.c
index 93686f5..847a3ee 100644
--- a/core/upb_string.c
+++ b/core/upb_string.c
@@ -131,3 +131,27 @@ upb_string *upb_strdup(upb_string *s) {
   upb_strcpy(str, s);
   return str;
 }
+
+// Reads the entire contents of "filename" into a newly created string.
+// Returns NULL if the file cannot be opened, sized, or read in full.
+upb_string *upb_strreadfile(const char *filename) {
+  FILE *f = fopen(filename, "rb");
+  if(!f) return NULL;
+  upb_string *s = NULL;  // Declared before any goto so "error" can test it.
+  if(fseek(f, 0, SEEK_END) != 0) goto error;
+  long size = ftell(f);
+  if(size < 0) goto error;
+  if(fseek(f, 0, SEEK_SET) != 0) goto error;
+  s = upb_string_new();
+  char *buf = upb_string_getrwbuf(s, size);
+  // fread() returns 0 items for a zero-byte request, so skip it for an empty
+  // file rather than reporting a read failure.
+  if(size > 0 && fread(buf, size, 1, f) != 1) goto error;
+  fclose(f);
+  return s;
+
+error:
+  if(s) upb_string_unref(s);  // Don't leak the string on a short read.
+  fclose(f);
+  return NULL;
+}
diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h
deleted file mode 100644
index 69a28b3..0000000
--- a/stream/upb_byteio.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * This file contains upb_bytesrc and upb_bytesink implementations for common
- * interfaces like strings, UNIX fds, and FILE*.
- *
- * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details.
- */
-
-#ifndef UPB_BYTEIO_H
-#define UPB_BYTEIO_H
-
-#include "upb_srcsink.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* upb_stringsrc **************************************************************/
-
-struct upb_stringsrc;
-typedef struct upb_stringsrc upb_stringsrc;
-
-// Create/free a stringsrc.
-upb_stringsrc *upb_stringsrc_new();
-void upb_stringsrc_free(upb_stringsrc *s);
-
-// Resets the stringsrc to a state where it will vend the given string. The
-// stringsrc will take a reference on the string, so the caller need not ensure
-// that it outlives the stringsrc. A stringsrc can be reset multiple times.
-void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
-
-// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above.
-upb_bytesrc *upb_stringsrc_bytesrc(); - - -/* upb_fdsrc ******************************************************************/ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif -- cgit v1.2.3