From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. --- core/upb_stream.h | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 core/upb_stream.h (limited to 'core/upb_stream.h') diff --git a/core/upb_stream.h b/core/upb_stream.h new file mode 100644 index 0000000..e7b4074 --- /dev/null +++ b/core/upb_stream.h @@ -0,0 +1,121 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file defines four general-purpose streaming interfaces for protobuf + * data or bytes: + * + * - upb_src: pull interface for protobuf data. + * - upb_sink: push interface for protobuf data. + * - upb_bytesrc: pull interface for bytes. + * - upb_bytesink: push interface for bytes. + * + * These interfaces are used as general-purpose glue in upb. For example, the + * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + */ + +#ifndef UPB_SRCSINK_H +#define UPB_SRCSINK_H + +#include "upb_stream_vtbl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Forward-declare. We can't include upb_def.h; it would be circular. +struct _upb_fielddef; + +// Note! The "eof" flags work like feof() in C; they cannot report end-of-file +// until a read has failed due to eof. They cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. + +/* upb_src ********************************************************************/ + +// TODO: decide how to handle unknown fields. + +// Retrieves the fielddef for the next field in the stream. Returns NULL on +// error or end-of-stream. +struct _upb_fielddef *upb_src_getdef(upb_src *src); + +// Retrieves and stores the next value in "val". For string types "val" must +// be a newly-recycled string. Returns false on error. +bool upb_src_getval(upb_src *src, upb_valueptr val); +bool upb_src_getstr(upb_src *src, upb_string *val); + +// Like upb_src_getval() but skips the value. +bool upb_src_skipval(upb_src *src); + +// Descends into a submessage. May only be called after a def has been +// returned that indicates a submessage. +bool upb_src_startmsg(upb_src *src); + +// Stops reading a submessage. May be called before the stream is EOF, in +// which case the rest of the submessage is skipped. +bool upb_src_endmsg(upb_src *src); + +// Returns the current error/eof status for the stream. +INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } +INLINE bool upb_src_eof(upb_src *src) { return src->eof; } + +// The following functions are equivalent to upb_src_getval(), but take +// pointers to specific types. In debug mode this may check that the type +// is compatible with the type being read. This check will *not* be performed +// in non-debug mode, and if you get the type wrong the behavior is undefined. +bool upb_src_getbool(upb_src *src, bool *val); +bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); + +/* upb_sink *******************************************************************/ + +// Puts the given fielddef into the stream. +bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); + +// Puts the given value into the stream. +bool upb_sink_putval(upb_sink *sink, upb_value val); + +// Starts a submessage. (needed? the def tells us we're starting a submsg.) +bool upb_sink_startmsg(upb_sink *sink); + +// Ends a submessage. +bool upb_sink_endmsg(upb_sink *sink); + +// Returns the current error status for the stream. +upb_status *upb_sink_status(upb_sink *sink); + +/* upb_bytesrc ****************************************************************/ + +// Returns the next string in the stream. false is returned on error or eof. +// The string must be at least "minlen" bytes long unless the stream is eof. +bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); + +// Appends the next "len" bytes in the stream in-place to "str". This should +// be used when the caller needs to build a contiguous string of the existing +// data in "str" with more data. +bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +// Returns the current error status for the stream. +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +/* upb_bytesink ***************************************************************/ + +// Puts the given string. Returns the number of bytes that were actually, +// consumed, which may be fewer than were in the string, or <0 on error. +int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); + +// Returns the current error status for the stream. +upb_status *upb_bytesink_status(upb_bytesink *sink); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From 8e138c4687bf021d40be5134228940dbfe2fbd45 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 12 Jul 2010 10:21:53 -0700 Subject: Added more comments for upb_src interface. --- Makefile | 1 + core/upb_stream.c | 4 ++-- core/upb_stream.h | 51 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 45 insertions(+), 11 deletions(-) (limited to 'core/upb_stream.h') diff --git a/Makefile b/Makefile index c37df72..166ca3a 100644 --- a/Makefile +++ b/Makefile @@ -47,6 +47,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ + core/upb_stream.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. diff --git a/core/upb_stream.c b/core/upb_stream.c index e0863b8..bda11de 100644 --- a/core/upb_stream.c +++ b/core/upb_stream.c @@ -16,10 +16,10 @@ void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { upb_string *str = NULL; while((f = upb_src_getdef(src)) != NULL) { CHECKSINK(upb_sink_putdef(sink, f)); - if(f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE)) { + if(upb_issubmsg(f)) { // We always recurse into submessages, but the putdef above already told // the sink that. - } else if(f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES)) { + } else if(upb_isstring(f)) { str = upb_string_tryrecycle(str); CHECKSRC(upb_src_getstr(src, str)); CHECKSINK(upb_sink_putstr(sink, str)); diff --git a/core/upb_stream.h b/core/upb_stream.h index e7b4074..9147e45 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -35,29 +35,58 @@ struct _upb_fielddef; /* upb_src ********************************************************************/ +// A upb_src is a pull parser for protobuf data. Sample usage: +// +// #define CHECK(x) if(!x) goto err; +// +// bool parse_msg(upb_src *src, int indent) { +// upb_fielddef *f; +// while ((f = upb_src_getdef(src)) != NULL) { +// for (int i = 0; i < indent; i++) putchar(' '); +// printf("Parsed field; name=" UPB_STRFMT ", num=%d", +// UPB_STRARG(d->name), d->number); +// if (upb_issubmsg(f)) { +// CHECK(upb_src_startmsg(src)); +// CHECK(parse_msg(src, indent + 2)); +// CHECK(upb_src_endmsg(src)); +// } else { +// CHECK(upb_src_skipval(src)); +// } +// } +// // We should be EOF now, otherwise there was an error. +// CHECK(upb_src_eof(src)); +// return true; +// +// err: +// return false; +// } +// // TODO: decide how to handle unknown fields. // Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. +// error or end-of-stream. End of stream can simply mean end of submessage. struct _upb_fielddef *upb_src_getdef(upb_src *src); -// Retrieves and stores the next value in "val". For string types "val" must -// be a newly-recycled string. Returns false on error. +// Retrieves and stores the next value in "val". upb_src_getval() is for all +// numeric types and upb_src_getstr() is for strings. For string types "str" +// must be a newly-recycled string. Returns false on error. bool upb_src_getval(upb_src *src, upb_valueptr val); bool upb_src_getstr(upb_src *src, upb_string *val); // Like upb_src_getval() but skips the value. bool upb_src_skipval(upb_src *src); -// Descends into a submessage. May only be called after a def has been -// returned that indicates a submessage. +// Descends into a submessage. May only be called when upb_issubmsg(f) is true +// for an f = upb_src_getdef(src) that was just parsed. bool upb_src_startmsg(upb_src *src); // Stops reading a submessage. May be called before the stream is EOF, in // which case the rest of the submessage is skipped. bool upb_src_endmsg(upb_src *src); -// Returns the current error/eof status for the stream. +// Returns the current error/eof status for the stream. If a stream is eof +// but we are inside a submessage, calling upb_src_endmsg(src) will reset +// the eof marker. INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } INLINE bool upb_src_eof(upb_src *src) { return src->eof; } @@ -80,9 +109,7 @@ bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); // Puts the given value into the stream. bool upb_sink_putval(upb_sink *sink, upb_value val); - -// Starts a submessage. (needed? the def tells us we're starting a submsg.) -bool upb_sink_startmsg(upb_sink *sink); +bool upb_sink_putstr(upb_sink *sink, upb_string *str); // Ends a submessage. bool upb_sink_endmsg(upb_sink *sink); @@ -114,6 +141,12 @@ int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. upb_status *upb_bytesink_status(upb_bytesink *sink); +/* Utility functions **********************************************************/ + +// Streams data from src to sink until EOF or error. +void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status); + + #ifdef __cplusplus } /* extern "C" */ #endif -- cgit v1.2.3 From 87b2c69c15716b96a294f5918878fb8b7b9a0b40 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 12:56:04 -0700 Subject: Fleshed out upb_stdio and upb_textprinter. test_decoder now compiles and links! But it doesn't work yet. --- Makefile | 7 +-- core/upb_stream.h | 5 +- core/upb_stream_vtbl.h | 110 +++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 4 ++ stream/upb_decoder.h | 2 +- stream/upb_stdio.c | 37 +++++++++++-- stream/upb_stdio.h | 2 +- stream/upb_text.c | 93 --------------------------------- stream/upb_text.h | 36 ------------- stream/upb_textprinter.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++ stream/upb_textprinter.h | 30 +++++++++++ 11 files changed, 310 insertions(+), 147 deletions(-) delete mode 100644 stream/upb_text.c delete mode 100644 stream/upb_text.h create mode 100644 stream/upb_textprinter.c create mode 100644 stream/upb_textprinter.h (limited to 'core/upb_stream.h') diff --git a/Makefile b/Makefile index 166ca3a..10ef96d 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $ CC=gcc CXX=g++ CFLAGS=-std=c99 -INCLUDE=-Idescriptor -Icore -Itests -I. +INCLUDE=-Idescriptor -Icore -Itests -Istream -I. CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread @@ -47,7 +47,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -90,7 +90,8 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_def + tests/test_def \ + tests/test_decoder tests: $(TESTS) OTHER_TESTS=tests/tests \ diff --git a/core/upb_stream.h b/core/upb_stream.h index 9147e45..b7400c5 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -111,7 +111,10 @@ bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); bool upb_sink_putval(upb_sink *sink, upb_value val); bool upb_sink_putstr(upb_sink *sink, upb_string *str); -// Ends a submessage. +// Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a +// client could have a submessage already serialized, and therefore put it +// as a string instead of its individual elements. +bool upb_sink_startmsg(upb_sink *sink); bool upb_sink_endmsg(upb_sink *sink); // Returns the current error status for the stream. diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ba2670e..96f6cfe 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -5,6 +5,21 @@ * interfaces. Only components that are implementing these interfaces need * to worry about this file. * + * This is tedious; this is the place in upb where I most wish I had a C++ + * feature. In C++ the compiler would generate this all for me. If there's + * any consolation, it's that I have a bit of flexibility you don't have in + * C++: I could, with preprocessor magic alone "de-virtualize" this interface + * for a particular source file. Say I had a C file that called a upb_src, + * but didn't want to pay the virtual function overhead. I could define: + * + * #define upb_src_getdef(src) upb_decoder_getdef((upb_decoder*)src) + * #define upb_src_stargmsg(src) upb_decoder_startmsg(upb_decoder*)src) + * // etc. + * + * The source file is compatible with the regular upb_src interface, but here + * we bind it to a particular upb_src (upb_decoder), which could lead to + * improved performance at a loss of flexibility for this one upb_src client. + * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. */ @@ -39,12 +54,13 @@ typedef bool (*upb_src_endmsg_fptr)(upb_src *src); // upb_sink. typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_putstr_fptr)(upb_sink *sink, upb_string *str); typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); // upb_bytesrc. -typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); -typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); +typedef bool (*upb_bytesrc_get_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); typedef bool (*upb_bytesrc_append_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t len); @@ -61,12 +77,23 @@ typedef struct { upb_src_endmsg_fptr endmsg; } upb_src_vtable; +typedef struct { + upb_sink_putdef_fptr putdef; + upb_sink_putval_fptr putval; + upb_sink_putstr_fptr putstr; + upb_sink_startmsg_fptr startmsg; + upb_sink_endmsg_fptr endmsg; +} upb_sink_vtable; + typedef struct { upb_bytesrc_get_fptr get; upb_bytesrc_append_fptr append; - upb_bytesrc_recycle_fptr recycle; } upb_bytesrc_vtable; +typedef struct { + upb_bytesink_put_fptr put; +} upb_bytesink_vtable; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. @@ -74,9 +101,12 @@ struct upb_src { upb_src_vtable *vtbl; upb_status status; bool eof; -#ifndef NDEBUG - int state; // For debug-mode checking of API usage. -#endif +}; + +struct upb_sink { + upb_sink_vtable *vtbl; + upb_status status; + bool eof; }; struct upb_bytesrc { @@ -85,13 +115,34 @@ struct upb_bytesrc { bool eof; }; +struct upb_bytesink { + upb_bytesink_vtable *vtbl; + upb_status status; + bool eof; +}; + INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); -#ifndef DEBUG - // TODO: initialize debug-mode checking. -#endif +} + +INLINE void upb_sink_init(upb_sink *s, upb_sink_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); +} + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); +} + +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); } // Implementation of virtual function dispatch. @@ -136,6 +187,47 @@ bool upb_src_getuint64(upb_src *src, uint64_t *val); bool upb_src_getfloat(upb_src *src, float *val); bool upb_src_getdouble(upb_src *src, double *val); +// upb_bytesrc +INLINE bool upb_bytesrc_get( + upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { + return bytesrc->vtbl->get(bytesrc, str, minlen); +} + +INLINE bool upb_bytesrc_append( + upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { + return bytesrc->vtbl->append(bytesrc, str, len); +} + +// upb_sink +INLINE bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def) { + return sink->vtbl->putdef(sink, def); +} +INLINE bool upb_sink_putval(upb_sink *sink, upb_value val) { + return sink->vtbl->putval(sink, val); +} +INLINE bool upb_sink_putstr(upb_sink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} +INLINE bool upb_sink_startmsg(upb_sink *sink) { + return sink->vtbl->startmsg(sink); +} +INLINE bool upb_sink_endmsg(upb_sink *sink) { + return sink->vtbl->endmsg(sink); +} + +INLINE upb_status *upb_sink_status(upb_sink *sink) { return &sink->status; } + +// upb_bytesink +INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->put(sink, str); +} +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + +// upb_bytesink + + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index c06660f..9a3f6b0 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -574,3 +574,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) d->buf_stream_offset = 0; d->buf_offset = 0; } + +upb_src *upb_decoder_src(upb_decoder *d) { + return &d->src; +} diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h index dde61fc..6ba4d77 100644 --- a/stream/upb_decoder.h +++ b/stream/upb_decoder.h @@ -44,7 +44,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); // Returns a upb_src pointer by which the decoder can be used. The returned // upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). -upb_src *upb_decoder_getsrc(upb_decoder *d); +upb_src *upb_decoder_src(upb_decoder *d); #ifdef __cplusplus } /* extern "C" */ diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 7cbca91..89a6621 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -6,6 +6,10 @@ #include "upb_stdio.h" +#include +#include +#include "upb_string.h" + // We can make this configurable if necessary. #define BLOCK_SIZE 4096 @@ -13,11 +17,15 @@ struct upb_stdio { upb_bytesrc bytesrc; upb_bytesink bytesink; FILE *file; +}; + +void upb_stdio_reset(upb_stdio *stdio, FILE* file) { + stdio->file = file; } static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, - int offset, int bytes_to_read) { - char *buf = upb_string_getrwbuf(offset + bytes_to_read) + offset; + int offset, size_t bytes_to_read) { + char *buf = upb_string_getrwbuf(str, offset + bytes_to_read) + offset; size_t read = fread(buf, 1, bytes_to_read, stdio->file); if(read < bytes_to_read) { // Error or EOF. @@ -44,7 +52,7 @@ bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); } -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { +int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { upb_stdio *stdio = (upb_stdio*)sink - offsetof(upb_stdio, bytesink); upb_strlen_t len = upb_string_len(str); size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); @@ -59,3 +67,26 @@ int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { } return written; } + +static upb_bytesrc_vtable upb_stdio_bytesrc_vtbl = { + (upb_bytesrc_get_fptr)upb_stdio_get, + (upb_bytesrc_append_fptr)upb_stdio_append, +}; + +static upb_bytesink_vtable upb_stdio_bytesink_vtbl = { + upb_stdio_put +}; + +upb_stdio *upb_stdio_new() { + upb_stdio *stdio = malloc(sizeof(*stdio)); + upb_bytesrc_init(&stdio->bytesrc, &upb_stdio_bytesrc_vtbl); + upb_bytesink_init(&stdio->bytesink, &upb_stdio_bytesink_vtbl); + return stdio; +} + +void upb_stdio_free(upb_stdio *stdio) { + free(stdio); +} + +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; } +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; } diff --git a/stream/upb_stdio.h b/stream/upb_stdio.h index 3c29fcb..fd71fdd 100644 --- a/stream/upb_stdio.h +++ b/stream/upb_stdio.h @@ -21,7 +21,7 @@ struct upb_stdio; typedef struct upb_stdio upb_stdio; // Creation/deletion. -upb_stdio_ *upb_stdio__new(); +upb_stdio *upb_stdio_new(); void upb_stdio_free(upb_stdio *stdio); // Reset/initialize the object for use. The src or sink will call diff --git a/stream/upb_text.c b/stream/upb_text.c deleted file mode 100644 index 4a25ecd..0000000 --- a/stream/upb_text.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include -#include "descriptor.h" -#include "upb_text.h" -#include "upb_data.h" - -bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - upb_string *p->str = upb_string_tryrecycle(p->str); -#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); - } - return upb_bytesink_put(p->str); -} - -bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(UPB_STRLIT("\"")); - // TODO: escaping. - upb_bytesink_put(str); - upb_bytesink_put(UPB_STRLIT("\"")); -} - -static void print_indent(upb_text_printer *p, FILE *stream) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(UPB_STRLIT(" ")); -} - -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, - FILE *stream) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name)); - upb_text_printval(valtype, val, stream); - if(p->single_line) - fputc(' ', stream); - else - fputc('\n', stream); -} - -void upb_textprinter_startmsg(upb_textprinter *p) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); - if(!p->single_line) fputc('\n', stream); - p->indent_depth++; -} - -void upb_text_pop(upb_text_printer *p, FILE *stream) -{ - p->indent_depth--; - print_indent(p, stream); - fprintf(stream, "}\n"); -} - -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream) -{ - if(upb_issubmsg(f)) { - upb_text_push(printer, f->name, stream); - printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream); - upb_text_pop(printer, stream); - } else { - upb_text_printfield(printer, f->name, f->type, v, stream); - } -} diff --git a/stream/upb_text.h b/stream/upb_text.h deleted file mode 100644 index d89c9d6..0000000 --- a/stream/upb_text.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int indent_depth; - bool single_line; -} upb_text_printer; - -INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) { - p->indent_depth = 0; - p->single_line = single_line; -} -void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file); -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, FILE *stream); -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, - FILE *stream); -void upb_text_pop(upb_text_printer *p, FILE *stream); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TEXT_H_ */ diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c new file mode 100644 index 0000000..0f0357a --- /dev/null +++ b/stream/upb_textprinter.c @@ -0,0 +1,131 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_textprinter.h" + +#include +#include +#include "upb_def.h" +#include "upb_string.h" + +struct _upb_textprinter { + upb_sink sink; + upb_bytesink *bytesink; + upb_string *str; + int indent_depth; + bool single_line; + upb_fielddef *f; +}; + +static void upb_textprinter_endfield(upb_textprinter *p) +{ + if(p->single_line) + upb_bytesink_put(p->bytesink, UPB_STRLIT(' ')); + else + upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); +} + +static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { + p->str = upb_string_tryrecycle(p->str); +#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): + CASE("%" PRIu32, uint32); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + } + upb_bytesink_put(p->bytesink, p->str); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + // TODO: escaping. + upb_bytesink_put(p->bytesink, str); + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static void upb_textprinter_indent(upb_textprinter *p) +{ + if(!p->single_line) + for(int i = 0; i < p->indent_depth; i++) + upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); +} + +static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) +{ + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, f->name); + upb_bytesink_put(p->bytesink, UPB_STRLIT(":")); + p->f = f; + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_startmsg(upb_textprinter *p) +{ + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, p->f->def->fqname); + upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); + if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); + p->indent_depth++; + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_endmsg(upb_textprinter *p) +{ + p->indent_depth--; + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +upb_sink_vtable upb_textprinter_vtbl = { + (upb_sink_putdef_fptr)upb_textprinter_putdef, + (upb_sink_putval_fptr)upb_textprinter_putval, + (upb_sink_putstr_fptr)upb_textprinter_putstr, + (upb_sink_startmsg_fptr)upb_textprinter_startmsg, + (upb_sink_endmsg_fptr)upb_textprinter_endmsg, +}; + +upb_textprinter *upb_textprinter_new() { + upb_textprinter *p = malloc(sizeof(*p)); + upb_sink_init(&p->sink, &upb_textprinter_vtbl); + return p; +} + +void upb_textprinter_free(upb_textprinter *p) { + free(p); +} + +void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, + bool single_line) { + p->bytesink = sink; + p->single_line = single_line; + p->indent_depth = 0; +} + +upb_sink *upb_textprinter_sink(upb_textprinter *p) { return &p->sink; } diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h new file mode 100644 index 0000000..7e35412 --- /dev/null +++ b/stream/upb_textprinter.h @@ -0,0 +1,30 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_TEXT_H_ +#define UPB_TEXT_H_ + +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct _upb_textprinter; +typedef struct _upb_textprinter upb_textprinter; + +upb_textprinter *upb_textprinter_new(); +void upb_textprinter_free(upb_textprinter *p); +void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, + bool single_line); + +upb_sink *upb_textprinter_sink(upb_textprinter *p); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TEXT_H_ */ -- cgit v1.2.3 From 678799082b9775e601a09af9aa68e59fc1c64f6f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:23:52 -0700 Subject: Stream decoding benchmark. --- Makefile | 24 ++++++-- benchmarks/parsestream.upb_table.c | 113 +++++++++++++++++++++++++++++++++++++ core/upb_stream.h | 3 +- core/upb_string.c | 18 ++++++ stream/upb_byteio.h | 43 -------------- 5 files changed, 152 insertions(+), 49 deletions(-) create mode 100644 benchmarks/parsestream.upb_table.c delete mode 100644 stream/upb_byteio.h (limited to 'core/upb_stream.h') diff --git a/Makefile b/Makefile index 749c5a7..203bed6 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -154,10 +154,10 @@ tests/tests: core/libupb.a tools/upbc: core/libupb.a # Benchmarks -UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byref +#UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table \ +# benchmarks/b.parsetostruct_googlemessage2.upb_table +UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \ + benchmarks/b.parsestream_googlemessage2.upb_table BENCHMARKS=$(UPB_BENCHMARKS) \ benchmarks/b.parsetostruct_googlemessage1.proto2_table \ @@ -204,6 +204,20 @@ benchmarks/b.parsetostruct_googlemessage2.upb_table_byref: \ -DMESSAGE_FILE=\"google_message2.dat\" \ -DBYREF=true $(LIBUPB) +benchmarks/b.parsestream_googlemessage1.upb_table \ +benchmarks/b.parsestream_googlemessage2.upb_table: \ + benchmarks/parsestream.upb_table.c $(LIBUPB) benchmarks/google_messages.proto.pb + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage1.upb_table $< \ + -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message1.dat\" \ + $(LIBUPB) + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage2.upb_table $< \ + -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message2.dat\" \ + $(LIBUPB) + benchmarks/b.parsetostruct_googlemessage1.proto2_table \ benchmarks/b.parsetostruct_googlemessage2.proto2_table: \ benchmarks/parsetostruct.proto2_table.cc benchmarks/google_messages.pb.cc diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c new file mode 100644 index 0000000..c6acad9 --- /dev/null +++ b/benchmarks/parsestream.upb_table.c @@ -0,0 +1,113 @@ + +#include "main.c" + +#include "upb_def.h" +#include "upb_decoder.h" +#include "upb_strstream.h" + +static upb_stringsrc *stringsrc; +static upb_string *input_str; +static upb_string *tmp_str; +static upb_msgdef *def; +static upb_decoder *decoder; + +static bool initialize() +{ + // Initialize upb state, decode descriptor. + upb_status status = UPB_STATUS_INIT; + upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); + upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds_str == NULL) { + fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"), + upb_printerr(&status); + return false; + } + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds_str); + upb_def *fds_def = upb_symtab_lookup( + s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def)); + upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_symtab_addfds(s, upb_decoder_src(d), &status); + + if(!upb_ok(&status)) { + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":"); + upb_printerr(&status); + return false; + } + + upb_string_unref(fds_str); + upb_decoder_free(d); + upb_stringsrc_free(ssrc); + upb_def_unref(fds_def); + + def = upb_downcast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME))); + if(!def) { + fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", + UPB_STRARG(UPB_STRLIT(MESSAGE_NAME))); + return false; + } + upb_symtab_unref(s); + + // Read the message data itself. + input_str = upb_strreadfile(MESSAGE_FILE); + if(input_str == NULL) { + fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); + return false; + } + tmp_str = NULL; + decoder = upb_decoder_new(def); + stringsrc = upb_stringsrc_new(); + return true; +} + +static void cleanup() +{ + upb_string_unref(input_str); + upb_string_unref(tmp_str); + upb_def_unref(UPB_UPCAST(def)); + upb_decoder_free(decoder); + upb_stringsrc_free(stringsrc); +} + +static size_t run(int i) +{ + (void)i; + upb_status status = UPB_STATUS_INIT; + upb_stringsrc_reset(stringsrc, input_str); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc)); + upb_src *src = upb_decoder_src(decoder); + upb_fielddef *f; + upb_string *str = NULL; + int depth = 0; + while(1) { + while((f = upb_src_getdef(src)) != NULL) { + if(upb_issubmsg(f)) { + upb_src_startmsg(src); + ++depth; + } else if(upb_isstring(f)) { + tmp_str = upb_string_tryrecycle(str); + upb_src_getstr(src, tmp_str); + } else { + // Primitive type. + upb_value val; + upb_src_getval(src, upb_value_addrof(&val)); + } + } + // If we're not EOF now, the loop terminated due to an error. + if (!upb_src_eof(src)) goto err; + if (depth == 0) break; + --depth; + upb_src_endmsg(src); + } + if(!upb_ok(&status)) goto err; + return upb_string_len(input_str); + +err: + fprintf(stderr, "Decode error"); + upb_printerr(&status); + return 0; +} diff --git a/core/upb_stream.h b/core/upb_stream.h index b7400c5..861bd1c 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -128,7 +128,8 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. +// data in "str" with more data. The call fails if fewer than len bytes are +// available in the stream. bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. diff --git a/core/upb_string.c b/core/upb_string.c index 93686f5..847a3ee 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -131,3 +131,21 @@ upb_string *upb_strdup(upb_string *s) { upb_strcpy(str, s); return str; } + +upb_string *upb_strreadfile(const char *filename) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + if(fseek(f, 0, SEEK_END) != 0) goto error; + long size = ftell(f); + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + upb_string *s = upb_string_new(); + char *buf = upb_string_getrwbuf(s, size); + if(fread(buf, size, 1, f) != 1) goto error; + fclose(f); + return s; + +error: + fclose(f); + return NULL; +} diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h deleted file mode 100644 index 69a28b3..0000000 --- a/stream/upb_byteio.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file contains upb_bytesrc and upb_bytesink implementations for common - * interfaces like strings, UNIX fds, and FILE*. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_BYTEIO_H -#define UPB_BYTEIO_H - -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_stringsrc **************************************************************/ - -struct upb_stringsrc; -typedef struct upb_stringsrc upb_stringsrc; - -// Create/free a stringsrc. -upb_stringsrc *upb_stringsrc_new(); -void upb_stringsrc_free(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. -upb_bytesrc *upb_stringsrc_bytesrc(); - - -/* upb_fdsrc ******************************************************************/ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif -- cgit v1.2.3 From 2a7f51f3fd534b3e9e098c522cffbb96e1551474 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 6 Oct 2010 08:19:34 -0700 Subject: Change upb_src to use push-based interface. Unfortunately my previous detailed commit message was lost somehow by git or vi. Will have to explain in more detail at a later date the rationale for this change. The build will be broken until I port the old decoder to this new interface. --- core/upb.h | 4 ++ core/upb_stream.h | 124 +++++++++++++++++++++--------------------------------- 2 files changed, 51 insertions(+), 77 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb.h b/core/upb.h index 7ee0469..6ecc2a0 100644 --- a/core/upb.h +++ b/core/upb.h @@ -261,6 +261,10 @@ enum upb_status_code { UPB_ERROR_MAX_NESTING_EXCEEDED = -3 }; +// TODO: consider making this a single word: a upb_string* where we use the low +// bits as flags indicating whether there is an error and whether it is +// resumable. This would improve efficiency, because the code would not need +// to be loaded after a call to a function returning a status. typedef struct { enum upb_status_code code; upb_string *str; diff --git a/core/upb_stream.h b/core/upb_stream.h index 861bd1c..cd00c1e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -28,98 +28,64 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -// Note! The "eof" flags work like feof() in C; they cannot report end-of-file -// until a read has failed due to eof. They cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. - -/* upb_src ********************************************************************/ +/* upb_sink *******************************************************************/ -// A upb_src is a pull parser for protobuf data. Sample usage: -// -// #define CHECK(x) if(!x) goto err; +// A upb_sink is a component that receives a stream of protobuf data. +// It is an abstract interface that is implemented either by the system or +// by users. // -// bool parse_msg(upb_src *src, int indent) { -// upb_fielddef *f; -// while ((f = upb_src_getdef(src)) != NULL) { -// for (int i = 0; i < indent; i++) putchar(' '); -// printf("Parsed field; name=" UPB_STRFMT ", num=%d", -// UPB_STRARG(d->name), d->number); -// if (upb_issubmsg(f)) { -// CHECK(upb_src_startmsg(src)); -// CHECK(parse_msg(src, indent + 2)); -// CHECK(upb_src_endmsg(src)); -// } else { -// CHECK(upb_src_skipval(src)); -// } -// } -// // We should be EOF now, otherwise there was an error. -// CHECK(upb_src_eof(src)); -// return true; -// -// err: -// return false; -// } -// -// TODO: decide how to handle unknown fields. - -// Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. End of stream can simply mean end of submessage. -struct _upb_fielddef *upb_src_getdef(upb_src *src); - -// Retrieves and stores the next value in "val". upb_src_getval() is for all -// numeric types and upb_src_getstr() is for strings. For string types "str" -// must be a newly-recycled string. Returns false on error. -bool upb_src_getval(upb_src *src, upb_valueptr val); -bool upb_src_getstr(upb_src *src, upb_string *val); - -// Like upb_src_getval() but skips the value. -bool upb_src_skipval(upb_src *src); - -// Descends into a submessage. May only be called when upb_issubmsg(f) is true -// for an f = upb_src_getdef(src) that was just parsed. -bool upb_src_startmsg(upb_src *src); - -// Stops reading a submessage. May be called before the stream is EOF, in -// which case the rest of the submessage is skipped. -bool upb_src_endmsg(upb_src *src); - -// Returns the current error/eof status for the stream. If a stream is eof -// but we are inside a submessage, calling upb_src_endmsg(src) will reset -// the eof marker. -INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } -INLINE bool upb_src_eof(upb_src *src) { return src->eof; } - -// The following functions are equivalent to upb_src_getval(), but take -// pointers to specific types. In debug mode this may check that the type -// is compatible with the type being read. This check will *not* be performed -// in non-debug mode, and if you get the type wrong the behavior is undefined. -bool upb_src_getbool(upb_src *src, bool *val); -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); +// TODO: unknown fields. -/* upb_sink *******************************************************************/ +// Constants that a sink returns to indicate to its caller whether it should +// continue or not. +typedef enum { + // Caller should continue sending values to the sink. + UPB_SINK_CONTINUE, + + // Return from upb_sink_putdef() to skip the next value (which may be a + // submessage). + UPB_SINK_SKIP, + + // Caller should stop sending values; check sink status for details. + // If processing resumes later, it should resume with the next value. + UPB_SINK_STOP, +} upb_sinkret_t; // Puts the given fielddef into the stream. -bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); +upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); // Puts the given value into the stream. -bool upb_sink_putval(upb_sink *sink, upb_value val); -bool upb_sink_putstr(upb_sink *sink, upb_string *str); +upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); +upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); // Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a // client could have a submessage already serialized, and therefore put it // as a string instead of its individual elements. -bool upb_sink_startmsg(upb_sink *sink); -bool upb_sink_endmsg(upb_sink *sink); +upb_sinkret_t upb_sink_startmsg(upb_sink *sink); +upb_sinkret_t upb_sink_endmsg(upb_sink *sink); // Returns the current error status for the stream. upb_status *upb_sink_status(upb_sink *sink); + +/* upb_src ********************************************************************/ + +// A upb_src is a resumable push parser for protobuf data. It works by first +// accepting registration of a upb_sink to which it will push data, then +// in a second phase is parses the actual data. +// + +// Sets the given sink as the target of this src. It will be called when the +// upb_src_parse() is run. +void upb_src_setsink(upb_src *src, upb_sink *sink); + +// Pushes data from this src to the previously registered sink, returning +// true if all data was processed. If false is returned, check +// upb_src_status() for details; if it is a resumable status, upb_src_run +// may be called again to resume processing. +bool upb_src_run(upb_src *src); + + /* upb_bytesrc ****************************************************************/ // Returns the next string in the stream. false is returned on error or eof. @@ -133,6 +99,10 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. +// Note! The "eof" flag works like feof() in C; it cannot report end-of-file +// until a read has failed due to eof. It cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } -- cgit v1.2.3 From b471ca6b81b88dc23aae6a53345d94d9a2714a7c Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 6 Dec 2010 15:52:40 -0800 Subject: The last major revision to the upb_stream protocol. Sources and sinks communicate by means of a upb_handlers object, which encapsulates a set of handler callbacks and will possibly offer richer semantics in the future like giving specific fields different callbacks. The upb_handlers protocol supports delegation, so sets of handlers can be written in reusable ways. For example, if a set of handlers is written to handle a specific .proto type, those handlers can be used whether that type is at the top level or whether it is a sub-message of a higher-level type. Delegation allows the streaming protocol to properly compose. --- Makefile | 41 +++++---- core/upb_stream.c | 55 ------------ core/upb_stream.h | 167 +++++++++++++++++++++++------------ core/upb_stream_vtbl.h | 235 +++++++++++++++++++++---------------------------- core/upb_string.c | 9 ++ core/upb_string.h | 7 +- 6 files changed, 249 insertions(+), 265 deletions(-) delete mode 100644 core/upb_stream.c (limited to 'core/upb_stream.h') diff --git a/Makefile b/Makefile index 131b3c0..5c6598c 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ CXX=g++ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) -LDLIBS=-lpthread +LDLIBS=-lpthread core/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 LDFLAGS += -L/usr/local/lib -llua @@ -47,16 +47,27 @@ clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* - rm -rf tests/tests tests/t.* tests/test_table + rm -rf $(TESTS) tests/t.* rm -rf descriptor/descriptor.pb rm -rf tools/upbc deps cd lang_ext/python && python setup.py clean --all +-include deps +deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) + @./gen-deps.sh $(SRC) + # The core library (core/libupb.a) -SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ - core/upb_msg.c \ - descriptor/descriptor.c +SRC=core/upb.c \ + core/upb_table.c \ + core/upb_string.c \ + descriptor/descriptor.c \ +# core/upb_def.c \ +# core/upb_msg.c \ +# stream/upb_decoder.c \ +# stream/upb_stdio.c \ +# stream/upb_strstream.c \ +# stream/upb_textprinter.c + $(SRC): perf-cppflags # Parts of core that are yet to be converted. OTHERSRC=src/upb_encoder.c src/upb_text.c @@ -101,15 +112,16 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_def \ - tests/test_decoder \ - tests/t.test_vs_proto2.googlemessage1 \ - tests/t.test_vs_proto2.googlemessage2 \ - tests/test.proto.pb + tests/test_stream \ +# tests/test_def \ +# tests/test_decoder \ +# tests/t.test_vs_proto2.googlemessage1 \ +# tests/t.test_vs_proto2.googlemessage2 \ +# tests/test.proto.pb tests: $(TESTS) OTHER_TESTS=tests/tests \ -$(TESTS): core/libupb.a +$(TESTS): $(LIBUPB) VALGRIND=valgrind --leak-check=full --error-exitcode=1 #VALGRIND= @@ -118,7 +130,7 @@ test: tests @set -e # Abort on error. # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/*; do \ + @for test in $(TESTS); do \ if [ -x ./$$test ] ; then \ echo !!! $(VALGRIND) ./$$test; \ $(VALGRIND) ./$$test || exit 1; \ @@ -247,6 +259,3 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \ -DMESSAGE_HFILE=\"google_messages.pb.h\" \ benchmarks/google_messages.pb.cc -lprotobuf -lpthread --include deps -deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) - @./gen-deps.sh $(SRC) diff --git a/core/upb_stream.c b/core/upb_stream.c deleted file mode 100644 index 0d47392..0000000 --- a/core/upb_stream.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_stream.h" - -#include "upb_def.h" - -#define CHECKSRC(x) if(!x) goto src_err -#define CHECKSINK(x) if(!x) goto sink_err - -void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { - upb_fielddef *f; - upb_string *str = NULL; - int depth = 0; - while(1) { - while((f = upb_src_getdef(src)) != NULL) { - CHECKSINK(upb_sink_putdef(sink, f)); - if(upb_issubmsg(f)) { - upb_src_startmsg(src); - upb_sink_startmsg(sink); - ++depth; - } else if(upb_isstring(f)) { - str = upb_string_tryrecycle(str); - CHECKSRC(upb_src_getstr(src, str)); - CHECKSINK(upb_sink_putstr(sink, str)); - } else { - // Primitive type. - upb_value val; - CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); - CHECKSINK(upb_sink_putval(sink, val)); - } - } - // If we're not EOF now, the loop terminated due to an error. - CHECKSRC(upb_src_eof(src)); - if (depth == 0) break; - --depth; - upb_src_endmsg(src); - upb_sink_endmsg(sink); - } - upb_string_unref(str); - return; - -src_err: - upb_string_unref(str); - upb_copyerr(status, upb_src_status(src)); - return; - -sink_err: - upb_string_unref(str); - upb_copyerr(status, upb_sink_status(sink)); - return; -} diff --git a/core/upb_stream.h b/core/upb_stream.h index cd00c1e..1eb111e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -19,7 +19,7 @@ #ifndef UPB_SRCSINK_H #define UPB_SRCSINK_H -#include "upb_stream_vtbl.h" +#include "upb.h" #ifdef __cplusplus extern "C" { @@ -28,98 +28,149 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -/* upb_sink *******************************************************************/ +/* upb_handlers ***************************************************************/ -// A upb_sink is a component that receives a stream of protobuf data. -// It is an abstract interface that is implemented either by the system or -// by users. -// -// TODO: unknown fields. +// upb_handlers define the interface by which a upb_src passes data to a +// upb_sink. -// Constants that a sink returns to indicate to its caller whether it should +// Constants that a handler returns to indicate to its caller whether it should // continue or not. typedef enum { // Caller should continue sending values to the sink. - UPB_SINK_CONTINUE, + UPB_CONTINUE, - // Return from upb_sink_putdef() to skip the next value (which may be a - // submessage). - UPB_SINK_SKIP, + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIP, // Caller should stop sending values; check sink status for details. // If processing resumes later, it should resume with the next value. - UPB_SINK_STOP, -} upb_sinkret_t; - -// Puts the given fielddef into the stream. -upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); - -// Puts the given value into the stream. -upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); -upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); - -// Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a -// client could have a submessage already serialized, and therefore put it -// as a string instead of its individual elements. -upb_sinkret_t upb_sink_startmsg(upb_sink *sink); -upb_sinkret_t upb_sink_endmsg(upb_sink *sink); - -// Returns the current error status for the stream. -upb_status *upb_sink_status(upb_sink *sink); - - -/* upb_src ********************************************************************/ - -// A upb_src is a resumable push parser for protobuf data. It works by first -// accepting registration of a upb_sink to which it will push data, then -// in a second phase is parses the actual data. + UPB_STOP, + + // When returned from a startsubmsg handler, indicates that the submessage + // should be handled by a different set of handlers, which have been + // registered on the provided upb_handlers object. May not be returned + // from any other callback. + UPB_DELEGATE, +} upb_flow_t; + +// upb_handlers +struct _upb_handlers; +typedef struct _upb_handlers upb_handlers; + +typedef void (*upb_startmsg_handler_t)(void *closure); +typedef void (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_value_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_value val); +typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_handlers *delegate_to); +typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, + upb_field_number_t fieldnum, + upb_value val); + +// An empty set of handlers, for convenient copy/paste: // - -// Sets the given sink as the target of this src. It will be called when the -// upb_src_parse() is run. -void upb_src_setsink(upb_src *src, upb_sink *sink); - -// Pushes data from this src to the previously registered sink, returning -// true if all data was processed. If false is returned, check -// upb_src_status() for details; if it is a resumable status, upb_src_run -// may be called again to resume processing. -bool upb_src_run(upb_src *src); +// static void startmsg(void *closure) { +// // Called when the top-level message begins. +// } +// +// static void endmsg(void *closure) { +// // Called when the top-level message ends. +// } +// +// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { +// // Called for every value in the stream. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f, +// upb_handlers *delegate_to) { +// // Called when a submessage begins; can delegate by returning UPB_DELEGATE. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t endsubmsg(void *closure) { +// // Called when a submessage ends. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, +// upb_value val) { +// Called with an unknown value is encountered. +// return UPB_CONTINUE; +// } +typedef struct { + upb_startmsg_handler_t startmsg; + upb_endmsg_handler_t endmsg; + upb_value_handler_t value; + upb_startsubmsg_handler_t startsubmsg; + upb_endsubmsg_handler_t endsubmsg; + upb_unknownval_handler_t unknownval; +} upb_handlerset; + +// Functions to register handlers on a upb_handlers object. +INLINE void upb_handlers_init(upb_handlers *h); +INLINE void upb_handlers_uninit(upb_handlers *h); +INLINE void upb_handlers_reset(upb_handlers *h); +INLINE bool upb_handlers_isempty(upb_handlers *h); +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); + +// An object that transparently handles delegation so that the caller needs +// only follow the protocol as if delegation did not exist. +struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE void upb_dispatch_startmsg(upb_dispatcher *d); +INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, upb_value val); /* upb_bytesrc ****************************************************************/ +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; + // Returns the next string in the stream. false is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof. -bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing // data in "str" with more data. The call fails if fewer than len bytes are // available in the stream. -bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file // until a read has failed due to eof. It cannot preemptively tell you that // the next call will fail due to eof. Since these are the semantics that C // and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); +INLINE bool upb_bytesrc_eof(upb_bytesrc *src); /* upb_bytesink ***************************************************************/ +struct _upb_bytesink; +typedef struct _upb_bytesink upb_bytesink; + // Puts the given string. Returns the number of bytes that were actually, // consumed, which may be fewer than were in the string, or <0 on error. -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. -upb_status *upb_bytesink_status(upb_bytesink *sink); - -/* Utility functions **********************************************************/ - -// Streams data from src to sink until EOF or error. -void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status); +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); +#include "upb_stream_vtbl.h" #ifdef __cplusplus } /* extern "C" */ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 96f6cfe..91464a7 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -5,59 +5,21 @@ * interfaces. Only components that are implementing these interfaces need * to worry about this file. * - * This is tedious; this is the place in upb where I most wish I had a C++ - * feature. In C++ the compiler would generate this all for me. If there's - * any consolation, it's that I have a bit of flexibility you don't have in - * C++: I could, with preprocessor magic alone "de-virtualize" this interface - * for a particular source file. Say I had a C file that called a upb_src, - * but didn't want to pay the virtual function overhead. I could define: - * - * #define upb_src_getdef(src) upb_decoder_getdef((upb_decoder*)src) - * #define upb_src_stargmsg(src) upb_decoder_startmsg(upb_decoder*)src) - * // etc. - * - * The source file is compatible with the regular upb_src interface, but here - * we bind it to a particular upb_src (upb_decoder), which could lead to - * improved performance at a loss of flexibility for this one upb_src client. - * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. */ #ifndef UPB_SRCSINK_VTBL_H_ #define UPB_SRCSINK_VTBL_H_ -#include "upb.h" +#include +#include "upb_stream.h" #ifdef __cplusplus extern "C" { #endif -struct upb_src; -typedef struct upb_src upb_src; -struct upb_sink; -typedef struct upb_sink upb_sink; -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - // Typedefs for function pointers to all of the virtual functions. -// upb_src. -typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_getstr_fptr)(upb_src *src, upb_string *str); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); - -// upb_sink. -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_putstr_fptr)(upb_sink *sink, upb_string *str); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); - // upb_bytesrc. typedef bool (*upb_bytesrc_get_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); @@ -68,23 +30,6 @@ typedef bool (*upb_bytesrc_append_fptr)( typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); // Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_getstr_fptr getstr; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; -} upb_src_vtable; - -typedef struct { - upb_sink_putdef_fptr putdef; - upb_sink_putval_fptr putval; - upb_sink_putstr_fptr putstr; - upb_sink_startmsg_fptr startmsg; - upb_sink_endmsg_fptr endmsg; -} upb_sink_vtable; - typedef struct { upb_bytesrc_get_fptr get; upb_bytesrc_append_fptr append; @@ -97,42 +42,18 @@ typedef struct { // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. -struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_sink { - upb_sink_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_bytesrc { +struct _upb_bytesrc { upb_bytesrc_vtable *vtbl; upb_status status; bool eof; }; -struct upb_bytesink { +struct _upb_bytesink { upb_bytesink_vtable *vtbl; upb_status status; bool eof; }; -INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - -INLINE void upb_sink_init(upb_sink *s, upb_sink_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; @@ -146,46 +67,6 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { } // Implementation of virtual function dispatch. -INLINE struct _upb_fielddef *upb_src_getdef(upb_src *src) { - return src->vtbl->getdef(src); -} -INLINE bool upb_src_getval(upb_src *src, upb_valueptr val) { - return src->vtbl->getval(src, val); -} -INLINE bool upb_src_getstr(upb_src *src, upb_string *str) { - return src->vtbl->getstr(src, str); -} -INLINE bool upb_src_skipval(upb_src *src) { return src->vtbl->skipval(src); } -INLINE bool upb_src_startmsg(upb_src *src) { return src->vtbl->startmsg(src); } -INLINE bool upb_src_endmsg(upb_src *src) { return src->vtbl->endmsg(src); } - -// Implementation of type-specific upb_src accessors. If we encounter a upb_src -// where these can be implemented directly in a measurably more efficient way, -// we can make these part of the vtable also. -// -// For <64-bit types we have to use a temporary to accommodate baredecoder, -// which does not know the actual width of the type. -INLINE bool upb_src_getbool(upb_src *src, bool *_bool) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *_bool = val._bool; - return ret; -} - -INLINE bool upb_src_getint32(upb_src *src, int32_t *i32) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *i32 = val.int32; - return ret; -} - -// TODO. -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); // upb_bytesrc INLINE bool upb_bytesrc_get( @@ -198,24 +79,108 @@ INLINE bool upb_bytesrc_append( return bytesrc->vtbl->append(bytesrc, str, len); } -// upb_sink -INLINE bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def) { - return sink->vtbl->putdef(sink, def); +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_handlers +struct _upb_handlers { + upb_handlerset *set; + void *closure; +}; + +INLINE void upb_handlers_init(upb_handlers *h) { + (void)h; +} +INLINE void upb_handlers_uninit(upb_handlers *h) { + (void)h; +} + +INLINE void upb_handlers_reset(upb_handlers *h) { + h->set = NULL; + h->closure = NULL; +} + +INLINE bool upb_handlers_isempty(upb_handlers *h) { + return !h->set && !h->closure; +} + +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + h->set = set; +} + +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure) { + h->closure = closure; +} + +// upb_dispatcher +typedef struct { + upb_handlers handlers; + int depth; +} upb_dispatcher_frame; + +struct _upb_dispatcher { + upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit; +}; + +INLINE void upb_dispatcher_init(upb_dispatcher *d) { + d->limit = d->stack + sizeof(d->stack); } -INLINE bool upb_sink_putval(upb_sink *sink, upb_value val) { - return sink->vtbl->putval(sink, val); + +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { + d->top = d->stack; + d->top->depth = 1; // Never want to trigger end-of-delegation. + d->top->handlers = *h; } -INLINE bool upb_sink_putstr(upb_sink *sink, upb_string *str) { - return sink->vtbl->putstr(sink, str); + +INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE bool upb_sink_startmsg(upb_sink *sink) { - return sink->vtbl->startmsg(sink); + +INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->endmsg(d->top->handlers.closure); } -INLINE bool upb_sink_endmsg(upb_sink *sink) { - return sink->vtbl->endmsg(sink); + +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f) { + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_handlers_reset(&handlers); + upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers); + assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers)); + if (ret == UPB_DELEGATE) { + ++d->top; + d->top->handlers = handlers; + d->top->depth = 0; + d->top->handlers.set->startmsg(d->top->handlers.closure); + ret = UPB_CONTINUE; + } + ++d->top->depth; + upb_handlers_uninit(&handlers); + return ret; +} + +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + if (--d->top->depth == 0) { + d->top->handlers.set->endmsg(d->top->handlers.closure); + --d->top; + } + return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } -INLINE upb_status *upb_sink_status(upb_sink *sink) { return &sink->status; } +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, + struct _upb_fielddef *f, + upb_value val) { + return d->top->handlers.set->value(d->top->handlers.closure, f, val); +} + +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val) { + return d->top->handlers.set->unknownval(d->top->handlers.closure, + fieldnum, val); +} // upb_bytesink INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { diff --git a/core/upb_string.c b/core/upb_string.c index 847a3ee..4f5f5c2 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -29,6 +29,7 @@ upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; str->cached_mem = NULL; + str->len = 0; #ifndef UPB_HAVE_MSIZE str->size = 0; #endif @@ -132,6 +133,14 @@ upb_string *upb_strdup(upb_string *s) { return str; } +void upb_strcat(upb_string *s, upb_string *append) { + uint32_t old_size = upb_string_len(s); + uint32_t append_size = upb_string_len(append); + uint32_t new_size = old_size + append_size; + char *buf = upb_string_getrwbuf(s, new_size); + memcpy(buf + old_size, upb_string_getrobuf(append), append_size); +} + upb_string *upb_strreadfile(const char *filename) { FILE *f = fopen(filename, "rb"); if(!f) return NULL; diff --git a/core/upb_string.h b/core/upb_string.h index bd89f67..ee345e3 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -18,6 +18,11 @@ * string). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. + * + * Reference-counted strings have recently fallen out of favor because of the + * performance impacts of doing thread-safe reference counting with atomic + * operations. We side-step this issue by not performing atomic operations + * unless the string has been marked thread-safe. */ #ifndef UPB_STRING_H @@ -34,7 +39,7 @@ extern "C" { #endif // All members of this struct are private, and may only be read/written through -// the associated functions. Also, strings may *only* be allocated on the heap. +// the associated functions. struct _upb_string { // The pointer to our currently active data. This may be memory we own // or a pointer into memory we don't own. -- cgit v1.2.3 From 45599180905d45a882970f6ca8b6007436ac3f97 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 09:43:28 -0800 Subject: More work on upb_src. --- Makefile | 4 +- core/upb.h | 43 +++--- core/upb_def.c | 445 ++++++++++++++++++++++++++++++++---------------------- core/upb_stream.h | 6 + 4 files changed, 302 insertions(+), 196 deletions(-) (limited to 'core/upb_stream.h') diff --git a/Makefile b/Makefile index 5c6598c..42c7d41 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ CC=gcc CXX=g++ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. -CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) +CPPFLAGS=-Wall -Wextra -Wno-missing-field-initializers -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread core/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 @@ -61,7 +61,7 @@ SRC=core/upb.c \ core/upb_table.c \ core/upb_string.c \ descriptor/descriptor.c \ -# core/upb_def.c \ + core/upb_def.c \ # core/upb_msg.c \ # stream/upb_decoder.c \ # stream/upb_stdio.c \ diff --git a/core/upb.h b/core/upb.h index 7bed779..2057d60 100644 --- a/core/upb.h +++ b/core/upb.h @@ -12,6 +12,7 @@ #include #include #include // only for size_t. +#include #include "descriptor_const.h" #include "upb_atomic.h" @@ -128,6 +129,11 @@ typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. typedef struct { @@ -153,14 +159,20 @@ typedef struct { #endif } upb_value; +#ifdef NDEBUG +#define SET_TYPE(dest, val) +#else +#define SET_TYPE(dest, val) dest = val +#endif + #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ assert(val.type == UPB_TYPE(proto_type)); \ - return val.membername; \ + return val.val.membername; \ } \ void upb_value_ ## name(upb_value *val, ctype cval) { \ - val.type = UPB_TYPE(proto_type); \ - val.membername = cval; \ + SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + val->val.membername = cval; \ } UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); @@ -169,6 +181,7 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); +UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. @@ -187,24 +200,23 @@ typedef union { void *_void; } upb_valueptr; -// The type of a upb_value. This is like a upb_fieldtype_t, but adds the -// constant UPB_VALUETYPE_ARRAY to represent an array. -typedef uint8_t upb_valuetype_t; -#define UPB_VALUETYPE_ARRAY 32 - INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->_double}; + upb_valueptr ptr = {&val->val._double}; return ptr; } -// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to -// know the value type to perform this operation, because we need to know how -// much memory to copy. +// Reads or writes a upb_value from an address represented by a upb_value_ptr. +// We need to know the value type to perform this operation, because we need to +// know how much memory to copy (and for big-endian machines, we need to know +// where in the upb_value the data goes). +// +// For little endian-machines where we didn't mind overreading, we could make +// upb_value_read simply use memcpy(). INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { upb_value val; #define CASE(t, member_name) \ - case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; switch(ft) { CASE(DOUBLE, _double) @@ -232,13 +244,10 @@ INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { #undef CASE } -// Writes a upb_value to a upb_value_ptr location. We need to know the value -// type to perform this operation, because we need to know how much memory to -// copy. INLINE void upb_value_write(upb_valueptr ptr, upb_value val, upb_fieldtype_t ft) { #define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; switch(ft) { CASE(DOUBLE, _double) diff --git a/core/upb_def.c b/core/upb_def.c index cc771dc..4320fb6 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -9,9 +9,6 @@ #include "descriptor.h" #include "upb_def.h" -#define CHECKSRC(x) if(!(x)) goto src_err -#define CHECK(x) if(!(x)) goto err - /* Rounds p up to the next multiple of t. */ static size_t upb_align_up(size_t val, size_t align) { return val % align == 0 ? val : val + align - (val % align); @@ -184,6 +181,188 @@ static void upb_def_uninit(upb_def *def) { } +/* upb_defbuilder ************************************************************/ + +// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in +// the format defined in descriptor.proto. The output of a upb_defbuilder is +// a list of upb_def* that possibly contain unresolved references. +// +// We use a separate object (upb_defbuilder) instead of having the defs handle +// the parse themselves because we need to store state that is only necessary +// during the building process itself. + +// When we are bootstrapping descriptor.proto, we must help the bare decoder out +// by telling it when to descend into a submessage, because with the wire format +// alone we cannot tell the difference between a submessage and a string. +// +// TODO: In the long-term, we should bootstrap from a serialization format that +// contains this information, so we can remove this special-case code. This +// would involve defining a serialization format very similar to the existing +// protobuf format, but that contains more information about the wire type. +#define BEGIN_SUBMSG 100 + +// upb_deflist: A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); + } + l->defs[l->len++] = d; +} + +// Qualify the defname for all defs starting with offset "start" with "str". +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } +} + +typedef struct { + upb_string *name; + int start; +} upb_defbuilder_frame; + +struct _upb_defbuilder { + upb_deflist defs; + upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; + + uint32_t number; + upb_string *name; +}; +typedef struct _upb_defbuilder upb_defbuilder; + +// Forward declares for top-level file descriptors. +static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h); + + +// Start/end handlers for FileDescriptorProto and DescriptorProto (the two +// entities that have names and can contain sub-definitions. +void upb_defbuilder_startcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[b->stack_len++]; + f->start = b->defs.len; + f->name = NULL; +} + +void upb_defbuilder_endcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[--b->stack_len]; + upb_deflist_qualify(&b->defs, f->name, f->start); + upb_string_unref(f->name); +} + +void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { + upb_defbuilder_frame *f = &b->stack[b->stack_len-1]; + upb_string_unref(f->name); + f->name = upb_string_getref(str); +} + +// Handlers for google.protobuf.FileDescriptorProto. +static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, + upb_fielddef *f, + upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + upb_msgdef_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_enumdef_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + default: + // TODO: services and extensions. + return UPB_SKIP; + } +} + +static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_value, + &upb_defbuilder_FileDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// Handlers for google.protobuf.FileDescriptorSet. +static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, + upb_fielddef *f, + upb_value val) { + (void)b; + (void)val; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + upb_defbuilder_register_FileDescriptorProto(b, h); + return UPB_DELEGATE; + default: + return UPB_SKIP; + } +} + +static void upb_defbuilder_register_FileDescriptorSet( + upb_defbuilder *b, upb_handlers *h) { + static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorSet_value, + &upb_defbuilder_FileDescriptorSet_startsubmsg, + }; + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_set_handler_closure(h, b); +} + + /* upb_unresolveddef **********************************************************/ // Unresolved defs are used as temporary placeholders for a def whose name has @@ -227,28 +406,30 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. -static void upb_enumdef_startmsg(upb_defbuilder *b) { +static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { b->number = -1; - name = NULL; + b->name = NULL; } -static upb_flow_t upb_enumdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - name = upb_string_tryrecycle(name); + b->name = upb_string_tryrecycle(name); CHECKSRC(upb_src_getstr(src, name)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); + b->number = upb_value_getint32(val); break; default: - CHECKSRC(upb_src_skipval(src)); break; } + return UPB_CONTINUE; } -static void upb_enumdef_endmsg(upb_defbuilder *b) { - if(name == NULL || number == -1) { +static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { + if(b->name == NULL || b->number == -1) { upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } @@ -262,7 +443,66 @@ static void upb_enumdef_endmsg(upb_defbuilder *b) { return UPB_CONTINUE; } -upb_enum_iter upb_enum_begin(upb_enumdef *e) { +static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + &upb_enumdef_EnumValueDescriptorProto_startmsg, + &upb_enumdef_EnumValueDescriptorProto_endmsg, + &upb_enumdef_EnumValueDescriptorProto_value, + } + upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// google.protobuf.EnumDescriptorProto. +void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + upb_deflist_push(&b->defs, UPB_UPCAST(e)); +} + +void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { + assert(e->base.fqname); +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(e->base.fqname); + e->base.fqname = upb_value_getstr(val); + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + return BEGIN_SUBMSG; + } + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, + upb_fielddef *f, + upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + upb_enumdef_register_EnumValueDescriptorProto(b, h); + return UPB_DELEGATE; + } + return UPB_SKIP; +} + +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + &upb_enumdef_EnumDescriptorProto_startmsg, + &upb_enumdef_EnumDescriptorProto_endmsg, + &upb_enumdef_EnumDescriptorProto_value, + } + upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. return upb_inttable_begin(&e->iton); } @@ -355,7 +595,7 @@ static int upb_compare_fields(const void *f1, const void *f2) { return upb_compare_typed_fields(*(void**)f1, *(void**)f2); } -// Processes a google.protobuf.DescriptorProto, adding defs to "defs." +// google.protobuf.DescriptorProto. static void upb_msgdef_startmsg(upb_defbuilder *b) { upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); @@ -417,9 +657,6 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - // XXX - m->base.fqname = upb_string_tryrecycle(m->base.fqname); - m->base.fqname = upb_value_getstr(val); upb_defbuilder_setscopename(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: @@ -432,13 +669,14 @@ static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb_handlers *h) { +static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, + upb_handlers *h) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: upb_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - upb_register_DescriptorProto(b, h); + upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: upb_register_EnumDescriptorProto(b, h); @@ -449,6 +687,18 @@ static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb } } +static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + &upb_msgdef_startmsg, + &upb_msgdef_endmsg, + &upb_msgdef_value, + &upb_msgdef_startsubmsg, + } + upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + static void upb_msgdef_free(upb_msgdef *m) { upb_msg_iter i; @@ -477,165 +727,6 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { return upb_inttable_next(&m->itof, &iter->e); } -/* upb_defbuilder ************************************************************/ - -// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in -// the format defined in descriptor.proto. The output of a upb_defbuilder is -// a list of upb_def* that possibly contain unresolved references. -// -// We use a separate object (upb_defbuilder) instead of having the defs handle -// the parse themselves because we need to store state that is only necessary -// during the building process itself. - -// When we are bootstrapping descriptor.proto, we must help the bare decoder out -// by telling it when to descend into a submessage, because with the wire format -// alone we cannot tell the difference between a submessage and a string. -#define BEGIN_SUBMSG 100 - -// upb_deflist: A little dynamic array for storing a growing list of upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; -} - -static void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) - if(l->defs[i]) upb_def_unref(l->defs[i]); - free(l->defs); -} - -static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); - } - l->defs[l->len++] = d; -} - -// Qualify the defname for all defs starting with offset "start" with "str". -static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } -} - -typedef struct { - upb_deflist defs; - struct { - upb_string *name; - int start; - } upb_defbuilder_frame; - upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; - int stack_len; -} upb_defbuilder; - -// Start/end handlers for FileDescriptorProto and DescriptorProto (the two -// entities that have names and can contain sub-definitions. -upb_defbuilder_startcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = b->stack[b->stack_len++]; - f->start = b->defs.len; - f->name = NULL; -} - -upb_defbuilder_endcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = b->stack[--b->stack_len]; - upb_deflist_qualify(&b->defs, f->name, f->start); - upb_string_unref(f->name); -} - -upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { -} - -// Handlers for google.protobuf.FileDescriptorProto. -static bool upb_defbuilder_FileDescriptorProto_value(upb_defbuilder *b, - upb_fielddef *f, - upb_value val) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: - upb_defbuilder_setscopename(b, val.str); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - return BEGIN_SUBMSG; - default: - return UPB_SKIP; - } -} - -static bool upb_defbuilder_FileDescriptorProto_startsubmsg(upb_defbuilder *b, - upb_fielddef *f, - upb_handlers *h) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - upb_defbuilder_register_DescriptorProto(b, h); - return UPB_DELEGATE; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_defbuilder_register_EnumDescriptorProto(b, h); - return UPB_DELEGATE; - default: - // TODO: services and extensions. - return UPB_SKIP; - } -} - -static upb_handlers upb_defbuilder_FileDescriptorProto_handlers = { - NULL, // startmsg - NULL, // endmsg - &upb_defbuilder_FileDescriptorProto_value, - &upb_defbuilder_FileDescriptorProto_startsubmsg, -} - -upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); - upb_set_handler_closure(h, b); -} - -// Handlers for google.protobuf.FileDescriptorSet. -upb_defbuilder_FileDescriptorSet_value(upb_defbuilder *b, upb_fielddef *f, - upb_value val) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - return BEGIN_SUBMSG; - default: - return UPB_SKIP; - } -} - -upb_defbuilder_FileDescriptorSet_startsubmsg(upb_defbuilder *b, - upb_fielddef *f, upb_handlers *h) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - upb_defbuilder_register_FileDescriptorProto(b, h); - return UPB_DELEGATE; - default: - return UPB_SKIP; - } -} - -static upb_handlers upb_defbuilder_FileDescriptorSet_handlers = { - NULL, // startmsg - NULL, // endmsg - &upb_defbuilder_FileDescriptorSet_value, - &upb_defbuilder_FileDescriptorSet_startsubmsg, -} - -upb_defbuilder_register_FileDescriptorSet(upb_defbuilder *b, upb_handlers *h) { - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); - upb_set_handler_closure(h, b); -} - - - /* upb_symtab adding defs *****************************************************/ // This is a self-contained group of functions that, given a list of upb_defs diff --git a/core/upb_stream.h b/core/upb_stream.h index 1eb111e..c96c544 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -135,6 +135,12 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, upb_field_number_t fieldnum, upb_value val); +/* upb_src ********************************************************************/ + +struct _upb_src; +typedef struct _upb_src upb_src; + + /* upb_bytesrc ****************************************************************/ struct _upb_bytesrc; -- cgit v1.2.3 From bcc688a303439c758a47da9f0eb1c064ece6ce09 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 20:37:04 -0800 Subject: upb_def compiles again! --- core/upb.c | 2 +- core/upb.h | 37 ++++--- core/upb_def.c | 283 +++++++++++++++++++++++++++++++------------------ core/upb_msg.c | 13 ++- core/upb_stream.h | 62 +++++++---- core/upb_stream_vtbl.h | 88 ++++++++++----- core/upb_string.c | 2 +- core/upb_string.h | 13 +-- 8 files changed, 325 insertions(+), 175 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb.c b/core/upb.c index c396323..2f715d0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -45,7 +45,7 @@ void upb_seterr(upb_status *status, enum upb_status_code code, { if(upb_ok(status)) { // The first error is the most interesting. status->code = code; - status->str = upb_string_tryrecycle(status->str); + upb_string_recycle(&status->str); va_list args; va_start(args, msg); upb_string_vprintf(status->str, msg, args); diff --git a/core/upb.h b/core/upb.h index 2057d60..64bc88c 100644 --- a/core/upb.h +++ b/core/upb.h @@ -126,14 +126,20 @@ struct _upb_array; typedef struct _upb_array upb_array; struct _upb_msg; typedef struct _upb_msg upb_msg; +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; -typedef uint32_t upb_strlen_t; +typedef int32_t upb_strlen_t; +#define UPB_STRLEN_MAX INT32_MAX // The type of a upb_value. This is like a upb_fieldtype_t, but adds the // constant UPB_VALUETYPE_ARRAY to represent an array. typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_ARRAY 32 +#define UPB_VALUETYPE_BYTESRC 32 +#define UPB_VALUETYPE_RAW 33 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. typedef struct { @@ -146,6 +152,7 @@ typedef struct { uint64_t uint64; bool _bool; upb_string *str; + upb_bytesrc *bytesrc; upb_msg *msg; upb_array *arr; upb_atomic_refcount_t *refcount; @@ -167,21 +174,27 @@ typedef struct { #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == UPB_TYPE(proto_type)); \ + assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ return val.val.membername; \ } \ - void upb_value_ ## name(upb_value *val, ctype cval) { \ - SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + void upb_value_set ## name(upb_value *val, ctype cval) { \ + SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } -UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); -UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, INT32); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); -UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); -UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); +UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); +UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); + +void upb_value_setraw(upb_value *val, uint64_t cval) { + SET_TYPE(val->type, UPB_VALUETYPE_RAW); + val->val.uint64 = cval; +} // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. diff --git a/core/upb_def.c b/core/upb_def.c index 4320fb6..4f12dbe 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -228,6 +228,10 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } +static upb_def *upb_deflist_last(upb_deflist *l) { + return l->defs[l->len-1]; +} + // Qualify the defname for all defs starting with offset "start" with "str". static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { @@ -238,8 +242,14 @@ static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) } } +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). typedef struct { upb_string *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. int start; } upb_defbuilder_frame; @@ -250,6 +260,10 @@ struct _upb_defbuilder { uint32_t number; upb_string *name; + bool saw_number; + bool saw_name; + + upb_fielddef *f; }; typedef struct _upb_defbuilder upb_defbuilder; @@ -259,6 +273,28 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_defbuilder_init(upb_defbuilder *b) { + upb_deflist_init(&b->defs); + b->stack_len = 0; + b->name = NULL; +} + +static void upb_defbuilder_uninit(upb_defbuilder *b) { + upb_string_unref(b->name); + upb_deflist_uninit(&b->defs); +} + +static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { + if (b->stack_len <= 1) return NULL; + int index = b->stack[b->stack_len-1].start - 1; + assert(index >= 0); + return upb_downcast_msgdef(b->defs.defs[index]); +} + +static upb_def *upb_defbuilder_last(upb_defbuilder *b) { + return upb_deflist_last(&b->defs); +} + // Start/end handlers for FileDescriptorProto and DescriptorProto (the two // entities that have names and can contain sub-definitions. void upb_defbuilder_startcontainer(upb_defbuilder *b) { @@ -291,9 +327,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( @@ -308,19 +343,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( return UPB_DELEGATE; default: // TODO: services and extensions. - return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -333,9 +368,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( @@ -345,20 +379,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: upb_defbuilder_register_FileDescriptorProto(b, h); return UPB_DELEGATE; - default: - return UPB_SKIP; } + return UPB_SKIPSUBMSG; } static void upb_defbuilder_register_FileDescriptorSet( upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorSet_value, &upb_defbuilder_FileDescriptorSet_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -406,18 +439,20 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. -static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { - b->number = -1; - b->name = NULL; +static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + b->saw_number = false; + b->saw_name = false; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - b->name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); + upb_string_unref(b->name); + upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); @@ -428,34 +463,37 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { - if(b->name == NULL || b->number == -1) { - upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - goto err; +static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + if(!b->saw_number || !b->saw_name) { + //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); + //goto err; + return; } - upb_ntoi_ent ntoi_ent = {{name, 0}, number}; - upb_iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; + upb_iton_ent iton_ent = {{b->number, 0}, b->name}; + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. - return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumValueDescriptorProto_startmsg, &upb_enumdef_EnumValueDescriptorProto_endmsg, &upb_enumdef_EnumValueDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } // google.protobuf.EnumDescriptorProto. -void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { +void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); @@ -463,42 +501,51 @@ void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { upb_deflist_push(&b->defs, UPB_UPCAST(e)); } -void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { - assert(e->base.fqname); +void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + assert(upb_defbuilder_last(b)->fqname != NULL); } -static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: { + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_string_unref(e->base.fqname); - e->base.fqname = upb_value_getstr(val); + e->base.fqname = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: return BEGIN_SUBMSG; + default: + return UPB_CONTINUE; } - return UPB_CONTINUE; } -static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: upb_enumdef_register_EnumValueDescriptorProto(b, h); return UPB_DELEGATE; + default: + return UPB_SKIPSUBMSG; } - return UPB_SKIP; } static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumDescriptorProto_startmsg, &upb_enumdef_EnumDescriptorProto_endmsg, &upb_enumdef_EnumDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + &upb_enumdef_EnumDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -529,56 +576,71 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(upb_defbuilder *b) { +static void upb_fielddef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; f->name = NULL; f->def = NULL; f->owned = false; - f->msgdef = m; + f->msgdef = upb_defbuilder_top(b); b->f = f; } -static void upb_fielddef_endmsg(upb_defbuilder *b) { +static void upb_fielddef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_fielddef *f = b->f; // TODO: verify that all required fields were present. assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. + upb_msgdef *m = upb_defbuilder_top(b); upb_itof_ent itof_ent = {{f->number, 0}, f}; upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); - return true; } -static upb_flow_t upb_fielddef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { - switch(parsed_f->number) { +static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - f->type = upb_value_getint32(val); + b->f->type = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - f->label = upb_value_getint32(val); + b->f->label = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - f->number = upb_value_getint32(val); + b->f->number = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); + upb_string_unref(b->f->name); + b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if(b->f->def) upb_def_unref(b->f->def); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->owned = true; break; } } return UPB_CONTINUE; } +static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_fielddef_startmsg, + &upb_fielddef_endmsg, + &upb_fielddef_value, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b); +} + /* upb_msgdef *****************************************************************/ @@ -596,21 +658,24 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(upb_defbuilder *b) { +static void upb_msgdef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b); } -static void upb_msgdef_endmsg(upb_defbuilder *b) { - upb_msgdef *m = upb_downcast_msgdef(upb_deflist_stacktop(&m->defs)); +static void upb_msgdef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return UPB_ERROR; + //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + //return UPB_ERROR; + return; } // Create an ordering over the fields. @@ -651,51 +716,57 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - return UPB_CONTINUE; + //return UPB_CONTINUE; } -static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_defbuilder_setscopename(upb_value_getstr(val)); - break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: { + upb_msgdef *m = upb_defbuilder_top(b); + upb_string_unref(m->base.fqname); + m->base.fqname = upb_string_getref(upb_value_getstr(val)); + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; default: // TODO: extensions. - return UPB_SKIP; + return UPB_CONTINUE; } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, +static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - upb_register_FieldDescriptorProto(b, h); + upb_fielddef_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_register_EnumDescriptorProto(b, h); + upb_enumdef_register_EnumDescriptorProto(b, h); return UPB_DELEGATE; break; default: - return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_msgdef_startmsg, &upb_msgdef_endmsg, &upb_msgdef_value, &upb_msgdef_startsubmsg, - } - upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -884,7 +955,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either // succeeds or fails. Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, +bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, bool allow_redef, upb_status *status) { upb_rwlock_wrlock(&s->lock); @@ -892,9 +963,9 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Build a table of the defs we mean to add, for duplicate detection and name // resolution. upb_strtable tmptab; - upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); - for (uint32_t i = 0; i < defs->len; i++) { - upb_def *def = defs->defs[i]; + upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); + for (int i = 0; i < num_defs; i++) { + upb_def *def = defs[i]; upb_symtab_ent e = {{def->fqname, 0}, def}; // Redefinition is never allowed within a single FileDescriptorSet. @@ -909,13 +980,13 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Pass ownership from the deflist to the strtable. upb_strtable_insert(&tmptab, &e.e); - defs->defs[i] = NULL; + defs[i] = NULL; } // TODO: process the list of extensions by modifying entries from // tmptab in-place (copying them from the symtab first if necessary). - CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); + if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; // The defs in tmptab have been vetted, and can be added to the symtab // without causing errors. Now add all tmptab defs to the symtab, @@ -946,6 +1017,7 @@ err: upb_def_unref(e->def); } upb_strtable_free(&tmptab); + for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]); return false; } @@ -1026,20 +1098,18 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { - upb_defbuilder *b = upb_defbuilder_new(); - upb_defbuilder_register_handlers(b, upb_src_gethandlers(src)); + upb_defbuilder b; + upb_defbuilder_init(&b); + //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); + upb_defbuilder_register_FileDescriptorSet(&b, NULL); if(!upb_src_run(src)) { upb_copyerr(status, upb_src_status(src)); + upb_defbuilder_uninit(&b); return; } - upb_symtab_add_defs(s, b->defs, b->defs_len, false, status); - upb_deflist_uninit(&defs); + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_defbuilder_uninit(&b); return; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_deflist_uninit(&defs); } @@ -1074,8 +1144,10 @@ err: // complicated to support on big-endian machines. typedef struct { + upb_src src; upb_string *input; upb_strlen_t offset; + upb_dispatcher dispatcher; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -1121,9 +1193,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_dispatch_startmsg(&d->dispatcher); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. - while(d->offset >= *d->top) { + while(d->offset >= *top) { upb_dispatch_endsubmsg(&d->dispatcher); - d->offset = *(d->top--); + d->offset = *(top--); } uint32_t key = upb_baredecoder_readv64(d); @@ -1134,16 +1206,16 @@ bool upb_baredecoder_run(upb_baredecoder *d) { uint32_t delim_len = upb_baredecoder_readv32(d); // We don't know if it's a string or a submessage; deliver first as // string. - str = upb_string_tryrecycle(str); - upb_string_substr(str, d->input, d->offset, d->delimited_len); + upb_string_recycle(&str); + upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == UPB_TREAT_AS_SUBMSG) { + if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { // Should deliver as a submessage instead. upb_dispatch_startsubmsg(&d->dispatcher, &f); - *(++d->top) = d->offset + delimited_len; + *(++top) = d->offset + delim_len; } else { - d->offset += delimited_len; + d->offset += delim_len; } } else { upb_value v; @@ -1167,23 +1239,24 @@ bool upb_baredecoder_run(upb_baredecoder *d) { } } upb_dispatch_endmsg(&d->dispatcher); + return true; } -static upb_src_vtable upb_baredecoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_baredecoder_getdef, - (upb_src_getval_fptr)&upb_baredecoder_getval, - (upb_src_getstr_fptr)&upb_baredecoder_getstr, - (upb_src_skipval_fptr)&upb_baredecoder_skipval, - (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, -}; - static upb_baredecoder *upb_baredecoder_new(upb_string *str) { + //static upb_src_vtable vtbl = { + // (upb_src_getdef_fptr)&upb_baredecoder_getdef, + // (upb_src_getval_fptr)&upb_baredecoder_getval, + // (upb_src_getstr_fptr)&upb_baredecoder_getstr, + // (upb_src_skipval_fptr)&upb_baredecoder_skipval, + // (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, + // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, + //}; upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); d->offset = 0; - upb_src_init(&d->src, &upb_baredecoder_src_vtbl); + upb_dispatcher_init(&d->dispatcher); + //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_msg.c b/core/upb_msg.c index 75f7a35..a0a5196 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -7,6 +7,8 @@ */ #include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" void _upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { @@ -108,10 +110,13 @@ upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, upb_status *status) { - (void)msg; - (void)md; - (void)str; - (void)status; + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, str); + upb_decoder *d = upb_decoder_new(md); + upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_decoder_free(d); + upb_stringsrc_free(ssrc); } void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, diff --git a/core/upb_stream.h b/core/upb_stream.h index c96c544..9ae69de 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,13 +39,16 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). - UPB_SKIP, + // An error occurred; check status for details. + UPB_ERROR, - // Caller should stop sending values; check sink status for details. + // Processing should stop for now, but could be resumed later. // If processing resumes later, it should resume with the next value. - UPB_STOP, + UPB_SUSPEND, + + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIPSUBMSG, // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been @@ -117,6 +120,9 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +// TODO: for clients that want to increase efficiency by preventing bytesrcs +// from automatically being converted to strings in the value callback. +// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); // An object that transparently handles delegation so that the caller needs @@ -140,21 +146,30 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; +bool upb_src_run(upb_src *src); +upb_status *upb_src_status(upb_src *src); -/* upb_bytesrc ****************************************************************/ - -struct _upb_bytesrc; -typedef struct _upb_bytesrc upb_bytesrc; -// Returns the next string in the stream. false is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +/* upb_bytesrc ****************************************************************/ -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. The call fails if fewer than len bytes are -// available in the stream. -INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// Reads up to "count" bytes into "buf", returning the total number of bytes +// read. If <0, indicates error (check upb_bytesrc_status for details). +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count); + +// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing +// existing string data (which avoids a copy). +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count); + +// A convenience function for getting all the remaining data in a upb_bytesrc +// as a upb_string. Returns false and sets "status" if the operation fails. +INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status); +INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, + upb_status *status) { + return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); +} // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file @@ -164,14 +179,21 @@ INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t l INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); INLINE bool upb_bytesrc_eof(upb_bytesrc *src); + /* upb_bytesink ***************************************************************/ struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +// Writes up to "count" bytes from "buf", returning the total number of bytes +// written. If <0, indicates error (check upb_bytesink_status() for details). +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count); + +// Puts the given string, which may alias the string data (which avoids a +// copy). Returns the number of bytes that were actually, consumed, which may +// be fewer than were in the string, or <0 on error. +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 91464a7..c0cf04f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -20,23 +20,33 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. +// upb_src +struct _upb_src { +}; +typedef struct { +} upb_src_vtbl; + // upb_bytesrc. -typedef bool (*upb_bytesrc_get_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); +typedef upb_strlen_t (*upb_bytesrc_read_fptr)( + upb_bytesrc *src, void *buf, upb_strlen_t count); +typedef bool (*upb_bytesrc_getstr_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t count); // upb_bytesink. -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); +typedef upb_strlen_t (*upb_bytesink_write_fptr)( + upb_bytesink *bytesink, void *buf, upb_strlen_t count); +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str); // Vtables for the above interfaces. typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; + upb_bytesrc_read_fptr read; + upb_bytesrc_getstr_fptr getstr; } upb_bytesrc_vtable; typedef struct { - upb_bytesink_put_fptr put; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; } upb_bytesink_vtable; // "Base Class" definitions; components that implement these interfaces should @@ -69,19 +79,56 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { // Implementation of virtual function dispatch. // upb_bytesrc -INLINE bool upb_bytesrc_get( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { - return bytesrc->vtbl->get(bytesrc, str, minlen); -} +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count) { + return src->vtbl->read(src, buf, count); +} + +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count) { + return src->vtbl->getstr(src, str, count); +} + +INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + // We start with a getstr, because that could possibly alias data instead of + // copying. + if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + // Trade-off between number of read calls and amount of overallocation. + const size_t bufsize = 4096; + while (!upb_bytesrc_eof(src)) { + upb_strlen_t len = upb_string_len(str); + char *buf = upb_string_getrwbuf(str, len + bufsize); + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); + if (read < 0) goto error; + // Resize to proper size. + upb_string_getrwbuf(str, len + read); + } + return true; -INLINE bool upb_bytesrc_append( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { - return bytesrc->vtbl->append(bytesrc, str, len); +error: + upb_copyerr(status, upb_bytesrc_status(src)); + return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_bytesink +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count) { + return sink->vtbl->write(sink, buf, count); +} + +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} + +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; @@ -182,17 +229,6 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, fieldnum, val); } -// upb_bytesink -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { - return sink->vtbl->put(sink, str); -} -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { - return &sink->status; -} - -// upb_bytesink - - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/core/upb_string.c b/core/upb_string.c index 4f5f5c2..b243dfd 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -73,7 +73,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { // assert(str->ptr == NULL); - uint32_t size = upb_string_size(str); + upb_strlen_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); str->cached_mem = realloc(str->cached_mem, size); diff --git a/core/upb_string.h b/core/upb_string.h index ee345e3..f82603b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -119,20 +119,21 @@ INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The returned string is either "str" if it could be -// recycled or a newly created string if "str" has other references. +// data written to it. After the function returns, "str" points to a writable +// string, which is either the original string if it had no other references +// or a newly created string if it did have other references. // -// As a special case, passing NULL will allocate a new string. This is -// convenient for the pattern: +// As a special case, passing a pointer to NULL will allocate a new string. +// This is convenient for the pattern: // // upb_string *str = NULL; // while (x) { // if (y) { -// str = upb_string_tryrecycle(str); +// upb_string_recycle(&str); // upb_src_getstr(str); // } // } -upb_string *upb_string_tryrecycle(upb_string *str); +upb_string *upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been -- cgit v1.2.3 From a38742bbe1cbc037f15edc053f5cf4dd53c5457a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 18 Jan 2011 22:33:05 -0800 Subject: A few minor changes to the streaming protocol. 1. the start and end callbacks can now return a upb_flow_t and set a status message. 2. clarified some semantics around passing an error status back from the callbacks. --- core/upb.c | 4 +++ core/upb.h | 27 ++++++++++--------- core/upb_def.c | 70 ++++++++++++++++++++++++++++---------------------- core/upb_stream.h | 29 +++++++++++---------- core/upb_stream_vtbl.h | 5 +++- 5 files changed, 76 insertions(+), 59 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb.c b/core/upb.c index 2f715d0..05e9b7d 100644 --- a/core/upb.c +++ b/core/upb.c @@ -73,3 +73,7 @@ void upb_printerr(upb_status *status) { fprintf(stderr, "code: %d, no msg\n", status->code); } } + +void upb_status_uninit(upb_status *status) { + upb_string_unref(status->str); +} diff --git a/core/upb.h b/core/upb.h index 764e9ba..fb6d9ea 100644 --- a/core/upb.h +++ b/core/upb.h @@ -290,30 +290,27 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { + // The operation completed successfully. UPB_STATUS_OK = 0, - // A read or write from a streaming src/sink could not be completed right now. - UPB_STATUS_TRYAGAIN = 1, + // The bytesrc is at EOF and all data was read successfully. + UPB_STATUS_EOF = 1, - // A value had an incorrect wire type and will be skipped. - UPB_STATUS_BADWIRETYPE = 2, + // A read or write from a streaming src/sink could not be completed right now. + UPB_STATUS_TRYAGAIN = 2, // An unrecoverable error occurred. UPB_STATUS_ERROR = -1, - // A varint went for 10 bytes without terminating. - UPB_ERROR_UNTERMINATED_VARINT = -2, - - // The max nesting level (UPB_MAX_NESTING) was exceeded. - UPB_ERROR_MAX_NESTING_EXCEEDED = -3 + // A recoverable error occurred (for example, data of the wrong type was + // encountered which we can skip over). + // UPB_STATUS_RECOVERABLE_ERROR = -2 }; -// TODO: consider making this a single word: a upb_string* where we use the low -// bits as flags indicating whether there is an error and whether it is -// resumable. This would improve efficiency, because the code would not need -// to be loaded after a call to a function returning a status. +// TODO: consider adding error space and code, to let ie. errno be stored +// as a proper code. typedef struct { - enum upb_status_code code; + char code; upb_string *str; } upb_status; @@ -329,6 +326,8 @@ INLINE void upb_status_init(upb_status *status) { status->str = NULL; } +void upb_status_uninit(upb_status *status); + void upb_printerr(upb_status *status); void upb_clearerr(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, diff --git a/core/upb_def.c b/core/upb_def.c index 4f12dbe..0176dc9 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -257,6 +257,7 @@ struct _upb_defbuilder { upb_deflist defs; upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; int stack_len; + upb_status status; uint32_t number; upb_string *name; @@ -275,12 +276,14 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, static void upb_defbuilder_init(upb_defbuilder *b) { upb_deflist_init(&b->defs); + upb_status_init(&b->status); b->stack_len = 0; b->name = NULL; } static void upb_defbuilder_uninit(upb_defbuilder *b) { upb_string_unref(b->name); + upb_status_uninit(&b->status); upb_deflist_uninit(&b->defs); } @@ -356,7 +359,7 @@ static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } // Handlers for google.protobuf.FileDescriptorSet. @@ -392,7 +395,7 @@ static void upb_defbuilder_register_FileDescriptorSet( &upb_defbuilder_FileDescriptorSet_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } @@ -439,10 +442,11 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. -static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { upb_defbuilder *b = _b; b->saw_number = false; b->saw_name = false; + return UPB_CONTINUE; } static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, @@ -463,12 +467,12 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; if(!b->saw_number || !b->saw_name) { - //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - //goto err; - return; + upb_seterr(&b->status, UPB_STATUS_ERROR, + "Enum value missing name or number."); + return UPB_STOP; } upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; upb_iton_ent iton_ent = {{b->number, 0}, b->name}; @@ -478,6 +482,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. + return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, @@ -488,22 +493,24 @@ static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, &upb_enumdef_EnumValueDescriptorProto_value, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } // google.protobuf.EnumDescriptorProto. -void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(e)); + return UPB_CONTINUE; } -void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; assert(upb_defbuilder_last(b)->fqname != NULL); + return UPB_CONTINUE; } static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, @@ -546,7 +553,7 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, &upb_enumdef_EnumDescriptorProto_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } upb_enum_iter upb_enum_begin(upb_enumdef *e) { @@ -576,7 +583,7 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(void *_b) { +static upb_flow_t upb_fielddef_startmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; @@ -585,9 +592,10 @@ static void upb_fielddef_startmsg(void *_b) { f->owned = false; f->msgdef = upb_defbuilder_top(b); b->f = f; + return UPB_CONTINUE; } -static void upb_fielddef_endmsg(void *_b) { +static upb_flow_t upb_fielddef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = b->f; // TODO: verify that all required fields were present. @@ -600,6 +608,7 @@ static void upb_fielddef_endmsg(void *_b) { upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); + return UPB_CONTINUE; } static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { @@ -620,7 +629,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_STOP; if(b->f->def) upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); b->f->owned = true; @@ -638,7 +647,7 @@ static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, &upb_fielddef_value, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } @@ -658,7 +667,7 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(void *_b) { +static upb_flow_t upb_msgdef_startmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); @@ -667,15 +676,16 @@ static void upb_msgdef_startmsg(void *_b) { upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; } -static void upb_msgdef_endmsg(void *_b) { +static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - //return UPB_ERROR; - return; + upb_seterr(&b->status, UPB_STATUS_ERROR, + "Encountered message with no name."); + return UPB_STOP; } // Create an ordering over the fields. @@ -716,7 +726,7 @@ static void upb_msgdef_endmsg(void *_b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - //return UPB_CONTINUE; + return UPB_CONTINUE; } static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { @@ -767,7 +777,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, &upb_msgdef_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } static void upb_msgdef_free(upb_msgdef *m) @@ -1100,16 +1110,14 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { upb_defbuilder b; upb_defbuilder_init(&b); - //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); - upb_defbuilder_register_FileDescriptorSet(&b, NULL); - if(!upb_src_run(src)) { - upb_copyerr(status, upb_src_status(src)); - upb_defbuilder_uninit(&b); - return; - } - upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_defbuilder_register_FileDescriptorSet(&b, &handlers); + upb_src_sethandlers(src, &handlers); + upb_src_run(src, status); + if (upb_ok(status)) + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); upb_defbuilder_uninit(&b); - return; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 9ae69de..40836e9 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,12 +39,8 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // An error occurred; check status for details. - UPB_ERROR, - - // Processing should stop for now, but could be resumed later. - // If processing resumes later, it should resume with the next value. - UPB_SUSPEND, + // Stop processing for now; check status for details. + UPB_STOP, // Skips to the end of the current submessage (or if we are at the top // level, skips to the end of the entire message). @@ -61,8 +57,8 @@ typedef enum { struct _upb_handlers; typedef struct _upb_handlers upb_handlers; -typedef void (*upb_startmsg_handler_t)(void *closure); -typedef void (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_endmsg_handler_t)(void *closure); typedef upb_flow_t (*upb_value_handler_t)(void *closure, struct _upb_fielddef *f, upb_value val); @@ -76,12 +72,14 @@ typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, // An empty set of handlers, for convenient copy/paste: // -// static void startmsg(void *closure) { +// static upb_flow_t startmsg(void *closure) { // // Called when the top-level message begins. +// return UPB_CONTINUE; // } // -// static void endmsg(void *closure) { +// static upb_flow_t endmsg(void *closure) { // // Called when the top-level message ends. +// return UPB_CONTINUE; // } // // static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { @@ -120,10 +118,15 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); + // TODO: for clients that want to increase efficiency by preventing bytesrcs // from automatically being converted to strings in the value callback. // INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); -INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); + +// The closure will be passed to every handler. The status will be used +// only immediately after a handler has returned UPB_STOP. +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status); // An object that transparently handles delegation so that the caller needs // only follow the protocol as if delegation did not exist. @@ -146,8 +149,8 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -bool upb_src_run(upb_src *src); -upb_status *upb_src_status(upb_src *src); +void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); +void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index c0cf04f..d017177 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -133,6 +133,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { struct _upb_handlers { upb_handlerset *set; void *closure; + upb_status *status; // We don't own this. }; INLINE void upb_handlers_init(upb_handlers *h) { @@ -155,8 +156,10 @@ INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { h->set = set; } -INLINE void upb_set_handler_closure(upb_handlers *h, void *closure) { +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status) { h->closure = closure; + h->status = status; } // upb_dispatcher -- cgit v1.2.3 From 1dea81b1c244d357a6e46ee22c14b36280bf2100 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 17:29:16 -0800 Subject: Interface refinement: rename some constants. * UPB_STOP -> UPB_BREAK, better represents breaking out of a parsing loop. * UPB_STATUS_OK -> UPB_OK, for all status codes, more concise at no readability cost (perhaps an improvement). --- core/upb.c | 5 ++--- core/upb.h | 16 ++++++++-------- core/upb_def.c | 22 ++++++++++------------ core/upb_stream.h | 6 ++++-- 4 files changed, 24 insertions(+), 25 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb.c b/core/upb.c index 05e9b7d..da2a0f0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -60,9 +60,8 @@ void upb_copyerr(upb_status *to, upb_status *from) } void upb_clearerr(upb_status *status) { - status->code = UPB_STATUS_OK; - upb_string_unref(status->str); - status->str = NULL; + status->code = UPB_OK; + upb_string_recycle(&status->str); } void upb_printerr(upb_status *status) { diff --git a/core/upb.h b/core/upb.h index fb6d9ea..d394a08 100644 --- a/core/upb.h +++ b/core/upb.h @@ -291,16 +291,16 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, // resumed. enum upb_status_code { // The operation completed successfully. - UPB_STATUS_OK = 0, + UPB_OK = 0, // The bytesrc is at EOF and all data was read successfully. - UPB_STATUS_EOF = 1, + UPB_EOF = 1, // A read or write from a streaming src/sink could not be completed right now. - UPB_STATUS_TRYAGAIN = 2, + UPB_TRYAGAIN = 2, // An unrecoverable error occurred. - UPB_STATUS_ERROR = -1, + UPB_ERROR = -1, // A recoverable error occurred (for example, data of the wrong type was // encountered which we can skip over). @@ -308,21 +308,21 @@ enum upb_status_code { }; // TODO: consider adding error space and code, to let ie. errno be stored -// as a proper code. +// as a proper code, or application-specific error codes. typedef struct { char code; upb_string *str; } upb_status; -#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL} +#define UPB_STATUS_INIT {UPB_OK, NULL} #define UPB_ERRORMSG_MAXLEN 256 INLINE bool upb_ok(upb_status *status) { - return status->code == UPB_STATUS_OK; + return status->code == UPB_OK; } INLINE void upb_status_init(upb_status *status) { - status->code = UPB_STATUS_OK; + status->code = UPB_OK; status->str = NULL; } diff --git a/core/upb_def.c b/core/upb_def.c index 0176dc9..79b6632 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -470,9 +470,8 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; if(!b->saw_number || !b->saw_name) { - upb_seterr(&b->status, UPB_STATUS_ERROR, - "Enum value missing name or number."); - return UPB_STOP; + upb_seterr(&b->status, UPB_ERROR, "Enum value missing name or number."); + return UPB_BREAK; } upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; upb_iton_ent iton_ent = {{b->number, 0}, b->name}; @@ -629,7 +628,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_STOP; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); b->f->owned = true; @@ -683,9 +682,8 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(&b->status, UPB_STATUS_ERROR, - "Encountered message with no name."); - return UPB_STOP; + upb_seterr(&b->status, UPB_ERROR, "Encountered message with no name."); + return UPB_BREAK; } // Create an ordering over the fields. @@ -864,7 +862,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) // where we recurse over the type tree (like for example, right now) and an // absurdly deep tree could cause us to stack overflow on systems with very // limited stacks. - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at " "depth %d in the type graph, which exceeds the maximum type " "depth of %d.", UPB_UPCAST(m)->fqname, depth, UPB_MAX_TYPE_DEPTH); @@ -873,7 +871,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) // Cycle! int cycle_len = depth - 1; if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved " "in a cycle of length %d, which exceeds the maximum type " "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, UPB_MAX_TYPE_CYCLE_LEN); @@ -931,7 +929,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_symtab_ent *found; if(!(found = upb_resolve(tmptab, base, name)) && !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_STATUS_ERROR, + upb_seterr(status, UPB_ERROR, "could not resolve symbol '" UPB_STRFMT "'" " in context '" UPB_STRFMT "'", UPB_STRARG(name), UPB_STRARG(base)); @@ -941,7 +939,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Check the type of the found def. upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { - upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); + upb_seterr(status, UPB_ERROR, "Unexpected type"); return false; } upb_msgdef_resolve(m, f, found->def); @@ -983,7 +981,7 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, // allow_redef is set. if (upb_strtable_lookup(&tmptab, def->fqname) || (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { - upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT, + upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT, UPB_STRARG(def->fqname)); goto err; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 40836e9..66bfec2 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,8 +39,10 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Stop processing for now; check status for details. - UPB_STOP, + // Stop processing for now; check status for details. If no status was set, + // a generic error will be returned. If the error is resumable, processing + // will resume by delivering this callback again. + UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top // level, skips to the end of the entire message). -- cgit v1.2.3 From a695b92ccea4b82180ae45d21d7ed4445f7d0769 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 19:18:22 -0800 Subject: Debugging test_def, it's close to working again! --- Makefile | 10 +++--- core/upb_def.c | 80 +++++++++++++++++++++++++++-------------- core/upb_stream.h | 21 ++++++++--- core/upb_stream_vtbl.h | 96 ++++++++++++++++++++++++++++++++++++++++---------- core/upb_string.c | 8 ++--- core/upb_string.h | 2 +- tests/test_def.c | 1 + tests/test_string.c | 19 +++++----- 8 files changed, 171 insertions(+), 66 deletions(-) (limited to 'core/upb_stream.h') diff --git a/Makefile b/Makefile index 42c7d41..af79363 100644 --- a/Makefile +++ b/Makefile @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a @@ -112,13 +112,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_stream \ -# tests/test_def \ + tests/test_def \ +# tests/test_stream \ # tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb -tests: $(TESTS) +tests: $(LIBUPB) $(TESTS) OTHER_TESTS=tests/tests \ $(TESTS): $(LIBUPB) diff --git a/core/upb_def.c b/core/upb_def.c index 79b6632..a935930 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -319,6 +319,18 @@ void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { } // Handlers for google.protobuf.FileDescriptorProto. +static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} + static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { @@ -353,8 +365,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_startmsg, + &upb_defbuilder_FileDescriptorProto_endmsg, &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; @@ -457,9 +469,11 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); upb_string_getref(upb_value_getstr(val)); + b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); + b->saw_number = true; break; default: break; @@ -507,8 +521,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { } static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { - upb_defbuilder *b = _b; - assert(upb_defbuilder_last(b)->fqname != NULL); + (void)_b; + assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL); return UPB_CONTINUE; } @@ -627,10 +641,8 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); - b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; } @@ -720,6 +732,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { m->size = offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); } + free(sorted_fields); if (max_align > 0) m->size = upb_align_up(m->size, max_align); @@ -1131,6 +1144,7 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) // * keeping a pointer to the upb_fielddef* and reading it later (the same // upb_fielddef is reused over and over). // * detecting errors in the input (we trust that our input is known-good). +// * skipping the rest of the submessage (UPB_SKIPSUBMSG). // // It also does not support any of the follow protobuf features: // * packed fields. @@ -1189,18 +1203,27 @@ static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) return val; } -bool upb_baredecoder_run(upb_baredecoder *d) { +static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_baredecoder *d = (upb_baredecoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers); +} + +static void upb_baredecoder_run(upb_src *src, upb_status *status) { + upb_baredecoder *d = (upb_baredecoder*)src; + assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; upb_strlen_t stack[UPB_MAX_NESTING]; upb_strlen_t *top = &stack[0]; *top = upb_string_len(d->input); d->offset = 0; - upb_dispatch_startmsg(&d->dispatcher); +#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; + + CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. while(d->offset >= *top) { - upb_dispatch_endsubmsg(&d->dispatcher); + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); d->offset = *(top--); } @@ -1216,9 +1239,11 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { + upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(ret); + if(ret == BEGIN_SUBMSG) { // Should deliver as a submessage instead. - upb_dispatch_startsubmsg(&d->dispatcher, &f); + CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f)); *(++top) = d->offset + delim_len; } else { d->offset += delim_len; @@ -1228,11 +1253,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { switch(wt) { case UPB_WIRE_TYPE_VARINT: upb_value_setraw(&v, upb_baredecoder_readv64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_64BIT: upb_value_setraw(&v, upb_baredecoder_readf64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_32BIT: upb_value_setraw(&v, upb_baredecoder_readf32(d)); @@ -1241,28 +1264,33 @@ bool upb_baredecoder_run(upb_baredecoder *d) { assert(false); abort(); } - upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } } - upb_dispatch_endmsg(&d->dispatcher); - return true; + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + printf("SUCCESS!!\n"); + upb_string_unref(str); + return; + +err: + upb_copyerr(status, d->dispatcher.top->handlers.status); + upb_printerr(d->dispatcher.top->handlers.status); + upb_printerr(status); + upb_string_unref(str); + printf("ERROR!!\n"); } static upb_baredecoder *upb_baredecoder_new(upb_string *str) { - //static upb_src_vtable vtbl = { - // (upb_src_getdef_fptr)&upb_baredecoder_getdef, - // (upb_src_getval_fptr)&upb_baredecoder_getval, - // (upb_src_getstr_fptr)&upb_baredecoder_getstr, - // (upb_src_skipval_fptr)&upb_baredecoder_skipval, - // (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, - //}; + static upb_src_vtbl vtbl = { + &upb_baredecoder_sethandlers, + &upb_baredecoder_run, + }; upb_baredecoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); d->input = upb_string_getref(str); d->offset = 0; upb_dispatcher_init(&d->dispatcher); - //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 66bfec2..cf01a5f 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -136,8 +136,8 @@ struct _upb_dispatcher; typedef struct _upb_dispatcher upb_dispatcher; INLINE void upb_dispatcher_init(upb_dispatcher *d); INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE void upb_dispatch_startmsg(upb_dispatcher *d); -INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, @@ -151,8 +151,21 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); -void upb_src_run(upb_src *src, upb_status *status); +// upb_src_sethandlers() must be called once and only once before upb_src_run() +// is called. This sets up the callbacks that will handle the parse. A +// upb_src that is fully initialized except for the call to +// upb_src_sethandlers() is called "prepared" -- this is useful for library +// functions that want to consume the output of a generic upb_src. +// Calling sethandlers() multiple times is an error and will trigger an abort(). +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); + +// Runs the src, calling the callbacks that were registered with +// upb_src_sethandlers(), and returning the status of the operation in +// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a +// non-blocking socket) or a resumable error; in both cases upb_src_run can be +// called again later. TRYAGAIN could come from either the src (input buffers +// are empty) or the handlers (output buffers are full). +INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index d017177..e462122 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -13,6 +13,7 @@ #include #include "upb_stream.h" +#include "upb_string.h" #ifdef __cplusplus extern "C" { @@ -21,10 +22,8 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. // upb_src -struct _upb_src { -}; -typedef struct { -} upb_src_vtbl; +typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers); +typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. typedef upb_strlen_t (*upb_bytesrc_read_fptr)( @@ -42,42 +41,65 @@ typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( typedef struct { upb_bytesrc_read_fptr read; upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtable; +} upb_bytesrc_vtbl; typedef struct { upb_bytesink_write_fptr write; upb_bytesink_putstr_fptr putstr; -} upb_bytesink_vtable; +} upb_bytesink_vtbl; + +typedef struct { + upb_src_sethandlers_fptr sethandlers; + upb_src_run_fptr run; +} upb_src_vtbl; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. struct _upb_bytesrc { - upb_bytesrc_vtable *vtbl; + upb_bytesrc_vtbl *vtbl; upb_status status; bool eof; }; struct _upb_bytesink { - upb_bytesink_vtable *vtbl; + upb_bytesink_vtbl *vtbl; upb_status status; bool eof; }; -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { +struct _upb_src { + upb_src_vtbl *vtbl; +}; + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } +INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { + s->vtbl = vtbl; +} + // Implementation of virtual function dispatch. +// upb_src +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) { + src->vtbl->sethandlers(src, handlers); +} + +INLINE void upb_src_run(upb_src *src, upb_status *status) { + src->vtbl->run(src, status); +} + // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count) { @@ -152,7 +174,41 @@ INLINE bool upb_handlers_isempty(upb_handlers *h) { return !h->set && !h->closure; } +INLINE upb_flow_t upb_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)closure; + (void)f; + (void)val; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)closure; + (void)f; + (void)delegate_to; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + if (!set->startmsg) set->startmsg = &upb_nop; + if (!set->endmsg) set->endmsg = &upb_nop; + if (!set->value) set->value = &upb_value_nop; + if (!set->startsubmsg) set->startsubmsg = &upb_startsubmsg_nop; + if (!set->endsubmsg) set->endsubmsg = &upb_nop; + if (!set->unknownval) set->unknownval = &upb_unknownval_nop; h->set = set; } @@ -182,16 +238,19 @@ INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { d->top->handlers = *h; } -INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->startmsg(d->top->handlers.closure); + return d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->endmsg(d->top->handlers.closure); + return d->top->handlers.set->endmsg(d->top->handlers.closure); } +// TODO: several edge cases to fix: +// - delegated start returns UPB_BREAK, should replay the start on resume. +// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg. INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f) { upb_handlers handlers; @@ -203,17 +262,18 @@ INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, ++d->top; d->top->handlers = handlers; d->top->depth = 0; - d->top->handlers.set->startmsg(d->top->handlers.closure); - ret = UPB_CONTINUE; + ret = d->top->handlers.set->startmsg(d->top->handlers.closure); } - ++d->top->depth; + if (ret == UPB_CONTINUE) ++d->top->depth; upb_handlers_uninit(&handlers); return ret; } INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + upb_flow_t ret; if (--d->top->depth == 0) { - d->top->handlers.set->endmsg(d->top->handlers.closure); + ret = d->top->handlers.set->endmsg(d->top->handlers.closure); + if (ret != UPB_CONTINUE) return ret; --d->top; } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); diff --git a/core/upb_string.c b/core/upb_string.c index b243dfd..e9ff0d9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,13 +61,13 @@ void _upb_string_free(upb_string *str) { free(str); } -upb_string *upb_string_tryrecycle(upb_string *str) { +void upb_string_recycle(upb_string **_str) { + upb_string *str = *_str; if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); - return str; } else { - return upb_string_new(); + *_str = upb_string_new(); } } @@ -111,7 +111,7 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { // We don't care about the terminating NULL, but snprintf might // bail out of printing even other characters if it doesn't have // enough space to write the NULL also. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); buf = upb_string_getrwbuf(str, true_size + 1); vsnprintf(buf, true_size + 1, format, args); } diff --git a/core/upb_string.h b/core/upb_string.h index f82603b..1f4b20c 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -133,7 +133,7 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // upb_src_getstr(str); // } // } -upb_string *upb_string_recycle(upb_string **str); +void upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been diff --git a/tests/test_def.c b/tests/test_def.c index 732835d..5be0672 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,6 +10,7 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { + printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); diff --git a/tests/test_string.c b/tests/test_string.c index 7c9ed02..6446806 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -23,7 +23,8 @@ static void test_static() { upb_string_unref(&static_upbstr); // Recycling a static string returns a new string (that can be modified). - upb_string *str = upb_string_tryrecycle(&static_upbstr); + upb_string *str = &static_upbstr; + upb_string_recycle(&str); assert(str != &static_upbstr); upb_string_unref(str); @@ -34,8 +35,9 @@ static void test_dynamic() { assert(str != NULL); upb_string_unref(str); - // Can also create a string by tryrecycle(NULL). - str = upb_string_tryrecycle(NULL); + // Can also create a string by recycle(NULL). + str = NULL; + upb_string_recycle(&str); assert(str != NULL); upb_strcpyc(str, static_str); @@ -45,7 +47,8 @@ static void test_dynamic() { assert(upb_streqlc(str, static_str)); upb_string_endread(str); - upb_string *str2 = upb_string_tryrecycle(str); + upb_string *str2 = str; + upb_string_recycle(&str2); // No other referents, so should return the same string. assert(str2 == str); @@ -58,7 +61,7 @@ static void test_dynamic() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -70,7 +73,7 @@ static void test_dynamic() { assert(upb_atomic_read(&str2->refcount) == 2); // Recycling str should eliminate the extra ref. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); assert(upb_atomic_read(&str2->refcount) == 1); // Resetting str should reuse its old data. @@ -80,7 +83,7 @@ static void test_dynamic() { // Resetting str to something very long should require new data to be // allocated. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; upb_strcpyc(str, longstring); const char *robuf6 = upb_string_getrobuf(str); @@ -88,7 +91,7 @@ static void test_dynamic() { assert(upb_streqlc(str, longstring)); // Test printf. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); assert(upb_streqlc(str, "Number: 5, String: YO!")); -- cgit v1.2.3 From 58a70b55c62cfefcbe7a55a2fd41ee6b87c7256f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 16:29:10 -0800 Subject: Decoder code structure is mostly in-place. --- core/upb_stream.h | 20 ++- core/upb_string.h | 57 ++++++-- stream/upb_decoder.c | 363 ++++++++++++++++++++++----------------------------- 3 files changed, 212 insertions(+), 228 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb_stream.h b/core/upb_stream.h index cf01a5f..54fd930 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -171,14 +171,18 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If <0, indicates error (check upb_bytesrc_status for details). +// read. If 0, indicates error and puts details in "status". INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count); + upb_strlen_t count, upb_status *status); // Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). +// existing string data (which avoids a copy). On the other hand, if +// the data was *not* already in an existing string, this copies it into +// a upb_string, and if the data needs to be put in a specific range of +// memory (because eg. you need to put it into a different kind of string +// object) then upb_bytesrc_get() could be better. INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count); + upb_status *status); // A convenience function for getting all the remaining data in a upb_bytesrc // as a upb_string. Returns false and sets "status" if the operation fails. @@ -189,14 +193,6 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); } -// Returns the current error status for the stream. -// Note! The "eof" flag works like feof() in C; it cannot report end-of-file -// until a read has failed due to eof. It cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); -INLINE bool upb_bytesrc_eof(upb_bytesrc *src); - /* upb_bytesink ***************************************************************/ diff --git a/core/upb_string.h b/core/upb_string.h index 1f4b20c..04c0ae9 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -3,26 +3,39 @@ * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. * - * This file defines a simple string type. The overriding goal of upb_string - * is to avoid memcpy(), malloc(), and free() wheverever possible, while - * keeping both CPU and memory overhead low. Throughout upb there are - * situations where one wants to reference all or part of another string - * without copying. upb_string provides APIs for doing this. + * This file defines a simple string type which is length-delimited instead + * of NULL-terminated, and which has useful sharing semantics. + * + * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() + * wheverever possible, while keeping both CPU and memory overhead low. + * Throughout upb there are situations where one wants to reference all or part + * of another string without copying. upb_string provides APIs for doing this. * * Characteristics of upb_string: * - strings are reference-counted. - * - strings are logically immutable. + * - strings are immutable (can be mutated only when first created or recycled). * - if a string has no other referents, it can be "recycled" into a new string * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings are not thread-safe by default, but can be made so by calling a - * function. This is not the default because it causes extra CPU overhead. * * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations * unless the string has been marked thread-safe. + * + * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched + * idiom that we go with it instead of making our pointers uint8_t*. + * + * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE + * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if + * you are logically passing a reference to a upb_string to another thread + * (which implies that the other thread must eventually call unref of recycle), + * you have two options: + * + * - create a copy of the string that will be used in the other thread only. + * - call upb_string_get_synchronized_ref(), which will make getref, unref, and + * recycle thread-safe for this upb_string. */ #ifndef UPB_STRING_H @@ -83,10 +96,12 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); +// Internal-only; clients should call upb_string_unref(). void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. +// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE +// UNLESS THE STRING IS SYNCHRONIZED. INLINE void upb_string_unref(upb_string *str) { if (str && upb_atomic_read(&str->refcount) > 0 && upb_atomic_unref(&str->refcount)) { @@ -98,6 +113,7 @@ upb_string *upb_strdup(upb_string *s); // Forward-declare. // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. +// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED! INLINE upb_string *upb_string_getref(upb_string *str) { int refcount = upb_atomic_read(&str->refcount); if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); @@ -163,8 +179,11 @@ void upb_string_substr(upb_string *str, upb_string *target_str, // data. Waiting for a clear use case before actually implementing it. // // Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until -// a matching call to upb_string_detach(). +// guarantees that the given string data will not change or be deleted until a +// matching call to upb_string_detach(), which may block until any concurrent +// readers have finished reading. upb_string_detach() preserves the contents +// of the string by copying the referenced data if there are any other +// referents. // void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); // void upb_string_detach(upb_string *str); @@ -207,6 +226,22 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) #define UPB_STACK_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) + +// A convenient way of specifying upb_strings as literals, like: +// +// upb_streql(UPB_STRLIT("expected"), other_str); +// +// However, this requires either C99 compound initializers or C++. +// Must ONLY be called with a string literal as its argument! +//#ifdef __cplusplus +//namespace upb { +//class String : public upb_string { +// // This constructor must ONLY be called with a string literal. +// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {} +//}; +//} +//#define UPB_STRLIT(str) upb::String(str) +//#endif #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b820b08..fbd7eba 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -11,127 +11,39 @@ #include #include "upb_def.h" -/* Functions to read wire values. *********************************************/ - -// These functions are internal to the decode, but might be moved into an -// internal header file if we at some point in the future opt to do code -// generation, because the generated code would want to inline these functions. -// The same applies to the functions to read .proto values below. - -const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). -INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); - } +/* Pure Decoding **************************************************************/ + +// The key fast-path varint-decoding routine. There are a lot of possibilities +// for optimization/experimentation here. +INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, + upb_status *status) { + *high = 0; + uint32_t b; + uint8_t *ptr = p->ptr; + b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + upb_seterr(status, UPB_ERROR, "Unterminated varint"); + return false; +done: + return true; } -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - uint64_t val64; - const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint32_t)); - return uint32_end; -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). -INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint64_t)); - return uint64_end; -} - -INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) - last = *buf; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } - return buf; -} - -INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - -/* Functions to read .proto values. *******************************************/ +/* Decoding/Buffering of individual values ************************************/ // Performs zig-zag decoding, which is used by sint32 and sint64. INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Parses a tag, places the result in *tag. -INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - // The decoder keeps a stack with one entry per level of recursion. // upb_decoder_frame is one frame of that stack. typedef struct { @@ -144,6 +56,7 @@ struct upb_decoder { // Immutable state of the decoder. upb_src src; upb_dispatcher dispatcher; + upb_bytesrc *bytesrc; upb_msgdef *toplevel_msgdef; upb_decoder_frame stack[UPB_MAX_NESTING]; @@ -158,66 +71,108 @@ struct upb_decoder { // Current input buffer. upb_string *buf; + // Our current offset *within* buf. + upb_strlen_t buf_offset; + // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; +}; - // Our current offset *within* buf. Will be negative if we are buffering - // from previous buffers in tmpbuf. - upb_strlen_t buf_offset; +// Called only from the slow path, this function copies the next "len" bytes +// from the stream to "data", adjusting "buf" and "end" appropriately. +INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, + uint8_t **buf, uint8_t **end) { + while (len > 0) { + memcpy(data, *buf, *end-*buf); + len -= (*end-*buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *buf = upb_string_getrobuf(d->buf); + *end = *buf + upb_string_len(d->buf); + } +} - // Holds any bytes we have from previous buffers. The number of bytes we - // have encoded here is -buf_offset, if buf_offset<0, 0 otherwise. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; -}; +// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes +// available in our current buffer. We don't inline this because we accept +// that it will be slow and we don't want to pay for two copies of it. +static bool upb_decode_varint_slow(upb_decoder *d) { + uint8_t buf[UPB_MAX_ENCODED_SIZE]; + uint8_t *p = buf, *end = buf + sizeof(buf); + for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) + *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; + + if(d->status->code == UPB_EOF && (last & 0x80)) { + upb_seterr(status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + } else if(buf == maxend) { + upb_seterr(status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); + } else { + // Success. + return; + } +} -upb_flow_t upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->end - p->ptr > UPB_MAX_ENCODED_SIZE) { - // Fast path; we know we have a complete varint in our existing buffer. - *high = 0; - uint32_t b; - uint8_t *ptr = p->ptr; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - return false; - } +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, + const uint8_t **end, upb_tag *tag) { + const uint8_t *buf = *_buf, *end = *_end; + uint32_t tag_int; + // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). + if (end - buf < 2) goto slow; // unlikely. + tag_int = *buf & 0x7f; + if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*buf & 0x7f) << 7; + if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. +slow: + if (!upb_decode_varint_slow(d, _buf, _end)) return false; + buf = *_buf; // Trick the next line into not overwriting us. +done: + *_buf = buf; + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return true; +} + +INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, + uint32_t *low, uint32_t *high) { + if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + return upb_decode_varint_fast(d); + else + return upb_decode_varint_slow(d); +} - done: - p->ptr = ptr; +INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, + uint8_t **buf, uint8_t **end, upb_value *val) { + static const char table = {0, 8, 0, 0, 0, 4}; + size_t bytes = table[wt]; + if (*end - *buf >= bytes) { + // Common (fast) case. + memcpy(&val, *buf, bytes); + *buf += bytes; } else { - // Slow path: we may have to combine one or more buffers to get a whole - // varint worth of data. - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(ing bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, - "Varint was unterminated after 10 bytes.\n"); - } else { - // Success. - return; - } - ungetbytes(d, buf, p - buf); + if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + } + return true; +} + +// "val" initially holds the length of the string, this is replaced by the +// contents of the string. +INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { + upb_string_recycle(str); + upb_strlen_t len = upb_valu_getint32(*val); + if (*end - *buf >= len) { + // Common (fast) case. + upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); + *buf += len; + } else { + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + return false; } + return true; } + +/* The main decoding loop *****************************************************/ + static const void *get_msgend(upb_decoder *d) { if(d->top->end_offset > 0) @@ -238,36 +193,29 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } - -// Pushes a new stack frame for a submessage with the given len (which will -// be zero if the submessage is a group). -static const uint8_t *push(upb_decoder *d, const uint8_t *start, +static bool upb_push(upb_decoder *d, const uint8_t *start, uint32_t submsg_len, upb_fielddef *f, upb_status *status) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, - "Nesting exceeded maximum (%d levels)\n", - UPB_MAX_NESTING); - return NULL; + upb_seterr(status, UPB_ERROR, "Nesting too deep."); + return false; } - upb_decoder_frame *frame = d->top; - frame->end_offset = d->completed_offset + submsg_len; - frame->msgdef = upb_downcast_msgdef(f->def); - - upb_dispatch_startsubmsg(&d->dispatcher, f); - return get_msgend(d); + d->top->end_offset = d->completed_offset + submsg_len; + d->top->msgdef = upb_downcast_msgdef(f->def); + *submsg_end = get_msgend(d); + if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; + return true; } -// Pops a stack frame, returning a pointer for where the next submsg should -// end (or a pointer that is out of range for a group). -static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) +static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) { d->top--; upb_dispatch_endsubmsg(&d->dispatcher); - return get_msgend(d); + *submsg_end = get_msgend(d); + return true; } void upb_decoder_run(upb_src *src, upb_status *status) { @@ -278,11 +226,13 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_msgdef *msgdef = d->top->msgdef; upb_string *str = NULL; + upb_dispatch_startmsg(&d->dispatcher); + // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &buf, &end, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -290,24 +240,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: if(!isgroup(submsg_end)) { - upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - d->completed_offset + (completed - start)); + upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - submsg_end = pop(d, start, status, &msgdef); - completed = buf; - goto check_msgend; + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); + goto check_msgend; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. CHECK(upb_decode_varint(d, &buf, &end, &val)); break; case UPB_WIRE_TYPE_32BIT: - CHECK(upb_decode_32bit(d, &buf, &end, &val)); - break; case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_64bit(d, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); break; } @@ -315,24 +260,31 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); if (!f) { - // Unknown field. + if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) + CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { - // Field has incorrect type. + // TODO: put more details in this error msg. + upb_seterr(status, UPB_ERROR, "Field had incorrect type."); + goto err; } // Perform any further massaging of the data now that we have the fielddef. // Now we can distinguish strings from submessages, and we know about // zig-zag-encoded types. // TODO: handle packed encoding. + // TODO: if we were being paranoid, we could check for 32-bit-varint types + // that the top 32 bits all match the highest bit of the low 32 bits. + // If this is not true we are losing data. But the main protobuf library + // doesn't check this, and it would slow us down, so pass for now. switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; + CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + goto check_msgend; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, str, upb_value_getint32(val))); - upb_value_setstr(&val, str); + CHECK(upb_decode_string(d, &val, &str)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -341,26 +293,27 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); break; default: - // Other types need no further processing at this point. + break; // Other types need no further processing at this point. } CHECK(upb_dispatch_value(d->sink, f, val, status)); check_msgend: while(buf >= submsg_end) { if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Expected submsg end offset " - "did not lie on a tag/value boundary."); + upb_seterr(status, UPB_ERROR, "Bad submessage end.") goto err; } - submsg_end = pop(d, start, status, &msgdef); + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); } - completed = buf; } + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + return; + err: - read = (char*)completed - (char*)start; - d->completed_offset += read; - return read; + if (upb_ok(status)) { + upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); + } } void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { -- cgit v1.2.3 From fe659c8c93c464fcbcfb5739935a2e4341d01fd4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 18:59:31 -0800 Subject: Getting closer to a decoder that could actually compile and work. --- core/upb_stream.h | 7 +- core/upb_string.h | 6 ++ stream/upb_decoder.c | 207 +++++++++++++++++++++++++++------------------------ 3 files changed, 119 insertions(+), 101 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb_stream.h b/core/upb_stream.h index 54fd930..bf312a8 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -40,8 +40,11 @@ typedef enum { UPB_CONTINUE, // Stop processing for now; check status for details. If no status was set, - // a generic error will be returned. If the error is resumable, processing - // will resume by delivering this callback again. + // a generic error will be returned. If the error is resumable, it is not + // (yet) defined where processing will resume -- waiting for real-world + // examples of resumable decoders and resume-requiring clients. upb_src + // implementations that are not capable of resuming will override the return + // status to be non-resumable if a resumable status was set by the handlers. UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top diff --git a/core/upb_string.h b/core/upb_string.h index 04c0ae9..1a7e06b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -134,6 +134,12 @@ INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } +// Convenience method for getting the end of the string. Calls +// upb_string_getrobuf() so inherits the caveats of calling that function. +INLINE const char *upb_string_getbufend(upb_string *str) { + return upb_string_getrobuf(str) + upb_string_len(str); +} + // Attempts to recycle the string "str" so it may be reused and have different // data written to it. After the function returns, "str" points to a writable // string, which is either the original string if it had no other references diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index fbd7eba..9a17451 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -13,23 +13,24 @@ /* Pure Decoding **************************************************************/ -// The key fast-path varint-decoding routine. There are a lot of possibilities -// for optimization/experimentation here. -INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, +// The key fast-path varint-decoding routine. Here we can assume we have at +// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of +// possibilities for optimization/experimentation here. +INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, upb_status *status) { *high = 0; uint32_t b; uint8_t *ptr = p->ptr; - b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 28; + b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 28; *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; @@ -71,23 +72,51 @@ struct upb_decoder { // Current input buffer. upb_string *buf; - // Our current offset *within* buf. - upb_strlen_t buf_offset; - // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; }; // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "end" appropriately. -INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, - uint8_t **buf, uint8_t **end) { - while (len > 0) { - memcpy(data, *buf, *end-*buf); - len -= (*end-*buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - *buf = upb_string_getrobuf(d->buf); - *end = *buf + upb_string_len(d->buf); +// from the stream to "data", adjusting "buf" and "len" appropriately. +static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, + uint8_t **ptr, size_t *len) { + while (1) { + memcpy(data, *ptr, *len); + bytes_wanted -= *len; + *ptr += *len; + if (bytes_wanted == 0) return true; + + // Did "len" indicate end-of-submessage or end-of-buffer? + size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; + if (d->top->end_offset > 0 && + d->top->end_offset == d->buf_stream_offset + buf_offset) { + // End-of-submessage. + if (bytes_wanted > 0) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") + return false; + } + if (upb_pop(d) != UPB_CONTINUE) return false; + } else { + // End-of-buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *ptr = upb_string_getrobuf(d->buf); + } + + // Wait for end-of-submessage or end-of-buffer, whichever comes first. + size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; + size_t submsg_remaining = + d->top->end_offset - d->buf_stream_offset - offset_in_buf; + if (d->top->end_offset == UPB_GROUP_END_OFFSET || + buf_remaining > submsg_remaining) { + *len = buf_remaining; + } else { + // Check that non of our subtraction overflowed. + assert(d->top->end_offset > d->buf_stream_offset); + assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); + *len = submsg_remaining; + } } } @@ -112,21 +141,21 @@ static bool upb_decode_varint_slow(upb_decoder *d) { } } -INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, - const uint8_t **end, upb_tag *tag) { - const uint8_t *buf = *_buf, *end = *_end; +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, + const uint8_t **len, upb_tag *tag) { + const uint8_t *ptr = *_ptr, *len = *_end; uint32_t tag_int; // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (end - buf < 2) goto slow; // unlikely. - tag_int = *buf & 0x7f; - if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*buf & 0x7f) << 7; - if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. + if (len - ptr < 2) goto slow; // unlikely. + tag_int = *ptr & 0x7f; + if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*ptr & 0x7f) << 7; + if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely. slow: - if (!upb_decode_varint_slow(d, _buf, _end)) return false; - buf = *_buf; // Trick the next line into not overwriting us. + if (!upb_decode_varint_slow(d, _ptr, _end)) return false; + ptr = *_ptr; // Trick the next line into not overwriting us. done: - *_buf = buf; + *_ptr = ptr; tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return true; @@ -134,22 +163,22 @@ done: INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, uint32_t *low, uint32_t *high) { - if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) return upb_decode_varint_fast(d); else return upb_decode_varint_slow(d); } INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - uint8_t **buf, uint8_t **end, upb_value *val) { + uint8_t **ptr, uint8_t **len, upb_value *val) { static const char table = {0, 8, 0, 0, 0, 4}; size_t bytes = table[wt]; - if (*end - *buf >= bytes) { + if (*len - *ptr >= bytes) { // Common (fast) case. - memcpy(&val, *buf, bytes); - *buf += bytes; + memcpy(&val, *ptr, bytes); + *ptr += bytes; } else { - if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + if (!upb_getptr(d, &val, bytes, ptr, len)) return false; } return true; } @@ -159,12 +188,12 @@ INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { upb_string_recycle(str); upb_strlen_t len = upb_valu_getint32(*val); - if (*end - *buf >= len) { + if (*len - *ptr >= len) { // Common (fast) case. - upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); - *buf += len; + upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); + *ptr += len; } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) return false; } return true; @@ -173,19 +202,6 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) /* The main decoding loop *****************************************************/ -static const void *get_msgend(upb_decoder *d) -{ - if(d->top->end_offset > 0) - return upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); - else - return (void*)UINTPTR_MAX; // group. -} - -static bool isgroup(const void *submsg_end) -{ - return submsg_end == (void*)UINTPTR_MAX; -} - extern upb_wire_type_t upb_expected_wire_types[]; // Returns true if wt is the correct on-the-wire type for ft. INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { @@ -193,76 +209,78 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } -static bool upb_push(upb_decoder *d, const uint8_t *start, - uint32_t submsg_len, upb_fielddef *f, - upb_status *status) -{ +static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, + upb_strlen_t submsg_len, upb_field_type_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(status, UPB_ERROR, "Nesting too deep."); - return false; + return UPB_ERROR; } - d->top->end_offset = d->completed_offset + submsg_len; + d->top->end_offset = type == UPB_TYPE(GROUP) ? + UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; d->top->msgdef = upb_downcast_msgdef(f->def); - *submsg_end = get_msgend(d); - if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; - return true; + return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) -{ +static upb_flow_t upb_pop(upb_decoder *d) { d->top--; - upb_dispatch_endsubmsg(&d->dispatcher); - *submsg_end = get_msgend(d); - return true; + return upb_dispatch_endsubmsg(&d->dispatcher); } void upb_decoder_run(upb_src *src, upb_status *status) { - // buf is our current offset, moves from start to end. - const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str) + d->buf_offset; - const uint8_t *end = (uint8_t*)upb_string_getrobuf(str) + upb_string_len(str); - const uint8_t *submsg_end = get_msgend(d, start); - upb_msgdef *msgdef = d->top->msgdef; + // We use stack variables for our frequently used vars so the compiler knows + // they can't be changed by external code (like when we dispatch a callback). + + // Our current position in the data buffer. + uint8_t *ptr = NULL; + // Number of bytes available at ptr, until either end-of-buf or + // end-of-submessage (whichever is smaller). + size_t len = 0; + upb_string *str = NULL; - upb_dispatch_startmsg(&d->dispatcher); +// TODO: handle UPB_SKIPSUBMSG +#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err +#define CHECK(expr) if (!expr) goto err; + + CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &ptr, &len, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. upb_value val; switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: - if(!isgroup(submsg_end)) { + if(d->top->end_offset != UPB_GROUP_END_OFFSET) upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_pop(d)); + continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &buf, &end, &val)); + CHECK(upb_decode_varint(d, &ptr, &len, &val)); break; case UPB_WIRE_TYPE_32BIT: case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); break; } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) CHECK(upb_decode_string(d, &val, &str)); - CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); + CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { // TODO: put more details in this error msg. upb_seterr(status, UPB_ERROR, "Field had incorrect type."); @@ -280,8 +298,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): CHECK(upb_decode_string(d, &val, &str)); @@ -295,19 +313,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) { default: break; // Other types need no further processing at this point. } - CHECK(upb_dispatch_value(d->sink, f, val, status)); - -check_msgend: - while(buf >= submsg_end) { - if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Bad submessage end.") - goto err; - } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - } + CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); } - CHECK(upb_dispatch_endmsg(&d->dispatcher)); + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; err: -- cgit v1.2.3 From 2c24cbb108bbda296f01e7628028b1dcb2b9516b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 25 Jan 2011 10:07:47 -0800 Subject: More work on decoder and stdio bytesrc/bytesink. --- Makefile | 6 ++--- core/upb.c | 17 +++++-------- core/upb_stream.h | 16 +++++++----- stream/upb_decoder.c | 14 +++++++--- stream/upb_stdio.c | 66 +++++++++++++++++++++++------------------------- stream/upb_textprinter.c | 1 - stream/upb_textprinter.h | 3 +-- 7 files changed, 63 insertions(+), 60 deletions(-) (limited to 'core/upb_stream.h') diff --git a/Makefile b/Makefile index 46cb836..1dfd79d 100644 --- a/Makefile +++ b/Makefile @@ -63,10 +63,10 @@ SRC=core/upb.c \ descriptor/descriptor.c \ core/upb_def.c \ stream/upb_decoder.c \ + stream/upb_stdio.c \ + stream/upb_textprinter.c # core/upb_msg.c \ -# stream/upb_stdio.c \ # stream/upb_strstream.c \ -# stream/upb_textprinter.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -114,7 +114,7 @@ TESTS=tests/test_string \ tests/test_table \ tests/test_def \ tests/test_stream \ -# tests/test_decoder \ + tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb diff --git a/core/upb.c b/core/upb.c index ff2d47e..525c8a8 100644 --- a/core/upb.c +++ b/core/upb.c @@ -41,16 +41,13 @@ const upb_type_info upb_types[] = { }; void upb_seterr(upb_status *status, enum upb_status_code code, - const char *msg, ...) -{ - if(upb_ok(status)) { // The first error is the most interesting. - status->code = code; - upb_string_recycle(&status->str); - va_list args; - va_start(args, msg); - upb_string_vprintf(status->str, msg, args); - va_end(args); - } + const char *msg, ...) { + status->code = code; + upb_string_recycle(&status->str); + va_list args; + va_start(args, msg); + upb_string_vprintf(status->str, msg, args); + va_end(args); } void upb_copyerr(upb_status *to, upb_status *from) diff --git a/core/upb_stream.h b/core/upb_stream.h index bf312a8..d0045cc 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -178,12 +178,16 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). On the other hand, if -// the data was *not* already in an existing string, this copies it into -// a upb_string, and if the data needs to be put in a specific range of -// memory (because eg. you need to put it into a different kind of string -// object) then upb_bytesrc_get() could be better. +// Like upb_bytesrc_read(), but modifies "str" in-place. "str" MUST be newly +// created or just recycled. Returns "false" if no data was returned, either +// due to error or EOF (check status for details). +// +// In comparison to upb_bytesrc_read(), this call can possibly alias existing +// string data (which avoids a copy). On the other hand, if the data was *not* +// already in an existing string, this copies it into a upb_string, and if the +// data needs to be put in a specific range of memory (because eg. you need to +// put it into a different kind of string object) then upb_bytesrc_get() could +// be better. INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, upb_status *status); diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b4b32ff..e60915f 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -126,6 +126,7 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, } else { // End-of-buffer. if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; s->ptr = upb_string_getrobuf(d->buf); } @@ -295,7 +296,15 @@ void upb_decoder_run(upb_src *src, upb_status *status) { while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &state, &tag)); + if (!upb_decode_tag(d, &state, &tag)) { + if (status->code == UPB_EOF && d->top == d->stack) { + // Normal end-of-file. + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + return; + } else { + goto err; + } + } // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -361,9 +370,6 @@ void upb_decoder_run(upb_src *src, upb_status *status) { CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); } - CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); - return; - err: if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 820399b..7923664 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -23,44 +23,42 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) { stdio->file = file; } -static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, - int offset, size_t bytes_to_read) { - char *buf = upb_string_getrwbuf(str, offset + bytes_to_read) + offset; - size_t read = fread(buf, 1, bytes_to_read, stdio->file); - if(read < bytes_to_read) { +static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, + upb_strlen_t count, upb_status *status) { + upb_stdio *stdio = (upb_stdio*)src; + assert(count > 0); + size_t read = fread(buf, 1, count, stdio->file); + if(read < (size_t)count) { // Error or EOF. - stdio->bytesrc.eof = feof(stdio->file); - if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesrc.status, UPB_STATUS_ERROR, - "Error reading from stdio stream."); - return false; + if(feof(stdio->file)) { + upb_seterr(status, UPB_EOF, ""); + return read; + } else if(ferror(stdio->file)) { + upb_seterr(status, UPB_ERROR, "Error reading from stdio stream."); + return -1; } - // Resize to actual read size. - upb_string_getrwbuf(str, offset + read); } - return true; + return read; } -bool upb_stdio_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen) { - // We ignore "minlen" since the stdio interfaces always return a full read - // unless they are at EOF. - (void)minlen; - return upb_stdio_read((upb_stdio*)src, str, 0, BLOCK_SIZE); -} - -bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { - return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); +static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + upb_strlen_t read = upb_stdio_read( + src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status); + if (read <= 0) return false; + upb_string_getrwbuf(str, read); + return true; } int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); - size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { // Error or EOF. stdio->bytesink.eof = feof(stdio->file); if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesink.status, UPB_STATUS_ERROR, + upb_seterr(&stdio->bytesink.status, UPB_ERROR, "Error writing to stdio stream."); return 0; } @@ -68,19 +66,19 @@ int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { return written; } -static upb_bytesrc_vtable upb_stdio_bytesrc_vtbl = { - (upb_bytesrc_get_fptr)upb_stdio_get, - (upb_bytesrc_append_fptr)upb_stdio_append, -}; +upb_stdio *upb_stdio_new() { + static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stdio_read, + upb_stdio_getstr, + }; -static upb_bytesink_vtable upb_stdio_bytesink_vtbl = { - upb_stdio_put -}; + //static upb_bytesink_vtbl bytesink_vtbl = { + // upb_stdio_put + //}; -upb_stdio *upb_stdio_new() { upb_stdio *stdio = malloc(sizeof(*stdio)); - upb_bytesrc_init(&stdio->bytesrc, &upb_stdio_bytesrc_vtbl); - upb_bytesink_init(&stdio->bytesink, &upb_stdio_bytesink_vtbl); + upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); + //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); return stdio; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 2d2e237..3a77ab1 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -12,7 +12,6 @@ #include "upb_string.h" struct _upb_textprinter { - upb_sink sink; upb_bytesink *bytesink; upb_string *str; int indent_depth; diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h index 7e35412..b40d9fa 100644 --- a/stream/upb_textprinter.h +++ b/stream/upb_textprinter.h @@ -20,8 +20,7 @@ upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, bool single_line); - -upb_sink *upb_textprinter_sink(upb_textprinter *p); +void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3 From fbb9fd35e05b88908beeca2c2b88b15aec1fca01 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 28 Jan 2011 10:11:48 -0800 Subject: Improve comments in headers, to better explain core interfaces. --- core/upb_def.h | 9 ++-- core/upb_stream.h | 123 ++++++++++++++++++++++++++++++++----------------- core/upb_stream_vtbl.h | 2 +- core/upb_string.h | 7 ++- 4 files changed, 91 insertions(+), 50 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb_def.h b/core/upb_def.h index d9bab97..e95aec3 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -1,17 +1,18 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details. * - * Provides definitions of .proto constructs: + * Provides a mechanism for loading proto definitions from descriptors, and + * data structures to represent those definitions. These form the protobuf + * schema, and are used extensively throughout upb: * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. * (TODO: definitions of extensions and services). * * Defs are obtained from a upb_symtab object. A upb_symtab is empty when - * constructed, and definitions can be added by supplying serialized - * descriptors. + * constructed, and definitions can be added by supplying descriptors. * * Defs are immutable and reference-counted. Symbol tables reference any defs * that are the "current" definitions. If an extension is loaded that adds a diff --git a/core/upb_stream.h b/core/upb_stream.h index d0045cc..09e4025 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -1,23 +1,46 @@ /* * upb - a minimalist implementation of protocol buffers. * - * This file defines four general-purpose streaming interfaces for protobuf - * data or bytes: + * This file defines four general-purpose streaming data interfaces. * - * - upb_src: pull interface for protobuf data. - * - upb_sink: push interface for protobuf data. - * - upb_bytesrc: pull interface for bytes. - * - upb_bytesink: push interface for bytes. + * - upb_handlers: represents a set of callbacks, very much like in XML's SAX + * API, that a client can register to do a streaming tree traversal over a + * stream of structured protobuf data, without knowing where that data is + * coming from. There is only one upb_handlers type (it is not a virtual + * base class), but the object lets you register any set of handlers. * - * These interfaces are used as general-purpose glue in upb. For example, the - * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * The upb_handlers interface supports delegation: when entering a submessage, + * you can delegate to another set of upb_handlers instead of handling the + * submessage yourself. This allows upb_handlers objects to *compose* -- you + * can implement a set of upb_handlers without knowing or caring whether this + * is the top-level message or not. * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * The other interfaces are the C equivalent of "virtual base classes" that + * anyone can implement: + * + * - upb_src: an interface that represents a source of streaming protobuf data. + * It lets you register a set of upb_handlers, and then call upb_src_run(), + * which pulls the protobuf data from somewhere and then calls the handlers. + * + * - upb_bytesrc: a pull interface for streams of bytes, basically an + * abstraction of read()/fread(), but it avoids copies where possible. + * + * - upb_bytesink: push interface for streams of bytes, basically an + * abstraction of write()/fwrite(), but it avoids copies where possible. + * + * All of the encoders and decoders are based on these generic interfaces, + * which lets you write streaming algorithms that do not depend on a specific + * serialization format; for example, you can write a pretty printer that works + * with input that came from protobuf binary format, protobuf text format, or + * even an in-memory upb_msg -- the pretty printer will not know the + * difference. + * + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. * */ -#ifndef UPB_SRCSINK_H -#define UPB_SRCSINK_H +#ifndef UPB_STREAM_H +#define UPB_STREAM_H #include "upb.h" @@ -53,8 +76,10 @@ typedef enum { // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been - // registered on the provided upb_handlers object. May not be returned - // from any other callback. + // registered on the provided upb_handlers object. This allows upb_handlers + // objects to compose; a set of upb_handlers need not know whether it is the + // top-level message or a sub-message. May not be returned from any other + // callback. UPB_DELEGATE, } upb_flow_t; @@ -105,9 +130,19 @@ typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, // // static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, // upb_value val) { -// Called with an unknown value is encountered. +// // Called with an unknown value is encountered. // return UPB_CONTINUE; // } +// +// // Any handlers you don't need can be set to NULL. +// static upb_handlerset handlers = { +// startmsg, +// endmsg, +// value, +// startsubmsg, +// endsubmsg, +// unknownval, +// }; typedef struct { upb_startmsg_handler_t startmsg; upb_endmsg_handler_t endmsg; @@ -128,26 +163,12 @@ INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); // from automatically being converted to strings in the value callback. // INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); -// The closure will be passed to every handler. The status will be used -// only immediately after a handler has returned UPB_STOP. +// The closure will be passed to every handler. The status will be read by the +// upb_src immediately after a handler has returned UPB_BREAK and used as the +// overall upb_src status; it will not be referenced at any other time. INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, upb_status *status); -// An object that transparently handles delegation so that the caller needs -// only follow the protocol as if delegation did not exist. -struct _upb_dispatcher; -typedef struct _upb_dispatcher upb_dispatcher; -INLINE void upb_dispatcher_init(upb_dispatcher *d); -INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); -INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, - upb_value val); -INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, - upb_field_number_t fieldnum, upb_value val); - /* upb_src ********************************************************************/ @@ -171,6 +192,24 @@ INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); INLINE void upb_src_run(upb_src *src, upb_status *status); +// A convenience object that a upb_src can use to invoke handlers. It +// transparently handles delegation so that the upb_src needs only follow the +// protocol as if delegation did not exist. +struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val); + /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes @@ -178,16 +217,16 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -// Like upb_bytesrc_read(), but modifies "str" in-place. "str" MUST be newly -// created or just recycled. Returns "false" if no data was returned, either -// due to error or EOF (check status for details). +// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure +// that "str" is created or just recycled. Returns "false" if no data was +// returned, either due to error or EOF (check status for details). // // In comparison to upb_bytesrc_read(), this call can possibly alias existing // string data (which avoids a copy). On the other hand, if the data was *not* // already in an existing string, this copies it into a upb_string, and if the // data needs to be put in a specific range of memory (because eg. you need to // put it into a different kind of string object) then upb_bytesrc_get() could -// be better. +// save you a copy. INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, upb_status *status); @@ -206,15 +245,13 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Writes up to "count" bytes from "buf", returning the total number of bytes -// written. If <0, indicates error (check upb_bytesink_status() for details). -INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, - upb_strlen_t count); +INLINE bool upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...); -// Puts the given string, which may alias the string data (which avoids a -// copy). Returns the number of bytes that were actually, consumed, which may -// be fewer than were in the string, or <0 on error. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); +// Puts the given string, returning true if the operation was successful, otherwise +// check "status" for details. Ownership of the string is *not* passed; if +// the callee wants a reference he must call upb_string_getref() on it. +INLINE bool upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); // Returns the current error status for the stream. INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ddefba9..ef655fd 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -139,7 +139,7 @@ INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, return sink->vtbl->write(sink, buf, count); } -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { return sink->vtbl->putstr(sink, str); } diff --git a/core/upb_string.h b/core/upb_string.h index 1a7e06b..7d0ae87 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -9,7 +9,9 @@ * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() * wheverever possible, while keeping both CPU and memory overhead low. * Throughout upb there are situations where one wants to reference all or part - * of another string without copying. upb_string provides APIs for doing this. + * of another string without copying. upb_string provides APIs for doing this, + * and allows the referenced string to be kept alive for as long as anyone is + * referencing it. * * Characteristics of upb_string: * - strings are reference-counted. @@ -22,7 +24,8 @@ * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations - * unless the string has been marked thread-safe. + * unless the string has been marked thread-safe. Time will tell whether this + * scheme is easy and convenient enough to be practical. * * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched * idiom that we go with it instead of making our pointers uint8_t*. -- cgit v1.2.3 From d98db7cb567f17a3bb56e2af8499d2e3aef03b3b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 12:07:09 -0800 Subject: Textprinter is compiling again. --- core/upb_stream.h | 22 ++++++--- core/upb_stream_vtbl.h | 21 ++++++--- stream/upb_textprinter.c | 114 ++++++++++++++++++++++++++++------------------- stream/upb_textprinter.h | 4 +- 4 files changed, 103 insertions(+), 58 deletions(-) (limited to 'core/upb_stream.h') diff --git a/core/upb_stream.h b/core/upb_stream.h index 09e4025..aa23549 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -245,16 +245,26 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -INLINE bool upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...); +// TODO: Figure out how buffering should be handled. Should the caller buffer +// data and only call these functions when a buffer is full? Seems most +// efficient, but then buffering has to be configured in the caller, which +// could be anything, which makes it hard to have a standard interface for +// controlling buffering. +// +// The downside of having the bytesink buffer is efficiency: the caller is +// making more (virtual) function calls, and the caller can't arrange to have +// a big contiguous buffer. The bytesink can do this, but will have to copy +// to make the data contiguous. + +// Returns the number of bytes written. +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, + const char *fmt, ...); // Puts the given string, returning true if the operation was successful, otherwise // check "status" for details. Ownership of the string is *not* passed; if // the callee wants a reference he must call upb_string_getref() on it. -INLINE bool upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, - upb_status *status); - -// Returns the current error status for the stream. -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); #include "upb_stream_vtbl.h" diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ef655fd..8e8971f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -34,8 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)( // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( - upb_bytesink *bytesink, upb_string *str); +typedef bool (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str, upb_status *status); +typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( + upb_status *status, const char *fmt, va_list args); // Vtables for the above interfaces. typedef struct { @@ -44,8 +46,9 @@ typedef struct { } upb_bytesrc_vtbl; typedef struct { - upb_bytesink_write_fptr write; - upb_bytesink_putstr_fptr putstr; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; + upb_bytesink_vprintf_fptr vprintf; } upb_bytesink_vtbl; typedef struct { @@ -140,13 +143,21 @@ INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, } INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { - return sink->vtbl->putstr(sink, str); + return sink->vtbl->putstr(sink, str, status); } INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { return &sink->status; } +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); + va_end(args); + return ret; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 2209173..7025494 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -15,34 +15,51 @@ struct _upb_textprinter { upb_bytesink *bytesink; int indent_depth; bool single_line; - upb_fielddef *f; + upb_status status; }; -static void upb_textprinter_indent(upb_textprinter *p) +#define CHECK(x) if ((x) < 0) goto err; + +static int upb_textprinter_indent(upb_textprinter *p) { if(!p->single_line) for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + return 0; +err: + return -1; } -static void upb_textprinter_endfield(upb_textprinter *p) { - if(p->single_line) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); - else - upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); +static int upb_textprinter_startfield(upb_textprinter *p, upb_fielddef *f) { + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); + return 0; +err: + return -1; +} + +static int upb_textprinter_endfield(upb_textprinter *p) { + if(p->single_line) { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + } else { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status)); + } + return 0; +err: + return -1; } static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, upb_value val) { upb_textprinter *p = _p; - upb_textprinter_indent(p); - upb_bytesink_printf(p->bytesink, UPB_STRFMT ": ", UPB_STRARG(f->name)); -#define CASE(fmtstr, member) upb_bytesink_printf(p->bytesink, fmtstr, val.member); break; - switch(p->f->type) { + upb_textprinter_startfield(p, f); +#define CASE(fmtstr, member) \ + CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; + switch(f->type) { case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); + CASE("%0.f", double); case UPB_TYPE(FLOAT): - CASE("%0.f", _float) + CASE("%0.f", float) case UPB_TYPE(INT64): case UPB_TYPE(SFIXED64): case UPB_TYPE(SINT64): @@ -50,40 +67,48 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, case UPB_TYPE(UINT64): case UPB_TYPE(FIXED64): CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) case UPB_TYPE(UINT32): case UPB_TYPE(FIXED32): CASE("%" PRIu32, uint32); case UPB_TYPE(ENUM): { - upb_enumdef *enum_def; - upb_string *enum_label; - (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && - (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { - // This is an enum value for which we found a corresponding string. - upb_bytesink_put(p->bytesink, enum_label); - CASE("%" PRIu32, uint32); + upb_enumdef *enum_def = upb_downcast_enumdef(f->def); + upb_string *enum_label = + upb_enumdef_iton(enum_def, upb_value_getint32(val)); + if (enum_label) { + // We found a corresponding string for this enum. Otherwise we fall + // through to the int32 code path. + CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status)); + break; + } } + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) case UPB_TYPE(BOOL): - CASE("%hhu", _bool); + CASE("%hhu", bool); case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); - upb_bytesink_put(p->bytesink, str); - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + // TODO: escaping. + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); break; } upb_textprinter_endfield(p); return UPB_CONTINUE; +err: + return UPB_BREAK; } -static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f) { +static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, + upb_handlers *delegate_to) { + (void)delegate_to; upb_textprinter *p = _p; + upb_textprinter_startfield(p, f); p->indent_depth++; - upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); - if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); + if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); return UPB_CONTINUE; } @@ -92,21 +117,13 @@ static upb_flow_t upb_textprinter_endsubmsg(void *_p) upb_textprinter *p = _p; p->indent_depth--; upb_textprinter_indent(p); - upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status); upb_textprinter_endfield(p); return UPB_CONTINUE; } upb_textprinter *upb_textprinter_new() { - static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg - upb_textprinter_putval, - upb_textprinter_startsubmsg, - upb_textprinter_endsubmsg, - }; upb_textprinter *p = malloc(sizeof(*p)); - upb_byte_init(&p->sink, &upb_textprinter_vtbl); return p; } @@ -114,11 +131,18 @@ void upb_textprinter_free(upb_textprinter *p) { free(p); } -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line) { +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line) { + static upb_handlerset handlerset = { + NULL, // startmsg + NULL, // endmsg + upb_textprinter_value, + upb_textprinter_startsubmsg, + upb_textprinter_endsubmsg, + }; p->bytesink = sink; p->single_line = single_line; p->indent_depth = 0; + upb_register_handlerset(handlers, &handlerset); + upb_set_handler_closure(handlers, p, &p->status); } - -upb_sink *upb_textprinter_sink(upb_textprinter *p) { return &p->sink; } diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h index b40d9fa..a880626 100644 --- a/stream/upb_textprinter.h +++ b/stream/upb_textprinter.h @@ -18,8 +18,8 @@ typedef struct _upb_textprinter upb_textprinter; upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line); +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line); void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); #ifdef __cplusplus -- cgit v1.2.3