From 2a7f51f3fd534b3e9e098c522cffbb96e1551474 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 6 Oct 2010 08:19:34 -0700 Subject: Change upb_src to use push-based interface. Unfortunately my previous detailed commit message was lost somehow by git or vi. Will have to explain in more detail at a later date the rationale for this change. The build will be broken until I port the old decoder to this new interface. --- core/upb.h | 4 ++ core/upb_stream.h | 124 +++++++++++++++++++++--------------------------------- 2 files changed, 51 insertions(+), 77 deletions(-) diff --git a/core/upb.h b/core/upb.h index 7ee0469..6ecc2a0 100644 --- a/core/upb.h +++ b/core/upb.h @@ -261,6 +261,10 @@ enum upb_status_code { UPB_ERROR_MAX_NESTING_EXCEEDED = -3 }; +// TODO: consider making this a single word: a upb_string* where we use the low +// bits as flags indicating whether there is an error and whether it is +// resumable. This would improve efficiency, because the code would not need +// to be loaded after a call to a function returning a status. typedef struct { enum upb_status_code code; upb_string *str; diff --git a/core/upb_stream.h b/core/upb_stream.h index 861bd1c..cd00c1e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -28,98 +28,64 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -// Note! The "eof" flags work like feof() in C; they cannot report end-of-file -// until a read has failed due to eof. They cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. - -/* upb_src ********************************************************************/ +/* upb_sink *******************************************************************/ -// A upb_src is a pull parser for protobuf data. 
Sample usage: -// -// #define CHECK(x) if(!x) goto err; +// A upb_sink is a component that receives a stream of protobuf data. +// It is an abstract interface that is implemented either by the system or +// by users. // -// bool parse_msg(upb_src *src, int indent) { -// upb_fielddef *f; -// while ((f = upb_src_getdef(src)) != NULL) { -// for (int i = 0; i < indent; i++) putchar(' '); -// printf("Parsed field; name=" UPB_STRFMT ", num=%d", -// UPB_STRARG(d->name), d->number); -// if (upb_issubmsg(f)) { -// CHECK(upb_src_startmsg(src)); -// CHECK(parse_msg(src, indent + 2)); -// CHECK(upb_src_endmsg(src)); -// } else { -// CHECK(upb_src_skipval(src)); -// } -// } -// // We should be EOF now, otherwise there was an error. -// CHECK(upb_src_eof(src)); -// return true; -// -// err: -// return false; -// } -// -// TODO: decide how to handle unknown fields. - -// Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. End of stream can simply mean end of submessage. -struct _upb_fielddef *upb_src_getdef(upb_src *src); - -// Retrieves and stores the next value in "val". upb_src_getval() is for all -// numeric types and upb_src_getstr() is for strings. For string types "str" -// must be a newly-recycled string. Returns false on error. -bool upb_src_getval(upb_src *src, upb_valueptr val); -bool upb_src_getstr(upb_src *src, upb_string *val); - -// Like upb_src_getval() but skips the value. -bool upb_src_skipval(upb_src *src); - -// Descends into a submessage. May only be called when upb_issubmsg(f) is true -// for an f = upb_src_getdef(src) that was just parsed. -bool upb_src_startmsg(upb_src *src); - -// Stops reading a submessage. May be called before the stream is EOF, in -// which case the rest of the submessage is skipped. -bool upb_src_endmsg(upb_src *src); - -// Returns the current error/eof status for the stream. 
If a stream is eof -// but we are inside a submessage, calling upb_src_endmsg(src) will reset -// the eof marker. -INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } -INLINE bool upb_src_eof(upb_src *src) { return src->eof; } - -// The following functions are equivalent to upb_src_getval(), but take -// pointers to specific types. In debug mode this may check that the type -// is compatible with the type being read. This check will *not* be performed -// in non-debug mode, and if you get the type wrong the behavior is undefined. -bool upb_src_getbool(upb_src *src, bool *val); -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); +// TODO: unknown fields. -/* upb_sink *******************************************************************/ +// Constants that a sink returns to indicate to its caller whether it should +// continue or not. +typedef enum { + // Caller should continue sending values to the sink. + UPB_SINK_CONTINUE, + + // Return from upb_sink_putdef() to skip the next value (which may be a + // submessage). + UPB_SINK_SKIP, + + // Caller should stop sending values; check sink status for details. + // If processing resumes later, it should resume with the next value. + UPB_SINK_STOP, +} upb_sinkret_t; // Puts the given fielddef into the stream. -bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); +upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); // Puts the given value into the stream. 
-bool upb_sink_putval(upb_sink *sink, upb_value val); -bool upb_sink_putstr(upb_sink *sink, upb_string *str); +upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); +upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); // Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a // client could have a submessage already serialized, and therefore put it // as a string instead of its individual elements. -bool upb_sink_startmsg(upb_sink *sink); -bool upb_sink_endmsg(upb_sink *sink); +upb_sinkret_t upb_sink_startmsg(upb_sink *sink); +upb_sinkret_t upb_sink_endmsg(upb_sink *sink); // Returns the current error status for the stream. upb_status *upb_sink_status(upb_sink *sink); + +/* upb_src ********************************************************************/ + +// A upb_src is a resumable push parser for protobuf data. It works by first +// accepting registration of a upb_sink to which it will push data, then +// in a second phase it parses the actual data. +// + +// Sets the given sink as the target of this src. It will be called when +// upb_src_run() is run. +void upb_src_setsink(upb_src *src, upb_sink *sink); + +// Pushes data from this src to the previously registered sink, returning +// true if all data was processed. If false is returned, check +// upb_src_status() for details; if it is a resumable status, upb_src_run +// may be called again to resume processing. +bool upb_src_run(upb_src *src); + + /* upb_bytesrc ****************************************************************/ // Returns the next string in the stream. false is returned on error or eof. @@ -133,6 +99,10 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. +// Note! The "eof" flag works like feof() in C; it cannot report end-of-file +// until a read has failed due to eof. 
It cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } -- cgit v1.2.3