From 8c1e7170b74e1a6a29736f63507f83ddeb51f560 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 May 2010 11:15:08 -0700 Subject: Defined the upb_src and upb_bytesrc interfaces. --- src/upb.h | 13 ++--- src/upb_decoder.c | 27 +++++----- src/upb_sink.h | 155 ----------------------------------------------------- src/upb_srcsink.h | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 175 deletions(-) delete mode 100644 src/upb_sink.h create mode 100644 src/upb_srcsink.h diff --git a/src/upb.h b/src/upb.h index 4fb5773..c65a686 100644 --- a/src/upb.h +++ b/src/upb.h @@ -116,12 +116,13 @@ typedef union { uint32_t _32bit; } upb_wire_value; -// A tag occurs before each value on-the-wire. -typedef struct { - upb_field_number_t field_number; - upb_wire_type_t wire_type; -} upb_tag; - +// A key occurs before each value on-the-wire. +typedef uint32_t upb_key; +INLINE upb_key upb_make_key(upb_fieldnum_t fieldnum, upb_wiretype_t wiretype) { + return (fieldnum << 3) | wiretype; +} +INLINE upb_fieldnum_t upb_key_fieldnum(upb_key key) { return key >> 3; } +INLINE upb_wiretype_t upb_key_wiretype(upb_key key) { return key & 0x07; } /* Polymorphic values of .proto types *****************************************/ diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 209db56..32b8f16 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -195,18 +195,6 @@ T(FLOAT, f, uint32_t, float, _float) { #undef GET #undef T -// Parses a tag, places the result in *tag. -INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - - // Parses a 64-bit varint that is known to be >= 2 bytes (the inline version // handles 1 and 2 byte varints). 
const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, @@ -311,13 +299,12 @@ typedef struct { struct upb_decoder { // Immutable state of the decoder. upb_msgdef *toplevel_msgdef; - upb_sink *sink; + upb_bytesrc *bytesrc; // State pertaining to a particular decode (resettable). // Stack entries store the offset where the submsg ends (for groups, 0). upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; size_t completed_offset; - void *udata; }; upb_decoder *upb_decoder_new(upb_msgdef *msgdef) @@ -344,6 +331,18 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) d->top->end_offset = 0; } +// Parses a tag, places the result in *tag. +upb_key upb_decoder_src_getkey(upb_decoder *d) +{ + upb_key key; + upb_fill_buffer(d); + d-> + const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); + return ret; +} + + + static const void *get_msgend(upb_decoder *d, const uint8_t *start) { if(d->top->end_offset > 0) diff --git a/src/upb_sink.h b/src/upb_sink.h deleted file mode 100644 index 5dc5b52..0000000 --- a/src/upb_sink.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - * upb_sink is a general purpose interface for pushing the contents of a - * protobuf from one component to another in a streaming fashion. We call the - * component that calls a upb_sink a "source". By "pushing" we mean that the - * source calls into the sink; the opposite (where a sink calls into the - * source) is known as "pull". In the push model the source gets the main - * loop; in a pull model the sink does. - * - * This interface is used as general-purpose glue in upb. For example, the - * parser interface works by implementing a source. Likewise the serialization - * simply implements a sink. Copying one protobuf to another is just a matter - * of using one message as a source and another as a sink. 
- * - * In terms of efficiency, we would generally expect "push" to be faster if the - * source had more state to track, and "pull" to be faster if the sink had more - * state. The reason is that whoever has the main loop can keep state on the - * stack (and possibly even in callee-save registers), whereas the the - * component that is "called into" always needs to reload its state from - * memory. - * - * In terms of programming complexity, it is easier and simpler to have the - * main loop, because you can store state in local variables. - * - * So the assumption inherent in using the push model is that sources are - * generally more complicated and stateful than consumers. For example, in the - * parser case, it has to deal with malformed input and associated errors; in - * comparison, the serializer deals with known-good input. - */ - -#ifndef UPB_SINK_H -#define UPB_SINK_H - -#include "upb_def.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Each of the upb_sink callbacks returns a status of this type. -typedef enum { - // The normal case, where the consumer wants to continue consuming. - UPB_SINK_CONTINUE, - - // The sink did not consume this value, and wants to halt further processing. - // If the source is resumable, it should save the current state so that when - // resumed, the value that was just provided will be replayed. - UPB_SINK_STOP, - - // The consumer wants to skip to the end of the current submessage and - // continue consuming. If we are at the top-level, the rest of the - // data is discarded. - UPB_SINK_SKIP -} upb_sink_status; - - -typedef struct { - struct upb_sink_callbacks *vtbl; -} upb_sink; - -/* upb_sink callbacks *********************************************************/ - -// The value callback is called for a regular value (ie. not a string or -// submessage). -typedef upb_sink_status (*upb_value_cb)(upb_sink *s, upb_fielddef *f, - upb_value val, upb_status *status); - -// The string callback is called for string data. 
"str" is the string in which -// the data lives, but it may contain more data than the effective string. -// "start" and "end" indicate the substring of "str" that is the effective -// string. If "start" is <0, this string is a continuation of the previous -// string for this field. If end > upb_strlen(str) then there is more data to -// follow for this string. "end" can also be used as a hint for how much data -// follows, but this is only a hint and is not guaranteed. -// -// The data is supplied this way to give you the opportunity to reference this -// data instead of copying it (perhaps using upb_strslice), or to minimize -// copying if it is unavoidable. -typedef upb_sink_status (*upb_str_cb)(upb_sink *s, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status); - -// The start and end callbacks are called when a submessage begins and ends, -// respectively. The caller is responsible for ensuring that the nesting -// level never exceeds UPB_MAX_NESTING. -typedef upb_sink_status (*upb_start_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); -typedef upb_sink_status (*upb_end_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); - - -/* upb_sink implementation ****************************************************/ - -typedef struct upb_sink_callbacks { - upb_value_cb value_cb; - upb_str_cb str_cb; - upb_start_cb start_cb; - upb_end_cb end_cb; -} upb_sink_callbacks; - -// These macros implement a mini virtual function dispatch for upb_sink instances. -// This allows functions that call upb_sinks to just write: -// -// upb_sink_onvalue(sink, field, val); -// -// The macro will handle the virtual function lookup and dispatch. We could -// potentially define these later to also be capable of calling a C++ virtual -// method instead of doing the virtual dispatch manually. This would make it -// possible to write C++ sinks in a more natural style without loss of -// efficiency. 
We could have a flag in upb_sink defining whether it is a C -// sink or a C++ one. -#define upb_sink_onvalue(s, f, val, status) s->vtbl->value_cb(s, f, val, status) -#define upb_sink_onstr(s, f, str, start, end, status) s->vtbl->str_cb(s, f, str, start, end, status) -#define upb_sink_onstart(s, f, status) s->vtbl->start_cb(s, f, status) -#define upb_sink_onend(s, f, status) s->vtbl->end_cb(s, f, status) - -// Initializes a plain C visitor with the given vtbl. The sink must have been -// allocated separately. -INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { - s->vtbl = vtbl; -} - - -/* upb_bytesink ***************************************************************/ - -// A upb_bytesink is like a upb_sync, but for bytes instead of structured -// protobuf data. Parsers implement upb_bytesink and push to a upb_sink, -// serializers do the opposite (implement upb_sink and push to upb_bytesink). -// -// The two simplest kinds of sinks are "write to string" and "write to FILE*". - -// A forward declaration solely for the benefit of declaring upb_byte_cb below. -// Always prefer upb_bytesink (without the "struct" keyword) instead. -struct _upb_bytesink; - -// The single bytesink callback; it takes the bytes to be written and returns -// how many were successfully written. If the return value is <0, the caller -// should stop processing. -typedef int32_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str, - uint32_t start, uint32_t end, - upb_status *status); - -typedef struct _upb_bytesink { - upb_byte_cb *cb; -} upb_bytesink; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h new file mode 100644 index 0000000..7c95059 --- /dev/null +++ b/src/upb_srcsink.h @@ -0,0 +1,156 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ * + * This file defines four general-purpose interfaces for pulling/pushing either + * protobuf data or bytes: + * + * - upb_src: pull interface for protobuf key/value pairs. + * - upb_sink: push interface for protobuf key/value pairs. + * - upb_bytesrc: pull interface for bytes. + * - upb_bytesink: push interface for bytes. + * + * These interfaces are used as general-purpose glue in upb. For example, the + * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + */ + +#ifndef UPB_SRCSINK_H +#define UPB_SRCSINK_H + +#include "upb_def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_src ********************************************************************/ + +// Retrieves the fielddef for the next field in the stream. Returns NULL on +// error or end-of-stream. +upb_fielddef *upb_src_getdef(upb_src *src); + +// Retrieves and stores the next value in "val". For string types the caller +// does not own a ref to the returned type; you must ref it yourself if you +// want one. Returns false on error. +bool upb_src_getval(upb_src *src, upb_valueptr val); + +// Like upb_src_getval() but skips the value. +bool upb_src_skipval(upb_src *src); + +// Descends into a submessage. +bool upb_src_startmsg(upb_src *src); + +// Stops reading a submessage. May be called before the stream is EOF, in +// which case the rest of the submessage is skipped. +bool upb_src_endmsg(upb_src *src); + +// Returns the current error status for the stream. +upb_status *upb_src_status(upb_src *src); + +/* upb_bytesrc ****************************************************************/ + +// Returns the next string in the stream. The caller does not own a ref on the +// returned string; you must ref it yourself if you want one. +upb_string *upb_bytesrc_get(upb_bytesrc *src); + +// Appends the next "len" bytes in the stream in-place to "str". This should +// be used when the caller needs to build a contiguous string of the existing +// data in "str" with more data. 
+bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +// Returns the current error status for the stream. +upb_status *upb_bytesrc_status(upb_src *src); + +/* upb_sink callbacks *********************************************************/ + +// The value callback is called for a regular value (ie. not a string or +// submessage). +typedef upb_sink_status (*upb_value_cb)(upb_sink *s, upb_fielddef *f, + upb_value val, upb_status *status); + +// The string callback is called for string data. "str" is the string in which +// the data lives, but it may contain more data than the effective string. +// "start" and "end" indicate the substring of "str" that is the effective +// string. If "start" is <0, this string is a continuation of the previous +// string for this field. If end > upb_strlen(str) then there is more data to +// follow for this string. "end" can also be used as a hint for how much data +// follows, but this is only a hint and is not guaranteed. +// +// The data is supplied this way to give you the opportunity to reference this +// data instead of copying it (perhaps using upb_strslice), or to minimize +// copying if it is unavoidable. +typedef upb_sink_status (*upb_str_cb)(upb_sink *s, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status); + +// The start and end callbacks are called when a submessage begins and ends, +// respectively. The caller is responsible for ensuring that the nesting +// level never exceeds UPB_MAX_NESTING. 
+typedef upb_sink_status (*upb_start_cb)(upb_sink *s, upb_fielddef *f, + upb_status *status); +typedef upb_sink_status (*upb_end_cb)(upb_sink *s, upb_fielddef *f, + upb_status *status); + + +/* upb_sink implementation ****************************************************/ + +typedef struct upb_sink_callbacks { + upb_value_cb value_cb; + upb_str_cb str_cb; + upb_start_cb start_cb; + upb_end_cb end_cb; +} upb_sink_callbacks; + +// These macros implement a mini virtual function dispatch for upb_sink instances. +// This allows functions that call upb_sinks to just write: +// +// upb_sink_onvalue(sink, field, val); +// +// The macro will handle the virtual function lookup and dispatch. We could +// potentially define these later to also be capable of calling a C++ virtual +// method instead of doing the virtual dispatch manually. This would make it +// possible to write C++ sinks in a more natural style without loss of +// efficiency. We could have a flag in upb_sink defining whether it is a C +// sink or a C++ one. +#define upb_sink_onvalue(s, f, val, status) s->vtbl->value_cb(s, f, val, status) +#define upb_sink_onstr(s, f, str, start, end, status) s->vtbl->str_cb(s, f, str, start, end, status) +#define upb_sink_onstart(s, f, status) s->vtbl->start_cb(s, f, status) +#define upb_sink_onend(s, f, status) s->vtbl->end_cb(s, f, status) + +// Initializes a plain C visitor with the given vtbl. The sink must have been +// allocated separately. +INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { + s->vtbl = vtbl; +} + + +/* upb_bytesink ***************************************************************/ + +// A upb_bytesink is like a upb_sink, but for bytes instead of structured +// protobuf data. Parsers implement upb_bytesink and push to a upb_sink, +// serializers do the opposite (implement upb_sink and push to upb_bytesink). +// +// The two simplest kinds of sinks are "write to string" and "write to FILE*". 
+ +// A forward declaration solely for the benefit of declaring upb_byte_cb below. +// Always prefer upb_bytesink (without the "struct" keyword) instead. +struct _upb_bytesink; + +// The single bytesink callback; it takes the bytes to be written and returns +// how many were successfully written. If the return value is <0, the caller +// should stop processing. +typedef int32_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str, + uint32_t start, uint32_t end, + upb_status *status); + +typedef struct _upb_bytesink { + upb_byte_cb *cb; +} upb_bytesink; + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3