From 8c1e7170b74e1a6a29736f63507f83ddeb51f560 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 May 2010 11:15:08 -0700 Subject: Defined the upb_src and upb_bytesrc interfaces. --- src/upb.h | 13 ++--- src/upb_decoder.c | 27 +++++----- src/upb_sink.h | 155 ----------------------------------------------------- src/upb_srcsink.h | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 175 deletions(-) delete mode 100644 src/upb_sink.h create mode 100644 src/upb_srcsink.h diff --git a/src/upb.h b/src/upb.h index 4fb5773..c65a686 100644 --- a/src/upb.h +++ b/src/upb.h @@ -116,12 +116,13 @@ typedef union { uint32_t _32bit; } upb_wire_value; -// A tag occurs before each value on-the-wire. -typedef struct { - upb_field_number_t field_number; - upb_wire_type_t wire_type; -} upb_tag; - +// A key occurs before each value on-the-wire. +typedef uint32_t upb_key; +INLINE upb_key upb_make_key(upb_fieldnum_t fieldnum, upb_wiretype_t wiretype) { + return (fieldnum << 3) | wiretype; +} +INLINE upb_fieldnum_t upb_key_fieldnum(upb_key key) { return key >> 3; } +INLINE upb_wiretype_t upb_key_wiretype(upb_key key) { return key & 0x07; } /* Polymorphic values of .proto types *****************************************/ diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 209db56..32b8f16 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -195,18 +195,6 @@ T(FLOAT, f, uint32_t, float, _float) { #undef GET #undef T -// Parses a tag, places the result in *tag. -INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - - // Parses a 64-bit varint that is known to be >= 2 bytes (the inline version // handles 1 and 2 byte varints). 
const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, @@ -311,13 +299,12 @@ typedef struct { struct upb_decoder { // Immutable state of the decoder. upb_msgdef *toplevel_msgdef; - upb_sink *sink; + upb_bytesrc *bytesrc; // State pertaining to a particular decode (resettable). // Stack entries store the offset where the submsg ends (for groups, 0). upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; size_t completed_offset; - void *udata; }; upb_decoder *upb_decoder_new(upb_msgdef *msgdef) @@ -344,6 +331,18 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) d->top->end_offset = 0; } +// Parses a tag, places the result in *tag. +upb_key upb_decoder_src_getkey(upb_decoder *d) +{ + upb_key key; + upb_fill_buffer(d); + d-> + const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); + return ret; +} + + + static const void *get_msgend(upb_decoder *d, const uint8_t *start) { if(d->top->end_offset > 0) diff --git a/src/upb_sink.h b/src/upb_sink.h deleted file mode 100644 index 5dc5b52..0000000 --- a/src/upb_sink.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - * upb_sink is a general purpose interface for pushing the contents of a - * protobuf from one component to another in a streaming fashion. We call the - * component that calls a upb_sink a "source". By "pushing" we mean that the - * source calls into the sink; the opposite (where a sink calls into the - * source) is known as "pull". In the push model the source gets the main - * loop; in a pull model the sink does. - * - * This interface is used as general-purpose glue in upb. For example, the - * parser interface works by implementing a source. Likewise the serialization - * simply implements a sink. Copying one protobuf to another is just a matter - * of using one message as a source and another as a sink. 
- * - * In terms of efficiency, we would generally expect "push" to be faster if the - * source had more state to track, and "pull" to be faster if the sink had more - * state. The reason is that whoever has the main loop can keep state on the - * stack (and possibly even in callee-save registers), whereas the the - * component that is "called into" always needs to reload its state from - * memory. - * - * In terms of programming complexity, it is easier and simpler to have the - * main loop, because you can store state in local variables. - * - * So the assumption inherent in using the push model is that sources are - * generally more complicated and stateful than consumers. For example, in the - * parser case, it has to deal with malformed input and associated errors; in - * comparison, the serializer deals with known-good input. - */ - -#ifndef UPB_SINK_H -#define UPB_SINK_H - -#include "upb_def.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Each of the upb_sink callbacks returns a status of this type. -typedef enum { - // The normal case, where the consumer wants to continue consuming. - UPB_SINK_CONTINUE, - - // The sink did not consume this value, and wants to halt further processing. - // If the source is resumable, it should save the current state so that when - // resumed, the value that was just provided will be replayed. - UPB_SINK_STOP, - - // The consumer wants to skip to the end of the current submessage and - // continue consuming. If we are at the top-level, the rest of the - // data is discarded. - UPB_SINK_SKIP -} upb_sink_status; - - -typedef struct { - struct upb_sink_callbacks *vtbl; -} upb_sink; - -/* upb_sink callbacks *********************************************************/ - -// The value callback is called for a regular value (ie. not a string or -// submessage). -typedef upb_sink_status (*upb_value_cb)(upb_sink *s, upb_fielddef *f, - upb_value val, upb_status *status); - -// The string callback is called for string data. 
"str" is the string in which -// the data lives, but it may contain more data than the effective string. -// "start" and "end" indicate the substring of "str" that is the effective -// string. If "start" is <0, this string is a continuation of the previous -// string for this field. If end > upb_strlen(str) then there is more data to -// follow for this string. "end" can also be used as a hint for how much data -// follows, but this is only a hint and is not guaranteed. -// -// The data is supplied this way to give you the opportunity to reference this -// data instead of copying it (perhaps using upb_strslice), or to minimize -// copying if it is unavoidable. -typedef upb_sink_status (*upb_str_cb)(upb_sink *s, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status); - -// The start and end callbacks are called when a submessage begins and ends, -// respectively. The caller is responsible for ensuring that the nesting -// level never exceeds UPB_MAX_NESTING. -typedef upb_sink_status (*upb_start_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); -typedef upb_sink_status (*upb_end_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); - - -/* upb_sink implementation ****************************************************/ - -typedef struct upb_sink_callbacks { - upb_value_cb value_cb; - upb_str_cb str_cb; - upb_start_cb start_cb; - upb_end_cb end_cb; -} upb_sink_callbacks; - -// These macros implement a mini virtual function dispatch for upb_sink instances. -// This allows functions that call upb_sinks to just write: -// -// upb_sink_onvalue(sink, field, val); -// -// The macro will handle the virtual function lookup and dispatch. We could -// potentially define these later to also be capable of calling a C++ virtual -// method instead of doing the virtual dispatch manually. This would make it -// possible to write C++ sinks in a more natural style without loss of -// efficiency. 
We could have a flag in upb_sink defining whether it is a C -// sink or a C++ one. -#define upb_sink_onvalue(s, f, val, status) s->vtbl->value_cb(s, f, val, status) -#define upb_sink_onstr(s, f, str, start, end, status) s->vtbl->str_cb(s, f, str, start, end, status) -#define upb_sink_onstart(s, f, status) s->vtbl->start_cb(s, f, status) -#define upb_sink_onend(s, f, status) s->vtbl->end_cb(s, f, status) - -// Initializes a plain C visitor with the given vtbl. The sink must have been -// allocated separately. -INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { - s->vtbl = vtbl; -} - - -/* upb_bytesink ***************************************************************/ - -// A upb_bytesink is like a upb_sync, but for bytes instead of structured -// protobuf data. Parsers implement upb_bytesink and push to a upb_sink, -// serializers do the opposite (implement upb_sink and push to upb_bytesink). -// -// The two simplest kinds of sinks are "write to string" and "write to FILE*". - -// A forward declaration solely for the benefit of declaring upb_byte_cb below. -// Always prefer upb_bytesink (without the "struct" keyword) instead. -struct _upb_bytesink; - -// The single bytesink callback; it takes the bytes to be written and returns -// how many were successfully written. If the return value is <0, the caller -// should stop processing. -typedef int32_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str, - uint32_t start, uint32_t end, - upb_status *status); - -typedef struct _upb_bytesink { - upb_byte_cb *cb; -} upb_bytesink; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h new file mode 100644 index 0000000..7c95059 --- /dev/null +++ b/src/upb_srcsink.h @@ -0,0 +1,156 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ * + * This file defines four general-purpose interfaces for pulling/pushing either + * protobuf data or bytes: + * + * - upb_src: pull interface for protobuf key/value pairs. + * - upb_sink: push interface for protobuf key/value pairs. + * - upb_bytesrc: pull interface for bytes. + * - upb_bytesink: push interface for bytes. + * + * These interfaces are used as general-purpose glue in upb. For example, the + * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + */ + +#ifndef UPB_SRCSINK_H +#define UPB_SRCSINK_H + +#include "upb_def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_src ********************************************************************/ + +// Retrieves the fielddef for the next field in the stream. Returns NULL on +// error or end-of-stream. +upb_fielddef *upb_src_getdef(upb_src *src); + +// Retrieves and stores the next value in "val". For string types the caller +// does not own a ref to the returned type; you must ref it yourself if you +// want one. Returns false on error. +bool upb_src_getval(upb_src *src, upb_valueptr val); + +// Like upb_src_getval() but skips the value. +bool upb_src_skipval(upb_src *src); + +// Descends into a submessage. +bool upb_src_startmsg(upb_src *src); + +// Stops reading a submessage. May be called before the stream is EOF, in +// which case the rest of the submessage is skipped. +bool upb_src_endmsg(upb_src *src); + +// Returns the current error status for the stream. +upb_status *upb_src_status(upb_src *src); + +/* upb_bytesrc ****************************************************************/ + +// Returns the next string in the stream. The caller does not own a ref on the +// returned string; you must ref it yourself if you want one. +upb_string *upb_bytesrc_get(upb_bytesrc *src); + +// Appends the next "len" bytes in the stream in-place to "str". This should +// be used when the caller needs to build a contiguous string of the existing +// data in "str" with more data. 
+bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +// Returns the current error status for the stream. +upb_status *upb_bytesrc_status(upb_src *src); + +/* upb_sink callbacks *********************************************************/ + +// The value callback is called for a regular value (ie. not a string or +// submessage). +typedef upb_sink_status (*upb_value_cb)(upb_sink *s, upb_fielddef *f, + upb_value val, upb_status *status); + +// The string callback is called for string data. "str" is the string in which +// the data lives, but it may contain more data than the effective string. +// "start" and "end" indicate the substring of "str" that is the effective +// string. If "start" is <0, this string is a continuation of the previous +// string for this field. If end > upb_strlen(str) then there is more data to +// follow for this string. "end" can also be used as a hint for how much data +// follows, but this is only a hint and is not guaranteed. +// +// The data is supplied this way to give you the opportunity to reference this +// data instead of copying it (perhaps using upb_strslice), or to minimize +// copying if it is unavoidable. +typedef upb_sink_status (*upb_str_cb)(upb_sink *s, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status); + +// The start and end callbacks are called when a submessage begins and ends, +// respectively. The caller is responsible for ensuring that the nesting +// level never exceeds UPB_MAX_NESTING. 
+typedef upb_sink_status (*upb_start_cb)(upb_sink *s, upb_fielddef *f, + upb_status *status); +typedef upb_sink_status (*upb_end_cb)(upb_sink *s, upb_fielddef *f, + upb_status *status); + + +/* upb_sink implementation ****************************************************/ + +typedef struct upb_sink_callbacks { + upb_value_cb value_cb; + upb_str_cb str_cb; + upb_start_cb start_cb; + upb_end_cb end_cb; +} upb_sink_callbacks; + +// These macros implement a mini virtual function dispatch for upb_sink instances. +// This allows functions that call upb_sinks to just write: +// +// upb_sink_onvalue(sink, field, val); +// +// The macro will handle the virtual function lookup and dispatch. We could +// potentially define these later to also be capable of calling a C++ virtual +// method instead of doing the virtual dispatch manually. This would make it +// possible to write C++ sinks in a more natural style without loss of +// efficiency. We could have a flag in upb_sink defining whether it is a C +// sink or a C++ one. +#define upb_sink_onvalue(s, f, val, status) s->vtbl->value_cb(s, f, val, status) +#define upb_sink_onstr(s, f, str, start, end, status) s->vtbl->str_cb(s, f, str, start, end, status) +#define upb_sink_onstart(s, f, status) s->vtbl->start_cb(s, f, status) +#define upb_sink_onend(s, f, status) s->vtbl->end_cb(s, f, status) + +// Initializes a plain C visitor with the given vtbl. The sink must have been +// allocated separately. +INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { + s->vtbl = vtbl; +} + + +/* upb_bytesink ***************************************************************/ + +// A upb_bytesink is like a upb_sync, but for bytes instead of structured +// protobuf data. Parsers implement upb_bytesink and push to a upb_sink, +// serializers do the opposite (implement upb_sink and push to upb_bytesink). +// +// The two simplest kinds of sinks are "write to string" and "write to FILE*". 
+ +// A forward declaration solely for the benefit of declaring upb_byte_cb below. +// Always prefer upb_bytesink (without the "struct" keyword) instead. +struct _upb_bytesink; + +// The single bytesink callback; it takes the bytes to be written and returns +// how many were successfully written. If the return value is <0, the caller +// should stop processing. +typedef int32_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str, + uint32_t start, uint32_t end, + upb_status *status); + +typedef struct _upb_bytesink { + upb_byte_cb *cb; +} upb_bytesink; + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From 0034e6fdb82b7e0623983f44ba4fc1c98393d032 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 27 May 2010 10:22:55 -0700 Subject: Revisions to src and sink interfaces. --- src/upb_srcsink.h | 56 +++++++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 7c95059..6dd11d1 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -6,8 +6,8 @@ * This file defines four general-purpose interfaces for pulling/pushing either * protobuf data or bytes: * - * - upb_src: pull interface for protobuf key/value pairs. - * - upb_sink: push interface for protobuf key/value pairs. + * - upb_src: pull interface for protobuf data. + * - upb_sink: push interface for protobuf data. * - upb_bytesrc: pull interface for bytes. * - upb_bytesink: push interface for bytes. * @@ -48,6 +48,23 @@ bool upb_src_endmsg(upb_src *src); // Returns the current error status for the stream. upb_status *upb_src_status(upb_src *src); +/* upb_sink *******************************************************************/ + +// Puts the given fielddef into the stream. +bool upb_sink_putdef(upb_sink *sink, upb_fielddef *def); + +// Puts the given value into the stream. +bool upb_sink_putval(upb_sink *sink, upb_value val); + +// Starts a submessage. (needed? 
the def tells us we're starting a submsg.) +bool upb_sink_startmsg(upb_sink *sink); + +// Ends a submessage. +bool upb_sink_endmsg(upb_sink *sink); + +// Returns the current error status for the stream. +upb_status *upb_sink_status(upb_sink *sink); + /* upb_bytesrc ****************************************************************/ // Returns the next string in the stream. The caller does not own a ref on the @@ -62,37 +79,14 @@ bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. upb_status *upb_bytesrc_status(upb_src *src); -/* upb_sink callbacks *********************************************************/ +/* upb_bytesink ***************************************************************/ -// The value callback is called for a regular value (ie. not a string or -// submessage). -typedef upb_sink_status (*upb_value_cb)(upb_sink *s, upb_fielddef *f, - upb_value val, upb_status *status); - -// The string callback is called for string data. "str" is the string in which -// the data lives, but it may contain more data than the effective string. -// "start" and "end" indicate the substring of "str" that is the effective -// string. If "start" is <0, this string is a continuation of the previous -// string for this field. If end > upb_strlen(str) then there is more data to -// follow for this string. "end" can also be used as a hint for how much data -// follows, but this is only a hint and is not guaranteed. -// -// The data is supplied this way to give you the opportunity to reference this -// data instead of copying it (perhaps using upb_strslice), or to minimize -// copying if it is unavoidable. -typedef upb_sink_status (*upb_str_cb)(upb_sink *s, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status); - -// The start and end callbacks are called when a submessage begins and ends, -// respectively. 
The caller is responsible for ensuring that the nesting -// level never exceeds UPB_MAX_NESTING. -typedef upb_sink_status (*upb_start_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); -typedef upb_sink_status (*upb_end_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); +// Puts the given string. Returns the number of bytes that were actually +// consumed, which may be fewer than were in the string, or <0 on error. +int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +// Returns the current error status for the stream. +upb_status *upb_bytesink_status(upb_bytesink *sink); /* upb_sink implementation ****************************************************/ -- cgit v1.2.3 From a484ea0275f4d451d881b4edb1e1e4ae93be42a7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 3 Jun 2010 12:07:07 -0700 Subject: WIP: intrusive changes to upb_decoder. --- src/upb.h | 4 +- src/upb_decoder.c | 171 +++++++++++++++++++++++++----------------------------- src/upb_decoder.h | 14 ++--- src/upb_srcsink.h | 109 +++++++++++++++++----------------- src/upb_string.h | 7 ++- 5 files changed, 147 insertions(+), 158 deletions(-) diff --git a/src/upb.h b/src/upb.h index c65a686..978ee5c 100644 --- a/src/upb.h +++ b/src/upb.h @@ -270,8 +270,8 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, enum upb_status_code { UPB_STATUS_OK = 0, - // The input byte stream ended in the middle of a record. - UPB_STATUS_NEED_MORE_DATA = 1, + // A read or write from a streaming src/sink could not be completed right now. + UPB_STATUS_TRYAGAIN = 1, // An unrecoverable error occurred. 
UPB_STATUS_ERROR = -1, diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 32b8f16..5cb25a1 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -221,23 +221,6 @@ const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, return buf; } -const uint8_t *upb_decode_wire_value(uint8_t *buf, uint8_t *end, - upb_wire_type_t wt, upb_wire_value *wv, - upb_status *status) -{ - switch(wt) { - case UPB_WIRE_TYPE_VARINT: - return upb_get_v_uint64_t(buf, end, &wv->varint, status); - case UPB_WIRE_TYPE_64BIT: - return upb_get_f_uint64_t(buf, end, &wv->_64bit, status); - case UPB_WIRE_TYPE_32BIT: - return upb_get_f_uint32_t(buf, end, &wv->_32bit, status); - default: - status->code = UPB_STATUS_ERROR; // Doesn't handle delimited, groups. - return end; - } -} - // Advances buf past the current wire value (of type wt), saving the result in // outbuf. static const uint8_t *skip_wire_value(const uint8_t *buf, const uint8_t *end, @@ -293,7 +276,7 @@ static const uint8_t *upb_decode_value(const uint8_t *buf, const uint8_t *end, typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - size_t end_offset; // For groups, 0. + int32_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -304,7 +287,15 @@ struct upb_decoder { // State pertaining to a particular decode (resettable). // Stack entries store the offset where the submsg ends (for groups, 0). upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; - size_t completed_offset; + + // The current buffer. + upb_string *buf; + + // The overall stream offset of the beginning of this buffer. + uint32_t stream_offset; + + // The current offset in this buffer. + uint32_t buffer_offset; }; upb_decoder *upb_decoder_new(upb_msgdef *msgdef) @@ -331,16 +322,6 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) d->top->end_offset = 0; } -// Parses a tag, places the result in *tag. 
-upb_key upb_decoder_src_getkey(upb_decoder *d) -{ - upb_key key; - upb_fill_buffer(d); - d-> - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - return ret; -} - static const void *get_msgend(upb_decoder *d, const uint8_t *start) @@ -395,9 +376,78 @@ static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) return get_msgend(d, start); } +// Parses a tag, places the result in *tag. +upb_fielddef *upb_decoder_src_getdef(upb_decoder *d) +{ + uint32_t key; + upb_fill_buffer(d); + d->buf = upb_get_v_uint32_t(d->buf, d->end, &key, &d->status); + if (!upb_ok(status)) return NULL; + if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) { + if(!isgroup(d->submsg_end)) { + upb_seterr(d->status, UPB_STATUS_ERROR, "End group seen but current " + "message is not a group, byte offset: %zd", + d->completed_offset + (completed - start)); + return NULL; + } + submsg_end = pop(d, start, status); + msgdef = d->top->msgdef; + completed = buf; + return NULL; + } + // Look up field by tag number. + return upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key)); +} -size_t upb_decoder_decode(upb_decoder *d, upb_strptr str, upb_status *status) +bool upb_decoder_src_getval(upb_src *src, upb_valueptr val) { + if(upb_wiretype_from_key(d->key) == UPB_WIRE_TYPE_DELIMITED) { + int32_t delim_len; + d->buf = upb_get_INT32(d->buf, d->end, &delim_len, &d->status); + CHECK_STATUS(); // Checking decode_tag() and upb_get_INT32(). + int32_t needed = + const uint8_t *delim_end = buf + delim_len; + if(f->type == UPB_TYPE(MESSAGE)) { + submsg_end = push(d, start, delim_end - start, f, status); + msgdef = d->top->msgdef; + } else { + if(f && upb_isstringtype(f->type)) { + int32_t str_start = buf - start; + uint32_t len = str_start + delim_len; + sink_status = upb_sink_onstr(d->sink, f, str, str_start, len, status); + } // else { TODO: packed arrays } + // If field was not found, it is skipped silently. + buf = delim_end; // Could be >end. 
+ } + } else { + if(!upb_check_type(tag.wire_type, f->type)) { + buf = skip_wire_value(buf, end, tag.wire_type, status); + } else if (f->type == UPB_TYPE(GROUP)) { + submsg_end = push(d, start, 0, f, status); + msgdef = d->top->msgdef; + } else { + upb_value val; + buf = upb_decode_value(buf, end, f->type, upb_value_addrof(&val), + status); + CHECK_STATUS(); // Checking upb_decode_value(). + sink_status = upb_sink_onvalue(d->sink, f, val, status); + } + } + CHECK_STATUS(); + + while(buf >= submsg_end) { + if(buf > submsg_end) { + upb_seterr(status, UPB_STATUS_ERROR, "Expected submsg end offset " + "did not lie on a tag/value boundary."); + goto err; + } + submsg_end = pop(d, start, status); + msgdef = d->top->msgdef; + } + // while(buf < d->packed_end) { TODO: packed arrays } + completed = buf; +} + // buf is our current offset, moves from start to end. const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str); const uint8_t *const start = buf; // ptr equivalent of d->completed_offset @@ -421,68 +471,7 @@ size_t upb_decoder_decode(upb_decoder *d, upb_strptr str, upb_status *status) // Parse/handle tag. upb_tag tag; buf = decode_tag(buf, end, &tag, status); - if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) { - CHECK_STATUS(); - if(!isgroup(submsg_end)) { - upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - d->completed_offset + (completed - start)); - goto err; - } - submsg_end = pop(d, start, status); - msgdef = d->top->msgdef; - completed = buf; - continue; - } - - // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); - // Parse/handle field. - if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { - int32_t delim_len; - buf = upb_get_INT32(buf, end, &delim_len, status); - CHECK_STATUS(); // Checking decode_tag() and upb_get_INT32(). 
- const uint8_t *delim_end = buf + delim_len; - if(f && f->type == UPB_TYPE(MESSAGE)) { - submsg_end = push(d, start, delim_end - start, f, status); - msgdef = d->top->msgdef; - } else { - if(f && upb_isstringtype(f->type)) { - int32_t str_start = buf - start; - uint32_t len = str_start + delim_len; - sink_status = upb_sink_onstr(d->sink, f, str, str_start, len, status); - } // else { TODO: packed arrays } - // If field was not found, it is skipped silently. - buf = delim_end; // Could be >end. - } - } else { - if(!f || !upb_check_type(tag.wire_type, f->type)) { - buf = skip_wire_value(buf, end, tag.wire_type, status); - } else if (f->type == UPB_TYPE(GROUP)) { - submsg_end = push(d, start, 0, f, status); - msgdef = d->top->msgdef; - } else { - upb_value val; - buf = upb_decode_value(buf, end, f->type, upb_value_addrof(&val), - status); - CHECK_STATUS(); // Checking upb_decode_value(). - sink_status = upb_sink_onvalue(d->sink, f, val, status); - } - } - CHECK_STATUS(); - - while(buf >= submsg_end) { - if(buf > submsg_end) { - upb_seterr(status, UPB_STATUS_ERROR, "Expected submsg end offset " - "did not lie on a tag/value boundary."); - goto err; - } - submsg_end = pop(d, start, status); - msgdef = d->top->msgdef; - } - // while(buf < d->packed_end) { TODO: packed arrays } - completed = buf; } size_t read; diff --git a/src/upb_decoder.h b/src/upb_decoder.h index b84c149..ea20d3d 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -39,15 +39,11 @@ void upb_decoder_free(upb_decoder *p); // state where it has not seen any data, and expects the next data to be from // the beginning of a new protobuf. Parsers must be reset before they can be // used. A decoder can be reset multiple times. -void upb_decoder_reset(upb_decoder *p, upb_sink *sink); - -// Decodes protobuf data out of str, returning how much data was decoded. The -// next call to upb_decoder_decode should begin with the first byte that was -// not decoded. "status" indicates whether an error occurred. 
-// -// TODO: provide the following guarantee: -// retval will always be >= len. -size_t upb_decoder_decode(upb_decoder *p, upb_strptr str, upb_status *status); +void upb_decoder_reset(upb_decoder *p, upb_bytesrc *bytesrc); + +// Returns a upb_src pointer by which the decoder can be used. The returned +// upb_src is invalidated by upb_decoder_reset(). +upb_src *upb_decoder_getsrc(upb_decoder *p); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 6dd11d1..97b9885 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -26,6 +26,8 @@ extern "C" { /* upb_src ********************************************************************/ +// TODO: decide how to handle unknown fields. + // Retrieves the fielddef for the next field in the stream. Returns NULL on // error or end-of-stream. upb_fielddef *upb_src_getdef(upb_src *src); @@ -38,7 +40,8 @@ bool upb_src_getval(upb_src *src, upb_valueptr val); // Like upb_src_getval() but skips the value. bool upb_src_skipval(upb_src *src); -// Descends into a submessage. +// Descends into a submessage. May only be called after a def has been +// returned that indicates a submessage. bool upb_src_startmsg(upb_src *src); // Stops reading a submessage. May be called before the stream is EOF, in @@ -88,61 +91,61 @@ int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. upb_status *upb_bytesink_status(upb_bytesink *sink); -/* upb_sink implementation ****************************************************/ - -typedef struct upb_sink_callbacks { - upb_value_cb value_cb; - upb_str_cb str_cb; - upb_start_cb start_cb; - upb_end_cb end_cb; -} upb_sink_callbacks; - -// These macros implement a mini virtual function dispatch for upb_sink instances. -// This allows functions that call upb_sinks to just write: -// -// upb_sink_onvalue(sink, field, val); -// -// The macro will handle the virtual function lookup and dispatch. 
We could -// potentially define these later to also be capable of calling a C++ virtual -// method instead of doing the virtual dispatch manually. This would make it -// possible to write C++ sinks in a more natural style without loss of -// efficiency. We could have a flag in upb_sink defining whether it is a C -// sink or a C++ one. -#define upb_sink_onvalue(s, f, val, status) s->vtbl->value_cb(s, f, val, status) -#define upb_sink_onstr(s, f, str, start, end, status) s->vtbl->str_cb(s, f, str, start, end, status) -#define upb_sink_onstart(s, f, status) s->vtbl->start_cb(s, f, status) -#define upb_sink_onend(s, f, status) s->vtbl->end_cb(s, f, status) - -// Initializes a plain C visitor with the given vtbl. The sink must have been -// allocated separately. -INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { +/* Dynamic Dispatch implementation for src/sink interfaces ********************/ + +// The rest of this file only concerns components that are implementing any of +// the above interfaces. To simple clients the code below should be considered +// private. + +// Typedefs for function pointers to all of the above functions. 
+typedef upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); +typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_skipval_fptr)(upb_src *src); +typedef bool (*upb_src_startmsg_fptr)(upb_src *src); +typedef bool (*upb_src_endmsg_fptr)(upb_src *src); +typedef upb_status *(*upb_src_status_fptr)(upb_src *src); + +typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, upb_fielddef *def); +typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); +typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); +typedef upb_status *(*upb_sink_status_fptr)(upb_sink *sink); + +typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); +typedef bool (*upb_bytesrc_append_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t len); +typedef upb_status *(*upb_bytesrc_status_fptr)(upb_src *src); + +typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); +typedef upb_status *(*upb_bytesink_status_fptr)(upb_bytesink *sink); + +// Vtables for the above interfaces. +typedef struct { + upb_src_getdef_fptr getdef; + upb_src_getval_fptr getval; + upb_src_skipval_fptr skipval; + upb_src_startmsg_fptr startmsg; + upb_src_endmsg_fptr endmsg; + upb_src_status_fptr status; +} upb_src_vtable; + +// "Base Class" definitions; components that implement these interfaces should +// contain one of these structures. + +typedef struct { + upb_src_vtable *vtbl; +#ifndef NDEBUG + int state; // For debug-mode checking of API usage. +#endif +} upb_src; + +INLINE void upb_sink_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; +#ifndef DEBUG + // TODO: initialize debug-mode checking. +#endif } - -/* upb_bytesink ***************************************************************/ - -// A upb_bytesink is like a upb_sync, but for bytes instead of structured -// protobuf data. 
Parsers implement upb_bytesink and push to a upb_sink, -// serializers do the opposite (implement upb_sink and push to upb_bytesink). -// -// The two simplest kinds of sinks are "write to string" and "write to FILE*". - -// A forward declaration solely for the benefit of declaring upb_byte_cb below. -// Always prefer upb_bytesink (without the "struct" keyword) instead. -struct _upb_bytesink; - -// The single bytesink callback; it takes the bytes to be written and returns -// how many were successfully written. If the return value is <0, the caller -// should stop processing. -typedef int32_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str, - uint32_t start, uint32_t end, - upb_status *status); - -typedef struct _upb_bytesink { - upb_byte_cb *cb; -} upb_bytesink; - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_string.h b/src/upb_string.h index 0516377..c0d14d5 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -64,9 +64,10 @@ INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } // Use to read the bytes of the string. The caller *must* call // upb_string_endread() after the data has been read. The window between -// upb_string_getrobuf() and upb_string_endread() should be kept as short -// as possible. No other functions may be called on the string during this -// window except upb_string_len(). +// upb_string_getrobuf() and upb_string_endread() should be kept as short as +// possible, because any pending upb_string_detach() may be blocked until +// upb_string_endread is called(). No other functions may be called on the +// string during this window except upb_string_len(). INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str); -- cgit v1.2.3 From d29f80d6f320143363fb101a9e94f89c17788468 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 3 Jun 2010 19:55:39 -0700 Subject: More incremental work. 
--- src/upb.c | 38 +++--- src/upb.h | 13 +- src/upb_decoder.c | 358 ++++++++++++++++++++---------------------------------- src/upb_encoder.c | 3 - 4 files changed, 162 insertions(+), 250 deletions(-) diff --git a/src/upb.c b/src/upb.c index 146a9a5..5d145e5 100644 --- a/src/upb.c +++ b/src/upb.c @@ -15,25 +15,27 @@ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \ {alignof(ctype), sizeof(ctype), wire_type, #ctype}, +// With packed fields, any type expecting 32-bit, 64-bit or varint can instead +// receive delimited. upb_type_info upb_types[] = { - TYPE_INFO(DOUBLE, UPB_WIRE_TYPE_64BIT, double) - TYPE_INFO(FLOAT, UPB_WIRE_TYPE_32BIT, float) - TYPE_INFO(INT64, UPB_WIRE_TYPE_VARINT, int64_t) - TYPE_INFO(UINT64, UPB_WIRE_TYPE_VARINT, uint64_t) - TYPE_INFO(INT32, UPB_WIRE_TYPE_VARINT, int32_t) - TYPE_INFO(FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t) - TYPE_INFO(FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t) - TYPE_INFO(BOOL, UPB_WIRE_TYPE_VARINT, bool) - TYPE_INFO(MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*) - TYPE_INFO(GROUP, UPB_WIRE_TYPE_START_GROUP, void*) - TYPE_INFO(UINT32, UPB_WIRE_TYPE_VARINT, uint32_t) - TYPE_INFO(ENUM, UPB_WIRE_TYPE_VARINT, uint32_t) - TYPE_INFO(SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t) - TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t) - TYPE_INFO(SINT32, UPB_WIRE_TYPE_VARINT, int32_t) - TYPE_INFO(SINT64, UPB_WIRE_TYPE_VARINT, int64_t) - TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, upb_strptr) - TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, upb_strptr) + TYPE_INFO(DOUBLE, (1<=10 +// bytes are available at buf. Returns the number of bytes consumed, or <0 if +// the varint was unterminated after 10 bytes. +INLINE int8_t upb_get_v_uint64_t(const uint8_t *buf, uint64_t *val) { // We inline this common case (1-byte varints), if that fails we dispatch to // the full (non-inlined) version. 
- if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); + int8_t ret = 1; + *val = *buf & 0x7f; + if(*buf & 0x80) { + // Varint is >1 byte. + ret += upb_get_v_uint64_t_full(buf + 1, val); } + return ret; } // Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit // varint is not a true wire type. -INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) +INLINE int8_t upb_get_v_uint32_t(const uint8_t *buf, uint32_t *val) { uint64_t val64; - const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); + int8_t ret = upb_get_v_uint64_t(buf, end, &val64, status); *val = (uint32_t)val64; // Discard the high bits. return ret; } -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller +// promises that 4 bytes are available at buf. +INLINE void upb_get_f_uint32_t(const uint8_t *buf, uint32_t *val) { - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } #if UPB_UNALIGNED_READS_OK *val = *(uint32_t*)buf; #else @@ -62,18 +57,11 @@ INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24); #undef SHL #endif - return uint32_end; } // Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). 
-INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) +INLINE void upb_get_f_uint64_t(const uint8_t *buf uint64_t *val) { - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } #if UPB_UNALIGNED_READS_OK *val = *(uint64_t*)buf; #else @@ -82,7 +70,6 @@ INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56); #undef SHL #endif - return uint64_end; } INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, @@ -94,40 +81,10 @@ INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, for(; buf < (uint8_t*)end && (last & 0x80); buf++) last = *buf; - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } + if(buf > maxend) return -1; return buf; } -INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - /* Functions to read .proto values. *******************************************/ // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -136,13 +93,10 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } // Use macros to define a set of two functions for each .proto type: // -// // Reads and converts a .proto value from buf, placing it in d. 
-// // "end" indicates the end of the current buffer (if the buffer does -// // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). -// // On success, a pointer will be returned to the first byte that was -// // not consumed. -// const uint8_t *upb_get_INT32(const uint8_t *buf, const uint8_t *end, -// int32_t *d, upb_status *status); +// // Reads and converts a .proto value from buf, placing it in d. At least +// // 10 bytes must be available at "buf". On success, the number of bytes +// // consumed is returned, otherwise <0. +// const int8_t upb_get_INT32(const uint8_t *buf, int32_t *d); // // // Given an already read wire value s (source), convert it to a .proto // // value and return it. @@ -155,10 +109,9 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } INLINE val_t upb_wvtov_ ## type(wire_t s) #define GET(type, v_or_f, wire_t, val_t, member_name) \ - INLINE const uint8_t *upb_get_ ## type(const uint8_t *buf, const uint8_t *end, \ - val_t *d, upb_status *status) { \ + INLINE const uint8_t *upb_get_ ## type(const uint8_t *buf, val_t *d) { \ wire_t tmp = 0; \ - const uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \ + const int8_t ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, &tmp); \ *d = upb_wvtov_ ## type(tmp); \ return ret; \ } @@ -197,52 +150,21 @@ T(FLOAT, f, uint32_t, float, _float) { // Parses a 64-bit varint that is known to be >= 2 bytes (the inline version // handles 1 and 2 byte varints). 
-const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) +const int8_t upb_get_v_uint64_t_full(const uint8_t *buf uint64_t *val) { - const uint8_t *const maxend = buf + 10; + const uint8_t *const maxend = buf + 9; uint8_t last = 0x80; - *val = 0; int bitpos; - for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7) + for(bitpos = 0; buf < (uint8_t*)maxend && (last & 0x80); buf++, bitpos += 7) *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; - if(buf >= end && buf <= maxend && (last & 0x80)) { - upb_seterr(status, UPB_STATUS_NEED_MORE_DATA, - "Provided data ended in the middle of a varint.\n"); - buf = end; - } else if(buf > maxend) { - upb_seterr(status, UPB_ERROR_UNTERMINATED_VARINT, - "Varint was unterminated after 10 bytes.\n"); - buf = end; + if(buf >= maxend) { + return -11; } - return buf; } -// Advances buf past the current wire value (of type wt), saving the result in -// outbuf. -static const uint8_t *skip_wire_value(const uint8_t *buf, const uint8_t *end, - upb_wire_type_t wt, upb_status *status) -{ - switch(wt) { - case UPB_WIRE_TYPE_VARINT: - return upb_skip_v_uint64_t(buf, end, status); - case UPB_WIRE_TYPE_64BIT: - return upb_skip_f_uint64_t(buf, end, status); - case UPB_WIRE_TYPE_32BIT: - return upb_skip_f_uint32_t(buf, end, status); - case UPB_WIRE_TYPE_START_GROUP: - // TODO: skip to matching end group. - case UPB_WIRE_TYPE_END_GROUP: - return buf; - default: - status->code = UPB_STATUS_ERROR; - return end; - } -} - static const uint8_t *upb_decode_value(const uint8_t *buf, const uint8_t *end, upb_field_type_t ft, upb_valueptr v, upb_status *status) @@ -276,7 +198,7 @@ static const uint8_t *upb_decode_value(const uint8_t *buf, const uint8_t *end, typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - int32_t end_offset; // For groups, 0. + int32_t end_offset; // For groups, -1. 
} upb_decoder_frame; struct upb_decoder { @@ -291,13 +213,27 @@ struct upb_decoder { // The current buffer. upb_string *buf; + // The overflow buffer. Used when fewer than UPB_MAX_ENCODED_SIZE bytes + // are left in a buffer, the remaining bytes are copied here along with + // the bytes from the next buffer (or 0x80 if the byte stream is EOF). + uint8_t overflow_buf[UPB_MAX_ENCODED_SIZE]; + + // The number of bytes we have yet to consume from this buffer. + uint32_t buf_bytes_remaining; + // The overall stream offset of the beginning of this buffer. - uint32_t stream_offset; + uint32_t buf_stream_offset; + + // Indicates that we are in the middle of skipping bytes or groups (or both). + // If both are set, the byte-skipping needs to happen first. + uint8_t skip_groups; + uint32_t skip_bytes; - // The current offset in this buffer. - uint32_t buffer_offset; + bool eof; }; +/* upb_decoder construction/destruction. **************************************/ + upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { upb_decoder *d = malloc(sizeof(*d)); @@ -322,7 +258,9 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) d->top->end_offset = 0; } +/* upb_decoder buffering. *****************************************************/ +bool upb_decoder_get_v_uint32_t(upb_decoder *d, uint32_t *val) {} static const void *get_msgend(upb_decoder *d, const uint8_t *start) { @@ -337,146 +275,110 @@ static bool isgroup(const void *submsg_end) return submsg_end == (void*)UINTPTR_MAX; } -extern upb_wire_type_t upb_expected_wire_types[]; -// Returns true if wt is the correct on-the-wire type for ft. -INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - // This doesn't currently support packed arrays. - return upb_types[ft].expected_wire_type == wt; -} - - -// Pushes a new stack frame for a submessage with the given len (which will -// be zero if the submessage is a group). 
-static const uint8_t *push(upb_decoder *d, const uint8_t *start, - uint32_t submsg_len, upb_fielddef *f, - upb_status *status) +upb_fielddef *upb_decoder_getdef(upb_decoder *d) { - d->top->field = f; - d->top++; - if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, - "Nesting exceeded maximum (%d levels)\n", - UPB_MAX_NESTING); + // Detect end-of-submessage. + if(offset >= d->top->end_offset) { + d->eof = true; return NULL; } - upb_decoder_frame *frame = d->top; - frame->end_offset = d->completed_offset + submsg_len; - frame->msgdef = upb_downcast_msgdef(f->def); - upb_sink_onstart(d->sink, f, status); - return get_msgend(d, start); -} + // Handles the packed field case. + if(d->field) return d->field; + if(d->eof) return NULL; -// Pops a stack frame, returning a pointer for where the next submsg should -// end (or a pointer that is out of range for a group). -static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) -{ - d->top--; - upb_sink_onend(d->sink, d->top->field, status); - return get_msgend(d, start); -} - -// Parses a tag, places the result in *tag. -upb_fielddef *upb_decoder_src_getdef(upb_decoder *d) -{ +again: uint32_t key; - upb_fill_buffer(d); - d->buf = upb_get_v_uint32_t(d->buf, d->end, &key, &d->status); - if (!upb_ok(status)) return NULL; + if(!upb_decoder_get_v_uint32_t(d, &key)) return NULL; if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) { - if(!isgroup(d->submsg_end)) { + if(isgroup(d->top->submsg_end)) { + d->eof = true; + d->status->code = UPB_STATUS_EOF; + } else { upb_seterr(d->status, UPB_STATUS_ERROR, "End group seen but current " "message is not a group, byte offset: %zd", - d->completed_offset + (completed - start)); - return NULL; + upb_decoder_offset(d)); } - submsg_end = pop(d, start, status); - msgdef = d->top->msgdef; - completed = buf; return NULL; } + + // For delimited wire values we parse the length now, since we need it in all + // cases. 
+ if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) { + if(!upb_decoder_get_v_uint32_t(d, &d->delim_len)) return NULL; + } + // Look up field by tag number. - return upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key)); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key)); + + if (!f || !upb_check_type(upb_wiretype_from_key(key), f->type)) { + // Unknown field or incorrect wire type. In the future these cases may be + // separated, like if we want to give the client unknown fields but not + // incorrect fields. + upb_decoder_skipval(d); + goto again; + } + return f; } -bool upb_decoder_src_getval(upb_src *src, upb_valueptr val) +bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - if(upb_wiretype_from_key(d->key) == UPB_WIRE_TYPE_DELIMITED) { - int32_t delim_len; - d->buf = upb_get_INT32(d->buf, d->end, &delim_len, &d->status); - CHECK_STATUS(); // Checking decode_tag() and upb_get_INT32(). - int32_t needed = - const uint8_t *delim_end = buf + delim_len; - if(f->type == UPB_TYPE(MESSAGE)) { - submsg_end = push(d, start, delim_end - start, f, status); - msgdef = d->top->msgdef; - } else { - if(f && upb_isstringtype(f->type)) { - int32_t str_start = buf - start; - uint32_t len = str_start + delim_len; - sink_status = upb_sink_onstr(d->sink, f, str, str_start, len, status); - } // else { TODO: packed arrays } - // If field was not found, it is skipped silently. - buf = delim_end; // Could be >end. 
+ if(upb_isstringtype(d->f->type)) { + d->str = upb_string_tryrecycle(d->str); + if (d->delimited_len <= d->bytes_left) { + upb_string_substr(d->str, d->buf, upb_string_len(d->buf) - d->bytes_left, d->delimited_len); } } else { - if(!upb_check_type(tag.wire_type, f->type)) { - buf = skip_wire_value(buf, end, tag.wire_type, status); - } else if (f->type == UPB_TYPE(GROUP)) { - submsg_end = push(d, start, 0, f, status); - msgdef = d->top->msgdef; - } else { - upb_value val; - buf = upb_decode_value(buf, end, f->type, upb_value_addrof(&val), - status); - CHECK_STATUS(); // Checking upb_decode_value(). - sink_status = upb_sink_onvalue(d->sink, f, val, status); - } + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + buf = upb_decode_value(buf, end, f->type, val, &d->status); } - CHECK_STATUS(); +} - while(buf >= submsg_end) { - if(buf > submsg_end) { - upb_seterr(status, UPB_STATUS_ERROR, "Expected submsg end offset " - "did not lie on a tag/value boundary."); - goto err; - } - submsg_end = pop(d, start, status); - msgdef = d->top->msgdef; +bool upb_decoder_skipval(upb_decoder *d) { + switch(d->key.wire_type) { + case UPB_WIRE_TYPE_VARINT: + return upb_skip_v_uint64_t(buf, end, status); + case UPB_WIRE_TYPE_64BIT: + return upb_skip_bytes(8); + case UPB_WIRE_TYPE_32BIT: + return upb_skip_bytes(4); + case UPB_WIRE_TYPE_START_GROUP: + return upb_skip_groups(1); + case UPB_WIRE_TYPE_DELIMITED: + return upb_skip_bytes(d->delimited_len); + default: + // Including UPB_WIRE_TYPE_END_GROUP. + assert(false); + upb_seterr(&d->status, UPB_STATUS_ERROR, "Tried to skip an end group"); + return false; } - // while(buf < d->packed_end) { TODO: packed arrays } - completed = buf; } - // buf is our current offset, moves from start to end. 
- const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str); - const uint8_t *const start = buf; // ptr equivalent of d->completed_offset - const uint8_t *const end = buf + upb_strlen(str); - - // When we have fully decoded a tag/value pair, we advance this. - const uint8_t *completed = buf; - - const uint8_t *submsg_end = get_msgend(d, start); - upb_msgdef *msgdef = d->top->msgdef; - upb_sink_status sink_status = UPB_SINK_CONTINUE; - - // We need to check the status of operations that can fail, but we do so as - // late as possible to avoid introducing branches that have to wait on - // (status->code) which must be loaded from memory. We must always check - // before calling a user callback. -#define CHECK_STATUS() do { if(!upb_ok(status)) goto err; } while(0) - - // Main loop: executed once per tag/field pair. - while(sink_status == UPB_SINK_CONTINUE && buf < end) { - // Parse/handle tag. - upb_tag tag; - buf = decode_tag(buf, end, &tag, status); - // Parse/handle field. +bool upb_decoder_startmsg(upb_src *src) { + } else if(f->type == UPB_TYPE(MESSAGE)) { + submsg_end = push(d, start, delim_end - start, f, status); + msgdef = d->top->msgdef; + } else if (f->type == UPB_TYPE(GROUP)) { + submsg_end = push(d, start, 0, f, status); + msgdef = d->top->msgdef; + d->top->field = f; + d->top++; + if(d->top >= d->limit) { + upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, + "Nesting exceeded maximum (%d levels)\n", + UPB_MAX_NESTING); + return NULL; } + upb_decoder_frame *frame = d->top; + frame->end_offset = d->completed_offset + submsg_len; + frame->msgdef = upb_downcast_msgdef(f->def); - size_t read; -err: - read = (char*)completed - (char*)start; - d->completed_offset += read; - return read; + return get_msgend(d, start); } + +bool upb_decoder_endmsg(upb_decoder *src) { + d->top--; +} + +upb_status *upb_decoder_status(upb_decoder *d) { return &d->status; } + diff --git a/src/upb_encoder.c b/src/upb_encoder.c index f1156a8..304a423 100644 --- a/src/upb_encoder.c +++ 
b/src/upb_encoder.c @@ -38,9 +38,6 @@ static size_t upb_f_uint32_t_size(uint32_t val) { return sizeof(uint32_t); } -// The biggest possible single value is a 10-byte varint. -#define UPB_MAX_ENCODED_SIZE 10 - /* Functions to write wire values. ********************************************/ -- cgit v1.2.3 From a8d3f8e54388467c8b38c23e736553af9b2f88ec Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 5 Jun 2010 20:17:29 -0700 Subject: More work on the decoder. --- src/upb.h | 3 + src/upb_decoder.c | 274 ++++++++++++++++++++++++++++-------------------------- src/upb_srcsink.h | 12 ++- 3 files changed, 155 insertions(+), 134 deletions(-) diff --git a/src/upb.h b/src/upb.h index 8c6f599..97fd20d 100644 --- a/src/upb.h +++ b/src/upb.h @@ -284,6 +284,9 @@ enum upb_status_code { // A read or write from a streaming src/sink could not be completed right now. UPB_STATUS_TRYAGAIN = 1, + // A value had an incorrect wire type and will be skipped. + UPB_STATUS_BADWIRETYPE = 2, + // An unrecoverable error occurred. UPB_STATUS_ERROR = -1, diff --git a/src/upb_decoder.c b/src/upb_decoder.c index ed756c2..73f8e9b 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -13,17 +13,12 @@ /* Functions to read wire values. *********************************************/ -// These functions are internal to the decode, but might be moved into an -// internal header file if we at some point in the future opt to do code -// generation, because the generated code would want to inline these functions. -// The same applies to the functions to read .proto values below. - -const int8_t upb_get_v_uint64_t_full(const uint8_t *buf, uint64_t *val); +const int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); // Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10 -// bytes are available at buf. Returns the number of bytes consumed, or <0 if +// bytes are available at buf. 
Returns the number of bytes consumed, or 11 if // the varint was unterminated after 10 bytes. -INLINE int8_t upb_get_v_uint64_t(const uint8_t *buf, uint64_t *val) +INLINE int8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) { // We inline this common case (1-byte varints), if that fails we dispatch to // the full (non-inlined) version. @@ -31,24 +26,24 @@ INLINE int8_t upb_get_v_uint64_t(const uint8_t *buf, uint64_t *val) *val = *buf & 0x7f; if(*buf & 0x80) { // Varint is >1 byte. - ret += upb_get_v_uint64_t_full(buf + 1, val); + ret += upb_get_v_uint64_full(buf + 1, val); } return ret; } // Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit // varint is not a true wire type. -INLINE int8_t upb_get_v_uint32_t(const uint8_t *buf, uint32_t *val) +INLINE int8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val) { uint64_t val64; - int8_t ret = upb_get_v_uint64_t(buf, end, &val64, status); + int8_t ret = upb_get_v_uint64(buf, end, &val64, status); *val = (uint32_t)val64; // Discard the high bits. return ret; } // Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller // promises that 4 bytes are available at buf. -INLINE void upb_get_f_uint32_t(const uint8_t *buf, uint32_t *val) +INLINE void upb_get_f_uint32(const uint8_t *buf, uint32_t *val) { #if UPB_UNALIGNED_READS_OK *val = *(uint32_t*)buf; @@ -60,7 +55,7 @@ INLINE void upb_get_f_uint32_t(const uint8_t *buf, uint32_t *val) } // Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). 
-INLINE void upb_get_f_uint64_t(const uint8_t *buf uint64_t *val) +INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val) { #if UPB_UNALIGNED_READS_OK *val = *(uint64_t*)buf; @@ -72,9 +67,9 @@ INLINE void upb_get_f_uint64_t(const uint8_t *buf uint64_t *val) #endif } -INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) +INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf, + const uint8_t *end, + upb_status *status) { const uint8_t *const maxend = buf + 10; uint8_t last = 0x80; @@ -85,72 +80,9 @@ INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, return buf; } -/* Functions to read .proto values. *******************************************/ - -// Performs zig-zag decoding, which is used by sint32 and sint64. -INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - -// Use macros to define a set of two functions for each .proto type: -// -// // Reads and converts a .proto value from buf, placing it in d. At least -// // 10 bytes must be available at "buf". On success, the number of bytes -// // consumed is returned, otherwise <0. -// const int8_t upb_get_INT32(const uint8_t *buf, int32_t *d); -// -// // Given an already read wire value s (source), convert it to a .proto -// // value and return it. -// int32_t upb_wvtov_INT32(uint32_t s); -// -// These are the most efficient functions to call if you want to decode a value -// for a known type. 
- -#define WVTOV(type, wire_t, val_t) \ - INLINE val_t upb_wvtov_ ## type(wire_t s) - -#define GET(type, v_or_f, wire_t, val_t, member_name) \ - INLINE const uint8_t *upb_get_ ## type(const uint8_t *buf, val_t *d) { \ - wire_t tmp = 0; \ - const int8_t ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, &tmp); \ - *d = upb_wvtov_ ## type(tmp); \ - return ret; \ - } - -#define T(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ - GET(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t) - -T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } -T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; } -T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; } -T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; } -T(DOUBLE, f, uint64_t, double, _double) { - upb_value v; - v.uint64 = s; - return v._double; -} -T(FLOAT, f, uint32_t, float, _float) { - upb_value v; - v.uint32 = s; - return v._float; -} - -#undef WVTOV -#undef GET -#undef T - // Parses a 64-bit varint that is known to be >= 2 bytes (the inline version // handles 1 and 2 byte varints). 
-const int8_t upb_get_v_uint64_t_full(const uint8_t *buf uint64_t *val) +const int8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val) { const uint8_t *const maxend = buf + 9; uint8_t last = 0x80; @@ -165,33 +97,12 @@ const int8_t upb_get_v_uint64_t_full(const uint8_t *buf uint64_t *val) return buf; } -static const uint8_t *upb_decode_value(const uint8_t *buf, const uint8_t *end, - upb_field_type_t ft, upb_valueptr v, - upb_status *status) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_get_ ## t(buf, end, v.member_name, status); - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: return end; - } +// Performs zig-zag decoding, which is used by sint32 and sint64. +INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -#undef CASE -} + +/* Functions to read .proto values. *******************************************/ // The decoder keeps a stack with one entry per level of recursion. // upb_decoder_frame is one frame of that stack. @@ -219,7 +130,7 @@ struct upb_decoder { uint8_t overflow_buf[UPB_MAX_ENCODED_SIZE]; // The number of bytes we have yet to consume from this buffer. - uint32_t buf_bytes_remaining; + int32_t buf_bytes_remaining; // The overall stream offset of the beginning of this buffer. uint32_t buf_stream_offset; @@ -260,19 +171,79 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) /* upb_decoder buffering. 
*****************************************************/ -bool upb_decoder_get_v_uint32_t(upb_decoder *d, uint32_t *val) {} +static void upb_decoder_advancebuf(upb_decoder *d) +{ + // Discard the current buffer if we are done with it, make the next buffer + // current if there is one. + if(d->buf_bytes_remaining <= 0) { + if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); + d->buf = d->nextbuf; + d->nextbuf = NULL; + if(d->buf) d->buf_bytes_remaining += upb_string_len(d->buf); + } +} -static const void *get_msgend(upb_decoder *d, const uint8_t *start) +static void upb_decoder_pullnextbuf(upb_decoder *d) { - if(d->top->end_offset > 0) - return start + (d->top->end_offset - d->completed_offset); - else - return (void*)UINTPTR_MAX; // group. + if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { // Need another buffer? + // We test the eof flag both before and after the get; checking it + // before lets us short-circuit the get if we are already at eof, + // checking it after makes sure we don't report an error if the get only + // failed because of eof. + if(!(d->nextbuf = upb_bytesrc_get(d->bytesrc)) && + !upb_bytesrc_eof(d->bytesrc)) { + // There was an error in the byte stream, halt the decoder. + upb_copyerr(&d->status, upb_bytesrc_status(d->bytesrc)); + return; + } + } } -static bool isgroup(const void *submsg_end) +static void upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) { - return submsg_end == (void*)UINTPTR_MAX; + d->buf_bytes_remaining -= bytes; + while(d->buf_bytes_remaining < 0) upb_decoder_getbuf(d); +} + +static void upb_decoder_skipgroup(upb_decoder *d) +{ + // This will be mututally recursive if the group has sub-groups. If we + // wanted to handle EAGAIN in the future, this approach would not work; + // we would need to track the group depth explicitly. 
+ while(upb_decoder_getdef(d)) upb_decoder_skipval(d); +} + +static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) +{ + if(d->buf_bytes_remaining < 10) { + upb_strlen_t total = 0; + if(d->buf) { + upb_strlen_t len = upb_string_len(d->buf); + memcpy(d->overflow_buf, upb_string_getrobuf(d->buf), len); + total += len; + if(d->nextbuf) { + len = upb_string_len(d->nextbuf); + if(total + len > 10) len = 10 - total; + memcpy(d->overflow_buf + total, upb_string_getrobuf(d->nextbuf, len)); + total += len; + } + } + memset(d->overflow_buf + total, 0x80, 10 - total); + } else { + return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - + d->buf_bytes_remaining; + } +} + +INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) +{ + if(d->buf_bytes_remaining >= 10) { + *bytes = d->buf_bytes_remaining; + return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - + d->buf_bytes_remaining; + } else { + return upb_decoder_getbuf_full(d, bytes); + } } upb_fielddef *upb_decoder_getdef(upb_decoder *d) @@ -289,7 +260,7 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) again: uint32_t key; - if(!upb_decoder_get_v_uint32_t(d, &key)) return NULL; + if(!upb_decoder_get_v_uint32(d, &key)) return NULL; if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) { if(isgroup(d->top->submsg_end)) { d->eof = true; @@ -305,7 +276,7 @@ again: // For delimited wire values we parse the length now, since we need it in all // cases. if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) { - if(!upb_decoder_get_v_uint32_t(d, &d->delim_len)) return NULL; + if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL; } // Look up field by tag number. @@ -323,21 +294,61 @@ again: bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - if(upb_isstringtype(d->f->type)) { + uint32_t bytes; + if(expected_type_for_field == UPB_DELIMITED) { + // A string, bytes, or a length-delimited submessage. 
The latter isn't + // technically a string, but can be gotten as one to perform lazy parsing. d->str = upb_string_tryrecycle(d->str); - if (d->delimited_len <= d->bytes_left) { - upb_string_substr(d->str, d->buf, upb_string_len(d->buf) - d->bytes_left, d->delimited_len); + if (d->delimited_len <= d->buf_bytes_remaining) { + // The entire string is inside our current buffer, so we can just + // return a substring of the buffer without copying. + upb_string_substr(d->str, d->buf, + upb_string_len(d->buf) - d->buf_bytes_remaining, + d->delimited_len); + d->buf_bytes_remaining -= d->delimited_len; + *val.str = d->str; + } else { + // The string spans buffers, so we must copy. + memcpy(upb_string_getrwbuf(d->str, len), + upb_string_getrobuf(d->buf) + upb_string_len(d->buf), + bar); + if(!upb_bytesrc_append(d->bytesrc, d->str, len)) goto err; } } else { - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - buf = upb_decode_value(buf, end, f->type, val, &d->status); + // For all of the integer types we need the bytes to be in a single + // contiguous buffer. + const uint8_t *buf = upb_decoder_getbuf(d, &bytes) + switch(expected_type_for_field) { + case UPB_32BIT_VARINT: + if(upb_get_v_uint32(buf, val.uint32) > 10) goto err; + if(f->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32); + break; + case UPB_64BIT_VARINT: { + if(upb_get_v_uint64(buf, val.uint64) > 5) goto err; + if(f->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); + break; + case UPB_64BIT_FIXED: + if(bytes < 8) goto err; + upb_get_f_uint64(buf, val.uint64); + break; + case UPB_32BIT_FIXED: + if(bytes < 4) goto err; + upb_get_f_uint32(buf, val.uint32); + break; + default: + // Including start/end group. 
+ goto err; } + if(non-packed field || packed field that is done) + d->field = NULL; + return true; +err: } bool upb_decoder_skipval(upb_decoder *d) { switch(d->key.wire_type) { case UPB_WIRE_TYPE_VARINT: - return upb_skip_v_uint64_t(buf, end, status); + return upb_skip_v_uint64(buf, end, status); case UPB_WIRE_TYPE_64BIT: return upb_skip_bytes(8); case UPB_WIRE_TYPE_32BIT: @@ -355,19 +366,13 @@ bool upb_decoder_skipval(upb_decoder *d) { } bool upb_decoder_startmsg(upb_src *src) { - } else if(f->type == UPB_TYPE(MESSAGE)) { - submsg_end = push(d, start, delim_end - start, f, status); - msgdef = d->top->msgdef; - } else if (f->type == UPB_TYPE(GROUP)) { - submsg_end = push(d, start, 0, f, status); - msgdef = d->top->msgdef; d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, + upb_seterr(d->status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", UPB_MAX_NESTING); - return NULL; + return false; } upb_decoder_frame *frame = d->top; frame->end_offset = d->completed_offset + submsg_len; @@ -378,6 +383,13 @@ bool upb_decoder_startmsg(upb_src *src) { bool upb_decoder_endmsg(upb_decoder *src) { d->top--; + if(!d->eof) { + if(d->top->f->type == UPB_TYPE(GROUP)) + upb_skip_group(); + else + upb_skip_bytes(foo); + } + d->eof = false; } upb_status *upb_decoder_status(upb_decoder *d) { return &d->status; } diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 97b9885..6a60f31 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -70,9 +70,14 @@ upb_status *upb_sink_status(upb_sink *sink); /* upb_bytesrc ****************************************************************/ -// Returns the next string in the stream. The caller does not own a ref on the -// returned string; you must ref it yourself if you want one. -upb_string *upb_bytesrc_get(upb_bytesrc *src); +// Returns the next string in the stream. NULL is returned on error or eof. +// The string must be at least "minlen" bytes long. 
+// +// A ref is passed to the caller, though the caller is encouraged to pass the +// ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce +// memory allocation/deallocation. +upb_string *upb_bytesrc_get(upb_bytesrc *src, upb_strlen_t minlen); +void upb_bytesrc_recycle(upb_bytesrc *src, upb_string *str); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing @@ -134,6 +139,7 @@ typedef struct { typedef struct { upb_src_vtable *vtbl; + upb_status status; #ifndef NDEBUG int state; // For debug-mode checking of API usage. #endif -- cgit v1.2.3 From cfe0ef08c15d038865e9618af25de76c8304ad9e Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 7 Jun 2010 10:30:36 -0700 Subject: Changes to upb_def to stop using upb_msg. --- src/upb_def.c | 109 +++++++++++++++++++++++++++++++--------------------------- src/upb_def.h | 7 ---- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/upb_def.c b/src/upb_def.c index 7c9777d..e770025 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -184,39 +184,39 @@ static void unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_fielddef ***************************************************************/ -static void fielddef_init(upb_fielddef *f, - google_protobuf_FieldDescriptorProto *fd) +static upb_fielddef *fielddef_new(upb_src *src) { - f->type = fd->type; - f->label = fd->label; - f->number = fd->number; - f->name = upb_string_getref(fd->name, UPB_REF_FROZEN); + upb_fielddef *f = malloc(sizeof(*f)); f->def = NULL; f->owned = false; - assert(fd->set_flags.has.type_name == upb_hasdef(f)); - if(fd->set_flags.has.type_name) { - f->def = UPB_UPCAST(upb_unresolveddef_new(fd->type_name)); - f->owned = true; + upb_src_startmsg(src); + upb_fielddef *parsed_f; + while((parsed_f = upb_src_getdef(src))) { + switch(parsed_f->field_number) { + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: + 
CHECK(upb_src_getval(src, &f->type)); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: + CHECK(upb_src_getval(src, &f->label)); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECK(upb_src_getval(src, &f->number)); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: + CHECK(upb_src_getval(src, &f->name)); + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPENAME_FIELDNUM: + CHECK(upb_src_getval(src, &f->type_name)); + f->def = UPB_UPCAST(upb_unresolveddef_new(fd->type_name)); + f->owned = true; + } } -} - -static upb_fielddef *fielddef_new(google_protobuf_FieldDescriptorProto *fd) -{ - upb_fielddef *f = malloc(sizeof(*f)); - fielddef_init(f, fd); + upb_src_endmsg(src); + assert((f->def != NULL) == upb_hasdef(f)); return f; } -static void fielddef_uninit(upb_fielddef *f) -{ +static void fielddef_free(upb_fielddef *f) { upb_string_unref(f->name); if(upb_hasdef(f) && f->owned) { upb_def_unref(f->def); } -} - -static void fielddef_free(upb_fielddef *f) { - fielddef_uninit(f); free(f); } @@ -248,21 +248,6 @@ static int compare_fielddefs(const void *e1, const void *e2) { return compare_fields(*(void**)e1, *(void**)e2); } -static int compare_fds(const void *e1, const void *e2) { - upb_fielddef f1, f2; - fielddef_init(&f1, *(void**)e1); - fielddef_init(&f2, *(void**)e2); - int ret = compare_fields(&f1, &f2); - fielddef_uninit(&f1); - fielddef_uninit(&f2); - return ret; -} - -void upb_fielddef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) -{ - qsort(fds, num, sizeof(*fds), compare_fds); -} - static void fielddef_sort(upb_fielddef **defs, size_t num) { qsort(defs, num, sizeof(*defs), compare_fielddefs); @@ -355,23 +340,45 @@ typedef struct { upb_strptr string; } iton_ent; -static upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed, - upb_strptr fqname) +static void insert_enum_value(upb_src *src, upb_enumdef *e) +{ + upb_src_startmsg(src); + int32_t number = -1; + upb_string *name = NULL; + while((f = 
upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + upb_src_getval(src, &number); + break; + case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: + upb_src_getval(src, &name); + break; + default: + upb_src_skipval(src); + } + } + upb_src_endmsg(src); + ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; + iton_ent iton_ent = {{value->number, 0}, value->name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, &iton_ent.e); +} + +static upb_enumdef *enumdef_new(upb_src *src, upb_strptr fqname) { upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM, fqname); - int num_values = ed->set_flags.has.value ? - google_protobuf_EnumValueDescriptorProto_array_len(ed->value) : 0; - upb_strtable_init(&e->ntoi, num_values, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, num_values, sizeof(iton_ent)); - - for(int i = 0; i < num_values; i++) { - google_protobuf_EnumValueDescriptorProto *value = - google_protobuf_EnumValueDescriptorProto_array_get(ed->value, i); - ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; - iton_ent iton_ent = {{value->number, 0}, value->name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, &iton_ent.e); + upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_src_startmsg(src); + + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + if(f->number == GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM) { + insert_enum_value(src, e); + } else { + upb_src_skipval(src); + } } return e; } diff --git a/src/upb_def.h b/src/upb_def.h index 25c7ff6..d4f0352 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -126,13 +126,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { return upb_isstring(f) || upb_issubmsg(f); } -// Internal-only interface for the upb compiler. 
-// Sorts the given fielddefs in-place, according to what we think is an optimal -// ordering of fields. This can change from upb release to upb release. -struct google_protobuf_FieldDescriptorProto; -void upb_fielddef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, - size_t num); - /* upb_msgdef *****************************************************************/ struct google_protobuf_EnumDescriptorProto; -- cgit v1.2.3 From fbc57ee4882eca6321f8e1f2f5a3b8fae448605b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 7 Jun 2010 17:27:54 -0700 Subject: More work on the decoder. --- LICENSE | 4 +- src/upb_decoder.c | 179 +++++++++++++++++++++++++++++------------------------- src/upb_decoder.h | 27 ++++---- src/upb_srcsink.h | 3 +- 4 files changed, 113 insertions(+), 100 deletions(-) diff --git a/LICENSE b/LICENSE index 159a95a..b8b44d3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ -Copyright (c) 2009, Joshua Haberman -Copyright (c) 2009, Google Inc. +Copyright (c) 2009-2010, Joshua Haberman +Copyright (c) 2009-2010, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 73f8e9b..58e6bfa 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -18,7 +18,7 @@ const int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); // Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10 // bytes are available at buf. Returns the number of bytes consumed, or 11 if // the varint was unterminated after 10 bytes. -INLINE int8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) +INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) { // We inline this common case (1-byte varints), if that fails we dispatch to // the full (non-inlined) version. @@ -33,7 +33,7 @@ INLINE int8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) // Gets a varint -- called when we only need 32 bits of it. 
Note that a 32-bit // varint is not a true wire type. -INLINE int8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val) +INLINE uint8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val) { uint64_t val64; int8_t ret = upb_get_v_uint64(buf, end, &val64, status); @@ -54,7 +54,8 @@ INLINE void upb_get_f_uint32(const uint8_t *buf, uint32_t *val) #endif } -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller +// promises that 8 bytes are available at buf. INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val) { #if UPB_UNALIGNED_READS_OK @@ -67,9 +68,10 @@ INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val) #endif } -INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf, - const uint8_t *end, - upb_status *status) +// Skips a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that 10 +// bytes are available at "buf". Returns the number of bytes that were +// skipped. +INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf) { const uint8_t *const maxend = buf + 10; uint8_t last = 0x80; @@ -82,7 +84,7 @@ INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf, // Parses a 64-bit varint that is known to be >= 2 bytes (the inline version // handles 1 and 2 byte varints). -const int8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val) +const uint8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val) { const uint8_t *const maxend = buf + 9; uint8_t last = 0x80; @@ -102,7 +104,7 @@ INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -/* Functions to read .proto values. *******************************************/ +/* upb_decoder ****************************************************************/ // The decoder keeps a stack with one entry per level of recursion. // upb_decoder_frame is one frame of that stack. 
@@ -113,36 +115,30 @@ typedef struct { } upb_decoder_frame; struct upb_decoder { - // Immutable state of the decoder. + upb_src src; // upb_decoder is a upb_src. + upb_msgdef *toplevel_msgdef; upb_bytesrc *bytesrc; - // State pertaining to a particular decode (resettable). - // Stack entries store the offset where the submsg ends (for groups, 0). + // We keep a stack of messages we have recursed into. upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; - // The current buffer. + // The buffers of input data. See buffering code below for details. upb_string *buf; + upb_string *nextbuf; + uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; // Used to bridge buf and nextbuf. - // The overflow buffer. Used when fewer than UPB_MAX_ENCODED_SIZE bytes - // are left in a buffer, the remaining bytes are copied here along with - // the bytes from the next buffer (or 0x80 if the byte stream is EOF). - uint8_t overflow_buf[UPB_MAX_ENCODED_SIZE]; - - // The number of bytes we have yet to consume from this buffer. - int32_t buf_bytes_remaining; + // The number of bytes we have yet to consume from "buf". This can be + // negative if we have skipped more bytes than are in the buffer, or if we + // have started to consume bytes from "nextbuf". + int32_t buf_bytesleft; - // The overall stream offset of the beginning of this buffer. + // The overall stream offset of the end of "buf". If "buf" is NULL, it is as + // if "buf" was the empty string. uint32_t buf_stream_offset; - - // Indicates that we are in the middle of skipping bytes or groups (or both). - // If both are set, the byte-skipping needs to happen first. - uint8_t skip_groups; - uint32_t skip_bytes; - - bool eof; }; + /* upb_decoder construction/destruction. **************************************/ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) @@ -169,12 +165,13 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) d->top->end_offset = 0; } + /* upb_decoder buffering. 
*****************************************************/ +// Discards the current buffer if we are done with it, make the next buffer +// current if there is one. static void upb_decoder_advancebuf(upb_decoder *d) { - // Discard the current buffer if we are done with it, make the next buffer - // current if there is one. if(d->buf_bytes_remaining <= 0) { if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); d->buf = d->nextbuf; @@ -185,13 +182,9 @@ static void upb_decoder_advancebuf(upb_decoder *d) static void upb_decoder_pullnextbuf(upb_decoder *d) { - if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { // Need another buffer? - // We test the eof flag both before and after the get; checking it - // before lets us short-circuit the get if we are already at eof, - // checking it after makes sure we don't report an error if the get only - // failed because of eof. - if(!(d->nextbuf = upb_bytesrc_get(d->bytesrc)) && - !upb_bytesrc_eof(d->bytesrc)) { + if(!d->nextbuf) { + d->nextbuf = upb_bytesrc_get(d->bytesrc); + if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { // There was an error in the byte stream, halt the decoder. 
upb_copyerr(&d->status, upb_bytesrc_status(d->bytesrc)); return; @@ -202,7 +195,10 @@ static void upb_decoder_pullnextbuf(upb_decoder *d) static void upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) { d->buf_bytes_remaining -= bytes; - while(d->buf_bytes_remaining < 0) upb_decoder_getbuf(d); + while(d->buf_bytes_remaining <= 0) { + upb_decoder_pullnextbuf(d); + upb_decoder_advancebuf(d); + } } static void upb_decoder_skipgroup(upb_decoder *d) @@ -213,31 +209,29 @@ static void upb_decoder_skipgroup(upb_decoder *d) while(upb_decoder_getdef(d)) upb_decoder_skipval(d); } -static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) +static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes) { - if(d->buf_bytes_remaining < 10) { - upb_strlen_t total = 0; - if(d->buf) { - upb_strlen_t len = upb_string_len(d->buf); - memcpy(d->overflow_buf, upb_string_getrobuf(d->buf), len); - total += len; - if(d->nextbuf) { - len = upb_string_len(d->nextbuf); - if(total + len > 10) len = 10 - total; - memcpy(d->overflow_buf + total, upb_string_getrobuf(d->nextbuf, len)); - total += len; - } - } - memset(d->overflow_buf + total, 0x80, 10 - total); - } else { + upb_decoder_pullnextbuf(d); + upb_decoder_advancebuf(d); + if(d->buf_bytes_remaining >= UPB_MAX_ENCODED_SIZE) { return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - d->buf_bytes_remaining; + } else { + upb_strlen_t total = 0; + if(d->buf) total += upb_decoder_append(d->buf, total); + if(d->nextbuf) total += upb_decoder_append(d->nextbuf, total); + memset(d->overflow_buf + total, 0x80, UPB_MAX_ENCODED_SIZE - total); } } +// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE +// bytes long. This buffer contains the next bytes in the stream (even if +// those bytes span multiple buffers). *bytes is set to the number of actual +// stream bytes that are available in the returned buffer. If +// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. 
INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) { - if(d->buf_bytes_remaining >= 10) { + if(d->buf_bytes_remaining >= UPB_MAX_ENCODED_SIZE) { *bytes = d->buf_bytes_remaining; return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - d->buf_bytes_remaining; @@ -246,25 +240,31 @@ INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) } } +/* upb_src implementation for upb_decoder. ************************************/ + upb_fielddef *upb_decoder_getdef(upb_decoder *d) { // Detect end-of-submessage. - if(offset >= d->top->end_offset) { + if(upb_decoder_offset(d) >= d->top->end_offset) { d->eof = true; return NULL; } // Handles the packed field case. if(d->field) return d->field; - if(d->eof) return NULL; again: uint32_t key; - if(!upb_decoder_get_v_uint32(d, &key)) return NULL; - if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) { + if(!upb_decoder_get_v_uint32(d, &key)) { + return NULL; + + if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) { + // For delimited wire values we parse the length now, since we need it in + // all cases. + if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL; + } else if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) { if(isgroup(d->top->submsg_end)) { d->eof = true; - d->status->code = UPB_STATUS_EOF; } else { upb_seterr(d->status, UPB_STATUS_ERROR, "End group seen but current " "message is not a group, byte offset: %zd", @@ -273,59 +273,66 @@ again: return NULL; } - // For delimited wire values we parse the length now, since we need it in all - // cases. - if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) { - if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL; - } - // Look up field by tag number. upb_fielddef *f = upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key)); - if (!f || !upb_check_type(upb_wiretype_from_key(key), f->type)) { - // Unknown field or incorrect wire type. 
In the future these cases may be - // separated, like if we want to give the client unknown fields but not - // incorrect fields. + if (!f) { + // Unknown field. If/when the upb_src interface supports reporting + // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; + } else if (!upb_check_type(upb_wiretype_from_key(key), f->type)) { + // This is a recoverable error condition. We skip the value but also + // return NULL and report the error. + upb_decoder_skipval(d); + // TODO: better error message. + upb_seterr(&d->status, UPB_STATUS_ERROR, "Incorrect wire type.\n"); + return NULL; } + d->field = f; return f; } bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - uint32_t bytes; if(expected_type_for_field == UPB_DELIMITED) { // A string, bytes, or a length-delimited submessage. The latter isn't // technically a string, but can be gotten as one to perform lazy parsing. d->str = upb_string_tryrecycle(d->str); - if (d->delimited_len <= d->buf_bytes_remaining) { + const upb_strlen_t total_len = d->delimited_len; + if (total_len <= d->buf_bytes_remaining) { // The entire string is inside our current buffer, so we can just // return a substring of the buffer without copying. upb_string_substr(d->str, d->buf, upb_string_len(d->buf) - d->buf_bytes_remaining, - d->delimited_len); - d->buf_bytes_remaining -= d->delimited_len; + total_len); + d->buf_bytes_remaining -= total_len *val.str = d->str; } else { - // The string spans buffers, so we must copy. - memcpy(upb_string_getrwbuf(d->str, len), - upb_string_getrobuf(d->buf) + upb_string_len(d->buf), - bar); - if(!upb_bytesrc_append(d->bytesrc, d->str, len)) goto err; + // The string spans buffers, so we must copy from the current buffer, + // the next buffer (if we have one), and finally from the bytesrc. 
+ char *str = upb_string_getrwbuf(d->str, d->); + upb_strlen_t len = 0; + len += upb_decoder_append(d->buf, len, total_len); + if(!upb_decoder_advancebuf(d)) goto err; + if(d->buf) len += upb_decoder_append(d->buf, len, total_len); + if(len < total_len) + if(!upb_bytesrc_append(d->bytesrc, d->str, len - bytes)) goto err; } + d->field = NULL; } else { // For all of the integer types we need the bytes to be in a single // contiguous buffer. + uint32_t bytes; const uint8_t *buf = upb_decoder_getbuf(d, &bytes) switch(expected_type_for_field) { - case UPB_32BIT_VARINT: + case UPB_64BIT_VARINT: if(upb_get_v_uint32(buf, val.uint32) > 10) goto err; - if(f->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32); + if(f->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); break; - case UPB_64BIT_VARINT: { + case UPB_32BIT_VARINT: if(upb_get_v_uint64(buf, val.uint64) > 5) goto err; - if(f->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); + if(f->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32); break; case UPB_64BIT_FIXED: if(bytes < 8) goto err; @@ -338,9 +345,12 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) default: // Including start/end group. goto err; + } + if(wire_type != UPB_WIRE_TYPE_DELIMITED || + upb_decoder_offset(d) >= d->packed_end_offset) { + d->field = NULL; + } } - if(non-packed field || packed field that is done) - d->field = NULL; return true; err: } @@ -356,6 +366,7 @@ bool upb_decoder_skipval(upb_decoder *d) { case UPB_WIRE_TYPE_START_GROUP: return upb_skip_groups(1); case UPB_WIRE_TYPE_DELIMITED: + // Works for both string/bytes *and* submessages. return upb_skip_bytes(d->delimited_len); default: // Including UPB_WIRE_TYPE_END_GROUP. diff --git a/src/upb_decoder.h b/src/upb_decoder.h index ea20d3d..d40d9fc 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -1,15 +1,16 @@ /* * upb - a minimalist implementation of protocol buffers. 
* - * upb_decoder implements a high performance, callback-based, stream-oriented - * decoder (comparable to the SAX model in XML parsers). For parsing protobufs - * into in-memory messages (a more DOM-like model), see the routines in - * upb_msg.h, which are layered on top of this decoder. + * upb_decoder implements a high performance, streaming decoder for protobuf + * data that works by implementing upb_src and getting its data from a + * upb_bytesrc. * - * TODO: the decoder currently does not support returning unknown values. This - * can easily be added when it is needed. + * The decoder does not currently support non-blocking I/O, in the sense that + * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the + * decoder when data becomes available again. Support for this could be added, + * but it would add complexity and perhaps cost efficiency also. * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. */ #ifndef UPB_DECODER_H_ @@ -17,8 +18,8 @@ #include #include -#include "upb.h" -#include "descriptor.h" +#include "upb_def.h" +#include "upb_srcsink.h" #ifdef __cplusplus extern "C" { @@ -33,17 +34,17 @@ typedef struct upb_decoder upb_decoder; // Allocates and frees a upb_decoder, respectively. upb_decoder *upb_decoder_new(upb_msgdef *md); -void upb_decoder_free(upb_decoder *p); +void upb_decoder_free(upb_decoder *d); // Resets the internal state of an already-allocated decoder. This puts it in a // state where it has not seen any data, and expects the next data to be from // the beginning of a new protobuf. Parsers must be reset before they can be // used. A decoder can be reset multiple times. -void upb_decoder_reset(upb_decoder *p, upb_bytesrc *bytesrc); +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); // Returns a upb_src pointer by which the decoder can be used. The returned -// upb_src is invalidated by upb_decoder_reset(). 
-upb_src *upb_decoder_getsrc(upb_decoder *p); +// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). +upb_src *upb_decoder_getsrc(upb_decoder *d); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 6a60f31..3a57cc8 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -71,7 +71,7 @@ upb_status *upb_sink_status(upb_sink *sink); /* upb_bytesrc ****************************************************************/ // Returns the next string in the stream. NULL is returned on error or eof. -// The string must be at least "minlen" bytes long. +// The string must be at least "minlen" bytes long unless the stream is eof. // // A ref is passed to the caller, though the caller is encouraged to pass the // ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce @@ -140,6 +140,7 @@ typedef struct { typedef struct { upb_src_vtable *vtbl; upb_status status; + bool eof; #ifndef NDEBUG int state; // For debug-mode checking of API usage. #endif -- cgit v1.2.3 From 0a57d07a07c7cb7d2eee1db0477254d0199eb435 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 7 Jun 2010 18:38:25 -0700 Subject: Skeleton of upb_bytesrc. --- src/upb_byteio.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 src/upb_byteio.h diff --git a/src/upb_byteio.h b/src/upb_byteio.h new file mode 100644 index 0000000..69a28b3 --- /dev/null +++ b/src/upb_byteio.h @@ -0,0 +1,43 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file contains upb_bytesrc and upb_bytesink implementations for common + * interfaces like strings, UNIX fds, and FILE*. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. 
+ */ + +#ifndef UPB_BYTEIO_H +#define UPB_BYTEIO_H + +#include "upb_srcsink.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stringsrc **************************************************************/ + +struct upb_stringsrc; +typedef struct upb_stringsrc upb_stringsrc; + +// Create/free a stringsrc. +upb_stringsrc *upb_stringsrc_new(); +void upb_stringsrc_free(upb_stringsrc *s); + +// Resets the stringsrc to a state where it will vend the given string. The +// stringsrc will take a reference on the string, so the caller need not ensure +// that it outlives the stringsrc. A stringsrc can be reset multiple times. +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. +upb_bytesrc *upb_stringsrc_bytesrc(); + + +/* upb_fdsrc ******************************************************************/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From ed991c3b300d65dbe6fdb6a1d110cec029b0f5be Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Jun 2010 10:31:43 -0700 Subject: More work on upb_decoder. --- src/upb_decoder.c | 112 +++++++++++++++++++++++++++++++----------------------- src/upb_def.h | 3 -- 2 files changed, 64 insertions(+), 51 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 58e6bfa..5b9e962 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -135,7 +135,7 @@ struct upb_decoder { // The overall stream offset of the end of "buf". If "buf" is NULL, it is as // if "buf" was the empty string. 
- uint32_t buf_stream_offset; + uint32_t buf_endoffset; }; @@ -146,6 +146,8 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) upb_decoder *d = malloc(sizeof(*d)); d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; + d->nextbuf = NULL; return d; } @@ -154,15 +156,20 @@ void upb_decoder_free(upb_decoder *d) free(d); } -void upb_decoder_reset(upb_decoder *d, upb_sink *sink) +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { + if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); + if(d->nextbuf) upb_bytesrc_recycle(d->bytesrc, d->nextbuf); d->top = d->stack; - d->completed_offset = 0; - d->sink = sink; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it // indefinitely), so we treat it like a group. d->top->end_offset = 0; + d->bytesrc = bytesrc; + d->buf = NULL; + d->nextbuf = NULL; + d->buf_bytesleft = 0; + d->buf_endoffset = 0; } @@ -172,11 +179,11 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) // current if there is one. static void upb_decoder_advancebuf(upb_decoder *d) { - if(d->buf_bytes_remaining <= 0) { + if(d->buf_bytesleft <= 0) { if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); d->buf = d->nextbuf; d->nextbuf = NULL; - if(d->buf) d->buf_bytes_remaining += upb_string_len(d->buf); + if(d->buf) d->buf_bytesleft += upb_string_len(d->buf); } } @@ -187,15 +194,16 @@ static void upb_decoder_pullnextbuf(upb_decoder *d) if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { // There was an error in the byte stream, halt the decoder. 
upb_copyerr(&d->status, upb_bytesrc_status(d->bytesrc)); - return; + return false; } } + return true; } static void upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) { - d->buf_bytes_remaining -= bytes; - while(d->buf_bytes_remaining <= 0) { + d->buf_bytesleft -= bytes; + while(d->buf_bytesleft <= 0 && !upb_bytesrc_eof(d->bytesrc)) { upb_decoder_pullnextbuf(d); upb_decoder_advancebuf(d); } @@ -213,14 +221,15 @@ static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes) { upb_decoder_pullnextbuf(d); upb_decoder_advancebuf(d); - if(d->buf_bytes_remaining >= UPB_MAX_ENCODED_SIZE) { + if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - - d->buf_bytes_remaining; + d->buf_bytesleft; } else { upb_strlen_t total = 0; - if(d->buf) total += upb_decoder_append(d->buf, total); - if(d->nextbuf) total += upb_decoder_append(d->nextbuf, total); - memset(d->overflow_buf + total, 0x80, UPB_MAX_ENCODED_SIZE - total); + if(d->buf) total += upb_decoder_append(d->tmpbuf, d->buf, total); + if(d->nextbuf) total += upb_decoder_append(d->tmpbuf, d->nextbuf, total); + memset(d->tmpbuf + total, 0x80, UPB_MAX_ENCODED_SIZE - total); + return d->tmpbuf; } } @@ -231,10 +240,12 @@ static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes) // *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) { - if(d->buf_bytes_remaining >= UPB_MAX_ENCODED_SIZE) { - *bytes = d->buf_bytes_remaining; + if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { + // The common case; only when we get to the last ten bytes of the buffer + // do we have to do tricky things. 
+ *bytes = d->buf_bytesleft; return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - - d->buf_bytes_remaining; + d->buf_bytesleft; } else { return upb_decoder_getbuf_full(d, bytes); } @@ -300,52 +311,60 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) // technically a string, but can be gotten as one to perform lazy parsing. d->str = upb_string_tryrecycle(d->str); const upb_strlen_t total_len = d->delimited_len; - if (total_len <= d->buf_bytes_remaining) { + if (total_len <= d->buf_bytesleft) { // The entire string is inside our current buffer, so we can just // return a substring of the buffer without copying. upb_string_substr(d->str, d->buf, - upb_string_len(d->buf) - d->buf_bytes_remaining, + upb_string_len(d->buf) - d->buf_bytesleft, total_len); - d->buf_bytes_remaining -= total_len + upb_decoder_consume(total_len); *val.str = d->str; } else { // The string spans buffers, so we must copy from the current buffer, // the next buffer (if we have one), and finally from the bytesrc. char *str = upb_string_getrwbuf(d->str, d->); upb_strlen_t len = 0; - len += upb_decoder_append(d->buf, len, total_len); - if(!upb_decoder_advancebuf(d)) goto err; - if(d->buf) len += upb_decoder_append(d->buf, len, total_len); - if(len < total_len) - if(!upb_bytesrc_append(d->bytesrc, d->str, len - bytes)) goto err; + len += upb_decoder_append(str, d->buf, len, total_len); + upb_decoder_advancebuf(d); + if(d->buf) len += upb_decoder_append(str, d->buf, len, total_len); + if(len < total_len) { + if(!upb_bytesrc_append(d->bytesrc, d->str, len - bytes)) { + upb_status_copy(&d->error, upb_bytesrc_status(d->bytesrc)); + return false; + } + } } d->field = NULL; } else { // For all of the integer types we need the bytes to be in a single // contiguous buffer. 
- uint32_t bytes; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes) + uint32_t bytes_available; + uint32_t bytes_consumed; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available) switch(expected_type_for_field) { case UPB_64BIT_VARINT: - if(upb_get_v_uint32(buf, val.uint32) > 10) goto err; + if((bytes_consumed = upb_get_v_uint32(buf, val.uint32)) > 10) goto err; if(f->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); break; case UPB_32BIT_VARINT: - if(upb_get_v_uint64(buf, val.uint64) > 5) goto err; + if((bytes_consumed = upb_get_v_uint64(buf, val.uint64)) > 5) goto err; if(f->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32); break; case UPB_64BIT_FIXED: - if(bytes < 8) goto err; + bytes_consumed = 8; + if(bytes_available < bytes_consumed) goto err; upb_get_f_uint64(buf, val.uint64); break; case UPB_32BIT_FIXED: - if(bytes < 4) goto err; + bytes_consumed = 4; + if(bytes_available < bytes_consumed) goto err; upb_get_f_uint32(buf, val.uint32); break; default: // Including start/end group. 
goto err; } + upb_decoder_consume(bytes_consumed); if(wire_type != UPB_WIRE_TYPE_DELIMITED || upb_decoder_offset(d) >= d->packed_end_offset) { d->field = NULL; @@ -376,32 +395,29 @@ bool upb_decoder_skipval(upb_decoder *d) { } } -bool upb_decoder_startmsg(upb_src *src) { - d->top->field = f; - d->top++; - if(d->top >= d->limit) { +bool upb_decoder_startmsg(upb_decoder *d) { + d->top->field = d->field; + if(++d->top >= d->limit) { upb_seterr(d->status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", UPB_MAX_NESTING); return false; } upb_decoder_frame *frame = d->top; - frame->end_offset = d->completed_offset + submsg_len; + frame->end_offset = upb_decoder_offset(d) + d->delimited_len; frame->msgdef = upb_downcast_msgdef(f->def); - - return get_msgend(d, start); + return true; } bool upb_decoder_endmsg(upb_decoder *src) { - d->top--; - if(!d->eof) { - if(d->top->f->type == UPB_TYPE(GROUP)) - upb_skip_group(); - else - upb_skip_bytes(foo); + if(d->top > &d->stack) { + --d->top; + if(!d->eof) { + if(d->top->f->type == UPB_TYPE(GROUP)) + upb_skip_group(); + else + upb_skip_bytes(foo); + } + d->eof = false; } - d->eof = false; } - -upb_status *upb_decoder_status(upb_decoder *d) { return &d->status; } - diff --git a/src/upb_def.h b/src/upb_def.h index d4f0352..dd9dc07 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -128,9 +128,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { /* upb_msgdef *****************************************************************/ -struct google_protobuf_EnumDescriptorProto; -struct google_protobuf_DescriptorProto; - // Structure that describes a single .proto message type. typedef struct _upb_msgdef { upb_def base; -- cgit v1.2.3 From 992a03be55faf83d794b9ec5e8c4ca7e78c08a9b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Jun 2010 15:55:02 -0700 Subject: More decoder work, first attempts at compiling it. 
--- src/upb.c | 28 ----------------- src/upb.h | 26 ---------------- src/upb_decoder.c | 90 +++++++++++++++++++++++++++++-------------------------- src/upb_def.h | 18 +++++------ src/upb_srcsink.h | 16 ++++++++-- src/upb_string.h | 4 +-- src/upb_table.c | 2 +- src/upb_table.h | 5 ++-- 8 files changed, 77 insertions(+), 112 deletions(-) diff --git a/src/upb.c b/src/upb.c index 5d145e5..938c72d 100644 --- a/src/upb.c +++ b/src/upb.c @@ -10,34 +10,6 @@ #include "upb.h" -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(proto_type, wire_type, ctype) \ - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \ - {alignof(ctype), sizeof(ctype), wire_type, #ctype}, - -// With packed fields, any type expecting 32-bit, 64-bit or varint can instead -// receive delimited. -upb_type_info upb_types[] = { - TYPE_INFO(DOUBLE, (1<> 3; } -INLINE upb_wiretype_t upb_key_wiretype(upb_key key) { return key & 0x07; } - /* Polymorphic values of .proto types *****************************************/ // INTERNAL-ONLY: never refer to these types with a tag ("union", "struct"). diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 5b9e962..916f0db 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -9,11 +9,10 @@ #include #include #include -#include "upb_def.h" /* Functions to read wire values. *********************************************/ -const int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); +int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); // Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10 // bytes are available at buf. Returns the number of bytes consumed, or 11 if @@ -22,13 +21,9 @@ INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) { // We inline this common case (1-byte varints), if that fails we dispatch to // the full (non-inlined) version. - int8_t ret = 1; *val = *buf & 0x7f; - if(*buf & 0x80) { - // Varint is >1 byte. 
- ret += upb_get_v_uint64_full(buf + 1, val); - } - return ret; + if((*buf & 0x80) == 0) return 1; + return upb_get_v_uint64_full(buf + 1, val); } // Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit @@ -36,7 +31,7 @@ INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) INLINE uint8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val) { uint64_t val64; - int8_t ret = upb_get_v_uint64(buf, end, &val64, status); + int8_t ret = upb_get_v_uint64(buf, &val64); *val = (uint32_t)val64; // Discard the high bits. return ret; } @@ -56,7 +51,7 @@ INLINE void upb_get_f_uint32(const uint8_t *buf, uint32_t *val) // Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller // promises that 8 bytes are available at buf. -INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val) +INLINE void upb_get_f_uint64(const uint8_t *buf, uint64_t *val) { #if UPB_UNALIGNED_READS_OK *val = *(uint64_t*)buf; @@ -71,32 +66,27 @@ INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val) // Skips a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that 10 // bytes are available at "buf". Returns the number of bytes that were // skipped. -INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf) +INLINE const uint8_t upb_skip_v_uint64(const uint8_t *buf) { const uint8_t *const maxend = buf + 10; uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) + for(; buf < maxend && (last & 0x80); buf++) last = *buf; - if(buf > maxend) return -1; - return buf; + return } -// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version -// handles 1 and 2 byte varints). -const uint8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val) +// Parses remining bytes of a 64-bit varint that has already had its first byte +// parsed. 
+const uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val) { - const uint8_t *const maxend = buf + 9; - uint8_t last = 0x80; - int bitpos; + uint8_t bytes = 0; - for(bitpos = 0; buf < (uint8_t*)maxend && (last & 0x80); buf++, bitpos += 7) - *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; + // bitpos starts at 7 because our caller already read one byte. + for(int bitpos = 7; bytes < 10 && (*buf & 0x80); buf++, bitpos += 7) + *val |= (uint64_t)(*buf & 0x7F) << bitpos; - if(buf >= maxend) { - return -11; - } - return buf; + return bytes; } // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -136,6 +126,12 @@ struct upb_decoder { // The overall stream offset of the end of "buf". If "buf" is NULL, it is as // if "buf" was the empty string. uint32_t buf_endoffset; + + // Fielddef for the key we just read. + upb_fielddef *field; + + // Wire type of the key we just read. + upb_wire_type_t wire_type; }; @@ -187,7 +183,7 @@ static void upb_decoder_advancebuf(upb_decoder *d) } } -static void upb_decoder_pullnextbuf(upb_decoder *d) +static bool upb_decoder_pullnextbuf(upb_decoder *d) { if(!d->nextbuf) { d->nextbuf = upb_bytesrc_get(d->bytesrc); @@ -200,21 +196,28 @@ static void upb_decoder_pullnextbuf(upb_decoder *d) return true; } -static void upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) +static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) { d->buf_bytesleft -= bytes; while(d->buf_bytesleft <= 0 && !upb_bytesrc_eof(d->bytesrc)) { - upb_decoder_pullnextbuf(d); + if(!upb_decoder_pullnextbuf(d)) return false; upb_decoder_advancebuf(d); } + return true; } -static void upb_decoder_skipgroup(upb_decoder *d) +static bool upb_decoder_skipgroup(upb_decoder *d) { - // This will be mututally recursive if the group has sub-groups. If we - // wanted to handle EAGAIN in the future, this approach would not work; - // we would need to track the group depth explicitly. 
- while(upb_decoder_getdef(d)) upb_decoder_skipval(d); + // This will be mututally recursive with upb_decoder_skipval() if the group + // has sub-groups. If we wanted to handle EAGAIN in the future, this + // approach would not work; we would need to track the group depth + // explicitly. + while(upb_decoder_getdef(d)) { + if(!upb_decoder_skipval(d)) return false; + } + // If we are at the end of the group like we want to be, then + // upb_decoder_getdef() returned NULL because of eof, not error. + return upb_ok(&d->status); } static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes) @@ -266,14 +269,16 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) again: uint32_t key; + upb_wire_type_t wire_type; if(!upb_decoder_get_v_uint32(d, &key)) { return NULL; + wire_type = key & 0x7; - if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) { + if(wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL; - } else if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) { + } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { if(isgroup(d->top->submsg_end)) { d->eof = true; } else { @@ -285,14 +290,14 @@ again: } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key)); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, key >> 3); if (!f) { // Unknown field. If/when the upb_src interface supports reporting // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; - } else if (!upb_check_type(upb_wiretype_from_key(key), f->type)) { + } else if (!upb_check_type(wire_type, f->type)) { // This is a recoverable error condition. We skip the value but also // return NULL and report the error. 
upb_decoder_skipval(d); @@ -301,6 +306,7 @@ again: return NULL; } d->field = f; + d->wire_type = wire_type; return f; } @@ -379,14 +385,14 @@ bool upb_decoder_skipval(upb_decoder *d) { case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64(buf, end, status); case UPB_WIRE_TYPE_64BIT: - return upb_skip_bytes(8); + return upb_decoder_skipbytes(8); case UPB_WIRE_TYPE_32BIT: - return upb_skip_bytes(4); + return upb_decoder_skipbytes(4); case UPB_WIRE_TYPE_START_GROUP: - return upb_skip_groups(1); + return upb_decoder_skipgroup(); case UPB_WIRE_TYPE_DELIMITED: // Works for both string/bytes *and* submessages. - return upb_skip_bytes(d->delimited_len); + return upb_decoder_skipbytes(d->delimited_len); default: // Including UPB_WIRE_TYPE_END_GROUP. assert(false); diff --git a/src/upb_def.h b/src/upb_def.h index dd9dc07..a571730 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -53,7 +53,7 @@ enum upb_def_type { typedef int8_t upb_def_type_t; typedef struct { - upb_strptr fqname; // Fully qualified. + upb_string *fqname; // Fully qualified. upb_atomic_refcount_t refcount; upb_def_type_t type; @@ -90,7 +90,7 @@ typedef struct _upb_fielddef { upb_field_type_t type; upb_label_t label; upb_field_number_t number; - upb_strptr name; + upb_string *name; upb_value default_value; // These are set only when this fielddef is part of a msgdef. @@ -163,7 +163,7 @@ INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { return e ? e->f : NULL; } -INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_strptr name) { +INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); return e ? e->f : NULL; } @@ -179,8 +179,8 @@ typedef struct _upb_enumdef { typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. 
-bool upb_enumdef_ntoi(upb_enumdef *e, upb_strptr name, upb_enumval_t *num); -upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); +bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // upb_enum_iter i; @@ -190,7 +190,7 @@ upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); typedef struct { upb_enumdef *e; void *state; // Internal iteration state. - upb_strptr name; + upb_string *name; upb_enumval_t val; } upb_enum_iter; void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); @@ -232,11 +232,11 @@ INLINE void upb_symtab_unref(upb_symtab *s) { // // If a def is found, the caller owns one ref on the returned def. Otherwise // returns NULL. -upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol); +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); // Find an entry in the symbol table with this exact name. If a def is found, // the caller owns one ref on the returned def. Otherwise returns NULL. -upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym); +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref @@ -249,7 +249,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // defined in desc). desc may not attempt to define any names that are already // defined in this symtab. Caller retains ownership of desc. status indicates // whether the operation was successful or not, and the error message (if any). 
-void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status); +void upb_symtab_add_desc(upb_symtab *s, upb_string *desc, upb_status *status); /* upb_def casts **************************************************************/ diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 3a57cc8..4a3d1e3 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -28,6 +28,9 @@ extern "C" { // TODO: decide how to handle unknown fields. +struct upb_src; +typedef struct upb_src upb_src; + // Retrieves the fielddef for the next field in the stream. Returns NULL on // error or end-of-stream. upb_fielddef *upb_src_getdef(upb_src *src); @@ -53,6 +56,9 @@ upb_status *upb_src_status(upb_src *src); /* upb_sink *******************************************************************/ +struct upb_sink; +typedef struct upb_sink upb_sink; + // Puts the given fielddef into the stream. bool upb_sink_putdef(upb_sink *sink, upb_fielddef *def); @@ -70,6 +76,9 @@ upb_status *upb_sink_status(upb_sink *sink); /* upb_bytesrc ****************************************************************/ +struct upb_bytesrc; +typedef struct upb_bytesrc upb_bytesrc; + // Returns the next string in the stream. NULL is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof. // @@ -89,6 +98,9 @@ upb_status *upb_bytesrc_status(upb_src *src); /* upb_bytesink ***************************************************************/ +struct upb_bytesink; +typedef struct upb_bytesink upb_bytesink; + // Puts the given string. Returns the number of bytes that were actually, // consumed, which may be fewer than were in the string, or <0 on error. int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); @@ -137,14 +149,14 @@ typedef struct { // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
-typedef struct { +struct upb_src { upb_src_vtable *vtbl; upb_status status; bool eof; #ifndef NDEBUG int state; // For debug-mode checking of API usage. #endif -} upb_src; +}; INLINE void upb_sink_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; diff --git a/src/upb_string.h b/src/upb_string.h index c0d14d5..2c0303d 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -44,7 +44,7 @@ typedef struct _upb_string { // Used if this is a slice of another string. struct _upb_string *src; // Used if this string is referencing external unowned memory. - upb_stomic_refcount_t reader_count; + upb_atomic_refcount_t reader_count; } extra; } upb_string; @@ -126,7 +126,7 @@ INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { // Replaces the contents of "dest" with the contents of "src". INLINE void upb_strcpy(upb_string *dest, upb_string *src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_strlen(src)); + upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src)); upb_string_endread(src); } diff --git a/src/upb_table.c b/src/upb_table.c index a477121..51a9f21 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -5,7 +5,7 @@ */ #include "upb_table.h" -#include "upb_data.h" +#include "upb_string.h" #include #include diff --git a/src/upb_table.h b/src/upb_table.h index 122aed3..20dae92 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -17,6 +17,7 @@ #include #include "upb.h" +#include "upb_string.h" #ifdef __cplusplus extern "C" { @@ -38,7 +39,7 @@ typedef struct { // performance by letting us compare hashes before comparing lengths or the // strings themselves. typedef struct { - upb_strptr key; // We own a frozen ref. + upb_string *key; // We own a ref. uint32_t next; // Internal chaining. 
} upb_strtable_entry; @@ -114,7 +115,7 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { return upb_inttable_fastlookup(t, key, t->t.entry_size); } -void *upb_strtable_lookup(upb_strtable *t, upb_strptr key); +void *upb_strtable_lookup(upb_strtable *t, upb_string *key); /* Provides iteration over the table. The order in which the entries are * returned is undefined. Insertions invalidate iterators. The _next -- cgit v1.2.3 From 5743636ad19eafb11eddeefd29f2803052dadff2 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Jun 2010 20:28:44 -0700 Subject: Decoder compiler but doesn't work yet. --- src/upb.c | 8 +++ src/upb.h | 21 +++--- src/upb_decoder.c | 172 ++++++++++++++++++++++++++++++++----------------- src/upb_srcsink.h | 83 +++--------------------- src/upb_srcsink_vtbl.h | 93 ++++++++++++++++++++++++++ src/upb_string.h | 5 +- 6 files changed, 235 insertions(+), 147 deletions(-) create mode 100644 src/upb_srcsink_vtbl.h diff --git a/src/upb.c b/src/upb.c index 938c72d..bd41613 100644 --- a/src/upb.c +++ b/src/upb.c @@ -7,6 +7,7 @@ #include #include +#include #include "upb.h" @@ -21,3 +22,10 @@ void upb_seterr(upb_status *status, enum upb_status_code code, va_end(args); } } + +void upb_copyerr(upb_status *to, upb_status *from) +{ + to->code = from->code; + strcpy(to->msg, from->msg); +} + diff --git a/src/upb.h b/src/upb.h index 4991c50..1681763 100644 --- a/src/upb.h +++ b/src/upb.h @@ -71,7 +71,11 @@ enum upb_wire_type { UPB_WIRE_TYPE_DELIMITED = 2, UPB_WIRE_TYPE_START_GROUP = 3, UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5 + UPB_WIRE_TYPE_32BIT = 5, + + // This isn't a real wire type, but we use this constant to describe varints + // that are expected to be a maximum of 32 bits. 
+ UPB_WIRE_TYPE_32BIT_VARINT = 8 }; typedef uint8_t upb_wire_type_t; @@ -121,14 +125,8 @@ typedef upb_atomic_refcount_t upb_data; typedef uint32_t upb_strlen_t; -struct upb_norefcount_string; -struct upb_refcounted_string; -typedef union { - // Must be first, for the UPB_STATIC_STRING_PTR_INIT() macro. - struct upb_norefcount_string *norefcount; - struct upb_refcounted_string *refcounted; - upb_data *base; -} upb_strptr; +struct _upb_string; +typedef struct _upb_string upb_string; typedef uint32_t upb_arraylen_t; @@ -149,7 +147,7 @@ typedef union { uint32_t uint32; uint64_t uint64; bool _bool; - upb_strptr str; + upb_string *str; upb_arrayptr arr; upb_msg *msg; upb_data *data; @@ -166,7 +164,7 @@ typedef union { uint32_t *uint32; uint64_t *uint64; bool *_bool; - upb_strptr *str; + upb_string **str; upb_arrayptr *arr; upb_msg **msg; upb_data **data; @@ -290,6 +288,7 @@ INLINE void upb_reset(upb_status *status) { void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); +void upb_copyerr(upb_status *to, upb_status *from); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 916f0db..5d352c2 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -10,9 +10,16 @@ #include #include +#define UPB_GROUP_END_OFFSET UINT32_MAX + +static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { + // Fake implementation. + return ft + wt > 3; +} + /* Functions to read wire values. *********************************************/ -int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); +static uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); // Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10 // bytes are available at buf. Returns the number of bytes consumed, or 11 if @@ -66,19 +73,19 @@ INLINE void upb_get_f_uint64(const uint8_t *buf, uint64_t *val) // Skips a varint (wire type: UPB_WIRE_TYPE_VARINT). 
Caller promises that 10 // bytes are available at "buf". Returns the number of bytes that were // skipped. -INLINE const uint8_t upb_skip_v_uint64(const uint8_t *buf) +INLINE uint8_t upb_skip_v_uint64(const uint8_t *buf) { const uint8_t *const maxend = buf + 10; uint8_t last = 0x80; for(; buf < maxend && (last & 0x80); buf++) last = *buf; - return + return 0; // TODO } // Parses remining bytes of a 64-bit varint that has already had its first byte // parsed. -const uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val) +static uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val) { uint8_t bytes = 0; @@ -101,7 +108,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - int32_t end_offset; // For groups, -1. + upb_strlen_t end_offset; // For groups, -1. } upb_decoder_frame; struct upb_decoder { @@ -132,6 +139,14 @@ struct upb_decoder { // Wire type of the key we just read. upb_wire_type_t wire_type; + + // Delimited length of the string field we are reading. + upb_strlen_t delimited_len; + + upb_strlen_t packed_end_offset; + + // String we return for string values. We try to recycle it if possible. + upb_string *str; }; @@ -144,6 +159,7 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) d->limit = &d->stack[UPB_MAX_NESTING]; d->buf = NULL; d->nextbuf = NULL; + d->str = upb_string_new(); return d; } @@ -159,8 +175,9 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) d->top = d->stack; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it - // indefinitely), so we treat it like a group. - d->top->end_offset = 0; + // indefinitely), so we set the end offset as high as possible, but not equal + // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. 
+ d->top->end_offset = UINT32_MAX - 1; d->bytesrc = bytesrc; d->buf = NULL; d->nextbuf = NULL; @@ -171,6 +188,11 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) /* upb_decoder buffering. *****************************************************/ +static upb_strlen_t upb_decoder_offset(upb_decoder *d) +{ + return d->buf_endoffset - d->buf_bytesleft; +} + // Discards the current buffer if we are done with it, make the next buffer // current if there is one. static void upb_decoder_advancebuf(upb_decoder *d) @@ -186,10 +208,10 @@ static void upb_decoder_advancebuf(upb_decoder *d) static bool upb_decoder_pullnextbuf(upb_decoder *d) { if(!d->nextbuf) { - d->nextbuf = upb_bytesrc_get(d->bytesrc); + d->nextbuf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE); if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { // There was an error in the byte stream, halt the decoder. - upb_copyerr(&d->status, upb_bytesrc_status(d->bytesrc)); + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); return false; } } @@ -206,6 +228,9 @@ static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) return true; } +bool upb_decoder_skipval(upb_decoder *d); +upb_fielddef *upb_decoder_getdef(upb_decoder *d); + static bool upb_decoder_skipgroup(upb_decoder *d) { // This will be mututally recursive with upb_decoder_skipval() if the group @@ -217,21 +242,28 @@ static bool upb_decoder_skipgroup(upb_decoder *d) } // If we are at the end of the group like we want to be, then // upb_decoder_getdef() returned NULL because of eof, not error. 
- return upb_ok(&d->status); + return upb_ok(&d->src.status); } -static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes) +upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, + upb_strlen_t len, upb_strlen_t total_len); + +static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) { upb_decoder_pullnextbuf(d); upb_decoder_advancebuf(d); if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { - return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - + *bytes = d->buf_bytesleft; + return (uint8_t*)upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - d->buf_bytesleft; } else { - upb_strlen_t total = 0; - if(d->buf) total += upb_decoder_append(d->tmpbuf, d->buf, total); - if(d->nextbuf) total += upb_decoder_append(d->tmpbuf, d->nextbuf, total); - memset(d->tmpbuf + total, 0x80, UPB_MAX_ENCODED_SIZE - total); + upb_strlen_t len = 0; + if(d->buf) + len += upb_decoder_append(d->tmpbuf, d->buf, len, UPB_MAX_ENCODED_SIZE); + if(d->nextbuf) + len += upb_decoder_append(d->tmpbuf, d->nextbuf, len, UPB_MAX_ENCODED_SIZE); + *bytes = len; + memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); return d->tmpbuf; } } @@ -241,13 +273,13 @@ static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes) // those bytes span multiple buffers). *bytes is set to the number of actual // stream bytes that are available in the returned buffer. If // *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. -INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) +INLINE const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) { if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { // The common case; only when we get to the last ten bytes of the buffer // do we have to do tricky things. 
*bytes = d->buf_bytesleft; - return upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - + return (uint8_t*)upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - d->buf_bytesleft; } else { return upb_decoder_getbuf_full(d, bytes); @@ -256,33 +288,37 @@ INLINE static const uint8_t *upb_decoder_getbuf(upb_decoder *d, int32_t *bytes) /* upb_src implementation for upb_decoder. ************************************/ +bool upb_decoder_get_v_uint32(upb_decoder *d, uint32_t *key); + upb_fielddef *upb_decoder_getdef(upb_decoder *d) { + uint32_t key; + upb_wire_type_t wire_type; + // Detect end-of-submessage. if(upb_decoder_offset(d) >= d->top->end_offset) { - d->eof = true; + d->src.eof = true; return NULL; } // Handles the packed field case. if(d->field) return d->field; -again: - uint32_t key; - upb_wire_type_t wire_type; + again: if(!upb_decoder_get_v_uint32(d, &key)) { return NULL; + } wire_type = key & 0x7; if(wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. - if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL; + if(!upb_decoder_get_v_uint32(d, &d->delimited_len)) return NULL; } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { - if(isgroup(d->top->submsg_end)) { - d->eof = true; + if(d->top->end_offset == UPB_GROUP_END_OFFSET) { + d->src.eof = true; } else { - upb_seterr(d->status, UPB_STATUS_ERROR, "End group seen but current " + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current " "message is not a group, byte offset: %zd", upb_decoder_offset(d)); } @@ -302,7 +338,7 @@ again: // return NULL and report the error. upb_decoder_skipval(d); // TODO: better error message. 
- upb_seterr(&d->status, UPB_STATUS_ERROR, "Incorrect wire type.\n"); + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n"); return NULL; } d->field = f; @@ -312,30 +348,32 @@ again: bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - if(expected_type_for_field == UPB_DELIMITED) { + int expected_type_for_field = 0; + if(expected_type_for_field == UPB_WIRE_TYPE_DELIMITED) { // A string, bytes, or a length-delimited submessage. The latter isn't // technically a string, but can be gotten as one to perform lazy parsing. d->str = upb_string_tryrecycle(d->str); const upb_strlen_t total_len = d->delimited_len; - if (total_len <= d->buf_bytesleft) { + if ((int32_t)total_len <= d->buf_bytesleft) { // The entire string is inside our current buffer, so we can just // return a substring of the buffer without copying. upb_string_substr(d->str, d->buf, upb_string_len(d->buf) - d->buf_bytesleft, total_len); - upb_decoder_consume(total_len); + d->buf_bytesleft -= total_len; *val.str = d->str; } else { // The string spans buffers, so we must copy from the current buffer, // the next buffer (if we have one), and finally from the bytesrc. - char *str = upb_string_getrwbuf(d->str, d->); + uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); upb_strlen_t len = 0; len += upb_decoder_append(str, d->buf, len, total_len); upb_decoder_advancebuf(d); if(d->buf) len += upb_decoder_append(str, d->buf, len, total_len); + upb_string_getrwbuf(d->str, len); // Cheap resize. if(len < total_len) { - if(!upb_bytesrc_append(d->bytesrc, d->str, len - bytes)) { - upb_status_copy(&d->error, upb_bytesrc_status(d->bytesrc)); + if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); return false; } } @@ -346,22 +384,22 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) // contiguous buffer. 
uint32_t bytes_available; uint32_t bytes_consumed; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available) + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); switch(expected_type_for_field) { - case UPB_64BIT_VARINT: + case UPB_WIRE_TYPE_VARINT: if((bytes_consumed = upb_get_v_uint32(buf, val.uint32)) > 10) goto err; - if(f->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); + if(d->field->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); break; - case UPB_32BIT_VARINT: + case UPB_WIRE_TYPE_32BIT_VARINT: if((bytes_consumed = upb_get_v_uint64(buf, val.uint64)) > 5) goto err; - if(f->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32); + if(d->field->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32); break; - case UPB_64BIT_FIXED: + case UPB_WIRE_TYPE_64BIT: bytes_consumed = 8; if(bytes_available < bytes_consumed) goto err; upb_get_f_uint64(buf, val.uint64); break; - case UPB_32BIT_FIXED: + case UPB_WIRE_TYPE_32BIT: bytes_consumed = 4; if(bytes_available < bytes_consumed) goto err; upb_get_f_uint32(buf, val.uint32); @@ -370,33 +408,40 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) // Including start/end group. 
goto err; } - upb_decoder_consume(bytes_consumed); - if(wire_type != UPB_WIRE_TYPE_DELIMITED || + d->buf_bytesleft -= bytes_consumed; + if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || upb_decoder_offset(d) >= d->packed_end_offset) { d->field = NULL; } } return true; err: + return false; } bool upb_decoder_skipval(upb_decoder *d) { - switch(d->key.wire_type) { - case UPB_WIRE_TYPE_VARINT: - return upb_skip_v_uint64(buf, end, status); + switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint32_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + uint8_t bytes = upb_skip_v_uint64(buf); + if(bytes > 10) return false; + upb_decoder_skipbytes(d, bytes); + return true; + } case UPB_WIRE_TYPE_64BIT: - return upb_decoder_skipbytes(8); + return upb_decoder_skipbytes(d, 8); case UPB_WIRE_TYPE_32BIT: - return upb_decoder_skipbytes(4); + return upb_decoder_skipbytes(d, 4); case UPB_WIRE_TYPE_START_GROUP: - return upb_decoder_skipgroup(); + return upb_decoder_skipgroup(d); case UPB_WIRE_TYPE_DELIMITED: // Works for both string/bytes *and* submessages. - return upb_decoder_skipbytes(d->delimited_len); + return upb_decoder_skipbytes(d, d->delimited_len); default: // Including UPB_WIRE_TYPE_END_GROUP. 
assert(false); - upb_seterr(&d->status, UPB_STATUS_ERROR, "Tried to skip an end group"); + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); return false; } } @@ -404,26 +449,33 @@ bool upb_decoder_skipval(upb_decoder *d) { bool upb_decoder_startmsg(upb_decoder *d) { d->top->field = d->field; if(++d->top >= d->limit) { - upb_seterr(d->status, UPB_ERROR_MAX_NESTING_EXCEEDED, + upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", UPB_MAX_NESTING); return false; } upb_decoder_frame *frame = d->top; - frame->end_offset = upb_decoder_offset(d) + d->delimited_len; - frame->msgdef = upb_downcast_msgdef(f->def); + frame->msgdef = upb_downcast_msgdef(d->field->def); + if(d->field->type == UPB_TYPE(GROUP)) { + frame->end_offset = UPB_GROUP_END_OFFSET; + } else { + frame->end_offset = upb_decoder_offset(d) + d->delimited_len; + } return true; } -bool upb_decoder_endmsg(upb_decoder *src) { - if(d->top > &d->stack) { +bool upb_decoder_endmsg(upb_decoder *d) { + if(d->top > d->stack) { --d->top; - if(!d->eof) { - if(d->top->f->type == UPB_TYPE(GROUP)) - upb_skip_group(); + if(!d->src.eof) { + if(d->top->field->type == UPB_TYPE(GROUP)) + upb_decoder_skipgroup(d); else - upb_skip_bytes(foo); + upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); } - d->eof = false; + d->src.eof = false; + return true; + } else { + return false; } } diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 4a3d1e3..8e5a09d 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -1,8 +1,6 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * * This file defines four general-purpose interfaces for pulling/pushing either * protobuf data or bytes: * @@ -13,12 +11,16 @@ * * These interfaces are used as general-purpose glue in upb. For example, the * decoder interface works by implementing a upb_src and calling a upb_bytesrc. 
+ * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * */ #ifndef UPB_SRCSINK_H #define UPB_SRCSINK_H #include "upb_def.h" +#include "upb_srcsink_vtbl.h" #ifdef __cplusplus extern "C" { @@ -28,9 +30,6 @@ extern "C" { // TODO: decide how to handle unknown fields. -struct upb_src; -typedef struct upb_src upb_src; - // Retrieves the fielddef for the next field in the stream. Returns NULL on // error or end-of-stream. upb_fielddef *upb_src_getdef(upb_src *src); @@ -51,14 +50,12 @@ bool upb_src_startmsg(upb_src *src); // which case the rest of the submessage is skipped. bool upb_src_endmsg(upb_src *src); -// Returns the current error status for the stream. -upb_status *upb_src_status(upb_src *src); +// Returns the current error/eof status for the stream. +INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } +INLINE bool upb_src_eof(upb_src *src) { return src->eof; } /* upb_sink *******************************************************************/ -struct upb_sink; -typedef struct upb_sink upb_sink; - // Puts the given fielddef into the stream. bool upb_sink_putdef(upb_sink *sink, upb_fielddef *def); @@ -76,9 +73,6 @@ upb_status *upb_sink_status(upb_sink *sink); /* upb_bytesrc ****************************************************************/ -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; - // Returns the next string in the stream. NULL is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof. // @@ -94,13 +88,11 @@ void upb_bytesrc_recycle(upb_bytesrc *src, upb_string *str); bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. 
-upb_status *upb_bytesrc_status(upb_src *src); +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } /* upb_bytesink ***************************************************************/ -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - // Puts the given string. Returns the number of bytes that were actually, // consumed, which may be fewer than were in the string, or <0 on error. int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); @@ -108,63 +100,6 @@ int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. upb_status *upb_bytesink_status(upb_bytesink *sink); -/* Dynamic Dispatch implementation for src/sink interfaces ********************/ - -// The rest of this file only concerns components that are implementing any of -// the above interfaces. To simple clients the code below should be considered -// private. - -// Typedefs for function pointers to all of the above functions. 
-typedef upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); -typedef upb_status *(*upb_src_status_fptr)(upb_src *src); - -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); -typedef upb_status *(*upb_sink_status_fptr)(upb_sink *sink); - -typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); -typedef upb_status *(*upb_bytesrc_status_fptr)(upb_src *src); - -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); -typedef upb_status *(*upb_bytesink_status_fptr)(upb_bytesink *sink); - -// Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; - upb_src_status_fptr status; -} upb_src_vtable; - -// "Base Class" definitions; components that implement these interfaces should -// contain one of these structures. - -struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -#ifndef NDEBUG - int state; // For debug-mode checking of API usage. -#endif -}; - -INLINE void upb_sink_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; -#ifndef DEBUG - // TODO: initialize debug-mode checking. 
-#endif -} - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_srcsink_vtbl.h b/src/upb_srcsink_vtbl.h new file mode 100644 index 0000000..66cd3c2 --- /dev/null +++ b/src/upb_srcsink_vtbl.h @@ -0,0 +1,93 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * vtable declarations for types that are implementing any of the src or sink + * interfaces. Only components that are implementing these interfaces need + * to worry about this file. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_SRCSINK_VTBL_H_ +#define UPB_SRCSINK_VTBL_H_ + +#include "upb_def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_src; +typedef struct upb_src upb_src; +struct upb_sink; +typedef struct upb_sink upb_sink; +struct upb_bytesrc; +typedef struct upb_bytesrc upb_bytesrc; +struct upb_bytesink; +typedef struct upb_bytesink upb_bytesink; + +// Typedefs for function pointers to all of the virtual functions. +typedef upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); +typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_skipval_fptr)(upb_src *src); +typedef bool (*upb_src_startmsg_fptr)(upb_src *src); +typedef bool (*upb_src_endmsg_fptr)(upb_src *src); + +typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, upb_fielddef *def); +typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); +typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); + +typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); +typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); +typedef bool (*upb_bytesrc_append_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); + +// Vtables for the above interfaces. 
+typedef struct { + upb_src_getdef_fptr getdef; + upb_src_getval_fptr getval; + upb_src_skipval_fptr skipval; + upb_src_startmsg_fptr startmsg; + upb_src_endmsg_fptr endmsg; +} upb_src_vtable; + +typedef struct { + upb_bytesrc_get_fptr get; + upb_bytesrc_append_fptr append; + upb_bytesrc_recycle_fptr recycle; +} upb_bytesrc_vtable; + +// "Base Class" definitions; components that implement these interfaces should +// contain one of these structures. + +struct upb_src { + upb_src_vtable *vtbl; + upb_status status; + bool eof; +#ifndef NDEBUG + int state; // For debug-mode checking of API usage. +#endif +}; + +struct upb_bytesrc { + upb_bytesrc_vtable *vtbl; + upb_status status; + bool eof; +}; + +INLINE void upb_sink_init(upb_src *s, upb_src_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; +#ifndef DEBUG + // TODO: initialize debug-mode checking. +#endif +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_string.h b/src/upb_string.h index 2c0303d..9a3957c 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -28,6 +28,7 @@ #include #include #include "upb_atomic.h" +#include "upb.h" #ifdef __cplusplus extern "C" { @@ -35,7 +36,7 @@ extern "C" { // All members of this struct are private, and may only be read/written through // the associated functions. Also, strings may *only* be allocated on the heap. -typedef struct _upb_string { +struct _upb_string { char *ptr; uint32_t len; uint32_t size; @@ -46,7 +47,7 @@ typedef struct _upb_string { // Used if this string is referencing external unowned memory. upb_atomic_refcount_t reader_count; } extra; -} upb_string; +}; // Returns a newly-created, empty, non-finalized string. When the string is no // longer needed, it should be unref'd, never freed directly. -- cgit v1.2.3 From 563325435753fae01d2b45cceba8b9d14c8a7aad Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 10 Jun 2010 09:14:31 -0700 Subject: Implement proper type checking again. 
--- src/upb.c | 28 ++++++++++++++++++++++++++++ src/upb.h | 12 ++++++++++++ src/upb_decoder.c | 11 ++++++----- src/upb_string.h | 2 +- 4 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/upb.c b/src/upb.c index bd41613..189dfe4 100644 --- a/src/upb.c +++ b/src/upb.c @@ -11,6 +11,34 @@ #include "upb.h" +#define alignof(t) offsetof(struct { char c; t x; }, x) +#define TYPE_INFO(wire_type, ctype, allows_delimited) \ + {alignof(ctype), sizeof(ctype), wire_type, \ + (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ + #ctype}, + +upb_type_info upb_types[] = { + {0, 0, 0, ""} // There is no type 0. + TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1), // DOUBLE + TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1), // FLOAT + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1), // INT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1), // UINT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1), // INT32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1), // FIXED64 + TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1), // FIXED32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1), // BOOL + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // STRING + TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0), // GROUP + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // MESSAGE + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // BYTES + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1), // UINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1), // ENUM + TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1), // SFIXED32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1), // SFIXED64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1), // SINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1), // SINT64 +}; + void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...) 
{ diff --git a/src/upb.h b/src/upb.h index 1681763..6bf548c 100644 --- a/src/upb.h +++ b/src/upb.h @@ -98,6 +98,18 @@ INLINE bool upb_isstringtype(upb_field_type_t type) { return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); } +// Info for a given field type. +typedef struct { + uint8_t align; + uint8_t size; + upb_wire_type_t native_wire_type; + uint8_t allowed_wire_types; // For packable fields, also allows delimited. + char *ctype; +} upb_type_info; + +// A static array of info about all of the field types, indexed by type number. +extern upb_type_info upb_types[]; + // The number of a field, eg. "optional string foo = 3". typedef int32_t upb_field_number_t; diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 5d352c2..c54a21a 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -12,9 +12,10 @@ #define UPB_GROUP_END_OFFSET UINT32_MAX +// Returns true if the give wire type and field type combination is valid, +// taking into account both packed and non-packed encodings. static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - // Fake implementation. - return ft + wt > 3; + return (1 << wt) & upb_types[ft].allowed_wire_types; } /* Functions to read wire values. *********************************************/ @@ -348,8 +349,8 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - int expected_type_for_field = 0; - if(expected_type_for_field == UPB_WIRE_TYPE_DELIMITED) { + upb_wire_type_t native_wire_type = upb_types[d->field->type].native_wire_type; + if(native_wire_type == UPB_WIRE_TYPE_DELIMITED) { // A string, bytes, or a length-delimited submessage. The latter isn't // technically a string, but can be gotten as one to perform lazy parsing. 
d->str = upb_string_tryrecycle(d->str); @@ -385,7 +386,7 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) uint32_t bytes_available; uint32_t bytes_consumed; const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - switch(expected_type_for_field) { + switch(native_wire_type) { case UPB_WIRE_TYPE_VARINT: if((bytes_consumed = upb_get_v_uint32(buf, val.uint32)) > 10) goto err; if(d->field->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); diff --git a/src/upb_string.h b/src/upb_string.h index 9a3957c..eab7f54 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -70,7 +70,7 @@ INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } // upb_string_endread is called(). No other functions may be called on the // string during this window except upb_string_len(). INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } -INLINE void upb_string_endread(upb_string *str); +INLINE void upb_string_endread(upb_string *str) {} // Attempts to recycle the string "str" so it may be reused and have different // data written to it. The returned string is either "str" if it could be -- cgit v1.2.3 From b3d40eb92f26f9aea7fb82f07901d90d61e00214 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 10 Jun 2010 14:16:06 -0700 Subject: More decoder work. --- src/upb_decoder.c | 179 +++++++++++++++++++++++++++---------------------- src/upb_srcsink_vtbl.h | 2 +- 2 files changed, 100 insertions(+), 81 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index c54a21a..263343c 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -151,42 +151,6 @@ struct upb_decoder { }; -/* upb_decoder construction/destruction. 
**************************************/ - -upb_decoder *upb_decoder_new(upb_msgdef *msgdef) -{ - upb_decoder *d = malloc(sizeof(*d)); - d->toplevel_msgdef = msgdef; - d->limit = &d->stack[UPB_MAX_NESTING]; - d->buf = NULL; - d->nextbuf = NULL; - d->str = upb_string_new(); - return d; -} - -void upb_decoder_free(upb_decoder *d) -{ - free(d); -} - -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) -{ - if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); - if(d->nextbuf) upb_bytesrc_recycle(d->bytesrc, d->nextbuf); - d->top = d->stack; - d->top->msgdef = d->toplevel_msgdef; - // The top-level message is not delimited (we can keep receiving data for it - // indefinitely), so we set the end offset as high as possible, but not equal - // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. - d->top->end_offset = UINT32_MAX - 1; - d->bytesrc = bytesrc; - d->buf = NULL; - d->nextbuf = NULL; - d->buf_bytesleft = 0; - d->buf_endoffset = 0; -} - - /* upb_decoder buffering. *****************************************************/ static upb_strlen_t upb_decoder_offset(upb_decoder *d) @@ -229,23 +193,6 @@ static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) return true; } -bool upb_decoder_skipval(upb_decoder *d); -upb_fielddef *upb_decoder_getdef(upb_decoder *d); - -static bool upb_decoder_skipgroup(upb_decoder *d) -{ - // This will be mututally recursive with upb_decoder_skipval() if the group - // has sub-groups. If we wanted to handle EAGAIN in the future, this - // approach would not work; we would need to track the group depth - // explicitly. - while(upb_decoder_getdef(d)) { - if(!upb_decoder_skipval(d)) return false; - } - // If we are at the end of the group like we want to be, then - // upb_decoder_getdef() returned NULL because of eof, not error. 
- return upb_ok(&d->src.status); -} - upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, upb_strlen_t len, upb_strlen_t total_len); @@ -290,6 +237,7 @@ INLINE const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) /* upb_src implementation for upb_decoder. ************************************/ bool upb_decoder_get_v_uint32(upb_decoder *d, uint32_t *key); +bool upb_decoder_skipval(upb_decoder *d); upb_fielddef *upb_decoder_getdef(upb_decoder *d) { @@ -305,7 +253,7 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) // Handles the packed field case. if(d->field) return d->field; - again: +again: if(!upb_decoder_get_v_uint32(d, &key)) { return NULL; } @@ -420,32 +368,7 @@ err: return false; } -bool upb_decoder_skipval(upb_decoder *d) { - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint32_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - uint8_t bytes = upb_skip_v_uint64(buf); - if(bytes > 10) return false; - upb_decoder_skipbytes(d, bytes); - return true; - } - case UPB_WIRE_TYPE_64BIT: - return upb_decoder_skipbytes(d, 8); - case UPB_WIRE_TYPE_32BIT: - return upb_decoder_skipbytes(d, 4); - case UPB_WIRE_TYPE_START_GROUP: - return upb_decoder_skipgroup(d); - case UPB_WIRE_TYPE_DELIMITED: - // Works for both string/bytes *and* submessages. - return upb_decoder_skipbytes(d, d->delimited_len); - default: - // Including UPB_WIRE_TYPE_END_GROUP. 
- assert(false); - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); - return false; - } -} +static bool upb_decoder_skipgroup(upb_decoder *d); bool upb_decoder_startmsg(upb_decoder *d) { d->top->field = d->field; @@ -480,3 +403,99 @@ bool upb_decoder_endmsg(upb_decoder *d) { return false; } } + +bool upb_decoder_skipval(upb_decoder *d) { + upb_strlen_t bytes_to_skip; + switch(d->wire_type) { + case UPB_WIRE_TYPE_64BIT: + bytes_to_skip = 8; + break; + case UPB_WIRE_TYPE_32BIT: + bytes_to_skip = 4; + break; + case UPB_WIRE_TYPE_DELIMITED: + // Works for both string/bytes *and* submessages. + bytes_to_skip = d->delimited_len; + break; + case UPB_WIRE_TYPE_VARINT: { + uint32_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + bytes_to_skip = upb_skip_v_uint64(buf); + if(bytes_to_skip > 10) return false; + break; + } + case UPB_WIRE_TYPE_START_GROUP: + if(!upb_decoder_startmsg(d)) return false; + if(!upb_decoder_skipgroup(d)) return false; + if(!upb_decoder_endmsg(d)) return false; + return true; + default: + // Including UPB_WIRE_TYPE_END_GROUP. + assert(false); + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); + return false; + } + upb_decoder_skipbytes(d, bytes_to_skip); + return true; +} + +static bool upb_decoder_skipgroup(upb_decoder *d) +{ + // This will be mututally recursive with upb_decoder_skipval() if the group + // has sub-groups. If we wanted to handle EAGAIN in the future, this + // approach would not work; we would need to track the group depth + // explicitly. + while(upb_decoder_getdef(d)) { + if(!upb_decoder_skipval(d)) return false; + } + // If we are at the end of the group like we want to be, then + // upb_decoder_getdef() returned NULL because of eof, not error. 
+ if(!&d->src.eof) return false; + return true; +} + +upb_src_vtable upb_decoder_src_vtbl = { + (upb_src_getdef_fptr)&upb_decoder_getdef, + (upb_src_getval_fptr)&upb_decoder_getval, + (upb_src_skipval_fptr)&upb_decoder_skipval, + (upb_src_startmsg_fptr)&upb_decoder_startmsg, + (upb_src_endmsg_fptr)&upb_decoder_endmsg, +}; + + +/* upb_decoder construction/destruction. **************************************/ + +upb_decoder *upb_decoder_new(upb_msgdef *msgdef) +{ + upb_decoder *d = malloc(sizeof(*d)); + d->toplevel_msgdef = msgdef; + d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; + d->nextbuf = NULL; + d->str = upb_string_new(); + upb_src_init(&d->src, &upb_decoder_src_vtbl); + return d; +} + +void upb_decoder_free(upb_decoder *d) +{ + free(d); +} + +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) +{ + if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); + if(d->nextbuf) upb_bytesrc_recycle(d->bytesrc, d->nextbuf); + d->top = d->stack; + d->top->msgdef = d->toplevel_msgdef; + // The top-level message is not delimited (we can keep receiving data for it + // indefinitely), so we set the end offset as high as possible, but not equal + // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. + d->top->end_offset = UINT32_MAX - 1; + d->bytesrc = bytesrc; + d->buf = NULL; + d->nextbuf = NULL; + d->buf_bytesleft = 0; + d->buf_endoffset = 0; +} + diff --git a/src/upb_srcsink_vtbl.h b/src/upb_srcsink_vtbl.h index 66cd3c2..45c5825 100644 --- a/src/upb_srcsink_vtbl.h +++ b/src/upb_srcsink_vtbl.h @@ -78,7 +78,7 @@ struct upb_bytesrc { bool eof; }; -INLINE void upb_sink_init(upb_src *s, upb_src_vtable *vtbl) { +INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; #ifndef DEBUG -- cgit v1.2.3 From 768644817b34e79551aed5cffe7d7312754c5035 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 10 Jun 2010 21:45:37 -0700 Subject: More decoder work. 
--- src/upb_decoder.c | 274 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 159 insertions(+), 115 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 263343c..900f5e2 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -18,88 +18,9 @@ static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return (1 << wt) & upb_types[ft].allowed_wire_types; } -/* Functions to read wire values. *********************************************/ - -static uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10 -// bytes are available at buf. Returns the number of bytes consumed, or 11 if -// the varint was unterminated after 10 bytes. -INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - *val = *buf & 0x7f; - if((*buf & 0x80) == 0) return 1; - return upb_get_v_uint64_full(buf + 1, val); -} - -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -INLINE uint8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val) -{ - uint64_t val64; - int8_t ret = upb_get_v_uint64(buf, &val64); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} - -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller -// promises that 4 bytes are available at buf. -INLINE void upb_get_f_uint32(const uint8_t *buf, uint32_t *val) -{ -#if UPB_UNALIGNED_READS_OK - *val = *(uint32_t*)buf; -#else -#define SHL(val, bits) ((uint32_t)val << bits) - *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24); -#undef SHL -#endif -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller -// promises that 8 bytes are available at buf. 
-INLINE void upb_get_f_uint64(const uint8_t *buf, uint64_t *val) -{ -#if UPB_UNALIGNED_READS_OK - *val = *(uint64_t*)buf; -#else -#define SHL(val, bits) ((uint64_t)val << bits) - *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) | - SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56); -#undef SHL -#endif -} - -// Skips a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that 10 -// bytes are available at "buf". Returns the number of bytes that were -// skipped. -INLINE uint8_t upb_skip_v_uint64(const uint8_t *buf) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < maxend && (last & 0x80); buf++) - last = *buf; - - return 0; // TODO -} - -// Parses remining bytes of a 64-bit varint that has already had its first byte -// parsed. -static uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val) -{ - uint8_t bytes = 0; - - // bitpos starts at 7 because our caller already read one byte. - for(int bitpos = 7; bytes < 10 && (*buf & 0x80); buf++, bitpos += 7) - *val |= (uint64_t)(*buf & 0x7F) << bitpos; - - return bytes; -} - // Performs zig-zag decoding, which is used by sint32 and sint64. -INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } +static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } /* upb_decoder ****************************************************************/ @@ -172,7 +93,7 @@ static void upb_decoder_advancebuf(upb_decoder *d) static bool upb_decoder_pullnextbuf(upb_decoder *d) { - if(!d->nextbuf) { + if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { d->nextbuf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE); if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { // There was an error in the byte stream, halt the decoder. 
@@ -193,13 +114,20 @@ static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) return true; } -upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, - upb_strlen_t len, upb_strlen_t total_len); +static upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, + upb_strlen_t len, upb_strlen_t total_len) +{ + upb_strlen_t copy = UPB_MIN(total_len - len, upb_string_len(frombuf)); + //memcpy(buf, upb_string_getrobuf(frombuf) ) + return 0; +} static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) { - upb_decoder_pullnextbuf(d); - upb_decoder_advancebuf(d); + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { + upb_decoder_pullnextbuf(d); + upb_decoder_advancebuf(d); + } if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { *bytes = d->buf_bytesleft; return (uint8_t*)upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - @@ -221,7 +149,10 @@ static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) // those bytes span multiple buffers). *bytes is set to the number of actual // stream bytes that are available in the returned buffer. If // *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. -INLINE const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) +// +// After the data has been read, upb_decoder_consume() should be called to +// indicate how many bytes were consumed. +static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) { if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { // The common case; only when we get to the last ten bytes of the buffer @@ -234,6 +165,116 @@ INLINE const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) } } +static void upb_decoder_consume(upb_decoder *d, uint32_t bytes) +{ + assert(bytes <= UPB_MAX_ENCODED_SIZE); + //if() + d->buf_bytesleft -= bytes; + //if(d->buf_bytesleft > upb_string_length()) +} + + +/* Functions to read wire values. 
*********************************************/ + +// Parses remining bytes of a 64-bit varint that has already had its first byte +// parsed. +INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + const uint8_t *start = buf; + if(!buf) return false; + + *high = 0; + uint32_t b; + b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + if(bytes_available >= 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " + "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); + } else { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle " + "of a varint, stream offset: %u", upb_decoder_offset(d)); + } + return false; + +done: + upb_decoder_consume(d, buf - start); + return true; +} + +// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit +// varint is not a true wire type. +static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val) +{ + uint32_t high; + if(!upb_decoder_readv64(d, val, &high)) return false; + + // We expect the high bits to be zero, except that signed 32-bit values are + // first sign-extended to be wire-compatible with 64 bits, in which case we + // expect the high bits to be all one. 
+ if(high != 0 && ~high != 0) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but " + "the high bits contained data we should not truncate: " + "%ux, stream offset: %u", high, upb_decoder_offset(d)); + return false; + } + return true; +} + +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller +// promises that 4 bytes are available at buf. +static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(bytes_available < 4) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Stream ended in the middle of a 32-bit value"); + return false; + } + memcpy(val, buf, 4); + // TODO: byte swap if big-endian. + return true; +} + +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller +// promises that 8 bytes are available at buf. +static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(bytes_available < 8) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Stream ended in the middle of a 64-bit value"); + return false; + } + memcpy(val, buf, 8); + // TODO: byte swap if big-endian. + return true; +} + +// Returns the length of a varint (wire type: UPB_WIRE_TYPE_VARINT), allowing +// it to be easily skipped. Caller promises that 10 bytes are available at +// "buf". The function will return a maximum of 11 bytes before quitting. +static uint8_t upb_varint_length(const uint8_t *buf) +{ + uint8_t i; + for(i = 0; i < 10 && buf[i] & 0x80; i++) + ; // empty loop body. + return i + 1; +} + + /* upb_src implementation for upb_decoder. 
************************************/ bool upb_decoder_get_v_uint32(upb_decoder *d, uint32_t *key); @@ -254,15 +295,13 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) if(d->field) return d->field; again: - if(!upb_decoder_get_v_uint32(d, &key)) { - return NULL; - } + if(!upb_decoder_readv32(d, &key)) return NULL; wire_type = key & 0x7; if(wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. - if(!upb_decoder_get_v_uint32(d, &d->delimited_len)) return NULL; + if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL; } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { if(d->top->end_offset == UPB_GROUP_END_OFFSET) { d->src.eof = true; @@ -329,43 +368,45 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) } d->field = NULL; } else { - // For all of the integer types we need the bytes to be in a single - // contiguous buffer. - uint32_t bytes_available; - uint32_t bytes_consumed; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); switch(native_wire_type) { - case UPB_WIRE_TYPE_VARINT: - if((bytes_consumed = upb_get_v_uint32(buf, val.uint32)) > 10) goto err; - if(d->field->type == UPB_TYPE(SINT64)) *val.int64 = upb_zzdec_64(*val.int64); + case UPB_WIRE_TYPE_VARINT: { + uint32_t low, high; + if(!upb_decoder_readv64(d, &low, &high)) return false; + uint64_t u64 = ((uint64_t)high << 32) | low; + if(d->field->type == UPB_TYPE(SINT64)) + *val.int64 = upb_zzdec_64(u64); + else + *val.uint64 = u64; break; - case UPB_WIRE_TYPE_32BIT_VARINT: - if((bytes_consumed = upb_get_v_uint64(buf, val.uint64)) > 5) goto err; - if(d->field->type == UPB_TYPE(SINT32)) *val.int32 = upb_zzdec_32(*val.int32); + } + case UPB_WIRE_TYPE_32BIT_VARINT: { + uint32_t u32; + if(!upb_decoder_readv32(d, &u32)) return false; + if(d->field->type == UPB_TYPE(SINT32)) + *val.int32 = upb_zzdec_32(u32); + else + *val.uint32 = u32; break; + } case UPB_WIRE_TYPE_64BIT: - bytes_consumed = 8; - if(bytes_available < 
bytes_consumed) goto err; - upb_get_f_uint64(buf, val.uint64); + if(!upb_decoder_readf64(d, val.uint64)) return false; break; case UPB_WIRE_TYPE_32BIT: - bytes_consumed = 4; - if(bytes_available < bytes_consumed) goto err; - upb_get_f_uint32(buf, val.uint32); + if(!upb_decoder_readf32(d, val.uint32)) return false; break; default: - // Including start/end group. - goto err; + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Attempted to call getval on a group."); + return false; } - d->buf_bytesleft -= bytes_consumed; + // For a packed field where we have not reached the end, we leave the field + // in the decoder so we will return it again without parsing a key. if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || upb_decoder_offset(d) >= d->packed_end_offset) { d->field = NULL; } } return true; -err: - return false; } static bool upb_decoder_skipgroup(upb_decoder *d); @@ -420,8 +461,11 @@ bool upb_decoder_skipval(upb_decoder *d) { case UPB_WIRE_TYPE_VARINT: { uint32_t bytes_available; const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - bytes_to_skip = upb_skip_v_uint64(buf); - if(bytes_to_skip > 10) return false; + bytes_to_skip = upb_varint_length(buf); + if(bytes_to_skip > 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint."); + return false; + } break; } case UPB_WIRE_TYPE_START_GROUP: -- cgit v1.2.3 From d7e631d9b05e19662802ac8f0727adff0a2d9f98 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 10 Jun 2010 22:10:37 -0700 Subject: Yet more. 
--- src/upb_decoder.c | 56 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 900f5e2..3a1c8f9 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -165,7 +165,7 @@ static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) } } -static void upb_decoder_consume(upb_decoder *d, uint32_t bytes) +static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) { assert(bytes <= UPB_MAX_ENCODED_SIZE); //if() @@ -222,6 +222,11 @@ static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val) // We expect the high bits to be zero, except that signed 32-bit values are // first sign-extended to be wire-compatible with 64 bits, in which case we // expect the high bits to be all one. + // + // We could perform a slightly more sophisticated check by having the caller + // indicate whether a signed or unsigned value is being read. We could check + // that the high bits are all zeros for unsigned, and properly sign-extended + // for signed. if(high != 0 && ~high != 0) { upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but " "the high bits contained data we should not truncate: " @@ -237,6 +242,7 @@ static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) { upb_strlen_t bytes_available; const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(!buf) return false; if(bytes_available < 4) { upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle of a 32-bit value"); @@ -244,7 +250,7 @@ static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) } memcpy(val, buf, 4); // TODO: byte swap if big-endian. - return true; + return upb_decoder_consume(d, 4); } // Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). 
Caller @@ -253,6 +259,7 @@ static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val) { upb_strlen_t bytes_available; const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(!buf) return false; if(bytes_available < 8) { upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle of a 64-bit value"); @@ -260,18 +267,25 @@ static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val) } memcpy(val, buf, 8); // TODO: byte swap if big-endian. - return true; + return upb_decoder_consume(d, 8); } // Returns the length of a varint (wire type: UPB_WIRE_TYPE_VARINT), allowing // it to be easily skipped. Caller promises that 10 bytes are available at // "buf". The function will return a maximum of 11 bytes before quitting. -static uint8_t upb_varint_length(const uint8_t *buf) +static uint8_t upb_decoder_skipv64(upb_decoder *d) { + uint32_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(!buf) return false; uint8_t i; for(i = 0; i < 10 && buf[i] & 0x80; i++) ; // empty loop body. - return i + 1; + if(i > 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint."); + return false; + } + return upb_decoder_consume(d, i); } @@ -448,25 +462,8 @@ bool upb_decoder_endmsg(upb_decoder *d) { bool upb_decoder_skipval(upb_decoder *d) { upb_strlen_t bytes_to_skip; switch(d->wire_type) { - case UPB_WIRE_TYPE_64BIT: - bytes_to_skip = 8; - break; - case UPB_WIRE_TYPE_32BIT: - bytes_to_skip = 4; - break; - case UPB_WIRE_TYPE_DELIMITED: - // Works for both string/bytes *and* submessages. 
- bytes_to_skip = d->delimited_len; - break; case UPB_WIRE_TYPE_VARINT: { - uint32_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - bytes_to_skip = upb_varint_length(buf); - if(bytes_to_skip > 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint."); - return false; - } - break; + return upb_decoder_skipv64(d); } case UPB_WIRE_TYPE_START_GROUP: if(!upb_decoder_startmsg(d)) return false; @@ -478,9 +475,18 @@ bool upb_decoder_skipval(upb_decoder *d) { assert(false); upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); return false; + case UPB_WIRE_TYPE_64BIT: + bytes_to_skip = 8; + break; + case UPB_WIRE_TYPE_32BIT: + bytes_to_skip = 4; + break; + case UPB_WIRE_TYPE_DELIMITED: + // Works for both string/bytes *and* submessages. + bytes_to_skip = d->delimited_len; + break; } - upb_decoder_skipbytes(d, bytes_to_skip); - return true; + return upb_decoder_skipbytes(d, bytes_to_skip); } static bool upb_decoder_skipgroup(upb_decoder *d) -- cgit v1.2.3 From edd1f5a61f3323992072fb71b6088f7ec485ff7d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 11 Jun 2010 12:12:39 -0700 Subject: Work on decoder buffering. --- src/upb_decoder.c | 148 +++++++++++++++++++++++++++++------------------------- src/upb_srcsink.h | 5 ++ 2 files changed, 85 insertions(+), 68 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 3a1c8f9..601c4c8 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -42,19 +42,21 @@ struct upb_decoder { // We keep a stack of messages we have recursed into. upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; - // The buffers of input data. See buffering code below for details. + // The buffer of input data. NULL is equivalent to the empty string. upb_string *buf; - upb_string *nextbuf; - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; // Used to bridge buf and nextbuf. + + // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain. 
+ uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; // The number of bytes we have yet to consume from "buf". This can be // negative if we have skipped more bytes than are in the buffer, or if we // have started to consume bytes from "nextbuf". int32_t buf_bytesleft; + int32_t buf_offset; // The overall stream offset of the end of "buf". If "buf" is NULL, it is as // if "buf" was the empty string. - uint32_t buf_endoffset; + uint32_t buf_stream_offset; // Fielddef for the key we just read. upb_fielddef *field; @@ -76,29 +78,39 @@ struct upb_decoder { static upb_strlen_t upb_decoder_offset(upb_decoder *d) { - return d->buf_endoffset - d->buf_bytesleft; + return d->buf_stream_offset - d->buf_offset; } -// Discards the current buffer if we are done with it, make the next buffer -// current if there is one. -static void upb_decoder_advancebuf(upb_decoder *d) +static bool upb_decoder_nextbuf(upb_decoder *d) { - if(d->buf_bytesleft <= 0) { - if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); - d->buf = d->nextbuf; - d->nextbuf = NULL; - if(d->buf) d->buf_bytesleft += upb_string_len(d->buf); + assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE); + + // Copy residual bytes to temporary buffer. + if(d->buf_bytesleft > 0) { + memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset, + d->buf_bytesleft); } -} -static bool upb_decoder_pullnextbuf(upb_decoder *d) -{ - if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { - d->nextbuf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE); - if(!d->nextbuf && !upb_bytesrc_eof(d->bytesrc)) { - // There was an error in the byte stream, halt the decoder. + // Recycle old buffer, pull new one. + if(d->buf) { + upb_bytesrc_recycle(d->bytesrc, d->buf); + d->buf_offset -= upb_string_len(d->buf); + d->buf_stream_offset += upb_string_len(d->buf); + } + d->buf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE); + + // Handle cases arising from error or EOF. 
+ if(d->buf) { + d->buf_bytesleft += upb_string_len(d->buf); + } else { + if(!upb_bytesrc_eof(d->bytesrc)) { + // Error from bytesrc. upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); return false; + } else if(d->buf_bytesleft == 0) { + // EOF from bytesrc and we don't have any residual bytes left. + d->src.eof = true; + return false; } } return true; @@ -106,11 +118,7 @@ static bool upb_decoder_pullnextbuf(upb_decoder *d) static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) { - d->buf_bytesleft -= bytes; - while(d->buf_bytesleft <= 0 && !upb_bytesrc_eof(d->bytesrc)) { - if(!upb_decoder_pullnextbuf(d)) return false; - upb_decoder_advancebuf(d); - } + // TODO. return true; } @@ -124,22 +132,28 @@ static upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) { - if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { - upb_decoder_pullnextbuf(d); - upb_decoder_advancebuf(d); - } - if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) + if(!upb_decoder_nextbuf(d)) return NULL; + + assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE); + + if(d->buf_offset >= 0) { + // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE + // contiguous bytes, so we can read directly out of it. 
*bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - - d->buf_bytesleft; + return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; } else { - upb_strlen_t len = 0; - if(d->buf) - len += upb_decoder_append(d->tmpbuf, d->buf, len, UPB_MAX_ENCODED_SIZE); - if(d->nextbuf) - len += upb_decoder_append(d->tmpbuf, d->nextbuf, len, UPB_MAX_ENCODED_SIZE); - *bytes = len; - memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); + upb_strlen_t residual_bytes = -d->buf_offset; + if(d->buf) { + memcpy(d->tmpbuf + residual_bytes, upb_string_getrobuf(d->buf), + UPB_MAX_ENCODED_SIZE - residual_bytes); + *bytes = 10; + } else { + // All we have are residual bytes; pad them with 0x80. + memset(d->tmpbuf + residual_bytes, 0x80, + UPB_MAX_ENCODED_SIZE - residual_bytes); + *bytes = residual_bytes; + } return d->tmpbuf; } } @@ -154,12 +168,11 @@ static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) // indicate how many bytes were consumed. static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) { - if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE) { - // The common case; only when we get to the last ten bytes of the buffer - // do we have to do tricky things. + if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) { + // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE + // contiguous bytes, so we can read directly out of it. 
*bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + upb_string_len(d->buf) - - d->buf_bytesleft; + return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; } else { return upb_decoder_getbuf_full(d, bytes); } @@ -168,9 +181,13 @@ static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) { assert(bytes <= UPB_MAX_ENCODED_SIZE); - //if() + d->buf_offset += bytes; d->buf_bytesleft -= bytes; - //if(d->buf_bytesleft > upb_string_length()) + if(d->buf_offset < 0) { + // We still have residual bytes we have not consumed. + memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); + } + return true; } @@ -208,8 +225,7 @@ INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high) return false; done: - upb_decoder_consume(d, buf - start); - return true; + return upb_decoder_consume(d, buf - start); } // Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit @@ -291,7 +307,6 @@ static uint8_t upb_decoder_skipv64(upb_decoder *d) /* upb_src implementation for upb_decoder. ************************************/ -bool upb_decoder_get_v_uint32(upb_decoder *d, uint32_t *key); bool upb_decoder_skipval(upb_decoder *d); upb_fielddef *upb_decoder_getdef(upb_decoder *d) @@ -365,20 +380,20 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) d->buf_bytesleft -= total_len; *val.str = d->str; } else { - // The string spans buffers, so we must copy from the current buffer, - // the next buffer (if we have one), and finally from the bytesrc. - uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); - upb_strlen_t len = 0; - len += upb_decoder_append(str, d->buf, len, total_len); - upb_decoder_advancebuf(d); - if(d->buf) len += upb_decoder_append(str, d->buf, len, total_len); - upb_string_getrwbuf(d->str, len); // Cheap resize. 
- if(len < total_len) { - if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } - } + //// The string spans buffers, so we must copy from the current buffer, + //// the next buffer (if we have one), and finally from the bytesrc. + //uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); + //upb_strlen_t len = 0; + //len += upb_decoder_append(str, d->buf, len, total_len); + //upb_decoder_advancebuf(d); + //if(d->buf) len += upb_decoder_append(str, d->buf, len, total_len); + //upb_string_getrwbuf(d->str, len); // Cheap resize. + //if(len < total_len) { + // if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { + // upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + // return false; + // } + //} } d->field = NULL; } else { @@ -521,7 +536,6 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; d->buf = NULL; - d->nextbuf = NULL; d->str = upb_string_new(); upb_src_init(&d->src, &upb_decoder_src_vtbl); return d; @@ -535,7 +549,6 @@ void upb_decoder_free(upb_decoder *d) void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); - if(d->nextbuf) upb_bytesrc_recycle(d->bytesrc, d->nextbuf); d->top = d->stack; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it @@ -544,8 +557,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) d->top->end_offset = UINT32_MAX - 1; d->bytesrc = bytesrc; d->buf = NULL; - d->nextbuf = NULL; d->buf_bytesleft = 0; - d->buf_endoffset = 0; + d->buf_stream_offset = 0; + d->buf_offset = 0; } - diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 8e5a09d..199149d 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -26,6 +26,11 @@ extern "C" { #endif +// Note! 
The "eof" flags work like feof() in C; they cannot report end-of-file +// until a read has failed due to eof. They cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. + /* upb_src ********************************************************************/ // TODO: decide how to handle unknown fields. -- cgit v1.2.3 From 61a0f0bc5f6b3ddd8a495a05bf1a06096e99a859 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 11 Jun 2010 18:52:00 -0700 Subject: More decoder work. --- src/upb_decoder.c | 58 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 601c4c8..e6fcec5 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -39,23 +39,22 @@ struct upb_decoder { upb_msgdef *toplevel_msgdef; upb_bytesrc *bytesrc; - // We keep a stack of messages we have recursed into. - upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; - // The buffer of input data. NULL is equivalent to the empty string. upb_string *buf; // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain. uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; - // The number of bytes we have yet to consume from "buf". This can be - // negative if we have skipped more bytes than are in the buffer, or if we - // have started to consume bytes from "nextbuf". + // The number of bytes we have yet to consume from "buf" or tmpbuf. This is + // always >= 0 unless we were just reset or are eof. int32_t buf_bytesleft; + + // The offset within "buf" from where we are currently reading. This can be + // <0 if we are reading some residual bytes from the previous buffer, which + // are stored in tmpbuf and combined with bytes from "buf". int32_t buf_offset; - // The overall stream offset of the end of "buf". If "buf" is NULL, it is as - // if "buf" was the empty string. 
+ // The overall stream offset of the beginning of "buf". uint32_t buf_stream_offset; // Fielddef for the key we just read. @@ -71,6 +70,9 @@ struct upb_decoder { // String we return for string values. We try to recycle it if possible. upb_string *str; + + // We keep a stack of messages we have recursed into. + upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; }; @@ -116,24 +118,15 @@ static bool upb_decoder_nextbuf(upb_decoder *d) return true; } -static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) -{ - // TODO. - return true; -} - -static upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, - upb_strlen_t len, upb_strlen_t total_len) -{ - upb_strlen_t copy = UPB_MIN(total_len - len, upb_string_len(frombuf)); - //memcpy(buf, upb_string_getrobuf(frombuf) ) - return 0; -} - static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) { - if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { + // GCC is currently complaining about use of an uninitialized value if we + // don't set this now. I think this is incorrect, but leaving this in + // to suppress the warning for now. + *bytes = 0; if(!upb_decoder_nextbuf(d)) return NULL; + } assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE); @@ -187,9 +180,28 @@ static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) // We still have residual bytes we have not consumed. 
memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); } + assert(d->buf_bytesleft >= 0); + return true; +} + +static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) +{ + d->buf_offset += bytes; + d->buf_bytesleft -= bytes; + while(d->buf_bytesleft < 0) { + if(!upb_decoder_nextbuf(d)) return false; + } return true; } +static upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, + upb_strlen_t len, upb_strlen_t total_len) +{ + upb_strlen_t copy = UPB_MIN(total_len - len, upb_string_len(frombuf)); + //memcpy(buf, upb_string_getrobuf(frombuf) ) + return 0; +} + /* Functions to read wire values. *********************************************/ -- cgit v1.2.3 From 00b403a7373d783744bebdffde3696824d68b745 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 12 Jun 2010 11:03:03 -0700 Subject: Decoder is finally complete, now just needs testing. --- src/upb_decoder.c | 68 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index e6fcec5..6f1e437 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -136,17 +136,18 @@ static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) *bytes = d->buf_bytesleft; return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; } else { - upb_strlen_t residual_bytes = -d->buf_offset; + // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we + // have so far. + upb_strlen_t len = -d->buf_offset; if(d->buf) { - memcpy(d->tmpbuf + residual_bytes, upb_string_getrobuf(d->buf), - UPB_MAX_ENCODED_SIZE - residual_bytes); - *bytes = 10; - } else { - // All we have are residual bytes; pad them with 0x80. 
- memset(d->tmpbuf + residual_bytes, 0x80, - UPB_MAX_ENCODED_SIZE - residual_bytes); - *bytes = residual_bytes; + upb_strlen_t to_copy = + UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf)); + memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy); + len += to_copy; } + // Pad the buffer out to UPB_MAX_ENCODED_SIZE. + memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); + *bytes = len; return d->tmpbuf; } } @@ -194,14 +195,6 @@ static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) return true; } -static upb_strlen_t upb_decoder_append(uint8_t *buf, upb_string *frombuf, - upb_strlen_t len, upb_strlen_t total_len) -{ - upb_strlen_t copy = UPB_MIN(total_len - len, upb_string_len(frombuf)); - //memcpy(buf, upb_string_getrobuf(frombuf) ) - return 0; -} - /* Functions to read wire values. *********************************************/ @@ -383,29 +376,38 @@ bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) // technically a string, but can be gotten as one to perform lazy parsing. d->str = upb_string_tryrecycle(d->str); const upb_strlen_t total_len = d->delimited_len; - if ((int32_t)total_len <= d->buf_bytesleft) { + if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { // The entire string is inside our current buffer, so we can just // return a substring of the buffer without copying. upb_string_substr(d->str, d->buf, upb_string_len(d->buf) - d->buf_bytesleft, total_len); - d->buf_bytesleft -= total_len; + upb_decoder_skipbytes(d, total_len); *val.str = d->str; } else { - //// The string spans buffers, so we must copy from the current buffer, - //// the next buffer (if we have one), and finally from the bytesrc. 
- //uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); - //upb_strlen_t len = 0; - //len += upb_decoder_append(str, d->buf, len, total_len); - //upb_decoder_advancebuf(d); - //if(d->buf) len += upb_decoder_append(str, d->buf, len, total_len); - //upb_string_getrwbuf(d->str, len); // Cheap resize. - //if(len < total_len) { - // if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { - // upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - // return false; - // } - //} + // The string spans buffers, so we must copy from the current buffer, + // the next buffer (if we have one), and finally from the bytesrc. + uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); + upb_strlen_t len = 0; + if(d->buf_offset < 0) { + // Residual bytes we need to copy from tmpbuf. + memcpy(str, d->tmpbuf, -d->buf_offset); + len += -d->buf_offset; + } + if(d->buf) { + upb_strlen_t to_copy = + UPB_MIN(total_len - len, upb_string_len(d->buf) - d->buf_offset); + memcpy(str + len, upb_string_getrobuf(d->buf) + d->buf_offset, to_copy); + } + upb_decoder_skipbytes(d, len); + upb_string_getrwbuf(d->str, len); // Cheap resize. + if(len < total_len) { + if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return false; + } + d->buf_stream_offset += total_len - len; + } } d->field = NULL; } else { -- cgit v1.2.3 From 35e5c248bee19703b7e3c9e43d7bd8fd7aa2a79d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 14 Jun 2010 10:47:56 -0700 Subject: Work to make upb_def consume a upb_src. --- src/upb_def.c | 498 ++++++++++++++++++++++++------------------------------ src/upb_def.h | 15 +- src/upb_encoder.h | 49 ++---- 3 files changed, 244 insertions(+), 318 deletions(-) diff --git a/src/upb_def.c b/src/upb_def.c index e770025..9f34b42 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -18,6 +18,29 @@ static int div_round_up(int numerator, int denominator) { return numerator > 0 ? 
(numerator - 1) / denominator + 1 : 0; } +// A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +}; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8 + l->defs = malloc(l->size); + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { free(l->defs); } + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->defs_len == l->defs_size) { + l->defs_size *= 2; + l->defs = realloc(l->defs, l->defs_size); + } + l->defs[l->defs_len++] = d; +} + /* upb_def ********************************************************************/ // Defs are reference counted, but can have cycles when types are @@ -153,7 +176,7 @@ static void upb_def_init(upb_def *def, enum upb_def_type type, def->type = type; def->is_cyclic = 0; // We detect this later, after resolving refs. def->search_depth = 0; - def->fqname = upb_string_getref(fqname, UPB_REF_FROZEN); + def->fqname = NULL; upb_atomic_refcount_init(&def->refcount, 1); } @@ -340,49 +363,6 @@ typedef struct { upb_strptr string; } iton_ent; -static void insert_enum_value(upb_src *src, upb_enumdef *e) -{ - upb_src_startmsg(src); - int32_t number = -1; - upb_string *name = NULL; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - upb_src_getval(src, &number); - break; - case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: - upb_src_getval(src, &name); - break; - default: - upb_src_skipval(src); - } - } - upb_src_endmsg(src); - ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; - iton_ent iton_ent = {{value->number, 0}, value->name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, &iton_ent.e); -} - -static upb_enumdef *enumdef_new(upb_src *src, upb_strptr fqname) -{ - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM, fqname); - upb_strtable_init(&e->ntoi, 0, 
sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); - upb_src_startmsg(src); - - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - if(f->number == GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM) { - insert_enum_value(src, e); - } else { - upb_src_skipval(src); - } - } - return e; -} - static void enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -420,7 +400,7 @@ bool upb_enum_done(upb_enum_iter *iter) { typedef struct { upb_strtable_entry e; upb_def *def; -} symtab_ent; +} upb_symtab_ent; /* Search for a character in a string, in reverse. */ static int my_memrchr(char *data, char c, size_t len) @@ -469,7 +449,7 @@ static symtab_ent *resolve(upb_strtable *t, upb_strptr base, upb_strptr symbol) * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ -static upb_strptr join(upb_strptr base, upb_strptr name) { +static upb_string *upb_join(upb_string *base, upb_string *name) { upb_strptr joined = upb_strdup(base); upb_strlen_t len = upb_strlen(joined); if(len > 0) { @@ -479,201 +459,123 @@ static upb_strptr join(upb_strptr base, upb_strptr name) { return joined; } -static upb_strptr try_define(upb_strtable *t, upb_strptr base, - upb_strptr name, upb_status *status) +static void upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) { - if(upb_string_isnull(name)) { - upb_seterr(status, UPB_STATUS_ERROR, - "symbol in context '" UPB_STRFMT "' does not have a name", - UPB_STRARG(base)); - return UPB_STRING_NULL; - } - upb_strptr fqname = join(base, name); - if(upb_strtable_lookup(t, fqname)) { - upb_seterr(status, UPB_STATUS_ERROR, - "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(fqname)); - upb_string_unref(fqname); - return UPB_STRING_NULL; + upb_src_startmsg(src); + int32_t number = -1; + upb_string *name = NULL; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case 
GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + upb_src_getval(src, &number); + break; + case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: + upb_src_getval(src, &name); + break; + default: + upb_src_skipval(src); + } } - return fqname; + upb_src_endmsg(src); + ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; + iton_ent iton_ent = {{value->number, 0}, value->name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, &iton_ent.e); } -static void insert_enum(upb_strtable *t, - google_protobuf_EnumDescriptorProto *ed, - upb_strptr base, upb_status *status) +static void upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { - upb_strptr name = ed->set_flags.has.name ? ed->name : UPB_STRING_NULL; - upb_strptr fqname = try_define(t, base, name, status); - if(upb_string_isnull(fqname)) return; - - symtab_ent e; - e.e.key = fqname; - e.def = UPB_UPCAST(enumdef_new(ed, fqname)); - upb_strtable_insert(t, &e.e); - upb_string_unref(fqname); -} + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM, fqname); + upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + CHECK(upb_src_startmsg(src)); -static void insert_message(upb_strtable *t, google_protobuf_DescriptorProto *d, - upb_strptr base, bool sort, upb_status *status) -{ - upb_strptr name = d->set_flags.has.name ? d->name : UPB_STRING_NULL; - upb_strptr fqname = try_define(t, base, name, status); - if(upb_string_isnull(fqname)) return; - - int num_fields = d->set_flags.has.field ? - google_protobuf_FieldDescriptorProto_array_len(d->field) : 0; - symtab_ent e; - e.e.key = fqname; - - // Gather our list of fields, sorting if necessary. 
- upb_fielddef **fielddefs = malloc(sizeof(*fielddefs) * num_fields); - for (int i = 0; i < num_fields; i++) { - google_protobuf_FieldDescriptorProto *fd = - google_protobuf_FieldDescriptorProto_array_get(d->field, i); - fielddefs[i] = fielddef_new(fd); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECK(upb_addenum_val(src, e, status)); + break; + default: + upb_src_skipval(src); + break; + } } - if(sort) fielddef_sort(fielddefs, num_fields); - - // Create the msgdef with that list of fields. - e.def = UPB_UPCAST(msgdef_new(fielddefs, num_fields, fqname, status)); - - // Cleanup. - for (int i = 0; i < num_fields; i++) fielddef_free(fielddefs[i]); - free(fielddefs); - - if(!upb_ok(status)) goto error; - - upb_strtable_insert(t, &e.e); - - /* Add nested messages and enums. */ - if(d->set_flags.has.nested_type) - for(unsigned int i = 0; i < google_protobuf_DescriptorProto_array_len(d->nested_type); i++) - insert_message(t, google_protobuf_DescriptorProto_array_get(d->nested_type, i), fqname, sort, status); - - if(d->set_flags.has.enum_type) - for(unsigned int i = 0; i < google_protobuf_EnumDescriptorProto_array_len(d->enum_type); i++) - insert_enum(t, google_protobuf_EnumDescriptorProto_array_get(d->enum_type, i), fqname, status); - -error: - // Free the ref we got from try_define(). - upb_string_unref(fqname); + upb_deflist_push(e); } -static bool find_cycles(upb_msgdef *m, int search_depth, upb_status *status) +// Processes a google.protobuf.DescriptorProto, adding defs to "deflist." +static void upb_addmsg(upb_src *src, upb_deflist *deflist, upb_status *status) { - if(search_depth > UPB_MAX_TYPE_DEPTH) { - // There are many situations in upb where we recurse over the type tree - // (like for example, right now) and an absurdly deep tree could cause us - // to stack overflow on systems with very limited stacks. 
- upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " - "depth %d in the type graph, which exceeds the maximum type " - "depth of %d.", UPB_UPCAST(m)->fqname, search_depth, - UPB_MAX_TYPE_DEPTH); - return false; - } else if(UPB_UPCAST(m)->search_depth == 1) { - // Cycle! - int cycle_len = search_depth - 1; - if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " - "in a cycle of length %d, which exceeds the maximum type " - "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, - UPB_MAX_TYPE_CYCLE_LEN); + upb_msgdef *m = malloc(sizeof(*m)); + upb_def_init(&m->base, UPB_DEF_MSG); + upb_atomic_refcount_init(&m->cycle_refcount, 0); + upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent)); + upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent)); + m->num_fields = 0; + m->fields = malloc(sizeof(upb_fielddef) * num_fields); + int32_t start_count = defs->len; + + CHECK(upb_src_startmsg(src)); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(m->fqname); + CHECK(upb_src_getval(src, &m->fqname)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_FIELD_NUM: + CHECK(upb_addfield(src, m)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NESTED_TYPE_NUM: + CHECK(upb_addmsg(src, deflist)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: + CHECK(upb_addenum(src, deflist)); + break; + default: + // TODO: extensions. + upb_src_skipval(src); } - return true; - } else if(UPB_UPCAST(m)->search_depth > 0) { - // This was a cycle, but did not originate from the base of our search tree. - // We'll find it when we call find_cycles() on this node directly. 
+ } + CHECK(upb_src_eof(src) && upb_src_endmsg(src)); + if(!m->fqname) { + upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); return false; - } else { - UPB_UPCAST(m)->search_depth = ++search_depth; - bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - if(!upb_issubmsg(f)) continue; - upb_def *sub_def = f->def; - upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); - if(find_cycles(sub_m, search_depth, status)) { - cycle_found = true; - UPB_UPCAST(m)->is_cyclic = true; - if(f->owned) { - upb_atomic_unref(&sub_def->refcount); - f->owned = false; - } - } - } - UPB_UPCAST(m)->search_depth = 0; - return cycle_found; } + upb_qualify(defs, m->fqname, start_count); + upb_deflist_push(m); + return true; } -static void addfd(upb_strtable *addto, upb_strtable *existingdefs, - google_protobuf_FileDescriptorProto *fd, bool sort, - upb_status *status) +// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". +static void upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) { - upb_strptr pkg; - if(fd->set_flags.has.package) { - pkg = upb_string_getref(fd->package, UPB_REF_FROZEN); - } else { - pkg = upb_string_new(); - } - - if(fd->set_flags.has.message_type) - for(unsigned int i = 0; i < google_protobuf_DescriptorProto_array_len(fd->message_type); i++) - insert_message(addto, google_protobuf_DescriptorProto_array_get(fd->message_type, i), pkg, sort, status); - - if(fd->set_flags.has.enum_type) - for(unsigned int i = 0; i < google_protobuf_EnumDescriptorProto_array_len(fd->enum_type); i++) - insert_enum(addto, google_protobuf_EnumDescriptorProto_array_get(fd->enum_type, i), pkg, status); - - upb_string_unref(pkg); - - if(!upb_ok(status)) { - // TODO: make sure we don't leak any memory in this case. - return; - } - - /* TODO: handle extensions and services. */ - - // Attempt to resolve all references. 
- symtab_ent *e; - for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - upb_strptr base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_strptr name = upb_downcast_unresolveddef(f->def)->name; - symtab_ent *found = resolve(existingdefs, base, name); - if(!found) found = resolve(addto, base, name); - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(!found) { - upb_seterr(status, UPB_STATUS_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); - return; - } else if(found->def->type != expected) { - upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); - return; - } - upb_msgdef_resolve(m, f, found->def); + CHECK(upb_src_startmsg(src)); + upb_string *package = NULL; + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(package); + CHECK(upb_src_getval(src, &package)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_NUM: + CHECK(upb_addmsg(src, defs)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: + CHECK(upb_addenum(src, defs)); + break; + default: + // TODO: services and extensions. + upb_src_skipval(src); } } + CHECK(upb_src_eof(src) && upb_src_endmsg(src)); - // Deal with type cycles. - for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - - // Do an initial pass over the graph to check that there are no cycles - // longer than the maximum length. We also mark all cyclic defs as such, - // and decrement refs on cyclic defs. 
- find_cycles(m, 0, status); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - cycle_ref_or_unref(m, NULL, open_defs, 0, true); - } + upb_qualify(deflist, package, 0); + upb_string_unref(package); } /* upb_symtab *****************************************************************/ @@ -684,25 +586,6 @@ upb_symtab *upb_symtab_new() upb_atomic_refcount_init(&s->refcount, 1); upb_rwlock_init(&s->lock); upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent)); - upb_strtable_init(&s->psymtab, 16, sizeof(symtab_ent)); - - // Add descriptor.proto types to private symtable so we can parse descriptors. - // We know there is only 1. - google_protobuf_FileDescriptorProto *fd = - google_protobuf_FileDescriptorProto_array_get(upb_file_descriptor_set->file, 0); - upb_status status = UPB_STATUS_INIT; - addfd(&s->psymtab, &s->symtab, fd, false, &status); - if(!upb_ok(&status)) { - fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg); - assert(false); - return NULL; // Indicates that upb is buggy or corrupt. - } - upb_static_string name = - UPB_STATIC_STRING_INIT("google.protobuf.FileDescriptorSet"); - upb_strptr nameptr = UPB_STATIC_STRING_PTR_INIT(name); - symtab_ent *e = upb_strtable_lookup(&s->psymtab, nameptr); - assert(e); - s->fds_msgdef = upb_downcast_msgdef(e->def); return s; } @@ -770,53 +653,110 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { return ret; } -void upb_symtab_addfds(upb_symtab *s, google_protobuf_FileDescriptorSet *fds, - upb_status *status) +static bool upb_symtab_findcycles(upb_msgdef *m, int search_depth, upb_status *status) { - if(fds->set_flags.has.file) { - // Insert new symbols into a temporary table until we have verified that - // the descriptor is valid. 
- upb_strtable tmp; - upb_strtable_init(&tmp, 0, sizeof(symtab_ent)); - - { // Read lock scope - upb_rwlock_rdlock(&s->lock); - for(uint32_t i = 0; i < google_protobuf_FileDescriptorProto_array_len(fds->file); i++) { - addfd(&tmp, &s->symtab, google_protobuf_FileDescriptorProto_array_get(fds->file, i), true, status); - if(!upb_ok(status)) { - free_symtab(&tmp); - upb_rwlock_unlock(&s->lock); - return; + if(search_depth > UPB_MAX_TYPE_DEPTH) { + // There are many situations in upb where we recurse over the type tree + // (like for example, right now) and an absurdly deep tree could cause us + // to stack overflow on systems with very limited stacks. + upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " + "depth %d in the type graph, which exceeds the maximum type " + "depth of %d.", UPB_UPCAST(m)->fqname, search_depth, + UPB_MAX_TYPE_DEPTH); + return false; + } else if(UPB_UPCAST(m)->search_depth == 1) { + // Cycle! + int cycle_len = search_depth - 1; + if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { + upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " + "in a cycle of length %d, which exceeds the maximum type " + "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, + UPB_MAX_TYPE_CYCLE_LEN); + } + return true; + } else if(UPB_UPCAST(m)->search_depth > 0) { + // This was a cycle, but did not originate from the base of our search tree. + // We'll find it when we call find_cycles() on this node directly. 
+ return false; + } else { + UPB_UPCAST(m)->search_depth = ++search_depth; + bool cycle_found = false; + for(upb_field_count_t i = 0; i < m->num_fields; i++) { + upb_fielddef *f = &m->fields[i]; + if(!upb_issubmsg(f)) continue; + upb_def *sub_def = f->def; + upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); + if(find_cycles(sub_m, search_depth, status)) { + cycle_found = true; + UPB_UPCAST(m)->is_cyclic = true; + if(f->owned) { + upb_atomic_unref(&sub_def->refcount); + f->owned = false; } } - upb_rwlock_unlock(&s->lock); } + UPB_UPCAST(m)->search_depth = 0; + return cycle_found; + } +} + +// Given a list of defs, a list of extensions (in the future), and a flag +// indicating whether the new defs can overwrite existing defs in the symtab, +// attempts to add the given defs to the symtab. The whole operation either +// succeeds or fails. Ownership of "defs" and "exts" is taken. +bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, + upb_status *status) +{ + // Build a table, for duplicate detection and name resolution. + - // Everything was successfully added, copy from the tmp symtable. - { // Write lock scope - upb_rwlock_wrlock(&s->lock); - symtab_ent *e; - for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) { - // We checked for duplicates when we had only the read lock, but it is - // theoretically possible that a duplicate symbol when we dropped the - // read lock to acquire a write lock. - if(upb_strtable_lookup(&s->symtab, e->e.key)) { - upb_seterr(status, UPB_STATUS_ERROR, "Attempted to insert duplicate " - "symbol: " UPB_STRFMT, UPB_STRARG(e->e.key)); - // To truly handle this situation we would need to remove any symbols - // from tmp that were successfully inserted into s->symtab. Because - // this case is exceedingly unlikely, and because our hashtable - // doesn't support deletions right now, we leave them in there, which - // means we must not call free_symtab(&s->symtab), so we will leak it. 
- break; + // Attempt to resolve all references. + { // Write lock scope. + symtab_ent *e; + for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + upb_strptr base = e->e.key; + for(upb_field_count_t i = 0; i < m->num_fields; i++) { + upb_fielddef *f = &m->fields[i]; + if(!upb_hasdef(f)) continue; // No resolving necessary. + upb_strptr name = upb_downcast_unresolveddef(f->def)->name; + symtab_ent *found = resolve(existingdefs, base, name); + if(!found) found = resolve(addto, base, name); + upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + if(!found) { + upb_seterr(status, UPB_STATUS_ERROR, + "could not resolve symbol '" UPB_STRFMT "'" + " in context '" UPB_STRFMT "'", + UPB_STRARG(name), UPB_STRARG(base)); + return; + } else if(found->def->type != expected) { + upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); + return; } - upb_strtable_insert(&s->symtab, &e->e); + upb_msgdef_resolve(m, f, found->def); } - upb_rwlock_unlock(&s->lock); } - upb_strtable_free(&tmp); + + // Deal with type cycles. + for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + + // Do an initial pass over the graph to check that there are no cycles + // longer than the maximum length. We also mark all cyclic defs as such, + // and decrement refs on cyclic defs. + find_cycles(m, 0, status); + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + cycle_ref_or_unref(m, NULL, open_defs, 0, true); + } + + // Add all defs to the symtab. 
} - return; +} + +void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) +{ } void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status) diff --git a/src/upb_def.h b/src/upb_def.h index a571730..0b8f114 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -244,12 +244,15 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); // returned, otherwise only defs of the required type are returned. upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); -// Adds the definitions in the given serialized descriptor to this symtab. All -// types that are referenced from desc must have previously been defined (or be -// defined in desc). desc may not attempt to define any names that are already -// defined in this symtab. Caller retains ownership of desc. status indicates -// whether the operation was successful or not, and the error message (if any). -void upb_symtab_add_desc(upb_symtab *s, upb_string *desc, upb_status *status); +// "fds" is a upb_src that will yield data from the +// google.protobuf.FileDescriptorSet message type. upb_symtab_add_fds() adds +// all the definitions from the given FileDescriptorSet and adds them to the +// symtab. status indicates whether the operation was successful or not, and +// the error message (if any). +// +// TODO: should this allow redefinition? Either is possible, but which is +// more useful? Maybe it should be an option. +void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); /* upb_def casts **************************************************************/ diff --git a/src/upb_encoder.h b/src/upb_encoder.h index b4d0c98..963af8b 100644 --- a/src/upb_encoder.h +++ b/src/upb_encoder.h @@ -20,27 +20,6 @@ extern "C" { #endif -/* upb_sizebuilder ************************************************************/ - -// A upb_sizebuilder performs a pre-pass on data to be serialized that gathers -// the sizes of submessages. 
This size data is required for serialization, -// because we have to know at the beginning of a submessage how many encoded -// bytes the submessage will represent. -struct upb_sizebuilder; -typedef struct upb_sizebuilder upb_sizebuilder; - -upb_sizebuilder *upb_sizebuilder_new(upb_msgdef *md); -void upb_sizebuilder_free(upb_sizebuilder *sb); - -void upb_sizebuilder_reset(upb_sizebuilder *sb); - -// Returns a sink that must be used to perform the pre-pass. Note that the -// pre-pass *must* occur in the opposite order from the actual encode that -// follows, and the data *must* be identical both times (except for the -// reversed order. -upb_sink *upb_sizebuilder_sink(upb_sizebuilder *sb); - - /* upb_encoder ****************************************************************/ // A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol @@ -51,21 +30,25 @@ typedef struct upb_encoder upb_encoder; upb_encoder *upb_encoder_new(upb_msgdef *md); void upb_encoder_free(upb_encoder *e); -// Resets the given upb_encoder such that is is ready to begin encoding. The -// upb_sizebuilder "sb" is used to determine submessage sizes; it must have -// previously been initialized by feeding it the same data in reverse order. -// "sb" may be null if and only if the data contains no submessages; groups -// are ok and do not require sizes to be precalculated. The upb_bytesink -// "out" is where the encoded output data will be sent. -// -// Both "sb" and "out" must live until the encoder is either reset or freed. -void upb_encoder_reset(upb_encoder *e, upb_sizebuilder *sb, upb_bytesink *out); +// Resets the given upb_encoder such that is is ready to begin encoding, +// outputting data to "bytesink" (which must live until the encoder is +// reset or destroyed). +void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); -// The upb_sink to which data can be sent to be encoded. 
Note that this data -// must be identical to the data that was previously given to the sizebuilder -// (if any). +// Returns the upb_sink to which data can be written. The sink is invalidated +// when the encoder is reset or destroyed. Note that if the client wants to +// encode any length-delimited submessages it must first call +// upb_encoder_buildsizes() below. upb_sink *upb_encoder_sink(upb_encoder *e); +// Call prior to pushing any data with embedded submessages. "src" must yield +// exactly the same data as what will next be encoded, but in reverse order. +// The encoder iterates over this data in order to determine the sizes of the +// submessages. If any errors are returned by the upb_src, the status will +// be saved in *status. If the client is sure that the upb_src will not throw +// any errors, "status" may be NULL. +void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); + #ifdef __cplusplus } /* extern "C" */ #endif -- cgit v1.2.3 From 6bf58a7328fb5241e2f66ef39c60e4483acfb19d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 26 Jun 2010 14:52:41 -0700 Subject: Incremental progress on upb_def. 
--- src/upb_data.h | 2 +- src/upb_def.c | 511 ++++++++++++++++++++----------------------------- src/upb_def.h | 21 +- src/upb_encoder.h | 2 +- src/upb_srcsink.h | 8 +- src/upb_srcsink_vtbl.h | 6 +- src/upb_string.h | 2 +- 7 files changed, 232 insertions(+), 320 deletions(-) diff --git a/src/upb_data.h b/src/upb_data.h index cdb7af2..c0f53ff 100644 --- a/src/upb_data.h +++ b/src/upb_data.h @@ -26,7 +26,7 @@ #include "upb.h" #include "upb_atomic.h" #include "upb_def.h" -#include "upb_sink.h" +#include "upb_srcsink.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_def.c b/src/upb_def.c index 9f34b42..39ed546 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -5,10 +5,8 @@ */ #include -#include -#include "descriptor.h" +#include "descriptor_const.h" #include "upb_def.h" -#include "upb_data.h" /* Rounds p up to the next multiple of t. */ #define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) @@ -23,10 +21,10 @@ typedef struct { upb_def **defs; uint32_t len; uint32_t size; -}; +} upb_deflist; static void upb_deflist_init(upb_deflist *l) { - l->size = 8 + l->size = 8; l->defs = malloc(l->size); l->len = 0; } @@ -34,11 +32,11 @@ static void upb_deflist_init(upb_deflist *l) { static void upb_deflist_uninit(upb_deflist *l) { free(l->defs); } static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->defs_len == l->defs_size) { - l->defs_size *= 2; - l->defs = realloc(l->defs, l->defs_size); + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size); } - l->defs[l->defs_len++] = d; + l->defs[l->len++] = d; } /* upb_def ********************************************************************/ @@ -171,12 +169,11 @@ void _upb_def_cyclic_ref(upb_def *def) { cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); } -static void upb_def_init(upb_def *def, enum upb_def_type type, - upb_strptr fqname) { +static void upb_def_init(upb_def *def, upb_def_type type, upb_string *fqname) { def->type = type; def->is_cyclic = 0; 
// We detect this later, after resolving refs. def->search_depth = 0; - def->fqname = NULL; + def->fqname = fqname; upb_atomic_refcount_init(&def->refcount, 1); } @@ -188,19 +185,15 @@ static void upb_def_uninit(upb_def *def) { typedef struct _upb_unresolveddef { upb_def base; - upb_strptr name; } upb_unresolveddef; -static upb_unresolveddef *upb_unresolveddef_new(upb_strptr str) { +static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); - upb_strptr name = upb_string_getref(str, UPB_REF_THREADUNSAFE_READONLY); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED, name); - def->name = name; + upb_def_init(&def->base, UPB_DEF_UNRESOLVED, str); return def; } static void unresolveddef_free(struct _upb_unresolveddef *def) { - upb_string_unref(def->name); upb_def_uninit(&def->base); free(def); } @@ -215,7 +208,7 @@ static upb_fielddef *fielddef_new(upb_src *src) upb_src_startmsg(src); upb_fielddef *parsed_f; while((parsed_f = upb_src_getdef(src))) { - switch(parsed_f->field_number) { + switch(parsed_f->number) { case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: CHECK(upb_src_getval(src, &f->type)); case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: @@ -243,92 +236,50 @@ static void fielddef_free(upb_fielddef *f) { free(f); } -static void fielddef_copy(upb_fielddef *dst, upb_fielddef *src) -{ - *dst = *src; - dst->name = upb_string_getref(src->name, UPB_REF_FROZEN); - if(upb_hasdef(src)) { - upb_def_ref(dst->def); - dst->owned = true; - } -} - -// Callback for sorting fields. -static int compare_fields(upb_fielddef *f1, upb_fielddef *f2) { - // Required fields go before non-required. - bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - if(req1 != req2) { - return req2 - req1; - } else { - // Within required and non-required field lists, list in number order. 
- // TODO: consider ordering by data size to reduce padding. */ - return f1->number - f2->number; - } -} - -static int compare_fielddefs(const void *e1, const void *e2) { - return compare_fields(*(void**)e1, *(void**)e2); -} - -static void fielddef_sort(upb_fielddef **defs, size_t num) -{ - qsort(defs, num, sizeof(*defs), compare_fielddefs); -} - /* upb_msgdef *****************************************************************/ -static upb_msgdef *msgdef_new(upb_fielddef **fields, int num_fields, - upb_strptr fqname, upb_status *status) +// Processes a google.protobuf.DescriptorProto, adding defs to "deflist." +static void upb_addmsg(upb_src *src, upb_deflist *deflist, upb_status *status) { - if(num_fields > UPB_MAX_FIELDS) { - upb_seterr(status, UPB_STATUS_ERROR, - "Tried to create a msgdef with more than %d fields", num_fields); - free(fields); - return NULL; - } upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG, fqname); + upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent)); - - m->num_fields = num_fields; - m->set_flags_bytes = div_round_up(m->num_fields, 8); - // These are incremented in the loop. - m->num_required_fields = 0; - m->size = m->set_flags_bytes + 4; // 4 for the refcount. + m->num_fields = 0; m->fields = malloc(sizeof(upb_fielddef) * num_fields); + int32_t start_count = defs->len; - size_t max_align = 0; - for(int i = 0; i < num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - upb_type_info *type_info = &upb_types[fields[i]->type]; - fielddef_copy(f, fields[i]); - - // General alignment rules are: each member must be at an address that is a - // multiple of that type's alignment. Also, the size of the structure as - // a whole must be a multiple of the greatest alignment of any member. 
*/ - f->field_index = i; - size_t offset = ALIGN_UP(m->size, type_info->align); - f->byte_offset = offset - 4; // Offsets are relative to the refcount. - m->size = offset + type_info->size; - max_align = UPB_MAX(max_align, type_info->align); - if(f->label == UPB_LABEL(REQUIRED)) { - // We currently rely on the fact that required fields are always sorted - // to occur before non-required fields. - m->num_required_fields++; + CHECK(upb_src_startmsg(src)); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(m->fqname); + CHECK(upb_src_getval(src, &m->fqname)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_FIELD_NUM: + CHECK(upb_addfield(src, m)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NESTED_TYPE_NUM: + CHECK(upb_addmsg(src, deflist)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: + CHECK(upb_addenum(src, deflist)); + break; + default: + // TODO: extensions. + upb_src_skipval(src); } - - // Insert into the tables. 
- upb_itof_ent itof_ent = {{f->number, 0}, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; - upb_inttable_insert(&m->itof, &itof_ent.e); - upb_strtable_insert(&m->ntof, &ntof_ent.e); } - - if(max_align > 0) m->size = ALIGN_UP(m->size, max_align); - return m; + CHECK(upb_src_eof(src) && upb_src_endmsg(src)); + if(!m->fqname) { + upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + return false; + } + upb_qualify(defs, m->fqname, start_count); + upb_deflist_push(m); + return true; } static void msgdef_free(upb_msgdef *m) @@ -363,6 +314,50 @@ typedef struct { upb_strptr string; } iton_ent; +static void upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) +{ + upb_src_startmsg(src); + int32_t number = -1; + upb_string *name = NULL; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + upb_src_getval(src, &number); + break; + case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: + upb_src_getval(src, &name); + break; + default: + upb_src_skipval(src); + } + } + upb_src_endmsg(src); + ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; + iton_ent iton_ent = {{value->number, 0}, value->name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, &iton_ent.e); +} + +static void upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM, fqname); + upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECK(upb_addenum_val(src, e, status)); + break; + default: + upb_src_skipval(src); + break; + } + } + upb_deflist_push(e); +} + static void enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -397,10 
+392,37 @@ bool upb_enum_done(upb_enum_iter *iter) { /* symtab internal ***********************************************************/ -typedef struct { - upb_strtable_entry e; - upb_def *def; -} upb_symtab_ent; +// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". +static void upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_string *package = NULL; + int32_t start_count = defs->len; + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->field_number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(package); + CHECK(upb_src_getval(src, &package)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_NUM: + CHECK(upb_startmsg(src)); + CHECK(upb_addmsg(src, defs)); + CHECK(upb_endmsg(src)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: + CHECK(upb_startmsg(src)); + CHECK(upb_addenum(src, defs)); + CHECK(upb_endmsg(src)); + break; + default: + // TODO: services and extensions. + upb_src_skipval(src); + } + } + CHECK(upb_src_eof(src)); + upb_qualify(deflist, package, start_count); + upb_string_unref(package); +} /* Search for a character in a string, in reverse. */ static int my_memrchr(char *data, char c, size_t len) @@ -410,6 +432,11 @@ static int my_memrchr(char *data, char c, size_t len) return off; } +typedef struct { + upb_strtable_entry e; + upb_def *def; +} upb_symtab_ent; + /* Given a symbol and the base symbol inside which it is defined, find the * symbol's definition in t. 
*/ static symtab_ent *resolve(upb_strtable *t, upb_strptr base, upb_strptr symbol) @@ -459,206 +486,15 @@ static upb_string *upb_join(upb_string *base, upb_string *name) { return joined; } -static void upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) -{ - upb_src_startmsg(src); - int32_t number = -1; - upb_string *name = NULL; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - upb_src_getval(src, &number); - break; - case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: - upb_src_getval(src, &name); - break; - default: - upb_src_skipval(src); - } - } - upb_src_endmsg(src); - ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; - iton_ent iton_ent = {{value->number, 0}, value->name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, &iton_ent.e); -} - -static void upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM, fqname); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); - CHECK(upb_src_startmsg(src)); - - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: - CHECK(upb_addenum_val(src, e, status)); - break; - default: - upb_src_skipval(src); - break; - } - } - upb_deflist_push(e); -} - -// Processes a google.protobuf.DescriptorProto, adding defs to "deflist." 
-static void upb_addmsg(upb_src *src, upb_deflist *deflist, upb_status *status) -{ - upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_atomic_refcount_init(&m->cycle_refcount, 0); - upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent)); - m->num_fields = 0; - m->fields = malloc(sizeof(upb_fielddef) * num_fields); - int32_t start_count = defs->len; - - CHECK(upb_src_startmsg(src)); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(m->fqname); - CHECK(upb_src_getval(src, &m->fqname)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_FIELD_NUM: - CHECK(upb_addfield(src, m)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NESTED_TYPE_NUM: - CHECK(upb_addmsg(src, deflist)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: - CHECK(upb_addenum(src, deflist)); - break; - default: - // TODO: extensions. - upb_src_skipval(src); - } - } - CHECK(upb_src_eof(src) && upb_src_endmsg(src)); - if(!m->fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return false; - } - upb_qualify(defs, m->fqname, start_count); - upb_deflist_push(m); - return true; -} - -// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". 
-static void upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) -{ - CHECK(upb_src_startmsg(src)); - upb_string *package = NULL; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(package); - CHECK(upb_src_getval(src, &package)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_NUM: - CHECK(upb_addmsg(src, defs)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: - CHECK(upb_addenum(src, defs)); - break; - default: - // TODO: services and extensions. - upb_src_skipval(src); - } - } - CHECK(upb_src_eof(src) && upb_src_endmsg(src)); - - upb_qualify(deflist, package, 0); - upb_string_unref(package); -} - -/* upb_symtab *****************************************************************/ - -upb_symtab *upb_symtab_new() -{ - upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_refcount_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent)); - return s; -} - -static void free_symtab(upb_strtable *t) -{ - symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(t); -} - -void _upb_symtab_free(upb_symtab *s) -{ - free_symtab(&s->symtab); - free_symtab(&s->psymtab); - upb_rwlock_destroy(&s->lock); - free(s); -} - -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) -{ - upb_rwlock_rdlock(&s->lock); - int total = upb_strtable_count(&s->symtab); - // We may only use part of this, depending on how many symbols are of the - // correct type. 
- upb_def **defs = malloc(sizeof(*defs) * total); - symtab_ent *e = upb_strtable_begin(&s->symtab); - int i = 0; - for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { - upb_def *def = e->def; - assert(def); - if(type == UPB_DEF_ANY || def->type == type) - defs[i++] = def; - } - upb_rwlock_unlock(&s->lock); - *count = i; - for(i = 0; i < *count; i++) - upb_def_ref(defs[i]); - return defs; -} - -upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym) -{ - upb_rwlock_rdlock(&s->lock); - symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - - -upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { - upb_rwlock_rdlock(&s->lock); - symtab_ent *e = resolve(&s->symtab, base, symbol); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - +// Performs a pass over the type graph to find all cycles that include m. static bool upb_symtab_findcycles(upb_msgdef *m, int search_depth, upb_status *status) { if(search_depth > UPB_MAX_TYPE_DEPTH) { - // There are many situations in upb where we recurse over the type tree - // (like for example, right now) and an absurdly deep tree could cause us - // to stack overflow on systems with very limited stacks. + // We have found a non-cyclic path from the base of the type tree that + // exceeds the maximum allowed depth. There are many situations in upb + // where we recurse over the type tree (like for example, right now) and an + // absurdly deep tree could cause us to stack overflow on systems with very + // limited stacks. 
upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " "depth %d in the type graph, which exceeds the maximum type " "depth of %d.", UPB_UPCAST(m)->fqname, search_depth, @@ -709,9 +545,8 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, { // Build a table, for duplicate detection and name resolution. - - // Attempt to resolve all references. { // Write lock scope. + // Attempt to resolve all references. symtab_ent *e; for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { upb_msgdef *m = upb_dyncast_msgdef(e->def); @@ -742,11 +577,8 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; - - // Do an initial pass over the graph to check that there are no cycles - // longer than the maximum length. We also mark all cyclic defs as such, - // and decrement refs on cyclic defs. 
- find_cycles(m, 0, status); + // The findcycles() call will decrement the external refcount of the + upb_symtab_findcycles(m, 0, status); upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; cycle_ref_or_unref(m, NULL, open_defs, 0, true); } @@ -755,6 +587,81 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, } } +/* upb_symtab *****************************************************************/ + +upb_symtab *upb_symtab_new() +{ + upb_symtab *s = malloc(sizeof(*s)); + upb_atomic_refcount_init(&s->refcount, 1); + upb_rwlock_init(&s->lock); + upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent)); + return s; +} + +static void free_symtab(upb_strtable *t) +{ + symtab_ent *e; + for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(t); +} + +void _upb_symtab_free(upb_symtab *s) +{ + free_symtab(&s->symtab); + free_symtab(&s->psymtab); + upb_rwlock_destroy(&s->lock); + free(s); +} + +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) +{ + upb_rwlock_rdlock(&s->lock); + int total = upb_strtable_count(&s->symtab); + // We may only use part of this, depending on how many symbols are of the + // correct type. 
+ upb_def **defs = malloc(sizeof(*defs) * total); + symtab_ent *e = upb_strtable_begin(&s->symtab); + int i = 0; + for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { + upb_def *def = e->def; + assert(def); + if(type == UPB_DEF_ANY || def->type == type) + defs[i++] = def; + } + upb_rwlock_unlock(&s->lock); + *count = i; + for(i = 0; i < *count; i++) + upb_def_ref(defs[i]); + return defs; +} + +upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym) +{ + upb_rwlock_rdlock(&s->lock); + symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + + +upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { + upb_rwlock_rdlock(&s->lock); + symtab_ent *e = resolve(&s->symtab, base, symbol); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { } diff --git a/src/upb_def.h b/src/upb_def.h index 0b8f114..3063386 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -27,6 +27,7 @@ #define UPB_DEF_H_ #include "upb_atomic.h" +#include "upb_srcsink.h" #include "upb_table.h" #ifdef __cplusplus @@ -37,7 +38,7 @@ extern "C" { // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. -enum upb_def_type { +typedef enum { UPB_DEF_MSG = 0, UPB_DEF_ENUM, UPB_DEF_SVC, @@ -47,7 +48,7 @@ enum upb_def_type { // For specifying that defs of any type are requsted from getdefs. UPB_DEF_ANY = -1 -}; +} upb_def_type; // This typedef is more space-efficient than declaring an enum var directly. typedef int8_t upb_def_type_t; @@ -87,20 +88,22 @@ INLINE void upb_def_unref(upb_def *def) { // It is also reference-counted. 
typedef struct _upb_fielddef { upb_atomic_refcount_t refcount; + upb_string *name; + upb_field_number_t number; upb_field_type_t type; upb_label_t label; - upb_field_number_t number; - upb_string *name; upb_value default_value; + // For the case of an enum or a submessage, points to the def for that type. + upb_def *def; + + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; + // These are set only when this fielddef is part of a msgdef. uint32_t byte_offset; // Where in a upb_msg to find the data. upb_field_count_t field_index; // Indicates set bit. - - // For the case of an enum or a submessage, points to the def for that type. - // We own a ref on this def. - bool owned; - upb_def *def; } upb_fielddef; // A variety of tests about the type of a field. diff --git a/src/upb_encoder.h b/src/upb_encoder.h index 963af8b..e879b0b 100644 --- a/src/upb_encoder.h +++ b/src/upb_encoder.h @@ -14,7 +14,7 @@ #define UPB_ENCODER_H_ #include "upb.h" -#include "upb_sink.h" +#include "upb_srcsink.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 199149d..83b4ef0 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -19,13 +19,15 @@ #ifndef UPB_SRCSINK_H #define UPB_SRCSINK_H -#include "upb_def.h" #include "upb_srcsink_vtbl.h" #ifdef __cplusplus extern "C" { #endif +// Forward-declare. We can't include upb_def.h; it would be circular. +struct _upb_fielddef; + // Note! The "eof" flags work like feof() in C; they cannot report end-of-file // until a read has failed due to eof. They cannot preemptively tell you that // the next call will fail due to eof. Since these are the semantics that C @@ -37,7 +39,7 @@ extern "C" { // Retrieves the fielddef for the next field in the stream. Returns NULL on // error or end-of-stream. -upb_fielddef *upb_src_getdef(upb_src *src); +struct _upb_fielddef *upb_src_getdef(upb_src *src); // Retrieves and stores the next value in "val". 
For string types the caller // does not own a ref to the returned type; you must ref it yourself if you @@ -62,7 +64,7 @@ INLINE bool upb_src_eof(upb_src *src) { return src->eof; } /* upb_sink *******************************************************************/ // Puts the given fielddef into the stream. -bool upb_sink_putdef(upb_sink *sink, upb_fielddef *def); +bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); // Puts the given value into the stream. bool upb_sink_putval(upb_sink *sink, upb_value val); diff --git a/src/upb_srcsink_vtbl.h b/src/upb_srcsink_vtbl.h index 45c5825..0ec45d2 100644 --- a/src/upb_srcsink_vtbl.h +++ b/src/upb_srcsink_vtbl.h @@ -11,7 +11,7 @@ #ifndef UPB_SRCSINK_VTBL_H_ #define UPB_SRCSINK_VTBL_H_ -#include "upb_def.h" +#include "upb.h" #ifdef __cplusplus extern "C" { @@ -27,13 +27,13 @@ struct upb_bytesink; typedef struct upb_bytesink upb_bytesink; // Typedefs for function pointers to all of the virtual functions. -typedef upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); +typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); typedef bool (*upb_src_skipval_fptr)(upb_src *src); typedef bool (*upb_src_startmsg_fptr)(upb_src *src); typedef bool (*upb_src_endmsg_fptr)(upb_src *src); -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, upb_fielddef *def); +typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); diff --git a/src/upb_string.h b/src/upb_string.h index eab7f54..7b9f8db 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -70,7 +70,7 @@ INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } // upb_string_endread is called(). 
No other functions may be called on the // string during this window except upb_string_len(). INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } -INLINE void upb_string_endread(upb_string *str) {} +INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different // data written to it. The returned string is either "str" if it could be -- cgit v1.2.3 From 229fcf7119b06385eb6440e54916f871b8bbc323 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 27 Jun 2010 13:19:25 -0700 Subject: upb_def compiles again, though with lots of #if 0. --- descriptor/descriptor_const.h | 128 ++++++++++++ src/upb_def.c | 471 +++++++++++++++++++++++++----------------- src/upb_srcsink.h | 13 ++ src/upb_string.h | 6 + 4 files changed, 427 insertions(+), 191 deletions(-) diff --git a/descriptor/descriptor_const.h b/descriptor/descriptor_const.h index 36af6f9..42205a6 100644 --- a/descriptor/descriptor_const.h +++ b/descriptor/descriptor_const.h @@ -46,6 +46,134 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1 } google_protobuf_FileOptions_OptimizeMode; +/* Constants for field names and numbers. 
*/ + +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_FIELDNUM 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_FIELDNAME "name_part" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_FIELDNUM 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_FIELDNAME "is_extension" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM 1 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM 2 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNAME "field" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM 3 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNAME "nested_type" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM 4 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNAME "enum_type" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_FIELDNUM 5 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_FIELDNAME "extension_range" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_FIELDNUM 6 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_FIELDNAME "extension" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS_FIELDNUM 7 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS_FIELDNAME "options" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM 1 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM 2 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNAME "value" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS_FIELDNUM 3 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS_FIELDNAME "options" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_FIELDNUM 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_FIELDNUM 3 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_FIELDNAME "identifier_value" +#define 
GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE_FIELDNUM 4 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE_FIELDNAME "positive_int_value" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE_FIELDNUM 5 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE_FIELDNAME "negative_int_value" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE_FIELDNUM 6 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE_FIELDNAME "double_value" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_FIELDNUM 7 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_FIELDNAME "string_value" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM 1 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM 2 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNAME "package" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_FIELDNUM 3 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_FIELDNAME "dependency" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM 4 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNAME "message_type" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM 5 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNAME "enum_type" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_FIELDNUM 6 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_FIELDNAME "service" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_FIELDNUM 7 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_FIELDNAME "extension" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS_FIELDNUM 8 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS_FIELDNAME "options" +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_FIELDNUM 1 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_FIELDNUM 2 +#define 
GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_FIELDNAME "input_type" +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_FIELDNUM 3 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_FIELDNAME "output_type" +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_FIELDNUM 4 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_FIELDNAME "options" +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM 1 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM 2 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNAME "number" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_FIELDNUM 3 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_FIELDNAME "options" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_FIELDNUM 1 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_FIELDNUM 2 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_FIELDNAME "method" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_FIELDNUM 3 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_FIELDNAME "options" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM 1 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNAME "file" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START_FIELDNUM 1 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START_FIELDNAME "start" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END_FIELDNUM 2 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END_FIELDNAME "end" +#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_FIELDNUM 1 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_FIELDNAME "ctype" +#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED_FIELDNUM 2 +#define 
GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED_FIELDNAME "packed" +#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED_FIELDNUM 3 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED_FIELDNAME "deprecated" +#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_FIELDNUM 9 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_FIELDNAME "experimental_map_key" +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_FIELDNUM 1 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_FIELDNAME "java_package" +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_FIELDNUM 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_FIELDNAME "java_outer_classname" +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR_FIELDNUM 9 +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR_FIELDNAME "optimize_for" +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES_FIELDNUM 10 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES_FIELDNAME "java_multiple_files" +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT_FIELDNUM 1 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT_FIELDNAME "message_set_wire_format" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM 1 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNAME "name" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_FIELDNUM 2 +#define 
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_FIELDNAME "extendee" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM 3 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNAME "number" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM 4 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNAME "label" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM 5 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNAME "type" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM 6 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNAME "type_name" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM 7 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNAME "default_value" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_FIELDNUM 8 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_FIELDNAME "options" +#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/upb_def.c b/src/upb_def.c index 39ed546..e78fb07 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -10,11 +10,8 @@ /* Rounds p up to the next multiple of t. */ #define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) - -static int div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} +#define CHECKSRC(x) if(!(x)) goto src_err +#define CHECK(x) if(!(x)) goto err // A little dynamic array for storing a growing list of upb_defs. 
typedef struct { @@ -39,6 +36,27 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } +/* Joins strings together, for example: + * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" + * Caller owns a ref on the returned string. */ +static upb_string *upb_join(upb_string *base, upb_string *name) { + upb_string *joined = upb_strdup(base); + upb_strlen_t len = upb_string_len(joined); + if(len > 0) { + upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; + } + upb_strcat(joined, name); + return joined; +} + + +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + (void)l; + (void)str; + (void)start; +} + /* upb_def ********************************************************************/ // Defs are reference counted, but can have cycles when types are @@ -77,18 +95,18 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { // This algorithm is relatively cheap, since it only requires extra work when // the external refcount on a cyclic type transitions from 0->1 or 1->0. -static void msgdef_free(upb_msgdef *m); -static void enumdef_free(upb_enumdef *e); -static void unresolveddef_free(struct _upb_unresolveddef *u); +static void upb_msgdef_free(upb_msgdef *m); +static void upb_enumdef_free(upb_enumdef *e); +static void upb_unresolveddef_free(struct _upb_unresolveddef *u); static void def_free(upb_def *def) { switch(def->type) { case UPB_DEF_MSG: - msgdef_free(upb_downcast_msgdef(def)); + upb_msgdef_free(upb_downcast_msgdef(def)); break; case UPB_DEF_ENUM: - enumdef_free(upb_downcast_enumdef(def)); + upb_enumdef_free(upb_downcast_enumdef(def)); break; case UPB_DEF_SVC: assert(false); /* Unimplemented. */ @@ -97,7 +115,7 @@ static void def_free(upb_def *def) assert(false); /* Unimplemented. 
*/ break; case UPB_DEF_UNRESOLVED: - unresolveddef_free(upb_downcast_unresolveddef(def)); + upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break; default: assert(false); @@ -169,11 +187,11 @@ void _upb_def_cyclic_ref(upb_def *def) { cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); } -static void upb_def_init(upb_def *def, upb_def_type type, upb_string *fqname) { +static void upb_def_init(upb_def *def, upb_def_type type) { def->type = type; def->is_cyclic = 0; // We detect this later, after resolving refs. def->search_depth = 0; - def->fqname = fqname; + def->fqname = NULL; upb_atomic_refcount_init(&def->refcount, 1); } @@ -181,6 +199,7 @@ static void upb_def_uninit(upb_def *def) { upb_string_unref(def->fqname); } + /* upb_unresolveddef **********************************************************/ typedef struct _upb_unresolveddef { @@ -189,118 +208,16 @@ typedef struct _upb_unresolveddef { static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED, str); + upb_def_init(&def->base, UPB_DEF_UNRESOLVED); + def->base.fqname = upb_string_getref(str); return def; } -static void unresolveddef_free(struct _upb_unresolveddef *def) { +static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { upb_def_uninit(&def->base); free(def); } -/* upb_fielddef ***************************************************************/ - -static upb_fielddef *fielddef_new(upb_src *src) -{ - upb_fielddef *f = malloc(sizeof(*f)); - f->def = NULL; - f->owned = false; - upb_src_startmsg(src); - upb_fielddef *parsed_f; - while((parsed_f = upb_src_getdef(src))) { - switch(parsed_f->number) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - CHECK(upb_src_getval(src, &f->type)); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - CHECK(upb_src_getval(src, &f->label)); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - 
CHECK(upb_src_getval(src, &f->number)); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - CHECK(upb_src_getval(src, &f->name)); - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPENAME_FIELDNUM: - CHECK(upb_src_getval(src, &f->type_name)); - f->def = UPB_UPCAST(upb_unresolveddef_new(fd->type_name)); - f->owned = true; - } - } - upb_src_endmsg(src); - assert((f->def != NULL) == upb_hasdef(f)); - return f; -} - -static void fielddef_free(upb_fielddef *f) { - upb_string_unref(f->name); - if(upb_hasdef(f) && f->owned) { - upb_def_unref(f->def); - } - free(f); -} - -/* upb_msgdef *****************************************************************/ - -// Processes a google.protobuf.DescriptorProto, adding defs to "deflist." -static void upb_addmsg(upb_src *src, upb_deflist *deflist, upb_status *status) -{ - upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_atomic_refcount_init(&m->cycle_refcount, 0); - upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent)); - m->num_fields = 0; - m->fields = malloc(sizeof(upb_fielddef) * num_fields); - int32_t start_count = defs->len; - - CHECK(upb_src_startmsg(src)); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(m->fqname); - CHECK(upb_src_getval(src, &m->fqname)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_FIELD_NUM: - CHECK(upb_addfield(src, m)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NESTED_TYPE_NUM: - CHECK(upb_addmsg(src, deflist)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: - CHECK(upb_addenum(src, deflist)); - break; - default: - // TODO: extensions. 
- upb_src_skipval(src); - } - } - CHECK(upb_src_eof(src) && upb_src_endmsg(src)); - if(!m->fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return false; - } - upb_qualify(defs, m->fqname, start_count); - upb_deflist_push(m); - return true; -} - -static void msgdef_free(upb_msgdef *m) -{ - for (upb_field_count_t i = 0; i < m->num_fields; i++) - fielddef_uninit(&m->fields[i]); - free(m->fields); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); - upb_def_uninit(&m->base); - free(m); -} - -static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { - (void)m; - if(f->owned) upb_def_unref(f->def); - f->def = def; - // We will later make the ref unowned if it is a part of a cycle. - f->owned = true; - upb_def_ref(def); -} /* upb_enumdef ****************************************************************/ @@ -311,42 +228,60 @@ typedef struct { typedef struct { upb_inttable_entry e; - upb_strptr string; + upb_string *string; } iton_ent; -static void upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) +static void upb_enumdef_free(upb_enumdef *e) { + upb_strtable_free(&e->ntoi); + upb_inttable_free(&e->iton); + upb_def_uninit(&e->base); + free(e); +} + +static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) { - upb_src_startmsg(src); int32_t number = -1; upb_string *name = NULL; + upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { + switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - upb_src_getval(src, &number); + CHECKSRC(upb_src_getint32(src, &number)); break; - case GOOGLE_PROTOBUF_ENUMVALUDESCRIPTORPROTO_NAME_FIELDNUM: - upb_src_getval(src, &name); + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: + CHECKSRC(upb_src_getstr(src, &name)); break; default: - upb_src_skipval(src); + CHECKSRC(upb_src_skipval(src)); + break; } } - upb_src_endmsg(src); - ntoi_ent ntoi_ent = {{value->name, 
0}, value->number}; - iton_ent iton_ent = {{value->number, 0}, value->name}; + + if(name == NULL || number == -1) { + upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); + goto err; + } + ntoi_ent ntoi_ent = {{name, 0}, number}; + iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); + return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); +err: + return false; } -static void upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) +static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM, fqname); + upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { + switch(f->number) { case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: CHECK(upb_addenum_val(src, e, status)); break; @@ -355,14 +290,12 @@ static void upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) break; } } - upb_deflist_push(e); -} + upb_deflist_push(defs, UPB_UPCAST(e)); + return true; -static void enumdef_free(upb_enumdef *e) { - upb_strtable_free(&e->ntoi); - upb_inttable_free(&e->iton); - upb_def_uninit(&e->base); - free(e); +err: + upb_enumdef_free(e); + return false; } static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { @@ -390,38 +323,190 @@ bool upb_enum_done(upb_enum_iter *iter) { return iter->state == NULL; } + +/* upb_fielddef ***************************************************************/ + +static void upb_fielddef_free(upb_fielddef *f) { + free(f); +} + +static void upb_fielddef_uninit(upb_fielddef *f) { + upb_string_unref(f->name); + if(upb_hasdef(f) && f->owned) { + upb_def_unref(f->def); + } +} + +static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) +{ + 
upb_fielddef *f = malloc(sizeof(*f)); + f->def = NULL; + f->owned = false; + upb_fielddef *parsed_f; + int32_t tmp; + while((parsed_f = upb_src_getdef(src))) { + switch(parsed_f->number) { + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &tmp)); + f->type = tmp; + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &tmp)); + f->label = tmp; + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &tmp)); + f->number = tmp; + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: + CHECKSRC(upb_src_getstr(src, &f->name)); + f->name = upb_string_getref(f->name); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { + upb_string *str; + CHECKSRC(upb_src_getstr(src, &str)); + if(f->def) upb_def_unref(f->def); + f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + f->owned = true; + break; + } + } + } + CHECKSRC(upb_src_eof(src)); + // TODO: verify that all required fields were present. + assert((f->def != NULL) == upb_hasdef(f)); + + // Field was successfully read, add it as a field of the msgdef. + upb_itof_ent itof_ent = {{f->number, 0}, f}; + upb_ntof_ent ntof_ent = {{f->name, 0}, f}; + upb_inttable_insert(&m->itof, &itof_ent.e); + upb_strtable_insert(&m->ntof, &ntof_ent.e); + return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); + upb_fielddef_free(f); + return false; +} + + +/* upb_msgdef *****************************************************************/ + +// Processes a google.protobuf.DescriptorProto, adding defs to "defs." 
+static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_msgdef *m = malloc(sizeof(*m)); + upb_def_init(&m->base, UPB_DEF_MSG); + upb_atomic_refcount_init(&m->cycle_refcount, 0); + upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); + upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + int32_t start_count = defs->len; + + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(m->base.fqname); + CHECKSRC(upb_src_getstr(src, &m->base.fqname)); + m->base.fqname = upb_string_getref(m->base.fqname); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addfield(src, m, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addmsg(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addenum(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + default: + // TODO: extensions. 
+ CHECKSRC(upb_src_skipval(src)); + } + } + CHECK(upb_src_eof(src)); + if(!m->base.fqname) { + upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + goto err; + } + upb_deflist_qualify(defs, m->base.fqname, start_count); + upb_deflist_push(defs, UPB_UPCAST(m)); + return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); +err: + upb_msgdef_free(m); + return false; +} + +static void upb_msgdef_free(upb_msgdef *m) +{ + for (upb_field_count_t i = 0; i < m->num_fields; i++) + upb_fielddef_uninit(&m->fields[i]); + free(m->fields); + upb_strtable_free(&m->ntof); + upb_inttable_free(&m->itof); + upb_def_uninit(&m->base); + free(m); +} + +static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { + (void)m; + if(f->owned) upb_def_unref(f->def); + f->def = def; + // We will later make the ref unowned if it is a part of a cycle. + f->owned = true; + upb_def_ref(def); +} + + /* symtab internal ***********************************************************/ // Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". 
-static void upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) +static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) { upb_string *package = NULL; int32_t start_count = defs->len; upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { - switch(f->field_number) { + switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(package); - CHECK(upb_src_getval(src, &package)); + CHECKSRC(upb_src_getstr(src, &package)); + package = upb_string_getref(package); break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_NUM: - CHECK(upb_startmsg(src)); - CHECK(upb_addmsg(src, defs)); - CHECK(upb_endmsg(src)); + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addmsg(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_NUM: - CHECK(upb_startmsg(src)); - CHECK(upb_addenum(src, defs)); - CHECK(upb_endmsg(src)); + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addenum(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); break; default: // TODO: services and extensions. - upb_src_skipval(src); + CHECKSRC(upb_src_skipval(src)); } } CHECK(upb_src_eof(src)); - upb_qualify(deflist, package, start_count); + upb_deflist_qualify(defs, package, start_count); + upb_string_unref(package); + return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); +err: upb_string_unref(package); + return false; } /* Search for a character in a string, in reverse. */ @@ -437,23 +522,26 @@ typedef struct { upb_def *def; } upb_symtab_ent; -/* Given a symbol and the base symbol inside which it is defined, find the - * symbol's definition in t. */ -static symtab_ent *resolve(upb_strtable *t, upb_strptr base, upb_strptr symbol) +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. 
+static upb_symtab_ent *upb_resolve(upb_strtable *t, + upb_string *base, + upb_string *sym) { - if(upb_strlen(base) + upb_strlen(symbol) + 1 >= UPB_SYMBOL_MAXLEN || +#if 0 + if(upb_strlen(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN || upb_strlen(symbol) == 0) return NULL; if(upb_string_getrobuf(symbol)[0] == UPB_SYMBOL_SEPARATOR) { // Symbols starting with '.' are absolute, so we do a single lookup. // Slice to omit the leading '.' - upb_strptr sym_str = upb_strslice(symbol, 1, INT_MAX); + upb_string *sym_str = upb_strslice(symbol, 1, INT_MAX); symtab_ent *e = upb_strtable_lookup(t, sym_str); upb_string_unref(sym_str); return e; } else { // Remove components from base until we find an entry or run out. - upb_strptr sym_str = upb_string_new(); + upb_string *sym_str = upb_string_new(); int baselen = upb_strlen(base); while(1) { // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol @@ -465,31 +553,22 @@ static symtab_ent *resolve(upb_strtable *t, upb_strptr base, upb_strptr symbol) symtab_ent *e = upb_strtable_lookup(t, sym_str); if (e) return e; - else if(baselen == 0) return NULL; /* No more scopes to try. */ + else if(baselen == 0) return NULL; // No more scopes to try. baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); } } -} - -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns a ref on the returned string. */ -static upb_string *upb_join(upb_string *base, upb_string *name) { - upb_strptr joined = upb_strdup(base); - upb_strlen_t len = upb_strlen(joined); - if(len > 0) { - upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; - } - upb_strcat(joined, name); - return joined; +#endif + (void)t; + (void)base; + (void)sym; + return NULL; } // Performs a pass over the type graph to find all cycles that include m. 
-static bool upb_symtab_findcycles(upb_msgdef *m, int search_depth, upb_status *status) +static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) { - if(search_depth > UPB_MAX_TYPE_DEPTH) { + if(depth > UPB_MAX_TYPE_DEPTH) { // We have found a non-cyclic path from the base of the type tree that // exceeds the maximum allowed depth. There are many situations in upb // where we recurse over the type tree (like for example, right now) and an @@ -497,12 +576,12 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int search_depth, upb_status *s // limited stacks. upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " "depth %d in the type graph, which exceeds the maximum type " - "depth of %d.", UPB_UPCAST(m)->fqname, search_depth, + "depth of %d.", UPB_UPCAST(m)->fqname, depth, UPB_MAX_TYPE_DEPTH); return false; } else if(UPB_UPCAST(m)->search_depth == 1) { // Cycle! - int cycle_len = search_depth - 1; + int cycle_len = depth - 1; if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " "in a cycle of length %d, which exceeds the maximum type " @@ -515,14 +594,14 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int search_depth, upb_status *s // We'll find it when we call find_cycles() on this node directly. 
return false; } else { - UPB_UPCAST(m)->search_depth = ++search_depth; + UPB_UPCAST(m)->search_depth = ++depth; bool cycle_found = false; for(upb_field_count_t i = 0; i < m->num_fields; i++) { upb_fielddef *f = &m->fields[i]; if(!upb_issubmsg(f)) continue; upb_def *sub_def = f->def; upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); - if(find_cycles(sub_m, search_depth, status)) { + if(upb_symtab_findcycles(sub_m, depth, status)) { cycle_found = true; UPB_UPCAST(m)->is_cyclic = true; if(f->owned) { @@ -543,20 +622,26 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int search_depth, upb_status *s bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, upb_status *status) { + (void)s; + (void)defs; + (void)allow_redef; + (void)status; + return true; +#if 0 // Build a table, for duplicate detection and name resolution. { // Write lock scope. // Attempt to resolve all references. - symtab_ent *e; + upb_symtab_ent *e; for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; - upb_strptr base = e->e.key; + upb_string *base = e->e.key; for(upb_field_count_t i = 0; i < m->num_fields; i++) { upb_fielddef *f = &m->fields[i]; if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_strptr name = upb_downcast_unresolveddef(f->def)->name; - symtab_ent *found = resolve(existingdefs, base, name); + upb_string *name = upb_downcast_unresolveddef(f->def)->name; + upb_symtab_ent *found = resolve(existingdefs, base, name); if(!found) found = resolve(addto, base, name); upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(!found) { @@ -585,8 +670,10 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, // Add all defs to the symtab. 
} +#endif } + /* upb_symtab *****************************************************************/ upb_symtab *upb_symtab_new() @@ -594,13 +681,13 @@ upb_symtab *upb_symtab_new() upb_symtab *s = malloc(sizeof(*s)); upb_atomic_refcount_init(&s->refcount, 1); upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent)); + upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); return s; } static void free_symtab(upb_strtable *t) { - symtab_ent *e; + upb_symtab_ent *e; for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) upb_def_unref(e->def); upb_strtable_free(t); @@ -621,7 +708,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) // We may only use part of this, depending on how many symbols are of the // correct type. upb_def **defs = malloc(sizeof(*defs) * total); - symtab_ent *e = upb_strtable_begin(&s->symtab); + upb_symtab_ent *e = upb_strtable_begin(&s->symtab); int i = 0; for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { upb_def *def = e->def; @@ -636,10 +723,10 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) return defs; } -upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym) +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) { upb_rwlock_rdlock(&s->lock); - symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); + upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); upb_def *ret = NULL; if(e) { ret = e->def; @@ -650,9 +737,9 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym) } -upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { upb_rwlock_rdlock(&s->lock); - symtab_ent *e = resolve(&s->symtab, base, symbol); + upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); upb_def *ret = NULL; if(e) { ret = e->def; @@ -662,11 +749,12 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { return 
ret; } +#if 0 void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { } -void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status) +void upb_symtab_add_desc(upb_symtab *s, upb_string *desc, upb_status *status) { upb_msg *fds = upb_msg_new(s->fds_msgdef); upb_msg_decodestr(fds, s->fds_msgdef, desc, status); @@ -675,3 +763,4 @@ void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status) upb_msg_unref(fds, s->fds_msgdef); return; } +#endif diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h index 83b4ef0..dc73613 100644 --- a/src/upb_srcsink.h +++ b/src/upb_srcsink.h @@ -61,6 +61,19 @@ bool upb_src_endmsg(upb_src *src); INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } INLINE bool upb_src_eof(upb_src *src) { return src->eof; } +// The following functions are equivalent to upb_src_getval(), but take +// pointers to specific types. In debug mode this may check that the type +// is compatible with the type being read. This check will *not* be performed +// in non-debug mode, and if you get the type wrong the behavior is undefined. +bool upb_src_getbool(upb_src *src, bool *val); +bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); +bool upb_src_getstr(upb_src *src, upb_string **val); + /* upb_sink *******************************************************************/ // Puts the given fielddef into the stream. 
diff --git a/src/upb_string.h b/src/upb_string.h index 7b9f8db..5e5d6bc 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -98,6 +98,12 @@ void upb_string_substr(upb_string *str, upb_string *target_str, void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); void upb_string_detach(upb_string *str); +// Allows using upb_strings in printf, ie: +// upb_strptr str = UPB_STRLIT("Hello, World!\n"); +// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ +#define UPB_STRARG(str) upb_strlen(str), upb_string_getrobuf(str) +#define UPB_STRFMT "%.*s" + /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer -- cgit v1.2.3 From a417be0f8780fd596b06159079d7c377500026c6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 3 Jul 2010 12:34:09 -0700 Subject: More work on upb_def. --- src/upb_def.c | 168 ++++++++++++++++++++++++++++++++++++++----------------- src/upb_def.h | 2 +- src/upb_string.h | 2 +- 3 files changed, 119 insertions(+), 53 deletions(-) diff --git a/src/upb_def.c b/src/upb_def.c index e78fb07..088dd0d 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -8,8 +8,6 @@ #include "descriptor_const.h" #include "upb_def.h" -/* Rounds p up to the next multiple of t. */ -#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) #define CHECKSRC(x) if(!(x)) goto src_err #define CHECK(x) if(!(x)) goto err @@ -111,9 +109,6 @@ static void def_free(upb_def *def) case UPB_DEF_SVC: assert(false); /* Unimplemented. */ break; - case UPB_DEF_EXT: - assert(false); /* Unimplemented. */ - break; case UPB_DEF_UNRESOLVED: upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break; @@ -202,14 +197,20 @@ static void upb_def_uninit(upb_def *def) { /* upb_unresolveddef **********************************************************/ +// Unresolved defs are used as temporary placeholders for a def whose name has +// not been resolved yet. 
During the name resolution step, all unresolved defs +// are replaced with pointers to the actual def being referenced. typedef struct _upb_unresolveddef { upb_def base; + + // The target type name. This may or may not be fully qualified. + upb_string *name; } upb_unresolveddef; static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->base.fqname = upb_string_getref(str); + def->name = upb_string_getref(str); return def; } @@ -615,6 +616,59 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } } +// Given a table of pending defs "tmptab" and a table of existing defs "symtab", +// resolves all of the unresolved refs for the defs in tmptab. +bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, + upb_status *status) +{ + upb_symtab_ent *e; + for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // Type names are resolved relative to the message in which they appear. + upb_string *base = e->e.key; + + for(upb_field_count_t i = 0; i < m->num_fields; i++) { + upb_fielddef *f = &m->fields[i]; + if(!upb_hasdef(f)) continue; // No resolving necessary. + upb_string *name = upb_downcast_unresolveddef(f->def)->name; + + // Resolve from either the tmptab (pending adds) or symtab (existing + // defs). If both exist, prefer the pending add, because it will be + // overwriting the existing def. + upb_symtab_ent *found; + if(!(found = upb_resolve(tmptab, base, name)) && + !(found = upb_resolve(symtab, base, name))) { + upb_seterr(status, UPB_STATUS_ERROR, + "could not resolve symbol '" UPB_STRFMT "'" + " in context '" UPB_STRFMT "'", + UPB_STRARG(name), UPB_STRARG(base)); + return false; + } + + // Check the type of the found def. + upb_field_type_t expected = upb_issubmsg(f) ? 
UPB_DEF_MSG : UPB_DEF_ENUM; + if(found->def->type != expected) { + upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); + return false; + } + upb_msgdef_resolve(m, f, found->def); + } + } + + // Deal with type cycles. + for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // The findcycles() call will decrement the external refcount of the + if(!upb_symtab_findcycles(m, 0, status)) return false; + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + cycle_ref_or_unref(m, NULL, open_defs, 0, true); + } + + return true; +} + // Given a list of defs, a list of extensions (in the future), and a flag // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either @@ -622,55 +676,67 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, upb_status *status) { - (void)s; - (void)defs; - (void)allow_redef; - (void)status; - return true; -#if 0 - // Build a table, for duplicate detection and name resolution. - - { // Write lock scope. - // Attempt to resolve all references. - upb_symtab_ent *e; - for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - upb_string *base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_string *name = upb_downcast_unresolveddef(f->def)->name; - upb_symtab_ent *found = resolve(existingdefs, base, name); - if(!found) found = resolve(addto, base, name); - upb_field_type_t expected = upb_issubmsg(f) ? 
UPB_DEF_MSG : UPB_DEF_ENUM; - if(!found) { - upb_seterr(status, UPB_STATUS_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); - return; - } else if(found->def->type != expected) { - upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); - return; - } - upb_msgdef_resolve(m, f, found->def); - } + upb_rwlock_wrlock(&s->lock); + + // Build a table of the defs we mean to add, for duplicate detection and name + // resolution. + upb_strtable tmptab; + upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); + for (uint32_t i = 0; i < defs->len; i++) { + upb_def *def = defs->defs[i]; + upb_symtab_ent e = {{def->fqname, 0}, def}; + + // Redefinition is never allowed within a single FileDescriptorSet. + // Additionally, we only allow overwriting of an existing definition if + // allow_redef is set. + if (upb_strtable_lookup(&tmptab, def->fqname) || + (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { + upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT, + UPB_STRARG(def->fqname)); + goto err; } - // Deal with type cycles. - for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // The findcycles() call will decrement the external refcount of the - upb_symtab_findcycles(m, 0, status); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - cycle_ref_or_unref(m, NULL, open_defs, 0, true); - } + // Pass ownership from the deflist to the strtable. + upb_strtable_insert(&tmptab, &e.e); + defs->defs[i] = NULL; + } - // Add all defs to the symtab. + // TODO: process the list of extensions by modifying entries from + // tmptab in-place (copying them from the symtab first if necessary). + + CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); + + // The defs in tmptab have been vetted, and can be added to the symtab + // without causing errors. 
Now add all tmptab defs to the symtab, + // overwriting (and releasing a ref on) any existing defs with the same + // names. Ownership for tmptab defs passes from the tmptab to the symtab. + upb_symtab_ent *tmptab_e; + for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; + tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { + upb_symtab_ent *symtab_e = + upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); + if(symtab_e) { + upb_def_unref(symtab_e->def); + symtab_e->def = tmptab_e->def; + } else { + upb_strtable_insert(&s->symtab, &tmptab_e->e); + } } -#endif + + upb_rwlock_unlock(&s->lock); + upb_strtable_free(&tmptab); + return true; + +err: + // We need to free all defs that are in either "defs" or "tmptab." + upb_rwlock_unlock(&s->lock); + for (uint32_t i = 0; i < defs->len; i++) + if(defs->defs[i] != NULL) free(defs->defs[i]); + for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; + e = upb_strtable_next(&tmptab, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(&tmptab); + return false; } diff --git a/src/upb_def.h b/src/upb_def.h index 3063386..033dcde 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -248,7 +248,7 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // "fds" is a upb_src that will yield data from the -// google.protobuf.FileDescriptorSet message type. upb_symtab_add_fds() adds +// google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds // all the definitions from the given FileDescriptorSet and adds them to the // symtab. status indicates whether the operation was successful or not, and // the error message (if any). 
diff --git a/src/upb_string.h b/src/upb_string.h index 5e5d6bc..af1f8ce 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -101,7 +101,7 @@ void upb_string_detach(upb_string *str); // Allows using upb_strings in printf, ie: // upb_strptr str = UPB_STRLIT("Hello, World!\n"); // printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) upb_strlen(str), upb_string_getrobuf(str) +#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) #define UPB_STRFMT "%.*s" /* upb_string library functions ***********************************************/ -- cgit v1.2.3 From 956fc84150cb98eaad21a2e2147c96b0fe77f952 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 3 Jul 2010 12:52:51 -0700 Subject: More work on upb_def. --- src/upb_def.c | 69 +++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/src/upb_def.c b/src/upb_def.c index 088dd0d..07781c6 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -24,7 +24,11 @@ static void upb_deflist_init(upb_deflist *l) { l->len = 0; } -static void upb_deflist_uninit(upb_deflist *l) { free(l->defs); } +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} static void upb_deflist_push(upb_deflist *l, upb_def *d) { if(l->len == l->size) { @@ -97,7 +101,7 @@ static void upb_msgdef_free(upb_msgdef *m); static void upb_enumdef_free(upb_enumdef *e); static void upb_unresolveddef_free(struct _upb_unresolveddef *u); -static void def_free(upb_def *def) +static void upb_def_free(upb_def *def) { switch(def->type) { case UPB_DEF_MSG: @@ -125,9 +129,9 @@ static void def_free(upb_def *def) // search so we can stop the search if we detect a cycles that do not involve // cycle_base. We can't color the nodes as we go by writing to a member of the // def, because another thread could be performing the search concurrently. 
-static int cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, - upb_msgdef **open_defs, int num_open_defs, - bool ref) { +static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, + upb_msgdef **open_defs, int num_open_defs, + bool ref) { bool found = false; for(int i = 0; i < num_open_defs; i++) { if(open_defs[i] == m) { @@ -153,7 +157,7 @@ static int cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, upb_def *def = f->def; if(upb_issubmsg(f) && def->is_cyclic) { upb_msgdef *sub_m = upb_downcast_msgdef(def); - path_count += cycle_ref_or_unref(sub_m, cycle_base, open_defs, + path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs, num_open_defs, ref); } } @@ -161,7 +165,7 @@ static int cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, upb_atomic_add(&m->cycle_refcount, path_count); } else { if(upb_atomic_add(&m->cycle_refcount, -path_count)) - def_free(UPB_UPCAST(m)); + upb_def_free(UPB_UPCAST(m)); } return path_count; } @@ -171,15 +175,15 @@ void _upb_def_reftozero(upb_def *def) { if(def->is_cyclic) { upb_msgdef *m = upb_downcast_msgdef(def); upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - cycle_ref_or_unref(m, NULL, open_defs, 0, false); + upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false); } else { - def_free(def); + upb_def_free(def); } } void _upb_def_cyclic_ref(upb_def *def) { upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); + upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); } static void upb_def_init(upb_def *def, upb_def_type type) { @@ -663,7 +667,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // The findcycles() call will decrement the external refcount of the if(!upb_symtab_findcycles(m, 0, status)) return false; upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - cycle_ref_or_unref(m, NULL, open_defs, 0, true); + upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); } return true; @@ 
-728,10 +732,8 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, return true; err: - // We need to free all defs that are in either "defs" or "tmptab." + // We need to free all defs from "tmptab." upb_rwlock_unlock(&s->lock); - for (uint32_t i = 0; i < defs->len; i++) - if(defs->defs[i] != NULL) free(defs->defs[i]); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; e = upb_strtable_next(&tmptab, &e->e)) upb_def_unref(e->def); @@ -751,7 +753,7 @@ upb_symtab *upb_symtab_new() return s; } -static void free_symtab(upb_strtable *t) +static void upb_free_symtab(upb_strtable *t) { upb_symtab_ent *e; for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) @@ -761,8 +763,8 @@ static void free_symtab(upb_strtable *t) void _upb_symtab_free(upb_symtab *s) { - free_symtab(&s->symtab); - free_symtab(&s->psymtab); + upb_free_symtab(&s->symtab); + upb_free_symtab(&s->psymtab); upb_rwlock_destroy(&s->lock); free(s); } @@ -815,18 +817,29 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) return ret; } -#if 0 void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { -} - -void upb_symtab_add_desc(upb_symtab *s, upb_string *desc, upb_status *status) -{ - upb_msg *fds = upb_msg_new(s->fds_msgdef); - upb_msg_decodestr(fds, s->fds_msgdef, desc, status); - if(!upb_ok(status)) return; - upb_symtab_addfds(s, (google_protobuf_FileDescriptorSet*)fds, status); - upb_msg_unref(fds, s->fds_msgdef); + upb_deflist defs; + upb_deflist_init(&defs); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addfd(src, &defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + default: + CHECKSRC(upb_src_skipval(src)); + } + } + CHECKSRC(upb_src_eof(src)); + CHECK(upb_symtab_add_defs(s, &defs, false, status)); + upb_deflist_uninit(&defs); return; + +src_err: + 
upb_copyerr(status, upb_src_status(src)); +err: + upb_deflist_uninit(&defs); } -#endif -- cgit v1.2.3 From 9c9b4645536fb4b73ff347cb088fda3bc482b022 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 3 Jul 2010 12:57:50 -0700 Subject: Implement upb_deflist_qualify. --- src/upb_def.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/upb_def.c b/src/upb_def.c index 07781c6..b0347ce 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -52,11 +52,14 @@ static upb_string *upb_join(upb_string *base, upb_string *name) { return joined; } - +// Qualify the defname for all defs starting with offset "start" with "str". static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - (void)l; - (void)str; - (void)start; + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } } /* upb_def ********************************************************************/ -- cgit v1.2.3 From 5ea7f943f9fd70fa1ada694b4532b71af55f8861 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 3 Jul 2010 13:22:23 -0700 Subject: upb_def now theoretically works again. --- src/upb_def.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/upb_def.c b/src/upb_def.c index b0347ce..1eaaeef 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -533,44 +533,37 @@ typedef struct { // Given a symbol and the base symbol inside which it is defined, find the // symbol's definition in t. 
static upb_symtab_ent *upb_resolve(upb_strtable *t, - upb_string *base, - upb_string *sym) + upb_string *base, upb_string *sym) { -#if 0 - if(upb_strlen(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN || - upb_strlen(symbol) == 0) return NULL; + if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN || + upb_string_len(sym) == 0) return NULL; - if(upb_string_getrobuf(symbol)[0] == UPB_SYMBOL_SEPARATOR) { + if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { // Symbols starting with '.' are absolute, so we do a single lookup. // Slice to omit the leading '.' - upb_string *sym_str = upb_strslice(symbol, 1, INT_MAX); - symtab_ent *e = upb_strtable_lookup(t, sym_str); + upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1); + upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); upb_string_unref(sym_str); return e; } else { // Remove components from base until we find an entry or run out. upb_string *sym_str = upb_string_new(); - int baselen = upb_strlen(base); + int baselen = upb_string_len(base); while(1) { - // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol - upb_strlen_t len = baselen + upb_strlen(symbol) + 1; + // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym + upb_strlen_t len = baselen + upb_string_len(sym) + 1; char *buf = upb_string_getrwbuf(sym_str, len); memcpy(buf, upb_string_getrobuf(base), baselen); buf[baselen] = UPB_SYMBOL_SEPARATOR; - memcpy(buf + baselen + 1, upb_string_getrobuf(symbol), upb_strlen(symbol)); + memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym)); - symtab_ent *e = upb_strtable_lookup(t, sym_str); + upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); if (e) return e; else if(baselen == 0) return NULL; // No more scopes to try. baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); } } -#endif - (void)t; - (void)base; - (void)sym; - return NULL; } // Performs a pass over the type graph to find all cycles that include m. 
-- cgit v1.2.3 From 9d051254b35b2bf838f1753a24fe490fb448e428 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 3 Jul 2010 19:19:36 -0700 Subject: Implemented upb_baredecoder, for bootstrapping. --- src/upb_decoder.c | 11 ++-- src/upb_def.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/upb_table.c | 14 ++--- 3 files changed, 167 insertions(+), 12 deletions(-) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 6f1e437..dd8ffcd 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -316,9 +316,6 @@ bool upb_decoder_skipval(upb_decoder *d); upb_fielddef *upb_decoder_getdef(upb_decoder *d) { - uint32_t key; - upb_wire_type_t wire_type; - // Detect end-of-submessage. if(upb_decoder_offset(d) >= d->top->end_offset) { d->src.eof = true; @@ -328,9 +325,11 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) // Handles the packed field case. if(d->field) return d->field; + uint32_t key = 0; again: if(!upb_decoder_readv32(d, &key)) return NULL; - wire_type = key & 0x7; + upb_wire_type_t wire_type = key & 0x7; + int32_t field_number = key >> 3; if(wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in @@ -348,7 +347,7 @@ again: } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, key >> 3); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number); if (!f) { // Unknown field. 
If/when the upb_src interface supports reporting @@ -557,6 +556,8 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) void upb_decoder_free(upb_decoder *d) { + upb_string_unref(d->str); + if(d->buf) upb_string_unref(d->buf); free(d); } diff --git a/src/upb_def.c b/src/upb_def.c index 1eaaeef..31f14fa 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -839,3 +839,157 @@ src_err: err: upb_deflist_uninit(&defs); } + +/* upb_baredecoder ************************************************************/ + +// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer +// binary format. It is only used for bootstrapping. It can parse without +// having a upb_msgdef, which is why it is useful for bootstrapping the first +// msgdef. On the downside, it does not support: +// +// * having its input span multiple upb_strings. +// * reading any field of the returned upb_fielddef's except f->number. +// * keeping a pointer to the upb_fielddef* and reading it later (the same +// upb_fielddef is reused over and over). +// * detecting errors in the input (we trust that our input is known-good). +// +// It also does not support any of the follow protobuf features: +// * packed fields. +// * groups. +// * zig-zag-encoded types like sint32 and sint64. +// +// If descriptor.proto ever changed to use any of these features, this decoder +// would need to be extended to support them. 
+ +typedef struct { + upb_src src; + upb_string *input; + upb_strlen_t offset; + upb_fielddef field; + upb_wire_type_t wire_type; + upb_strlen_t delimited_len; + upb_strlen_t stack[UPB_MAX_NESTING], *top; + upb_string *str; +} upb_baredecoder; + +static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) +{ + const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset; + const uint8_t *buf = start; + uint8_t last = 0x80; + uint64_t val = 0; + for(int bitpos = 0; (last & 0x80); buf++, bitpos += 7) + val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; + d->offset += buf - start; + return val; +} + +static uint32_t upb_baredecoder_readv32(upb_baredecoder *d) +{ + return (uint32_t)upb_baredecoder_readv64(d); // Truncate. +} + +static uint64_t upb_baredecoder_readf64(upb_baredecoder *d) +{ + uint64_t val; + memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 8); + d->offset += 8; + return val; +} + +static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) +{ + uint32_t val; + memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 4); + d->offset += 4; + return val; +} + +static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) +{ + // Detect end-of-submessage. + if(d->offset >= *d->top) { + d->src.eof = true; + return NULL; + } + + uint32_t key; + key = upb_baredecoder_readv32(d); + d->wire_type = key & 0x7; + d->field.number = key >> 3; + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + // For delimited wire values we parse the length now, since we need it in + // all cases. 
+ d->delimited_len = upb_baredecoder_readv32(d); + } + return &d->field; +} + +static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) +{ + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + d->str = upb_string_tryrecycle(d->str); + upb_string_substr(d->str, d->input, d->offset, d->delimited_len); + } else { + switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: + *val.uint64 = upb_baredecoder_readv64(d); + break; + case UPB_WIRE_TYPE_32BIT_VARINT: + *val.uint32 = upb_baredecoder_readv32(d); + break; + case UPB_WIRE_TYPE_64BIT: + *val.uint64 = upb_baredecoder_readf64(d); + break; + case UPB_WIRE_TYPE_32BIT: + *val.uint32 = upb_baredecoder_readf32(d); + break; + default: + assert(false); + } + } + return true; +} + +static bool upb_baredecoder_skipval(upb_baredecoder *d) +{ + upb_value val; + return upb_baredecoder_getval(d, upb_value_addrof(&val)); +} + +static bool upb_baredecoder_startmsg(upb_baredecoder *d) +{ + *(d->top++) = d->offset + d->delimited_len; + return true; +} + +static bool upb_baredecoder_endmsg(upb_baredecoder *d) +{ + d->offset = *(--d->top); + return true; +} + +static upb_src_vtable upb_baredecoder_src_vtbl = { + (upb_src_getdef_fptr)&upb_baredecoder_getdef, + (upb_src_getval_fptr)&upb_baredecoder_getval, + (upb_src_skipval_fptr)&upb_baredecoder_skipval, + (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, + (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, +}; + +upb_baredecoder *upb_baredecoder_new(upb_string *str) +{ + upb_baredecoder *d = malloc(sizeof(*d)); + d->input = upb_string_getref(str); + d->str = upb_string_new(); + d->top = &d->stack[0]; + upb_src_init(&d->src, &upb_baredecoder_src_vtbl); + return d; +} + +void upb_baredecoder_free(upb_baredecoder *d) +{ + upb_string_unref(d->input); + upb_string_unref(d->str); + free(d); +} diff --git a/src/upb_table.c b/src/upb_table.c index 51a9f21..6fd2c20 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -57,19 +57,19 @@ void upb_strtable_free(upb_strtable *t) { 
upb_table_free(&t->t); } -static uint32_t strtable_bucket(upb_strtable *t, upb_strptr key) +static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) { - uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_strlen(key), 0); + uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0); return (hash & (upb_strtable_size(t)-1)) + 1; } -void *upb_strtable_lookup(upb_strtable *t, upb_strptr key) +void *upb_strtable_lookup(upb_strtable *t, upb_string *key) { uint32_t bucket = strtable_bucket(t, key); upb_strtable_entry *e; do { e = strent(t, bucket); - if(!upb_string_isnull(e->key) && upb_streql(e->key, key)) return e; + if(e->key && upb_streql(e->key, key)) return e; } while((bucket = e->next) != UPB_END_OF_CHAIN); return NULL; } @@ -149,7 +149,7 @@ static uint32_t empty_strbucket(upb_strtable *table) /* TODO: does it matter that this is biased towards the front of the table? */ for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { upb_strtable_entry *e = strent(table, i); - if(upb_string_isnull(e->key)) return i; + if(!e->key) return i; } assert(false); return 0; @@ -158,11 +158,11 @@ static uint32_t empty_strbucket(upb_strtable *table) static void strinsert(upb_strtable *t, upb_strtable_entry *e) { assert(upb_strtable_lookup(t, e->key) == NULL); - e->key = upb_string_getref(e->key, UPB_REF_FROZEN); + e->key = upb_string_getref(e->key); t->t.count++; uint32_t bucket = strtable_bucket(t, e->key); upb_strtable_entry *table_e = strent(t, bucket); - if(!upb_string_isnull(table_e->key)) { /* Collision. */ + if(table_e->key) { /* Collision. */ if(bucket == strtable_bucket(t, table_e->key)) { /* Existing element is in its main posisiton. Find an empty slot to * place our new element and append it to this key's chain. 
*/ -- cgit v1.2.3 From a7eaeaaaeca7f0860ecd690f5861e7d72259bf68 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 7 Jul 2010 10:21:39 -0700 Subject: Change descriptor.c/h to be just a byte array of serialized descriptor.proto. --- descriptor/descriptor.c | 2643 ++++++----------------------------------------- descriptor/descriptor.h | 407 +------- 2 files changed, 308 insertions(+), 2742 deletions(-) diff --git a/descriptor/descriptor.c b/descriptor/descriptor.c index c722b25..cd50a16 100644 --- a/descriptor/descriptor.c +++ b/descriptor/descriptor.c @@ -1,2351 +1,294 @@ -/* - * This file is a data dump of a protocol buffer into a C structure. - * It was created by the upb compiler (upbc) with the following - * command-line: - * - * ./tools/upbc -i upb_file_descriptor_set -o descriptor/descriptor descriptor/descriptor.proto.pb - * - * This file is a dump of 'descriptor/descriptor.proto.pb'. - * It contains exactly the same data, but in a C structure form - * instead of a serialized protobuf. This file contains no code, - * only data. - * - * This file was auto-generated. Do not edit. */ - -#include "descriptor/descriptor.h" - -static char strdata[] = - ".google.protobuf.DescriptorProto.google.protobuf.DescriptorProto.ExtensionRan" - "ge.google.protobuf.EnumDescriptorProto.google.protobuf.EnumOptions.google.pro" - "tobuf.EnumValueDescriptorProto.google.protobuf.EnumValueOptions.google.protob" - "uf.FieldDescriptorProto.google.protobuf.FieldDescriptorProto.Label.google.pro" - "tobuf.FieldDescriptorProto.Type.google.protobuf.FieldOptions.google.protobuf." 
- "FieldOptions.CType.google.protobuf.FileDescriptorProto.google.protobuf.FileOp" - "tions.google.protobuf.FileOptions.OptimizeMode.google.protobuf.MessageOptions" - ".google.protobuf.MethodDescriptorProto.google.protobuf.MethodOptions.google.p" - "rotobuf.ServiceDescriptorProto.google.protobuf.ServiceOptions.google.protobuf" - ".UninterpretedOption.google.protobuf.UninterpretedOption.NamePartCODE_SIZECOR" - "DCTypeDescriptorProtoDescriptorProtosEnumDescriptorProtoEnumOptionsEnumValueD" - "escriptorProtoEnumValueOptionsExtensionRangeFieldDescriptorProtoFieldOptionsF" - "ileDescriptorProtoFileDescriptorSetFileOptionsLABEL_OPTIONALLABEL_REPEATEDLAB" - "EL_REQUIREDLabelMessageOptionsMethodDescriptorProtoMethodOptionsNamePartOptim" - "izeModeSPEEDSTRING_PIECEServiceDescriptorProtoServiceOptionsTYPE_BOOLTYPE_BYT" - "ESTYPE_DOUBLETYPE_ENUMTYPE_FIXED32TYPE_FIXED64TYPE_FLOATTYPE_GROUPTYPE_INT32T" - "YPE_INT64TYPE_MESSAGETYPE_SFIXED32TYPE_SFIXED64TYPE_SINT32TYPE_SINT64TYPE_STR" - "INGTYPE_UINT32TYPE_UINT64TypeUninterpretedOptioncom.google.protobufctypedefau" - "lt_valuedependencydeprecateddescriptor/descriptor.protodouble_valueendenum_ty" - "peexperimental_map_keyextendeeextensionextension_rangefalsefieldfilegoogle.pr" - "otobufidentifier_valueinput_typeis_extensionjava_multiple_filesjava_outer_cla" - "ssnamejava_packagelabelmessage_set_wire_formatmessage_typemethodnamename_part" - "negative_int_valuenested_typenumberoptimize_foroptionsoutput_typepackagepacke" - "dpositive_int_valueservicestartstring_valuetypetype_nameuninterpreted_optionv" - "alue"; - -static upb_static_string strings[] = { - UPB_STATIC_STRING_INIT_LEN(&strdata[0], 32), - UPB_STATIC_STRING_INIT_LEN(&strdata[32], 47), - UPB_STATIC_STRING_INIT_LEN(&strdata[79], 36), - UPB_STATIC_STRING_INIT_LEN(&strdata[115], 28), - UPB_STATIC_STRING_INIT_LEN(&strdata[143], 41), - UPB_STATIC_STRING_INIT_LEN(&strdata[184], 33), - UPB_STATIC_STRING_INIT_LEN(&strdata[217], 37), - UPB_STATIC_STRING_INIT_LEN(&strdata[254], 43), - 
UPB_STATIC_STRING_INIT_LEN(&strdata[297], 42), - UPB_STATIC_STRING_INIT_LEN(&strdata[339], 29), - UPB_STATIC_STRING_INIT_LEN(&strdata[368], 35), - UPB_STATIC_STRING_INIT_LEN(&strdata[403], 36), - UPB_STATIC_STRING_INIT_LEN(&strdata[439], 28), - UPB_STATIC_STRING_INIT_LEN(&strdata[467], 41), - UPB_STATIC_STRING_INIT_LEN(&strdata[508], 31), - UPB_STATIC_STRING_INIT_LEN(&strdata[539], 38), - UPB_STATIC_STRING_INIT_LEN(&strdata[577], 30), - UPB_STATIC_STRING_INIT_LEN(&strdata[607], 39), - UPB_STATIC_STRING_INIT_LEN(&strdata[646], 31), - UPB_STATIC_STRING_INIT_LEN(&strdata[677], 36), - UPB_STATIC_STRING_INIT_LEN(&strdata[713], 45), - UPB_STATIC_STRING_INIT_LEN(&strdata[758], 9), - UPB_STATIC_STRING_INIT_LEN(&strdata[767], 4), - UPB_STATIC_STRING_INIT_LEN(&strdata[771], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[776], 15), - UPB_STATIC_STRING_INIT_LEN(&strdata[791], 16), - UPB_STATIC_STRING_INIT_LEN(&strdata[807], 19), - UPB_STATIC_STRING_INIT_LEN(&strdata[826], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[837], 24), - UPB_STATIC_STRING_INIT_LEN(&strdata[861], 16), - UPB_STATIC_STRING_INIT_LEN(&strdata[877], 14), - UPB_STATIC_STRING_INIT_LEN(&strdata[891], 20), - UPB_STATIC_STRING_INIT_LEN(&strdata[911], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[923], 19), - UPB_STATIC_STRING_INIT_LEN(&strdata[942], 17), - UPB_STATIC_STRING_INIT_LEN(&strdata[959], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[970], 14), - UPB_STATIC_STRING_INIT_LEN(&strdata[984], 14), - UPB_STATIC_STRING_INIT_LEN(&strdata[998], 14), - UPB_STATIC_STRING_INIT_LEN(&strdata[1012], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[1017], 14), - UPB_STATIC_STRING_INIT_LEN(&strdata[1031], 21), - UPB_STATIC_STRING_INIT_LEN(&strdata[1052], 13), - UPB_STATIC_STRING_INIT_LEN(&strdata[1065], 8), - UPB_STATIC_STRING_INIT_LEN(&strdata[1073], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1085], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[1090], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1102], 22), - 
UPB_STATIC_STRING_INIT_LEN(&strdata[1124], 14), - UPB_STATIC_STRING_INIT_LEN(&strdata[1138], 9), - UPB_STATIC_STRING_INIT_LEN(&strdata[1147], 10), - UPB_STATIC_STRING_INIT_LEN(&strdata[1157], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1168], 9), - UPB_STATIC_STRING_INIT_LEN(&strdata[1177], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1189], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1201], 10), - UPB_STATIC_STRING_INIT_LEN(&strdata[1211], 10), - UPB_STATIC_STRING_INIT_LEN(&strdata[1221], 10), - UPB_STATIC_STRING_INIT_LEN(&strdata[1231], 10), - UPB_STATIC_STRING_INIT_LEN(&strdata[1241], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1253], 13), - UPB_STATIC_STRING_INIT_LEN(&strdata[1266], 13), - UPB_STATIC_STRING_INIT_LEN(&strdata[1279], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1290], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1301], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1312], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1323], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1334], 4), - UPB_STATIC_STRING_INIT_LEN(&strdata[1338], 19), - UPB_STATIC_STRING_INIT_LEN(&strdata[1357], 19), - UPB_STATIC_STRING_INIT_LEN(&strdata[1376], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[1381], 13), - UPB_STATIC_STRING_INIT_LEN(&strdata[1394], 10), - UPB_STATIC_STRING_INIT_LEN(&strdata[1404], 10), - UPB_STATIC_STRING_INIT_LEN(&strdata[1414], 27), - UPB_STATIC_STRING_INIT_LEN(&strdata[1441], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1453], 3), - UPB_STATIC_STRING_INIT_LEN(&strdata[1456], 9), - UPB_STATIC_STRING_INIT_LEN(&strdata[1465], 20), - UPB_STATIC_STRING_INIT_LEN(&strdata[1485], 8), - UPB_STATIC_STRING_INIT_LEN(&strdata[1493], 9), - UPB_STATIC_STRING_INIT_LEN(&strdata[1502], 15), - UPB_STATIC_STRING_INIT_LEN(&strdata[1517], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[1522], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[1527], 4), - UPB_STATIC_STRING_INIT_LEN(&strdata[1531], 15), - UPB_STATIC_STRING_INIT_LEN(&strdata[1546], 16), - UPB_STATIC_STRING_INIT_LEN(&strdata[1562], 10), - 
UPB_STATIC_STRING_INIT_LEN(&strdata[1572], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1584], 19), - UPB_STATIC_STRING_INIT_LEN(&strdata[1603], 20), - UPB_STATIC_STRING_INIT_LEN(&strdata[1623], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1635], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[1640], 23), - UPB_STATIC_STRING_INIT_LEN(&strdata[1663], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1675], 6), - UPB_STATIC_STRING_INIT_LEN(&strdata[1681], 4), - UPB_STATIC_STRING_INIT_LEN(&strdata[1685], 9), - UPB_STATIC_STRING_INIT_LEN(&strdata[1694], 18), - UPB_STATIC_STRING_INIT_LEN(&strdata[1712], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1723], 6), - UPB_STATIC_STRING_INIT_LEN(&strdata[1729], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1741], 7), - UPB_STATIC_STRING_INIT_LEN(&strdata[1748], 11), - UPB_STATIC_STRING_INIT_LEN(&strdata[1759], 7), - UPB_STATIC_STRING_INIT_LEN(&strdata[1766], 6), - UPB_STATIC_STRING_INIT_LEN(&strdata[1772], 18), - UPB_STATIC_STRING_INIT_LEN(&strdata[1790], 7), - UPB_STATIC_STRING_INIT_LEN(&strdata[1797], 5), - UPB_STATIC_STRING_INIT_LEN(&strdata[1802], 12), - UPB_STATIC_STRING_INIT_LEN(&strdata[1814], 4), - UPB_STATIC_STRING_INIT_LEN(&strdata[1818], 9), - UPB_STATIC_STRING_INIT_LEN(&strdata[1827], 20), - UPB_STATIC_STRING_INIT_LEN(&strdata[1847], 5), +unsigned char descriptor_pb[] = { + 0x0a, 0x9b, 0x1b, 0x0a, 0x1b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x6f, 0x72, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, + 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x22, 0x47, 0x0a, 0x11, 0x46, 0x69, 0x6c, 0x65, 0x44, 0x65, 0x73, + 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x53, 0x65, 0x74, 0x12, 0x32, + 0x0a, 0x04, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x44, + 0x65, 0x73, 
0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x22, 0xdc, 0x02, 0x0a, 0x13, 0x46, 0x69, 0x6c, 0x65, 0x44, + 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x12, 0x0c, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x12, 0x0f, 0x0a, 0x07, 0x70, 0x61, 0x63, 0x6b, + 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x12, 0x12, 0x0a, + 0x0a, 0x64, 0x65, 0x70, 0x65, 0x6e, 0x64, 0x65, 0x6e, 0x63, 0x79, 0x18, + 0x03, 0x20, 0x03, 0x28, 0x09, 0x12, 0x36, 0x0a, 0x0c, 0x6d, 0x65, 0x73, + 0x73, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x65, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x37, 0x0a, 0x09, 0x65, 0x6e, 0x75, 0x6d, 0x5f, 0x74, 0x79, + 0x70, 0x65, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x44, 0x65, 0x73, 0x63, 0x72, + 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x38, + 0x0a, 0x07, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x18, 0x06, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x38, 0x0a, 0x09, + 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x07, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, + 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, + 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x2d, 0x0a, 0x07, 0x6f, 0x70, + 0x74, 0x69, 
0x6f, 0x6e, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xa9, 0x03, 0x0a, 0x0f, 0x44, 0x65, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x0c, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x12, 0x34, 0x0a, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x38, 0x0a, + 0x09, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, + 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, + 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x35, 0x0a, 0x0b, 0x6e, + 0x65, 0x73, 0x74, 0x65, 0x64, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, + 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x12, 0x37, 0x0a, 0x09, 0x65, 0x6e, 0x75, 0x6d, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x44, 0x65, 0x73, 0x63, + 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x48, 0x0a, 0x0f, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, + 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x2f, 
0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x65, 0x73, 0x63, 0x72, + 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, + 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, + 0x65, 0x12, 0x30, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x1a, 0x2c, 0x0a, 0x0e, 0x45, 0x78, 0x74, 0x65, 0x6e, + 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x0d, 0x0a, + 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x12, 0x0b, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x05, 0x22, 0x94, 0x05, 0x0a, 0x14, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, + 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x12, 0x0c, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x12, 0x0e, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, + 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x12, 0x3a, 0x0a, 0x05, + 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x2b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, + 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x38, 0x0a, 0x04, + 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2a, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x11, 0x0a, 0x09, 0x74, 0x79, + 0x70, 0x65, 
0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, + 0x09, 0x12, 0x10, 0x0a, 0x08, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x12, 0x15, 0x0a, 0x0d, 0x64, + 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x12, 0x2e, 0x0a, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xb6, 0x02, 0x0a, 0x04, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, + 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, + 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x02, + 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, + 0x36, 0x34, 0x10, 0x03, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x04, 0x12, 0x0e, 0x0a, + 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, + 0x05, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x49, + 0x58, 0x45, 0x44, 0x36, 0x34, 0x10, 0x06, 0x12, 0x10, 0x0a, 0x0c, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x46, 0x49, 0x58, 0x45, 0x44, 0x33, 0x32, 0x10, + 0x07, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x4f, + 0x4f, 0x4c, 0x10, 0x08, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x09, 0x12, 0x0e, 0x0a, + 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x10, + 0x0a, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4d, 0x45, + 0x53, 0x53, 0x41, 0x47, 0x45, 0x10, 0x0b, 0x12, 0x0e, 0x0a, 0x0a, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x53, 0x10, 0x0c, 0x12, + 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x49, 0x4e, 0x54, + 0x33, 0x32, 
0x10, 0x0d, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x45, 0x4e, 0x55, 0x4d, 0x10, 0x0e, 0x12, 0x11, 0x0a, 0x0d, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x53, 0x46, 0x49, 0x58, 0x45, 0x44, 0x33, 0x32, + 0x10, 0x0f, 0x12, 0x11, 0x0a, 0x0d, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, + 0x46, 0x49, 0x58, 0x45, 0x44, 0x36, 0x34, 0x10, 0x10, 0x12, 0x0f, 0x0a, + 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x49, 0x4e, 0x54, 0x33, 0x32, + 0x10, 0x11, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, + 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x12, 0x22, 0x43, 0x0a, 0x05, 0x4c, + 0x61, 0x62, 0x65, 0x6c, 0x12, 0x12, 0x0a, 0x0e, 0x4c, 0x41, 0x42, 0x45, + 0x4c, 0x5f, 0x4f, 0x50, 0x54, 0x49, 0x4f, 0x4e, 0x41, 0x4c, 0x10, 0x01, + 0x12, 0x12, 0x0a, 0x0e, 0x4c, 0x41, 0x42, 0x45, 0x4c, 0x5f, 0x52, 0x45, + 0x51, 0x55, 0x49, 0x52, 0x45, 0x44, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x0e, + 0x4c, 0x41, 0x42, 0x45, 0x4c, 0x5f, 0x52, 0x45, 0x50, 0x45, 0x41, 0x54, + 0x45, 0x44, 0x10, 0x03, 0x22, 0x8c, 0x01, 0x0a, 0x13, 0x45, 0x6e, 0x75, + 0x6d, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0c, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x12, 0x38, 0x0a, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x29, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, + 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x2d, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x22, 0x6c, 0x0a, 0x18, 0x45, 0x6e, 0x75, 0x6d, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, + 0x6f, 0x72, 
0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0c, 0x0a, 0x04, 0x6e, + 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x12, 0x0e, 0x0a, + 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x05, 0x12, 0x32, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x90, 0x01, 0x0a, 0x16, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0c, 0x0a, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x12, 0x36, + 0x0a, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x74, + 0x68, 0x6f, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, + 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x30, 0x0a, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x7f, 0x0a, 0x15, + 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0c, 0x0a, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x12, + 0x12, 0x0a, 0x0a, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x79, 0x70, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x12, 0x13, 0x0a, 0x0b, 0x6f, + 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x09, 0x12, 0x2f, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 
0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x0b, 0x46, 0x69, + 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, + 0x0c, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x12, 0x1c, 0x0a, 0x14, 0x6a, + 0x61, 0x76, 0x61, 0x5f, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x5f, 0x63, 0x6c, + 0x61, 0x73, 0x73, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, + 0x09, 0x12, 0x22, 0x0a, 0x13, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x6d, 0x75, + 0x6c, 0x74, 0x69, 0x70, 0x6c, 0x65, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x73, + 0x18, 0x0a, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x12, 0x46, 0x0a, 0x0c, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, + 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x29, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6d, 0x69, + 0x7a, 0x65, 0x4d, 0x6f, 0x64, 0x65, 0x3a, 0x05, 0x53, 0x50, 0x45, 0x45, + 0x44, 0x12, 0x43, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, + 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, + 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x28, + 0x0a, 0x0c, 0x4f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x4d, 0x6f, + 0x64, 0x65, 0x12, 0x09, 0x0a, 0x05, 0x53, 0x50, 0x45, 0x45, 0x44, 0x10, + 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x53, 0x49, + 0x5a, 0x45, 
0x10, 0x02, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, + 0x80, 0x80, 0x02, 0x22, 0x88, 0x01, 0x0a, 0x0e, 0x4d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, + 0x0a, 0x17, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, + 0x74, 0x5f, 0x77, 0x69, 0x72, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, + 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, + 0x73, 0x65, 0x12, 0x43, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, + 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, + 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x80, + 0x02, 0x0a, 0x0c, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x32, 0x0a, 0x05, 0x63, 0x74, 0x79, 0x70, 0x65, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x2e, 0x43, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0e, 0x0a, 0x06, 0x70, + 0x61, 0x63, 0x6b, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x12, + 0x19, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, + 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, + 0x73, 0x65, 0x12, 0x1c, 0x0a, 0x14, 0x65, 0x78, 0x70, 0x65, 0x72, 0x69, + 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x5f, 0x6d, 0x61, 0x70, 0x5f, 0x6b, + 0x65, 0x79, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x12, 0x43, 0x0a, 0x14, + 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, + 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, + 0x03, 0x28, 
0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, + 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x23, 0x0a, 0x05, 0x43, 0x54, 0x79, + 0x70, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x43, 0x4f, 0x52, 0x44, 0x10, 0x01, + 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x5f, 0x50, + 0x49, 0x45, 0x43, 0x45, 0x10, 0x02, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, + 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x5d, 0x0a, 0x0b, 0x45, 0x6e, 0x75, + 0x6d, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x43, 0x0a, 0x14, + 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, + 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, + 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, + 0x80, 0x80, 0x80, 0x02, 0x22, 0x62, 0x0a, 0x10, 0x45, 0x6e, 0x75, 0x6d, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x12, 0x43, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, + 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, + 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, + 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x60, 0x0a, 0x0e, + 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x43, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 
0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, + 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, + 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x5f, + 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x43, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, + 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, + 0x85, 0x02, 0x0a, 0x13, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, + 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, + 0x3b, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, + 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x2e, 0x4e, 0x61, 0x6d, 0x65, 0x50, 0x61, 0x72, 0x74, + 0x12, 0x18, 0x0a, 0x10, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, + 0x65, 0x72, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x12, 0x1a, 0x0a, 0x12, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, + 0x76, 0x65, 0x5f, 0x69, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x12, 0x1a, 0x0a, 0x12, 0x6e, 0x65, + 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x69, 0x6e, 0x74, 0x5f, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x12, 0x14, + 0x0a, 0x0c, 
0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x5f, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x01, 0x12, 0x14, 0x0a, 0x0c, + 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x0c, 0x1a, 0x33, 0x0a, 0x08, 0x4e, 0x61, + 0x6d, 0x65, 0x50, 0x61, 0x72, 0x74, 0x12, 0x11, 0x0a, 0x09, 0x6e, 0x61, + 0x6d, 0x65, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x18, 0x01, 0x20, 0x02, 0x28, + 0x09, 0x12, 0x14, 0x0a, 0x0c, 0x69, 0x73, 0x5f, 0x65, 0x78, 0x74, 0x65, + 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x02, 0x28, 0x08, 0x42, + 0x29, 0x0a, 0x13, 0x63, 0x6f, 0x6d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x42, 0x10, + 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, + 0x6f, 0x74, 0x6f, 0x73, 0x48, 0x01 }; - -/* Forward declarations of messages, and array decls. */ -static google_protobuf_DescriptorProto google_protobuf_DescriptorProto_values[18]; - -static google_protobuf_DescriptorProto *google_protobuf_DescriptorProto_array_elems[] = { - &google_protobuf_DescriptorProto_values[0], - &google_protobuf_DescriptorProto_values[1], - &google_protobuf_DescriptorProto_values[2], - &google_protobuf_DescriptorProto_values[3], - &google_protobuf_DescriptorProto_values[4], - &google_protobuf_DescriptorProto_values[5], - &google_protobuf_DescriptorProto_values[6], - &google_protobuf_DescriptorProto_values[7], - &google_protobuf_DescriptorProto_values[8], - &google_protobuf_DescriptorProto_values[9], - &google_protobuf_DescriptorProto_values[10], - &google_protobuf_DescriptorProto_values[11], - &google_protobuf_DescriptorProto_values[12], - &google_protobuf_DescriptorProto_values[13], - &google_protobuf_DescriptorProto_values[14], - &google_protobuf_DescriptorProto_values[15], - &google_protobuf_DescriptorProto_values[16], - &google_protobuf_DescriptorProto_values[17], -}; -static upb_static_array google_protobuf_DescriptorProto_arrays[3] = { - 
UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_array_elems[0], 16), - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_array_elems[16], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_array_elems[17], 1), -}; -static google_protobuf_FileDescriptorProto google_protobuf_FileDescriptorProto_values[1]; - -static google_protobuf_FileDescriptorProto *google_protobuf_FileDescriptorProto_array_elems[] = { - &google_protobuf_FileDescriptorProto_values[0], -}; -static upb_static_array google_protobuf_FileDescriptorProto_arrays[1] = { - UPB_STATIC_ARRAY_INIT(&google_protobuf_FileDescriptorProto_array_elems[0], 1), -}; -static google_protobuf_FileDescriptorSet google_protobuf_FileDescriptorSet_values[1]; - -static google_protobuf_DescriptorProto_ExtensionRange google_protobuf_DescriptorProto_ExtensionRange_values[7]; - -static google_protobuf_DescriptorProto_ExtensionRange *google_protobuf_DescriptorProto_ExtensionRange_array_elems[] = { - &google_protobuf_DescriptorProto_ExtensionRange_values[0], - &google_protobuf_DescriptorProto_ExtensionRange_values[1], - &google_protobuf_DescriptorProto_ExtensionRange_values[2], - &google_protobuf_DescriptorProto_ExtensionRange_values[3], - &google_protobuf_DescriptorProto_ExtensionRange_values[4], - &google_protobuf_DescriptorProto_ExtensionRange_values[5], - &google_protobuf_DescriptorProto_ExtensionRange_values[6], -}; -static upb_static_array google_protobuf_DescriptorProto_ExtensionRange_arrays[7] = { - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_ExtensionRange_array_elems[0], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_ExtensionRange_array_elems[1], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_ExtensionRange_array_elems[2], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_ExtensionRange_array_elems[3], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_ExtensionRange_array_elems[4], 1), - 
UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_ExtensionRange_array_elems[5], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_DescriptorProto_ExtensionRange_array_elems[6], 1), -}; -static google_protobuf_FileOptions google_protobuf_FileOptions_values[1]; - -static google_protobuf_EnumDescriptorProto google_protobuf_EnumDescriptorProto_values[4]; - -static google_protobuf_EnumDescriptorProto *google_protobuf_EnumDescriptorProto_array_elems[] = { - &google_protobuf_EnumDescriptorProto_values[0], - &google_protobuf_EnumDescriptorProto_values[1], - &google_protobuf_EnumDescriptorProto_values[2], - &google_protobuf_EnumDescriptorProto_values[3], -}; -static upb_static_array google_protobuf_EnumDescriptorProto_arrays[3] = { - UPB_STATIC_ARRAY_INIT(&google_protobuf_EnumDescriptorProto_array_elems[0], 2), - UPB_STATIC_ARRAY_INIT(&google_protobuf_EnumDescriptorProto_array_elems[2], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_EnumDescriptorProto_array_elems[3], 1), -}; -static google_protobuf_FieldDescriptorProto google_protobuf_FieldDescriptorProto_values[63]; - -static google_protobuf_FieldDescriptorProto *google_protobuf_FieldDescriptorProto_array_elems[] = { - &google_protobuf_FieldDescriptorProto_values[0], - &google_protobuf_FieldDescriptorProto_values[1], - &google_protobuf_FieldDescriptorProto_values[2], - &google_protobuf_FieldDescriptorProto_values[3], - &google_protobuf_FieldDescriptorProto_values[4], - &google_protobuf_FieldDescriptorProto_values[5], - &google_protobuf_FieldDescriptorProto_values[6], - &google_protobuf_FieldDescriptorProto_values[7], - &google_protobuf_FieldDescriptorProto_values[8], - &google_protobuf_FieldDescriptorProto_values[9], - &google_protobuf_FieldDescriptorProto_values[10], - &google_protobuf_FieldDescriptorProto_values[11], - &google_protobuf_FieldDescriptorProto_values[12], - &google_protobuf_FieldDescriptorProto_values[13], - &google_protobuf_FieldDescriptorProto_values[14], - &google_protobuf_FieldDescriptorProto_values[15], 
- &google_protobuf_FieldDescriptorProto_values[16], - &google_protobuf_FieldDescriptorProto_values[17], - &google_protobuf_FieldDescriptorProto_values[18], - &google_protobuf_FieldDescriptorProto_values[19], - &google_protobuf_FieldDescriptorProto_values[20], - &google_protobuf_FieldDescriptorProto_values[21], - &google_protobuf_FieldDescriptorProto_values[22], - &google_protobuf_FieldDescriptorProto_values[23], - &google_protobuf_FieldDescriptorProto_values[24], - &google_protobuf_FieldDescriptorProto_values[25], - &google_protobuf_FieldDescriptorProto_values[26], - &google_protobuf_FieldDescriptorProto_values[27], - &google_protobuf_FieldDescriptorProto_values[28], - &google_protobuf_FieldDescriptorProto_values[29], - &google_protobuf_FieldDescriptorProto_values[30], - &google_protobuf_FieldDescriptorProto_values[31], - &google_protobuf_FieldDescriptorProto_values[32], - &google_protobuf_FieldDescriptorProto_values[33], - &google_protobuf_FieldDescriptorProto_values[34], - &google_protobuf_FieldDescriptorProto_values[35], - &google_protobuf_FieldDescriptorProto_values[36], - &google_protobuf_FieldDescriptorProto_values[37], - &google_protobuf_FieldDescriptorProto_values[38], - &google_protobuf_FieldDescriptorProto_values[39], - &google_protobuf_FieldDescriptorProto_values[40], - &google_protobuf_FieldDescriptorProto_values[41], - &google_protobuf_FieldDescriptorProto_values[42], - &google_protobuf_FieldDescriptorProto_values[43], - &google_protobuf_FieldDescriptorProto_values[44], - &google_protobuf_FieldDescriptorProto_values[45], - &google_protobuf_FieldDescriptorProto_values[46], - &google_protobuf_FieldDescriptorProto_values[47], - &google_protobuf_FieldDescriptorProto_values[48], - &google_protobuf_FieldDescriptorProto_values[49], - &google_protobuf_FieldDescriptorProto_values[50], - &google_protobuf_FieldDescriptorProto_values[51], - &google_protobuf_FieldDescriptorProto_values[52], - &google_protobuf_FieldDescriptorProto_values[53], - 
&google_protobuf_FieldDescriptorProto_values[54], - &google_protobuf_FieldDescriptorProto_values[55], - &google_protobuf_FieldDescriptorProto_values[56], - &google_protobuf_FieldDescriptorProto_values[57], - &google_protobuf_FieldDescriptorProto_values[58], - &google_protobuf_FieldDescriptorProto_values[59], - &google_protobuf_FieldDescriptorProto_values[60], - &google_protobuf_FieldDescriptorProto_values[61], - &google_protobuf_FieldDescriptorProto_values[62], -}; -static upb_static_array google_protobuf_FieldDescriptorProto_arrays[18] = { - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[0], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[1], 8), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[9], 7), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[16], 2), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[18], 8), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[26], 3), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[29], 3), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[32], 3), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[35], 4), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[39], 5), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[44], 2), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[46], 5), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[51], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[52], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[53], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[54], 1), - UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[55], 6), - 
UPB_STATIC_ARRAY_INIT(&google_protobuf_FieldDescriptorProto_array_elems[61], 2), -}; -static google_protobuf_EnumValueDescriptorProto google_protobuf_EnumValueDescriptorProto_values[25]; - -static google_protobuf_EnumValueDescriptorProto *google_protobuf_EnumValueDescriptorProto_array_elems[] = { - &google_protobuf_EnumValueDescriptorProto_values[0], - &google_protobuf_EnumValueDescriptorProto_values[1], - &google_protobuf_EnumValueDescriptorProto_values[2], - &google_protobuf_EnumValueDescriptorProto_values[3], - &google_protobuf_EnumValueDescriptorProto_values[4], - &google_protobuf_EnumValueDescriptorProto_values[5], - &google_protobuf_EnumValueDescriptorProto_values[6], - &google_protobuf_EnumValueDescriptorProto_values[7], - &google_protobuf_EnumValueDescriptorProto_values[8], - &google_protobuf_EnumValueDescriptorProto_values[9], - &google_protobuf_EnumValueDescriptorProto_values[10], - &google_protobuf_EnumValueDescriptorProto_values[11], - &google_protobuf_EnumValueDescriptorProto_values[12], - &google_protobuf_EnumValueDescriptorProto_values[13], - &google_protobuf_EnumValueDescriptorProto_values[14], - &google_protobuf_EnumValueDescriptorProto_values[15], - &google_protobuf_EnumValueDescriptorProto_values[16], - &google_protobuf_EnumValueDescriptorProto_values[17], - &google_protobuf_EnumValueDescriptorProto_values[18], - &google_protobuf_EnumValueDescriptorProto_values[19], - &google_protobuf_EnumValueDescriptorProto_values[20], - &google_protobuf_EnumValueDescriptorProto_values[21], - &google_protobuf_EnumValueDescriptorProto_values[22], - &google_protobuf_EnumValueDescriptorProto_values[23], - &google_protobuf_EnumValueDescriptorProto_values[24], -}; -static upb_static_array google_protobuf_EnumValueDescriptorProto_arrays[4] = { - UPB_STATIC_ARRAY_INIT(&google_protobuf_EnumValueDescriptorProto_array_elems[0], 18), - UPB_STATIC_ARRAY_INIT(&google_protobuf_EnumValueDescriptorProto_array_elems[18], 3), - 
UPB_STATIC_ARRAY_INIT(&google_protobuf_EnumValueDescriptorProto_array_elems[21], 2), - UPB_STATIC_ARRAY_INIT(&google_protobuf_EnumValueDescriptorProto_array_elems[23], 2), -}; -static google_protobuf_DescriptorProto google_protobuf_DescriptorProto_values[18] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[34]), /* "FileDescriptorSet" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[0]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[33]), /* "FileDescriptorProto" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[1]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = true, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[24]), /* "DescriptorProto" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[2]), - .nested_type = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_arrays[1]), - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = true, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[31]), /* "FieldDescriptorProto" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[4]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_EnumDescriptorProto_arrays[0]), - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[26]), /* "EnumDescriptorProto" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[5]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. 
*/ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[28]), /* "EnumValueDescriptorProto" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[6]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[47]), /* "ServiceDescriptorProto" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[7]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[41]), /* "MethodDescriptorProto" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[8]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. 
*/ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = true, - .extension_range = true, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[35]), /* "FileOptions" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[9]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_EnumDescriptorProto_arrays[1]), - .extension_range = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_ExtensionRange_arrays[0]), - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = true, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[40]), /* "MessageOptions" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[10]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_ExtensionRange_arrays[1]), - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = true, - .extension_range = true, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[32]), /* "FieldOptions" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[11]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_EnumDescriptorProto_arrays[2]), - .extension_range = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_ExtensionRange_arrays[2]), - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = true, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[27]), /* "EnumOptions" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[12]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_ExtensionRange_arrays[3]), - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = true, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[29]), /* "EnumValueOptions" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[13]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. 
*/ - .extension_range = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_ExtensionRange_arrays[4]), - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = true, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[48]), /* "ServiceOptions" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[14]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_ExtensionRange_arrays[5]), - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = true, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[42]), /* "MethodOptions" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[15]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_ExtensionRange_arrays[6]), - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = true, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[68]), /* "UninterpretedOption" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[16]), - .nested_type = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_arrays[2]), - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[30]), /* "ExtensionRange" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[3]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .field = true, - .nested_type = false, - .enum_type = false, - .extension_range = false, - .extension = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[43]), /* "NamePart" */ - .field = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FieldDescriptorProto_arrays[17]), - .nested_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension_range = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. 
*/ - .options = 0, /* Not set. */ - }, -}; -static google_protobuf_FileDescriptorProto google_protobuf_FileDescriptorProto_values[1] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .package = true, - .dependency = false, - .message_type = true, - .enum_type = false, - .service = false, - .extension = false, - .options = true, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[74]), /* "descriptor/descriptor.proto" */ - .package = UPB_STATIC_STRING_PTR_INIT(strings[85]), /* "google.protobuf" */ - .dependency = UPB_ARRAY_NULL_INITIALIZER, /* Not set. */ - .message_type = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_DescriptorProto_arrays[0]), - .enum_type = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .service = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .extension = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - .options = &google_protobuf_FileOptions_values[0], - }, -}; -static google_protobuf_FileDescriptorSet google_protobuf_FileDescriptorSet_values[1] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .file = true, - }}, - .file = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_FileDescriptorProto_arrays[0]), - }, -}; -static google_protobuf_DescriptorProto_ExtensionRange google_protobuf_DescriptorProto_ExtensionRange_values[7] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .start = true, - .end = true, - }}, - .start = 1000, - .end = 536870912, - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .start = true, - .end = true, - }}, - .start = 1000, - .end = 536870912, - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .start = true, - .end = true, - }}, - .start = 1000, - .end = 536870912, - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .start = true, - .end = true, - }}, - .start = 1000, - .end = 536870912, - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .start = true, - .end = true, - }}, - .start = 1000, - .end = 536870912, - }, - {.base = 
{UPB_DATA_FROZEN}, - .set_flags = {.has = { - .start = true, - .end = true, - }}, - .start = 1000, - .end = 536870912, - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .start = true, - .end = true, - }}, - .start = 1000, - .end = 536870912, - }, -}; -static google_protobuf_FileOptions google_protobuf_FileOptions_values[1] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .java_package = true, - .java_outer_classname = true, - .optimize_for = true, - .java_multiple_files = false, - .uninterpreted_option = false, - }}, - .java_package = UPB_STATIC_STRING_PTR_INIT(strings[69]), /* "com.google.protobuf" */ - .java_outer_classname = UPB_STATIC_STRING_PTR_INIT(strings[25]), /* "DescriptorProtos" */ - .optimize_for = 1, - .java_multiple_files = 0, /* Not set. */ - .uninterpreted_option = {UPB_ARRAY_NULL_INITIALIZER}, /* Not set. */ - }, -}; -static google_protobuf_EnumDescriptorProto google_protobuf_EnumDescriptorProto_values[4] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[67]), /* "Type" */ - .value = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_EnumValueDescriptorProto_arrays[0]), - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[39]), /* "Label" */ - .value = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_EnumValueDescriptorProto_arrays[1]), - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[44]), /* "OptimizeMode" */ - .value = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_EnumValueDescriptorProto_arrays[2]), - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[23]), /* "CType" */ - .value = UPB_STATIC_ARRAY_PTR_TYPED_INIT(google_protobuf_EnumValueDescriptorProto_arrays[3]), - .options = 0, /* Not set. */ - }, -}; -static google_protobuf_FieldDescriptorProto google_protobuf_FieldDescriptorProto_values[63] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[84]), /* "file" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[11]), /* ".google.protobuf.FileDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[104]), /* "package" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. 
*/ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[72]), /* "dependency" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 3, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[94]), /* "message_type" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 4, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[0]), /* ".google.protobuf.DescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[77]), /* "enum_type" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 5, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[2]), /* ".google.protobuf.EnumDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[107]), /* "service" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 6, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[17]), /* ".google.protobuf.ServiceDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[80]), /* "extension" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 7, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[6]), /* ".google.protobuf.FieldDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[102]), /* "options" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 8, - .label = 1, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[12]), /* ".google.protobuf.FileOptions" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[83]), /* "field" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[6]), /* ".google.protobuf.FieldDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[99]), /* "nested_type" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[0]), /* ".google.protobuf.DescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[77]), /* "enum_type" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 4, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[2]), /* ".google.protobuf.EnumDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[81]), /* "extension_range" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 5, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[1]), /* ".google.protobuf.DescriptorProto.ExtensionRange" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[80]), /* "extension" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 6, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[6]), /* ".google.protobuf.FieldDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[102]), /* "options" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 7, - .label = 1, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[14]), /* ".google.protobuf.MessageOptions" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[108]), /* "start" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 5, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[76]), /* "end" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 1, - .type = 5, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[79]), /* "extendee" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[100]), /* "number" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 1, - .type = 5, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[92]), /* "label" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. 
*/ - .number = 4, - .label = 1, - .type = 14, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[7]), /* ".google.protobuf.FieldDescriptorProto.Label" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[110]), /* "type" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 5, - .label = 1, - .type = 14, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[8]), /* ".google.protobuf.FieldDescriptorProto.Type" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[111]), /* "type_name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 6, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[71]), /* "default_value" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 7, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[102]), /* "options" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 8, - .label = 1, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[9]), /* ".google.protobuf.FieldOptions" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[113]), /* "value" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[4]), /* ".google.protobuf.EnumValueDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[102]), /* "options" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 1, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[3]), /* ".google.protobuf.EnumOptions" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[100]), /* "number" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 1, - .type = 5, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[102]), /* "options" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 1, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[5]), /* ".google.protobuf.EnumValueOptions" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[95]), /* "method" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[15]), /* ".google.protobuf.MethodDescriptorProto" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[102]), /* "options" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 1, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[18]), /* ".google.protobuf.ServiceOptions" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[87]), /* "input_type" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[103]), /* "output_type" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[102]), /* "options" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 4, - .label = 1, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[16]), /* ".google.protobuf.MethodOptions" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[91]), /* "java_package" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[90]), /* "java_outer_classname" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 8, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[101]), /* "optimize_for" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 9, - .label = 1, - .type = 14, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[13]), /* ".google.protobuf.FileOptions.OptimizeMode" */ - .default_value = UPB_STATIC_STRING_PTR_INIT(strings[45]), /* "SPEED" */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[89]), /* "java_multiple_files" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 10, - .label = 1, - .type = 8, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STATIC_STRING_PTR_INIT(strings[82]), /* "false" */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[112]), /* "uninterpreted_option" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 999, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[19]), /* ".google.protobuf.UninterpretedOption" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[93]), /* "message_set_wire_format" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 8, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STATIC_STRING_PTR_INIT(strings[82]), /* "false" */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[112]), /* "uninterpreted_option" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 999, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[19]), /* ".google.protobuf.UninterpretedOption" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[70]), /* "ctype" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 1, - .label = 1, - .type = 14, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[10]), /* ".google.protobuf.FieldOptions.CType" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[105]), /* "packed" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 1, - .type = 8, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[73]), /* "deprecated" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 1, - .type = 8, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STATIC_STRING_PTR_INIT(strings[82]), /* "false" */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[78]), /* "experimental_map_key" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 9, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[112]), /* "uninterpreted_option" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 999, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[19]), /* ".google.protobuf.UninterpretedOption" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[112]), /* "uninterpreted_option" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 999, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[19]), /* ".google.protobuf.UninterpretedOption" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[112]), /* "uninterpreted_option" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 999, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[19]), /* ".google.protobuf.UninterpretedOption" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[112]), /* "uninterpreted_option" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 999, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[19]), /* ".google.protobuf.UninterpretedOption" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[112]), /* "uninterpreted_option" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 999, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[19]), /* ".google.protobuf.UninterpretedOption" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = true, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[96]), /* "name" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 3, - .type = 11, - .type_name = UPB_STATIC_STRING_PTR_INIT(strings[20]), /* ".google.protobuf.UninterpretedOption.NamePart" */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[86]), /* "identifier_value" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 3, - .label = 1, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[106]), /* "positive_int_value" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 4, - .label = 1, - .type = 4, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[98]), /* "negative_int_value" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 5, - .label = 1, - .type = 3, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[75]), /* "double_value" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 6, - .label = 1, - .type = 1, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[109]), /* "string_value" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 7, - .label = 1, - .type = 12, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[97]), /* "name_part" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. 
*/ - .number = 1, - .label = 2, - .type = 9, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .extendee = false, - .number = true, - .label = true, - .type = true, - .type_name = false, - .default_value = false, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[88]), /* "is_extension" */ - .extendee = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .number = 2, - .label = 2, - .type = 8, - .type_name = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .default_value = UPB_STRING_NULL_INITIALIZER, /* Not set. */ - .options = 0, /* Not set. */ - }, -}; -static google_protobuf_EnumValueDescriptorProto google_protobuf_EnumValueDescriptorProto_values[25] = { - - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[51]), /* "TYPE_DOUBLE" */ - .number = 1, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[55]), /* "TYPE_FLOAT" */ - .number = 2, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[58]), /* "TYPE_INT64" */ - .number = 3, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[66]), /* "TYPE_UINT64" */ - .number = 4, - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[57]), /* "TYPE_INT32" */ - .number = 5, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[54]), /* "TYPE_FIXED64" */ - .number = 6, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[53]), /* "TYPE_FIXED32" */ - .number = 7, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[49]), /* "TYPE_BOOL" */ - .number = 8, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[64]), /* "TYPE_STRING" */ - .number = 9, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[56]), /* "TYPE_GROUP" */ - .number = 10, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[59]), /* "TYPE_MESSAGE" */ - .number = 11, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[50]), /* "TYPE_BYTES" */ - .number = 12, - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[65]), /* "TYPE_UINT32" */ - .number = 13, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[52]), /* "TYPE_ENUM" */ - .number = 14, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[60]), /* "TYPE_SFIXED32" */ - .number = 15, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[61]), /* "TYPE_SFIXED64" */ - .number = 16, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[62]), /* "TYPE_SINT32" */ - .number = 17, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[63]), /* "TYPE_SINT64" */ - .number = 18, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[36]), /* "LABEL_OPTIONAL" */ - .number = 1, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[38]), /* "LABEL_REQUIRED" */ - .number = 2, - .options = 0, /* Not set. 
*/ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[37]), /* "LABEL_REPEATED" */ - .number = 3, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[45]), /* "SPEED" */ - .number = 1, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[21]), /* "CODE_SIZE" */ - .number = 2, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[22]), /* "CORD" */ - .number = 1, - .options = 0, /* Not set. */ - }, - {.base = {UPB_DATA_FROZEN}, - .set_flags = {.has = { - .name = true, - .number = true, - .options = false, - }}, - .name = UPB_STATIC_STRING_PTR_INIT(strings[46]), /* "STRING_PIECE" */ - .number = 2, - .options = 0, /* Not set. */ - }, -}; -/* The externally-visible definition. */ -google_protobuf_FileDescriptorSet *upb_file_descriptor_set = &google_protobuf_FileDescriptorSet_values[0]; +unsigned int descriptor_pb_len = 3486; diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index 8c59624..9d51e5b 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -1,402 +1,25 @@ -/* This file was generated by upbc (the upb compiler). Do not edit. */ - -#ifndef DESCRIPTOR_DESCRIPTOR_H -#define DESCRIPTOR_DESCRIPTOR_H - -#include +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file contains declarations for an array that contains the contents + * of descriptor.proto, serialized as a protobuf. 
xxd is used to create + * the actual definition. + */ + +#ifndef UPB_DESCRIPTOR_H_ +#define UPB_DESCRIPTOR_H_ #ifdef __cplusplus extern "C" { #endif -struct google_protobuf_FileDescriptorSet; -extern struct google_protobuf_FileDescriptorSet *upb_file_descriptor_set; - -/* Forward declarations of all message types. - * So they can refer to each other in possibly-recursive ways. */ - -struct google_protobuf_UninterpretedOption_NamePart; -typedef struct google_protobuf_UninterpretedOption_NamePart - google_protobuf_UninterpretedOption_NamePart; -UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption_NamePart) - -struct google_protobuf_DescriptorProto; -typedef struct google_protobuf_DescriptorProto - google_protobuf_DescriptorProto; -UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto) - -struct google_protobuf_EnumDescriptorProto; -typedef struct google_protobuf_EnumDescriptorProto - google_protobuf_EnumDescriptorProto; -UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumDescriptorProto) - -struct google_protobuf_UninterpretedOption; -typedef struct google_protobuf_UninterpretedOption - google_protobuf_UninterpretedOption; -UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption) - -struct google_protobuf_FileDescriptorProto; -typedef struct google_protobuf_FileDescriptorProto - google_protobuf_FileDescriptorProto; -UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorProto) - -struct google_protobuf_MethodDescriptorProto; -typedef struct google_protobuf_MethodDescriptorProto - google_protobuf_MethodDescriptorProto; -UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodDescriptorProto) - -struct google_protobuf_EnumValueOptions; -typedef struct google_protobuf_EnumValueOptions - google_protobuf_EnumValueOptions; -UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueOptions) - -struct google_protobuf_EnumValueDescriptorProto; -typedef struct google_protobuf_EnumValueDescriptorProto - google_protobuf_EnumValueDescriptorProto; -UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueDescriptorProto) - 
-struct google_protobuf_ServiceDescriptorProto; -typedef struct google_protobuf_ServiceDescriptorProto - google_protobuf_ServiceDescriptorProto; -UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceDescriptorProto) - -struct google_protobuf_FileDescriptorSet; -typedef struct google_protobuf_FileDescriptorSet - google_protobuf_FileDescriptorSet; -UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorSet) - -struct google_protobuf_DescriptorProto_ExtensionRange; -typedef struct google_protobuf_DescriptorProto_ExtensionRange - google_protobuf_DescriptorProto_ExtensionRange; -UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto_ExtensionRange) - -struct google_protobuf_FieldOptions; -typedef struct google_protobuf_FieldOptions - google_protobuf_FieldOptions; -UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldOptions) - -struct google_protobuf_FileOptions; -typedef struct google_protobuf_FileOptions - google_protobuf_FileOptions; -UPB_DEFINE_MSG_ARRAY(google_protobuf_FileOptions) - -struct google_protobuf_MessageOptions; -typedef struct google_protobuf_MessageOptions - google_protobuf_MessageOptions; -UPB_DEFINE_MSG_ARRAY(google_protobuf_MessageOptions) - -struct google_protobuf_EnumOptions; -typedef struct google_protobuf_EnumOptions - google_protobuf_EnumOptions; -UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumOptions) - -struct google_protobuf_FieldDescriptorProto; -typedef struct google_protobuf_FieldDescriptorProto - google_protobuf_FieldDescriptorProto; -UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldDescriptorProto) - -struct google_protobuf_ServiceOptions; -typedef struct google_protobuf_ServiceOptions - google_protobuf_ServiceOptions; -UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceOptions) - -struct google_protobuf_MethodOptions; -typedef struct google_protobuf_MethodOptions - google_protobuf_MethodOptions; -UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodOptions) - -/* The message definitions themselves. 
*/ - -struct google_protobuf_UninterpretedOption_NamePart { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name_part:1; /* = 1, required. */ - bool is_extension:1; /* = 2, required. */ - } has; - } set_flags; - upb_strptr name_part; - bool is_extension; -}; - -struct google_protobuf_DescriptorProto { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 1, optional. */ - bool field:1; /* = 2, repeated. */ - bool nested_type:1; /* = 3, repeated. */ - bool enum_type:1; /* = 4, repeated. */ - bool extension_range:1; /* = 5, repeated. */ - bool extension:1; /* = 6, repeated. */ - bool options:1; /* = 7, optional. */ - } has; - } set_flags; - upb_strptr name; - UPB_MSG_ARRAYPTR(google_protobuf_FieldDescriptorProto) field; - UPB_MSG_ARRAYPTR(google_protobuf_DescriptorProto) nested_type; - UPB_MSG_ARRAYPTR(google_protobuf_EnumDescriptorProto) enum_type; - UPB_MSG_ARRAYPTR(google_protobuf_DescriptorProto_ExtensionRange) extension_range; - UPB_MSG_ARRAYPTR(google_protobuf_FieldDescriptorProto) extension; - google_protobuf_MessageOptions* options; -}; - -struct google_protobuf_EnumDescriptorProto { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 1, optional. */ - bool value:1; /* = 2, repeated. */ - bool options:1; /* = 3, optional. */ - } has; - } set_flags; - upb_strptr name; - UPB_MSG_ARRAYPTR(google_protobuf_EnumValueDescriptorProto) value; - google_protobuf_EnumOptions* options; -}; - -struct google_protobuf_UninterpretedOption { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 2, repeated. */ - bool identifier_value:1; /* = 3, optional. */ - bool positive_int_value:1; /* = 4, optional. */ - bool negative_int_value:1; /* = 5, optional. */ - bool double_value:1; /* = 6, optional. */ - bool string_value:1; /* = 7, optional. 
*/ - } has; - } set_flags; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption_NamePart) name; - upb_strptr identifier_value; - uint64_t positive_int_value; - int64_t negative_int_value; - double double_value; - upb_strptr string_value; -}; - -struct google_protobuf_FileDescriptorProto { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 1, optional. */ - bool package:1; /* = 2, optional. */ - bool dependency:1; /* = 3, repeated. */ - bool message_type:1; /* = 4, repeated. */ - bool enum_type:1; /* = 5, repeated. */ - bool service:1; /* = 6, repeated. */ - bool extension:1; /* = 7, repeated. */ - bool options:1; /* = 8, optional. */ - } has; - } set_flags; - upb_strptr name; - upb_strptr package; - upb_arrayptr dependency; - UPB_MSG_ARRAYPTR(google_protobuf_DescriptorProto) message_type; - UPB_MSG_ARRAYPTR(google_protobuf_EnumDescriptorProto) enum_type; - UPB_MSG_ARRAYPTR(google_protobuf_ServiceDescriptorProto) service; - UPB_MSG_ARRAYPTR(google_protobuf_FieldDescriptorProto) extension; - google_protobuf_FileOptions* options; -}; - -struct google_protobuf_MethodDescriptorProto { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 1, optional. */ - bool input_type:1; /* = 2, optional. */ - bool output_type:1; /* = 3, optional. */ - bool options:1; /* = 4, optional. */ - } has; - } set_flags; - upb_strptr name; - upb_strptr input_type; - upb_strptr output_type; - google_protobuf_MethodOptions* options; -}; - -struct google_protobuf_EnumValueOptions { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool uninterpreted_option:1; /* = 999, repeated. */ - } has; - } set_flags; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option; -}; - -struct google_protobuf_EnumValueDescriptorProto { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 1, optional. */ - bool number:1; /* = 2, optional. */ - bool options:1; /* = 3, optional. 
*/ - } has; - } set_flags; - upb_strptr name; - int32_t number; - google_protobuf_EnumValueOptions* options; -}; - -struct google_protobuf_ServiceDescriptorProto { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 1, optional. */ - bool method:1; /* = 2, repeated. */ - bool options:1; /* = 3, optional. */ - } has; - } set_flags; - upb_strptr name; - UPB_MSG_ARRAYPTR(google_protobuf_MethodDescriptorProto) method; - google_protobuf_ServiceOptions* options; -}; - -struct google_protobuf_FileDescriptorSet { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool file:1; /* = 1, repeated. */ - } has; - } set_flags; - UPB_MSG_ARRAYPTR(google_protobuf_FileDescriptorProto) file; -}; - -struct google_protobuf_DescriptorProto_ExtensionRange { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool start:1; /* = 1, optional. */ - bool end:1; /* = 2, optional. */ - } has; - } set_flags; - int32_t start; - int32_t end; -}; - -struct google_protobuf_FieldOptions { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool ctype:1; /* = 1, optional. */ - bool packed:1; /* = 2, optional. */ - bool deprecated:1; /* = 3, optional. */ - bool experimental_map_key:1; /* = 9, optional. */ - bool uninterpreted_option:1; /* = 999, repeated. */ - } has; - } set_flags; - int32_t ctype; - bool packed; - bool deprecated; - upb_strptr experimental_map_key; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option; -}; - -struct google_protobuf_FileOptions { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool java_package:1; /* = 1, optional. */ - bool java_outer_classname:1; /* = 8, optional. */ - bool optimize_for:1; /* = 9, optional. */ - bool java_multiple_files:1; /* = 10, optional. */ - bool uninterpreted_option:1; /* = 999, repeated. 
*/ - } has; - } set_flags; - upb_strptr java_package; - upb_strptr java_outer_classname; - int32_t optimize_for; - bool java_multiple_files; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option; -}; - -struct google_protobuf_MessageOptions { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool message_set_wire_format:1; /* = 1, optional. */ - bool uninterpreted_option:1; /* = 999, repeated. */ - } has; - } set_flags; - bool message_set_wire_format; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option; -}; - -struct google_protobuf_EnumOptions { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool uninterpreted_option:1; /* = 999, repeated. */ - } has; - } set_flags; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option; -}; - -struct google_protobuf_FieldDescriptorProto { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool name:1; /* = 1, optional. */ - bool extendee:1; /* = 2, optional. */ - bool number:1; /* = 3, optional. */ - bool label:1; /* = 4, optional. */ - bool type:1; /* = 5, optional. */ - bool type_name:1; /* = 6, optional. */ - bool default_value:1; /* = 7, optional. */ - bool options:1; /* = 8, optional. */ - } has; - } set_flags; - upb_strptr name; - upb_strptr extendee; - int32_t number; - int32_t label; - int32_t type; - upb_strptr type_name; - upb_strptr default_value; - google_protobuf_FieldOptions* options; -}; - -struct google_protobuf_ServiceOptions { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool uninterpreted_option:1; /* = 999, repeated. */ - } has; - } set_flags; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option; -}; - -struct google_protobuf_MethodOptions { - upb_data base; - union { - uint8_t bytes[1]; - struct { - bool uninterpreted_option:1; /* = 999, repeated. 
*/ - } has; - } set_flags; - UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option; -}; +unsigned char descriptor_pb[] +unsigned int descriptor_pb_len; #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* DESCRIPTOR_DESCRIPTOR_H */ +#endif /* UPB_DESCRIPTOR_H_ */ -- cgit v1.2.3 From 209dce5eb08709bfb5b21e19289b3814619ca6cc Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 7 Jul 2010 10:39:08 -0700 Subject: Defined the function for getting a upb_symtab for descriptor.proto. --- Makefile | 19 +- descriptor/descriptor.h | 4 +- src/upb.c | 38 ++-- src/upb_data.c | 500 ------------------------------------------- src/upb_data.h | 552 ------------------------------------------------ src/upb_def.c | 27 ++- src/upb_def.h | 5 + src/upb_table.c | 2 +- 8 files changed, 63 insertions(+), 1084 deletions(-) delete mode 100644 src/upb_data.c delete mode 100644 src/upb_data.h diff --git a/Makefile b/Makefile index 7235ff9..dec18ec 100644 --- a/Makefile +++ b/Makefile @@ -34,20 +34,22 @@ LDLIBS=-lpthread LIBUPB=src/libupb.a LIBUPB_PIC=src/libupb_pic.a LIBUPB_SHARED=src/libupb.so -ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) tools/upbc +ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) all: $(ALL) clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* rm -rf tests/tests tests/t.* tests/test_table - rm -rf descriptor/descriptor.proto.pb + rm -rf descriptor/descriptor.pb rm -rf tools/upbc deps cd lang_ext/python && python setup.py clean --all # The core library (src/libupb.a) -SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c src/upb_data.c \ - src/upb_encoder.c descriptor/descriptor.c src/upb_text.c +SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c \ + descriptor/descriptor.c +# Parts of core that are yet to be converted. 
+OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. src/upb_def.o: src/upb_def.c @@ -55,6 +57,7 @@ src/upb_def.o: src/upb_def.c src/upb_def.lo: src/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC + STATICOBJ=$(patsubst %.c,%.o,$(SRC)) SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC)) # building shared objects is like building static ones, except -fPIC is added. @@ -67,12 +70,12 @@ $(LIBUPB_SHARED): $(SHAREDOBJ) $(CC) -shared -o $(LIBUPB_SHARED) $(SHAREDOBJ) # Regenerating the auto-generated files in descriptor/. -descriptor/descriptor.proto.pb: descriptor/descriptor.proto +descriptor/descriptor.pb: descriptor/descriptor.proto # TODO: replace with upbc - protoc descriptor/descriptor.proto -odescriptor/descriptor.proto.pb + protoc descriptor/descriptor.proto -odescriptor/descriptor.pb -descriptorgen: descriptor/descriptor.proto.pb tools/upbc - ./tools/upbc -i upb_file_descriptor_set -o descriptor/descriptor descriptor/descriptor.proto.pb +descriptorgen: descriptor/descriptor.pb + cd descriptor && xxd -i descriptor.pb > descriptor.c # Language extensions. python: $(LIBUPB_PIC) diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index 9d51e5b..b598a9a 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -15,8 +15,8 @@ extern "C" { #endif -unsigned char descriptor_pb[] -unsigned int descriptor_pb_len; +extern unsigned char descriptor_pb[]; +extern unsigned int descriptor_pb_len; #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb.c b/src/upb.c index 189dfe4..3c5efe8 100644 --- a/src/upb.c +++ b/src/upb.c @@ -18,25 +18,25 @@ #ctype}, upb_type_info upb_types[] = { - {0, 0, 0, ""} // There is no type 0. 
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1), // DOUBLE - TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1), // FLOAT - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1), // INT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1), // UINT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1), // INT32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1), // FIXED64 - TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1), // FIXED32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1), // BOOL - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // STRING - TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0), // GROUP - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // MESSAGE - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1), // BYTES - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1), // UINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1), // ENUM - TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1), // SFIXED32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1), // SFIXED64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1), // SINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1), // SINT64 + {0, 0, 0, 0, ""}, // There is no type 0. 
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE + TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // INT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1) // UINT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // INT32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1) // FIXED64 + TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1) // FIXED32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1) // BOOL + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // STRING + TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0) // GROUP + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // MESSAGE + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // BYTES + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // UINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // ENUM + TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1) // SFIXED32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1) // SFIXED64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // SINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // SINT64 }; void upb_seterr(upb_status *status, enum upb_status_code code, diff --git a/src/upb_data.c b/src/upb_data.c deleted file mode 100644 index 3b4f7ab..0000000 --- a/src/upb_data.c +++ /dev/null @@ -1,500 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include -#include "upb_data.h" -#include "upb_decoder.h" -#include "upb_def.h" - -static uint32_t round_up_to_pow2(uint32_t v) -{ - /* cf. 
http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -/* upb_data *******************************************************************/ - -static void data_elem_unref(upb_valueptr p, upb_fielddef *f) { - if(upb_issubmsg(f)) { - upb_msg_unref(*p.msg, upb_downcast_msgdef(f->def)); - } else if(upb_isstring(f)) { - upb_string_unref(*p.str); - } else { - assert(false); - } -} - -static void data_unref(upb_valueptr p, upb_fielddef *f) { - if(upb_isarray(f)) { - upb_array_unref(*p.arr, f); - } else { - data_elem_unref(p, f); - } -} - -INLINE void data_init(upb_data *d, int flags) { - d->v = REFCOUNT_ONE | flags; -} - -static void check_not_frozen(upb_data *d) { - // On one hand I am reluctant to put abort() calls in a low-level library - // that are enabled in a production build. On the other hand, this is a bug - // in the client code that we cannot recover from, and it seems better to get - // the error here than later. 
- if(upb_data_hasflag(d, UPB_DATA_FROZEN)) abort(); -} - - -/* upb_string *******************************************************************/ - -void _upb_string_setptr(upb_strptr s, char *ptr) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - s.refcounted->ptr = ptr; - else - s.norefcount->ptr = ptr; -} - -static void _upb_string_set_bytelen(upb_strptr s, upb_strlen_t newlen) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - s.refcounted->byte_len = newlen; - } else { - s.norefcount->byte_len = newlen; - } -} - -upb_strptr upb_string_new() { - upb_strptr s; - s.refcounted = malloc(sizeof(struct upb_refcounted_string)); - data_init(s.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - s.refcounted->byte_size = 0; - s.refcounted->byte_len = 0; - s.refcounted->ptr = NULL; - return s; -} - -static upb_strlen_t string_get_bytesize(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - return s.refcounted->byte_size; - } else { - return (s.norefcount->byte_size_and_flags & 0xFFFFFFF8) >> 3; - } -} - -static void string_set_bytesize(upb_strptr s, upb_strlen_t newsize) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - s.refcounted->byte_size = newsize; - } else { - s.norefcount->byte_size_and_flags &= 0x7; - s.norefcount->byte_size_and_flags |= (newsize << 3); - } -} - -void _upb_string_free(upb_strptr s) -{ - if(string_get_bytesize(s) != 0) free((void*)upb_string_getrobuf(s)); - free(s.base); -} - -void upb_string_resize(upb_strptr s, upb_strlen_t byte_len) { - check_not_frozen(s.base); - if(string_get_bytesize(s) < byte_len) { - // Need to resize. 
- size_t new_byte_size = round_up_to_pow2(byte_len); - _upb_string_setptr(s, realloc(_upb_string_getptr(s), new_byte_size)); - string_set_bytesize(s, new_byte_size); - } - _upb_string_set_bytelen(s, byte_len); -} - -upb_strptr upb_string_getref(upb_strptr s, int ref_flags) { - if(_upb_data_incref(s.base, ref_flags)) return s; - upb_strptr copy = upb_strdup(s); - if(ref_flags == UPB_REF_FROZEN) - upb_data_setflag(copy.base, UPB_DATA_FROZEN); - return copy; -} - -upb_strptr upb_strreadfile(const char *filename) { - FILE *f = fopen(filename, "rb"); - if(!f) return UPB_STRING_NULL; - if(fseek(f, 0, SEEK_END) != 0) goto error; - long size = ftell(f); - if(size < 0) goto error; - if(fseek(f, 0, SEEK_SET) != 0) goto error; - upb_strptr s = upb_string_new(); - char *buf = upb_string_getrwbuf(s, size); - if(fread(buf, size, 1, f) != 1) goto error; - fclose(f); - return s; - -error: - fclose(f); - return UPB_STRING_NULL; -} - -upb_strptr upb_strdupc(const char *src) { - upb_strptr copy = upb_string_new(); - upb_strlen_t len = strlen(src); - char *buf = upb_string_getrwbuf(copy, len); - memcpy(buf, src, len); - return copy; -} - -void upb_strcat(upb_strptr s, upb_strptr append) { - upb_strlen_t s_len = upb_strlen(s); - upb_strlen_t append_len = upb_strlen(append); - upb_strlen_t newlen = s_len + append_len; - memcpy(upb_string_getrwbuf(s, newlen) + s_len, - upb_string_getrobuf(append), append_len); -} - -upb_strptr upb_strslice(upb_strptr s, int offset, int len) { - upb_strptr slice = upb_string_new(); - len = UPB_MIN((upb_strlen_t)len, upb_strlen(s) - (upb_strlen_t)offset); - memcpy(upb_string_getrwbuf(slice, len), upb_string_getrobuf(s) + offset, len); - return slice; -} - -upb_strptr upb_strdup(upb_strptr s) { - upb_strptr copy = upb_string_new(); - upb_strcpy(copy, s); - return copy; -} - -int upb_strcmp(upb_strptr s1, upb_strptr s2) { - upb_strlen_t common_length = UPB_MIN(upb_strlen(s1), upb_strlen(s2)); - int common_diff = memcmp(upb_string_getrobuf(s1), 
upb_string_getrobuf(s2), - common_length); - return common_diff == - 0 ? ((int)upb_strlen(s1) - (int)upb_strlen(s2)) : common_diff; -} - - -/* upb_array ******************************************************************/ - -static void _upb_array_setptr(upb_arrayptr a, void *ptr) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - a.refcounted->elements._void = ptr; - else - a.norefcount->elements._void = ptr; -} - -static void _upb_array_setlen(upb_arrayptr a, upb_strlen_t newlen) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - a.refcounted->len = newlen; - } else { - a.norefcount->len = newlen; - } -} - -upb_arrayptr upb_array_new() { - upb_arrayptr a; - a.refcounted = malloc(sizeof(struct upb_refcounted_array)); - data_init(a.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - a.refcounted->size = 0; - a.refcounted->len = 0; - a.refcounted->elements._void = NULL; - return a; -} - -// ONLY handles refcounted arrays for the moment. -void _upb_array_free(upb_arrayptr a, upb_fielddef *f) -{ - if(upb_elem_ismm(f)) { - for(upb_arraylen_t i = 0; i < a.refcounted->size; i++) { - upb_valueptr p = _upb_array_getptr(a, f, i); - if(!*p.data) continue; - data_elem_unref(p, f); - } - } - if(a.refcounted->size != 0) free(a.refcounted->elements._void); - free(a.refcounted); -} - -static upb_arraylen_t array_get_size(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - return a.refcounted->size; - } else { - return (a.norefcount->base.v & 0xFFFFFFF8) >> 3; - } -} - -static void array_set_size(upb_arrayptr a, upb_arraylen_t newsize) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - a.refcounted->size = newsize; - } else { - a.norefcount->base.v &= 0x7; - a.norefcount->base.v |= (newsize << 3); - } -} - -void upb_array_resize(upb_arrayptr a, upb_fielddef *f, upb_strlen_t len) { - check_not_frozen(a.base); - size_t type_size = upb_types[f->type].size; - upb_arraylen_t old_size = array_get_size(a); - if(old_size < len) { - // Need to 
resize. - size_t new_size = round_up_to_pow2(len); - _upb_array_setptr(a, realloc(_upb_array_getptr_raw(a, 0, 0)._void, new_size * type_size)); - array_set_size(a, new_size); - memset(_upb_array_getptr_raw(a, old_size, type_size)._void, - 0, - (new_size - old_size) * type_size); - } - _upb_array_setlen(a, len); -} - - -/* upb_msg ********************************************************************/ - -static void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { - msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); -} - -upb_msg *upb_msg_new(upb_msgdef *md) { - upb_msg *msg = malloc(md->size); - memset(msg, 0, md->size); - data_init(&msg->base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - upb_def_ref(UPB_UPCAST(md)); - return msg; -} - -// ONLY handles refcounted messages for the moment. -void _upb_msg_free(upb_msg *msg, upb_msgdef *md) -{ - for(int i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[i]; - upb_valueptr p = _upb_msg_getptr(msg, f); - if(!upb_field_ismm(f) || !*p.data) continue; - data_unref(p, f); - } - upb_def_unref(UPB_UPCAST(md)); - free(msg); -} - -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status) -{ - upb_decoder *d = upb_decoder_new(md); - upb_msgsink *s = upb_msgsink_new(md); - - upb_msgsink_reset(s, msg); - upb_decoder_reset(d, upb_msgsink_sink(s)); - upb_msg_clear(msg, md); - upb_decoder_decode(d, str, status); - - upb_decoder_free(d); - upb_msgsink_free(s); -} - -#if 0 -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status) -{ - upb_sizebuilder *sb = upb_sizebuilder_new(md); - upb_encoder *e = upb_encoder_new(md); - upb_strsink *sink = upb_strsink_new(); - - // Get sizes. We could avoid performing this step in some cases by having a - // bool in the msgdef indicating whether it or any of its children have - // submessages in the def (groups don't count). 
- upb_sizebuilder_reset(sb); - upb_msgsrc_produce(msg, md, upb_sizebuilder_sink(sb), true); - - upb_strsink_reset(); - upb_encoder_reset(e, sb, sink); - upb_msgsrc_produce(msg, md, sink, false); -} -#endif - -/* upb_msgsrc ****************************************************************/ - -static void _upb_msgsrc_produceval(upb_value v, upb_fielddef *f, upb_sink *sink, - bool reverse, upb_status *status) -{ - // TODO: We need to check status for failure, but how often? - if(upb_issubmsg(f)) { - upb_msgdef *md = upb_downcast_msgdef(f->def); - upb_sink_onstart(sink, f, status); - upb_msgsrc_produce(v.msg, md, sink, reverse, status); - upb_sink_onend(sink, f, status); - } else if(upb_isstring(f)) { - upb_sink_onstr(sink, f, v.str, 0, upb_strlen(v.str), status); - } else { - upb_sink_onvalue(sink, f, v, status); - } -} - -void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink, - bool reverse, upb_status *status) -{ - for(int i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[reverse ? md->num_fields - i - 1 : i]; - if(!upb_msg_has(msg, f)) continue; - upb_value v = upb_msg_get(msg, f); - if(upb_isarray(f)) { - upb_arrayptr arr = v.arr; - upb_arraylen_t len = upb_array_len(arr); - for(upb_arraylen_t j = 0; j < upb_array_len(arr); j++) { - upb_value elem = upb_array_get(arr, f, reverse ? len - j - 1 : j); - _upb_msgsrc_produceval(elem, f, sink, reverse, status); - } - } else { - _upb_msgsrc_produceval(v, f, sink, reverse, status); - } - } -} - - -/* upb_msgsink ***************************************************************/ - -typedef struct { - upb_msg *msg; - upb_msgdef *md; -} upb_msgsink_frame; - -struct upb_msgsink { - upb_sink base; - upb_msgdef *toplevel_msgdef; - upb_msgsink_frame stack[UPB_MAX_NESTING], *top; -}; - -/* Helper function that returns a pointer to where the next value for field "f" - * should be stored, taking into account whether f is an array that may need to - * be allocated or resized. 
*/ -static upb_valueptr get_valueptr(upb_msg *msg, upb_fielddef *f) -{ - upb_valueptr p = _upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - if(!upb_msg_has(msg, f)) { - if(upb_array_isnull(*p.arr) || !upb_data_only(*p.data)) { - if(!upb_array_isnull(*p.arr)) - upb_array_unref(*p.arr, f); - *p.arr = upb_array_new(); - } - upb_array_truncate(*p.arr); - upb_msg_sethas(msg, f); - } else { - assert(!upb_array_isnull(*p.arr)); - } - upb_arraylen_t oldlen = upb_array_len(*p.arr); - upb_array_resize(*p.arr, f, oldlen + 1); - p = _upb_array_getptr(*p.arr, f, oldlen); - } - return p; -} - -// Callbacks for upb_sink. -// TODO: implement these in terms of public interfaces. - -static upb_sink_status _upb_msgsink_valuecb(upb_sink *s, upb_fielddef *f, - upb_value val, upb_status *status) -{ - (void)status; // No detectable errors can occur. - upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *msg = ms->top->msg; - upb_valueptr p = get_valueptr(msg, f); - upb_msg_sethas(msg, f); - upb_value_write(p, val, f->type); - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_strcb(upb_sink *s, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - (void)status; // No detectable errors can occur. - upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *msg = ms->top->msg; - upb_valueptr p = get_valueptr(msg, f); - upb_msg_sethas(msg, f); - if(end > upb_strlen(str)) abort(); /* TODO: support streaming. */ - if(upb_string_isnull(*p.str) || !upb_data_only(*p.data)) { - if(!upb_string_isnull(*p.str)) - upb_string_unref(*p.str); - *p.str = upb_string_new(); - } - upb_strcpylen(*p.str, upb_string_getrobuf(str) + start, end - start); - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_startcb(upb_sink *s, upb_fielddef *f, - upb_status *status) -{ - (void)status; // No detectable errors can occur. 
- upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *oldmsg = ms->top->msg; - upb_valueptr p = get_valueptr(oldmsg, f); - ms->top++; - - if(upb_isarray(f) || !upb_msg_has(oldmsg, f)) { - upb_msgdef *md = upb_downcast_msgdef(f->def); - if(!*p.msg || !upb_data_only(*p.data)) { - if(*p.msg) - upb_msg_unref(*p.msg, md); - *p.msg = upb_msg_new(md); - } - upb_msg_clear(*p.msg, md); - upb_msg_sethas(oldmsg, f); - } - - ms->top->msg = *p.msg; - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_endcb(upb_sink *s, upb_fielddef *f, - upb_status *status) -{ - (void)status; // No detectable errors can occur. - (void)f; // Unused. - upb_msgsink *ms = (upb_msgsink*)s; - ms->top--; - return UPB_SINK_CONTINUE; -} - -static upb_sink_callbacks _upb_msgsink_vtbl = { - _upb_msgsink_valuecb, - _upb_msgsink_strcb, - _upb_msgsink_startcb, - _upb_msgsink_endcb -}; - -// -// External upb_msgsink interface. -// - -upb_msgsink *upb_msgsink_new(upb_msgdef *md) -{ - upb_msgsink *ms = malloc(sizeof(*ms)); - upb_sink_init(&ms->base, &_upb_msgsink_vtbl); - ms->toplevel_msgdef = md; - return ms; -} - -void upb_msgsink_free(upb_msgsink *sink) -{ - free(sink); -} - -upb_sink *upb_msgsink_sink(upb_msgsink *sink) -{ - return &sink->base; -} - -void upb_msgsink_reset(upb_msgsink *ms, upb_msg *msg) -{ - ms->top = ms->stack; - ms->top->msg = msg; - ms->top->md = ms->toplevel_msgdef; -} diff --git a/src/upb_data.h b/src/upb_data.h deleted file mode 100644 index c0f53ff..0000000 --- a/src/upb_data.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file defines the in-memory format for messages, arrays, and strings - * (which are the three dynamically-allocated structures that make up all - * protobufs). - * - * The members of all structs should be considered private. Access should - * only happen through the provided functions. 
- * - * Unlike Google's protobuf, messages contain *pointers* to strings and arrays - * instead of including them by value. This makes unused strings and arrays - * use less memory, and lets the strings and arrays have multiple possible - * representations (for example, a string could be a slice). It also gives - * us more flexibility wrt refcounting. The cost is that when a field *is* - * being used, the net memory usage is one pointer more than if we had - * included the thing directly. */ - -#ifndef UPB_DATA_H -#define UPB_DATA_H - -#include -#include -#include "upb.h" -#include "upb_atomic.h" -#include "upb_def.h" -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_data *******************************************************************/ - -// The "base class" of strings, arrays, and messages. Contains a few flags and -// possibly a reference count. None of the functions for upb_data are public, -// but some of the constants are. - -// typedef upb_atomic_refcount_t upb_data; - -// The flags in upb_data. -typedef enum { - // Set if the object itself was allocated with malloc() and should be freed - // with free(). This flag would be false if the object was allocated on the - // stack or is data from the static segment of an object file. Note that this - // flag does not apply to the data being referenced by a string or array. - // - // If this flag is false, UPB_FLAG_HAS_REFCOUNT must be false also; there is - // no sense refcounting something that does not need to be freed. - UPB_DATA_HEAPALLOCATED = 1, - - // Set if the object is frozen against modification. While an object is - // frozen, it is suitable for concurrent readonly access. Note that this - // flag alone is not a sufficient mechanism for preventing any kind of writes - // to the object's memory, because the object could still have a refcount. - UPB_DATA_FROZEN = (1<<1), - - // Set if the object has an embedded refcount. 
- UPB_DATA_REFCOUNTED = (1<<2) -} upb_data_flag; - -#define REFCOUNT_MASK 0xFFFFFFF8 -#define REFCOUNT_SHIFT 3 -#define REFCOUNT_ONE (1<v & flag; -} - -// INTERNAL-ONLY -INLINE void upb_data_setflag(upb_data *d, upb_data_flag flag) { - d->v |= flag; -} - -INLINE uint32_t upb_data_getrefcount(upb_data *d) { - int data; - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) - data = upb_atomic_read(d); - else - data = d->v; - return (data & REFCOUNT_MASK) >> REFCOUNT_SHIFT; -} - -// Returns true if the given data has only one owner. -INLINE bool upb_data_only(upb_data *data) { - return !upb_data_hasflag(data, UPB_DATA_REFCOUNTED) || - upb_data_getrefcount(data) == 1; -} - -// Specifies the type of ref that is requested based on the kind of access the -// caller needs to the object. -typedef enum { - // Use when the client plans to perform read-only access to the object, and - // only in one thread at a time. This imposes the least requirements on the - // object; it can be either frozen or not. As a result, requesting a - // reference of this type never performs a copy unless the object has no - // refcount. - // - // A ref of this type can always be explicitly converted to frozen or - // unfrozen later. - UPB_REF_THREADUNSAFE_READONLY = 0, - - // Use when the client plans to perform read-only access, but from multiple - // threads concurrently. This will force the object to eagerly perform any - // parsing that may have been lazily deferred, and will force a copy if the - // object is not current frozen. - // - // Asking for a reference of this type is equivalent to: - // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); - // x = freeze(x); - // ...except it is more efficient. - UPB_REF_FROZEN = 1, - - // Use when the client plans to perform read/write access. As a result, the - // reference will not be thread-safe for concurrent reading *or* writing; the - // object must be externally synchronized if it is being accessed from more - // than one thread. 
This will force a copy if the object is currently frozen. - // - // Asking for a reference of this type is equivalent to: - // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); - // x = thaw(x); - // ...except it is more efficient. - UPB_REF_MUTABLE = 2 -} upb_reftype; - -// INTERNAL-ONLY FUNCTION: -// Attempts to increment the reference on d with the given type of ref. If -// this is not possible, returns false. -INLINE bool _upb_data_incref(upb_data *d, upb_reftype reftype) { - bool frozen = upb_data_hasflag(d, UPB_DATA_FROZEN); - if((reftype == UPB_REF_FROZEN && !frozen) || - (reftype == UPB_REF_MUTABLE && frozen) || - (upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED) && - !upb_data_hasflag(d, UPB_DATA_REFCOUNTED))) { - return false; - } - // Increment the ref. Only need to use atomic ops if the ref is frozen. - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) upb_atomic_add(d, REFCOUNT_ONE); - else d->v += REFCOUNT_ONE; - return true; -} - -// INTERNAL-ONLY FUNCTION: -// Releases a reference on d, returning true if the object should be deleted. -INLINE bool _upb_data_unref(upb_data *d) { - if(upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED)) { - // A heap-allocated object without a refcount should never be decref'd. - // Its owner owns it exlusively and should free it directly. - assert(upb_data_hasflag(d, UPB_DATA_REFCOUNTED)); - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) { - int32_t old_val = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE); - return (old_val & REFCOUNT_MASK) == REFCOUNT_ONE; - } else { - d->v -= REFCOUNT_ONE; - return (d->v & REFCOUNT_MASK) == 0; - } - } else { - // Non heap-allocated data never should be deleted. - return false; - } -} - -/* upb_string *****************************************************************/ - -// We have several different representations for string, depending on whether -// it has a refcount (and likely in the future, depending on whether it is a -// slice of another string). 
We could just have one representation with -// members that are sometimes unused, but this is wasteful in memory. The -// flags that are always part of the first word tell us which representation -// to use. -// -// In a way, this is like inheritance but instead of using a virtual pointer, -// we do switch/case in every "virtual" method. This may sound expensive but -// in many cases the different cases compile to exactly the same code, so there -// is no branch. - -struct upb_norefcount_string { - uint32_t byte_size_and_flags; - upb_strlen_t byte_len; - // We expect the data to be 8-bit clean (uint8_t), but char* is such an - // ingrained convention that we follow it. - char *ptr; -}; - -// Used for a string with a refcount. -struct upb_refcounted_string { - upb_data base; - upb_strlen_t byte_len; - char *ptr; - uint32_t byte_size; -}; - - -// Returns a newly constructed, refcounted string which starts out empty. -// Caller owns one ref on it. The returned string will not be frozen. -upb_strptr upb_string_new(void); - -// INTERNAL-ONLY: -// Frees the given string, alone with any memory the string owned. -void _upb_string_free(upb_strptr s); - -// Returns a string to which caller owns a ref, and contains the same contents -// as src. The returned value may be a copy of src, if the requested flags -// were incompatible with src's. -upb_strptr upb_string_getref(upb_strptr s, int ref_flags); - -#define UPB_STRING_NULL_INITIALIZER {NULL} -static const upb_strptr UPB_STRING_NULL = UPB_STRING_NULL_INITIALIZER; -INLINE bool upb_string_isnull(upb_strptr s) { return s.base == NULL; } - -// The caller releases a ref on src, which it must previously have owned a ref -// on. -INLINE void upb_string_unref(upb_strptr s) { - if(_upb_data_unref(s.base)) _upb_string_free(s); -} - -// The string is resized to byte_len. The string must not be frozen. -void upb_string_resize(upb_strptr s, upb_strlen_t len); - -// Returns a buffer to which the caller may write. 
The string is resized to -// byte_len (which may or may not trigger a reallocation). The string must not -// be frozen. -INLINE char *upb_string_getrwbuf(upb_strptr s, upb_strlen_t byte_len) { - upb_string_resize(s, byte_len); - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->ptr; - else - return s.norefcount->ptr; -} - -INLINE void upb_string_clear(upb_strptr s) { - upb_string_getrwbuf(s, 0); -} - -// INTERNAL-ONLY: -// Gets/sets the pointer. -INLINE char *_upb_string_getptr(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->ptr; - else - return s.norefcount->ptr; -} - -// Returns a buffer that the caller may use to read the current contents of -// the string. The number of bytes available is upb_strlen(s). -INLINE const char *upb_string_getrobuf(upb_strptr s) { - return _upb_string_getptr(s); -} - -// Returns the current length of the string. -INLINE upb_strlen_t upb_strlen(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->byte_len; - else - return s.norefcount->byte_len; -} - -/* upb_string library functions ***********************************************/ - -// Named like their counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. - -// More efficient than upb_strcmp if all you need is to test equality. -INLINE bool upb_streql(upb_strptr s1, upb_strptr s2) { - upb_strlen_t len = upb_strlen(s1); - if(len != upb_strlen(s2)) { - return false; - } else { - return memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; - } -} - -// Like strcmp(). -int upb_strcmp(upb_strptr s1, upb_strptr s2); - -// Like upb_strcpy, but copies from a buffer and length. -INLINE void upb_strcpylen(upb_strptr dest, const void *src, upb_strlen_t len) { - memcpy(upb_string_getrwbuf(dest, len), src, len); -} - -// Replaces the contents of "dest" with the contents of "src". 
-INLINE void upb_strcpy(upb_strptr dest, upb_strptr src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_strlen(src)); -} - -// Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_strptr dest, const char *src) { - // This does two passes over src, but that is necessary unless we want to - // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); -} - -// Returns a new string whose contents are a copy of s. -upb_strptr upb_strdup(upb_strptr s); - -// Like upb_strdup(), but duplicates a given buffer and length. -INLINE upb_strptr upb_strduplen(const void *src, upb_strlen_t len) { - upb_strptr s = upb_string_new(); - upb_strcpylen(s, src, len); - return s; -} - -// Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_strptr upb_strdupc(const char *src); - -// Appends 'append' to 's' in-place, resizing s if necessary. -void upb_strcat(upb_strptr s, upb_strptr append); - -// Returns a string that is a substring of the given string. Currently this -// returns a copy, but in the future this may return an object that references -// the original string data instead of copying it. Both now and in the future, -// the caller owns a ref on whatever is returned. -upb_strptr upb_strslice(upb_strptr s, int offset, int len); - -// Reads an entire file into a newly-allocated string (caller owns one ref). -upb_strptr upb_strreadfile(const char *filename); - -// Typedef for a read-only string that is allocated statically or on the stack. -// Initialize with the given macro, which must resolve to a const char*. You -// must not dynamically allocate this type. 
Example usage: -// -// upb_static_string mystr = UPB_STATIC_STRING_INIT("biscuits"); -// upb_strptr mystr_ptr = UPB_STATIC_STRING_PTR_INIT(mystr); -// -// If C99 compund literals are available, the much nicer UPB_STRLIT macro is -// available instead: -// -// upb_strtr mystr_ptr = UPB_STRLIT("biscuits"); -// -typedef struct upb_norefcount_string upb_static_string; -#define UPB_STATIC_STRING_INIT_LEN(str, len) {0 | UPB_DATA_FROZEN, len, str} -#define UPB_STATIC_STRING_INIT(str) UPB_STATIC_STRING_INIT_LEN(str, sizeof(str)-1) -#define UPB_STATIC_STRING_PTR_INIT(static_string) {&static_string} -#define UPB_STRLIT(str) (upb_strptr){&(upb_static_string)UPB_STATIC_STRING_INIT(str)} - -// Allows using upb_strings in printf, ie: -// upb_strptr str = UPB_STRLIT("Hello, World!\n"); -// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) upb_strlen(str), upb_string_getrobuf(str) -#define UPB_STRFMT "%.*s" - -/* upb_array ******************************************************************/ - -// The comments attached to upb_string above also apply here. -struct upb_norefcount_array { - upb_data base; // We co-opt the refcount for the size. 
- upb_arraylen_t len; - upb_valueptr elements; -}; - -struct upb_refcounted_array { - upb_data base; - upb_arraylen_t len; - upb_valueptr elements; - upb_arraylen_t size; -}; - -typedef struct upb_norefcount_array upb_static_array; -#define UPB_STATIC_ARRAY_INIT(arr, len) {{0 | UPB_DATA_FROZEN}, len, {._void=arr}} -#define UPB_STATIC_ARRAY_PTR_TYPED_INIT(static_arr) {{&static_arr}} - -#define UPB_ARRAY_NULL_INITIALIZER {NULL} -static const upb_arrayptr UPB_ARRAY_NULL = UPB_ARRAY_NULL_INITIALIZER; -INLINE bool upb_array_isnull(upb_arrayptr a) { return a.base == NULL; } -INLINE bool upb_array_ptreql(upb_arrayptr a1, upb_arrayptr a2) { - return a1.base == a2.base; -} - -#define UPB_MSG_ARRAYPTR(type) type ## _array -#define UPB_DEFINE_MSG_ARRAY(type) \ -typedef struct { upb_arrayptr ptr; } UPB_MSG_ARRAYPTR(type); \ -INLINE upb_arraylen_t type ## _array_len(UPB_MSG_ARRAYPTR(type) a) { \ - return upb_array_len(a.ptr); \ -} \ -INLINE type* type ## _array_get(UPB_MSG_ARRAYPTR(type) a, upb_arraylen_t elem) { \ - return *(type**)_upb_array_getptr_raw(a.ptr, elem, sizeof(void*))._void; \ -} - -// Constructs a newly-allocated, reference-counted array which starts out -// empty. Caller owns one ref on it. -upb_arrayptr upb_array_new(void); - -// Returns the current number of elements in the array. -INLINE size_t upb_array_len(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - return a.refcounted->len; - else - return a.norefcount->len; -} - -// INTERNAL-ONLY: -// Frees the given message and releases references on members. -void _upb_array_free(upb_arrayptr a, upb_fielddef *f); - -// INTERNAL-ONLY: -// Returns a pointer to the given elem. 
-INLINE upb_valueptr _upb_array_getptr_raw(upb_arrayptr a, upb_arraylen_t elem, - size_t type_size) { - upb_valueptr p; - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - p._void = &a.refcounted->elements.uint8[elem * type_size]; - else - p._void = &a.norefcount->elements.uint8[elem * type_size]; - return p; -} - -INLINE upb_valueptr _upb_array_getptr(upb_arrayptr a, upb_fielddef *f, - upb_arraylen_t elem) { - return _upb_array_getptr_raw(a, elem, upb_types[f->type].size); -} - -INLINE upb_value upb_array_get(upb_arrayptr a, upb_fielddef *f, - upb_arraylen_t elem) { - assert(elem < upb_array_len(a)); - return upb_value_read(_upb_array_getptr(a, f, elem), f->type); -} - -// The caller releases a ref on the given array, which it must previously have -// owned a ref on. -INLINE void upb_array_unref(upb_arrayptr a, upb_fielddef *f) { - if(_upb_data_unref(a.base)) _upb_array_free(a, f); -} - -#if 0 -// Returns an array to which caller owns a ref, and contains the same contents -// as src. The returned value may be a copy of src, if the requested flags -// were incompatible with src's. -INLINE upb_arrayptr upb_array_getref(upb_arrayptr src, int ref_flags); - -// Sets the given element in the array to val. The current length of the array -// must be greater than elem. If the field type is dynamic, the array will -// take a ref on val and release a ref on what was previously in the array. -INLINE void upb_array_set(upb_arrayptr a, upb_fielddef *f, int elem, - upb_value val); - - -// Note that array_append will attempt to take a reference on the given value, -// so to avoid a copy use append_default and get. 
-INLINE void upb_array_append(upb_arrayptr a, upb_fielddef *f, - upb_value val); -INLINE void upb_array_append_default(upb_arrayptr a, upb_fielddef *f, - upb_value val); -#endif - -INLINE void upb_array_truncate(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - a.refcounted->len = 0; - else - a.norefcount->len = 0; -} - - -/* upb_msg ********************************************************************/ - -// Note that some inline functions for upb_msg are defined in upb_def.h since -// they rely on the defs. - -struct _upb_msg { - upb_data base; - uint8_t data[4]; // We allocate the appropriate amount per message. -}; - -// Creates a new msg of the given type. -upb_msg *upb_msg_new(upb_msgdef *md); - -// INTERNAL-ONLY: -// Frees the given message and releases references on members. -void _upb_msg_free(upb_msg *msg, upb_msgdef *md); - -// INTERNAL-ONLY: -// Returns a pointer to the given field. -INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { - upb_valueptr p; - p._void = &msg->data[f->byte_offset]; - return p; -} - -// Releases a references on msg. -INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { - if(_upb_data_unref(&msg->base)) _upb_msg_free(msg, md); -} - -// Tests whether the given field is explicitly set, or whether it will return -// a default. -INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { - return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; -} - -// Returns the current value if set, or the default value if not set, of the -// specified field. The caller does *not* own a ref. -INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - if(upb_msg_has(msg, f)) { - return upb_value_read(_upb_msg_getptr(msg, f), f->type); - } else { - return f->default_value; - } -} - -// Sets the given field to the given value. The msg will take a ref on val, -// and will drop a ref on whatever was there before. 
-void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); - -INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { - memset(msg->data, 0, md->set_flags_bytes); -} - -// A convenience function for decoding an entire protobuf all at once, without -// having to worry about setting up the appropriate objects. -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status); - -// A convenience function for encoding an entire protobuf all at once. If an -// error occurs, the null string is returned and the status object contains -// the error. -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status); - - -/* upb_msgsrc *****************************************************************/ - -// A nonresumable, non-interruptable (but simple and fast) source for pushing -// the data of a upb_msg to a upb_sink. -void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink, - bool reverse, upb_status *status); - - -/* upb_msgsink ****************************************************************/ - -// A upb_msgsink can accept the data from a source and write it into a message. -struct upb_msgsink; -typedef struct upb_msgsink upb_msgsink; - -// Allocate and free a msgsink, respectively. -upb_msgsink *upb_msgsink_new(upb_msgdef *md); -void upb_msgsink_free(upb_msgsink *sink); - -// Returns the upb_sink (like an upcast). -upb_sink *upb_msgsink_sink(upb_msgsink *sink); - -// Resets the msgsink for the given msg. 
-void upb_msgsink_reset(upb_msgsink *sink, upb_msg *msg); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_def.c b/src/upb_def.c index 31f14fa..bb1f07a 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -6,6 +6,7 @@ #include #include "descriptor_const.h" +#include "descriptor.h" #include "upb_def.h" #define CHECKSRC(x) if(!(x)) goto src_err @@ -840,6 +841,7 @@ err: upb_deflist_uninit(&defs); } + /* upb_baredecoder ************************************************************/ // upb_baredecoder is a upb_src that can parse a subset of the protocol buffer @@ -977,7 +979,7 @@ static upb_src_vtable upb_baredecoder_src_vtbl = { (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, }; -upb_baredecoder *upb_baredecoder_new(upb_string *str) +static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); @@ -987,9 +989,30 @@ upb_baredecoder *upb_baredecoder_new(upb_string *str) return d; } -void upb_baredecoder_free(upb_baredecoder *d) +static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); upb_string_unref(d->str); free(d); } + +static upb_src *upb_baredecoder_src(upb_baredecoder *d) +{ + return &d->src; +} + +upb_symtab *upb_get_descriptor_symtab() +{ + // TODO: implement sharing of symtabs, so that successive calls to this + // function will return the same symtab. + upb_symtab *symtab = upb_symtab_new(); + // TODO: allow upb_strings to be static or on the stack. 
+ upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); + upb_baredecoder *decoder = upb_baredecoder_new(descriptor); + upb_status status; + upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); + assert(upb_ok(&status)); + upb_baredecoder_free(decoder); + upb_string_unref(descriptor); + return symtab; +} diff --git a/src/upb_def.h b/src/upb_def.h index 033dcde..b73b0f9 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -257,6 +257,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); +// Returns a symtab that defines google.protobuf.DescriptorProto and all other +// types that are defined in descriptor.proto. This allows you to load other +// proto types. The caller owns a ref on the returned symtab. +upb_symtab *upb_get_descriptor_symtab(); + /* upb_def casts **************************************************************/ diff --git a/src/upb_table.c b/src/upb_table.c index 6fd2c20..b91776c 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -232,7 +232,7 @@ void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { do { cur = (void*)((char*)cur + t->t.entry_size); if(cur == end) return NULL; - } while(upb_string_isnull(cur->key)); + } while(cur->key == NULL); return cur; } -- cgit v1.2.3 From be5ddd8a645eaa949a8d500718257fb7cb71cf44 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 19:25:39 -0700 Subject: Tweaks to upb_src/upb_sink interfaces. 
--- Makefile | 7 +- src/upb_atomic.h | 23 +++---- src/upb_decoder.c | 171 ++++++++++++++++++++++++------------------------- src/upb_decoder.h | 2 +- src/upb_def.c | 28 ++++---- src/upb_def.h | 2 +- src/upb_srcsink.h | 127 ------------------------------------ src/upb_srcsink_vtbl.h | 93 --------------------------- src/upb_stream.h | 121 ++++++++++++++++++++++++++++++++++ src/upb_stream_vtbl.h | 93 +++++++++++++++++++++++++++ src/upb_string.h | 29 +++++++-- tests/test_table.cc | 8 +-- 12 files changed, 358 insertions(+), 346 deletions(-) delete mode 100644 src/upb_srcsink.h delete mode 100644 src/upb_srcsink_vtbl.h create mode 100644 src/upb_stream.h create mode 100644 src/upb_stream_vtbl.h diff --git a/Makefile b/Makefile index dec18ec..1ba7400 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) all: $(ALL) clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) - rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) + rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* rm -rf tests/tests tests/t.* tests/test_table rm -rf descriptor/descriptor.pb @@ -46,7 +46,7 @@ clean: cd lang_ext/python && python setup.py clean --all # The core library (src/libupb.a) -SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c \ +SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c src/upb_string.c \ descriptor/descriptor.c # Parts of core that are yet to be converted. 
OTHERSRC=src/upb_encoder.c src/upb_text.c @@ -86,11 +86,12 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -tests: tests/tests \ +TESTS=tests/tests \ tests/test_table \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb +$(TESTS): src/libupb.a #VALGRIND=valgrind --leak-check=full --error-exitcode=1 VALGRIND= diff --git a/src/upb_atomic.h b/src/upb_atomic.h index c2cb8ba..01fc8a2 100644 --- a/src/upb_atomic.h +++ b/src/upb_atomic.h @@ -29,7 +29,6 @@ extern "C" { #define INLINE static inline #endif -#define UPB_THREAD_UNSAFE #ifdef UPB_THREAD_UNSAFE /* Non-thread-safe implementations. ******************************************/ @@ -65,15 +64,6 @@ INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { return ret; } -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } - #endif /* Atomic refcount ************************************************************/ @@ -111,10 +101,6 @@ INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { return __sync_fetch_and_add(&a->v, 0); } -INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) { - a->v = val; -} - #elif defined(WIN32) /* Windows defines atomic increment/decrement. */ @@ -145,7 +131,14 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { #ifdef UPB_THREAD_UNSAFE -/* Already defined. 
*/ +typedef struct { +} upb_rwlock_t; + +INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } #elif defined(UPB_USE_PTHREADS) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index dd8ffcd..e3fdc49 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -68,9 +68,6 @@ struct upb_decoder { upb_strlen_t packed_end_offset; - // String we return for string values. We try to recycle it if possible. - upb_string *str; - // We keep a stack of messages we have recursed into. upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; }; @@ -93,18 +90,19 @@ static bool upb_decoder_nextbuf(upb_decoder *d) d->buf_bytesleft); } - // Recycle old buffer, pull new one. + // Recycle old buffer. if(d->buf) { - upb_bytesrc_recycle(d->bytesrc, d->buf); + d->buf = upb_string_tryrecycle(d->buf); d->buf_offset -= upb_string_len(d->buf); d->buf_stream_offset += upb_string_len(d->buf); } - d->buf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE); - // Handle cases arising from error or EOF. - if(d->buf) { + // Pull next buffer. + if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { d->buf_bytesleft += upb_string_len(d->buf); + return true; } else { + // Error or EOF. if(!upb_bytesrc_eof(d->bytesrc)) { // Error from bytesrc. upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); @@ -113,9 +111,11 @@ static bool upb_decoder_nextbuf(upb_decoder *d) // EOF from bytesrc and we don't have any residual bytes left. d->src.eof = true; return false; + } else { + // No more data left from the bytesrc, but we still have residual bytes. 
+ return true; } } - return true; } static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) @@ -369,85 +369,86 @@ again: bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - upb_wire_type_t native_wire_type = upb_types[d->field->type].native_wire_type; - if(native_wire_type == UPB_WIRE_TYPE_DELIMITED) { - // A string, bytes, or a length-delimited submessage. The latter isn't - // technically a string, but can be gotten as one to perform lazy parsing. - d->str = upb_string_tryrecycle(d->str); - const upb_strlen_t total_len = d->delimited_len; - if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { - // The entire string is inside our current buffer, so we can just - // return a substring of the buffer without copying. - upb_string_substr(d->str, d->buf, - upb_string_len(d->buf) - d->buf_bytesleft, - total_len); - upb_decoder_skipbytes(d, total_len); - *val.str = d->str; - } else { - // The string spans buffers, so we must copy from the current buffer, - // the next buffer (if we have one), and finally from the bytesrc. - uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); - upb_strlen_t len = 0; - if(d->buf_offset < 0) { - // Residual bytes we need to copy from tmpbuf. - memcpy(str, d->tmpbuf, -d->buf_offset); - len += -d->buf_offset; - } - if(d->buf) { - upb_strlen_t to_copy = - UPB_MIN(total_len - len, upb_string_len(d->buf) - d->buf_offset); - memcpy(str + len, upb_string_getrobuf(d->buf) + d->buf_offset, to_copy); - } - upb_decoder_skipbytes(d, len); - upb_string_getrwbuf(d->str, len); // Cheap resize. 
- if(len < total_len) { - if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } - d->buf_stream_offset += total_len - len; - } + switch(upb_types[d->field->type].native_wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint32_t low, high; + if(!upb_decoder_readv64(d, &low, &high)) return false; + uint64_t u64 = ((uint64_t)high << 32) | low; + if(d->field->type == UPB_TYPE(SINT64)) + *val.int64 = upb_zzdec_64(u64); + else + *val.uint64 = u64; + break; } + case UPB_WIRE_TYPE_32BIT_VARINT: { + uint32_t u32; + if(!upb_decoder_readv32(d, &u32)) return false; + if(d->field->type == UPB_TYPE(SINT32)) + *val.int32 = upb_zzdec_32(u32); + else + *val.uint32 = u32; + break; + } + case UPB_WIRE_TYPE_64BIT: + if(!upb_decoder_readf64(d, val.uint64)) return false; + break; + case UPB_WIRE_TYPE_32BIT: + if(!upb_decoder_readf32(d, val.uint32)) return false; + break; + default: + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Attempted to call getval on a group."); + return false; + } + // For a packed field where we have not reached the end, we leave the field + // in the decoder so we will return it again without parsing a key. + if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || + upb_decoder_offset(d) >= d->packed_end_offset) { d->field = NULL; + } + return true; +} + +bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { + // A string, bytes, or a length-delimited submessage. The latter isn't + // technically a string, but can be gotten as one to perform lazy parsing. + const int32_t total_len = d->delimited_len; + if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { + // The entire string is inside our current buffer, so we can just + // return a substring of the buffer without copying. 
+ upb_string_substr(str, d->buf, + upb_string_len(d->buf) - d->buf_bytesleft, + total_len); + upb_decoder_skipbytes(d, total_len); } else { - switch(native_wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint32_t low, high; - if(!upb_decoder_readv64(d, &low, &high)) return false; - uint64_t u64 = ((uint64_t)high << 32) | low; - if(d->field->type == UPB_TYPE(SINT64)) - *val.int64 = upb_zzdec_64(u64); - else - *val.uint64 = u64; - break; - } - case UPB_WIRE_TYPE_32BIT_VARINT: { - uint32_t u32; - if(!upb_decoder_readv32(d, &u32)) return false; - if(d->field->type == UPB_TYPE(SINT32)) - *val.int32 = upb_zzdec_32(u32); - else - *val.uint32 = u32; - break; - } - case UPB_WIRE_TYPE_64BIT: - if(!upb_decoder_readf64(d, val.uint64)) return false; - break; - case UPB_WIRE_TYPE_32BIT: - if(!upb_decoder_readf32(d, val.uint32)) return false; - break; - default: - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Attempted to call getval on a group."); - return false; + // The string spans buffers, so we must copy from the residual buffer + // (if any bytes are there), then the buffer, and finally from the bytesrc. + uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( + str, UPB_MIN(total_len, d->buf_bytesleft)); + int32_t len = 0; + if(d->buf_offset < 0) { + // Residual bytes we need to copy from tmpbuf. + memcpy(ptr, d->tmpbuf, -d->buf_offset); + len += -d->buf_offset; } - // For a packed field where we have not reached the end, we leave the field - // in the decoder so we will return it again without parsing a key. - if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_decoder_offset(d) >= d->packed_end_offset) { - d->field = NULL; + if(d->buf) { + // Bytes from the buffer. + memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, + upb_string_len(str) - len); + } + upb_decoder_skipbytes(d, upb_string_len(str)); + if(len < total_len) { + // Bytes from the bytesrc. 
+ if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return false; + } + // Have to advance this since the buffering layer of the decoder will + // never see these bytes. + d->buf_stream_offset += total_len - len; } } + d->field = NULL; return true; } @@ -549,21 +550,19 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; d->buf = NULL; - d->str = upb_string_new(); upb_src_init(&d->src, &upb_decoder_src_vtbl); return d; } void upb_decoder_free(upb_decoder *d) { - upb_string_unref(d->str); - if(d->buf) upb_string_unref(d->buf); + upb_string_unref(d->buf); free(d); } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { - if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); + upb_string_unref(d->buf); d->top = d->stack; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it diff --git a/src/upb_decoder.h b/src/upb_decoder.h index d40d9fc..dde61fc 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -19,7 +19,7 @@ #include #include #include "upb_def.h" -#include "upb_srcsink.h" +#include "upb_stream.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_def.c b/src/upb_def.c index bb1f07a..bfab738 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -215,10 +215,11 @@ typedef struct _upb_unresolveddef { upb_string *name; } upb_unresolveddef; +// Is passed a ref on the string. 
static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = upb_string_getref(str); + def->name = str; return def; } @@ -258,7 +259,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) CHECKSRC(upb_src_getint32(src, &number)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - CHECKSRC(upb_src_getstr(src, &name)); + name = upb_string_tryrecycle(name); + CHECKSRC(upb_src_getstr(src, name)); break; default: CHECKSRC(upb_src_skipval(src)); @@ -274,11 +276,15 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); + // We don't unref "name" because we pass our ref to the iton entry of the + // table. strtables can ref their keys, but the inttable doesn't know that + // the value is a string. 
return true; src_err: upb_copyerr(status, upb_src_status(src)); err: + upb_string_unref(name); return false; } @@ -368,12 +374,12 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->number = tmp; break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - CHECKSRC(upb_src_getstr(src, &f->name)); - f->name = upb_string_getref(f->name); + f->name = upb_string_tryrecycle(f->name); + CHECKSRC(upb_src_getstr(src, f->name)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str; - CHECKSRC(upb_src_getstr(src, &str)); + upb_string *str = upb_string_new(); + CHECKSRC(upb_src_getstr(src, str)); if(f->def) upb_def_unref(f->def); f->def = UPB_UPCAST(upb_unresolveddef_new(str)); f->owned = true; @@ -415,9 +421,8 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(m->base.fqname); - CHECKSRC(upb_src_getstr(src, &m->base.fqname)); - m->base.fqname = upb_string_getref(m->base.fqname); + m->base.fqname = upb_string_tryrecycle(m->base.fqname); + CHECKSRC(upb_src_getstr(src, m->base.fqname)); break; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: CHECKSRC(upb_src_startmsg(src)); @@ -487,9 +492,8 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(package); - CHECKSRC(upb_src_getstr(src, &package)); - package = upb_string_getref(package); + package = upb_string_tryrecycle(package); + CHECKSRC(upb_src_getstr(src, package)); break; case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: CHECKSRC(upb_src_startmsg(src)); diff --git a/src/upb_def.h b/src/upb_def.h index b73b0f9..c18b424 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -27,7 +27,7 @@ #define UPB_DEF_H_ #include 
"upb_atomic.h" -#include "upb_srcsink.h" +#include "upb_stream.h" #include "upb_table.h" #ifdef __cplusplus diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h deleted file mode 100644 index dc73613..0000000 --- a/src/upb_srcsink.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file defines four general-purpose interfaces for pulling/pushing either - * protobuf data or bytes: - * - * - upb_src: pull interface for protobuf data. - * - upb_sink: push interface for protobuf data. - * - upb_bytesrc: pull interface for bytes. - * - upb_bytesink: push interface for bytes. - * - * These interfaces are used as general-purpose glue in upb. For example, the - * decoder interface works by implementing a upb_src and calling a upb_bytesrc. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - */ - -#ifndef UPB_SRCSINK_H -#define UPB_SRCSINK_H - -#include "upb_srcsink_vtbl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Forward-declare. We can't include upb_def.h; it would be circular. -struct _upb_fielddef; - -// Note! The "eof" flags work like feof() in C; they cannot report end-of-file -// until a read has failed due to eof. They cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. - -/* upb_src ********************************************************************/ - -// TODO: decide how to handle unknown fields. - -// Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. -struct _upb_fielddef *upb_src_getdef(upb_src *src); - -// Retrieves and stores the next value in "val". For string types the caller -// does not own a ref to the returned type; you must ref it yourself if you -// want one. Returns false on error. 
-bool upb_src_getval(upb_src *src, upb_valueptr val); - -// Like upb_src_getval() but skips the value. -bool upb_src_skipval(upb_src *src); - -// Descends into a submessage. May only be called after a def has been -// returned that indicates a submessage. -bool upb_src_startmsg(upb_src *src); - -// Stops reading a submessage. May be called before the stream is EOF, in -// which case the rest of the submessage is skipped. -bool upb_src_endmsg(upb_src *src); - -// Returns the current error/eof status for the stream. -INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } -INLINE bool upb_src_eof(upb_src *src) { return src->eof; } - -// The following functions are equivalent to upb_src_getval(), but take -// pointers to specific types. In debug mode this may check that the type -// is compatible with the type being read. This check will *not* be performed -// in non-debug mode, and if you get the type wrong the behavior is undefined. -bool upb_src_getbool(upb_src *src, bool *val); -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); -bool upb_src_getstr(upb_src *src, upb_string **val); - -/* upb_sink *******************************************************************/ - -// Puts the given fielddef into the stream. -bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); - -// Puts the given value into the stream. -bool upb_sink_putval(upb_sink *sink, upb_value val); - -// Starts a submessage. (needed? the def tells us we're starting a submsg.) -bool upb_sink_startmsg(upb_sink *sink); - -// Ends a submessage. -bool upb_sink_endmsg(upb_sink *sink); - -// Returns the current error status for the stream. 
-upb_status *upb_sink_status(upb_sink *sink); - -/* upb_bytesrc ****************************************************************/ - -// Returns the next string in the stream. NULL is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -// -// A ref is passed to the caller, though the caller is encouraged to pass the -// ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce -// memory allocation/deallocation. -upb_string *upb_bytesrc_get(upb_bytesrc *src, upb_strlen_t minlen); -void upb_bytesrc_recycle(upb_bytesrc *src, upb_string *str); - -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. -bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); - -// Returns the current error status for the stream. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } - -/* upb_bytesink ***************************************************************/ - -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); - -// Returns the current error status for the stream. -upb_status *upb_bytesink_status(upb_bytesink *sink); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_srcsink_vtbl.h b/src/upb_srcsink_vtbl.h deleted file mode 100644 index 0ec45d2..0000000 --- a/src/upb_srcsink_vtbl.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * vtable declarations for types that are implementing any of the src or sink - * interfaces. Only components that are implementing these interfaces need - * to worry about this file. 
- * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_SRCSINK_VTBL_H_ -#define UPB_SRCSINK_VTBL_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct upb_src; -typedef struct upb_src upb_src; -struct upb_sink; -typedef struct upb_sink upb_sink; -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - -// Typedefs for function pointers to all of the virtual functions. -typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); - -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); - -typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); -typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); - -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); - -// Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; -} upb_src_vtable; - -typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; - upb_bytesrc_recycle_fptr recycle; -} upb_bytesrc_vtable; - -// "Base Class" definitions; components that implement these interfaces should -// contain one of these structures. 
- -struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -#ifndef NDEBUG - int state; // For debug-mode checking of API usage. -#endif -}; - -struct upb_bytesrc { - upb_bytesrc_vtable *vtbl; - upb_status status; - bool eof; -}; - -INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; -#ifndef DEBUG - // TODO: initialize debug-mode checking. -#endif -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_stream.h b/src/upb_stream.h new file mode 100644 index 0000000..e7b4074 --- /dev/null +++ b/src/upb_stream.h @@ -0,0 +1,121 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file defines four general-purpose streaming interfaces for protobuf + * data or bytes: + * + * - upb_src: pull interface for protobuf data. + * - upb_sink: push interface for protobuf data. + * - upb_bytesrc: pull interface for bytes. + * - upb_bytesink: push interface for bytes. + * + * These interfaces are used as general-purpose glue in upb. For example, the + * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + */ + +#ifndef UPB_SRCSINK_H +#define UPB_SRCSINK_H + +#include "upb_stream_vtbl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Forward-declare. We can't include upb_def.h; it would be circular. +struct _upb_fielddef; + +// Note! The "eof" flags work like feof() in C; they cannot report end-of-file +// until a read has failed due to eof. They cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. + +/* upb_src ********************************************************************/ + +// TODO: decide how to handle unknown fields. + +// Retrieves the fielddef for the next field in the stream. Returns NULL on +// error or end-of-stream. 
+struct _upb_fielddef *upb_src_getdef(upb_src *src); + +// Retrieves and stores the next value in "val". For string types "val" must +// be a newly-recycled string. Returns false on error. +bool upb_src_getval(upb_src *src, upb_valueptr val); +bool upb_src_getstr(upb_src *src, upb_string *val); + +// Like upb_src_getval() but skips the value. +bool upb_src_skipval(upb_src *src); + +// Descends into a submessage. May only be called after a def has been +// returned that indicates a submessage. +bool upb_src_startmsg(upb_src *src); + +// Stops reading a submessage. May be called before the stream is EOF, in +// which case the rest of the submessage is skipped. +bool upb_src_endmsg(upb_src *src); + +// Returns the current error/eof status for the stream. +INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } +INLINE bool upb_src_eof(upb_src *src) { return src->eof; } + +// The following functions are equivalent to upb_src_getval(), but take +// pointers to specific types. In debug mode this may check that the type +// is compatible with the type being read. This check will *not* be performed +// in non-debug mode, and if you get the type wrong the behavior is undefined. +bool upb_src_getbool(upb_src *src, bool *val); +bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); + +/* upb_sink *******************************************************************/ + +// Puts the given fielddef into the stream. +bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); + +// Puts the given value into the stream. +bool upb_sink_putval(upb_sink *sink, upb_value val); + +// Starts a submessage. (needed? the def tells us we're starting a submsg.) 
+bool upb_sink_startmsg(upb_sink *sink); + +// Ends a submessage. +bool upb_sink_endmsg(upb_sink *sink); + +// Returns the current error status for the stream. +upb_status *upb_sink_status(upb_sink *sink); + +/* upb_bytesrc ****************************************************************/ + +// Returns the next string in the stream. false is returned on error or eof. +// The string must be at least "minlen" bytes long unless the stream is eof. +bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); + +// Appends the next "len" bytes in the stream in-place to "str". This should +// be used when the caller needs to build a contiguous string of the existing +// data in "str" with more data. +bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +// Returns the current error status for the stream. +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +/* upb_bytesink ***************************************************************/ + +// Puts the given string. Returns the number of bytes that were actually, +// consumed, which may be fewer than were in the string, or <0 on error. +int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); + +// Returns the current error status for the stream. +upb_status *upb_bytesink_status(upb_bytesink *sink); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_stream_vtbl.h b/src/upb_stream_vtbl.h new file mode 100644 index 0000000..0ec45d2 --- /dev/null +++ b/src/upb_stream_vtbl.h @@ -0,0 +1,93 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * vtable declarations for types that are implementing any of the src or sink + * interfaces. Only components that are implementing these interfaces need + * to worry about this file. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ */ + +#ifndef UPB_SRCSINK_VTBL_H_ +#define UPB_SRCSINK_VTBL_H_ + +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_src; +typedef struct upb_src upb_src; +struct upb_sink; +typedef struct upb_sink upb_sink; +struct upb_bytesrc; +typedef struct upb_bytesrc upb_bytesrc; +struct upb_bytesink; +typedef struct upb_bytesink upb_bytesink; + +// Typedefs for function pointers to all of the virtual functions. +typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); +typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_skipval_fptr)(upb_src *src); +typedef bool (*upb_src_startmsg_fptr)(upb_src *src); +typedef bool (*upb_src_endmsg_fptr)(upb_src *src); + +typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); +typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); +typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); + +typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); +typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); +typedef bool (*upb_bytesrc_append_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); + +// Vtables for the above interfaces. +typedef struct { + upb_src_getdef_fptr getdef; + upb_src_getval_fptr getval; + upb_src_skipval_fptr skipval; + upb_src_startmsg_fptr startmsg; + upb_src_endmsg_fptr endmsg; +} upb_src_vtable; + +typedef struct { + upb_bytesrc_get_fptr get; + upb_bytesrc_append_fptr append; + upb_bytesrc_recycle_fptr recycle; +} upb_bytesrc_vtable; + +// "Base Class" definitions; components that implement these interfaces should +// contain one of these structures. + +struct upb_src { + upb_src_vtable *vtbl; + upb_status status; + bool eof; +#ifndef NDEBUG + int state; // For debug-mode checking of API usage. 
+#endif +}; + +struct upb_bytesrc { + upb_bytesrc_vtable *vtbl; + upb_status status; + bool eof; +}; + +INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; +#ifndef DEBUG + // TODO: initialize debug-mode checking. +#endif +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_string.h b/src/upb_string.h index af1f8ce..770dba7 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -38,7 +38,7 @@ extern "C" { // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { char *ptr; - uint32_t len; + int32_t len; uint32_t size; upb_atomic_refcount_t refcount; union { @@ -53,12 +53,22 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); -// Releases a ref on the given string, which may free the memory. -void upb_string_unref(upb_string *str); +void _upb_string_free(upb_string *str); + +// Releases a ref on the given string, which may free the memory. "str" +// can be NULL, in which case this is a no-op. +INLINE void upb_string_unref(upb_string *str) { + if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); +} // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. -upb_string *upb_string_getref(upb_string *str); +INLINE upb_string *upb_string_getref(upb_string *str) { + // If/when we support stack-allocated strings, this will have to allocate + // a new string if the given string is on the stack. + upb_atomic_ref(&str->refcount); + return str; +} // Returns the length of the string. INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } @@ -75,6 +85,17 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different // data written to it. 
The returned string is either "str" if it could be // recycled or a newly created string if "str" has other references. +// +// As a special case, passing NULL will allocate a new string. This is +// convenient for the pattern: +// +// upb_string *str = NULL; +// while (x) { +// if (y) { +// str = upb_string_tryrecycle(str); +// upb_src_getstr(str); +// } +// } upb_string *upb_string_tryrecycle(upb_string *str); // The three options for setting the contents of a string. These may only be diff --git a/tests/test_table.cc b/tests/test_table.cc index 47d806c..37e14a8 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -1,7 +1,7 @@ #undef NDEBUG /* ensure tests always assert. */ #include "upb_table.h" -#include "upb_data.h" +#include "upb_string.h" #include "test_util.h" #include #include @@ -45,7 +45,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) all.insert(key); strtable_entry e; e.value = key[0]; - upb_strptr str = upb_strduplen(key.c_str(), key.size()); + upb_string *str = upb_strduplen(key.c_str(), key.size()); e.e.key = str; upb_strtable_insert(&table, &e.e); upb_string_unref(str); // The table still owns a ref. @@ -55,7 +55,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) /* Test correctness. */ for(uint32_t i = 0; i < keys.size(); i++) { const string& key = keys[i]; - upb_strptr str = upb_strduplen(key.c_str(), key.size()); + upb_string *str = upb_strduplen(key.c_str(), key.size()); strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, str); if(m.find(key) != m.end()) { /* Assume map implementation is correct. 
*/ assert(e); @@ -71,7 +71,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) strtable_entry *e; for(e = (strtable_entry*)upb_strtable_begin(&table); e; e = (strtable_entry*)upb_strtable_next(&table, &e->e)) { - string tmp(upb_string_getrobuf(e->e.key), upb_strlen(e->e.key)); + string tmp(upb_string_getrobuf(e->e.key), upb_string_len(e->e.key)); std::set::iterator i = all.find(tmp); assert(i != all.end()); all.erase(i); -- cgit v1.2.3 From b04ff41664f45c3c86eea62173a1223ff04d6ad7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 19:56:58 -0700 Subject: Dynamically allocate string for error msg. --- src/upb.c | 12 ++++++++++-- src/upb.h | 12 ++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/upb.c b/src/upb.c index 3c5efe8..a98512d 100644 --- a/src/upb.c +++ b/src/upb.c @@ -10,6 +10,7 @@ #include #include "upb.h" +#include "upb_string.h" #define alignof(t) offsetof(struct { char c; t x; }, x) #define TYPE_INFO(wire_type, ctype, allows_delimited) \ @@ -43,10 +44,12 @@ void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...) { if(upb_ok(status)) { // The first error is the most interesting. 
+ status->str = upb_string_new(); + char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); status->code = code; va_list args; va_start(args, msg); - vsnprintf(status->msg, UPB_ERRORMSG_MAXLEN, msg, args); + vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); va_end(args); } } @@ -54,6 +57,11 @@ void upb_seterr(upb_status *status, enum upb_status_code code, void upb_copyerr(upb_status *to, upb_status *from) { to->code = from->code; - strcpy(to->msg, from->msg); + to->str = from->str ? upb_string_getref(from->str) : NULL; // str is NULL for an OK status. } +void upb_reset(upb_status *status) { + status->code = UPB_STATUS_OK; + upb_string_unref(status->str); + status->str = NULL; +} diff --git a/src/upb.h b/src/upb.h index 6bf548c..6502dfc 100644 --- a/src/upb.h +++ b/src/upb.h @@ -281,23 +281,19 @@ enum upb_status_code { UPB_ERROR_MAX_NESTING_EXCEEDED = -3 }; -#define UPB_ERRORMSG_MAXLEN 256 typedef struct { enum upb_status_code code; - char msg[UPB_ERRORMSG_MAXLEN]; + upb_string *str; } upb_status; -#define UPB_STATUS_INIT {UPB_STATUS_OK, ""} +#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL} +#define UPB_ERRORMSG_MAXLEN 256 INLINE bool upb_ok(upb_status *status) { return status->code == UPB_STATUS_OK; } -INLINE void upb_reset(upb_status *status) { - status->code = UPB_STATUS_OK; - status->msg[0] = '\0'; -} - +void upb_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); -- cgit v1.2.3 From c4aecc414b81ac0fe4c3805f17989d664f204fca Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:01:07 -0700 Subject: Strip out some stuff that's not currently being used.
--- src/upb.h | 100 +--------------------------------------------------------- src/upb_def.h | 1 - 2 files changed, 1 insertion(+), 100 deletions(-) diff --git a/src/upb.h b/src/upb.h index 6502dfc..230e638 100644 --- a/src/upb.h +++ b/src/upb.h @@ -127,27 +127,10 @@ typedef union { /* Polymorphic values of .proto types *****************************************/ -// INTERNAL-ONLY: never refer to these types with a tag ("union", "struct"). -// Always use the typedefs. -struct _upb_msg; - -typedef struct _upb_msg upb_msg; - -typedef upb_atomic_refcount_t upb_data; - -typedef uint32_t upb_strlen_t; - struct _upb_string; typedef struct _upb_string upb_string; -typedef uint32_t upb_arraylen_t; - -typedef union { - // Must be first, for the UPB_STATIC_ARRAY_PTR_INIT() macro. - struct upb_norefcount_array *norefcount; - struct upb_refcounted_array *refcounted; - upb_data *base; -} upb_arrayptr; +typedef uint32_t upb_strlen_t; // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. @@ -159,10 +142,6 @@ typedef union { uint32_t uint32; uint64_t uint64; bool _bool; - upb_string *str; - upb_arrayptr arr; - upb_msg *msg; - upb_data *data; } upb_value; // A pointer to a .proto value. The owner must have an out-of-band way of @@ -176,11 +155,6 @@ typedef union { uint32_t *uint32; uint64_t *uint64; bool *_bool; - upb_string **str; - upb_arrayptr *arr; - upb_msg **msg; - upb_data **data; - void *_void; } upb_valueptr; INLINE upb_valueptr upb_value_addrof(upb_value *val) { @@ -188,78 +162,6 @@ INLINE upb_valueptr upb_value_addrof(upb_value *val) { return ptr; } -/** - * Converts upb_value_ptr -> upb_value by reading from the pointer. We need to - * know the field type to perform this operation, because we need to know how - * much memory to copy. 
- */ -INLINE upb_value upb_value_read(upb_valueptr ptr, upb_field_type_t ft) { - upb_value val; - -#define CASE(t, member_name) \ - case UPB_TYPE(t): val.member_name = *ptr.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - default: break; - } - return val; - -#undef CASE -} - -/** - * Writes a upb_value to a upb_value_ptr location. We need to know the field - * type to perform this operation, because we need to know how much memory to - * copy. - */ -INLINE void upb_value_write(upb_valueptr ptr, upb_value val, - upb_field_type_t ft) { -#define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - default: break; - } - -#undef CASE -} - // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { diff --git a/src/upb_def.h b/src/upb_def.h index c18b424..c297e83 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -135,7 +135,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; - upb_msg *default_msg; // Message with all default values set. 
size_t size; upb_field_count_t num_fields; uint32_t set_flags_bytes; -- cgit v1.2.3 From 604c1a78bcbcc5b282ac6aab01a425baeaebdfbd Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:06:39 -0700 Subject: Add upb_string.c. --- src/upb_string.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/upb_string.c diff --git a/src/upb_string.c b/src/upb_string.c new file mode 100644 index 0000000..91ab9ae --- /dev/null +++ b/src/upb_string.c @@ -0,0 +1,47 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_string.h" + +#include + +#define UPB_STRING_UNFINALIZED -1 + +static uint32_t upb_round_up_pow2(uint32_t v) { + // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +upb_string *upb_string_new() { + upb_string *str = malloc(sizeof(*str)); + str->ptr = NULL; + str->size = 0; + str->len = UPB_STRING_UNFINALIZED; + upb_atomic_refcount_init(&str->refcount, 1); + return str; +} + +void _upb_string_free(upb_string *str) { + if(str->ptr) free(str->ptr); + free(str); +} + +char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { + assert(str->len == UPB_STRING_UNFINALIZED); + if (str->size < len) { + str->size = upb_round_up_pow2(len); + str->ptr = realloc(str->ptr, str->size); + } + str->len = len; + return str->ptr; +} -- cgit v1.2.3 From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. 
--- Makefile | 22 +- README | 9 +- core/upb.c | 67 ++++ core/upb.h | 207 ++++++++++ core/upb_atomic.h | 185 +++++++++ core/upb_def.c | 1022 ++++++++++++++++++++++++++++++++++++++++++++++++ core/upb_def.h | 302 ++++++++++++++ core/upb_stream.h | 121 ++++++ core/upb_stream_vtbl.h | 93 +++++ core/upb_string.c | 47 +++ core/upb_string.h | 194 +++++++++ core/upb_table.c | 411 +++++++++++++++++++ core/upb_table.h | 133 +++++++ gen-deps.sh | 2 +- src/upb.c | 67 ---- src/upb.h | 207 ---------- src/upb_atomic.h | 185 --------- src/upb_byteio.h | 43 -- src/upb_decoder.c | 577 --------------------------- src/upb_decoder.h | 53 --- src/upb_def.c | 1022 ------------------------------------------------ src/upb_def.h | 302 -------------- src/upb_encoder.c | 420 -------------------- src/upb_encoder.h | 56 --- src/upb_inlinedefs.c | 20 - src/upb_stream.h | 121 ------ src/upb_stream_vtbl.h | 93 ----- src/upb_string.c | 47 --- src/upb_string.h | 194 --------- src/upb_table.c | 411 ------------------- src/upb_table.h | 133 ------- src/upb_text.c | 121 ------ src/upb_text.h | 36 -- stream/upb_byteio.h | 43 ++ stream/upb_decoder.c | 577 +++++++++++++++++++++++++++ stream/upb_decoder.h | 53 +++ stream/upb_encoder.c | 420 ++++++++++++++++++++ stream/upb_encoder.h | 56 +++ stream/upb_text.c | 121 ++++++ stream/upb_text.h | 36 ++ 40 files changed, 4107 insertions(+), 4122 deletions(-) create mode 100644 core/upb.c create mode 100644 core/upb.h create mode 100644 core/upb_atomic.h create mode 100644 core/upb_def.c create mode 100644 core/upb_def.h create mode 100644 core/upb_stream.h create mode 100644 core/upb_stream_vtbl.h create mode 100644 core/upb_string.c create mode 100644 core/upb_string.h create mode 100644 core/upb_table.c create mode 100644 core/upb_table.h delete mode 100644 src/upb.c delete mode 100644 src/upb.h delete mode 100644 src/upb_atomic.h delete mode 100644 src/upb_byteio.h delete mode 100644 src/upb_decoder.c delete mode 100644 src/upb_decoder.h delete mode 100644 
src/upb_def.c delete mode 100644 src/upb_def.h delete mode 100644 src/upb_encoder.c delete mode 100644 src/upb_encoder.h delete mode 100644 src/upb_inlinedefs.c delete mode 100644 src/upb_stream.h delete mode 100644 src/upb_stream_vtbl.h delete mode 100644 src/upb_string.c delete mode 100644 src/upb_string.h delete mode 100644 src/upb_table.c delete mode 100644 src/upb_table.h delete mode 100644 src/upb_text.c delete mode 100644 src/upb_text.h create mode 100644 stream/upb_byteio.h create mode 100644 stream/upb_decoder.c create mode 100644 stream/upb_decoder.h create mode 100644 stream/upb_encoder.c create mode 100644 stream/upb_encoder.h create mode 100644 stream/upb_text.c create mode 100644 stream/upb_text.h diff --git a/Makefile b/Makefile index 1ba7400..ca4f940 100644 --- a/Makefile +++ b/Makefile @@ -27,13 +27,13 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $ CC=gcc CXX=g++ CFLAGS=-std=c99 -INCLUDE=-Idescriptor -Isrc -Itests -I. +INCLUDE=-Idescriptor -Icore -Itests -I. CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread -LIBUPB=src/libupb.a -LIBUPB_PIC=src/libupb_pic.a -LIBUPB_SHARED=src/libupb.so +LIBUPB=core/libupb.a +LIBUPB_PIC=core/libupb_pic.a +LIBUPB_SHARED=core/libupb.so ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) all: $(ALL) clean: @@ -45,16 +45,16 @@ clean: rm -rf tools/upbc deps cd lang_ext/python && python setup.py clean --all -# The core library (src/libupb.a) -SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c src/upb_string.c \ +# The core library (core/libupb.a) +SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ descriptor/descriptor.c # Parts of core that are yet to be converted. OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
-src/upb_def.o: src/upb_def.c +core/upb_def.o: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -src/upb_def.lo: src/upb_def.c +core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC @@ -91,7 +91,7 @@ TESTS=tests/tests \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb -$(TESTS): src/libupb.a +$(TESTS): core/libupb.a #VALGRIND=valgrind --leak-check=full --error-exitcode=1 VALGRIND= @@ -129,10 +129,10 @@ tests/test_table: tests/test_table.cc # Includes which is a deprecated header. $(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< $(LIBUPB) -tests/tests: src/libupb.a +tests/tests: core/libupb.a # Tools -tools/upbc: src/libupb.a +tools/upbc: core/libupb.a # Benchmarks UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ diff --git a/README b/README index e869667..efa3ddb 100644 --- a/README +++ b/README @@ -10,6 +10,11 @@ ROADMAP OF THE SOURCE benchmark/ Benchmarks of upb and other protocol buffer implementations. +core/ + The core source directory. builds into core/libupb.a. Contains only the + very core library, which is capable of loading descriptors given the + appropriate decoder. Does not even contain decoders for the standard + formats like the protobuf text and binary formats. descriptor/ Files that describe the format of Protocol Buffer "descriptors", which are protocol buffers that describe the format of other protocol buffers. These @@ -19,8 +24,8 @@ labs/ about alternate ways of implementing things. When possible, these are benchmarked by the tests in benchmark/. We also test these with the tests in tests/, to ensure that the alternate implementations are actually correct. -src/ - The core source directory. builds into src/libupb.a. +stream/ + Implementations of streaming protobuf encoders and decoders. tests/ Unit tests. 
tools/ diff --git a/core/upb.c b/core/upb.c new file mode 100644 index 0000000..a98512d --- /dev/null +++ b/core/upb.c @@ -0,0 +1,67 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + */ + +#include +#include +#include + +#include "upb.h" +#include "upb_string.h" + +#define alignof(t) offsetof(struct { char c; t x; }, x) +#define TYPE_INFO(wire_type, ctype, allows_delimited) \ + {alignof(ctype), sizeof(ctype), wire_type, \ + (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ + #ctype}, + +upb_type_info upb_types[] = { + {0, 0, 0, 0, ""}, // There is no type 0. + TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE + TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // INT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1) // UINT64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // INT32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1) // FIXED64 + TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1) // FIXED32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1) // BOOL + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // STRING + TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0) // GROUP + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // MESSAGE + TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // BYTES + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // UINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // ENUM + TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1) // SFIXED32 + TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1) // SFIXED64 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // SINT32 + TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // SINT64 +}; + +void upb_seterr(upb_status *status, enum upb_status_code code, + const char *msg, ...) +{ + if(upb_ok(status)) { // The first error is the most interesting. 
+ status->str = upb_string_new(); + char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); + status->code = code; + va_list args; + va_start(args, msg); + vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); + va_end(args); + } +} + +void upb_copyerr(upb_status *to, upb_status *from) +{ + to->code = from->code; + to->str = from->str ? upb_string_getref(from->str) : NULL; // str is NULL for an OK status. +} + +void upb_reset(upb_status *status) { + status->code = UPB_STATUS_OK; + upb_string_unref(status->str); + status->str = NULL; +} diff --git a/core/upb.h b/core/upb.h new file mode 100644 index 0000000..230e638 --- /dev/null +++ b/core/upb.h @@ -0,0 +1,207 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file contains shared definitions that are widely used across upb. + */ + +#ifndef UPB_H_ +#define UPB_H_ + +#include +#include +#include // only for size_t. +#include "descriptor_const.h" +#include "upb_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// inline if possible, emit standalone code if required. +#ifndef INLINE +#define INLINE static inline +#endif + +#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) +#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) + +// The maximum that any submessages can be nested. Matches proto2's limit. +#define UPB_MAX_NESTING 64 + +// The maximum number of fields that any one .proto type can have. Note that +// this is very different than the max field number. It is hard to imagine a +// scenario where more than 32k fields makes sense. +#define UPB_MAX_FIELDS (1<<15) +typedef int16_t upb_field_count_t; + +// Nested type names are separated by periods. +#define UPB_SYMBOL_SEPARATOR '.' + +// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType +#define UPB_SYMBOL_MAXLEN 128 + +// The longest chain that mutually-recursive types are allowed to form.
For +// example, this is a type cycle of length 2: +// message A { +// B b = 1; +// } +// message B { +// A a = 1; +// } +#define UPB_MAX_TYPE_CYCLE_LEN 16 + +// The maximum depth that the type graph can have. Note that this setting does +// not automatically constrain UPB_MAX_NESTING, because type cycles allow for +// unlimited nesting if we do not limit it. +#define UPB_MAX_TYPE_DEPTH 64 + +// The biggest possible single value is a 10-byte varint. +#define UPB_MAX_ENCODED_SIZE 10 + + +/* Fundamental types and type constants. **************************************/ + +// A list of types as they are encoded on-the-wire. +enum upb_wire_type { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5, + + // This isn't a real wire type, but we use this constant to describe varints + // that are expected to be a maximum of 32 bits. + UPB_WIRE_TYPE_32BIT_VARINT = 8 +}; + +typedef uint8_t upb_wire_type_t; + +// Value type as defined in a .proto file. eg. string, int32, etc. The +// integers that represent this are defined by descriptor.proto. Note that +// descriptor.proto reserves "0" for errors, and we use it to represent +// exceptional circumstances. +typedef uint8_t upb_field_type_t; + +// For referencing the type constants tersely. +#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type +#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type + +INLINE bool upb_issubmsgtype(upb_field_type_t type) { + return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); +} + +INLINE bool upb_isstringtype(upb_field_type_t type) { + return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); +} + +// Info for a given field type. +typedef struct { + uint8_t align; + uint8_t size; + upb_wire_type_t native_wire_type; + uint8_t allowed_wire_types; // For packable fields, also allows delimited. 
+ char *ctype; +} upb_type_info; + +// A static array of info about all of the field types, indexed by type number. +extern upb_type_info upb_types[]; + +// The number of a field, eg. "optional string foo = 3". +typedef int32_t upb_field_number_t; + +// Label (optional, repeated, required) as defined in a .proto file. The +// values of this are defined by google.protobuf.FieldDescriptorProto.Label +// (from descriptor.proto). +typedef uint8_t upb_label_t; + +// A scalar (non-string) wire value. Used only for parsing unknown fields. +typedef union { + uint64_t varint; + uint64_t _64bit; + uint32_t _32bit; +} upb_wire_value; + +/* Polymorphic values of .proto types *****************************************/ + +struct _upb_string; +typedef struct _upb_string upb_string; + +typedef uint32_t upb_strlen_t; + +// A single .proto value. The owner must have an out-of-band way of knowing +// the type, so that it knows which union member to use. +typedef union { + double _double; + float _float; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + bool _bool; +} upb_value; + +// A pointer to a .proto value. The owner must have an out-of-band way of +// knowing the type, so it knows which union member to use. +typedef union { + double *_double; + float *_float; + int32_t *int32; + int64_t *int64; + uint8_t *uint8; + uint32_t *uint32; + uint64_t *uint64; + bool *_bool; +} upb_valueptr; + +INLINE upb_valueptr upb_value_addrof(upb_value *val) { + upb_valueptr ptr = {&val->_double}; + return ptr; +} + +// Status codes used as a return value. Codes >0 are not fatal and can be +// resumed. +enum upb_status_code { + UPB_STATUS_OK = 0, + + // A read or write from a streaming src/sink could not be completed right now. + UPB_STATUS_TRYAGAIN = 1, + + // A value had an incorrect wire type and will be skipped. + UPB_STATUS_BADWIRETYPE = 2, + + // An unrecoverable error occurred. + UPB_STATUS_ERROR = -1, + + // A varint went for 10 bytes without terminating. 
+ UPB_ERROR_UNTERMINATED_VARINT = -2, + + // The max nesting level (UPB_MAX_NESTING) was exceeded. + UPB_ERROR_MAX_NESTING_EXCEEDED = -3 +}; + +typedef struct { + enum upb_status_code code; + upb_string *str; +} upb_status; + +#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL} +#define UPB_ERRORMSG_MAXLEN 256 + +INLINE bool upb_ok(upb_status *status) { + return status->code == UPB_STATUS_OK; +} + +void upb_reset(upb_status *status); +void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, + ...); +void upb_copyerr(upb_status *to, upb_status *from); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_H_ */ diff --git a/core/upb_atomic.h b/core/upb_atomic.h new file mode 100644 index 0000000..01fc8a2 --- /dev/null +++ b/core/upb_atomic.h @@ -0,0 +1,185 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * Only a very small part of upb is thread-safe. Notably, individual + * messages, arrays, and strings are *not* thread safe for mutating. + * However, we do make message *metadata* such as upb_msgdef and + * upb_context thread-safe, and their ownership is tracked via atomic + * refcounting. This header implements the small number of atomic + * primitives required to support this. The primitives we implement + * are: + * + * - a reader/writer lock (wrappers around platform-provided mutexes). + * - an atomic refcount. + */ + +#ifndef UPB_ATOMIC_H_ +#define UPB_ATOMIC_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* inline if possible, emit standalone code if required. */ +#ifndef INLINE +#define INLINE static inline +#endif + +#ifdef UPB_THREAD_UNSAFE + +/* Non-thread-safe implementations. 
******************************************/ + +typedef struct { + int v; +} upb_atomic_refcount_t; + +INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { + a->v = val; +} + +INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { + return a->v++ == 0; +} + +INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { + return --a->v == 0; +} + +INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { + return a->v; +} + +INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) { + a->v += val; + return a->v == 0; +} + +INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { + int ret = a->v; + a->v += val; + return ret; +} + +#endif + +/* Atomic refcount ************************************************************/ + +#ifdef UPB_THREAD_UNSAFE + +/* Already defined above. */ + +#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 + +/* GCC includes atomic primitives. */ + +typedef struct { + volatile int v; +} upb_atomic_refcount_t; + +INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { + a->v = val; + __sync_synchronize(); /* Ensure the initialized value is visible. */ +} + +INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { + return __sync_fetch_and_add(&a->v, 1) == 0; +} + +INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) { + return __sync_add_and_fetch(&a->v, n) == 0; +} + +INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { + return __sync_sub_and_fetch(&a->v, 1) == 0; +} + +INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { + return __sync_fetch_and_add(&a->v, 0); /* Adding 0 fetches the current count unchanged. */ +} + +#elif defined(WIN32) + +/* Windows defines atomic increment/decrement.
*/ +#include + +typedef struct { + volatile LONG val; +} upb_atomic_refcount_t; + +INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { + InterlockedExchange(&a->val, val); +} + +INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { + return InterlockedIncrement(&a->val) == 1; +} + +INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { + return InterlockedDecrement(&a->val) == 0; +} + +#else +#error Atomic primitives not defined for your platform/CPU. \ + Implement them or compile with UPB_THREAD_UNSAFE. +#endif + +/* Reader/Writer lock. ********************************************************/ + +#ifdef UPB_THREAD_UNSAFE + +typedef struct { +} upb_rwlock_t; + +INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } + +#elif defined(UPB_USE_PTHREADS) + +#include + +typedef struct { + pthread_rwlock_t lock; +} upb_rwlock_t; + +INLINE void upb_rwlock_init(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_init(&l->lock, NULL); +} + +INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_destroy(&l->lock); +} + +INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_rdlock(&l->lock); +} + +INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_wrlock(&l->lock); +} + +INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { + /* TODO: check return value. */ + pthread_rwlock_unlock(&l->lock); +} + +#else +#error Reader/writer lock is not defined for your platform/CPU. \ + Implement it or compile with UPB_THREAD_UNSAFE. 
+#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_ATOMIC_H_ */ diff --git a/core/upb_def.c b/core/upb_def.c new file mode 100644 index 0000000..bfab738 --- /dev/null +++ b/core/upb_def.c @@ -0,0 +1,1022 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + */ + +#include +#include "descriptor_const.h" +#include "descriptor.h" +#include "upb_def.h" + +#define CHECKSRC(x) if(!(x)) goto src_err +#define CHECK(x) if(!(x)) goto err + +// A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(*l->defs)); // size counts elements, not bytes. + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(*l->defs)); // size counts elements, not bytes. + } + l->defs[l->len++] = d; +} + +/* Joins strings together, for example: + * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" + * Caller owns a ref on the returned string. */ +static upb_string *upb_join(upb_string *base, upb_string *name) { + upb_string *joined = upb_strdup(base); + upb_strlen_t len = upb_string_len(joined); + if(len > 0) { + upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; + } + upb_strcat(joined, name); + return joined; +} + +// Qualify the defname for all defs starting with offset "start" with "str".
+static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } +} + +/* upb_def ********************************************************************/ + +// Defs are reference counted, but can have cycles when types are +// self-recursive or mutually recursive, so we need to be capable of collecting +// the cycles. In our situation defs are immutable (so cycles cannot be +// created or destroyed post-initialization). We need to be thread-safe but +// want to avoid locks if at all possible and rely only on atomic operations. +// +// Our scheme is as follows. First we give each def a flag indicating whether +// it is part of a cycle or not. Because defs are immutable, this flag will +// never change. For acyclic defs, we can use a naive algorithm and avoid the +// overhead of dealing with cycles. Most defs will be acyclic, and most cycles +// will be very short. +// +// For defs that participate in cycles we keep two reference counts. One +// tracks references that come from outside the cycle (we call these external +// references), and is incremented and decremented like a regular refcount. +// The other is a cycle refcount, and works as follows. Every cycle is +// considered distinct, even if two cycles share members. For example, this +// graph has two distinct cycles: +// +// A-->B-->C +// ^ | | +// +---+---+ +// +// The cycles in this graph are AB and ABC. When A's external refcount +// transitions from 0->1, we say that A takes "cycle references" on both +// cycles. Taking a cycle reference means incrementing the cycle refcount of +// all defs in the cycle. Since A and B are common to both cycles, A and B's +// cycle refcounts will be incremented by two, and C's will be incremented by +// one. 
Likewise, when A's external refcount transitions from 1->0, we +// decrement A and B's cycle refcounts by two and C's by one. We collect a +// cyclic type when its cycle refcount drops to zero. A precondition for this +// is that the external refcount has dropped to zero also. +// +// This algorithm is relatively cheap, since it only requires extra work when +// the external refcount on a cyclic type transitions from 0->1 or 1->0. + +static void upb_msgdef_free(upb_msgdef *m); +static void upb_enumdef_free(upb_enumdef *e); +static void upb_unresolveddef_free(struct _upb_unresolveddef *u); + +static void upb_def_free(upb_def *def) +{ + switch(def->type) { + case UPB_DEF_MSG: + upb_msgdef_free(upb_downcast_msgdef(def)); + break; + case UPB_DEF_ENUM: + upb_enumdef_free(upb_downcast_enumdef(def)); + break; + case UPB_DEF_SVC: + assert(false); /* Unimplemented. */ + break; + case UPB_DEF_UNRESOLVED: + upb_unresolveddef_free(upb_downcast_unresolveddef(def)); + break; + default: + assert(false); + } +} + +// Depth-first search for all cycles that include cycle_base. Returns the +// number of paths from def that lead to cycle_base, which is equivalent to the +// number of cycles def is in that include cycle_base. +// +// open_defs tracks the set of nodes that are currently being visited in the +// search so we can stop the search if we detect a cycles that do not involve +// cycle_base. We can't color the nodes as we go by writing to a member of the +// def, because another thread could be performing the search concurrently. +static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, + upb_msgdef **open_defs, int num_open_defs, + bool ref) { + bool found = false; + for(int i = 0; i < num_open_defs; i++) { + if(open_defs[i] == m) { + // We encountered a cycle that did not involve cycle_base. 
+ found = true; + break; + } + } + + if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) { + return 0; + } else if(m == cycle_base) { + return 1; + } else { + int path_count = 0; + if(cycle_base == NULL) { + cycle_base = m; + } else { + open_defs[num_open_defs++] = m; + } + for(int i = 0; i < m->num_fields; i++) { + upb_fielddef *f = &m->fields[i]; + upb_def *def = f->def; + if(upb_issubmsg(f) && def->is_cyclic) { + upb_msgdef *sub_m = upb_downcast_msgdef(def); + path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs, + num_open_defs, ref); + } + } + if(ref) { + upb_atomic_add(&m->cycle_refcount, path_count); + } else { + if(upb_atomic_add(&m->cycle_refcount, -path_count)) + upb_def_free(UPB_UPCAST(m)); + } + return path_count; + } +} + +void _upb_def_reftozero(upb_def *def) { + if(def->is_cyclic) { + upb_msgdef *m = upb_downcast_msgdef(def); + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false); + } else { + upb_def_free(def); + } +} + +void _upb_def_cyclic_ref(upb_def *def) { + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); +} + +static void upb_def_init(upb_def *def, upb_def_type type) { + def->type = type; + def->is_cyclic = 0; // We detect this later, after resolving refs. + def->search_depth = 0; + def->fqname = NULL; + upb_atomic_refcount_init(&def->refcount, 1); +} + +static void upb_def_uninit(upb_def *def) { + upb_string_unref(def->fqname); +} + + +/* upb_unresolveddef **********************************************************/ + +// Unresolved defs are used as temporary placeholders for a def whose name has +// not been resolved yet. During the name resolution step, all unresolved defs +// are replaced with pointers to the actual def being referenced. +typedef struct _upb_unresolveddef { + upb_def base; + + // The target type name. This may or may not be fully qualified. 
+ upb_string *name; +} upb_unresolveddef; + +// Is passed a ref on the string. +static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { + upb_unresolveddef *def = malloc(sizeof(*def)); + upb_def_init(&def->base, UPB_DEF_UNRESOLVED); + def->name = str; + return def; +} + +static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { + upb_def_uninit(&def->base); + free(def); +} + + +/* upb_enumdef ****************************************************************/ + +typedef struct { + upb_strtable_entry e; + uint32_t value; +} ntoi_ent; + +typedef struct { + upb_inttable_entry e; + upb_string *string; +} iton_ent; + +static void upb_enumdef_free(upb_enumdef *e) { + upb_strtable_free(&e->ntoi); + upb_inttable_free(&e->iton); + upb_def_uninit(&e->base); + free(e); +} + +static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) +{ + int32_t number = -1; + upb_string *name = NULL; + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &number)); + break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: + name = upb_string_tryrecycle(name); + CHECKSRC(upb_src_getstr(src, name)); + break; + default: + CHECKSRC(upb_src_skipval(src)); + break; + } + } + + if(name == NULL || number == -1) { + upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); + goto err; + } + ntoi_ent ntoi_ent = {{name, 0}, number}; + iton_ent iton_ent = {{number, 0}, name}; + upb_strtable_insert(&e->ntoi, &ntoi_ent.e); + upb_inttable_insert(&e->iton, &iton_ent.e); + // We don't unref "name" because we pass our ref to the iton entry of the + // table. strtables can ref their keys, but the inttable doesn't know that + // the value is a string. 
+ return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); +err: + upb_string_unref(name); + return false; +} + +static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM); + upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECK(upb_addenum_val(src, e, status)); + break; + default: + upb_src_skipval(src); + break; + } + } + upb_deflist_push(defs, UPB_UPCAST(e)); + return true; + +err: + upb_enumdef_free(e); + return false; +} + +static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { + iter->state = ent; + iter->name = ent->e.key; + iter->val = ent->value; +} + +void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { + // We could iterate over either table here; the choice is arbitrary. 
+ ntoi_ent *ent = upb_strtable_begin(&e->ntoi); + iter->e = e; + fill_iter(iter, ent); +} + +void upb_enum_next(upb_enum_iter *iter) { + ntoi_ent *ent = iter->state; + assert(ent); + ent = upb_strtable_next(&iter->e->ntoi, &ent->e); + iter->state = ent; + if(ent) fill_iter(iter, ent); +} + +bool upb_enum_done(upb_enum_iter *iter) { + return iter->state == NULL; +} + + +/* upb_fielddef ***************************************************************/ + +static void upb_fielddef_free(upb_fielddef *f) { + free(f); +} + +static void upb_fielddef_uninit(upb_fielddef *f) { + upb_string_unref(f->name); + if(upb_hasdef(f) && f->owned) { + upb_def_unref(f->def); + } +} + +static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) +{ + upb_fielddef *f = malloc(sizeof(*f)); + f->def = NULL; + f->owned = false; + upb_fielddef *parsed_f; + int32_t tmp; + while((parsed_f = upb_src_getdef(src))) { + switch(parsed_f->number) { + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &tmp)); + f->type = tmp; + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &tmp)); + f->label = tmp; + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &tmp)); + f->number = tmp; + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: + f->name = upb_string_tryrecycle(f->name); + CHECKSRC(upb_src_getstr(src, f->name)); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { + upb_string *str = upb_string_new(); + CHECKSRC(upb_src_getstr(src, str)); + if(f->def) upb_def_unref(f->def); + f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + f->owned = true; + break; + } + } + } + CHECKSRC(upb_src_eof(src)); + // TODO: verify that all required fields were present. + assert((f->def != NULL) == upb_hasdef(f)); + + // Field was successfully read, add it as a field of the msgdef. 
+ upb_itof_ent itof_ent = {{f->number, 0}, f}; + upb_ntof_ent ntof_ent = {{f->name, 0}, f}; + upb_inttable_insert(&m->itof, &itof_ent.e); + upb_strtable_insert(&m->ntof, &ntof_ent.e); + return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); + upb_fielddef_free(f); + return false; +} + + +/* upb_msgdef *****************************************************************/ + +// Processes a google.protobuf.DescriptorProto, adding defs to "defs." +static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_msgdef *m = malloc(sizeof(*m)); + upb_def_init(&m->base, UPB_DEF_MSG); + upb_atomic_refcount_init(&m->cycle_refcount, 0); + upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); + upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + int32_t start_count = defs->len; + + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: + m->base.fqname = upb_string_tryrecycle(m->base.fqname); + CHECKSRC(upb_src_getstr(src, m->base.fqname)); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addfield(src, m, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addmsg(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addenum(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + default: + // TODO: extensions. 
+ CHECKSRC(upb_src_skipval(src)); + } + } + CHECK(upb_src_eof(src)); + if(!m->base.fqname) { + upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + goto err; + } + upb_deflist_qualify(defs, m->base.fqname, start_count); + upb_deflist_push(defs, UPB_UPCAST(m)); + return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); +err: + upb_msgdef_free(m); + return false; +} + +static void upb_msgdef_free(upb_msgdef *m) +{ + for (upb_field_count_t i = 0; i < m->num_fields; i++) + upb_fielddef_uninit(&m->fields[i]); + free(m->fields); + upb_strtable_free(&m->ntof); + upb_inttable_free(&m->itof); + upb_def_uninit(&m->base); + free(m); +} + +static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { + (void)m; + if(f->owned) upb_def_unref(f->def); + f->def = def; + // We will later make the ref unowned if it is a part of a cycle. + f->owned = true; + upb_def_ref(def); +} + + +/* symtab internal ***********************************************************/ + +// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". +static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) +{ + upb_string *package = NULL; + int32_t start_count = defs->len; + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + package = upb_string_tryrecycle(package); + CHECKSRC(upb_src_getstr(src, package)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addmsg(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addenum(src, defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + default: + // TODO: services and extensions. 
+ CHECKSRC(upb_src_skipval(src)); + } + } + CHECK(upb_src_eof(src)); + upb_deflist_qualify(defs, package, start_count); + upb_string_unref(package); + return true; + +src_err: + upb_copyerr(status, upb_src_status(src)); +err: + upb_string_unref(package); + return false; +} + +/* Search for a character in a string, in reverse. */ +static int my_memrchr(char *data, char c, size_t len) +{ + int off = len-1; + while(off > 0 && data[off] != c) --off; + return off; +} + +typedef struct { + upb_strtable_entry e; + upb_def *def; +} upb_symtab_ent; + +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. +static upb_symtab_ent *upb_resolve(upb_strtable *t, + upb_string *base, upb_string *sym) +{ + if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN || + upb_string_len(sym) == 0) return NULL; + + if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1); + upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); + upb_string_unref(sym_str); + return e; + } else { + // Remove components from base until we find an entry or run out. + upb_string *sym_str = upb_string_new(); + int baselen = upb_string_len(base); + while(1) { + // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym + upb_strlen_t len = baselen + upb_string_len(sym) + 1; + char *buf = upb_string_getrwbuf(sym_str, len); + memcpy(buf, upb_string_getrobuf(base), baselen); + buf[baselen] = UPB_SYMBOL_SEPARATOR; + memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym)); + + upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); + if (e) return e; + else if(baselen == 0) return NULL; // No more scopes to try. + + baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); + } + } +} + +// Performs a pass over the type graph to find all cycles that include m. 
+static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) +{ + if(depth > UPB_MAX_TYPE_DEPTH) { + // We have found a non-cyclic path from the base of the type tree that + // exceeds the maximum allowed depth. There are many situations in upb + // where we recurse over the type tree (like for example, right now) and an + // absurdly deep tree could cause us to stack overflow on systems with very + // limited stacks. + upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " + "depth %d in the type graph, which exceeds the maximum type " + "depth of %d.", UPB_UPCAST(m)->fqname, depth, + UPB_MAX_TYPE_DEPTH); + return false; + } else if(UPB_UPCAST(m)->search_depth == 1) { + // Cycle! + int cycle_len = depth - 1; + if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { + upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " + "in a cycle of length %d, which exceeds the maximum type " + "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, + UPB_MAX_TYPE_CYCLE_LEN); + } + return true; + } else if(UPB_UPCAST(m)->search_depth > 0) { + // This was a cycle, but did not originate from the base of our search tree. + // We'll find it when we call find_cycles() on this node directly. + return false; + } else { + UPB_UPCAST(m)->search_depth = ++depth; + bool cycle_found = false; + for(upb_field_count_t i = 0; i < m->num_fields; i++) { + upb_fielddef *f = &m->fields[i]; + if(!upb_issubmsg(f)) continue; + upb_def *sub_def = f->def; + upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); + if(upb_symtab_findcycles(sub_m, depth, status)) { + cycle_found = true; + UPB_UPCAST(m)->is_cyclic = true; + if(f->owned) { + upb_atomic_unref(&sub_def->refcount); + f->owned = false; + } + } + } + UPB_UPCAST(m)->search_depth = 0; + return cycle_found; + } +} + +// Given a table of pending defs "tmptab" and a table of existing defs "symtab", +// resolves all of the unresolved refs for the defs in tmptab. 
+bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, + upb_status *status) +{ + upb_symtab_ent *e; + for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // Type names are resolved relative to the message in which they appear. + upb_string *base = e->e.key; + + for(upb_field_count_t i = 0; i < m->num_fields; i++) { + upb_fielddef *f = &m->fields[i]; + if(!upb_hasdef(f)) continue; // No resolving necessary. + upb_string *name = upb_downcast_unresolveddef(f->def)->name; + + // Resolve from either the tmptab (pending adds) or symtab (existing + // defs). If both exist, prefer the pending add, because it will be + // overwriting the existing def. + upb_symtab_ent *found; + if(!(found = upb_resolve(tmptab, base, name)) && + !(found = upb_resolve(symtab, base, name))) { + upb_seterr(status, UPB_STATUS_ERROR, + "could not resolve symbol '" UPB_STRFMT "'" + " in context '" UPB_STRFMT "'", + UPB_STRARG(name), UPB_STRARG(base)); + return false; + } + + // Check the type of the found def. + upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + if(found->def->type != expected) { + upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); + return false; + } + upb_msgdef_resolve(m, f, found->def); + } + } + + // Deal with type cycles. + for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { + upb_msgdef *m = upb_dyncast_msgdef(e->def); + if(!m) continue; + // The findcycles() call will decrement the external refcount of the + if(!upb_symtab_findcycles(m, 0, status)) return false; + upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; + upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); + } + + return true; +} + +// Given a list of defs, a list of extensions (in the future), and a flag +// indicating whether the new defs can overwrite existing defs in the symtab, +// attempts to add the given defs to the symtab. 
The whole operation either +// succeeds or fails. Ownership of "defs" and "exts" is taken. +bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, + upb_status *status) +{ + upb_rwlock_wrlock(&s->lock); + + // Build a table of the defs we mean to add, for duplicate detection and name + // resolution. + upb_strtable tmptab; + upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); + for (uint32_t i = 0; i < defs->len; i++) { + upb_def *def = defs->defs[i]; + upb_symtab_ent e = {{def->fqname, 0}, def}; + + // Redefinition is never allowed within a single FileDescriptorSet. + // Additionally, we only allow overwriting of an existing definition if + // allow_redef is set. + if (upb_strtable_lookup(&tmptab, def->fqname) || + (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { + upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT, + UPB_STRARG(def->fqname)); + goto err; + } + + // Pass ownership from the deflist to the strtable. + upb_strtable_insert(&tmptab, &e.e); + defs->defs[i] = NULL; + } + + // TODO: process the list of extensions by modifying entries from + // tmptab in-place (copying them from the symtab first if necessary). + + CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); + + // The defs in tmptab have been vetted, and can be added to the symtab + // without causing errors. Now add all tmptab defs to the symtab, + // overwriting (and releasing a ref on) any existing defs with the same + // names. Ownership for tmptab defs passes from the tmptab to the symtab. 
+ upb_symtab_ent *tmptab_e; + for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; + tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { + upb_symtab_ent *symtab_e = + upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); + if(symtab_e) { + upb_def_unref(symtab_e->def); + symtab_e->def = tmptab_e->def; + } else { + upb_strtable_insert(&s->symtab, &tmptab_e->e); + } + } + + upb_rwlock_unlock(&s->lock); + upb_strtable_free(&tmptab); + return true; + +err: + // We need to free all defs from "tmptab." + upb_rwlock_unlock(&s->lock); + for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; + e = upb_strtable_next(&tmptab, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(&tmptab); + return false; +} + + +/* upb_symtab *****************************************************************/ + +upb_symtab *upb_symtab_new() +{ + upb_symtab *s = malloc(sizeof(*s)); + upb_atomic_refcount_init(&s->refcount, 1); + upb_rwlock_init(&s->lock); + upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); + return s; +} + +static void upb_free_symtab(upb_strtable *t) +{ + upb_symtab_ent *e; + for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) + upb_def_unref(e->def); + upb_strtable_free(t); +} + +void _upb_symtab_free(upb_symtab *s) +{ + upb_free_symtab(&s->symtab); + upb_free_symtab(&s->psymtab); + upb_rwlock_destroy(&s->lock); + free(s); +} + +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) +{ + upb_rwlock_rdlock(&s->lock); + int total = upb_strtable_count(&s->symtab); + // We may only use part of this, depending on how many symbols are of the + // correct type. 
+ upb_def **defs = malloc(sizeof(*defs) * total); + upb_symtab_ent *e = upb_strtable_begin(&s->symtab); + int i = 0; + for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { + upb_def *def = e->def; + assert(def); + if(type == UPB_DEF_ANY || def->type == type) + defs[i++] = def; + } + upb_rwlock_unlock(&s->lock); + *count = i; + for(i = 0; i < *count; i++) + upb_def_ref(defs[i]); + return defs; +} + +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) +{ + upb_rwlock_rdlock(&s->lock); + upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + + +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { + upb_rwlock_rdlock(&s->lock); + upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); + upb_def *ret = NULL; + if(e) { + ret = e->def; + upb_def_ref(ret); + } + upb_rwlock_unlock(&s->lock); + return ret; +} + +void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) +{ + upb_deflist defs; + upb_deflist_init(&defs); + upb_fielddef *f; + while((f = upb_src_getdef(src)) != NULL) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); + CHECK(upb_addfd(src, &defs, status)); + CHECKSRC(upb_src_endmsg(src)); + break; + default: + CHECKSRC(upb_src_skipval(src)); + } + } + CHECKSRC(upb_src_eof(src)); + CHECK(upb_symtab_add_defs(s, &defs, false, status)); + upb_deflist_uninit(&defs); + return; + +src_err: + upb_copyerr(status, upb_src_status(src)); +err: + upb_deflist_uninit(&defs); +} + + +/* upb_baredecoder ************************************************************/ + +// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer +// binary format. It is only used for bootstrapping. It can parse without +// having a upb_msgdef, which is why it is useful for bootstrapping the first +// msgdef. 
On the downside, it does not support: +// +// * having its input span multiple upb_strings. +// * reading any field of the returned upb_fielddef's except f->number. +// * keeping a pointer to the upb_fielddef* and reading it later (the same +// upb_fielddef is reused over and over). +// * detecting errors in the input (we trust that our input is known-good). +// +// It also does not support any of the follow protobuf features: +// * packed fields. +// * groups. +// * zig-zag-encoded types like sint32 and sint64. +// +// If descriptor.proto ever changed to use any of these features, this decoder +// would need to be extended to support them. + +typedef struct { + upb_src src; + upb_string *input; + upb_strlen_t offset; + upb_fielddef field; + upb_wire_type_t wire_type; + upb_strlen_t delimited_len; + upb_strlen_t stack[UPB_MAX_NESTING], *top; + upb_string *str; +} upb_baredecoder; + +static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) +{ + const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset; + const uint8_t *buf = start; + uint8_t last = 0x80; + uint64_t val = 0; + for(int bitpos = 0; (last & 0x80); buf++, bitpos += 7) + val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; + d->offset += buf - start; + return val; +} + +static uint32_t upb_baredecoder_readv32(upb_baredecoder *d) +{ + return (uint32_t)upb_baredecoder_readv64(d); // Truncate. +} + +static uint64_t upb_baredecoder_readf64(upb_baredecoder *d) +{ + uint64_t val; + memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 8); + d->offset += 8; + return val; +} + +static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) +{ + uint32_t val; + memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 4); + d->offset += 4; + return val; +} + +static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) +{ + // Detect end-of-submessage. 
+ if(d->offset >= *d->top) { + d->src.eof = true; + return NULL; + } + + uint32_t key; + key = upb_baredecoder_readv32(d); + d->wire_type = key & 0x7; + d->field.number = key >> 3; + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + // For delimited wire values we parse the length now, since we need it in + // all cases. + d->delimited_len = upb_baredecoder_readv32(d); + } + return &d->field; +} + +static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) +{ + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + d->str = upb_string_tryrecycle(d->str); + upb_string_substr(d->str, d->input, d->offset, d->delimited_len); + } else { + switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: + *val.uint64 = upb_baredecoder_readv64(d); + break; + case UPB_WIRE_TYPE_32BIT_VARINT: + *val.uint32 = upb_baredecoder_readv32(d); + break; + case UPB_WIRE_TYPE_64BIT: + *val.uint64 = upb_baredecoder_readf64(d); + break; + case UPB_WIRE_TYPE_32BIT: + *val.uint32 = upb_baredecoder_readf32(d); + break; + default: + assert(false); + } + } + return true; +} + +static bool upb_baredecoder_skipval(upb_baredecoder *d) +{ + upb_value val; + return upb_baredecoder_getval(d, upb_value_addrof(&val)); +} + +static bool upb_baredecoder_startmsg(upb_baredecoder *d) +{ + *(d->top++) = d->offset + d->delimited_len; + return true; +} + +static bool upb_baredecoder_endmsg(upb_baredecoder *d) +{ + d->offset = *(--d->top); + return true; +} + +static upb_src_vtable upb_baredecoder_src_vtbl = { + (upb_src_getdef_fptr)&upb_baredecoder_getdef, + (upb_src_getval_fptr)&upb_baredecoder_getval, + (upb_src_skipval_fptr)&upb_baredecoder_skipval, + (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, + (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, +}; + +static upb_baredecoder *upb_baredecoder_new(upb_string *str) +{ + upb_baredecoder *d = malloc(sizeof(*d)); + d->input = upb_string_getref(str); + d->str = upb_string_new(); + d->top = &d->stack[0]; + upb_src_init(&d->src, &upb_baredecoder_src_vtbl); + return d; 
+} + +static void upb_baredecoder_free(upb_baredecoder *d) +{ + upb_string_unref(d->input); + upb_string_unref(d->str); + free(d); +} + +static upb_src *upb_baredecoder_src(upb_baredecoder *d) +{ + return &d->src; +} + +upb_symtab *upb_get_descriptor_symtab() +{ + // TODO: implement sharing of symtabs, so that successive calls to this + // function will return the same symtab. + upb_symtab *symtab = upb_symtab_new(); + // TODO: allow upb_strings to be static or on the stack. + upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); + upb_baredecoder *decoder = upb_baredecoder_new(descriptor); + upb_status status; + upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); + assert(upb_ok(&status)); + upb_baredecoder_free(decoder); + upb_string_unref(descriptor); + return symtab; +} diff --git a/core/upb_def.h b/core/upb_def.h new file mode 100644 index 0000000..c297e83 --- /dev/null +++ b/core/upb_def.h @@ -0,0 +1,302 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * Provides definitions of .proto constructs: + * - upb_msgdef: describes a "message" construct. + * - upb_fielddef: describes a message field. + * - upb_enumdef: describes an enum. + * (TODO: definitions of extensions and services). + * + * Defs are obtained from a upb_symtab object. A upb_symtab is empty when + * constructed, and definitions can be added by supplying serialized + * descriptors. + * + * Defs are immutable and reference-counted. Symbol tables reference any defs + * that are the "current" definitions. If an extension is loaded that adds a + * field to an existing message, a new msgdef is constructed that includes the + * new field and the old msgdef is unref'd. The old msgdef will still be ref'd + * by messages (if any) that were constructed with that msgdef. + * + * This file contains routines for creating and manipulating the definitions + * themselves. 
To create and manipulate actual messages, see upb_msg.h. + */ + +#ifndef UPB_DEF_H_ +#define UPB_DEF_H_ + +#include "upb_atomic.h" +#include "upb_stream.h" +#include "upb_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_def: base class for defs **********************************************/ + +// All the different kind of defs we support. These correspond 1:1 with +// declarations in a .proto file. +typedef enum { + UPB_DEF_MSG = 0, + UPB_DEF_ENUM, + UPB_DEF_SVC, + UPB_DEF_EXT, + // Internal-only, placeholder for a def that hasn't be resolved yet. + UPB_DEF_UNRESOLVED, + + // For specifying that defs of any type are requsted from getdefs. + UPB_DEF_ANY = -1 +} upb_def_type; + +// This typedef is more space-efficient than declaring an enum var directly. +typedef int8_t upb_def_type_t; + +typedef struct { + upb_string *fqname; // Fully qualified. + upb_atomic_refcount_t refcount; + upb_def_type_t type; + + // The is_cyclic flag could go in upb_msgdef instead of here, because only + // messages can be involved in cycles. However, putting them here is free + // from a space perspective because structure alignment will otherwise leave + // three bytes empty after type. It is also makes ref and unref more + // efficient, because we don't have to downcast to msgdef before checking the + // is_cyclic flag. + bool is_cyclic; + uint16_t search_depth; // Used during initialization dfs. +} upb_def; + +// These must not be called directly! +void _upb_def_cyclic_ref(upb_def *def); +void _upb_def_reftozero(upb_def *def); + +// Call to ref/deref a def. +INLINE void upb_def_ref(upb_def *def) { + if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); +} +INLINE void upb_def_unref(upb_def *def) { + if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); +} + +/* upb_fielddef ***************************************************************/ + +// A upb_fielddef describes a single field in a message. 
It isn't a full def +// in the sense that it derives from upb_def. It cannot stand on its own; it +// is either a field of a upb_msgdef or contained inside a upb_extensiondef. +// It is also reference-counted. +typedef struct _upb_fielddef { + upb_atomic_refcount_t refcount; + upb_string *name; + upb_field_number_t number; + upb_field_type_t type; + upb_label_t label; + upb_value default_value; + + // For the case of an enum or a submessage, points to the def for that type. + upb_def *def; + + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; + + // These are set only when this fielddef is part of a msgdef. + uint32_t byte_offset; // Where in a upb_msg to find the data. + upb_field_count_t field_index; // Indicates set bit. +} upb_fielddef; + +// A variety of tests about the type of a field. +INLINE bool upb_issubmsg(upb_fielddef *f) { + return upb_issubmsgtype(f->type); +} +INLINE bool upb_isstring(upb_fielddef *f) { + return upb_isstringtype(f->type); +} +INLINE bool upb_isarray(upb_fielddef *f) { + return f->label == UPB_LABEL(REPEATED); +} +// Does the type of this field imply that it should contain an associated def? +INLINE bool upb_hasdef(upb_fielddef *f) { + return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); +} + +INLINE bool upb_field_ismm(upb_fielddef *f) { + return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); +} + +INLINE bool upb_elem_ismm(upb_fielddef *f) { + return upb_isstring(f) || upb_issubmsg(f); +} + +/* upb_msgdef *****************************************************************/ + +// Structure that describes a single .proto message type. +typedef struct _upb_msgdef { + upb_def base; + upb_atomic_refcount_t cycle_refcount; + size_t size; + upb_field_count_t num_fields; + uint32_t set_flags_bytes; + uint32_t num_required_fields; // Required fields have the lowest set bytemasks. + upb_fielddef *fields; // We have exclusive ownership of these. 
+ + // Tables for looking up fields by number and name. + upb_inttable itof; // int to field + upb_strtable ntof; // name to field +} upb_msgdef; + +// Hash table entries for looking up fields by name or number. +typedef struct { + upb_inttable_entry e; + upb_fielddef *f; +} upb_itof_ent; +typedef struct { + upb_strtable_entry e; + upb_fielddef *f; +} upb_ntof_ent; + +// Looks up a field by name or number. While these are written to be as fast +// as possible, it will still be faster to cache the results of this lookup if +// possible. These return NULL if no such field is found. +INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { + upb_itof_ent *e = + (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); + return e ? e->f : NULL; +} + +INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { + upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); + return e ? e->f : NULL; +} + +/* upb_enumdef ****************************************************************/ + +typedef struct _upb_enumdef { + upb_def base; + upb_strtable ntoi; + upb_inttable iton; +} upb_enumdef; + +typedef int32_t upb_enumval_t; + +// Lookups from name to integer and vice-versa. +bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); + +// Iteration over name/value pairs. The order is undefined. +// upb_enum_iter i; +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// // ... +// } +typedef struct { + upb_enumdef *e; + void *state; // Internal iteration state. + upb_string *name; + upb_enumval_t val; +} upb_enum_iter; +void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); +void upb_enum_next(upb_enum_iter *iter); +bool upb_enum_done(upb_enum_iter *iter); + +/* upb_symtab *****************************************************************/ + +// A SymbolTable is where upb_defs live. It is empty when first constructed. 
+// Clients add definitions to the symtab by supplying unserialized or +// serialized descriptors (as defined in descriptor.proto). +typedef struct { + upb_atomic_refcount_t refcount; + upb_rwlock_t lock; // Protects all members except the refcount. + upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. + + // Our symbol tables; we own refs to the defs therein. + upb_strtable symtab; // The main symbol table. + upb_strtable psymtab; // Private symbols, for internal use. +} upb_symtab; + +// Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd +// when the caller is done with them. +upb_symtab *upb_symtab_new(void); +void _upb_symtab_free(upb_symtab *s); // Must not be called directly! + +INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } +INLINE void upb_symtab_unref(upb_symtab *s) { + if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); +} + +// Resolves the given symbol using the rules described in descriptor.proto, +// namely: +// +// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like +// scoping rules are used to find the type (i.e. first the nested types +// within this message are searched, then within the parent, on up to the +// root namespace). +// +// If a def is found, the caller owns one ref on the returned def. Otherwise +// returns NULL. +upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); + +// Find an entry in the symbol table with this exact name. If a def is found, +// the caller owns one ref on the returned def. Otherwise returns NULL. +upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); + +// Gets an array of pointers to all currently active defs in this symtab. The +// caller owns the returned array (which is of length *count) as well as a ref +// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are +// returned, otherwise only defs of the required type are returned. 
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); + +// "fds" is a upb_src that will yield data from the +// google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds +// all the definitions from the given FileDescriptorSet and adds them to the +// symtab. status indicates whether the operation was successful or not, and +// the error message (if any). +// +// TODO: should this allow redefinition? Either is possible, but which is +// more useful? Maybe it should be an option. +void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); + +// Returns a symtab that defines google.protobuf.DescriptorProto and all other +// types that are defined in descriptor.proto. This allows you to load other +// proto types. The caller owns a ref on the returned symtab. +upb_symtab *upb_get_descriptor_symtab(); + + +/* upb_def casts **************************************************************/ + +// Dynamic casts, for determining if a def is of a particular type at runtime. +#define UPB_DYNAMIC_CAST_DEF(lower, upper) \ + struct _upb_ ## lower; /* Forward-declare. */ \ + INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ + if(def->type != UPB_DEF_ ## upper) return NULL; \ + return (struct _upb_ ## lower*)def; \ + } +UPB_DYNAMIC_CAST_DEF(msgdef, MSG); +UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); +UPB_DYNAMIC_CAST_DEF(svcdef, SVC); +UPB_DYNAMIC_CAST_DEF(extdef, EXT); +UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DYNAMIC_CAST_DEF + +// Downcasts, for when someone wants to assert that a def is of a particular type. +// These are only checked if we are building debug. +#define UPB_DOWNCAST_DEF(lower, upper) \ + struct _upb_ ## lower; /* Forward-declare.
*/ \ + INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ + assert(def->type == UPB_DEF_ ## upper); \ + return (struct _upb_ ## lower*)def; \ + } +UPB_DOWNCAST_DEF(msgdef, MSG); +UPB_DOWNCAST_DEF(enumdef, ENUM); +UPB_DOWNCAST_DEF(svcdef, SVC); +UPB_DOWNCAST_DEF(extdef, EXT); +UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); +#undef UPB_DOWNCAST_DEF + +#define UPB_UPCAST(ptr) (&(ptr)->base) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_DEF_H_ */ diff --git a/core/upb_stream.h b/core/upb_stream.h new file mode 100644 index 0000000..e7b4074 --- /dev/null +++ b/core/upb_stream.h @@ -0,0 +1,121 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file defines four general-purpose streaming interfaces for protobuf + * data or bytes: + * + * - upb_src: pull interface for protobuf data. + * - upb_sink: push interface for protobuf data. + * - upb_bytesrc: pull interface for bytes. + * - upb_bytesink: push interface for bytes. + * + * These interfaces are used as general-purpose glue in upb. For example, the + * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + */ + +#ifndef UPB_SRCSINK_H +#define UPB_SRCSINK_H + +#include "upb_stream_vtbl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Forward-declare. We can't include upb_def.h; it would be circular. +struct _upb_fielddef; + +// Note! The "eof" flags work like feof() in C; they cannot report end-of-file +// until a read has failed due to eof. They cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. + +/* upb_src ********************************************************************/ + +// TODO: decide how to handle unknown fields. + +// Retrieves the fielddef for the next field in the stream. 
Returns NULL on +// error or end-of-stream. +struct _upb_fielddef *upb_src_getdef(upb_src *src); + +// Retrieves and stores the next value in "val". For string types "val" must +// be a newly-recycled string. Returns false on error. +bool upb_src_getval(upb_src *src, upb_valueptr val); +bool upb_src_getstr(upb_src *src, upb_string *val); + +// Like upb_src_getval() but skips the value. +bool upb_src_skipval(upb_src *src); + +// Descends into a submessage. May only be called after a def has been +// returned that indicates a submessage. +bool upb_src_startmsg(upb_src *src); + +// Stops reading a submessage. May be called before the stream is EOF, in +// which case the rest of the submessage is skipped. +bool upb_src_endmsg(upb_src *src); + +// Returns the current error/eof status for the stream. +INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } +INLINE bool upb_src_eof(upb_src *src) { return src->eof; } + +// The following functions are equivalent to upb_src_getval(), but take +// pointers to specific types. In debug mode this may check that the type +// is compatible with the type being read. This check will *not* be performed +// in non-debug mode, and if you get the type wrong the behavior is undefined. +bool upb_src_getbool(upb_src *src, bool *val); +bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); + +/* upb_sink *******************************************************************/ + +// Puts the given fielddef into the stream. +bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); + +// Puts the given value into the stream. +bool upb_sink_putval(upb_sink *sink, upb_value val); + +// Starts a submessage. (needed? the def tells us we're starting a submsg.) 
+bool upb_sink_startmsg(upb_sink *sink); + +// Ends a submessage. +bool upb_sink_endmsg(upb_sink *sink); + +// Returns the current error status for the stream. +upb_status *upb_sink_status(upb_sink *sink); + +/* upb_bytesrc ****************************************************************/ + +// Returns the next string in the stream. false is returned on error or eof. +// The string must be at least "minlen" bytes long unless the stream is eof. +bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); + +// Appends the next "len" bytes in the stream in-place to "str". This should +// be used when the caller needs to build a contiguous string of the existing +// data in "str" with more data. +bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +// Returns the current error status for the stream. +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +/* upb_bytesink ***************************************************************/ + +// Puts the given string. Returns the number of bytes that were actually +// consumed, which may be fewer than were in the string, or <0 on error. +int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); + +// Returns the current error status for the stream. +upb_status *upb_bytesink_status(upb_bytesink *sink); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h new file mode 100644 index 0000000..0ec45d2 --- /dev/null +++ b/core/upb_stream_vtbl.h @@ -0,0 +1,93 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * vtable declarations for types that are implementing any of the src or sink + * interfaces. Only components that are implementing these interfaces need + * to worry about this file. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
+ */ + +#ifndef UPB_SRCSINK_VTBL_H_ +#define UPB_SRCSINK_VTBL_H_ + +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_src; +typedef struct upb_src upb_src; +struct upb_sink; +typedef struct upb_sink upb_sink; +struct upb_bytesrc; +typedef struct upb_bytesrc upb_bytesrc; +struct upb_bytesink; +typedef struct upb_bytesink upb_bytesink; + +// Typedefs for function pointers to all of the virtual functions. +typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); +typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_skipval_fptr)(upb_src *src); +typedef bool (*upb_src_startmsg_fptr)(upb_src *src); +typedef bool (*upb_src_endmsg_fptr)(upb_src *src); + +typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); +typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); +typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); + +typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); +typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); +typedef bool (*upb_bytesrc_append_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); + +// Vtables for the above interfaces. +typedef struct { + upb_src_getdef_fptr getdef; + upb_src_getval_fptr getval; + upb_src_skipval_fptr skipval; + upb_src_startmsg_fptr startmsg; + upb_src_endmsg_fptr endmsg; +} upb_src_vtable; + +typedef struct { + upb_bytesrc_get_fptr get; + upb_bytesrc_append_fptr append; + upb_bytesrc_recycle_fptr recycle; +} upb_bytesrc_vtable; + +// "Base Class" definitions; components that implement these interfaces should +// contain one of these structures. + +struct upb_src { + upb_src_vtable *vtbl; + upb_status status; + bool eof; +#ifndef NDEBUG + int state; // For debug-mode checking of API usage. 
+#endif +}; + +struct upb_bytesrc { + upb_bytesrc_vtable *vtbl; + upb_status status; + bool eof; +}; + +INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; +#ifndef DEBUG + // TODO: initialize debug-mode checking. +#endif +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/core/upb_string.c b/core/upb_string.c new file mode 100644 index 0000000..91ab9ae --- /dev/null +++ b/core/upb_string.c @@ -0,0 +1,47 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_string.h" + +#include + +#define UPB_STRING_UNFINALIZED -1 + +static uint32_t upb_round_up_pow2(uint32_t v) { + // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +upb_string *upb_string_new() { + upb_string *str = malloc(sizeof(*str)); + str->ptr = NULL; + str->size = 0; + str->len = UPB_STRING_UNFINALIZED; + upb_atomic_refcount_init(&str->refcount, 1); + return str; +} + +void _upb_string_free(upb_string *str) { + if(str->ptr) free(str->ptr); + free(str); +} + +char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { + assert(str->len == UPB_STRING_UNFINALIZED); + if (str->size < len) { + str->size = upb_round_up_pow2(len); + str->ptr = realloc(str->ptr, str->size); + } + str->len = len; + return str->ptr; +} diff --git a/core/upb_string.h b/core/upb_string.h new file mode 100644 index 0000000..770dba7 --- /dev/null +++ b/core/upb_string.h @@ -0,0 +1,194 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * This file defines a simple string type. The overriding goal of upb_string + * is to avoid memcpy(), malloc(), and free() wherever possible, while + * keeping both CPU and memory overhead low.
Throughout upb there are + * situations where one wants to reference all or part of another string + * without copying. upb_string provides APIs for doing this. + * + * Characteristics of upb_string: + * - strings are reference-counted. + * - strings are logically immutable. + * - if a string has no other referents, it can be "recycled" into a new string + * without having to reallocate the upb_string. + * - strings can be substrings of other strings (owning a ref on the source + * string). + * - strings can refer to memory that they do not own, in which case we avoid + * copies if possible (the exact strategy for doing this can vary). + * - strings are not thread-safe by default, but can be made so by calling a + * function. This is not the default because it causes extra CPU overhead. + */ + +#ifndef UPB_STRING_H +#define UPB_STRING_H + +#include +#include +#include "upb_atomic.h" +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// All members of this struct are private, and may only be read/written through +// the associated functions. Also, strings may *only* be allocated on the heap. +struct _upb_string { + char *ptr; + int32_t len; + uint32_t size; + upb_atomic_refcount_t refcount; + union { + // Used if this is a slice of another string. + struct _upb_string *src; + // Used if this string is referencing external unowned memory. + upb_atomic_refcount_t reader_count; + } extra; +}; + +// Returns a newly-created, empty, non-finalized string. When the string is no +// longer needed, it should be unref'd, never freed directly. +upb_string *upb_string_new(); + +void _upb_string_free(upb_string *str); + +// Releases a ref on the given string, which may free the memory. "str" +// can be NULL, in which case this is a no-op. +INLINE void upb_string_unref(upb_string *str) { + if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); +} + +// Returns a string with the same contents as "str". 
The caller owns a ref on +// the returned string, which may or may not be the same object as "str". +INLINE upb_string *upb_string_getref(upb_string *str) { + // If/when we support stack-allocated strings, this will have to allocate + // a new string if the given string is on the stack. + upb_atomic_ref(&str->refcount); + return str; +} + +// Returns the length of the string. +INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } + +// Use to read the bytes of the string. The caller *must* call +// upb_string_endread() after the data has been read. The window between +// upb_string_getrobuf() and upb_string_endread() should be kept as short as +// possible, because any pending upb_string_detach() may be blocked until +// upb_string_endread() is called. No other functions may be called on the +// string during this window except upb_string_len(). +INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } +INLINE void upb_string_endread(upb_string *str) { (void)str; } + +// Attempts to recycle the string "str" so it may be reused and have different +// data written to it. The returned string is either "str" if it could be +// recycled or a newly created string if "str" has other references. +// +// As a special case, passing NULL will allocate a new string. This is +// convenient for the pattern: +// +// upb_string *str = NULL; +// while (x) { +// if (y) { +// str = upb_string_tryrecycle(str); +// upb_src_getstr(str); +// } +// } +upb_string *upb_string_tryrecycle(upb_string *str); + +// The three options for setting the contents of a string. These may only be +// called when a string is first created or recycled; once other functions have +// been called on the string, these functions are not allowed until the string +// is recycled. + +// Gets a pointer suitable for writing to the string, which is guaranteed to +// have at least "len" bytes of data available. The size of the string will +// become "len".
+char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); + +// Sets the contents of "str" to be the given substring of "target_str", to +// which the caller must own a ref. +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len); + +// Makes the string "str" a reference to the given string data. The caller +// guarantees that the given string data will not change or be deleted until +// a matching call to upb_string_detach(). +void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +void upb_string_detach(upb_string *str); + +// Allows using upb_strings in printf, ie: +// upb_strptr str = UPB_STRLIT("Hello, World!\n"); +// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ +#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) +#define UPB_STRFMT "%.*s" + +/* upb_string library functions ***********************************************/ + +// Named like their counterparts, these are all safe against buffer +// overflow. These only use the public upb_string interface. + +// More efficient than upb_strcmp if all you need is to test equality. +INLINE bool upb_streql(upb_string *s1, upb_string *s2) { + upb_strlen_t len = upb_string_len(s1); + if(len != upb_string_len(s2)) { + return false; + } else { + bool ret = + memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; + upb_string_endread(s1); + upb_string_endread(s2); + return ret; + } +} + +// Like strcmp(). +int upb_strcmp(upb_string *s1, upb_string *s2); + +// Like upb_strcpy, but copies from a buffer and length. +INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { + memcpy(upb_string_getrwbuf(dest, len), src, len); +} + +// Replaces the contents of "dest" with the contents of "src". 
+INLINE void upb_strcpy(upb_string *dest, upb_string *src) { + upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src)); + upb_string_endread(src); +} + +// Like upb_strcpy, but copies from a NULL-terminated string. +INLINE void upb_strcpyc(upb_string *dest, const char *src) { + // This does two passes over src, but that is necessary unless we want to + // repeatedly re-allocate dst, which seems worse. + upb_strcpylen(dest, src, strlen(src)); +} + +// Returns a new string whose contents are a copy of s. +upb_string *upb_strdup(upb_string *s); + +// Like upb_strdup(), but duplicates a given buffer and length. +INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { + upb_string *s = upb_string_new(); + upb_strcpylen(s, src, len); + return s; +} + +// Like upb_strdup(), but duplicates a C NULL-terminated string. +upb_string *upb_strdupc(const char *src); + +// Appends 'append' to 's' in-place, resizing s if necessary. +void upb_strcat(upb_string *s, upb_string *append); + +// Returns a new string that is a substring of the given string. +upb_string *upb_strslice(upb_string *s, int offset, int len); + +// Reads an entire file into a newly-allocated string. +upb_string *upb_strreadfile(const char *filename); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/core/upb_table.c b/core/upb_table.c new file mode 100644 index 0000000..b91776c --- /dev/null +++ b/core/upb_table.c @@ -0,0 +1,411 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_table.h" +#include "upb_string.h" + +#include +#include +#include + +static const upb_inttable_key_t EMPTYENT = 0; +static const double MAX_LOAD = 0.85; + +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); + +/* We use 1-based indexes into the table so that 0 can be "NULL". 
*/ +static upb_inttable_entry *intent(upb_inttable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} +static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { + return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); +} + +void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) +{ + t->count = 0; + t->entry_size = entry_size; + t->size_lg2 = 1; + while(size >>= 1) t->size_lg2++; + size_t bytes = upb_table_size(t) * t->entry_size; + t->mask = upb_table_size(t) - 1; + t->entries = malloc(bytes); + memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ +} + +void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) +{ + upb_table_init(&t->t, size, entsize); +} + +void upb_table_free(upb_table *t) { free(t->entries); } +void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); } +void upb_strtable_free(upb_strtable *t) { + // Free refs from the strtable. + upb_strtable_entry *e = upb_strtable_begin(t); + for(; e; e = upb_strtable_next(t, e)) { + upb_string_unref(e->key); + } + upb_table_free(&t->t); +} + +static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) +{ + uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0); + return (hash & (upb_strtable_size(t)-1)) + 1; +} + +void *upb_strtable_lookup(upb_strtable *t, upb_string *key) +{ + uint32_t bucket = strtable_bucket(t, key); + upb_strtable_entry *e; + do { + e = strent(t, bucket); + if(e->key && upb_streql(e->key, key)) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; +} + +static uint32_t empty_intbucket(upb_inttable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? 
*/ + for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { + upb_inttable_entry *e = intent(table, i); + if(e->key == EMPTYENT) return i; + } + assert(false); + return 0; +} + +/* The insert routines have a lot more code duplication between int/string + * variants than I would like, but there's just a bit too much that varies to + * parameterize them. */ +static void intinsert(upb_inttable *t, upb_inttable_entry *e) +{ + assert(upb_inttable_lookup(t, e->key) == NULL); + t->t.count++; + uint32_t bucket = upb_inttable_bucket(t, e->key); + upb_inttable_entry *table_e = intent(t, bucket); + if(table_e->key != EMPTYENT) { /* Collision. */ + if(bucket == upb_inttable_bucket(t, table_e->key)) { + /* Existing element is in its main position. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_intbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = intent(t, table_e->next); + table_e->next = empty_bucket; + table_e = intent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position. */ + uint32_t empty_bucket = empty_intbucket(t); + uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); + memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + upb_inttable_entry *evictee_e = intent(t, evictee_bucket); + while(1) { + assert(evictee_e->key != UPB_EMPTY_ENTRY); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + evictee_e = intent(t, evictee_e->next); + } + /* table_e remains set to our mainpos.
*/ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_inttable_lookup(t, e->key) == table_e); +} + +void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e) +{ + assert(e->key != 0); + if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + upb_inttable new_table; + upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); + new_table.t.count = t->t.count; + upb_inttable_entry *old_e; + for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e)) + intinsert(&new_table, old_e); + upb_inttable_free(t); + *t = new_table; + } + intinsert(t, e); +} + +static uint32_t empty_strbucket(upb_strtable *table) +{ + /* TODO: does it matter that this is biased towards the front of the table? */ + for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { + upb_strtable_entry *e = strent(table, i); + if(!e->key) return i; + } + assert(false); + return 0; +} + +static void strinsert(upb_strtable *t, upb_strtable_entry *e) +{ + assert(upb_strtable_lookup(t, e->key) == NULL); + e->key = upb_string_getref(e->key); + t->t.count++; + uint32_t bucket = strtable_bucket(t, e->key); + upb_strtable_entry *table_e = strent(t, bucket); + if(table_e->key) { /* Collision. */ + if(bucket == strtable_bucket(t, table_e->key)) { + /* Existing element is in its main position. Find an empty slot to + * place our new element and append it to this key's chain. */ + uint32_t empty_bucket = empty_strbucket(t); + while (table_e->next != UPB_END_OF_CHAIN) + table_e = strent(t, table_e->next); + table_e->next = empty_bucket; + table_e = strent(t, empty_bucket); + } else { + /* Existing element is not in its main position. Move it to an empty + * slot and put our element in its main position.
*/ + uint32_t empty_bucket = empty_strbucket(t); + uint32_t evictee_bucket = strtable_bucket(t, table_e->key); + memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ + upb_strtable_entry *evictee_e = strent(t, evictee_bucket); + while(1) { + assert(!upb_string_isnull(evictee_e->key)); + assert(evictee_e->next != UPB_END_OF_CHAIN); + if(evictee_e->next == bucket) { + evictee_e->next = empty_bucket; + break; + } + evictee_e = strent(t, evictee_e->next); + } + /* table_e remains set to our mainpos. */ + } + } + memcpy(table_e, e, t->t.entry_size); + table_e->next = UPB_END_OF_CHAIN; + assert(upb_strtable_lookup(t, e->key) == table_e); +} + +void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e) +{ + if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { + /* Need to resize. New table of double the size, add old elements to it. */ + upb_strtable new_table; + upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); + upb_strtable_entry *old_e; + for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) + strinsert(&new_table, old_e); + upb_strtable_free(t); + *t = new_table; + } + strinsert(t, e); +} + +void *upb_inttable_begin(upb_inttable *t) { + return upb_inttable_next(t, intent(t, 0)); +} + +void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) { + upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key == UPB_EMPTY_ENTRY); + return cur; +} + +void *upb_strtable_begin(upb_strtable *t) { + return upb_strtable_next(t, strent(t, 0)); +} + +void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { + upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1); + do { + cur = (void*)((char*)cur + t->t.entry_size); + if(cur == end) return NULL; + } while(cur->key == NULL); + return cur; +} + +#ifdef UPB_UNALIGNED_READS_OK 
+//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby (released as public domain). +// Reformatted and C99-ified by Joshua Haberman. +// Note - This code makes a few assumptions about how your machine behaves - +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t) +// And it has a few limitations - +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + + // Initialize the hash to a 'random' value + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + const uint8_t * data = (const uint8_t *)key; + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +#else // !UPB_UNALIGNED_READS_OK + +//----------------------------------------------------------------------------- +// MurmurHashAligned2, by Austin Appleby +// Same algorithm as MurmurHash2, but only does aligned reads - should be safer +// on certain platforms.
+// Performance will be lower than MurmurHash2 + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) +{ + const uint32_t m = 0x5bd1e995; + const int32_t r = 24; + const uint8_t * data = (const uint8_t *)key; + uint32_t h = seed ^ len; + uint8_t align = (uintptr_t)data & 3; + + if(align && (len >= 4)) { + // Pre-load the temp registers + uint32_t t = 0, d = 0; + + switch(align) { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int32_t sl = 8 * (4-align); + int32_t sr = 8 * align; + + // Mix + + while(len >= 4) { + d = *(uint32_t *)data; + t = (t >> sr) | (d << sl); + + uint32_t k = t; + + MIX(h,k,m); + + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + d = 0; + + if(len >= align) { + switch(align) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } + + uint32_t k = (t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + } else { + switch(len) { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } else { + while(len >= 4) { + uint32_t k = *(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } + + //---------- + // Handle tail bytes + + switch(len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} +#undef MIX + +#endif // UPB_UNALIGNED_READS_OK diff --git a/core/upb_table.h b/core/upb_table.h new file mode 100644 index 0000000..20dae92 --- /dev/null +++ 
b/core/upb_table.h @@ -0,0 +1,133 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file defines very fast int->struct (inttable) and string->struct + * (strtable) hash tables. The struct can be of any size, and it is stored + * in the table itself, for cache-friendly performance. + * + * The table uses internal chaining with Brent's variation (inspired by the + * Lua implementation of hash tables). The hash function for strings is + * Austin Appleby's "MurmurHash." + */ + +#ifndef UPB_TABLE_H_ +#define UPB_TABLE_H_ + +#include +#include "upb.h" +#include "upb_string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Note: the key cannot be zero! Zero is used by the implementation. */ +typedef uint32_t upb_inttable_key_t; + +#define UPB_END_OF_CHAIN (uint32_t)0 +#define UPB_EMPTY_ENTRY (uint32_t)0 + +typedef struct { + upb_inttable_key_t key; + uint32_t next; /* Internal chaining. */ +} upb_inttable_entry; + +// TODO: consider storing the hash in the entry. This would avoid the need to +// rehash on table resizes, but more importantly could possibly improve lookup +// performance by letting us compare hashes before comparing lengths or the +// strings themselves. +typedef struct { + upb_string *key; // We own a ref. + uint32_t next; // Internal chaining. +} upb_strtable_entry; + +typedef struct { + void *entries; + uint32_t count; /* How many elements are currently in the table? */ + uint16_t entry_size; /* How big is each entry? */ + uint8_t size_lg2; /* The table is 2^size_lg2 in size. */ + uint32_t mask; +} upb_table; + +typedef struct { + upb_table t; +} upb_strtable; + +typedef struct { + upb_table t; +} upb_inttable; + +/* Initialize and free a table, respectively. Specify the initial size + * with 'size' (the size will be increased as necessary). Entry size + * specifies how many bytes each entry in the table is. 
*/ +void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size); +void upb_inttable_free(upb_inttable *table); +void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); +void upb_strtable_free(upb_strtable *table); + +INLINE uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; } +INLINE uint32_t upb_inttable_size(upb_inttable *t) { + return upb_table_size(&t->t); +} +INLINE uint32_t upb_strtable_size(upb_strtable *t) { + return upb_table_size(&t->t); +} + +INLINE uint32_t upb_table_count(upb_table *t) { return t->count; } +INLINE uint32_t upb_inttable_count(upb_inttable *t) { + return upb_table_count(&t->t); +} +INLINE uint32_t upb_strtable_count(upb_strtable *t) { + return upb_table_count(&t->t); +} + +/* Inserts the given key into the hashtable with the given value. The key must + * not already exist in the hash table. The data will be copied from e into + * the hashtable (the amount of data copied comes from entry_size when the + * table was constructed). Therefore the data at e may be freed once the + * call returns. */ +void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e); +void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e); + +INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) { + return (k & t->t.mask) + 1; /* Identity hash for ints. */ +} + +/* Looks up key in this table. Inlined because this is in the critical path of + * decoding. We have the caller specify the entry_size because fixing this as + * a literal (instead of reading table->entry_size) gives the compiler more + * ability to optimize.
*/ +INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key, + uint32_t entry_size) { + assert(key != 0); + uint32_t bucket = upb_inttable_bucket(t, key); + upb_inttable_entry *e; + do { + e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size); + if(e->key == key) return e; + } while((bucket = e->next) != UPB_END_OF_CHAIN); + return NULL; /* Not found. */ +} + +INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { + return upb_inttable_fastlookup(t, key, t->t.entry_size); +} + +void *upb_strtable_lookup(upb_strtable *t, upb_string *key); + +/* Provides iteration over the table. The order in which the entries are + * returned is undefined. Insertions invalidate iterators. The _next + * functions return NULL when the end has been reached. */ +void *upb_inttable_begin(upb_inttable *t); +void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur); + +void *upb_strtable_begin(upb_strtable *t); +void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TABLE_H_ */ diff --git a/gen-deps.sh b/gen-deps.sh index 6c0ced3..2bc82f8 100755 --- a/gen-deps.sh +++ b/gen-deps.sh @@ -14,5 +14,5 @@ set -e rm -f deps for file in $@; do - gcc -MM $file -MT ${file%.*}.o -DUPB_THREAD_UNSAFE -Idescriptor -Isrc -I. >> deps + gcc -MM $file -MT ${file%.*}.o -DUPB_THREAD_UNSAFE -Idescriptor -Icore -Istream -I. >> deps done diff --git a/src/upb.c b/src/upb.c deleted file mode 100644 index a98512d..0000000 --- a/src/upb.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- * - */ - -#include -#include -#include - -#include "upb.h" -#include "upb_string.h" - -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(wire_type, ctype, allows_delimited) \ - {alignof(ctype), sizeof(ctype), wire_type, \ - (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ - #ctype}, - -upb_type_info upb_types[] = { - {0, 0, 0, 0, ""}, // There is no type 0. - TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE - TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // INT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1) // UINT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // INT32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1) // FIXED64 - TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1) // FIXED32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1) // BOOL - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // STRING - TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0) // GROUP - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // MESSAGE - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // BYTES - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // UINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // ENUM - TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1) // SFIXED32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1) // SFIXED64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // SINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // SINT64 -}; - -void upb_seterr(upb_status *status, enum upb_status_code code, - const char *msg, ...) -{ - if(upb_ok(status)) { // The first error is the most interesting. 
- status->str = upb_string_new(); - char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); - status->code = code; - va_list args; - va_start(args, msg); - vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); - va_end(args); - } -} - -void upb_copyerr(upb_status *to, upb_status *from) -{ - to->code = from->code; - to->str = upb_string_getref(from->str); -} - -void upb_reset(upb_status *status) { - status->code = UPB_STATUS_OK; - upb_string_unref(status->str); - status->str = NULL; -} diff --git a/src/upb.h b/src/upb.h deleted file mode 100644 index 230e638..0000000 --- a/src/upb.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file contains shared definitions that are widely used across upb. - */ - -#ifndef UPB_H_ -#define UPB_H_ - -#include -#include -#include // only for size_t. -#include "descriptor_const.h" -#include "upb_atomic.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// inline if possible, emit standalone code if required. -#ifndef INLINE -#define INLINE static inline -#endif - -#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) -#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) -#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) - -// The maximum that any submessages can be nested. Matches proto2's limit. -#define UPB_MAX_NESTING 64 - -// The maximum number of fields that any one .proto type can have. Note that -// this is very different than the max field number. It is hard to imagine a -// scenario where more than 32k fields makes sense. -#define UPB_MAX_FIELDS (1<<15) -typedef int16_t upb_field_count_t; - -// Nested type names are separated by periods. -#define UPB_SYMBOL_SEPARATOR '.' - -// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType -#define UPB_SYMBOL_MAXLEN 128 - -// The longest chain that mutually-recursive types are allowed to form. 
For -// example, this is a type cycle of length 2: -// message A { -// B b = 1; -// } -// message B { -// A a = 1; -// } -#define UPB_MAX_TYPE_CYCLE_LEN 16 - -// The maximum depth that the type graph can have. Note that this setting does -// not automatically constrain UPB_MAX_NESTING, because type cycles allow for -// unlimited nesting if we do not limit it. -#define UPB_MAX_TYPE_DEPTH 64 - -// The biggest possible single value is a 10-byte varint. -#define UPB_MAX_ENCODED_SIZE 10 - - -/* Fundamental types and type constants. **************************************/ - -// A list of types as they are encoded on-the-wire. -enum upb_wire_type { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5, - - // This isn't a real wire type, but we use this constant to describe varints - // that are expected to be a maximum of 32 bits. - UPB_WIRE_TYPE_32BIT_VARINT = 8 -}; - -typedef uint8_t upb_wire_type_t; - -// Value type as defined in a .proto file. eg. string, int32, etc. The -// integers that represent this are defined by descriptor.proto. Note that -// descriptor.proto reserves "0" for errors, and we use it to represent -// exceptional circumstances. -typedef uint8_t upb_field_type_t; - -// For referencing the type constants tersely. -#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type -#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type - -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} - -// Info for a given field type. -typedef struct { - uint8_t align; - uint8_t size; - upb_wire_type_t native_wire_type; - uint8_t allowed_wire_types; // For packable fields, also allows delimited. 
- char *ctype; -} upb_type_info; - -// A static array of info about all of the field types, indexed by type number. -extern upb_type_info upb_types[]; - -// The number of a field, eg. "optional string foo = 3". -typedef int32_t upb_field_number_t; - -// Label (optional, repeated, required) as defined in a .proto file. The -// values of this are defined by google.protobuf.FieldDescriptorProto.Label -// (from descriptor.proto). -typedef uint8_t upb_label_t; - -// A scalar (non-string) wire value. Used only for parsing unknown fields. -typedef union { - uint64_t varint; - uint64_t _64bit; - uint32_t _32bit; -} upb_wire_value; - -/* Polymorphic values of .proto types *****************************************/ - -struct _upb_string; -typedef struct _upb_string upb_string; - -typedef uint32_t upb_strlen_t; - -// A single .proto value. The owner must have an out-of-band way of knowing -// the type, so that it knows which union member to use. -typedef union { - double _double; - float _float; - int32_t int32; - int64_t int64; - uint32_t uint32; - uint64_t uint64; - bool _bool; -} upb_value; - -// A pointer to a .proto value. The owner must have an out-of-band way of -// knowing the type, so it knows which union member to use. -typedef union { - double *_double; - float *_float; - int32_t *int32; - int64_t *int64; - uint8_t *uint8; - uint32_t *uint32; - uint64_t *uint64; - bool *_bool; -} upb_valueptr; - -INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->_double}; - return ptr; -} - -// Status codes used as a return value. Codes >0 are not fatal and can be -// resumed. -enum upb_status_code { - UPB_STATUS_OK = 0, - - // A read or write from a streaming src/sink could not be completed right now. - UPB_STATUS_TRYAGAIN = 1, - - // A value had an incorrect wire type and will be skipped. - UPB_STATUS_BADWIRETYPE = 2, - - // An unrecoverable error occurred. - UPB_STATUS_ERROR = -1, - - // A varint went for 10 bytes without terminating. 
- UPB_ERROR_UNTERMINATED_VARINT = -2, - - // The max nesting level (UPB_MAX_NESTING) was exceeded. - UPB_ERROR_MAX_NESTING_EXCEEDED = -3 -}; - -typedef struct { - enum upb_status_code code; - upb_string *str; -} upb_status; - -#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL} -#define UPB_ERRORMSG_MAXLEN 256 - -INLINE bool upb_ok(upb_status *status) { - return status->code == UPB_STATUS_OK; -} - -void upb_reset(upb_status *status); -void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, - ...); -void upb_copyerr(upb_status *to, upb_status *from); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_H_ */ diff --git a/src/upb_atomic.h b/src/upb_atomic.h deleted file mode 100644 index 01fc8a2..0000000 --- a/src/upb_atomic.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * Only a very small part of upb is thread-safe. Notably, individual - * messages, arrays, and strings are *not* thread safe for mutating. - * However, we do make message *metadata* such as upb_msgdef and - * upb_context thread-safe, and their ownership is tracked via atomic - * refcounting. This header implements the small number of atomic - * primitives required to support this. The primitives we implement - * are: - * - * - a reader/writer lock (wrappers around platform-provided mutexes). - * - an atomic refcount. - */ - -#ifndef UPB_ATOMIC_H_ -#define UPB_ATOMIC_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* inline if possible, emit standalone code if required. */ -#ifndef INLINE -#define INLINE static inline -#endif - -#ifdef UPB_THREAD_UNSAFE - -/* Non-thread-safe implementations. 
******************************************/ - -typedef struct { - int v; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->v = val; -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return a->v++ == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return --a->v == 0; -} - -INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { - return a->v; -} - -INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) { - a->v += val; - return a->v == 0; -} - -INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { - int ret = a->v; - a->v += val; - return ret; -} - -#endif - -/* Atomic refcount ************************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -/* Already defined above. */ - -#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 - -/* GCC includes atomic primitives. */ - -typedef struct { - volatile int v; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->v = val; - __sync_synchronize(); /* Ensure the initialized value is visible. */ -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) { - return __sync_add_and_fetch(&a->v, n) == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return __sync_sub_and_fetch(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->v, 0); -} - -#elif defined(WIN32) - -/* Windows defines atomic increment/decrement. 
*/ -#include - -typedef struct { - volatile LONG val; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - InterlockedExchange(&a->val, val); -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return InterlockedIncrement(&a->val) == 1; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return InterlockedDecrement(&a->val) == 0; -} - -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif - -/* Reader/Writer lock. ********************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } - -#elif defined(UPB_USE_PTHREADS) - -#include - -typedef struct { - pthread_rwlock_t lock; -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_init(&l->lock, NULL); -} - -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_destroy(&l->lock); -} - -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_rdlock(&l->lock); -} - -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_wrlock(&l->lock); -} - -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_unlock(&l->lock); -} - -#else -#error Reader/writer lock is not defined for your platform/CPU. \ - Implement it or compile with UPB_THREAD_UNSAFE. 
-#endif - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ATOMIC_H_ */ diff --git a/src/upb_byteio.h b/src/upb_byteio.h deleted file mode 100644 index 69a28b3..0000000 --- a/src/upb_byteio.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file contains upb_bytesrc and upb_bytesink implementations for common - * interfaces like strings, UNIX fds, and FILE*. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_BYTEIO_H -#define UPB_BYTEIO_H - -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_stringsrc **************************************************************/ - -struct upb_stringsrc; -typedef struct upb_stringsrc upb_stringsrc; - -// Create/free a stringsrc. -upb_stringsrc *upb_stringsrc_new(); -void upb_stringsrc_free(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. -upb_bytesrc *upb_stringsrc_bytesrc(); - - -/* upb_fdsrc ******************************************************************/ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_decoder.c b/src/upb_decoder.c deleted file mode 100644 index e3fdc49..0000000 --- a/src/upb_decoder.c +++ /dev/null @@ -1,577 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. 
- */ - -#include "upb_decoder.h" - -#include -#include -#include - -#define UPB_GROUP_END_OFFSET UINT32_MAX - -// Returns true if the give wire type and field type combination is valid, -// taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - return (1 << wt) & upb_types[ft].allowed_wire_types; -} - -// Performs zig-zag decoding, which is used by sint32 and sint64. -static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - - -/* upb_decoder ****************************************************************/ - -// The decoder keeps a stack with one entry per level of recursion. -// upb_decoder_frame is one frame of that stack. -typedef struct { - upb_msgdef *msgdef; - upb_fielddef *field; - upb_strlen_t end_offset; // For groups, -1. -} upb_decoder_frame; - -struct upb_decoder { - upb_src src; // upb_decoder is a upb_src. - - upb_msgdef *toplevel_msgdef; - upb_bytesrc *bytesrc; - - // The buffer of input data. NULL is equivalent to the empty string. - upb_string *buf; - - // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; - - // The number of bytes we have yet to consume from "buf" or tmpbuf. This is - // always >= 0 unless we were just reset or are eof. - int32_t buf_bytesleft; - - // The offset within "buf" from where we are currently reading. This can be - // <0 if we are reading some residual bytes from the previous buffer, which - // are stored in tmpbuf and combined with bytes from "buf". - int32_t buf_offset; - - // The overall stream offset of the beginning of "buf". - uint32_t buf_stream_offset; - - // Fielddef for the key we just read. - upb_fielddef *field; - - // Wire type of the key we just read. - upb_wire_type_t wire_type; - - // Delimited length of the string field we are reading. 
- upb_strlen_t delimited_len; - - upb_strlen_t packed_end_offset; - - // We keep a stack of messages we have recursed into. - upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; -}; - - -/* upb_decoder buffering. *****************************************************/ - -static upb_strlen_t upb_decoder_offset(upb_decoder *d) -{ - return d->buf_stream_offset - d->buf_offset; -} - -static bool upb_decoder_nextbuf(upb_decoder *d) -{ - assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE); - - // Copy residual bytes to temporary buffer. - if(d->buf_bytesleft > 0) { - memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset, - d->buf_bytesleft); - } - - // Recycle old buffer. - if(d->buf) { - d->buf = upb_string_tryrecycle(d->buf); - d->buf_offset -= upb_string_len(d->buf); - d->buf_stream_offset += upb_string_len(d->buf); - } - - // Pull next buffer. - if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { - d->buf_bytesleft += upb_string_len(d->buf); - return true; - } else { - // Error or EOF. - if(!upb_bytesrc_eof(d->bytesrc)) { - // Error from bytesrc. - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } else if(d->buf_bytesleft == 0) { - // EOF from bytesrc and we don't have any residual bytes left. - d->src.eof = true; - return false; - } else { - // No more data left from the bytesrc, but we still have residual bytes. - return true; - } - } -} - -static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) -{ - if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { - // GCC is currently complaining about use of an uninitialized value if we - // don't set this now. I think this is incorrect, but leaving this in - // to suppress the warning for now. 
- *bytes = 0; - if(!upb_decoder_nextbuf(d)) return NULL; - } - - assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE); - - if(d->buf_offset >= 0) { - // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE - // contiguous bytes, so we can read directly out of it. - *bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; - } else { - // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we - // have so far. - upb_strlen_t len = -d->buf_offset; - if(d->buf) { - upb_strlen_t to_copy = - UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf)); - memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy); - len += to_copy; - } - // Pad the buffer out to UPB_MAX_ENCODED_SIZE. - memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); - *bytes = len; - return d->tmpbuf; - } -} - -// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE -// bytes long. This buffer contains the next bytes in the stream (even if -// those bytes span multiple buffers). *bytes is set to the number of actual -// stream bytes that are available in the returned buffer. If -// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. -// -// After the data has been read, upb_decoder_consume() should be called to -// indicate how many bytes were consumed. -static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) -{ - if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) { - // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE - // contiguous bytes, so we can read directly out of it. 
- *bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; - } else { - return upb_decoder_getbuf_full(d, bytes); - } -} - -static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) -{ - assert(bytes <= UPB_MAX_ENCODED_SIZE); - d->buf_offset += bytes; - d->buf_bytesleft -= bytes; - if(d->buf_offset < 0) { - // We still have residual bytes we have not consumed. - memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); - } - assert(d->buf_bytesleft >= 0); - return true; -} - -static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) -{ - d->buf_offset += bytes; - d->buf_bytesleft -= bytes; - while(d->buf_bytesleft < 0) { - if(!upb_decoder_nextbuf(d)) return false; - } - return true; -} - - -/* Functions to read wire values. *********************************************/ - -// Parses remining bytes of a 64-bit varint that has already had its first byte -// parsed. -INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high) -{ - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - const uint8_t *start = buf; - if(!buf) return false; - - *high = 0; - uint32_t b; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - } 
else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle " - "of a varint, stream offset: %u", upb_decoder_offset(d)); - } - return false; - -done: - return upb_decoder_consume(d, buf - start); -} - -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val) -{ - uint32_t high; - if(!upb_decoder_readv64(d, val, &high)) return false; - - // We expect the high bits to be zero, except that signed 32-bit values are - // first sign-extended to be wire-compatible with 64 bits, in which case we - // expect the high bits to be all one. - // - // We could perform a slightly more sophisticated check by having the caller - // indicate whether a signed or unsigned value is being read. We could check - // that the high bits are all zeros for unsigned, and properly sign-extended - // for signed. - if(high != 0 && ~high != 0) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but " - "the high bits contained data we should not truncate: " - "%ux, stream offset: %u", high, upb_decoder_offset(d)); - return false; - } - return true; -} - -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller -// promises that 4 bytes are available at buf. -static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) -{ - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - if(bytes_available < 4) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Stream ended in the middle of a 32-bit value"); - return false; - } - memcpy(val, buf, 4); - // TODO: byte swap if big-endian. - return upb_decoder_consume(d, 4); -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller -// promises that 8 bytes are available at buf. 
-static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val) -{ - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - if(bytes_available < 8) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Stream ended in the middle of a 64-bit value"); - return false; - } - memcpy(val, buf, 8); - // TODO: byte swap if big-endian. - return upb_decoder_consume(d, 8); -} - -// Returns the length of a varint (wire type: UPB_WIRE_TYPE_VARINT), allowing -// it to be easily skipped. Caller promises that 10 bytes are available at -// "buf". The function will return a maximum of 11 bytes before quitting. -static uint8_t upb_decoder_skipv64(upb_decoder *d) -{ - uint32_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - uint8_t i; - for(i = 0; i < 10 && buf[i] & 0x80; i++) - ; // empty loop body. - if(i > 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint."); - return false; - } - return upb_decoder_consume(d, i); -} - - -/* upb_src implementation for upb_decoder. ************************************/ - -bool upb_decoder_skipval(upb_decoder *d); - -upb_fielddef *upb_decoder_getdef(upb_decoder *d) -{ - // Detect end-of-submessage. - if(upb_decoder_offset(d) >= d->top->end_offset) { - d->src.eof = true; - return NULL; - } - - // Handles the packed field case. - if(d->field) return d->field; - - uint32_t key = 0; -again: - if(!upb_decoder_readv32(d, &key)) return NULL; - upb_wire_type_t wire_type = key & 0x7; - int32_t field_number = key >> 3; - - if(wire_type == UPB_WIRE_TYPE_DELIMITED) { - // For delimited wire values we parse the length now, since we need it in - // all cases. 
- if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL; - } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { - if(d->top->end_offset == UPB_GROUP_END_OFFSET) { - d->src.eof = true; - } else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - upb_decoder_offset(d)); - } - return NULL; - } - - // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number); - - if (!f) { - // Unknown field. If/when the upb_src interface supports reporting - // unknown fields we will implement that here. - upb_decoder_skipval(d); - goto again; - } else if (!upb_check_type(wire_type, f->type)) { - // This is a recoverable error condition. We skip the value but also - // return NULL and report the error. - upb_decoder_skipval(d); - // TODO: better error message. - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n"); - return NULL; - } - d->field = f; - d->wire_type = wire_type; - return f; -} - -bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) -{ - switch(upb_types[d->field->type].native_wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint32_t low, high; - if(!upb_decoder_readv64(d, &low, &high)) return false; - uint64_t u64 = ((uint64_t)high << 32) | low; - if(d->field->type == UPB_TYPE(SINT64)) - *val.int64 = upb_zzdec_64(u64); - else - *val.uint64 = u64; - break; - } - case UPB_WIRE_TYPE_32BIT_VARINT: { - uint32_t u32; - if(!upb_decoder_readv32(d, &u32)) return false; - if(d->field->type == UPB_TYPE(SINT32)) - *val.int32 = upb_zzdec_32(u32); - else - *val.uint32 = u32; - break; - } - case UPB_WIRE_TYPE_64BIT: - if(!upb_decoder_readf64(d, val.uint64)) return false; - break; - case UPB_WIRE_TYPE_32BIT: - if(!upb_decoder_readf32(d, val.uint32)) return false; - break; - default: - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Attempted to call getval on a group."); - return false; - } - // For a packed field where we have not reached the end, we leave 
the field - // in the decoder so we will return it again without parsing a key. - if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_decoder_offset(d) >= d->packed_end_offset) { - d->field = NULL; - } - return true; -} - -bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { - // A string, bytes, or a length-delimited submessage. The latter isn't - // technically a string, but can be gotten as one to perform lazy parsing. - const int32_t total_len = d->delimited_len; - if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { - // The entire string is inside our current buffer, so we can just - // return a substring of the buffer without copying. - upb_string_substr(str, d->buf, - upb_string_len(d->buf) - d->buf_bytesleft, - total_len); - upb_decoder_skipbytes(d, total_len); - } else { - // The string spans buffers, so we must copy from the residual buffer - // (if any bytes are there), then the buffer, and finally from the bytesrc. - uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( - str, UPB_MIN(total_len, d->buf_bytesleft)); - int32_t len = 0; - if(d->buf_offset < 0) { - // Residual bytes we need to copy from tmpbuf. - memcpy(ptr, d->tmpbuf, -d->buf_offset); - len += -d->buf_offset; - } - if(d->buf) { - // Bytes from the buffer. - memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, - upb_string_len(str) - len); - } - upb_decoder_skipbytes(d, upb_string_len(str)); - if(len < total_len) { - // Bytes from the bytesrc. - if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } - // Have to advance this since the buffering layer of the decoder will - // never see these bytes. 
- d->buf_stream_offset += total_len - len; - } - } - d->field = NULL; - return true; -} - -static bool upb_decoder_skipgroup(upb_decoder *d); - -bool upb_decoder_startmsg(upb_decoder *d) { - d->top->field = d->field; - if(++d->top >= d->limit) { - upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, - "Nesting exceeded maximum (%d levels)\n", - UPB_MAX_NESTING); - return false; - } - upb_decoder_frame *frame = d->top; - frame->msgdef = upb_downcast_msgdef(d->field->def); - if(d->field->type == UPB_TYPE(GROUP)) { - frame->end_offset = UPB_GROUP_END_OFFSET; - } else { - frame->end_offset = upb_decoder_offset(d) + d->delimited_len; - } - return true; -} - -bool upb_decoder_endmsg(upb_decoder *d) { - if(d->top > d->stack) { - --d->top; - if(!d->src.eof) { - if(d->top->field->type == UPB_TYPE(GROUP)) - upb_decoder_skipgroup(d); - else - upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); - } - d->src.eof = false; - return true; - } else { - return false; - } -} - -bool upb_decoder_skipval(upb_decoder *d) { - upb_strlen_t bytes_to_skip; - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: { - return upb_decoder_skipv64(d); - } - case UPB_WIRE_TYPE_START_GROUP: - if(!upb_decoder_startmsg(d)) return false; - if(!upb_decoder_skipgroup(d)) return false; - if(!upb_decoder_endmsg(d)) return false; - return true; - default: - // Including UPB_WIRE_TYPE_END_GROUP. - assert(false); - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); - return false; - case UPB_WIRE_TYPE_64BIT: - bytes_to_skip = 8; - break; - case UPB_WIRE_TYPE_32BIT: - bytes_to_skip = 4; - break; - case UPB_WIRE_TYPE_DELIMITED: - // Works for both string/bytes *and* submessages. - bytes_to_skip = d->delimited_len; - break; - } - return upb_decoder_skipbytes(d, bytes_to_skip); -} - -static bool upb_decoder_skipgroup(upb_decoder *d) -{ - // This will be mututally recursive with upb_decoder_skipval() if the group - // has sub-groups. 
If we wanted to handle EAGAIN in the future, this - // approach would not work; we would need to track the group depth - // explicitly. - while(upb_decoder_getdef(d)) { - if(!upb_decoder_skipval(d)) return false; - } - // If we are at the end of the group like we want to be, then - // upb_decoder_getdef() returned NULL because of eof, not error. - if(!&d->src.eof) return false; - return true; -} - -upb_src_vtable upb_decoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_decoder_getdef, - (upb_src_getval_fptr)&upb_decoder_getval, - (upb_src_skipval_fptr)&upb_decoder_skipval, - (upb_src_startmsg_fptr)&upb_decoder_startmsg, - (upb_src_endmsg_fptr)&upb_decoder_endmsg, -}; - - -/* upb_decoder construction/destruction. **************************************/ - -upb_decoder *upb_decoder_new(upb_msgdef *msgdef) -{ - upb_decoder *d = malloc(sizeof(*d)); - d->toplevel_msgdef = msgdef; - d->limit = &d->stack[UPB_MAX_NESTING]; - d->buf = NULL; - upb_src_init(&d->src, &upb_decoder_src_vtbl); - return d; -} - -void upb_decoder_free(upb_decoder *d) -{ - upb_string_unref(d->buf); - free(d); -} - -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) -{ - upb_string_unref(d->buf); - d->top = d->stack; - d->top->msgdef = d->toplevel_msgdef; - // The top-level message is not delimited (we can keep receiving data for it - // indefinitely), so we set the end offset as high as possible, but not equal - // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. - d->top->end_offset = UINT32_MAX - 1; - d->bytesrc = bytesrc; - d->buf = NULL; - d->buf_bytesleft = 0; - d->buf_stream_offset = 0; - d->buf_offset = 0; -} diff --git a/src/upb_decoder.h b/src/upb_decoder.h deleted file mode 100644 index dde61fc..0000000 --- a/src/upb_decoder.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. 
- * - * upb_decoder implements a high performance, streaming decoder for protobuf - * data that works by implementing upb_src and getting its data from a - * upb_bytesrc. - * - * The decoder does not currently support non-blocking I/O, in the sense that - * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the - * decoder when data becomes available again. Support for this could be added, - * but it would add complexity and perhaps cost efficiency also. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_DECODER_H_ -#define UPB_DECODER_H_ - -#include -#include -#include "upb_def.h" -#include "upb_stream.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_decoder *****************************************************************/ - -// A upb_decoder decodes the binary protocol buffer format, writing the data it -// decodes to a upb_sink. -struct upb_decoder; -typedef struct upb_decoder upb_decoder; - -// Allocates and frees a upb_decoder, respectively. -upb_decoder *upb_decoder_new(upb_msgdef *md); -void upb_decoder_free(upb_decoder *d); - -// Resets the internal state of an already-allocated decoder. This puts it in a -// state where it has not seen any data, and expects the next data to be from -// the beginning of a new protobuf. Parsers must be reset before they can be -// used. A decoder can be reset multiple times. -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); - -// Returns a upb_src pointer by which the decoder can be used. The returned -// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). -upb_src *upb_decoder_getsrc(upb_decoder *d); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DECODER_H_ */ diff --git a/src/upb_def.c b/src/upb_def.c deleted file mode 100644 index bfab738..0000000 --- a/src/upb_def.c +++ /dev/null @@ -1,1022 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. 
- * - * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. - */ - -#include -#include "descriptor_const.h" -#include "descriptor.h" -#include "upb_def.h" - -#define CHECKSRC(x) if(!(x)) goto src_err -#define CHECK(x) if(!(x)) goto err - -// A little dynamic array for storing a growing list of upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size); - l->len = 0; -} - -static void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) - if(l->defs[i]) upb_def_unref(l->defs[i]); - free(l->defs); -} - -static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size); - } - l->defs[l->len++] = d; -} - -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns a ref on the returned string. */ -static upb_string *upb_join(upb_string *base, upb_string *name) { - upb_string *joined = upb_strdup(base); - upb_strlen_t len = upb_string_len(joined); - if(len > 0) { - upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; - } - upb_strcat(joined, name); - return joined; -} - -// Qualify the defname for all defs starting with offset "start" with "str". -static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } -} - -/* upb_def ********************************************************************/ - -// Defs are reference counted, but can have cycles when types are -// self-recursive or mutually recursive, so we need to be capable of collecting -// the cycles. In our situation defs are immutable (so cycles cannot be -// created or destroyed post-initialization). 
We need to be thread-safe but -// want to avoid locks if at all possible and rely only on atomic operations. -// -// Our scheme is as follows. First we give each def a flag indicating whether -// it is part of a cycle or not. Because defs are immutable, this flag will -// never change. For acyclic defs, we can use a naive algorithm and avoid the -// overhead of dealing with cycles. Most defs will be acyclic, and most cycles -// will be very short. -// -// For defs that participate in cycles we keep two reference counts. One -// tracks references that come from outside the cycle (we call these external -// references), and is incremented and decremented like a regular refcount. -// The other is a cycle refcount, and works as follows. Every cycle is -// considered distinct, even if two cycles share members. For example, this -// graph has two distinct cycles: -// -// A-->B-->C -// ^ | | -// +---+---+ -// -// The cycles in this graph are AB and ABC. When A's external refcount -// transitions from 0->1, we say that A takes "cycle references" on both -// cycles. Taking a cycle reference means incrementing the cycle refcount of -// all defs in the cycle. Since A and B are common to both cycles, A and B's -// cycle refcounts will be incremented by two, and C's will be incremented by -// one. Likewise, when A's external refcount transitions from 1->0, we -// decrement A and B's cycle refcounts by two and C's by one. We collect a -// cyclic type when its cycle refcount drops to zero. A precondition for this -// is that the external refcount has dropped to zero also. -// -// This algorithm is relatively cheap, since it only requires extra work when -// the external refcount on a cyclic type transitions from 0->1 or 1->0. 
- -static void upb_msgdef_free(upb_msgdef *m); -static void upb_enumdef_free(upb_enumdef *e); -static void upb_unresolveddef_free(struct _upb_unresolveddef *u); - -static void upb_def_free(upb_def *def) -{ - switch(def->type) { - case UPB_DEF_MSG: - upb_msgdef_free(upb_downcast_msgdef(def)); - break; - case UPB_DEF_ENUM: - upb_enumdef_free(upb_downcast_enumdef(def)); - break; - case UPB_DEF_SVC: - assert(false); /* Unimplemented. */ - break; - case UPB_DEF_UNRESOLVED: - upb_unresolveddef_free(upb_downcast_unresolveddef(def)); - break; - default: - assert(false); - } -} - -// Depth-first search for all cycles that include cycle_base. Returns the -// number of paths from def that lead to cycle_base, which is equivalent to the -// number of cycles def is in that include cycle_base. -// -// open_defs tracks the set of nodes that are currently being visited in the -// search so we can stop the search if we detect a cycles that do not involve -// cycle_base. We can't color the nodes as we go by writing to a member of the -// def, because another thread could be performing the search concurrently. -static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, - upb_msgdef **open_defs, int num_open_defs, - bool ref) { - bool found = false; - for(int i = 0; i < num_open_defs; i++) { - if(open_defs[i] == m) { - // We encountered a cycle that did not involve cycle_base. 
- found = true; - break; - } - } - - if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) { - return 0; - } else if(m == cycle_base) { - return 1; - } else { - int path_count = 0; - if(cycle_base == NULL) { - cycle_base = m; - } else { - open_defs[num_open_defs++] = m; - } - for(int i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - upb_def *def = f->def; - if(upb_issubmsg(f) && def->is_cyclic) { - upb_msgdef *sub_m = upb_downcast_msgdef(def); - path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs, - num_open_defs, ref); - } - } - if(ref) { - upb_atomic_add(&m->cycle_refcount, path_count); - } else { - if(upb_atomic_add(&m->cycle_refcount, -path_count)) - upb_def_free(UPB_UPCAST(m)); - } - return path_count; - } -} - -void _upb_def_reftozero(upb_def *def) { - if(def->is_cyclic) { - upb_msgdef *m = upb_downcast_msgdef(def); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false); - } else { - upb_def_free(def); - } -} - -void _upb_def_cyclic_ref(upb_def *def) { - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); -} - -static void upb_def_init(upb_def *def, upb_def_type type) { - def->type = type; - def->is_cyclic = 0; // We detect this later, after resolving refs. - def->search_depth = 0; - def->fqname = NULL; - upb_atomic_refcount_init(&def->refcount, 1); -} - -static void upb_def_uninit(upb_def *def) { - upb_string_unref(def->fqname); -} - - -/* upb_unresolveddef **********************************************************/ - -// Unresolved defs are used as temporary placeholders for a def whose name has -// not been resolved yet. During the name resolution step, all unresolved defs -// are replaced with pointers to the actual def being referenced. -typedef struct _upb_unresolveddef { - upb_def base; - - // The target type name. This may or may not be fully qualified. 
- upb_string *name; -} upb_unresolveddef; - -// Is passed a ref on the string. -static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { - upb_unresolveddef *def = malloc(sizeof(*def)); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = str; - return def; -} - -static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { - upb_def_uninit(&def->base); - free(def); -} - - -/* upb_enumdef ****************************************************************/ - -typedef struct { - upb_strtable_entry e; - uint32_t value; -} ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_string *string; -} iton_ent; - -static void upb_enumdef_free(upb_enumdef *e) { - upb_strtable_free(&e->ntoi); - upb_inttable_free(&e->iton); - upb_def_uninit(&e->base); - free(e); -} - -static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) -{ - int32_t number = -1; - upb_string *name = NULL; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); - break; - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); - break; - default: - CHECKSRC(upb_src_skipval(src)); - break; - } - } - - if(name == NULL || number == -1) { - upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - goto err; - } - ntoi_ent ntoi_ent = {{name, 0}, number}; - iton_ent iton_ent = {{number, 0}, name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, &iton_ent.e); - // We don't unref "name" because we pass our ref to the iton entry of the - // table. strtables can ref their keys, but the inttable doesn't know that - // the value is a string. 
- return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_string_unref(name); - return false; -} - -static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: - CHECK(upb_addenum_val(src, e, status)); - break; - default: - upb_src_skipval(src); - break; - } - } - upb_deflist_push(defs, UPB_UPCAST(e)); - return true; - -err: - upb_enumdef_free(e); - return false; -} - -static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { - iter->state = ent; - iter->name = ent->e.key; - iter->val = ent->value; -} - -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { - // We could iterate over either table here; the choice is arbitrary. 
- ntoi_ent *ent = upb_strtable_begin(&e->ntoi); - iter->e = e; - fill_iter(iter, ent); -} - -void upb_enum_next(upb_enum_iter *iter) { - ntoi_ent *ent = iter->state; - assert(ent); - ent = upb_strtable_next(&iter->e->ntoi, &ent->e); - iter->state = ent; - if(ent) fill_iter(iter, ent); -} - -bool upb_enum_done(upb_enum_iter *iter) { - return iter->state == NULL; -} - - -/* upb_fielddef ***************************************************************/ - -static void upb_fielddef_free(upb_fielddef *f) { - free(f); -} - -static void upb_fielddef_uninit(upb_fielddef *f) { - upb_string_unref(f->name); - if(upb_hasdef(f) && f->owned) { - upb_def_unref(f->def); - } -} - -static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) -{ - upb_fielddef *f = malloc(sizeof(*f)); - f->def = NULL; - f->owned = false; - upb_fielddef *parsed_f; - int32_t tmp; - while((parsed_f = upb_src_getdef(src))) { - switch(parsed_f->number) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->type = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->label = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->number = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; - break; - } - } - } - CHECKSRC(upb_src_eof(src)); - // TODO: verify that all required fields were present. - assert((f->def != NULL) == upb_hasdef(f)); - - // Field was successfully read, add it as a field of the msgdef. 
- upb_itof_ent itof_ent = {{f->number, 0}, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; - upb_inttable_insert(&m->itof, &itof_ent.e); - upb_strtable_insert(&m->ntof, &ntof_ent.e); - return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); - upb_fielddef_free(f); - return false; -} - - -/* upb_msgdef *****************************************************************/ - -// Processes a google.protobuf.DescriptorProto, adding defs to "defs." -static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_atomic_refcount_init(&m->cycle_refcount, 0); - upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); - int32_t start_count = defs->len; - - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - m->base.fqname = upb_string_tryrecycle(m->base.fqname); - CHECKSRC(upb_src_getstr(src, m->base.fqname)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addfield(src, m, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addmsg(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - // TODO: extensions. 
- CHECKSRC(upb_src_skipval(src)); - } - } - CHECK(upb_src_eof(src)); - if(!m->base.fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - goto err; - } - upb_deflist_qualify(defs, m->base.fqname, start_count); - upb_deflist_push(defs, UPB_UPCAST(m)); - return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_msgdef_free(m); - return false; -} - -static void upb_msgdef_free(upb_msgdef *m) -{ - for (upb_field_count_t i = 0; i < m->num_fields; i++) - upb_fielddef_uninit(&m->fields[i]); - free(m->fields); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); - upb_def_uninit(&m->base); - free(m); -} - -static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { - (void)m; - if(f->owned) upb_def_unref(f->def); - f->def = def; - // We will later make the ref unowned if it is a part of a cycle. - f->owned = true; - upb_def_ref(def); -} - - -/* symtab internal ***********************************************************/ - -// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". -static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_string *package = NULL; - int32_t start_count = defs->len; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - package = upb_string_tryrecycle(package); - CHECKSRC(upb_src_getstr(src, package)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addmsg(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - // TODO: services and extensions. 
- CHECKSRC(upb_src_skipval(src)); - } - } - CHECK(upb_src_eof(src)); - upb_deflist_qualify(defs, package, start_count); - upb_string_unref(package); - return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_string_unref(package); - return false; -} - -/* Search for a character in a string, in reverse. */ -static int my_memrchr(char *data, char c, size_t len) -{ - int off = len-1; - while(off > 0 && data[off] != c) --off; - return off; -} - -typedef struct { - upb_strtable_entry e; - upb_def *def; -} upb_symtab_ent; - -// Given a symbol and the base symbol inside which it is defined, find the -// symbol's definition in t. -static upb_symtab_ent *upb_resolve(upb_strtable *t, - upb_string *base, upb_string *sym) -{ - if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN || - upb_string_len(sym) == 0) return NULL; - - if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' - upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1); - upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); - upb_string_unref(sym_str); - return e; - } else { - // Remove components from base until we find an entry or run out. - upb_string *sym_str = upb_string_new(); - int baselen = upb_string_len(base); - while(1) { - // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym - upb_strlen_t len = baselen + upb_string_len(sym) + 1; - char *buf = upb_string_getrwbuf(sym_str, len); - memcpy(buf, upb_string_getrobuf(base), baselen); - buf[baselen] = UPB_SYMBOL_SEPARATOR; - memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym)); - - upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); - if (e) return e; - else if(baselen == 0) return NULL; // No more scopes to try. - - baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); - } - } -} - -// Performs a pass over the type graph to find all cycles that include m. 
-static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) -{ - if(depth > UPB_MAX_TYPE_DEPTH) { - // We have found a non-cyclic path from the base of the type tree that - // exceeds the maximum allowed depth. There are many situations in upb - // where we recurse over the type tree (like for example, right now) and an - // absurdly deep tree could cause us to stack overflow on systems with very - // limited stacks. - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " - "depth %d in the type graph, which exceeds the maximum type " - "depth of %d.", UPB_UPCAST(m)->fqname, depth, - UPB_MAX_TYPE_DEPTH); - return false; - } else if(UPB_UPCAST(m)->search_depth == 1) { - // Cycle! - int cycle_len = depth - 1; - if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " - "in a cycle of length %d, which exceeds the maximum type " - "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, - UPB_MAX_TYPE_CYCLE_LEN); - } - return true; - } else if(UPB_UPCAST(m)->search_depth > 0) { - // This was a cycle, but did not originate from the base of our search tree. - // We'll find it when we call find_cycles() on this node directly. - return false; - } else { - UPB_UPCAST(m)->search_depth = ++depth; - bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - if(!upb_issubmsg(f)) continue; - upb_def *sub_def = f->def; - upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); - if(upb_symtab_findcycles(sub_m, depth, status)) { - cycle_found = true; - UPB_UPCAST(m)->is_cyclic = true; - if(f->owned) { - upb_atomic_unref(&sub_def->refcount); - f->owned = false; - } - } - } - UPB_UPCAST(m)->search_depth = 0; - return cycle_found; - } -} - -// Given a table of pending defs "tmptab" and a table of existing defs "symtab", -// resolves all of the unresolved refs for the defs in tmptab. 
-bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, - upb_status *status) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // Type names are resolved relative to the message in which they appear. - upb_string *base = e->e.key; - - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_string *name = upb_downcast_unresolveddef(f->def)->name; - - // Resolve from either the tmptab (pending adds) or symtab (existing - // defs). If both exist, prefer the pending add, because it will be - // overwriting the existing def. - upb_symtab_ent *found; - if(!(found = upb_resolve(tmptab, base, name)) && - !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_STATUS_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); - return false; - } - - // Check the type of the found def. - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(found->def->type != expected) { - upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); - return false; - } - upb_msgdef_resolve(m, f, found->def); - } - } - - // Deal with type cycles. - for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - // The findcycles() call will decrement the external refcount of the - if(!upb_symtab_findcycles(m, 0, status)) return false; - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); - } - - return true; -} - -// Given a list of defs, a list of extensions (in the future), and a flag -// indicating whether the new defs can overwrite existing defs in the symtab, -// attempts to add the given defs to the symtab. 
The whole operation either -// succeeds or fails. Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, - upb_status *status) -{ - upb_rwlock_wrlock(&s->lock); - - // Build a table of the defs we mean to add, for duplicate detection and name - // resolution. - upb_strtable tmptab; - upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); - for (uint32_t i = 0; i < defs->len; i++) { - upb_def *def = defs->defs[i]; - upb_symtab_ent e = {{def->fqname, 0}, def}; - - // Redefinition is never allowed within a single FileDescriptorSet. - // Additionally, we only allow overwriting of an existing definition if - // allow_redef is set. - if (upb_strtable_lookup(&tmptab, def->fqname) || - (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { - upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT, - UPB_STRARG(def->fqname)); - goto err; - } - - // Pass ownership from the deflist to the strtable. - upb_strtable_insert(&tmptab, &e.e); - defs->defs[i] = NULL; - } - - // TODO: process the list of extensions by modifying entries from - // tmptab in-place (copying them from the symtab first if necessary). - - CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); - - // The defs in tmptab have been vetted, and can be added to the symtab - // without causing errors. Now add all tmptab defs to the symtab, - // overwriting (and releasing a ref on) any existing defs with the same - // names. Ownership for tmptab defs passes from the tmptab to the symtab. 
- upb_symtab_ent *tmptab_e; - for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; - tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { - upb_symtab_ent *symtab_e = - upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); - if(symtab_e) { - upb_def_unref(symtab_e->def); - symtab_e->def = tmptab_e->def; - } else { - upb_strtable_insert(&s->symtab, &tmptab_e->e); - } - } - - upb_rwlock_unlock(&s->lock); - upb_strtable_free(&tmptab); - return true; - -err: - // We need to free all defs from "tmptab." - upb_rwlock_unlock(&s->lock); - for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(&tmptab); - return false; -} - - -/* upb_symtab *****************************************************************/ - -upb_symtab *upb_symtab_new() -{ - upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_refcount_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); - return s; -} - -static void upb_free_symtab(upb_strtable *t) -{ - upb_symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(t); -} - -void _upb_symtab_free(upb_symtab *s) -{ - upb_free_symtab(&s->symtab); - upb_free_symtab(&s->psymtab); - upb_rwlock_destroy(&s->lock); - free(s); -} - -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) -{ - upb_rwlock_rdlock(&s->lock); - int total = upb_strtable_count(&s->symtab); - // We may only use part of this, depending on how many symbols are of the - // correct type. 
- upb_def **defs = malloc(sizeof(*defs) * total); - upb_symtab_ent *e = upb_strtable_begin(&s->symtab); - int i = 0; - for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { - upb_def *def = e->def; - assert(def); - if(type == UPB_DEF_ANY || def->type == type) - defs[i++] = def; - } - upb_rwlock_unlock(&s->lock); - *count = i; - for(i = 0; i < *count; i++) - upb_def_ref(defs[i]); - return defs; -} - -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) -{ - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - - -upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - -void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) -{ - upb_deflist defs; - upb_deflist_init(&defs); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addfd(src, &defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - CHECKSRC(upb_src_skipval(src)); - } - } - CHECKSRC(upb_src_eof(src)); - CHECK(upb_symtab_add_defs(s, &defs, false, status)); - upb_deflist_uninit(&defs); - return; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_deflist_uninit(&defs); -} - - -/* upb_baredecoder ************************************************************/ - -// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer -// binary format. It is only used for bootstrapping. It can parse without -// having a upb_msgdef, which is why it is useful for bootstrapping the first -// msgdef. 
On the downside, it does not support: -// -// * having its input span multiple upb_strings. -// * reading any field of the returned upb_fielddef's except f->number. -// * keeping a pointer to the upb_fielddef* and reading it later (the same -// upb_fielddef is reused over and over). -// * detecting errors in the input (we trust that our input is known-good). -// -// It also does not support any of the follow protobuf features: -// * packed fields. -// * groups. -// * zig-zag-encoded types like sint32 and sint64. -// -// If descriptor.proto ever changed to use any of these features, this decoder -// would need to be extended to support them. - -typedef struct { - upb_src src; - upb_string *input; - upb_strlen_t offset; - upb_fielddef field; - upb_wire_type_t wire_type; - upb_strlen_t delimited_len; - upb_strlen_t stack[UPB_MAX_NESTING], *top; - upb_string *str; -} upb_baredecoder; - -static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) -{ - const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset; - const uint8_t *buf = start; - uint8_t last = 0x80; - uint64_t val = 0; - for(int bitpos = 0; (last & 0x80); buf++, bitpos += 7) - val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; - d->offset += buf - start; - return val; -} - -static uint32_t upb_baredecoder_readv32(upb_baredecoder *d) -{ - return (uint32_t)upb_baredecoder_readv64(d); // Truncate. -} - -static uint64_t upb_baredecoder_readf64(upb_baredecoder *d) -{ - uint64_t val; - memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 8); - d->offset += 8; - return val; -} - -static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) -{ - uint32_t val; - memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 4); - d->offset += 4; - return val; -} - -static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) -{ - // Detect end-of-submessage. 
- if(d->offset >= *d->top) { - d->src.eof = true; - return NULL; - } - - uint32_t key; - key = upb_baredecoder_readv32(d); - d->wire_type = key & 0x7; - d->field.number = key >> 3; - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - // For delimited wire values we parse the length now, since we need it in - // all cases. - d->delimited_len = upb_baredecoder_readv32(d); - } - return &d->field; -} - -static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) -{ - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - d->str = upb_string_tryrecycle(d->str); - upb_string_substr(d->str, d->input, d->offset, d->delimited_len); - } else { - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: - *val.uint64 = upb_baredecoder_readv64(d); - break; - case UPB_WIRE_TYPE_32BIT_VARINT: - *val.uint32 = upb_baredecoder_readv32(d); - break; - case UPB_WIRE_TYPE_64BIT: - *val.uint64 = upb_baredecoder_readf64(d); - break; - case UPB_WIRE_TYPE_32BIT: - *val.uint32 = upb_baredecoder_readf32(d); - break; - default: - assert(false); - } - } - return true; -} - -static bool upb_baredecoder_skipval(upb_baredecoder *d) -{ - upb_value val; - return upb_baredecoder_getval(d, upb_value_addrof(&val)); -} - -static bool upb_baredecoder_startmsg(upb_baredecoder *d) -{ - *(d->top++) = d->offset + d->delimited_len; - return true; -} - -static bool upb_baredecoder_endmsg(upb_baredecoder *d) -{ - d->offset = *(--d->top); - return true; -} - -static upb_src_vtable upb_baredecoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_baredecoder_getdef, - (upb_src_getval_fptr)&upb_baredecoder_getval, - (upb_src_skipval_fptr)&upb_baredecoder_skipval, - (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, -}; - -static upb_baredecoder *upb_baredecoder_new(upb_string *str) -{ - upb_baredecoder *d = malloc(sizeof(*d)); - d->input = upb_string_getref(str); - d->str = upb_string_new(); - d->top = &d->stack[0]; - upb_src_init(&d->src, &upb_baredecoder_src_vtbl); - return d; 
-} - -static void upb_baredecoder_free(upb_baredecoder *d) -{ - upb_string_unref(d->input); - upb_string_unref(d->str); - free(d); -} - -static upb_src *upb_baredecoder_src(upb_baredecoder *d) -{ - return &d->src; -} - -upb_symtab *upb_get_descriptor_symtab() -{ - // TODO: implement sharing of symtabs, so that successive calls to this - // function will return the same symtab. - upb_symtab *symtab = upb_symtab_new(); - // TODO: allow upb_strings to be static or on the stack. - upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); - upb_baredecoder *decoder = upb_baredecoder_new(descriptor); - upb_status status; - upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); - assert(upb_ok(&status)); - upb_baredecoder_free(decoder); - upb_string_unref(descriptor); - return symtab; -} diff --git a/src/upb_def.h b/src/upb_def.h deleted file mode 100644 index c297e83..0000000 --- a/src/upb_def.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * Provides definitions of .proto constructs: - * - upb_msgdef: describes a "message" construct. - * - upb_fielddef: describes a message field. - * - upb_enumdef: describes an enum. - * (TODO: definitions of extensions and services). - * - * Defs are obtained from a upb_symtab object. A upb_symtab is empty when - * constructed, and definitions can be added by supplying serialized - * descriptors. - * - * Defs are immutable and reference-counted. Symbol tables reference any defs - * that are the "current" definitions. If an extension is loaded that adds a - * field to an existing message, a new msgdef is constructed that includes the - * new field and the old msgdef is unref'd. The old msgdef will still be ref'd - * by messages (if any) that were constructed with that msgdef. - * - * This file contains routines for creating and manipulating the definitions - * themselves. 
To create and manipulate actual messages, see upb_msg.h. - */ - -#ifndef UPB_DEF_H_ -#define UPB_DEF_H_ - -#include "upb_atomic.h" -#include "upb_stream.h" -#include "upb_table.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_def: base class for defs **********************************************/ - -// All the different kind of defs we support. These correspond 1:1 with -// declarations in a .proto file. -typedef enum { - UPB_DEF_MSG = 0, - UPB_DEF_ENUM, - UPB_DEF_SVC, - UPB_DEF_EXT, - // Internal-only, placeholder for a def that hasn't be resolved yet. - UPB_DEF_UNRESOLVED, - - // For specifying that defs of any type are requsted from getdefs. - UPB_DEF_ANY = -1 -} upb_def_type; - -// This typedef is more space-efficient than declaring an enum var directly. -typedef int8_t upb_def_type_t; - -typedef struct { - upb_string *fqname; // Fully qualified. - upb_atomic_refcount_t refcount; - upb_def_type_t type; - - // The is_cyclic flag could go in upb_msgdef instead of here, because only - // messages can be involved in cycles. However, putting them here is free - // from a space perspective because structure alignment will otherwise leave - // three bytes empty after type. It is also makes ref and unref more - // efficient, because we don't have to downcast to msgdef before checking the - // is_cyclic flag. - bool is_cyclic; - uint16_t search_depth; // Used during initialization dfs. -} upb_def; - -// These must not be called directly! -void _upb_def_cyclic_ref(upb_def *def); -void _upb_def_reftozero(upb_def *def); - -// Call to ref/deref a def. -INLINE void upb_def_ref(upb_def *def) { - if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); -} -INLINE void upb_def_unref(upb_def *def) { - if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); -} - -/* upb_fielddef ***************************************************************/ - -// A upb_fielddef describes a single field in a message. 
It isn't a full def -// in the sense that it derives from upb_def. It cannot stand on its own; it -// is either a field of a upb_msgdef or contained inside a upb_extensiondef. -// It is also reference-counted. -typedef struct _upb_fielddef { - upb_atomic_refcount_t refcount; - upb_string *name; - upb_field_number_t number; - upb_field_type_t type; - upb_label_t label; - upb_value default_value; - - // For the case of an enum or a submessage, points to the def for that type. - upb_def *def; - - // True if we own a ref on "def" (above). This is true unless this edge is - // part of a cycle. - bool owned; - - // These are set only when this fielddef is part of a msgdef. - uint32_t byte_offset; // Where in a upb_msg to find the data. - upb_field_count_t field_index; // Indicates set bit. -} upb_fielddef; - -// A variety of tests about the type of a field. -INLINE bool upb_issubmsg(upb_fielddef *f) { - return upb_issubmsgtype(f->type); -} -INLINE bool upb_isstring(upb_fielddef *f) { - return upb_isstringtype(f->type); -} -INLINE bool upb_isarray(upb_fielddef *f) { - return f->label == UPB_LABEL(REPEATED); -} -// Does the type of this field imply that it should contain an associated def? -INLINE bool upb_hasdef(upb_fielddef *f) { - return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); -} - -INLINE bool upb_field_ismm(upb_fielddef *f) { - return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); -} - -INLINE bool upb_elem_ismm(upb_fielddef *f) { - return upb_isstring(f) || upb_issubmsg(f); -} - -/* upb_msgdef *****************************************************************/ - -// Structure that describes a single .proto message type. -typedef struct _upb_msgdef { - upb_def base; - upb_atomic_refcount_t cycle_refcount; - size_t size; - upb_field_count_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; // Required fields have the lowest set bytemasks. - upb_fielddef *fields; // We have exclusive ownership of these. 
- - // Tables for looking up fields by number and name. - upb_inttable itof; // int to field - upb_strtable ntof; // name to field -} upb_msgdef; - -// Hash table entries for looking up fields by name or number. -typedef struct { - upb_inttable_entry e; - upb_fielddef *f; -} upb_itof_ent; -typedef struct { - upb_strtable_entry e; - upb_fielddef *f; -} upb_ntof_ent; - -// Looks up a field by name or number. While these are written to be as fast -// as possible, it will still be faster to cache the results of this lookup if -// possible. These return NULL if no such field is found. -INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { - upb_itof_ent *e = - (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); - return e ? e->f : NULL; -} - -INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { - upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); - return e ? e->f : NULL; -} - -/* upb_enumdef ****************************************************************/ - -typedef struct _upb_enumdef { - upb_def base; - upb_strtable ntoi; - upb_inttable iton; -} upb_enumdef; - -typedef int32_t upb_enumval_t; - -// Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); -upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); - -// Iteration over name/value pairs. The order is undefined. -// upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { -// // ... -// } -typedef struct { - upb_enumdef *e; - void *state; // Internal iteration state. - upb_string *name; - upb_enumval_t val; -} upb_enum_iter; -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); -void upb_enum_next(upb_enum_iter *iter); -bool upb_enum_done(upb_enum_iter *iter); - -/* upb_symtab *****************************************************************/ - -// A SymbolTable is where upb_defs live. It is empty when first constructed. 
-// Clients add definitions to the symtab by supplying unserialized or -// serialized descriptors (as defined in descriptor.proto). -typedef struct { - upb_atomic_refcount_t refcount; - upb_rwlock_t lock; // Protects all members except the refcount. - upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. - - // Our symbol tables; we own refs to the defs therein. - upb_strtable symtab; // The main symbol table. - upb_strtable psymtab; // Private symbols, for internal use. -} upb_symtab; - -// Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd -// when the caller is done with them. -upb_symtab *upb_symtab_new(void); -void _upb_symtab_free(upb_symtab *s); // Must not be called directly! - -INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } -INLINE void upb_symtab_unref(upb_symtab *s) { - if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); -} - -// Resolves the given symbol using the rules described in descriptor.proto, -// namely: -// -// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like -// scoping rules are used to find the type (i.e. first the nested types -// within this message are searched, then within the parent, on up to the -// root namespace). -// -// If a def is found, the caller owns one ref on the returned def. Otherwise -// returns NULL. -upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); - -// Find an entry in the symbol table with this exact name. If a def is found, -// the caller owns one ref on the returned def. Otherwise returns NULL. -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); - -// Gets an array of pointers to all currently active defs in this symtab. The -// caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are -// returned, otherwise only defs of the required type are returned. 
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); - -// "fds" is a upb_src that will yield data from the -// google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds -// all the definitions from the given FileDescriptorSet and adds them to the -// symtab. status indicates whether the operation was successful or not, and -// the error message (if any). -// -// TODO: should this allow redefinition? Either is possible, but which is -// more useful? Maybe it should be an option. -void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); - -// Returns a symtab that defines google.protobuf.DescriptorProto and all other -// types that are defined in descriptor.proto. This allows you to load other -// proto types. The caller owns a ref on the returned symtab. -upb_symtab *upb_get_descriptor_symtab(); - - -/* upb_def casts **************************************************************/ - -// Dynamic casts, for determining if a def is of a particular type at runtime. -#define UPB_DYNAMIC_CAST_DEF(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. */ \ - INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ - return (struct _upb_ ## lower*)def; \ - } -UPB_DYNAMIC_CAST_DEF(msgdef, MSG); -UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); -UPB_DYNAMIC_CAST_DEF(svcdef, SVC); -UPB_DYNAMIC_CAST_DEF(extdef, EXT); -UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); -#undef UPB_DYNAMIC_CAST_DEF - -// Downcasts, for when some wants to assert that a def is of a particular type. -// These are only checked if we are building debug. -#define UPB_DOWNCAST_DEF(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. 
*/ \ - INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ - assert(def->type == UPB_DEF_ ## upper); \ - return (struct _upb_ ## lower*)def; \ - } -UPB_DOWNCAST_DEF(msgdef, MSG); -UPB_DOWNCAST_DEF(enumdef, ENUM); -UPB_DOWNCAST_DEF(svcdef, SVC); -UPB_DOWNCAST_DEF(extdef, EXT); -UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); -#undef UPB_DOWNCAST_DEF - -#define UPB_UPCAST(ptr) (&(ptr)->base) - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DEF_H_ */ diff --git a/src/upb_encoder.c b/src/upb_encoder.c deleted file mode 100644 index 304a423..0000000 --- a/src/upb_encoder.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_encoder.h" - -#include -#include "descriptor.h" - -/* Functions for calculating sizes of wire values. ****************************/ - -static size_t upb_v_uint64_t_size(uint64_t val) { -#ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. -#else - int high_bit = 0; - uint64_t tmp = val; - while(tmp >>= 1) high_bit++; -#endif - return val == 0 ? 1 : high_bit / 7 + 1; -} - -static size_t upb_v_int32_t_size(int32_t val) { - // v_uint32's are sign-extended to maintain wire compatibility with int64s. - return upb_v_uint64_t_size((int64_t)val); -} -static size_t upb_v_uint32_t_size(uint32_t val) { - return upb_v_uint64_t_size(val); -} -static size_t upb_f_uint64_t_size(uint64_t val) { - (void)val; // Length is independent of value. - return sizeof(uint64_t); -} -static size_t upb_f_uint32_t_size(uint32_t val) { - (void)val; // Length is independent of value. - return sizeof(uint32_t); -} - - -/* Functions to write wire values. ********************************************/ - -// Since we know in advance the longest that the value could be, we always make -// sure that our buffer is long enough. This saves us from having to perform -// bounds checks. 
- -// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT). -static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val) -{ - do { - uint8_t byte = val & 0x7f; - val >>= 7; - if(val) byte |= 0x80; - *buf++ = byte; - } while(val); - return buf; -} - -// Puts an unsigned 32-bit varint, verbatim. Never uses the high 64 bits. -static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val) -{ - return upb_put_v_uint64_t(buf, val); -} - -// Puts a signed 32-bit varint, first sign-extending to 64-bits. We do this to -// maintain wire-compatibility with 64-bit signed integers. -static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val) -{ - return upb_put_v_uint64_t(buf, (int64_t)val); -} - -static void upb_put32(uint8_t *buf, uint32_t val) { - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24); -} - -// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); -#if UPB_UNALIGNED_READS_OK - *(uint32_t*)buf = val; -#else - upb_put32(buf, val); -#endif - return uint32_end; -} - -// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). -static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); -#if UPB_UNALIGNED_READS_OK - *(uint64_t*)buf = val; -#else - upb_put32(buf, (uint32_t)val); - upb_put32(buf, (uint32_t)(val >> 32)); -#endif - return uint64_end; -} - -/* Functions to write and calculate sizes for .proto values. ******************/ - -// Performs zig-zag encoding, which is used by sint32 and sint64. -static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } -static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } - -/* Use macros to define a set of two functions for each .proto type: - * - * // Converts and writes a .proto value into buf. 
"end" indicates the end - * // of the current available buffer (if the buffer does not contain enough - * // space UPB_STATUS_NEED_MORE_DATA is returned). On success, *outbuf will - * // point one past the data that was written. - * uint8_t *upb_put_INT32(uint8_t *buf, int32_t val); - * - * // Returns the number of bytes required to encode val. - * size_t upb_get_INT32_size(int32_t val); - * - * // Given a .proto value s (source) convert it to a wire value. - * uint32_t upb_vtowv_INT32(int32_t s); - */ - -#define VTOWV(type, wire_t, val_t) \ - static wire_t upb_vtowv_ ## type(val_t s) - -#define PUT(type, v_or_f, wire_t, val_t, member_name) \ - static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \ - wire_t tmp = upb_vtowv_ ## type(val); \ - return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \ - } - -#define T(type, v_or_f, wire_t, val_t, member_name) \ - static size_t upb_get_ ## type ## _size(val_t val) { \ - return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \ - } \ - VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \ - PUT(type, v_or_f, wire_t, val_t, member_name) \ - VTOWV(type, wire_t, val_t) - -T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } -T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; } -T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(DOUBLE, f, uint64_t, double, _double) { - upb_value v; - v._double = s; - return v.uint64; -} -T(FLOAT, f, uint32_t, 
float, _float) { - upb_value v; - v._float = s; - return v.uint32; -} -#undef VTOWV -#undef PUT -#undef T - -static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return buf; - } -#undef CASE -} - -static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return 0; - } -#undef CASE -} - -static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, - upb_wire_type_t wt) -{ - return upb_put_UINT32(buf, wt | (num << 3)); -} - -static uint32_t _upb_get_tag_size(upb_field_number_t num) -{ - return upb_get_UINT32_size(num << 3); -} - - -/* upb_sizebuilder ************************************************************/ - -struct upb_sizebuilder { - // Accumulating size for the current level. - uint32_t size; - - // Stack of sizes for our current nesting. - uint32_t stack[UPB_MAX_NESTING], *top; - - // Vector of sizes. - uint32_t *sizes; - int sizes_len; - int sizes_size; - - upb_status status; -}; - -// upb_sink callbacks. 
-static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, - upb_status *status) -{ - (void)status; - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += _upb_get_value_size(f->type, val); - sb->size += size; - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - (void)status; - (void)str; // String data itself is not used. - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(start >= 0) { - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += upb_get_UINT32_size(end - start); - sb->size += size; - } - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - (void)status; - (void)f; // Unused (we calculate tag size and delimiter in endcb). - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - *sb->top = sb->size; - sb->top++; - sb->size = 0; - } else { - assert(f->type == UPB_TYPE(GROUP)); - sb->size += _upb_get_tag_size(f->number); - } - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - (void)status; - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - sb->top--; - if(sb->sizes_len == sb->sizes_size) { - sb->sizes_size *= 2; - sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes)); - } - uint32_t child_size = sb->size; - uint32_t parent_size = *sb->top; - sb->sizes[sb->sizes_len++] = child_size; - // The size according to the parent includes the tag size and delimiter of - // the submessage. - parent_size += upb_get_UINT32_size(child_size); - parent_size += _upb_get_tag_size(f->number); - // Include size accumulated in parent before child began. 
- sb->size = child_size + parent_size; - } else { - assert(f->type == UPB_TYPE(GROUP)); - // As an optimization, we could just add this number twice in startcb, to - // avoid having to recalculate it. - sb->size += _upb_get_tag_size(f->number); - } - return UPB_SINK_CONTINUE; -} - -upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { - _upb_sizebuilder_valuecb, - _upb_sizebuilder_strcb, - _upb_sizebuilder_startcb, - _upb_sizebuilder_endcb -}; - - -/* upb_sink callbacks *********************************************************/ - -struct upb_encoder { - upb_sink base; - //upb_bytesink *bytesink; - uint32_t *sizes; - int size_offset; -}; - - -// Within one callback we may need to encode up to two separate values. -#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2) - -static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf, - size_t len, upb_status *status) -{ - // TODO: conjure a upb_strptr that points to buf. - //upb_strptr ptr; - (void)s; - (void)buf; - (void)status; - size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr); - if(written < len) { - // TODO: mark to skip "written" bytes next time. - return UPB_SINK_STOP; - } else { - return UPB_SINK_CONTINUE; - } -} - -static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - upb_wire_type_t wt = upb_types[f->type].expected_wire_type; - // TODO: handle packed encoding. 
- ptr = _upb_put_tag(ptr, f->number, wt); - ptr = upb_encode_value(ptr, f->type, val); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(start >= 0) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, end - start); - } - // TODO: properly handle partially consumed strings and partially supplied - // strings. - _upb_encoder_push_buf(s, buf, ptr - buf, status); - return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status); -} - -static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type == UPB_TYPE(GROUP)) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP); - } else { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]); - } - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE; - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -upb_sink_callbacks _upb_encoder_sink_vtbl = { - _upb_encoder_valuecb, - _upb_encoder_strcb, - _upb_encoder_startcb, - _upb_encoder_endcb -}; - diff --git a/src/upb_encoder.h b/src/upb_encoder.h deleted file mode 100644 index e879b0b..0000000 --- a/src/upb_encoder.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. 
- * - * Implements a upb_sink that writes protobuf data to the binary wire format. - * - * For messages that have any submessages, the encoder needs a buffer - * containing the submessage sizes, so they can be properly written at the - * front of each message. Note that groups do *not* have this requirement. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_ENCODER_H_ -#define UPB_ENCODER_H_ - -#include "upb.h" -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_encoder ****************************************************************/ - -// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol -// buffer binary wire format. -struct upb_encoder; -typedef struct upb_encoder upb_encoder; - -upb_encoder *upb_encoder_new(upb_msgdef *md); -void upb_encoder_free(upb_encoder *e); - -// Resets the given upb_encoder such that is is ready to begin encoding, -// outputting data to "bytesink" (which must live until the encoder is -// reset or destroyed). -void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); - -// Returns the upb_sink to which data can be written. The sink is invalidated -// when the encoder is reset or destroyed. Note that if the client wants to -// encode any length-delimited submessages it must first call -// upb_encoder_buildsizes() below. -upb_sink *upb_encoder_sink(upb_encoder *e); - -// Call prior to pushing any data with embedded submessages. "src" must yield -// exactly the same data as what will next be encoded, but in reverse order. -// The encoder iterates over this data in order to determine the sizes of the -// submessages. If any errors are returned by the upb_src, the status will -// be saved in *status. If the client is sure that the upb_src will not throw -// any errors, "status" may be NULL. 
-void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ENCODER_H_ */ diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c deleted file mode 100644 index 5db04f6..0000000 --- a/src/upb_inlinedefs.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file, if compiled, will contain standalone (non-inlined) versions of - * all inline functions defined in header files. We don't generally use this - * file since we use "static inline" for inline functions (which will put a - * standalone version of the function in any .o file that needs it, but - * compiling this file and dumping the object file will let us inspect how - * inline functions are compiled, so we keep it around. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#define INLINE -#include "upb.h" -#include "upb_data.h" -#include "upb_def.h" -#include "upb_parse.h" -#include "upb_table.h" -#include "upb_text.h" diff --git a/src/upb_stream.h b/src/upb_stream.h deleted file mode 100644 index e7b4074..0000000 --- a/src/upb_stream.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file defines four general-purpose streaming interfaces for protobuf - * data or bytes: - * - * - upb_src: pull interface for protobuf data. - * - upb_sink: push interface for protobuf data. - * - upb_bytesrc: pull interface for bytes. - * - upb_bytesink: push interface for bytes. - * - * These interfaces are used as general-purpose glue in upb. For example, the - * decoder interface works by implementing a upb_src and calling a upb_bytesrc. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - */ - -#ifndef UPB_SRCSINK_H -#define UPB_SRCSINK_H - -#include "upb_stream_vtbl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Forward-declare. 
We can't include upb_def.h; it would be circular. -struct _upb_fielddef; - -// Note! The "eof" flags work like feof() in C; they cannot report end-of-file -// until a read has failed due to eof. They cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. - -/* upb_src ********************************************************************/ - -// TODO: decide how to handle unknown fields. - -// Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. -struct _upb_fielddef *upb_src_getdef(upb_src *src); - -// Retrieves and stores the next value in "val". For string types "val" must -// be a newly-recycled string. Returns false on error. -bool upb_src_getval(upb_src *src, upb_valueptr val); -bool upb_src_getstr(upb_src *src, upb_string *val); - -// Like upb_src_getval() but skips the value. -bool upb_src_skipval(upb_src *src); - -// Descends into a submessage. May only be called after a def has been -// returned that indicates a submessage. -bool upb_src_startmsg(upb_src *src); - -// Stops reading a submessage. May be called before the stream is EOF, in -// which case the rest of the submessage is skipped. -bool upb_src_endmsg(upb_src *src); - -// Returns the current error/eof status for the stream. -INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } -INLINE bool upb_src_eof(upb_src *src) { return src->eof; } - -// The following functions are equivalent to upb_src_getval(), but take -// pointers to specific types. In debug mode this may check that the type -// is compatible with the type being read. This check will *not* be performed -// in non-debug mode, and if you get the type wrong the behavior is undefined. 
-bool upb_src_getbool(upb_src *src, bool *val); -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); - -/* upb_sink *******************************************************************/ - -// Puts the given fielddef into the stream. -bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); - -// Puts the given value into the stream. -bool upb_sink_putval(upb_sink *sink, upb_value val); - -// Starts a submessage. (needed? the def tells us we're starting a submsg.) -bool upb_sink_startmsg(upb_sink *sink); - -// Ends a submessage. -bool upb_sink_endmsg(upb_sink *sink); - -// Returns the current error status for the stream. -upb_status *upb_sink_status(upb_sink *sink); - -/* upb_bytesrc ****************************************************************/ - -// Returns the next string in the stream. false is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); - -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. -bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); - -// Returns the current error status for the stream. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } - -/* upb_bytesink ***************************************************************/ - -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. 
-int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); - -// Returns the current error status for the stream. -upb_status *upb_bytesink_status(upb_bytesink *sink); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_stream_vtbl.h b/src/upb_stream_vtbl.h deleted file mode 100644 index 0ec45d2..0000000 --- a/src/upb_stream_vtbl.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * vtable declarations for types that are implementing any of the src or sink - * interfaces. Only components that are implementing these interfaces need - * to worry about this file. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_SRCSINK_VTBL_H_ -#define UPB_SRCSINK_VTBL_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct upb_src; -typedef struct upb_src upb_src; -struct upb_sink; -typedef struct upb_sink upb_sink; -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - -// Typedefs for function pointers to all of the virtual functions. 
-typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); - -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); - -typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); -typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); - -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); - -// Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; -} upb_src_vtable; - -typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; - upb_bytesrc_recycle_fptr recycle; -} upb_bytesrc_vtable; - -// "Base Class" definitions; components that implement these interfaces should -// contain one of these structures. - -struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -#ifndef NDEBUG - int state; // For debug-mode checking of API usage. -#endif -}; - -struct upb_bytesrc { - upb_bytesrc_vtable *vtbl; - upb_status status; - bool eof; -}; - -INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; -#ifndef DEBUG - // TODO: initialize debug-mode checking. 
-#endif -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_string.c b/src/upb_string.c deleted file mode 100644 index 91ab9ae..0000000 --- a/src/upb_string.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_string.h" - -#include - -#define UPB_STRING_UNFINALIZED -1 - -static uint32_t upb_round_up_pow2(uint32_t v) { - // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -upb_string *upb_string_new() { - upb_string *str = malloc(sizeof(*str)); - str->ptr = NULL; - str->size = 0; - str->len = UPB_STRING_UNFINALIZED; - upb_atomic_refcount_init(&str->refcount, 1); - return str; -} - -void _upb_string_free(upb_string *str) { - if(str->ptr) free(str->ptr); - free(str); -} - -char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->len == UPB_STRING_UNFINALIZED); - if (str->size < len) { - str->size = upb_round_up_pow2(len); - str->ptr = realloc(str->ptr, str->size); - } - str->len = len; - return str->ptr; -} diff --git a/src/upb_string.h b/src/upb_string.h deleted file mode 100644 index 770dba7..0000000 --- a/src/upb_string.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - * This file defines a simple string type. The overriding goal of upb_string - * is to avoid memcpy(), malloc(), and free() wheverever possible, while - * keeping both CPU and memory overhead low. Throughout upb there are - * situations where one wants to reference all or part of another string - * without copying. upb_string provides APIs for doing this. - * - * Characteristics of upb_string: - * - strings are reference-counted. - * - strings are logically immutable. 
- * - if a string has no other referents, it can be "recycled" into a new string - * without having to reallocate the upb_string. - * - strings can be substrings of other strings (owning a ref on the source - * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). - * - strings are not thread-safe by default, but can be made so by calling a - * function. This is not the default because it causes extra CPU overhead. - */ - -#ifndef UPB_STRING_H -#define UPB_STRING_H - -#include -#include -#include "upb_atomic.h" -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// All members of this struct are private, and may only be read/written through -// the associated functions. Also, strings may *only* be allocated on the heap. -struct _upb_string { - char *ptr; - int32_t len; - uint32_t size; - upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_atomic_refcount_t reader_count; - } extra; -}; - -// Returns a newly-created, empty, non-finalized string. When the string is no -// longer needed, it should be unref'd, never freed directly. -upb_string *upb_string_new(); - -void _upb_string_free(upb_string *str); - -// Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. -INLINE void upb_string_unref(upb_string *str) { - if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); -} - -// Returns a string with the same contents as "str". The caller owns a ref on -// the returned string, which may or may not be the same object as "str. -INLINE upb_string *upb_string_getref(upb_string *str) { - // If/when we support stack-allocated strings, this will have to allocate - // a new string if the given string is on the stack. 
- upb_atomic_ref(&str->refcount); - return str; -} - -// Returns the length of the string. -INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } - -// Use to read the bytes of the string. The caller *must* call -// upb_string_endread() after the data has been read. The window between -// upb_string_getrobuf() and upb_string_endread() should be kept as short as -// possible, because any pending upb_string_detach() may be blocked until -// upb_string_endread is called(). No other functions may be called on the -// string during this window except upb_string_len(). -INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } -INLINE void upb_string_endread(upb_string *str) { (void)str; } - -// Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The returned string is either "str" if it could be -// recycled or a newly created string if "str" has other references. -// -// As a special case, passing NULL will allocate a new string. This is -// convenient for the pattern: -// -// upb_string *str = NULL; -// while (x) { -// if (y) { -// str = upb_string_tryrecycle(str); -// upb_src_getstr(str); -// } -// } -upb_string *upb_string_tryrecycle(upb_string *str); - -// The three options for setting the contents of a string. These may only be -// called when a string is first created or recycled; once other functions have -// been called on the string, these functions are not allowed until the string -// is recycled. - -// Gets a pointer suitable for writing to the string, which is guaranteed to -// have at least "len" bytes of data available. The size of the string will -// become "len". -char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); - -// Sets the contents of "str" to be the given substring of "target_str", to -// which the caller must own a ref. 
-void upb_string_substr(upb_string *str, upb_string *target_str, - upb_strlen_t start, upb_strlen_t len); - -// Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until -// a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); - -// Allows using upb_strings in printf, ie: -// upb_strptr str = UPB_STRLIT("Hello, World!\n"); -// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) -#define UPB_STRFMT "%.*s" - -/* upb_string library functions ***********************************************/ - -// Named like their counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. - -// More efficient than upb_strcmp if all you need is to test equality. -INLINE bool upb_streql(upb_string *s1, upb_string *s2) { - upb_strlen_t len = upb_string_len(s1); - if(len != upb_string_len(s2)) { - return false; - } else { - bool ret = - memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; - upb_string_endread(s1); - upb_string_endread(s2); - return ret; - } -} - -// Like strcmp(). -int upb_strcmp(upb_string *s1, upb_string *s2); - -// Like upb_strcpy, but copies from a buffer and length. -INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { - memcpy(upb_string_getrwbuf(dest, len), src, len); -} - -// Replaces the contents of "dest" with the contents of "src". -INLINE void upb_strcpy(upb_string *dest, upb_string *src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src)); - upb_string_endread(src); -} - -// Like upb_strcpy, but copies from a NULL-terminated string. 
-INLINE void upb_strcpyc(upb_string *dest, const char *src) { - // This does two passes over src, but that is necessary unless we want to - // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); -} - -// Returns a new string whose contents are a copy of s. -upb_string *upb_strdup(upb_string *s); - -// Like upb_strdup(), but duplicates a given buffer and length. -INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { - upb_string *s = upb_string_new(); - upb_strcpylen(s, src, len); - return s; -} - -// Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); - -// Appends 'append' to 's' in-place, resizing s if necessary. -void upb_strcat(upb_string *s, upb_string *append); - -// Returns a new string that is a substring of the given string. -upb_string *upb_strslice(upb_string *s, int offset, int len); - -// Reads an entire file into a newly-allocated string. -upb_string *upb_strreadfile(const char *filename); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_table.c b/src/upb_table.c deleted file mode 100644 index b91776c..0000000 --- a/src/upb_table.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_table.h" -#include "upb_string.h" - -#include -#include -#include - -static const upb_inttable_key_t EMPTYENT = 0; -static const double MAX_LOAD = 0.85; - -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); - -/* We use 1-based indexes into the table so that 0 can be "NULL". 
*/ -static upb_inttable_entry *intent(upb_inttable *t, int32_t i) { - return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); -} -static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { - return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); -} - -void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) -{ - t->count = 0; - t->entry_size = entry_size; - t->size_lg2 = 1; - while(size >>= 1) t->size_lg2++; - size_t bytes = upb_table_size(t) * t->entry_size; - t->mask = upb_table_size(t) - 1; - t->entries = malloc(bytes); - memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ -} - -void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize) -{ - upb_table_init(&t->t, size, entsize); -} - -void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) -{ - upb_table_init(&t->t, size, entsize); -} - -void upb_table_free(upb_table *t) { free(t->entries); } -void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); } -void upb_strtable_free(upb_strtable *t) { - // Free refs from the strtable. - upb_strtable_entry *e = upb_strtable_begin(t); - for(; e; e = upb_strtable_next(t, e)) { - upb_string_unref(e->key); - } - upb_table_free(&t->t); -} - -static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) -{ - uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0); - return (hash & (upb_strtable_size(t)-1)) + 1; -} - -void *upb_strtable_lookup(upb_strtable *t, upb_string *key) -{ - uint32_t bucket = strtable_bucket(t, key); - upb_strtable_entry *e; - do { - e = strent(t, bucket); - if(e->key && upb_streql(e->key, key)) return e; - } while((bucket = e->next) != UPB_END_OF_CHAIN); - return NULL; -} - -static uint32_t empty_intbucket(upb_inttable *table) -{ - /* TODO: does it matter that this is biased towards the front of the table? 
*/ - for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { - upb_inttable_entry *e = intent(table, i); - if(e->key == EMPTYENT) return i; - } - assert(false); - return 0; -} - -/* The insert routines have a lot more code duplication between int/string - * variants than I would like, but there's just a bit too much that varies to - * parameterize them. */ -static void intinsert(upb_inttable *t, upb_inttable_entry *e) -{ - assert(upb_inttable_lookup(t, e->key) == NULL); - t->t.count++; - uint32_t bucket = upb_inttable_bucket(t, e->key); - upb_inttable_entry *table_e = intent(t, bucket); - if(table_e->key != EMPTYENT) { /* Collision. */ - if(bucket == upb_inttable_bucket(t, table_e->key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_intbucket(t); - while (table_e->next != UPB_END_OF_CHAIN) - table_e = intent(t, table_e->next); - table_e->next = empty_bucket; - table_e = intent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. */ - uint32_t empty_bucket = empty_intbucket(t); - uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); - memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_inttable_entry *evictee_e = intent(t, evictee_bucket); - while(1) { - assert(evictee_e->key != UPB_EMPTY_ENTRY); - assert(evictee_e->next != UPB_END_OF_CHAIN); - if(evictee_e->next == bucket) { - evictee_e->next = empty_bucket; - break; - } - evictee_e = intent(t, evictee_e->next); - } - /* table_e remains set to our mainpos. 
*/ - } - } - memcpy(table_e, e, t->t.entry_size); - table_e->next = UPB_END_OF_CHAIN; - assert(upb_inttable_lookup(t, e->key) == table_e); -} - -void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e) -{ - assert(e->key != 0); - if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) { - /* Need to resize. New table of double the size, add old elements to it. */ - upb_inttable new_table; - upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); - new_table.t.count = t->t.count; - upb_inttable_entry *old_e; - for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e)) - intinsert(&new_table, old_e); - upb_inttable_free(t); - *t = new_table; - } - intinsert(t, e); -} - -static uint32_t empty_strbucket(upb_strtable *table) -{ - /* TODO: does it matter that this is biased towards the front of the table? */ - for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { - upb_strtable_entry *e = strent(table, i); - if(!e->key) return i; - } - assert(false); - return 0; -} - -static void strinsert(upb_strtable *t, upb_strtable_entry *e) -{ - assert(upb_strtable_lookup(t, e->key) == NULL); - e->key = upb_string_getref(e->key); - t->t.count++; - uint32_t bucket = strtable_bucket(t, e->key); - upb_strtable_entry *table_e = strent(t, bucket); - if(table_e->key) { /* Collision. */ - if(bucket == strtable_bucket(t, table_e->key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_strbucket(t); - while (table_e->next != UPB_END_OF_CHAIN) - table_e = strent(t, table_e->next); - table_e->next = empty_bucket; - table_e = strent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. 
*/ - uint32_t empty_bucket = empty_strbucket(t); - uint32_t evictee_bucket = strtable_bucket(t, table_e->key); - memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_strtable_entry *evictee_e = strent(t, evictee_bucket); - while(1) { - assert(!upb_string_isnull(evictee_e->key)); - assert(evictee_e->next != UPB_END_OF_CHAIN); - if(evictee_e->next == bucket) { - evictee_e->next = empty_bucket; - break; - } - evictee_e = strent(t, evictee_e->next); - } - /* table_e remains set to our mainpos. */ - } - } - memcpy(table_e, e, t->t.entry_size); - table_e->next = UPB_END_OF_CHAIN; - assert(upb_strtable_lookup(t, e->key) == table_e); -} - -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e) -{ - if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { - /* Need to resize. New table of double the size, add old elements to it. */ - upb_strtable new_table; - upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); - upb_strtable_entry *old_e; - for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) - strinsert(&new_table, old_e); - upb_strtable_free(t); - *t = new_table; - } - strinsert(t, e); -} - -void *upb_inttable_begin(upb_inttable *t) { - return upb_inttable_next(t, intent(t, 0)); -} - -void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) { - upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1); - do { - cur = (void*)((char*)cur + t->t.entry_size); - if(cur == end) return NULL; - } while(cur->key == UPB_EMPTY_ENTRY); - return cur; -} - -void *upb_strtable_begin(upb_strtable *t) { - return upb_strtable_next(t, strent(t, 0)); -} - -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { - upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1); - do { - cur = (void*)((char*)cur + t->t.entry_size); - if(cur == end) return NULL; - } while(cur->key == NULL); - return cur; -} - -#ifdef UPB_UNALIGNED_READS_OK 
-//----------------------------------------------------------------------------- -// MurmurHash2, by Austin Appleby (released as public domain). -// Reformatted and C99-ified by Joshua Haberman. -// Note - This code makes a few assumptions about how your machine behaves - -// 1. We can read a 4-byte value from any address without crashing -// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t -// And it has a few limitations - -// 1. It will not work incrementally. -// 2. It will not produce the same results on little-endian and big-endian -// machines. -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) -{ - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - - // Initialize the hash to a 'random' value - uint32_t h = seed ^ len; - - // Mix 4 bytes at a time into the hash - const uint8_t * data = (const uint8_t *)key; - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - // Handle the last few bytes of the input array - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -#else // !UPB_UNALIGNED_READS_OK - -//----------------------------------------------------------------------------- -// MurmurHashAligned2, by Austin Appleby -// Same algorithm as MurmurHash2, but only does aligned reads - should be safer -// on certain platforms. 
-// Performance will be lower than MurmurHash2 - -#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - -static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) -{ - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - const uint8_t * data = (const uint8_t *)key; - uint32_t h = seed ^ len; - uint8_t align = (uintptr_t)data & 3; - - if(align && (len >= 4)) { - // Pre-load the temp registers - uint32_t t = 0, d = 0; - - switch(align) { - case 1: t |= data[2] << 16; - case 2: t |= data[1] << 8; - case 3: t |= data[0]; - } - - t <<= (8 * align); - - data += 4-align; - len -= 4-align; - - int32_t sl = 8 * (4-align); - int32_t sr = 8 * align; - - // Mix - - while(len >= 4) { - d = *(uint32_t *)data; - t = (t >> sr) | (d << sl); - - uint32_t k = t; - - MIX(h,k,m); - - t = d; - - data += 4; - len -= 4; - } - - // Handle leftover data in temp registers - - d = 0; - - if(len >= align) { - switch(align) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - } - - uint32_t k = (t >> sr) | (d << sl); - MIX(h,k,m); - - data += align; - len -= align; - - //---------- - // Handle tail bytes - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - } else { - switch(len) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - case 0: h ^= (t >> sr) | (d << sl); h *= m; - } - } - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } else { - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - MIX(h,k,m); - - data += 4; - len -= 4; - } - - //---------- - // Handle tail bytes - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } -} -#undef MIX - -#endif // UPB_UNALIGNED_READS_OK diff --git a/src/upb_table.h b/src/upb_table.h deleted file mode 100644 index 20dae92..0000000 --- a/src/upb_table.h +++ 
/dev/null @@ -1,133 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file defines very fast int->struct (inttable) and string->struct - * (strtable) hash tables. The struct can be of any size, and it is stored - * in the table itself, for cache-friendly performance. - * - * The table uses internal chaining with Brent's variation (inspired by the - * Lua implementation of hash tables). The hash function for strings is - * Austin Appleby's "MurmurHash." - */ - -#ifndef UPB_TABLE_H_ -#define UPB_TABLE_H_ - -#include -#include "upb.h" -#include "upb_string.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Note: the key cannot be zero! Zero is used by the implementation. */ -typedef uint32_t upb_inttable_key_t; - -#define UPB_END_OF_CHAIN (uint32_t)0 -#define UPB_EMPTY_ENTRY (uint32_t)0 - -typedef struct { - upb_inttable_key_t key; - uint32_t next; /* Internal chaining. */ -} upb_inttable_entry; - -// TODO: consider storing the hash in the entry. This would avoid the need to -// rehash on table resizes, but more importantly could possibly improve lookup -// performance by letting us compare hashes before comparing lengths or the -// strings themselves. -typedef struct { - upb_string *key; // We own a ref. - uint32_t next; // Internal chaining. -} upb_strtable_entry; - -typedef struct { - void *entries; - uint32_t count; /* How many elements are currently in the table? */ - uint16_t entry_size; /* How big is each entry? */ - uint8_t size_lg2; /* The table is 2^size_lg2 in size. */ - uint32_t mask; -} upb_table; - -typedef struct { - upb_table t; -} upb_strtable; - -typedef struct { - upb_table t; -} upb_inttable; - -/* Initialize and free a table, respectively. Specify the initial size - * with 'size' (the size will be increased as necessary). Entry size - * specifies how many bytes each entry in the table is. 
*/ -void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size); -void upb_inttable_free(upb_inttable *table); -void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); -void upb_strtable_free(upb_strtable *table); - -INLINE uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; } -INLINE uint32_t upb_inttable_size(upb_inttable *t) { - return upb_table_size(&t->t); -} -INLINE uint32_t upb_strtable_size(upb_strtable *t) { - return upb_table_size(&t->t); -} - -INLINE uint32_t upb_table_count(upb_table *t) { return t->count; } -INLINE uint32_t upb_inttable_count(upb_inttable *t) { - return upb_table_count(&t->t); -} -INLINE uint32_t upb_strtable_count(upb_strtable *t) { - return upb_table_count(&t->t); -} - -/* Inserts the given key into the hashtable with the given value. The key must - * not already exist in the hash table. The data will be copied from e into - * the hashtable (the amount of data copied comes from entry_size when the - * table was constructed). Therefore the data at val may be freed once the - * call returns. */ -void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e); -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e); - -INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) { - return (k & t->t.mask) + 1; /* Identity hash for ints. */ -} - -/* Looks up key in this table. Inlined because this is in the critical path of - * decoding. We have the caller specify the entry_size because fixing this as - * a literal (instead of reading table->entry_size) gives the compiler more - * ability to optimize. 
*/ -INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key, - uint32_t entry_size) { - assert(key != 0); - uint32_t bucket = upb_inttable_bucket(t, key); - upb_inttable_entry *e; - do { - e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size); - if(e->key == key) return e; - } while((bucket = e->next) != UPB_END_OF_CHAIN); - return NULL; /* Not found. */ -} - -INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { - return upb_inttable_fastlookup(t, key, t->t.entry_size); -} - -void *upb_strtable_lookup(upb_strtable *t, upb_string *key); - -/* Provides iteration over the table. The order in which the entries are - * returned is undefined. Insertions invalidate iterators. The _next - * functions return NULL when the end has been reached. */ -void *upb_inttable_begin(upb_inttable *t); -void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur); - -void *upb_strtable_begin(upb_strtable *t); -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TABLE_H_ */ diff --git a/src/upb_text.c b/src/upb_text.c deleted file mode 100644 index 8662269..0000000 --- a/src/upb_text.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- */ - -#include -#include "descriptor.h" -#include "upb_text.h" -#include "upb_data.h" - -void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file) -{ -#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break; - switch(type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - /* TODO: escaping. */ - fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; - } -} - -static void print_indent(upb_text_printer *p, FILE *stream) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - fprintf(stream, " "); -} - -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, - FILE *stream) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name)); - upb_text_printval(valtype, val, stream); - if(p->single_line) - fputc(' ', stream); - else - fputc('\n', stream); -} - -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); - if(!p->single_line) fputc('\n', stream); - p->indent_depth++; -} - -void upb_text_pop(upb_text_printer *p, FILE *stream) -{ - p->indent_depth--; - print_indent(p, stream); - fprintf(stream, "}\n"); -} - -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream); - -static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md, - FILE *stream) -{ - 
for(upb_field_count_t i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[i]; - if(!upb_msg_has(msg, f)) continue; - upb_value v = upb_msg_get(msg, f); - if(upb_isarray(f)) { - upb_arrayptr arr = v.arr; - for(uint32_t j = 0; j < upb_array_len(arr); j++) { - upb_value elem = upb_array_get(arr, f, j); - printval(printer, elem, f, stream); - } - } else { - printval(printer, v, f, stream); - } - } -} - -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream) -{ - if(upb_issubmsg(f)) { - upb_text_push(printer, f->name, stream); - printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream); - upb_text_pop(printer, stream); - } else { - upb_text_printfield(printer, f->name, f->type, v, stream); - } -} - - -void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line, - FILE *stream) -{ - upb_text_printer printer; - upb_text_printer_init(&printer, single_line); - printmsg(&printer, msg, md, stream); -} - diff --git a/src/upb_text.h b/src/upb_text.h deleted file mode 100644 index d89c9d6..0000000 --- a/src/upb_text.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- */ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int indent_depth; - bool single_line; -} upb_text_printer; - -INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) { - p->indent_depth = 0; - p->single_line = single_line; -} -void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file); -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, FILE *stream); -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, - FILE *stream); -void upb_text_pop(upb_text_printer *p, FILE *stream); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TEXT_H_ */ diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h new file mode 100644 index 0000000..69a28b3 --- /dev/null +++ b/stream/upb_byteio.h @@ -0,0 +1,43 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file contains upb_bytesrc and upb_bytesink implementations for common + * interfaces like strings, UNIX fds, and FILE*. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_BYTEIO_H +#define UPB_BYTEIO_H + +#include "upb_srcsink.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stringsrc **************************************************************/ + +struct upb_stringsrc; +typedef struct upb_stringsrc upb_stringsrc; + +// Create/free a stringsrc. +upb_stringsrc *upb_stringsrc_new(); +void upb_stringsrc_free(upb_stringsrc *s); + +// Resets the stringsrc to a state where it will vend the given string. The +// stringsrc will take a reference on the string, so the caller need not ensure +// that it outlives the stringsrc. A stringsrc can be reset multiple times. +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
+upb_bytesrc *upb_stringsrc_bytesrc(); + + +/* upb_fdsrc ******************************************************************/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c new file mode 100644 index 0000000..e3fdc49 --- /dev/null +++ b/stream/upb_decoder.c @@ -0,0 +1,577 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_decoder.h" + +#include +#include +#include + +#define UPB_GROUP_END_OFFSET UINT32_MAX + +// Returns true if the give wire type and field type combination is valid, +// taking into account both packed and non-packed encodings. +static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { + return (1 << wt) & upb_types[ft].allowed_wire_types; +} + +// Performs zig-zag decoding, which is used by sint32 and sint64. +static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } + + +/* upb_decoder ****************************************************************/ + +// The decoder keeps a stack with one entry per level of recursion. +// upb_decoder_frame is one frame of that stack. +typedef struct { + upb_msgdef *msgdef; + upb_fielddef *field; + upb_strlen_t end_offset; // For groups, -1. +} upb_decoder_frame; + +struct upb_decoder { + upb_src src; // upb_decoder is a upb_src. + + upb_msgdef *toplevel_msgdef; + upb_bytesrc *bytesrc; + + // The buffer of input data. NULL is equivalent to the empty string. + upb_string *buf; + + // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain. + uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; + + // The number of bytes we have yet to consume from "buf" or tmpbuf. This is + // always >= 0 unless we were just reset or are eof. + int32_t buf_bytesleft; + + // The offset within "buf" from where we are currently reading. 
This can be
+  // <0 if we are reading some residual bytes from the previous buffer, which
+  // are stored in tmpbuf and combined with bytes from "buf".
+  int32_t buf_offset;
+
+  // The overall stream offset of the beginning of "buf".
+  uint32_t buf_stream_offset;
+
+  // Fielddef for the key we just read.
+  upb_fielddef *field;
+
+  // Wire type of the key we just read.
+  upb_wire_type_t wire_type;
+
+  // Delimited length of the string field we are reading.
+  upb_strlen_t delimited_len;
+
+  upb_strlen_t packed_end_offset;
+
+  // We keep a stack of messages we have recursed into.
+  upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING];
+};
+
+
+/* upb_decoder buffering. *****************************************************/
+
+static upb_strlen_t upb_decoder_offset(upb_decoder *d)
+{
+  return d->buf_stream_offset + d->buf_offset;  // Start of "buf" + position.
+}
+
+static bool upb_decoder_nextbuf(upb_decoder *d)
+{
+  assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE);
+
+  // Copy residual bytes to temporary buffer.
+  if(d->buf_bytesleft > 0) {
+    memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset,
+           d->buf_bytesleft);
+  }
+
+  // Recycle old buffer.
+  if(d->buf) {
+    d->buf = upb_string_tryrecycle(d->buf);
+    d->buf_offset -= upb_string_len(d->buf);
+    d->buf_stream_offset += upb_string_len(d->buf);
+  }
+
+  // Pull next buffer.
+  if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) {
+    d->buf_bytesleft += upb_string_len(d->buf);
+    return true;
+  } else {
+    // Error or EOF.
+    if(!upb_bytesrc_eof(d->bytesrc)) {
+      // Error from bytesrc.
+      upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
+      return false;
+    } else if(d->buf_bytesleft == 0) {
+      // EOF from bytesrc and we don't have any residual bytes left.
+      d->src.eof = true;
+      return false;
+    } else {
+      // No more data left from the bytesrc, but we still have residual bytes.
+ return true; + } + } +} + +static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) +{ + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { + // GCC is currently complaining about use of an uninitialized value if we + // don't set this now. I think this is incorrect, but leaving this in + // to suppress the warning for now. + *bytes = 0; + if(!upb_decoder_nextbuf(d)) return NULL; + } + + assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE); + + if(d->buf_offset >= 0) { + // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE + // contiguous bytes, so we can read directly out of it. + *bytes = d->buf_bytesleft; + return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; + } else { + // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we + // have so far. + upb_strlen_t len = -d->buf_offset; + if(d->buf) { + upb_strlen_t to_copy = + UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf)); + memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy); + len += to_copy; + } + // Pad the buffer out to UPB_MAX_ENCODED_SIZE. + memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); + *bytes = len; + return d->tmpbuf; + } +} + +// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE +// bytes long. This buffer contains the next bytes in the stream (even if +// those bytes span multiple buffers). *bytes is set to the number of actual +// stream bytes that are available in the returned buffer. If +// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. +// +// After the data has been read, upb_decoder_consume() should be called to +// indicate how many bytes were consumed. +static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) +{ + if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) { + // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE + // contiguous bytes, so we can read directly out of it. 
+ *bytes = d->buf_bytesleft; + return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; + } else { + return upb_decoder_getbuf_full(d, bytes); + } +} + +static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) +{ + assert(bytes <= UPB_MAX_ENCODED_SIZE); + d->buf_offset += bytes; + d->buf_bytesleft -= bytes; + if(d->buf_offset < 0) { + // We still have residual bytes we have not consumed. + memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); + } + assert(d->buf_bytesleft >= 0); + return true; +} + +static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) +{ + d->buf_offset += bytes; + d->buf_bytesleft -= bytes; + while(d->buf_bytesleft < 0) { + if(!upb_decoder_nextbuf(d)) return false; + } + return true; +} + + +/* Functions to read wire values. *********************************************/ + +// Parses remining bytes of a 64-bit varint that has already had its first byte +// parsed. +INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + const uint8_t *start = buf; + if(!buf) return false; + + *high = 0; + uint32_t b; + b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + if(bytes_available >= 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " + "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); + } 
else { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle " + "of a varint, stream offset: %u", upb_decoder_offset(d)); + } + return false; + +done: + return upb_decoder_consume(d, buf - start); +} + +// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit +// varint is not a true wire type. +static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val) +{ + uint32_t high; + if(!upb_decoder_readv64(d, val, &high)) return false; + + // We expect the high bits to be zero, except that signed 32-bit values are + // first sign-extended to be wire-compatible with 64 bits, in which case we + // expect the high bits to be all one. + // + // We could perform a slightly more sophisticated check by having the caller + // indicate whether a signed or unsigned value is being read. We could check + // that the high bits are all zeros for unsigned, and properly sign-extended + // for signed. + if(high != 0 && ~high != 0) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but " + "the high bits contained data we should not truncate: " + "%ux, stream offset: %u", high, upb_decoder_offset(d)); + return false; + } + return true; +} + +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller +// promises that 4 bytes are available at buf. +static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) +{ + upb_strlen_t bytes_available; + const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); + if(!buf) return false; + if(bytes_available < 4) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Stream ended in the middle of a 32-bit value"); + return false; + } + memcpy(val, buf, 4); + // TODO: byte swap if big-endian. + return upb_decoder_consume(d, 4); +} + +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller +// promises that 8 bytes are available at buf. 
+static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val)
+{
+  upb_strlen_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  if(bytes_available < 8) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+               "Stream ended in the middle of a 64-bit value");
+    return false;
+  }
+  memcpy(val, buf, 8);
+  // TODO: byte swap if big-endian.
+  return upb_decoder_consume(d, 8);
+}
+
+// Skips one varint (wire type: UPB_WIRE_TYPE_VARINT) without decoding it.
+// Returns false (with status set) if no terminating byte, i.e. one without
+// the 0x80 continuation bit, is found within the 10-byte varint maximum.
+static uint8_t upb_decoder_skipv64(upb_decoder *d)
+{
+  uint32_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  uint8_t i;
+  for(i = 0; i < 10 && buf[i] & 0x80; i++)
+    ;  // empty loop body; i ends as the index of the terminating byte.
+  if(i >= 10) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint.");
+    return false;
+  }
+  return upb_decoder_consume(d, i + 1);  // +1 consumes the terminating byte.
+}
+
+
+/* upb_src implementation for upb_decoder. ************************************/
+
+bool upb_decoder_skipval(upb_decoder *d);
+
+upb_fielddef *upb_decoder_getdef(upb_decoder *d)
+{
+  // Detect end-of-submessage.
+  if(upb_decoder_offset(d) >= d->top->end_offset) {
+    d->src.eof = true;
+    return NULL;
+  }
+
+  // Handles the packed field case.
+  if(d->field) return d->field;
+
+  uint32_t key = 0;
+again:
+  if(!upb_decoder_readv32(d, &key)) return NULL;
+  upb_wire_type_t wire_type = key & 0x7;
+  int32_t field_number = key >> 3;
+
+  if(wire_type == UPB_WIRE_TYPE_DELIMITED) {
+    // For delimited wire values we parse the length now, since we need it in
+    // all cases.
+    if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL;
+  } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) {
+    if(d->top->end_offset == UPB_GROUP_END_OFFSET) {
+      d->src.eof = true;
+    } else {
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current "
+                 "message is not a group, byte offset: %zd",
+                 upb_decoder_offset(d));
+    }
+    return NULL;
+  }
+
+  // Record the wire type now: upb_decoder_skipval() below dispatches on it.
+  d->wire_type = wire_type;
+  upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number);  // By number.
+  if (!f) {
+    // Unknown field.  If/when the upb_src interface supports reporting
+    // unknown fields we will implement that here.
+    if(!upb_decoder_skipval(d)) return NULL;
+    goto again;
+  } else if (!upb_check_type(wire_type, f->type)) {
+    // This is a recoverable error condition.  We skip the value but also
+    // return NULL and report the error.
+    upb_decoder_skipval(d);
+    // TODO: better error message.
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n");
+    return NULL;
+  }
+  // Known field with a compatible wire type: hand it to the caller.
+  d->field = f;
+  return f;
+}
+
+bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
+{
+  switch(upb_types[d->field->type].native_wire_type) {
+    case UPB_WIRE_TYPE_VARINT: {
+      uint32_t low, high;
+      if(!upb_decoder_readv64(d, &low, &high)) return false;
+      uint64_t u64 = ((uint64_t)high << 32) | low;
+      if(d->field->type == UPB_TYPE(SINT64))
+        *val.int64 = upb_zzdec_64(u64);
+      else
+        *val.uint64 = u64;
+      break;
+    }
+    case UPB_WIRE_TYPE_32BIT_VARINT: {
+      uint32_t u32;
+      if(!upb_decoder_readv32(d, &u32)) return false;
+      if(d->field->type == UPB_TYPE(SINT32))
+        *val.int32 = upb_zzdec_32(u32);
+      else
+        *val.uint32 = u32;
+      break;
+    }
+    case UPB_WIRE_TYPE_64BIT:
+      if(!upb_decoder_readf64(d, val.uint64)) return false;
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      if(!upb_decoder_readf32(d, val.uint32)) return false;
+      break;
+    default:
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+                 "Attempted to call getval on a group.");
+      return false;
+  }
+  // For a packed field where we have not reached the end, we leave
the field + // in the decoder so we will return it again without parsing a key. + if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || + upb_decoder_offset(d) >= d->packed_end_offset) { + d->field = NULL; + } + return true; +} + +bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { + // A string, bytes, or a length-delimited submessage. The latter isn't + // technically a string, but can be gotten as one to perform lazy parsing. + const int32_t total_len = d->delimited_len; + if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { + // The entire string is inside our current buffer, so we can just + // return a substring of the buffer without copying. + upb_string_substr(str, d->buf, + upb_string_len(d->buf) - d->buf_bytesleft, + total_len); + upb_decoder_skipbytes(d, total_len); + } else { + // The string spans buffers, so we must copy from the residual buffer + // (if any bytes are there), then the buffer, and finally from the bytesrc. + uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( + str, UPB_MIN(total_len, d->buf_bytesleft)); + int32_t len = 0; + if(d->buf_offset < 0) { + // Residual bytes we need to copy from tmpbuf. + memcpy(ptr, d->tmpbuf, -d->buf_offset); + len += -d->buf_offset; + } + if(d->buf) { + // Bytes from the buffer. + memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, + upb_string_len(str) - len); + } + upb_decoder_skipbytes(d, upb_string_len(str)); + if(len < total_len) { + // Bytes from the bytesrc. + if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return false; + } + // Have to advance this since the buffering layer of the decoder will + // never see these bytes. 
+ d->buf_stream_offset += total_len - len; + } + } + d->field = NULL; + return true; +} + +static bool upb_decoder_skipgroup(upb_decoder *d); + +bool upb_decoder_startmsg(upb_decoder *d) { + d->top->field = d->field; + if(++d->top >= d->limit) { + upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, + "Nesting exceeded maximum (%d levels)\n", + UPB_MAX_NESTING); + return false; + } + upb_decoder_frame *frame = d->top; + frame->msgdef = upb_downcast_msgdef(d->field->def); + if(d->field->type == UPB_TYPE(GROUP)) { + frame->end_offset = UPB_GROUP_END_OFFSET; + } else { + frame->end_offset = upb_decoder_offset(d) + d->delimited_len; + } + return true; +} + +bool upb_decoder_endmsg(upb_decoder *d) { + if(d->top > d->stack) { + --d->top; + if(!d->src.eof) { + if(d->top->field->type == UPB_TYPE(GROUP)) + upb_decoder_skipgroup(d); + else + upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); + } + d->src.eof = false; + return true; + } else { + return false; + } +} + +bool upb_decoder_skipval(upb_decoder *d) { + upb_strlen_t bytes_to_skip; + switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: { + return upb_decoder_skipv64(d); + } + case UPB_WIRE_TYPE_START_GROUP: + if(!upb_decoder_startmsg(d)) return false; + if(!upb_decoder_skipgroup(d)) return false; + if(!upb_decoder_endmsg(d)) return false; + return true; + default: + // Including UPB_WIRE_TYPE_END_GROUP. + assert(false); + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); + return false; + case UPB_WIRE_TYPE_64BIT: + bytes_to_skip = 8; + break; + case UPB_WIRE_TYPE_32BIT: + bytes_to_skip = 4; + break; + case UPB_WIRE_TYPE_DELIMITED: + // Works for both string/bytes *and* submessages. + bytes_to_skip = d->delimited_len; + break; + } + return upb_decoder_skipbytes(d, bytes_to_skip); +} + +static bool upb_decoder_skipgroup(upb_decoder *d) +{ + // This will be mututally recursive with upb_decoder_skipval() if the group + // has sub-groups. 
If we wanted to handle EAGAIN in the future, this
+  // approach would not work; we would need to track the group depth
+  // explicitly.
+  while(upb_decoder_getdef(d)) {
+    if(!upb_decoder_skipval(d)) return false;
+  }
+  // If we are at the end of the group like we want to be, then
+  // upb_decoder_getdef() returned NULL because of eof, not error.
+  if(!d->src.eof) return false;  // getdef stopped on an error, not on eof.
+  return true;
+}
+
+upb_src_vtable upb_decoder_src_vtbl = {
+  (upb_src_getdef_fptr)&upb_decoder_getdef,
+  (upb_src_getval_fptr)&upb_decoder_getval,
+  (upb_src_skipval_fptr)&upb_decoder_skipval,
+  (upb_src_startmsg_fptr)&upb_decoder_startmsg,
+  (upb_src_endmsg_fptr)&upb_decoder_endmsg,
+};
+
+
+/* upb_decoder construction/destruction. **************************************/
+
+upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
+{
+  upb_decoder *d = malloc(sizeof(*d));
+  d->toplevel_msgdef = msgdef;
+  d->limit = &d->stack[UPB_MAX_NESTING];
+  d->buf = NULL;
+  upb_src_init(&d->src, &upb_decoder_src_vtbl);
+  return d;
+}
+
+void upb_decoder_free(upb_decoder *d)
+{
+  upb_string_unref(d->buf);
+  free(d);
+}
+
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
+{
+  upb_string_unref(d->buf);
+  d->top = d->stack;
+  d->top->msgdef = d->toplevel_msgdef;
+  // The top-level message is not delimited (we can keep receiving data for it
+  // indefinitely), so we set the end offset as high as possible, but not equal
+  // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET.
+  d->top->end_offset = UINT32_MAX - 1;
+  d->bytesrc = bytesrc;
+  d->buf = NULL;
+  d->buf_bytesleft = 0;
+  d->buf_stream_offset = 0;
+  d->buf_offset = 0;
+}
diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h
new file mode 100644
index 0000000..dde61fc
--- /dev/null
+++ b/stream/upb_decoder.h
@@ -0,0 +1,53 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ * + * upb_decoder implements a high performance, streaming decoder for protobuf + * data that works by implementing upb_src and getting its data from a + * upb_bytesrc. + * + * The decoder does not currently support non-blocking I/O, in the sense that + * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the + * decoder when data becomes available again. Support for this could be added, + * but it would add complexity and perhaps cost efficiency also. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_DECODER_H_ +#define UPB_DECODER_H_ + +#include +#include +#include "upb_def.h" +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_decoder *****************************************************************/ + +// A upb_decoder decodes the binary protocol buffer format, writing the data it +// decodes to a upb_sink. +struct upb_decoder; +typedef struct upb_decoder upb_decoder; + +// Allocates and frees a upb_decoder, respectively. +upb_decoder *upb_decoder_new(upb_msgdef *md); +void upb_decoder_free(upb_decoder *d); + +// Resets the internal state of an already-allocated decoder. This puts it in a +// state where it has not seen any data, and expects the next data to be from +// the beginning of a new protobuf. Parsers must be reset before they can be +// used. A decoder can be reset multiple times. +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); + +// Returns a upb_src pointer by which the decoder can be used. The returned +// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). +upb_src *upb_decoder_getsrc(upb_decoder *d); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_DECODER_H_ */ diff --git a/stream/upb_encoder.c b/stream/upb_encoder.c new file mode 100644 index 0000000..304a423 --- /dev/null +++ b/stream/upb_encoder.c @@ -0,0 +1,420 @@ +/* + * upb - a minimalist implementation of protocol buffers. 
+ * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_encoder.h" + +#include +#include "descriptor.h" + +/* Functions for calculating sizes of wire values. ****************************/ + +static size_t upb_v_uint64_t_size(uint64_t val) { +#ifdef __GNUC__ + int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. +#else + int high_bit = 0; + uint64_t tmp = val; + while(tmp >>= 1) high_bit++; +#endif + return val == 0 ? 1 : high_bit / 7 + 1; +} + +static size_t upb_v_int32_t_size(int32_t val) { + // v_uint32's are sign-extended to maintain wire compatibility with int64s. + return upb_v_uint64_t_size((int64_t)val); +} +static size_t upb_v_uint32_t_size(uint32_t val) { + return upb_v_uint64_t_size(val); +} +static size_t upb_f_uint64_t_size(uint64_t val) { + (void)val; // Length is independent of value. + return sizeof(uint64_t); +} +static size_t upb_f_uint32_t_size(uint32_t val) { + (void)val; // Length is independent of value. + return sizeof(uint32_t); +} + + +/* Functions to write wire values. ********************************************/ + +// Since we know in advance the longest that the value could be, we always make +// sure that our buffer is long enough. This saves us from having to perform +// bounds checks. + +// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT). +static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val) +{ + do { + uint8_t byte = val & 0x7f; + val >>= 7; + if(val) byte |= 0x80; + *buf++ = byte; + } while(val); + return buf; +} + +// Puts an unsigned 32-bit varint, verbatim. Never uses the high 64 bits. +static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val) +{ + return upb_put_v_uint64_t(buf, val); +} + +// Puts a signed 32-bit varint, first sign-extending to 64-bits. We do this to +// maintain wire-compatibility with 64-bit signed integers. 
+static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
+{
+  return upb_put_v_uint64_t(buf, (int64_t)val);
+}
+
+static void upb_put32(uint8_t *buf, uint32_t val) {
+  buf[0] = val & 0xff;
+  buf[1] = (val >> 8) & 0xff;
+  buf[2] = (val >> 16) & 0xff;
+  buf[3] = (val >> 24);
+}
+
+// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
+static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
+{
+  uint8_t *uint32_end = buf + sizeof(uint32_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint32_t*)buf = val;
+#else
+  upb_put32(buf, val);
+#endif
+  return uint32_end;
+}
+
+// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
+static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
+{
+  uint8_t *uint64_end = buf + sizeof(uint64_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint64_t*)buf = val;
+#else
+  upb_put32(buf, (uint32_t)val);                // Low word: bytes 0..3.
+  upb_put32(buf + 4, (uint32_t)(val >> 32));    // High word: bytes 4..7.
+#endif
+  return uint64_end;
+}
+
+/* Functions to write and calculate sizes for .proto values. ******************/
+
+// Performs zig-zag encoding, which is used by sint32 and sint64.
+static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
+static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
+
+/* Use macros to define a set of two functions for each .proto type:
+ *
+ *  // Converts and writes a .proto value into buf.  "end" indicates the end
+ *  // of the current available buffer (if the buffer does not contain enough
+ *  // space UPB_STATUS_NEED_MORE_DATA is returned).  On success, *outbuf will
+ *  // point one past the data that was written.
+ *  uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
+ *
+ *  // Returns the number of bytes required to encode val.
+ *  size_t upb_get_INT32_size(int32_t val);
+ *
+ *  // Given a .proto value s (source) convert it to a wire value.
+ * uint32_t upb_vtowv_INT32(int32_t s); + */ + +#define VTOWV(type, wire_t, val_t) \ + static wire_t upb_vtowv_ ## type(val_t s) + +#define PUT(type, v_or_f, wire_t, val_t, member_name) \ + static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \ + wire_t tmp = upb_vtowv_ ## type(val); \ + return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \ + } + +#define T(type, v_or_f, wire_t, val_t, member_name) \ + static size_t upb_get_ ## type ## _size(val_t val) { \ + return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \ + } \ + VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \ + PUT(type, v_or_f, wire_t, val_t, member_name) \ + VTOWV(type, wire_t, val_t) + +T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; } +T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; } +T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } +T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } +T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); } +T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); } +T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } +T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } +T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; } +T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; } +T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; } +T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; } +T(DOUBLE, f, uint64_t, double, _double) { + upb_value v; + v._double = s; + return v.uint64; +} +T(FLOAT, f, uint32_t, float, _float) { + upb_value v; + v._float = s; + return v.uint32; +} +#undef VTOWV +#undef PUT +#undef T + +static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) +{ +#define CASE(t, member_name) \ + case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + 
CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: assert(false); return buf; + } +#undef CASE +} + +static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) +{ +#define CASE(t, member_name) \ + case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + default: assert(false); return 0; + } +#undef CASE +} + +static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, + upb_wire_type_t wt) +{ + return upb_put_UINT32(buf, wt | (num << 3)); +} + +static uint32_t _upb_get_tag_size(upb_field_number_t num) +{ + return upb_get_UINT32_size(num << 3); +} + + +/* upb_sizebuilder ************************************************************/ + +struct upb_sizebuilder { + // Accumulating size for the current level. + uint32_t size; + + // Stack of sizes for our current nesting. + uint32_t stack[UPB_MAX_NESTING], *top; + + // Vector of sizes. + uint32_t *sizes; + int sizes_len; + int sizes_size; + + upb_status status; +}; + +// upb_sink callbacks. +static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, + upb_value val, + upb_status *status) +{ + (void)status; + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += _upb_get_value_size(f->type, val); + sb->size += size; + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status) +{ + (void)status; + (void)str; // String data itself is not used. 
+ upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(start >= 0) { + uint32_t size = 0; + size += _upb_get_tag_size(f->number); + size += upb_get_UINT32_size(end - start); + sb->size += size; + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + (void)status; + (void)f; // Unused (we calculate tag size and delimiter in endcb). + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(f->type == UPB_TYPE(MESSAGE)) { + *sb->top = sb->size; + sb->top++; + sb->size = 0; + } else { + assert(f->type == UPB_TYPE(GROUP)); + sb->size += _upb_get_tag_size(f->number); + } + return UPB_SINK_CONTINUE; +} + +static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + (void)status; + upb_sizebuilder *sb = (upb_sizebuilder*)sink; + if(f->type == UPB_TYPE(MESSAGE)) { + sb->top--; + if(sb->sizes_len == sb->sizes_size) { + sb->sizes_size *= 2; + sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes)); + } + uint32_t child_size = sb->size; + uint32_t parent_size = *sb->top; + sb->sizes[sb->sizes_len++] = child_size; + // The size according to the parent includes the tag size and delimiter of + // the submessage. + parent_size += upb_get_UINT32_size(child_size); + parent_size += _upb_get_tag_size(f->number); + // Include size accumulated in parent before child began. + sb->size = child_size + parent_size; + } else { + assert(f->type == UPB_TYPE(GROUP)); + // As an optimization, we could just add this number twice in startcb, to + // avoid having to recalculate it. 
+ sb->size += _upb_get_tag_size(f->number); + } + return UPB_SINK_CONTINUE; +} + +upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { + _upb_sizebuilder_valuecb, + _upb_sizebuilder_strcb, + _upb_sizebuilder_startcb, + _upb_sizebuilder_endcb +}; + + +/* upb_sink callbacks *********************************************************/ + +struct upb_encoder { + upb_sink base; + //upb_bytesink *bytesink; + uint32_t *sizes; + int size_offset; +}; + + +// Within one callback we may need to encode up to two separate values. +#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2) + +static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf, + size_t len, upb_status *status) +{ + // TODO: conjure a upb_strptr that points to buf. + //upb_strptr ptr; + (void)s; + (void)buf; + (void)status; + size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr); + if(written < len) { + // TODO: mark to skip "written" bytes next time. + return UPB_SINK_STOP; + } else { + return UPB_SINK_CONTINUE; + } +} + +static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f, + upb_value val, upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + upb_wire_type_t wt = upb_types[f->type].expected_wire_type; + // TODO: handle packed encoding. + ptr = _upb_put_tag(ptr, f->number, wt); + ptr = upb_encode_value(ptr, f->type, val); + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(start >= 0) { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); + ptr = upb_put_UINT32(ptr, end - start); + } + // TODO: properly handle partially consumed strings and partially supplied + // strings. 
+ _upb_encoder_push_buf(s, buf, ptr - buf, status); + return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status); +} + +static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(f->type == UPB_TYPE(GROUP)) { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP); + } else { + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); + ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]); + } + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f, + upb_status *status) +{ + upb_encoder *s = (upb_encoder*)sink; + uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; + if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE; + ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP); + return _upb_encoder_push_buf(s, buf, ptr - buf, status); +} + +upb_sink_callbacks _upb_encoder_sink_vtbl = { + _upb_encoder_valuecb, + _upb_encoder_strcb, + _upb_encoder_startcb, + _upb_encoder_endcb +}; + diff --git a/stream/upb_encoder.h b/stream/upb_encoder.h new file mode 100644 index 0000000..e879b0b --- /dev/null +++ b/stream/upb_encoder.h @@ -0,0 +1,56 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Implements a upb_sink that writes protobuf data to the binary wire format. + * + * For messages that have any submessages, the encoder needs a buffer + * containing the submessage sizes, so they can be properly written at the + * front of each message. Note that groups do *not* have this requirement. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. 
+ */ + +#ifndef UPB_ENCODER_H_ +#define UPB_ENCODER_H_ + +#include "upb.h" +#include "upb_srcsink.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_encoder ****************************************************************/ + +// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol +// buffer binary wire format. +struct upb_encoder; +typedef struct upb_encoder upb_encoder; + +upb_encoder *upb_encoder_new(upb_msgdef *md); +void upb_encoder_free(upb_encoder *e); + +// Resets the given upb_encoder such that it is ready to begin encoding, +// outputting data to "bytesink" (which must live until the encoder is +// reset or destroyed). +void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink); + +// Returns the upb_sink to which data can be written. The sink is invalidated +// when the encoder is reset or destroyed. Note that if the client wants to +// encode any length-delimited submessages it must first call +// upb_encoder_buildsizes() below. +upb_sink *upb_encoder_sink(upb_encoder *e); + +// Call prior to pushing any data with embedded submessages. "src" must yield +// exactly the same data as what will next be encoded, but in reverse order. +// The encoder iterates over this data in order to determine the sizes of the +// submessages. If any errors are returned by the upb_src, the status will +// be saved in *status. If the client is sure that the upb_src will not throw +// any errors, "status" may be NULL. +void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_ENCODER_H_ */ diff --git a/stream/upb_text.c b/stream/upb_text.c new file mode 100644 index 0000000..8662269 --- /dev/null +++ b/stream/upb_text.c @@ -0,0 +1,121 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ */
+
+#include
+#include "descriptor.h"
+#include "upb_text.h"
+#include "upb_data.h"
+
+// Prints the scalar or string value "val", interpreted according to "type",
+// to "file".  Types not listed in the switch print nothing.
+void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file)
+{
+#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break;
+  switch(type) {
+    // NOTE(review): "%0.f" prints no fractional digits -- confirm that this
+    // rounding is intended for DOUBLE and FLOAT.
+    case UPB_TYPE(DOUBLE):
+      CASE("%0.f", _double);
+    case UPB_TYPE(FLOAT):
+      CASE("%0.f", _float)
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SFIXED64):
+    case UPB_TYPE(SINT64):
+      CASE("%" PRId64, int64)
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      CASE("%" PRIu64, uint64)
+    // ENUM is read through the signed int32 member, matching the encoder,
+    // so that negative enum values print as negative numbers.
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(SINT32):
+    case UPB_TYPE(ENUM):
+      CASE("%" PRId32, int32)
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      CASE("%" PRIu32, uint32);
+    case UPB_TYPE(BOOL):
+      CASE("%hhu", _bool);
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES):
+      /* TODO: escaping. */
+      fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break;
+  }
+#undef CASE
+}
+
+// Emits one indent unit per nesting level; does nothing in single-line mode.
+static void print_indent(upb_text_printer *p, FILE *stream)
+{
+  if(!p->single_line)
+    for(int i = 0; i < p->indent_depth; i++)
+      fprintf(stream, " ");
+}
+
+// Prints "name:value", followed by a space in single-line mode or a newline
+// otherwise.
+void upb_text_printfield(upb_text_printer *p, upb_strptr name,
+                         upb_field_type_t valtype, upb_value val,
+                         FILE *stream)
+{
+  print_indent(p, stream);
+  fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name));
+  upb_text_printval(valtype, val, stream);
+  if(p->single_line)
+    fputc(' ', stream);
+  else
+    fputc('\n', stream);
+}
+
+// Opens a submessage: prints "submsg_type {" and increases the indent depth.
+void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream)
+{
+  print_indent(p, stream);
+  fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type));
+  if(!p->single_line) fputc('\n', stream);
+  p->indent_depth++;
+}
+
+// Closes a submessage: decreases the indent depth and prints "}".  Like
+// upb_text_printfield(), the terminator is a space in single-line mode so the
+// output stays on one line, and a newline otherwise.
+void upb_text_pop(upb_text_printer *p, FILE *stream)
+{
+  p->indent_depth--;
+  print_indent(p, stream);
+  fputs(p->single_line ? "} " : "}\n", stream);
+}
+
+static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
+                     FILE *stream);
+
+// Prints every field of "msg" that is set, expanding arrays element by
+// element.
+static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md,
+                     FILE *stream)
+{
+  for(upb_field_count_t i = 0; i < md->num_fields; i++) {
+    upb_fielddef *f = &md->fields[i];
+    if(!upb_msg_has(msg, f)) continue;
+    upb_value v = upb_msg_get(msg, f);
+    if(upb_isarray(f)) {
+      upb_arrayptr arr = v.arr;
+      for(uint32_t j = 0; j < upb_array_len(arr); j++) {
+        upb_value elem = upb_array_get(arr, f, j);
+        printval(printer, elem, f, stream);
+      }
+    } else {
+      printval(printer, v, f, stream);
+    }
+  }
+}
+
+// Prints a single value of field "f": submessages recurse through printmsg(),
+// everything else goes through upb_text_printfield().
+static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
+                     FILE *stream)
+{
+  if(upb_issubmsg(f)) {
+    upb_text_push(printer, f->name, stream);
+    printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream);
+    upb_text_pop(printer, stream);
+  } else {
+    upb_text_printfield(printer, f->name, f->type, v, stream);
+  }
+}
+
+
+// Prints "msg" (described by "md") in text form to "stream".
+void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line,
+                   FILE *stream)
+{
+  upb_text_printer printer;
+  upb_text_printer_init(&printer, single_line);
+  printmsg(&printer, msg, md, stream);
+}
+
diff --git a/stream/upb_text.h b/stream/upb_text.h
new file mode 100644
index 0000000..d89c9d6
--- /dev/null
+++ b/stream/upb_text.h
@@ -0,0 +1,36 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */ + +#ifndef UPB_TEXT_H_ +#define UPB_TEXT_H_ + +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int indent_depth; + bool single_line; +} upb_text_printer; + +INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) { + p->indent_depth = 0; + p->single_line = single_line; +} +void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file); +void upb_text_printfield(upb_text_printer *p, upb_strptr name, + upb_field_type_t valtype, upb_value val, FILE *stream); +void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, + FILE *stream); +void upb_text_pop(upb_text_printer *p, FILE *stream); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TEXT_H_ */ -- cgit v1.2.3 From e29bf964d1716398e8354a50f506906a307298e5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 12:15:31 -0700 Subject: Tests for string and fleshed out implementation. --- Makefile | 15 ++++++++----- core/upb_string.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++------- core/upb_string.h | 40 ++++++++++++++++++++++++---------- tests/test_string.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ tests/test_table.cc | 13 ++++++++++- 5 files changed, 161 insertions(+), 26 deletions(-) create mode 100644 tests/test_string.c diff --git a/Makefile b/Makefile index ca4f940..1f977b4 100644 --- a/Makefile +++ b/Makefile @@ -86,22 +86,25 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -TESTS=tests/tests \ +TESTS=tests/test_string \ + tests/test_table +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ tests/test_table \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb $(TESTS): core/libupb.a -#VALGRIND=valgrind --leak-check=full --error-exitcode=1 -VALGRIND= +VALGRIND=valgrind --leak-check=full --error-exitcode=1 +#VALGRIND= test: tests @echo Running all tests under valgrind. 
- $(VALGRIND) ./tests/tests # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/t.* ; do \ - if [ -f ./$$test ] ; then \ + @for test in tests/*; do \ + if [ -x ./$$test ] ; then \ echo $(VALGRIND) ./$$test: \\c; \ $(VALGRIND) ./$$test; \ fi \ diff --git a/core/upb_string.c b/core/upb_string.c index 91ab9ae..f9af9e9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -7,8 +7,11 @@ #include "upb_string.h" #include - -#define UPB_STRING_UNFINALIZED -1 +#ifdef __GLIBC__ +#include +#elif defined(__APPLE__) +#include +#endif static uint32_t upb_round_up_pow2(uint32_t v) { // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 @@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) { upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; + str->cached_mem = NULL; +#ifndef UPB_HAVE_MSIZE str->size = 0; - str->len = UPB_STRING_UNFINALIZED; +#endif + str->src = NULL; upb_atomic_refcount_init(&str->refcount, 1); return str; } +uint32_t upb_string_size(upb_string *str) { +#ifdef __GLIBC__ + return malloc_usable_size(str->cached_mem); +#elif defined(__APPLE__) + return malloc_size(str->cached_mem); +#else + return str->size; +#endif +} + +static void upb_string_release(upb_string *str) { + if(str->src) { + upb_string_unref(str->src); + str->src = NULL; + } +} + void _upb_string_free(upb_string *str) { - if(str->ptr) free(str->ptr); + if(str->cached_mem) free(str->cached_mem); + upb_string_release(str); free(str); } +upb_string *upb_string_tryrecycle(upb_string *str) { + if(str == NULL || upb_atomic_read(&str->refcount) > 1) { + return upb_string_new(); + } else { + str->ptr = NULL; + upb_string_release(str); + return str; + } +} + char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->len == UPB_STRING_UNFINALIZED); - if (str->size < len) { - str->size = upb_round_up_pow2(len); - str->ptr = realloc(str->ptr, str->size); + 
assert(str->ptr == NULL); + uint32_t size = upb_string_size(str); + if (size < len) { + size = upb_round_up_pow2(len); + str->cached_mem = realloc(str->cached_mem, size); +#ifndef UPB_HAVE_MSIZE + str->size = size; +#endif } str->len = len; + str->ptr = str->cached_mem; return str->ptr; } + +void upb_string_substr(upb_string *str, upb_string *target_str, + upb_strlen_t start, upb_strlen_t len) { + assert(str->ptr == NULL); + str->src = upb_string_getref(target_str); + str->ptr = upb_string_getrobuf(target_str) + start; + str->len = len; +} diff --git a/core/upb_string.h b/core/upb_string.h index 770dba7..7ec3d48 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -16,8 +16,6 @@ * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. */ @@ -37,16 +35,31 @@ extern "C" { // All members of this struct are private, and may only be read/written through // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { + // The pointer to our currently active data. This may be memory we own + // or a pointer into memory we don't own. char *ptr; + + // If non-NULL, this is a block of memory we own. We keep this cached even + // if "ptr" is currently aliasing memory we don't own. + char *cached_mem; + + // The effective length of the string (the bytes at ptr). int32_t len; +#ifndef UPB_HAVE_MSIZE + // How many bytes are allocated in cached_mem. + // + // Many platforms have a function that can tell you the size of a block + // that was previously malloc'd. In this case we can avoid storing the + // size explicitly. uint32_t size; +#endif + + // The string's refcount. 
upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_atomic_refcount_t reader_count; - } extra; + + // Used if this is a slice of another string, NULL otherwise. We own a ref + // on src. + struct _upb_string *src; }; // Returns a newly-created, empty, non-finalized string. When the string is no @@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len); +// Sketch of an API for allowing upb_strings to reference external, unowned +// data. Waiting for a clear use case before actually implementing it. +// // Makes the string "str" a reference to the given string data. The caller // guarantees that the given string data will not change or be deleted until // a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); +// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); +// void upb_string_detach(upb_string *str); // Allows using upb_strings in printf, ie: // upb_strptr str = UPB_STRLIT("Hello, World!\n"); @@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { } // Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); +INLINE upb_string *upb_strdupc(const char *src) { + return upb_strduplen(src, strlen(src)); +} // Appends 'append' to 's' in-place, resizing s if necessary. void upb_strcat(upb_string *s, upb_string *append); diff --git a/tests/test_string.c b/tests/test_string.c new file mode 100644 index 0000000..4fdab6c --- /dev/null +++ b/tests/test_string.c @@ -0,0 +1,56 @@ + +#undef NDEBUG /* ensure tests always assert. 
*/ +#include "upb_string.h" + +char static_str[] = "Static string."; + +int main() { + upb_string *str = upb_string_new(); + assert(str != NULL); + upb_string_unref(str); + + // Can also create a string by tryrecycle(NULL). + str = upb_string_tryrecycle(NULL); + assert(str != NULL); + + upb_strcpyc(str, static_str); + assert(upb_string_len(str) == (sizeof(static_str) - 1)); + const char *robuf = upb_string_getrobuf(str); + assert(robuf != NULL); + assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + upb_string_endread(str); + + upb_string *str2 = upb_string_tryrecycle(str); + // No other referents, so should return the same string. + assert(str2 == str); + + // Write a shorter string, the same memory should be reused. + upb_strcpyc(str, "XX"); + const char *robuf2 = upb_string_getrobuf(str); + assert(robuf2 == robuf); + assert(memcmp(robuf2, "XX", 2) == 0); + + // Make string alias part of another string. + str2 = upb_strdupc("WXYZ"); + upb_string_substr(str, str2, 1, 2); + assert(upb_string_len(str) == 2); + assert(upb_string_len(str2) == 4); + // The two strings should be aliasing the same data. + const char *robuf3 = upb_string_getrobuf(str); + const char *robuf4 = upb_string_getrobuf(str2); + assert(robuf3 == robuf4 + 1); + // The aliased string should have an extra ref. + assert(upb_atomic_read(&str2->refcount) == 2); + + // Recycling str should eliminate the extra ref. + str = upb_string_tryrecycle(str); + assert(upb_atomic_read(&str2->refcount) == 1); + + // Resetting str should reuse its old data.
+ upb_strcpyc(str, "XX"); + const char *robuf5 = upb_string_getrobuf(str); + assert(robuf5 == robuf); + + upb_string_unref(str); + upb_string_unref(str2); +} diff --git a/tests/test_table.cc b/tests/test_table.cc index 37e14a8..47d5e57 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -12,6 +12,8 @@ #include #include +bool benchmark = false; + using std::string; using std::vector; @@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries) } } + if(!benchmark) { + upb_inttable_free(&table); + return; + } + /* Test performance. We only test lookups for keys that are known to exist. */ uintptr_t x = 0; const unsigned int iterations = 0xFFFFFF; @@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num) return buf; } -int main() +int main(int argc, char *argv[]) { + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--benchmark") == 0) benchmark = true; + } + vector keys; keys.push_back("google.protobuf.FileDescriptorSet"); keys.push_back("google.protobuf.FileDescriptorProto"); -- cgit v1.2.3 From 2ef013126c682a44d15554ea7a04144fc9a10fed Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 13:28:47 -0700 Subject: Fleshed out upb_string further. Now upb_def's only unresolved references are upb_src. 
--- Makefile | 3 ++- core/upb_def.c | 11 +++++------ core/upb_string.c | 30 +++++++++++++++++++++++++++++- core/upb_string.h | 46 +++++++++++++++++++++++++++++++++++++--------- tests/test_string.c | 17 +++++++++++++++-- 5 files changed, 88 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 1f977b4..2abe0c7 100644 --- a/Makefile +++ b/Makefile @@ -87,7 +87,8 @@ tests/test.proto.pb: tests/test.proto protoc tests/test.proto -otests/test.proto.pb TESTS=tests/test_string \ - tests/test_table + tests/test_table \ + tests/test_def tests: $(TESTS) OTHER_TESTS=tests/tests \ diff --git a/core/upb_def.c b/core/upb_def.c index bfab738..1f57c70 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -44,13 +44,12 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ static upb_string *upb_join(upb_string *base, upb_string *name) { - upb_string *joined = upb_strdup(base); - upb_strlen_t len = upb_string_len(joined); - if(len > 0) { - upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; + if (upb_string_len(base) == 0) { + return upb_string_getref(name); + } else { + return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, + UPB_STRARG(base), UPB_STRARG(name)); } - upb_strcat(joined, name); - return joined; } // Qualify the defname for all defs starting with offset "start" with "str". 
diff --git a/core/upb_string.c b/core/upb_string.c
index f9af9e9..2f487aa 100644
--- a/core/upb_string.c
+++ b/core/upb_string.c
@@ -82,7 +82,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
   }
   str->len = len;
   str->ptr = str->cached_mem;
-  return str->ptr;
+  return str->cached_mem;
 }
 
 void upb_string_substr(upb_string *str, upb_string *target_str,
@@ -92,3 +92,40 @@ void upb_string_substr(upb_string *str, upb_string *target_str,
   str->ptr = upb_string_getrobuf(target_str) + start;
   str->len = len;
 }
+
+// Replaces the contents of "str" with the formatted output.  "str" must be
+// newly created or recycled.  If the output does not fit the first buffer the
+// string is recycled again, so the caller should hold the only reference
+// (upb_string_tryrecycle() returns a different string when it is shared).
+void upb_string_vprintf(upb_string *str, const char *format, va_list args) {
+  // Try once without reallocating.  We have to va_copy because we might have
+  // to call vsnprintf again.
+  uint32_t size = UPB_MAX(upb_string_size(str), 16);
+  char *buf = upb_string_getrwbuf(str, size);
+  va_list args_copy;
+  va_copy(args_copy, args);
+  int ret = vsnprintf(buf, size, format, args_copy);
+  va_end(args_copy);
+  // vsnprintf reports an output error with a negative value; treat that as
+  // empty output rather than letting it wrap to a huge unsigned length.
+  uint32_t true_size = ret < 0 ? 0 : (uint32_t)ret;
+
+  // vsnprintf returns the length *excluding* the terminating NUL, so a return
+  // value equal to "size" already means the last character was cut off.
+  if (true_size >= size) {
+    // Need to reallocate, leaving room for the NUL that vsnprintf appends.
+    str = upb_string_tryrecycle(str);
+    buf = upb_string_getrwbuf(str, true_size + 1);
+    vsnprintf(buf, true_size + 1, format, args);
+  }
+  str->len = true_size;
+}
+
+upb_string *upb_string_asprintf(const char *format, ...) {
+  upb_string *str = upb_string_new();
+  va_list args;
+  va_start(args, format);
+  upb_string_vprintf(str, format, args);
+  va_end(args);
+  return str;
+}
diff --git a/core/upb_string.h b/core/upb_string.h
index 7ec3d48..5cc0eaf 100644
--- a/core/upb_string.h
+++ b/core/upb_string.h
@@ -25,6 +25,7 @@
 
 #include
 #include
+#include
 #include "upb_atomic.h"
 #include "upb.h"
 
@@ -37,7 +38,7 @@ extern "C" {
 struct _upb_string {
   // The pointer to our currently active data.  This may be memory we own
   // or a pointer into memory we don't own.
-  char *ptr;
+  const char *ptr;
 
   // If non-NULL, this is a block of memory we own.  We keep this cached even
   // if "ptr" is currently aliasing memory we don't own.
@@ -111,16 +112,25 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // } upb_string *upb_string_tryrecycle(upb_string *str); -// The three options for setting the contents of a string. These may only be -// called when a string is first created or recycled; once other functions have -// been called on the string, these functions are not allowed until the string -// is recycled. +// The options for setting the contents of a string. These may only be called +// when a string is first created or recycled; once other functions have been +// called on the string, these functions are not allowed until the string is +// recycled. // Gets a pointer suitable for writing to the string, which is guaranteed to // have at least "len" bytes of data available. The size of the string will // become "len". char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); +// Replaces the contents of str with the contents of the given printf. +void upb_string_vprintf(upb_string *str, const char *format, va_list args); +INLINE void upb_string_printf(upb_string *str, const char *format, ...) { + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); +} + // Sets the contents of "str" to be the given substring of "target_str", to // which the caller must own a ref. void upb_string_substr(upb_string *str, upb_string *target_str, @@ -144,7 +154,7 @@ void upb_string_substr(upb_string *str, upb_string *target_str, /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. +// overflow. For the most part these only use the public upb_string interface. // More efficient than upb_strcmp if all you need is to test equality. INLINE bool upb_streql(upb_string *s1, upb_string *s2) { @@ -163,6 +173,17 @@ INLINE bool upb_streql(upb_string *s1, upb_string *s2) { // Like strcmp(). 
int upb_strcmp(upb_string *s1, upb_string *s2); +// Compare a upb_string with memory or a NULL-terminated C string. +INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) { + return len == upb_string_len(str) && + memcmp(upb_string_getrobuf(str), buf, len) == 0; +} + +INLINE bool upb_streqlc(upb_string *str, const void *buf) { + // Could be made one-pass. + return upb_streqllen(str, buf, strlen((const char*)buf)); +} + // Like upb_strcpy, but copies from a buffer and length. INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { memcpy(upb_string_getrwbuf(dest, len), src, len); @@ -175,10 +196,10 @@ INLINE void upb_strcpy(upb_string *dest, upb_string *src) { } // Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_string *dest, const char *src) { +INLINE void upb_strcpyc(upb_string *dest, const void *src) { // This does two passes over src, but that is necessary unless we want to // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); + upb_strcpylen(dest, src, strlen((const char*)src)); } // Returns a new string whose contents are a copy of s. @@ -200,11 +221,18 @@ INLINE upb_string *upb_strdupc(const char *src) { void upb_strcat(upb_string *s, upb_string *append); // Returns a new string that is a substring of the given string. -upb_string *upb_strslice(upb_string *s, int offset, int len); +INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { + upb_string *str = upb_string_new(); + upb_string_substr(str, s, offset, len); + return str; +} // Reads an entire file into a newly-allocated string. upb_string *upb_strreadfile(const char *filename); +// Returns a new string with the contents of the given printf. 
+upb_string *upb_string_asprintf(const char *format, ...); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tests/test_string.c b/tests/test_string.c index 4fdab6c..5e6e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -17,7 +17,7 @@ int main() { assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); assert(robuf != NULL); - assert(memcmp(robuf, static_str, upb_string_len(str)) == 0); + assert(upb_streqlc(str, static_str)); upb_string_endread(str); upb_string *str2 = upb_string_tryrecycle(str); @@ -28,7 +28,7 @@ int main() { upb_strcpyc(str, "XX"); const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); - assert(memcmp(robuf2, "XX", 2) == 0); + assert(upb_streqlc(str, "XX")); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -51,6 +51,19 @@ int main() { const char *robuf5 = upb_string_getrobuf(str); assert(robuf5 == robuf); + // Resetting str to something very long should require new data to be + // allocated. + str = upb_string_tryrecycle(str); + const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; + upb_strcpyc(str, longstring); + const char *robuf6 = upb_string_getrobuf(str); + assert(robuf6 != robuf); + assert(upb_streqlc(str, longstring)); + + // Test printf. + str = upb_string_tryrecycle(str); + upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + upb_string_unref(str); upb_string_unref(str2); } -- cgit v1.2.3 From 67b16cbe5c55d00d7e576cdf479392f3a0e927a5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 14:37:02 -0700 Subject: Basic test_def links and passes no-op test! 
--- Makefile | 8 +++++--- core/upb_def.c | 42 ++++++++++++++++++++--------------------- core/upb_def.h | 6 +----- core/upb_stream_vtbl.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++- stream/upb_decoder.c | 1 + 5 files changed, 78 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 2abe0c7..568dcad 100644 --- a/Makefile +++ b/Makefile @@ -102,14 +102,16 @@ VALGRIND=valgrind --leak-check=full --error-exitcode=1 #VALGRIND= test: tests @echo Running all tests under valgrind. + @set -e # Abort on error. # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table @for test in tests/*; do \ if [ -x ./$$test ] ; then \ - echo $(VALGRIND) ./$$test: \\c; \ - $(VALGRIND) ./$$test; \ + echo !!! $(VALGRIND) ./$$test; \ + $(VALGRIND) ./$$test || exit 1; \ fi \ - done; + done; \ + echo "All tests passed!" tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2: \ diff --git a/core/upb_def.c b/core/upb_def.c index 1f57c70..cc4fd80 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -764,7 +764,6 @@ static void upb_free_symtab(upb_strtable *t) void _upb_symtab_free(upb_symtab *s) { upb_free_symtab(&s->symtab); - upb_free_symtab(&s->psymtab); upb_rwlock_destroy(&s->lock); free(s); } @@ -932,30 +931,30 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - d->str = upb_string_tryrecycle(d->str); - upb_string_substr(d->str, d->input, d->offset, d->delimited_len); - } else { - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: - *val.uint64 = upb_baredecoder_readv64(d); - break; - case UPB_WIRE_TYPE_32BIT_VARINT: - *val.uint32 = upb_baredecoder_readv32(d); - break; - case UPB_WIRE_TYPE_64BIT: - *val.uint64 = upb_baredecoder_readf64(d); - break; - case UPB_WIRE_TYPE_32BIT: - *val.uint32 = upb_baredecoder_readf32(d); - break; - default: - assert(false); - } + 
switch(d->wire_type) { + case UPB_WIRE_TYPE_VARINT: + *val.uint64 = upb_baredecoder_readv64(d); + break; + case UPB_WIRE_TYPE_32BIT_VARINT: + *val.uint32 = upb_baredecoder_readv32(d); + break; + case UPB_WIRE_TYPE_64BIT: + *val.uint64 = upb_baredecoder_readf64(d); + break; + case UPB_WIRE_TYPE_32BIT: + *val.uint32 = upb_baredecoder_readf32(d); + break; + default: + assert(false); } return true; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + return true; +} + static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -977,6 +976,7 @@ static bool upb_baredecoder_endmsg(upb_baredecoder *d) static upb_src_vtable upb_baredecoder_src_vtbl = { (upb_src_getdef_fptr)&upb_baredecoder_getdef, (upb_src_getval_fptr)&upb_baredecoder_getval, + (upb_src_getstr_fptr)&upb_baredecoder_getstr, (upb_src_skipval_fptr)&upb_baredecoder_skipval, (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, diff --git a/core/upb_def.h b/core/upb_def.h index c297e83..5c8c11e 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -207,11 +207,7 @@ bool upb_enum_done(upb_enum_iter *iter); typedef struct { upb_atomic_refcount_t refcount; upb_rwlock_t lock; // Protects all members except the refcount. - upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. - - // Our symbol tables; we own refs to the defs therein. - upb_strtable symtab; // The main symbol table. - upb_strtable psymtab; // Private symbols, for internal use. + upb_strtable symtab; // The symbol table. } upb_symtab; // Initializes a upb_symtab. 
Contexts are not freed explicitly, but unref'd diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 0ec45d2..52172d2 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -27,28 +27,35 @@ struct upb_bytesink; typedef struct upb_bytesink upb_bytesink; // Typedefs for function pointers to all of the virtual functions. -typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); + +// upb_src. +typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src); typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_getstr_fptr)(upb_src *src, upb_string *str); typedef bool (*upb_src_skipval_fptr)(upb_src *src); typedef bool (*upb_src_startmsg_fptr)(upb_src *src); typedef bool (*upb_src_endmsg_fptr)(upb_src *src); +// upb_sink. typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); +// upb_bytesrc. typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); typedef bool (*upb_bytesrc_append_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// upb_bytesink. typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); // Vtables for the above interfaces. typedef struct { upb_src_getdef_fptr getdef; upb_src_getval_fptr getval; + upb_src_getstr_fptr getstr; upb_src_skipval_fptr skipval; upb_src_startmsg_fptr startmsg; upb_src_endmsg_fptr endmsg; @@ -86,6 +93,48 @@ INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { #endif } +// Implementation of virtual function dispatch. 
+INLINE struct _upb_fielddef *upb_src_getdef(upb_src *src) { + return src->vtbl->getdef(src); +} +INLINE bool upb_src_getval(upb_src *src, upb_valueptr val) { + return src->vtbl->getval(src, val); +} +INLINE bool upb_src_getstr(upb_src *src, upb_string *str) { + return src->vtbl->getstr(src, str); +} +INLINE bool upb_src_skipval(upb_src *src) { return src->vtbl->skipval(src); } +INLINE bool upb_src_startmsg(upb_src *src) { return src->vtbl->startmsg(src); } +INLINE bool upb_src_endmsg(upb_src *src) { return src->vtbl->endmsg(src); } + +// Implementation of type-specific upb_src accessors. If we encounter a upb_src +// where these can be implemented directly in a measurably more efficient way, +// we can make these part of the vtable also. +// +// For <64-bit types we have to use a temporary to accommodate baredecoder, +// which does not know the actual width of the type. +INLINE bool upb_src_getbool(upb_src *src, bool *_bool) { + upb_value val; + bool ret = upb_src_getval(src, upb_value_addrof(&val)); + *_bool = val._bool; + return ret; +} + +INLINE bool upb_src_getint32(upb_src *src, int32_t *i32) { + upb_value val; + bool ret = upb_src_getval(src, upb_value_addrof(&val)); + *i32 = val.int32; + return ret; +} + +// TODO. 
+bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index e3fdc49..52fc72b 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -536,6 +536,7 @@ static bool upb_decoder_skipgroup(upb_decoder *d) upb_src_vtable upb_decoder_src_vtbl = { (upb_src_getdef_fptr)&upb_decoder_getdef, (upb_src_getval_fptr)&upb_decoder_getval, + (upb_src_getstr_fptr)&upb_decoder_getstr, (upb_src_skipval_fptr)&upb_decoder_skipval, (upb_src_startmsg_fptr)&upb_decoder_startmsg, (upb_src_endmsg_fptr)&upb_decoder_endmsg, -- cgit v1.2.3 From db6c7387bc1df49deac41155a173e33017a75ed8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 18:11:24 -0700 Subject: Incremental progress towards getting upb_def to bootstrap. --- Makefile | 3 +- core/upb.c | 9 ++--- core/upb.h | 7 +++- core/upb_def.c | 102 ++++++++++++++++++++++--------------------------- core/upb_def.h | 62 +++++++++++++++++++++--------- core/upb_stream_vtbl.h | 1 + core/upb_table.c | 2 +- tests/test_string.c | 3 ++ 8 files changed, 105 insertions(+), 84 deletions(-) diff --git a/Makefile b/Makefile index 568dcad..2b2a269 100644 --- a/Makefile +++ b/Makefile @@ -48,12 +48,13 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ descriptor/descriptor.c +$(SRC): perf-cppflags # Parts of core that are yet to be converted. OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC diff --git a/core/upb.c b/core/upb.c index a98512d..9ed5617 100644 --- a/core/upb.c +++ b/core/upb.c @@ -44,12 +44,11 @@ void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...) { if(upb_ok(status)) { // The first error is the most interesting. - status->str = upb_string_new(); - char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); status->code = code; + status->str = upb_string_tryrecycle(status->str); va_list args; va_start(args, msg); - vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); + upb_string_vprintf(status->str, msg, args); va_end(args); } } @@ -57,10 +56,10 @@ void upb_seterr(upb_status *status, enum upb_status_code code, void upb_copyerr(upb_status *to, upb_status *from) { to->code = from->code; - to->str = upb_string_getref(from->str); + if(from->str) to->str = upb_string_getref(from->str); } -void upb_reset(upb_status *status) { +void upb_status_reset(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 230e638..630d9e1 100644 --- a/core/upb.h +++ b/core/upb.h @@ -195,7 +195,12 @@ INLINE bool upb_ok(upb_status *status) { return status->code == UPB_STATUS_OK; } -void upb_reset(upb_status *status); +INLINE void upb_status_init(upb_status *status) { + status->code = UPB_STATUS_OK; + status->str = NULL; +} + +void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index cc4fd80..0f48559 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -155,8 +155,9 @@ static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, } else { 
open_defs[num_open_defs++] = m; } - for(int i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter iter = upb_msg_begin(m); + for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { + upb_fielddef *f = upb_msg_iter_field(iter); upb_def *def = f->def; if(upb_issubmsg(f) && def->is_cyclic) { upb_msgdef *sub_m = upb_downcast_msgdef(def); @@ -230,16 +231,6 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ -typedef struct { - upb_strtable_entry e; - uint32_t value; -} ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_string *string; -} iton_ent; - static void upb_enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -271,8 +262,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } - ntoi_ent ntoi_ent = {{name, 0}, number}; - iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{name, 0}, number}; + upb_iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the @@ -291,11 +282,14 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + e->base.fqname = upb_string_tryrecycle(e->base.fqname); + CHECKSRC(upb_src_getstr(src, e->base.fqname)); case 
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: CHECK(upb_addenum_val(src, e, status)); break; @@ -304,37 +298,25 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) break; } } + assert(e->base.fqname); upb_deflist_push(defs, UPB_UPCAST(e)); return true; +src_err: + upb_copyerr(status, upb_src_status(src)); err: upb_enumdef_free(e); return false; } -static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { - iter->state = ent; - iter->name = ent->e.key; - iter->val = ent->value; -} - -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. - ntoi_ent *ent = upb_strtable_begin(&e->ntoi); - iter->e = e; - fill_iter(iter, ent); + return upb_inttable_begin(&e->iton); } -void upb_enum_next(upb_enum_iter *iter) { - ntoi_ent *ent = iter->state; - assert(ent); - ent = upb_strtable_next(&iter->e->ntoi, &ent->e); - iter->state = ent; - if(ent) fill_iter(iter, ent); -} - -bool upb_enum_done(upb_enum_iter *iter) { - return iter->state == NULL; +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { + assert(iter); + return upb_inttable_next(&e->iton, &iter->e); } @@ -346,7 +328,7 @@ static void upb_fielddef_free(upb_fielddef *f) { static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); - if(upb_hasdef(f) && f->owned) { + if(f->owned) { upb_def_unref(f->def); } } @@ -354,6 +336,8 @@ static void upb_fielddef_uninit(upb_fielddef *f) { static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) { upb_fielddef *f = malloc(sizeof(*f)); + f->number = -1; + f->name = NULL; f->def = NULL; f->owned = false; upb_fielddef *parsed_f; @@ -388,6 +372,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) } CHECKSRC(upb_src_eof(src)); // TODO: verify that all required fields were present. 
+ assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. @@ -461,9 +446,9 @@ err: static void upb_msgdef_free(upb_msgdef *m) { - for (upb_field_count_t i = 0; i < m->num_fields; i++) - upb_fielddef_uninit(&m->fields[i]); - free(m->fields); + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_uninit(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -479,6 +464,13 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { upb_def_ref(def); } +upb_msg_iter upb_msg_begin(upb_msgdef *m) { + return upb_inttable_begin(&m->itof); +} + +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { + return upb_inttable_next(&m->itof, &iter->e); +} /* symtab internal ***********************************************************/ @@ -601,8 +593,9 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } else { UPB_UPCAST(m)->search_depth = ++depth; bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_issubmsg(f)) continue; upb_def *sub_def = f->def; upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); @@ -632,8 +625,9 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Type names are resolved relative to the message in which they appear. upb_string *base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_hasdef(f)) continue; // No resolving necessary. 
upb_string *name = upb_downcast_unresolveddef(f->def)->name; @@ -873,7 +867,6 @@ typedef struct { upb_wire_type_t wire_type; upb_strlen_t delimited_len; upb_strlen_t stack[UPB_MAX_NESTING], *top; - upb_string *str; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -929,6 +922,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) return &d->field; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + d->offset += d->delimited_len; + return true; +} + static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { switch(d->wire_type) { @@ -950,11 +949,6 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) return true; } -static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { - upb_string_substr(str, d->input, d->offset, d->delimited_len); - return true; -} - static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -986,7 +980,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); - d->str = upb_string_new(); d->top = &d->stack[0]; upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; @@ -995,7 +988,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); - upb_string_unref(d->str); free(d); } @@ -1004,11 +996,8 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) return &d->src; } -upb_symtab *upb_get_descriptor_symtab() +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // TODO: implement sharing of symtabs, so that successive calls to this - // function will return the same symtab. - upb_symtab *symtab = upb_symtab_new(); // TODO: allow upb_strings to be static or on the stack. 
upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); @@ -1017,5 +1006,4 @@ upb_symtab *upb_get_descriptor_symtab() assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); - return symtab; } diff --git a/core/upb_def.h b/core/upb_def.h index 5c8c11e..82d8520 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -135,11 +135,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; - size_t size; - upb_field_count_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; // Required fields have the lowest set bytemasks. - upb_fielddef *fields; // We have exclusive ownership of these. // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -170,6 +165,21 @@ INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +// Iteration over fields. The order is undefined. +// upb_msg_iter i; +// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// // ... +// } +typedef upb_itof_ent *upb_msg_iter; + +upb_msg_iter upb_msg_begin(upb_msgdef *m); +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); +INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; } + +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { + return iter->f; +} + /* upb_enumdef ****************************************************************/ typedef struct _upb_enumdef { @@ -178,6 +188,16 @@ typedef struct _upb_enumdef { upb_inttable iton; } upb_enumdef; +typedef struct { + upb_strtable_entry e; + uint32_t value; +} upb_ntoi_ent; + +typedef struct { + upb_inttable_entry e; + upb_string *string; +} upb_iton_ent; + typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. 
@@ -186,18 +206,22 @@ upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { // // ... // } -typedef struct { - upb_enumdef *e; - void *state; // Internal iteration state. - upb_string *name; - upb_enumval_t val; -} upb_enum_iter; -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); -void upb_enum_next(upb_enum_iter *iter); -bool upb_enum_done(upb_enum_iter *iter); +typedef upb_iton_ent *upb_enum_iter; + +upb_enum_iter upb_enum_begin(upb_enumdef *e); +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); +INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; } + +INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { + return iter->string; +} +INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { + return iter->e.key; +} + /* upb_symtab *****************************************************************/ @@ -252,10 +276,10 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); -// Returns a symtab that defines google.protobuf.DescriptorProto and all other -// types that are defined in descriptor.proto. This allows you to load other -// proto types. The caller owns a ref on the returned symtab. -upb_symtab *upb_get_descriptor_symtab(); +// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. +// This is necessary for bootstrapping, since these are the upb_defs that +// specify other defs and allow them to be loaded. 
+void upb_symtab_add_descriptorproto(upb_symtab *s); /* upb_def casts **************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 52172d2..ba2670e 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -88,6 +88,7 @@ struct upb_bytesrc { INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; + upb_status_init(&s->status); #ifndef DEBUG // TODO: initialize debug-mode checking. #endif diff --git a/core/upb_table.c b/core/upb_table.c index b91776c..b860204 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -179,7 +179,7 @@ static void strinsert(upb_strtable *t, upb_strtable_entry *e) memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ upb_strtable_entry *evictee_e = strent(t, evictee_bucket); while(1) { - assert(!upb_string_isnull(evictee_e->key)); + assert(evictee_e->key); assert(evictee_e->next != UPB_END_OF_CHAIN); if(evictee_e->next == bucket) { evictee_e->next = empty_bucket; diff --git a/tests/test_string.c b/tests/test_string.c index 5e6e2a9..5869b70 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -66,4 +66,7 @@ int main() { upb_string_unref(str); upb_string_unref(str2); + + // Unref of NULL is harmless. + upb_string_unref(NULL); } -- cgit v1.2.3 From ae0beee2854b977f472d48cd149b880b074b59c5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 19:37:47 -0700 Subject: Fixed upb_string error with strange vsnprintf() behavior. 
--- core/upb.c | 9 +++++++++ core/upb.h | 1 + core/upb_def.c | 49 +++++++++++++++++++++++++++++++++++++------------ core/upb_string.c | 13 +++++++++---- tests/test_string.c | 9 +++++++++ 5 files changed, 65 insertions(+), 16 deletions(-) diff --git a/core/upb.c b/core/upb.c index 9ed5617..d581bbe 100644 --- a/core/upb.c +++ b/core/upb.c @@ -64,3 +64,12 @@ void upb_status_reset(upb_status *status) { upb_string_unref(status->str); status->str = NULL; } + +void upb_printerr(upb_status *status) { + if(status->str) { + fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", + status->code, UPB_STRARG(status->str)); + } else { + fprintf(stderr, "code: %d, no msg\n", status->code); + } +} diff --git a/core/upb.h b/core/upb.h index 630d9e1..13317bb 100644 --- a/core/upb.h +++ b/core/upb.h @@ -200,6 +200,7 @@ INLINE void upb_status_init(upb_status *status) { status->str = NULL; } +void upb_printerr(upb_status *status); void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); diff --git a/core/upb_def.c b/core/upb_def.c index 0f48559..2b2916e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -21,7 +21,7 @@ typedef struct { static void upb_deflist_init(upb_deflist *l) { l->size = 8; - l->defs = malloc(l->size); + l->defs = malloc(l->size * sizeof(void*)); l->len = 0; } @@ -34,7 +34,7 @@ static void upb_deflist_uninit(upb_deflist *l) { static void upb_deflist_push(upb_deflist *l, upb_def *d) { if(l->len == l->size) { l->size *= 2; - l->defs = realloc(l->defs, l->size); + l->defs = realloc(l->defs, l->size * sizeof(void*)); } l->defs[l->len++] = d; } @@ -238,6 +238,7 @@ static void upb_enumdef_free(upb_enumdef *e) { free(e); } +// google.protobuf.EnumValueDescriptorProto. 
static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) { int32_t number = -1; @@ -245,13 +246,13 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); - break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: name = upb_string_tryrecycle(name); CHECKSRC(upb_src_getstr(src, name)); break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &number)); + break; default: CHECKSRC(upb_src_skipval(src)); break; @@ -278,6 +279,7 @@ err: return false; } +// google.protobuf.EnumDescriptorProto. static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); @@ -290,8 +292,11 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: e->base.fqname = upb_string_tryrecycle(e->base.fqname); CHECKSRC(upb_src_getstr(src, e->base.fqname)); + break; case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); CHECK(upb_addenum_val(src, e, status)); + CHECKSRC(upb_src_endmsg(src)); break; default: upb_src_skipval(src); @@ -729,8 +734,10 @@ err: // We need to free all defs from "tmptab." 
upb_rwlock_unlock(&s->lock); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) + e = upb_strtable_next(&tmptab, &e->e)) { + fprintf(stderr, "Unreffing def: '" UPB_STRFMT "'\n", UPB_STRARG(e->e.key)); upb_def_unref(e->def); + } upb_strtable_free(&tmptab); return false; } @@ -914,10 +921,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) key = upb_baredecoder_readv32(d); d->wire_type = key & 0x7; d->field.number = key >> 3; + fprintf(stderr, "field num: %d, wire_type: %d\n", d->field.number, d->wire_type); if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. d->delimited_len = upb_baredecoder_readv32(d); + fprintf(stderr, "delimited size: %d\n", d->delimited_len); } return &d->field; } @@ -944,6 +953,7 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) *val.uint32 = upb_baredecoder_readf32(d); break; default: + *(char*)0 = 0; assert(false); } return true; @@ -951,19 +961,24 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) static bool upb_baredecoder_skipval(upb_baredecoder *d) { - upb_value val; - return upb_baredecoder_getval(d, upb_value_addrof(&val)); + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + d->offset += d->delimited_len; + return true; + } else { + upb_value val; + return upb_baredecoder_getval(d, upb_value_addrof(&val)); + } } static bool upb_baredecoder_startmsg(upb_baredecoder *d) { - *(d->top++) = d->offset + d->delimited_len; + *(++d->top) = d->offset + d->delimited_len; return true; } static bool upb_baredecoder_endmsg(upb_baredecoder *d) { - d->offset = *(--d->top); + d->offset = *(d->top--); return true; } @@ -980,7 +995,9 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); + d->offset = 0; d->top = &d->stack[0]; + *(d->top) = upb_string_len(d->input); 
upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; } @@ -1001,9 +1018,17 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) // TODO: allow upb_strings to be static or on the stack. upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); - upb_status status; + upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); - assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); + + if(!upb_ok(&status)) { + // upb itself is corrupt. + upb_printerr(&status); + upb_symtab_unref(symtab); + abort(); + } + fprintf(stderr, "Claims to have succeeded\n"); + upb_printerr(&status); } diff --git a/core/upb_string.c b/core/upb_string.c index 2f487aa..3563c9e 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -87,6 +87,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len) { + if(str->ptr) *(char*)0 = 0; assert(str->ptr == NULL); str->src = upb_string_getref(target_str); str->ptr = upb_string_getrobuf(target_str) + start; @@ -103,11 +104,15 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { uint32_t true_size = vsnprintf(buf, size, format, args_copy); va_end(args_copy); - if (true_size > size) { - // Need to reallocate. + if (true_size >= size) { + // Need to reallocate. We reallocate even if the sizes were equal, + // because snprintf excludes the terminating NULL from its count. + // We don't care about the terminating NULL, but snprintf might + // bail out of printing even other characters if it doesn't have + // enough space to write the NULL also. 
str = upb_string_tryrecycle(str); - buf = upb_string_getrwbuf(str, true_size); - vsnprintf(buf, true_size, format, args); + buf = upb_string_getrwbuf(str, true_size + 1); + vsnprintf(buf, true_size + 1, format, args); } str->len = true_size; } diff --git a/tests/test_string.c b/tests/test_string.c index 5869b70..46f35b9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -32,6 +32,7 @@ int main() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); + str = upb_string_tryrecycle(str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -63,9 +64,17 @@ int main() { // Test printf. str = upb_string_tryrecycle(str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + assert(upb_streqlc(str, "Number: 5, String: YO!")); + + // Test asprintf + upb_string *str3 = upb_string_asprintf("Yo %s: " UPB_STRFMT "\n", + "Josh", UPB_STRARG(str)); + const char expected[] = "Yo Josh: Number: 5, String: YO!\n"; + assert(upb_streqlc(str3, expected)); upb_string_unref(str); upb_string_unref(str2); + upb_string_unref(str3); // Unref of NULL is harmless. upb_string_unref(NULL); -- cgit v1.2.3 From c7a95061a7c02ffeebd71eeb56bf19fc1c1797dd Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 20:13:06 -0700 Subject: Successfully bootstraps!! 
--- core/upb.c | 2 +- core/upb.h | 2 +- core/upb_def.c | 27 ++++++++++++++------------- tests/test_def.c | 24 ++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) create mode 100644 tests/test_def.c diff --git a/core/upb.c b/core/upb.c index d581bbe..c396323 100644 --- a/core/upb.c +++ b/core/upb.c @@ -59,7 +59,7 @@ void upb_copyerr(upb_status *to, upb_status *from) if(from->str) to->str = upb_string_getref(from->str); } -void upb_status_reset(upb_status *status) { +void upb_clearerr(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 13317bb..b605fd9 100644 --- a/core/upb.h +++ b/core/upb.h @@ -201,7 +201,7 @@ INLINE void upb_status_init(upb_status *status) { } void upb_printerr(upb_status *status); -void upb_status_reset(upb_status *status); +void upb_clearerr(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index 2b2916e..b9402c5 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -211,7 +211,9 @@ static void upb_def_uninit(upb_def *def) { typedef struct _upb_unresolveddef { upb_def base; - // The target type name. This may or may not be fully qualified. + // The target type name. This may or may not be fully qualified. It is + // tempting to want to use base.fqname for this, but that will be qualified + // which is inappropriate for a name we still have to resolve. 
upb_string *name; } upb_unresolveddef; @@ -224,6 +226,7 @@ static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { } static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { + upb_string_unref(def->name); upb_def_uninit(&def->base); free(def); } @@ -232,6 +235,10 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ static void upb_enumdef_free(upb_enumdef *e) { + upb_enum_iter i; + for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + upb_string_unref(upb_enum_iter_name(i)); + } upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); upb_def_uninit(&e->base); @@ -328,14 +335,11 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { /* upb_fielddef ***************************************************************/ static void upb_fielddef_free(upb_fielddef *f) { - free(f); -} - -static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); if(f->owned) { upb_def_unref(f->def); } + free(f); } static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) @@ -453,7 +457,7 @@ static void upb_msgdef_free(upb_msgdef *m) { upb_msg_iter i; for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_uninit(upb_msg_iter_field(i)); + upb_fielddef_free(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -487,7 +491,7 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: package = upb_string_tryrecycle(package); CHECKSRC(upb_src_getstr(src, package)); break; @@ -589,6 +593,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) "in a cycle of length %d, which exceeds 
the maximum type " "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, UPB_MAX_TYPE_CYCLE_LEN); + return false; } return true; } else if(UPB_UPCAST(m)->search_depth > 0) { @@ -664,7 +669,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; // The findcycles() call will decrement the external refcount of the - if(!upb_symtab_findcycles(m, 0, status)) return false; + upb_symtab_findcycles(m, 0, status); upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); } @@ -735,7 +740,6 @@ err: upb_rwlock_unlock(&s->lock); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; e = upb_strtable_next(&tmptab, &e->e)) { - fprintf(stderr, "Unreffing def: '" UPB_STRFMT "'\n", UPB_STRARG(e->e.key)); upb_def_unref(e->def); } upb_strtable_free(&tmptab); @@ -921,12 +925,10 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) key = upb_baredecoder_readv32(d); d->wire_type = key & 0x7; d->field.number = key >> 3; - fprintf(stderr, "field num: %d, wire_type: %d\n", d->field.number, d->wire_type); if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. d->delimited_len = upb_baredecoder_readv32(d); - fprintf(stderr, "delimited size: %d\n", d->delimited_len); } return &d->field; } @@ -1026,9 +1028,8 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) if(!upb_ok(&status)) { // upb itself is corrupt. upb_printerr(&status); + upb_clearerr(&status); upb_symtab_unref(symtab); abort(); } - fprintf(stderr, "Claims to have succeeded\n"); - upb_printerr(&status); } diff --git a/tests/test_def.c b/tests/test_def.c new file mode 100644 index 0000000..e6f95d7 --- /dev/null +++ b/tests/test_def.c @@ -0,0 +1,24 @@ + +#undef NDEBUG /* ensure tests always assert. 
*/ +#include "upb_def.h" +#include <stdlib.h> + +int main() { + upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); + + int count; + upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); + for (int i = 0; i < count; i++) { + upb_def_unref(defs[i]); + } + free(defs); + + upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); + upb_def *fds = upb_symtab_lookup(s, str); + assert(fds != NULL); + assert(upb_dyncast_msgdef(fds) != NULL); + upb_def_unref(fds); + upb_string_unref(str); + upb_symtab_unref(s); +} -- cgit v1.2.3 From fcfc37e7d41f87bc9ff5ecfb64e0aebb3457c633 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 11 Jul 2010 16:58:44 -0700 Subject: Reduce decoder memory usage. The "field" entry was only being used to determine whether we were inside a group, but the "end_offset" member contains enough information to tell us that. --- Makefile | 2 +- stream/upb_decoder.c | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 2b2a269..c37df72 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 52fc72b..c06660f 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -29,8 +29,7 @@ static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } // upb_decoder_frame is one frame of that stack. typedef struct { upb_msgdef *msgdef; - upb_fielddef *field; - upb_strlen_t end_offset; // For groups, -1. + upb_strlen_t end_offset; // For groups, UPB_GROUP_END_OFFSET. 
} upb_decoder_frame; struct upb_decoder { @@ -57,9 +56,6 @@ struct upb_decoder { // The overall stream offset of the beginning of "buf". uint32_t buf_stream_offset; - // Fielddef for the key we just read. - upb_fielddef *field; - // Wire type of the key we just read. upb_wire_type_t wire_type; @@ -68,6 +64,9 @@ struct upb_decoder { upb_strlen_t packed_end_offset; + // Fielddef for the key we just read. + upb_fielddef *field; + // We keep a stack of messages we have recursed into. upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; }; @@ -455,7 +454,6 @@ bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { static bool upb_decoder_skipgroup(upb_decoder *d); bool upb_decoder_startmsg(upb_decoder *d) { - d->top->field = d->field; if(++d->top >= d->limit) { upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", @@ -476,7 +474,7 @@ bool upb_decoder_endmsg(upb_decoder *d) { if(d->top > d->stack) { --d->top; if(!d->src.eof) { - if(d->top->field->type == UPB_TYPE(GROUP)) + if(d->top->end_offset == UPB_GROUP_END_OFFSET) upb_decoder_skipgroup(d); else upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); -- cgit v1.2.3 From 7a6a702792e769366a8852fc90dbea9cfc9e01c0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 11 Jul 2010 18:53:27 -0700 Subject: Allow static upb_strings. This can allow strings to reference static data, and reduced the memory footprint of test_def by about 10% (3k). 
--- core/upb_def.c | 4 +--- core/upb_string.c | 12 ++++++++--- core/upb_string.h | 57 +++++++++++++++++++++++++++++++++++++++++++++---- descriptor/descriptor.c | 7 ++++-- descriptor/descriptor.h | 5 +++-- tests/test_string.c | 33 +++++++++++++++++++++++++++- 6 files changed, 103 insertions(+), 15 deletions(-) diff --git a/core/upb_def.c b/core/upb_def.c index b9402c5..c0d72db 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -1018,12 +1018,10 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // TODO: allow upb_strings to be static or on the stack. - upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); - upb_baredecoder *decoder = upb_baredecoder_new(descriptor); + upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); upb_baredecoder_free(decoder); - upb_string_unref(descriptor); if(!upb_ok(&status)) { // upb itself is corrupt. diff --git a/core/upb_string.c b/core/upb_string.c index 3563c9e..ca3c669 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,12 +61,12 @@ void _upb_string_free(upb_string *str) { } upb_string *upb_string_tryrecycle(upb_string *str) { - if(str == NULL || upb_atomic_read(&str->refcount) > 1) { - return upb_string_new(); - } else { + if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); return str; + } else { + return upb_string_new(); } } @@ -125,3 +125,9 @@ upb_string *upb_string_asprintf(const char *format, ...) 
{ va_end(args); return str; } + +upb_string *upb_strdup(upb_string *s) { + upb_string *str = upb_string_new(); + upb_strcpy(str, s); + return str; +} diff --git a/core/upb_string.h b/core/upb_string.h index 5cc0eaf..65ba404 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -63,6 +63,17 @@ struct _upb_string { struct _upb_string *src; }; +// Internal-only initializer for upb_string instances. +#ifdef UPB_HAVE_MSIZE +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, {refcount}, NULL} +#else +#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, 0, {refcount}, NULL} +#endif + +// Special pseudo-refcounts for static/stack-allocated strings, respectively. +#define _UPB_STRING_REFCOUNT_STATIC -1 +#define _UPB_STRING_REFCOUNT_STACK -2 + // Returns a newly-created, empty, non-finalized string. When the string is no // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); @@ -72,15 +83,21 @@ void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" // can be NULL, in which case this is a no-op. INLINE void upb_string_unref(upb_string *str) { - if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); + if (str && upb_atomic_read(&str->refcount) > 0 && + upb_atomic_unref(&str->refcount)) { + _upb_string_free(str); + } } +upb_string *upb_strdup(upb_string *s); // Forward-declare. + // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. INLINE upb_string *upb_string_getref(upb_string *str) { - // If/when we support stack-allocated strings, this will have to allocate - // a new string if the given string is on the stack. - upb_atomic_ref(&str->refcount); + int refcount = upb_atomic_read(&str->refcount); + if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); + // We don't ref the special <0 refcount for static strings. 
+ if (refcount > 0) upb_atomic_ref(&str->refcount); return str; } @@ -151,6 +168,38 @@ void upb_string_substr(upb_string *str, upb_string *target_str, #define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) #define UPB_STRFMT "%.*s" +// Macros for constructing upb_string objects statically or on the stack. These +// can be used like: +// +// upb_string static_str = UPB_STATIC_STRING("Foo"); +// +// int main() { +// upb_string stack_str = UPB_STACK_STRING("Foo"); +// // Now: +// // upb_streql(&static_str, &stack_str) == true +// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true +// } +// +// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays, +// but you must not change the underlying data once you've passed the string on: +// +// void foo() { +// char data[] = "ABC123"; +// upb_string stack_str = UPB_STACK_STRING(data); +// bar(&stack_str); +// data[0] = 'B'; // NOT ALLOWED!! +// } +// +// TODO: should the stack business just be like attach/detach? The latter seems +// more flexible, though it does require a stack allocation. Maybe put this off +// until there is a clear use case. 
+#define UPB_STATIC_STRING(str) \ + _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STATIC_STRING_LEN(str, len) \ + _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) +#define UPB_STACK_STRING(str) _UPB_STRING_INIT(str, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) + /* upb_string library functions ***********************************************/ // Named like their counterparts, these are all safe against buffer diff --git a/descriptor/descriptor.c b/descriptor/descriptor.c index cd50a16..ee6b25b 100644 --- a/descriptor/descriptor.c +++ b/descriptor/descriptor.c @@ -1,4 +1,6 @@ -unsigned char descriptor_pb[] = { +#include "descriptor.h" + +static unsigned char descriptor_pb[] = { 0x0a, 0x9b, 0x1b, 0x0a, 0x1b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x67, 0x6f, @@ -291,4 +293,5 @@ unsigned char descriptor_pb[] = { 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x48, 0x01 }; -unsigned int descriptor_pb_len = 3486; +static const unsigned int descriptor_pb_len = 3486; +upb_string descriptor_str = UPB_STATIC_STRING(descriptor_pb); diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index b598a9a..f6d3ca3 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -11,12 +11,13 @@ #ifndef UPB_DESCRIPTOR_H_ #define UPB_DESCRIPTOR_H_ +#include "upb_string.h" + #ifdef __cplusplus extern "C" { #endif -extern unsigned char descriptor_pb[]; -extern unsigned int descriptor_pb_len; +extern upb_string descriptor_str; #ifdef __cplusplus } /* extern "C" */ diff --git a/tests/test_string.c b/tests/test_string.c index 46f35b9..7c9ed02 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -3,8 +3,33 @@ #include "upb_string.h" char static_str[] = "Static string."; +upb_string static_upbstr = 
UPB_STATIC_STRING(static_str); -int main() { +static void test_static() { + // Static string is initialized appropriately. + assert(upb_streql(&static_upbstr, UPB_STRLIT("Static string."))); + + // Taking a ref on a static string returns the same string, and repeated + // refs don't get the string in a confused state. + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + assert(upb_string_getref(&static_upbstr) == &static_upbstr); + + // Unreffing a static string does nothing (is not harmful). + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + upb_string_unref(&static_upbstr); + + // Recycling a static string returns a new string (that can be modified). + upb_string *str = upb_string_tryrecycle(&static_upbstr); + assert(str != &static_upbstr); + + upb_string_unref(str); +} + +static void test_dynamic() { upb_string *str = upb_string_new(); assert(str != NULL); upb_string_unref(str); @@ -29,6 +54,7 @@ int main() { const char *robuf2 = upb_string_getrobuf(str); assert(robuf2 == robuf); assert(upb_streqlc(str, "XX")); + assert(upb_streql(str, UPB_STRLIT("XX"))); // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); @@ -79,3 +105,8 @@ int main() { // Unref of NULL is harmless. upb_string_unref(NULL); } + +int main() { + test_static(); + test_dynamic(); +} -- cgit v1.2.3 From c53921d2fef69647cbeec3709962a17ac32fa119 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 12 Jul 2010 01:03:18 -0700 Subject: Implemented upb_streamdata(). upb_streamdata() reads data from a upb_src until EOF or error, passing all the data to a upb_sink. 
--- core/upb_stream.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 core/upb_stream.c diff --git a/core/upb_stream.c b/core/upb_stream.c new file mode 100644 index 0000000..e0863b8 --- /dev/null +++ b/core/upb_stream.c @@ -0,0 +1,44 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_stream.h" + +#include "upb_def.h" + +#define CHECKSRC(x) if(!x) goto src_err +#define CHECKSINK(x) if(!x) goto sink_err + +void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { + upb_fielddef *f; + upb_string *str = NULL; + while((f = upb_src_getdef(src)) != NULL) { + CHECKSINK(upb_sink_putdef(sink, f)); + if(f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE)) { + // We always recurse into submessages, but the putdef above already told + // the sink that. + } else if(f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES)) { + str = upb_string_tryrecycle(str); + CHECKSRC(upb_src_getstr(src, str)); + CHECKSINK(upb_sink_putstr(sink, str)); + } else { + // Primitive type. + upb_value val; + CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); + CHECKSINK(upb_sink_putval(sink, val)); + } + } + // If we're not EOF now, the loop terminated due to an error. + CHECKSRC(upb_src_eof(src)); + return; + +src_err: + upb_copyerr(status, upb_src_status(src)); + return; + +sink_err: + upb_copyerr(status, upb_sink_status(sink)); + return; +} -- cgit v1.2.3 From 57ad204ceaef0943bba11bdc5d4d98f2d179a22f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 12 Jul 2010 01:04:14 -0700 Subject: Implemented upb_stdio (upb_bytesrc/upb_bytesink). 
--- stream/upb_stdio.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ stream/upb_stdio.h | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 stream/upb_stdio.c create mode 100644 stream/upb_stdio.h diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c new file mode 100644 index 0000000..7cbca91 --- /dev/null +++ b/stream/upb_stdio.c @@ -0,0 +1,61 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_stdio.h" + +// We can make this configurable if necessary. +#define BLOCK_SIZE 4096 + +struct upb_stdio { + upb_bytesrc bytesrc; + upb_bytesink bytesink; + FILE *file; +} + +static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, + int offset, int bytes_to_read) { + char *buf = upb_string_getrwbuf(offset + bytes_to_read) + offset; + size_t read = fread(buf, 1, bytes_to_read, stdio->file); + if(read < bytes_to_read) { + // Error or EOF. + stdio->bytesrc.eof = feof(stdio->file); + if(ferror(stdio->file)) { + upb_seterr(&stdio->bytesrc.status, UPB_STATUS_ERROR, + "Error reading from stdio stream."); + return false; + } + // Resize to actual read size. + upb_string_getrwbuf(str, offset + read); + } + return true; +} + +bool upb_stdio_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen) { + // We ignore "minlen" since the stdio interfaces always return a full read + // unless they are at EOF. 
+ (void)minlen; + return upb_stdio_read((upb_stdio*)src, str, 0, BLOCK_SIZE); +} + +bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { + return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); +} + +int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { + upb_stdio *stdio = (upb_stdio*)sink - offsetof(upb_stdio, bytesink); + upb_strlen_t len = upb_string_len(str); + size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + if(written < len) { + // Error or EOF. + stdio->bytesink.eof = feof(stdio->file); + if(ferror(stdio->file)) { + upb_seterr(&stdio->bytesink.status, UPB_STATUS_ERROR, + "Error writing to stdio stream."); + return 0; + } + } + return written; +} diff --git a/stream/upb_stdio.h b/stream/upb_stdio.h new file mode 100644 index 0000000..3c29fcb --- /dev/null +++ b/stream/upb_stdio.h @@ -0,0 +1,42 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file provides upb_bytesrc and upb_bytesink implementations for + * ANSI C stdio. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include <stdio.h> +#include "upb_stream.h" + +#ifndef UPB_STDIO_H_ +#define UPB_STDIO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_stdio; +typedef struct upb_stdio upb_stdio; + +// Creation/deletion. +upb_stdio_ *upb_stdio__new(); +void upb_stdio_free(upb_stdio *stdio); + +// Reset/initialize the object for use. The src or sink will call +// fread()/fwrite()/etc. on the given FILE*. +void upb_stdio_reset(upb_stdio *stdio, FILE* file); + +// Gets a bytesrc or bytesink for the given stdio. The returned pointer is +// invalidated by upb_stdio_reset above. It is perfectly valid to get both +// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading +// and writing. 
+upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio); +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From 8e138c4687bf021d40be5134228940dbfe2fbd45 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 12 Jul 2010 10:21:53 -0700 Subject: Added more comments for upb_src interface. --- Makefile | 1 + core/upb_stream.c | 4 ++-- core/upb_stream.h | 51 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index c37df72..166ca3a 100644 --- a/Makefile +++ b/Makefile @@ -47,6 +47,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ + core/upb_stream.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. diff --git a/core/upb_stream.c b/core/upb_stream.c index e0863b8..bda11de 100644 --- a/core/upb_stream.c +++ b/core/upb_stream.c @@ -16,10 +16,10 @@ void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { upb_string *str = NULL; while((f = upb_src_getdef(src)) != NULL) { CHECKSINK(upb_sink_putdef(sink, f)); - if(f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE)) { + if(upb_issubmsg(f)) { // We always recurse into submessages, but the putdef above already told // the sink that. - } else if(f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES)) { + } else if(upb_isstring(f)) { str = upb_string_tryrecycle(str); CHECKSRC(upb_src_getstr(src, str)); CHECKSINK(upb_sink_putstr(sink, str)); diff --git a/core/upb_stream.h b/core/upb_stream.h index e7b4074..9147e45 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -35,29 +35,58 @@ struct _upb_fielddef; /* upb_src ********************************************************************/ +// A upb_src is a pull parser for protobuf data. 
Sample usage: +// +// #define CHECK(x) if(!x) goto err; +// +// bool parse_msg(upb_src *src, int indent) { +// upb_fielddef *f; +// while ((f = upb_src_getdef(src)) != NULL) { +// for (int i = 0; i < indent; i++) putchar(' '); +// printf("Parsed field; name=" UPB_STRFMT ", num=%d", +// UPB_STRARG(f->name), f->number); +// if (upb_issubmsg(f)) { +// CHECK(upb_src_startmsg(src)); +// CHECK(parse_msg(src, indent + 2)); +// CHECK(upb_src_endmsg(src)); +// } else { +// CHECK(upb_src_skipval(src)); +// } +// } +// // We should be EOF now, otherwise there was an error. +// CHECK(upb_src_eof(src)); +// return true; +// +// err: +// return false; +// } +// // TODO: decide how to handle unknown fields. // Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. +// error or end-of-stream. End of stream can simply mean end of submessage. struct _upb_fielddef *upb_src_getdef(upb_src *src); -// Retrieves and stores the next value in "val". For string types "val" must -// be a newly-recycled string. Returns false on error. +// Retrieves and stores the next value in "val". upb_src_getval() is for all +// numeric types and upb_src_getstr() is for strings. For string types "str" +// must be a newly-recycled string. Returns false on error. bool upb_src_getval(upb_src *src, upb_valueptr val); bool upb_src_getstr(upb_src *src, upb_string *val); // Like upb_src_getval() but skips the value. bool upb_src_skipval(upb_src *src); -// Descends into a submessage. May only be called after a def has been -// returned that indicates a submessage. +// Descends into a submessage. May only be called when upb_issubmsg(f) is true +// for an f = upb_src_getdef(src) that was just parsed. bool upb_src_startmsg(upb_src *src); // Stops reading a submessage. May be called before the stream is EOF, in // which case the rest of the submessage is skipped. bool upb_src_endmsg(upb_src *src); -// Returns the current error/eof status for the stream. 
+// Returns the current error/eof status for the stream. If a stream is eof +// but we are inside a submessage, calling upb_src_endmsg(src) will reset +// the eof marker. INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } INLINE bool upb_src_eof(upb_src *src) { return src->eof; } @@ -80,9 +109,7 @@ bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); // Puts the given value into the stream. bool upb_sink_putval(upb_sink *sink, upb_value val); - -// Starts a submessage. (needed? the def tells us we're starting a submsg.) -bool upb_sink_startmsg(upb_sink *sink); +bool upb_sink_putstr(upb_sink *sink, upb_string *str); // Ends a submessage. bool upb_sink_endmsg(upb_sink *sink); @@ -114,6 +141,12 @@ int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. upb_status *upb_bytesink_status(upb_bytesink *sink); +/* Utility functions **********************************************************/ + +// Streams data from src to sink until EOF or error. +void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status); + + #ifdef __cplusplus } /* extern "C" */ #endif -- cgit v1.2.3 From 5b5e26144ddcfcbbee8b6df843a7d169d4cf1eea Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 13 Jul 2010 20:44:27 -0700 Subject: Work on upb_textprinter. 
--- stream/upb_text.c | 54 +++++++++++++----------------------------------------- 1 file changed, 13 insertions(+), 41 deletions(-) diff --git a/stream/upb_text.c b/stream/upb_text.c index 8662269..4a25ecd 100644 --- a/stream/upb_text.c +++ b/stream/upb_text.c @@ -9,9 +9,9 @@ #include "upb_text.h" #include "upb_data.h" -void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file) -{ -#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break; +bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { + upb_string *p->str = upb_string_tryrecycle(p->str); +#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; switch(type) { case UPB_TYPE(DOUBLE): CASE("%0.f", _double); @@ -34,18 +34,22 @@ void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file) CASE("%" PRIu32, uint32); case UPB_TYPE(BOOL): CASE("%hhu", _bool); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - /* TODO: escaping. */ - fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; } + return upb_bytesink_put(p->str); +} + +bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { + upb_bytesink_put(UPB_STRLIT("\"")); + // TODO: escaping. 
+ upb_bytesink_put(str); + upb_bytesink_put(UPB_STRLIT("\"")); } static void print_indent(upb_text_printer *p, FILE *stream) { if(!p->single_line) for(int i = 0; i < p->indent_depth; i++) - fprintf(stream, " "); + upb_bytesink_put(UPB_STRLIT(" ")); } void upb_text_printfield(upb_text_printer *p, upb_strptr name, @@ -61,7 +65,7 @@ void upb_text_printfield(upb_text_printer *p, upb_strptr name, fputc('\n', stream); } -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream) +void upb_textprinter_startmsg(upb_textprinter *p) { print_indent(p, stream); fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); @@ -76,28 +80,6 @@ void upb_text_pop(upb_text_printer *p, FILE *stream) fprintf(stream, "}\n"); } -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream); - -static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md, - FILE *stream) -{ - for(upb_field_count_t i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[i]; - if(!upb_msg_has(msg, f)) continue; - upb_value v = upb_msg_get(msg, f); - if(upb_isarray(f)) { - upb_arrayptr arr = v.arr; - for(uint32_t j = 0; j < upb_array_len(arr); j++) { - upb_value elem = upb_array_get(arr, f, j); - printval(printer, elem, f, stream); - } - } else { - printval(printer, v, f, stream); - } - } -} - static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, FILE *stream) { @@ -109,13 +91,3 @@ static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, upb_text_printfield(printer, f->name, f->type, v, stream); } } - - -void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line, - FILE *stream) -{ - upb_text_printer printer; - upb_text_printer_init(&printer, single_line); - printmsg(&printer, msg, md, stream); -} - -- cgit v1.2.3 From 87b2c69c15716b96a294f5918878fb8b7b9a0b40 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 12:56:04 -0700 Subject: Fleshed out upb_stdio and 
upb_textprinter. test_decoder now compiles and links! But it doesn't work yet. --- Makefile | 7 +-- core/upb_stream.h | 5 +- core/upb_stream_vtbl.h | 110 +++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 4 ++ stream/upb_decoder.h | 2 +- stream/upb_stdio.c | 37 +++++++++++-- stream/upb_stdio.h | 2 +- stream/upb_text.c | 93 --------------------------------- stream/upb_text.h | 36 ------------- stream/upb_textprinter.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++ stream/upb_textprinter.h | 30 +++++++++++ 11 files changed, 310 insertions(+), 147 deletions(-) delete mode 100644 stream/upb_text.c delete mode 100644 stream/upb_text.h create mode 100644 stream/upb_textprinter.c create mode 100644 stream/upb_textprinter.h diff --git a/Makefile b/Makefile index 166ca3a..10ef96d 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $ CC=gcc CXX=g++ CFLAGS=-std=c99 -INCLUDE=-Idescriptor -Icore -Itests -I. +INCLUDE=-Idescriptor -Icore -Itests -Istream -I. CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread @@ -47,7 +47,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
@@ -90,7 +90,8 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_def + tests/test_def \ + tests/test_decoder tests: $(TESTS) OTHER_TESTS=tests/tests \ diff --git a/core/upb_stream.h b/core/upb_stream.h index 9147e45..b7400c5 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -111,7 +111,10 @@ bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); bool upb_sink_putval(upb_sink *sink, upb_value val); bool upb_sink_putstr(upb_sink *sink, upb_string *str); -// Ends a submessage. +// Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a +// client could have a submessage already serialized, and therefore put it +// as a string instead of its individual elements. +bool upb_sink_startmsg(upb_sink *sink); bool upb_sink_endmsg(upb_sink *sink); // Returns the current error status for the stream. diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ba2670e..96f6cfe 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -5,6 +5,21 @@ * interfaces. Only components that are implementing these interfaces need * to worry about this file. * + * This is tedious; this is the place in upb where I most wish I had a C++ + * feature. In C++ the compiler would generate this all for me. If there's + * any consolation, it's that I have a bit of flexibility you don't have in + * C++: I could, with preprocessor magic alone "de-virtualize" this interface + * for a particular source file. Say I had a C file that called a upb_src, + * but didn't want to pay the virtual function overhead. I could define: + * + * #define upb_src_getdef(src) upb_decoder_getdef((upb_decoder*)src) + * #define upb_src_startmsg(src) upb_decoder_startmsg((upb_decoder*)src) + * // etc. 
+ * + * The source file is compatible with the regular upb_src interface, but here + * we bind it to a particular upb_src (upb_decoder), which could lead to + * improved performance at a loss of flexibility for this one upb_src client. + * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. */ @@ -39,12 +54,13 @@ typedef bool (*upb_src_endmsg_fptr)(upb_src *src); // upb_sink. typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_putstr_fptr)(upb_sink *sink, upb_string *str); typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); // upb_bytesrc. -typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); -typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); +typedef bool (*upb_bytesrc_get_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); typedef bool (*upb_bytesrc_append_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t len); @@ -61,12 +77,23 @@ typedef struct { upb_src_endmsg_fptr endmsg; } upb_src_vtable; +typedef struct { + upb_sink_putdef_fptr putdef; + upb_sink_putval_fptr putval; + upb_sink_putstr_fptr putstr; + upb_sink_startmsg_fptr startmsg; + upb_sink_endmsg_fptr endmsg; +} upb_sink_vtable; + typedef struct { upb_bytesrc_get_fptr get; upb_bytesrc_append_fptr append; - upb_bytesrc_recycle_fptr recycle; } upb_bytesrc_vtable; +typedef struct { + upb_bytesink_put_fptr put; +} upb_bytesink_vtable; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. @@ -74,9 +101,12 @@ struct upb_src { upb_src_vtable *vtbl; upb_status status; bool eof; -#ifndef NDEBUG - int state; // For debug-mode checking of API usage. 
-#endif +}; + +struct upb_sink { + upb_sink_vtable *vtbl; + upb_status status; + bool eof; }; struct upb_bytesrc { @@ -85,13 +115,34 @@ struct upb_bytesrc { bool eof; }; +struct upb_bytesink { + upb_bytesink_vtable *vtbl; + upb_status status; + bool eof; +}; + INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); -#ifndef DEBUG - // TODO: initialize debug-mode checking. -#endif +} + +INLINE void upb_sink_init(upb_sink *s, upb_sink_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); +} + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); +} + +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; + upb_status_init(&s->status); } // Implementation of virtual function dispatch. @@ -136,6 +187,47 @@ bool upb_src_getuint64(upb_src *src, uint64_t *val); bool upb_src_getfloat(upb_src *src, float *val); bool upb_src_getdouble(upb_src *src, double *val); +// upb_bytesrc +INLINE bool upb_bytesrc_get( + upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { + return bytesrc->vtbl->get(bytesrc, str, minlen); +} + +INLINE bool upb_bytesrc_append( + upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { + return bytesrc->vtbl->append(bytesrc, str, len); +} + +// upb_sink +INLINE bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def) { + return sink->vtbl->putdef(sink, def); +} +INLINE bool upb_sink_putval(upb_sink *sink, upb_value val) { + return sink->vtbl->putval(sink, val); +} +INLINE bool upb_sink_putstr(upb_sink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} +INLINE bool upb_sink_startmsg(upb_sink *sink) { + return sink->vtbl->startmsg(sink); +} +INLINE bool upb_sink_endmsg(upb_sink *sink) { + return sink->vtbl->endmsg(sink); +} + +INLINE upb_status *upb_sink_status(upb_sink *sink) { return 
&sink->status; } + +// upb_bytesink +INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->put(sink, str); +} +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + +// upb_bytesink + + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index c06660f..9a3f6b0 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -574,3 +574,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) d->buf_stream_offset = 0; d->buf_offset = 0; } + +upb_src *upb_decoder_src(upb_decoder *d) { + return &d->src; +} diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h index dde61fc..6ba4d77 100644 --- a/stream/upb_decoder.h +++ b/stream/upb_decoder.h @@ -44,7 +44,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc); // Returns a upb_src pointer by which the decoder can be used. The returned // upb_src is invalidated by upb_decoder_reset() or upb_decoder_free(). -upb_src *upb_decoder_getsrc(upb_decoder *d); +upb_src *upb_decoder_src(upb_decoder *d); #ifdef __cplusplus } /* extern "C" */ diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 7cbca91..89a6621 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -6,6 +6,10 @@ #include "upb_stdio.h" +#include +#include +#include "upb_string.h" + // We can make this configurable if necessary. #define BLOCK_SIZE 4096 @@ -13,11 +17,15 @@ struct upb_stdio { upb_bytesrc bytesrc; upb_bytesink bytesink; FILE *file; +}; + +void upb_stdio_reset(upb_stdio *stdio, FILE* file) { + stdio->file = file; } static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, - int offset, int bytes_to_read) { - char *buf = upb_string_getrwbuf(offset + bytes_to_read) + offset; + int offset, size_t bytes_to_read) { + char *buf = upb_string_getrwbuf(str, offset + bytes_to_read) + offset; size_t read = fread(buf, 1, bytes_to_read, stdio->file); if(read < bytes_to_read) { // Error or EOF. 
@@ -44,7 +52,7 @@ bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); } -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { +int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { upb_stdio *stdio = (upb_stdio*)sink - offsetof(upb_stdio, bytesink); upb_strlen_t len = upb_string_len(str); size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); @@ -59,3 +67,26 @@ int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { } return written; } + +static upb_bytesrc_vtable upb_stdio_bytesrc_vtbl = { + (upb_bytesrc_get_fptr)upb_stdio_get, + (upb_bytesrc_append_fptr)upb_stdio_append, +}; + +static upb_bytesink_vtable upb_stdio_bytesink_vtbl = { + upb_stdio_put +}; + +upb_stdio *upb_stdio_new() { + upb_stdio *stdio = malloc(sizeof(*stdio)); + upb_bytesrc_init(&stdio->bytesrc, &upb_stdio_bytesrc_vtbl); + upb_bytesink_init(&stdio->bytesink, &upb_stdio_bytesink_vtbl); + return stdio; +} + +void upb_stdio_free(upb_stdio *stdio) { + free(stdio); +} + +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; } +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; } diff --git a/stream/upb_stdio.h b/stream/upb_stdio.h index 3c29fcb..fd71fdd 100644 --- a/stream/upb_stdio.h +++ b/stream/upb_stdio.h @@ -21,7 +21,7 @@ struct upb_stdio; typedef struct upb_stdio upb_stdio; // Creation/deletion. -upb_stdio_ *upb_stdio__new(); +upb_stdio *upb_stdio_new(); void upb_stdio_free(upb_stdio *stdio); // Reset/initialize the object for use. The src or sink will call diff --git a/stream/upb_text.c b/stream/upb_text.c deleted file mode 100644 index 4a25ecd..0000000 --- a/stream/upb_text.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- */ - -#include -#include "descriptor.h" -#include "upb_text.h" -#include "upb_data.h" - -bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - upb_string *p->str = upb_string_tryrecycle(p->str); -#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); - } - return upb_bytesink_put(p->str); -} - -bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(UPB_STRLIT("\"")); - // TODO: escaping. 
- upb_bytesink_put(str); - upb_bytesink_put(UPB_STRLIT("\"")); -} - -static void print_indent(upb_text_printer *p, FILE *stream) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(UPB_STRLIT(" ")); -} - -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, - FILE *stream) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name)); - upb_text_printval(valtype, val, stream); - if(p->single_line) - fputc(' ', stream); - else - fputc('\n', stream); -} - -void upb_textprinter_startmsg(upb_textprinter *p) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); - if(!p->single_line) fputc('\n', stream); - p->indent_depth++; -} - -void upb_text_pop(upb_text_printer *p, FILE *stream) -{ - p->indent_depth--; - print_indent(p, stream); - fprintf(stream, "}\n"); -} - -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream) -{ - if(upb_issubmsg(f)) { - upb_text_push(printer, f->name, stream); - printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream); - upb_text_pop(printer, stream); - } else { - upb_text_printfield(printer, f->name, f->type, v, stream); - } -} diff --git a/stream/upb_text.h b/stream/upb_text.h deleted file mode 100644 index d89c9d6..0000000 --- a/stream/upb_text.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- */ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int indent_depth; - bool single_line; -} upb_text_printer; - -INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) { - p->indent_depth = 0; - p->single_line = single_line; -} -void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file); -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, FILE *stream); -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, - FILE *stream); -void upb_text_pop(upb_text_printer *p, FILE *stream); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TEXT_H_ */ diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c new file mode 100644 index 0000000..0f0357a --- /dev/null +++ b/stream/upb_textprinter.c @@ -0,0 +1,131 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
+ */ + +#include "upb_textprinter.h" + +#include +#include +#include "upb_def.h" +#include "upb_string.h" + +struct _upb_textprinter { + upb_sink sink; + upb_bytesink *bytesink; + upb_string *str; + int indent_depth; + bool single_line; + upb_fielddef *f; +}; + +static void upb_textprinter_endfield(upb_textprinter *p) +{ + if(p->single_line) + upb_bytesink_put(p->bytesink, UPB_STRLIT(' ')); + else + upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); +} + +static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { + p->str = upb_string_tryrecycle(p->str); +#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): + CASE("%" PRIu32, uint32); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + } + upb_bytesink_put(p->bytesink, p->str); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + // TODO: escaping. 
+ upb_bytesink_put(p->bytesink, str); + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static void upb_textprinter_indent(upb_textprinter *p) +{ + if(!p->single_line) + for(int i = 0; i < p->indent_depth; i++) + upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); +} + +static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) +{ + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, f->name); + upb_bytesink_put(p->bytesink, UPB_STRLIT(":")); + p->f = f; + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_startmsg(upb_textprinter *p) +{ + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, p->f->def->fqname); + upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); + if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); + p->indent_depth++; + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +static bool upb_textprinter_endmsg(upb_textprinter *p) +{ + p->indent_depth--; + upb_textprinter_indent(p); + upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); + upb_textprinter_endfield(p); + return upb_ok(upb_bytesink_status(p->bytesink)); +} + +upb_sink_vtable upb_textprinter_vtbl = { + (upb_sink_putdef_fptr)upb_textprinter_putdef, + (upb_sink_putval_fptr)upb_textprinter_putval, + (upb_sink_putstr_fptr)upb_textprinter_putstr, + (upb_sink_startmsg_fptr)upb_textprinter_startmsg, + (upb_sink_endmsg_fptr)upb_textprinter_endmsg, +}; + +upb_textprinter *upb_textprinter_new() { + upb_textprinter *p = malloc(sizeof(*p)); + upb_sink_init(&p->sink, &upb_textprinter_vtbl); + return p; +} + +void upb_textprinter_free(upb_textprinter *p) { + free(p); +} + +void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, + bool single_line) { + p->bytesink = sink; + p->single_line = single_line; + p->indent_depth = 0; +} + +upb_sink *upb_textprinter_sink(upb_textprinter *p) { return &p->sink; } diff --git a/stream/upb_textprinter.h 
b/stream/upb_textprinter.h new file mode 100644 index 0000000..7e35412 --- /dev/null +++ b/stream/upb_textprinter.h @@ -0,0 +1,30 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_TEXT_H_ +#define UPB_TEXT_H_ + +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct _upb_textprinter; +typedef struct _upb_textprinter upb_textprinter; + +upb_textprinter *upb_textprinter_new(); +void upb_textprinter_free(upb_textprinter *p); +void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, + bool single_line); + +upb_sink *upb_textprinter_sink(upb_textprinter *p); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_TEXT_H_ */ -- cgit v1.2.3 From 60ae9be4380937c3cd39fb72df04fd1723e741e6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 13:39:38 -0700 Subject: Bugfixes to upb_stdio and upb_decoder. --- stream/upb_decoder.c | 3 ++- stream/upb_stdio.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 9a3f6b0..64057c5 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -91,10 +91,10 @@ static bool upb_decoder_nextbuf(upb_decoder *d) // Recycle old buffer. if(d->buf) { - d->buf = upb_string_tryrecycle(d->buf); d->buf_offset -= upb_string_len(d->buf); d->buf_stream_offset += upb_string_len(d->buf); } + d->buf = upb_string_tryrecycle(d->buf); // Pull next buffer. if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { @@ -569,6 +569,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. 
d->top->end_offset = UINT32_MAX - 1; d->bytesrc = bytesrc; + d->field = NULL; d->buf = NULL; d->buf_bytesleft = 0; d->buf_stream_offset = 0; diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 89a6621..820399b 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -53,7 +53,7 @@ bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { } int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { - upb_stdio *stdio = (upb_stdio*)sink - offsetof(upb_stdio, bytesink); + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { -- cgit v1.2.3 From 79de3ca9e48877dfc506c9a486e1e5599c2312f9 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 13:49:50 -0700 Subject: Add forgotten test_decoder.c. --- tests/test_decoder.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/test_decoder.c diff --git a/tests/test_decoder.c b/tests/test_decoder.c new file mode 100644 index 0000000..0e6f19c --- /dev/null +++ b/tests/test_decoder.c @@ -0,0 +1,32 @@ + +#include "upb_decoder.h" +#include "upb_textprinter.h" +#include "upb_stdio.h" + +int main() { + upb_symtab *symtab = upb_symtab_new(); + upb_symtab_add_descriptorproto(symtab); + upb_def *fds = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + + upb_stdio *in = upb_stdio_new(); + upb_stdio_reset(in, stdin); + upb_stdio *out = upb_stdio_new(); + upb_stdio_reset(out, stdout); + upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); + upb_decoder_reset(d, upb_stdio_bytesrc(in)); + upb_textprinter *p = upb_textprinter_new(); + upb_textprinter_reset(p, upb_stdio_bytesink(out), false); + + upb_status status = UPB_STATUS_INIT; + upb_streamdata(upb_decoder_src(d), upb_textprinter_sink(p), &status); + + assert(upb_ok(&status)); + + upb_stdio_free(in); + 
upb_stdio_free(out); + upb_decoder_free(d); + upb_textprinter_free(p); + upb_def_unref(fds); + upb_symtab_unref(symtab); +} -- cgit v1.2.3 From af9d691a344746b15fb1df2e454273b637d20433 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 15:05:57 -0700 Subject: Added Xcode project. --- core/upb_string.c | 2 +- stream/upb_textprinter.c | 2 +- upb.xcodeproj/project.pbxproj | 497 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 499 insertions(+), 2 deletions(-) create mode 100644 upb.xcodeproj/project.pbxproj diff --git a/core/upb_string.c b/core/upb_string.c index ca3c669..93686f5 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -71,7 +71,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { } char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - assert(str->ptr == NULL); + // assert(str->ptr == NULL); uint32_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 0f0357a..11ad6a8 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -7,7 +7,7 @@ #include "upb_textprinter.h" #include -#include +#include #include "upb_def.h" #include "upb_string.h" diff --git a/upb.xcodeproj/project.pbxproj b/upb.xcodeproj/project.pbxproj new file mode 100644 index 0000000..8b4eb4e --- /dev/null +++ b/upb.xcodeproj/project.pbxproj @@ -0,0 +1,497 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */; }; + 420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */; }; + 420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */; }; + 42BD1D6E11F2500D0076AD28 /* upb.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3211F24E4C0076AD28 /* upb.c */; }; + 42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2911F24E4C0076AD28 /* upb_def.c */; }; + 42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */; }; + 42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2E11F24E4C0076AD28 /* upb_string.c */; }; + 42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3011F24E4C0076AD28 /* upb_table.c */; }; + 42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */; }; + 42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4011F24EA30076AD28 /* upb_stdio.c */; }; + 42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */; }; + 42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */ = {isa = PBXBuildFile; fileRef = 42BD1D8D11F251820076AD28 /* descriptor_const.h */; }; + 42BD1D9111F251820076AD28 /* descriptor.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D8E11F251820076AD28 /* descriptor.c */; }; + 42BD1D9211F251820076AD28 /* descriptor.h in Headers */ = {isa 
= PBXBuildFile; fileRef = 42BD1D8F11F251820076AD28 /* descriptor.h */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 42BD1D5B11F24F920076AD28 /* upbcore */; + remoteInfo = upbcore; + }; + 420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 42BD1D6311F24FBA0076AD28 /* upbstream */; + remoteInfo = upbstream; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXFileReference section */ + 420E6F1811F2589F001DA8FE /* test_decoder */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = test_decoder; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D2811F24E4C0076AD28 /* upb_atomic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_atomic.h; path = core/upb_atomic.h; sourceTree = ""; }; + 42BD1D2911F24E4C0076AD28 /* upb_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_def.c; path = core/upb_def.c; sourceTree = ""; }; + 42BD1D2A11F24E4C0076AD28 /* upb_def.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_def.h; path = core/upb_def.h; sourceTree = ""; }; + 42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream_vtbl.h; path = core/upb_stream_vtbl.h; sourceTree = ""; }; + 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stream.c; path = core/upb_stream.c; sourceTree = ""; }; + 42BD1D2D11F24E4C0076AD28 /* upb_stream.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream.h; path = core/upb_stream.h; sourceTree = ""; }; + 42BD1D2E11F24E4C0076AD28 /* upb_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_string.c; path = core/upb_string.c; sourceTree = ""; }; + 42BD1D2F11F24E4C0076AD28 /* upb_string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_string.h; path = core/upb_string.h; sourceTree = ""; }; + 42BD1D3011F24E4C0076AD28 /* upb_table.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_table.c; path = core/upb_table.c; sourceTree = ""; }; + 42BD1D3111F24E4C0076AD28 /* upb_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_table.h; path = core/upb_table.h; sourceTree = ""; }; + 42BD1D3211F24E4C0076AD28 /* upb.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb.c; path = core/upb.c; sourceTree = ""; }; + 42BD1D3311F24E4C0076AD28 /* upb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb.h; path = core/upb.h; sourceTree = ""; }; + 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_decoder.c; path = stream/upb_decoder.c; sourceTree = ""; }; + 42BD1D3F11F24EA30076AD28 /* upb_decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_decoder.h; path = stream/upb_decoder.h; sourceTree = ""; }; + 42BD1D4011F24EA30076AD28 /* upb_stdio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stdio.c; path = stream/upb_stdio.c; sourceTree = ""; }; + 42BD1D4111F24EA30076AD28 /* upb_stdio.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stdio.h; path = 
stream/upb_stdio.h; sourceTree = ""; }; + 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_textprinter.c; path = stream/upb_textprinter.c; sourceTree = ""; }; + 42BD1D4311F24EA30076AD28 /* upb_textprinter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_textprinter.h; path = stream/upb_textprinter.h; sourceTree = ""; }; + 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_decoder.c; path = tests/test_decoder.c; sourceTree = ""; }; + 42BD1D5011F24F3E0076AD28 /* test_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_def.c; path = tests/test_def.c; sourceTree = ""; }; + 42BD1D5111F24F3E0076AD28 /* test_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_string.c; path = tests/test_string.c; sourceTree = ""; }; + 42BD1D5211F24F3E0076AD28 /* test_table.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = test_table.cc; path = tests/test_table.cc; sourceTree = ""; }; + 42BD1D5311F24F3E0076AD28 /* test_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = test_util.h; path = tests/test_util.h; sourceTree = ""; }; + 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbcore.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbstream.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 42BD1D8D11F251820076AD28 /* descriptor_const.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = descriptor_const.h; path = 
descriptor/descriptor_const.h; sourceTree = ""; }; + 42BD1D8E11F251820076AD28 /* descriptor.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = descriptor.c; path = descriptor/descriptor.c; sourceTree = ""; }; + 42BD1D8F11F251820076AD28 /* descriptor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = descriptor.h; path = descriptor/descriptor.h; sourceTree = ""; }; + C6A0FF2C0290799A04C91782 /* upb.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = upb.1; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 420E6F1611F2589F001DA8FE /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */, + 420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D5A11F24F920076AD28 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6211F24FBA0076AD28 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* upb */ = { + isa = PBXGroup; + children = ( + 08FB7795FE84155DC02AAC07 /* Source */, + C6A0FF2B0290797F04C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = upb; + sourceTree = ""; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 42BD1D8A11F251670076AD28 /* descriptor */, + 42BD1D4711F24EB20076AD28 /* tests */, + 42BD1D3B11F24E810076AD28 /* stream */, + 42BD1D3A11F24E5F0076AD28 /* core */, + ); + name = Source; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = 
PBXGroup; + children = ( + 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */, + 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */, + 420E6F1811F2589F001DA8FE /* test_decoder */, + ); + name = Products; + sourceTree = ""; + }; + 42BD1D3A11F24E5F0076AD28 /* core */ = { + isa = PBXGroup; + children = ( + 42BD1D2811F24E4C0076AD28 /* upb_atomic.h */, + 42BD1D2911F24E4C0076AD28 /* upb_def.c */, + 42BD1D2A11F24E4C0076AD28 /* upb_def.h */, + 42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */, + 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */, + 42BD1D2D11F24E4C0076AD28 /* upb_stream.h */, + 42BD1D2E11F24E4C0076AD28 /* upb_string.c */, + 42BD1D2F11F24E4C0076AD28 /* upb_string.h */, + 42BD1D3011F24E4C0076AD28 /* upb_table.c */, + 42BD1D3111F24E4C0076AD28 /* upb_table.h */, + 42BD1D3211F24E4C0076AD28 /* upb.c */, + 42BD1D3311F24E4C0076AD28 /* upb.h */, + ); + name = core; + sourceTree = ""; + }; + 42BD1D3B11F24E810076AD28 /* stream */ = { + isa = PBXGroup; + children = ( + 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */, + 42BD1D3F11F24EA30076AD28 /* upb_decoder.h */, + 42BD1D4011F24EA30076AD28 /* upb_stdio.c */, + 42BD1D4111F24EA30076AD28 /* upb_stdio.h */, + 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */, + 42BD1D4311F24EA30076AD28 /* upb_textprinter.h */, + ); + name = stream; + sourceTree = ""; + }; + 42BD1D4711F24EB20076AD28 /* tests */ = { + isa = PBXGroup; + children = ( + 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */, + 42BD1D5011F24F3E0076AD28 /* test_def.c */, + 42BD1D5111F24F3E0076AD28 /* test_string.c */, + 42BD1D5211F24F3E0076AD28 /* test_table.cc */, + 42BD1D5311F24F3E0076AD28 /* test_util.h */, + ); + name = tests; + sourceTree = ""; + }; + 42BD1D8A11F251670076AD28 /* descriptor */ = { + isa = PBXGroup; + children = ( + 42BD1D8D11F251820076AD28 /* descriptor_const.h */, + 42BD1D8E11F251820076AD28 /* descriptor.c */, + 42BD1D8F11F251820076AD28 /* descriptor.h */, + ); + name = descriptor; + sourceTree = ""; + }; + C6A0FF2B0290797F04C91782 /* Documentation */ = { + isa = 
PBXGroup; + children = ( + C6A0FF2C0290799A04C91782 /* upb.1 */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 42BD1D5811F24F920076AD28 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */, + 42BD1D9211F251820076AD28 /* descriptor.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6011F24FBA0076AD28 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 420E6F1711F2589F001DA8FE /* test_decoder */ = { + isa = PBXNativeTarget; + buildConfigurationList = 420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */; + buildPhases = ( + 420E6F1511F2589F001DA8FE /* Sources */, + 420E6F1611F2589F001DA8FE /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + 420E6F3411F2598D001DA8FE /* PBXTargetDependency */, + 420E6F3611F2598D001DA8FE /* PBXTargetDependency */, + ); + name = test_decoder; + productName = test_decoder; + productReference = 420E6F1811F2589F001DA8FE /* test_decoder */; + productType = "com.apple.product-type.tool"; + }; + 42BD1D5B11F24F920076AD28 /* upbcore */ = { + isa = PBXNativeTarget; + buildConfigurationList = 42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */; + buildPhases = ( + 42BD1D5811F24F920076AD28 /* Headers */, + 42BD1D5911F24F920076AD28 /* Sources */, + 42BD1D5A11F24F920076AD28 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = upbcore; + productName = libupbcore; + productReference = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */; + productType = "com.apple.product-type.library.static"; + }; + 42BD1D6311F24FBA0076AD28 /* upbstream */ = { + isa = PBXNativeTarget; + 
buildConfigurationList = 42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */; + buildPhases = ( + 42BD1D6011F24FBA0076AD28 /* Headers */, + 42BD1D6111F24FBA0076AD28 /* Sources */, + 42BD1D6211F24FBA0076AD28 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = upbstream; + productName = libupbstream; + productReference = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */; + productType = "com.apple.product-type.library.static"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* upb */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 42BD1D5B11F24F920076AD28 /* upbcore */, + 42BD1D6311F24FBA0076AD28 /* upbstream */, + 420E6F1711F2589F001DA8FE /* test_decoder */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 420E6F1511F2589F001DA8FE /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D5911F24F920076AD28 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 42BD1D6E11F2500D0076AD28 /* upb.c in Sources */, + 42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */, + 42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */, + 42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */, + 42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */, + 42BD1D9111F251820076AD28 /* descriptor.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 42BD1D6111F24FBA0076AD28 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 
42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */, + 42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */, + 42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 420E6F3411F2598D001DA8FE /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 42BD1D5B11F24F920076AD28 /* upbcore */; + targetProxy = 420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */; + }; + 420E6F3611F2598D001DA8FE /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 42BD1D6311F24FBA0076AD28 /* upbstream */; + targetProxy = 420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 1DEB928A08733DD80010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Debug; + }; + 1DEB928B08733DD80010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Release; + }; + 420E6F1A11F258A0001DA8FE /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + PREBINDING = NO; + PRODUCT_NAME = test_decoder; + }; + name = Debug; + }; + 
420E6F1B11F258A0001DA8FE /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + PREBINDING = NO; + PRODUCT_NAME = test_decoder; + ZERO_LINK = NO; + }; + name = Release; + }; + 42BD1D5D11F24F930076AD28 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbcore; + }; + name = Debug; + }; + 42BD1D5E11F24F930076AD28 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbcore; + ZERO_LINK = NO; + }; + name = Release; + }; + 42BD1D6511F24FBA0076AD28 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbstream; + }; + name = Debug; + }; + 42BD1D6611F24FBA0076AD28 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/lib; + PREBINDING = NO; + PRODUCT_NAME = libupbstream; + ZERO_LINK = NO; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList 
section */ + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB928A08733DD80010E9CD /* Debug */, + 1DEB928B08733DD80010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 420E6F1A11F258A0001DA8FE /* Debug */, + 420E6F1B11F258A0001DA8FE /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 42BD1D5D11F24F930076AD28 /* Debug */, + 42BD1D5E11F24F930076AD28 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 42BD1D6511F24FBA0076AD28 /* Debug */, + 42BD1D6611F24FBA0076AD28 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} -- cgit v1.2.3 From b77db146466a113bbfb9e56472bda1975f7a25a5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 15:13:05 -0700 Subject: Fixed broken submsg support in upb_streamdata. 
--- core/upb_stream.c | 38 ++++++++++++++++++++++---------------- stream/upb_textprinter.c | 2 +- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/core/upb_stream.c b/core/upb_stream.c index bda11de..e63ba00 100644 --- a/core/upb_stream.c +++ b/core/upb_stream.c @@ -14,24 +14,30 @@ void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { upb_fielddef *f; upb_string *str = NULL; - while((f = upb_src_getdef(src)) != NULL) { - CHECKSINK(upb_sink_putdef(sink, f)); - if(upb_issubmsg(f)) { - // We always recurse into submessages, but the putdef above already told - // the sink that. - } else if(upb_isstring(f)) { - str = upb_string_tryrecycle(str); - CHECKSRC(upb_src_getstr(src, str)); - CHECKSINK(upb_sink_putstr(sink, str)); - } else { - // Primitive type. - upb_value val; - CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); - CHECKSINK(upb_sink_putval(sink, val)); + int depth = 0; + while(1) { + while((f = upb_src_getdef(src)) != NULL) { + CHECKSINK(upb_sink_putdef(sink, f)); + if(upb_issubmsg(f)) { + upb_src_startmsg(src); + upb_sink_startmsg(sink); + ++depth; + } else if(upb_isstring(f)) { + str = upb_string_tryrecycle(str); + CHECKSRC(upb_src_getstr(src, str)); + CHECKSINK(upb_sink_putstr(sink, str)); + } else { + // Primitive type. + upb_value val; + CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); + CHECKSINK(upb_sink_putval(sink, val)); + } } + // If we're not EOF now, the loop terminated due to an error. + CHECKSRC(upb_src_eof(src)); + if (depth == 0) break; + --depth; } - // If we're not EOF now, the loop terminated due to an error. 
- CHECKSRC(upb_src_eof(src)); return; src_err: diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 11ad6a8..201edba 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -79,7 +79,7 @@ static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) { upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, f->name); - upb_bytesink_put(p->bytesink, UPB_STRLIT(":")); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); p->f = f; return upb_ok(upb_bytesink_status(p->bytesink)); } -- cgit v1.2.3 From 0fcfeab521b01160875e863575dd5b63952b1593 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 18:30:53 -0700 Subject: Bugfixes, test_decoder successfully stream-decodes a stream! --- core/upb_stream.c | 5 +++++ stream/upb_decoder.c | 39 ++++++++++++++++++--------------------- stream/upb_textprinter.c | 9 +++++---- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/core/upb_stream.c b/core/upb_stream.c index e63ba00..0d47392 100644 --- a/core/upb_stream.c +++ b/core/upb_stream.c @@ -37,14 +37,19 @@ void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { CHECKSRC(upb_src_eof(src)); if (depth == 0) break; --depth; + upb_src_endmsg(src); + upb_sink_endmsg(sink); } + upb_string_unref(str); return; src_err: + upb_string_unref(str); upb_copyerr(status, upb_src_status(src)); return; sink_err: + upb_string_unref(str); upb_copyerr(status, upb_sink_status(sink)); return; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 64057c5..949ce2d 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -76,7 +76,7 @@ struct upb_decoder { static upb_strlen_t upb_decoder_offset(upb_decoder *d) { - return d->buf_stream_offset - d->buf_offset; + return d->buf_stream_offset + d->buf_offset; } static bool upb_decoder_nextbuf(upb_decoder *d) @@ -101,34 +101,30 @@ static bool upb_decoder_nextbuf(upb_decoder *d) d->buf_bytesleft += upb_string_len(d->buf); return true; } else { - // Error or 
EOF. - if(!upb_bytesrc_eof(d->bytesrc)) { - // Error from bytesrc. - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } else if(d->buf_bytesleft == 0) { - // EOF from bytesrc and we don't have any residual bytes left. - d->src.eof = true; - return false; - } else { - // No more data left from the bytesrc, but we still have residual bytes. - return true; - } + return false; } } static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) { + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE && !upb_bytesrc_eof(d->bytesrc)) + upb_decoder_nextbuf(d); + if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { - // GCC is currently complaining about use of an uninitialized value if we - // don't set this now. I think this is incorrect, but leaving this in - // to suppress the warning for now. - *bytes = 0; - if(!upb_decoder_nextbuf(d)) return NULL; + if(upb_bytesrc_eof(d->bytesrc) && d->buf_bytesleft > 0) { + // We're working through the last few bytes of the buffer. + } else if(upb_bytesrc_eof(d->bytesrc)) { + // End of stream, no more bytes left. + assert(d->buf_bytesleft == 0); + d->src.eof = true; + return NULL; + } else { + // We are short of bytes even though the bytesrc isn't EOF; must be error. + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return NULL; + } } - assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE); - if(d->buf_offset >= 0) { // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE // contiguous bytes, so we can read directly out of it. 
@@ -467,6 +463,7 @@ bool upb_decoder_startmsg(upb_decoder *d) { } else { frame->end_offset = upb_decoder_offset(d) + d->delimited_len; } + d->field = NULL; return true; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 201edba..75668a3 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -23,9 +23,9 @@ struct _upb_textprinter { static void upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) - upb_bytesink_put(p->bytesink, UPB_STRLIT(' ')); + upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); else - upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); + upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); } static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { @@ -86,10 +86,9 @@ static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) static bool upb_textprinter_startmsg(upb_textprinter *p) { - upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, p->f->def->fqname); upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); - if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT('\n')); + if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); p->indent_depth++; return upb_ok(upb_bytesink_status(p->bytesink)); } @@ -114,10 +113,12 @@ upb_sink_vtable upb_textprinter_vtbl = { upb_textprinter *upb_textprinter_new() { upb_textprinter *p = malloc(sizeof(*p)); upb_sink_init(&p->sink, &upb_textprinter_vtbl); + p->str = NULL; return p; } void upb_textprinter_free(upb_textprinter *p) { + upb_string_unref(p->str); free(p); } -- cgit v1.2.3 From 4b6c8b6b2317436ab77b38e17b49a7c7b03bf3f4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Jul 2010 19:00:40 -0700 Subject: Fixed bugs in textoutput. Text output from descriptor.proto is now identical to protoc! 
--- core/upb_def.c | 6 +++++ core/upb_def.h | 1 + stream/upb_textprinter.c | 62 +++++++++++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/core/upb_def.c b/core/upb_def.c index c0d72db..fd00895 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -331,6 +331,12 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { return upb_inttable_next(&e->iton, &iter->e); } +upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) { + upb_iton_ent *e = + (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); + return e ? e->string : NULL; +} + /* upb_fielddef ***************************************************************/ diff --git a/core/upb_def.h b/core/upb_def.h index 82d8520..9cdc54d 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -202,6 +202,7 @@ typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num); +// Caller does not own a ref on the returned string. upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 75668a3..2d2e237 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -29,38 +29,48 @@ static void upb_textprinter_endfield(upb_textprinter *p) } static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - p->str = upb_string_tryrecycle(p->str); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); + upb_enumdef *enum_def; + upb_string *enum_label; + if(p->f->type == UPB_TYPE(ENUM) && + (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && + (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { + // This is an enum value for which we found a corresponding string. 
+ upb_bytesink_put(p->bytesink, enum_label); + } else { + p->str = upb_string_tryrecycle(p->str); #define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(p->f->type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + case UPB_TYPE(ENUM): + CASE("%" PRIu32, uint32); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + } + upb_bytesink_put(p->bytesink, p->str); } - upb_bytesink_put(p->bytesink, p->str); upb_textprinter_endfield(p); return upb_ok(upb_bytesink_status(p->bytesink)); } static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); // TODO: escaping. 
upb_bytesink_put(p->bytesink, str); upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); @@ -79,14 +89,12 @@ static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) { upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, f->name); - upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); p->f = f; return upb_ok(upb_bytesink_status(p->bytesink)); } static bool upb_textprinter_startmsg(upb_textprinter *p) { - upb_bytesink_put(p->bytesink, p->f->def->fqname); upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); p->indent_depth++; -- cgit v1.2.3 From 5871ed0d02ff69b20b65f577dd3be18a2e92dec7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 18 Jul 2010 22:45:15 -0700 Subject: First go at Lua bindings. --- Makefile | 10 +++ core/upb_def.c | 4 +- core/upb_def.h | 8 +- core/upb_string.h | 5 +- lang_ext/lua/upb.c | 254 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 274 insertions(+), 7 deletions(-) create mode 100644 lang_ext/lua/upb.c diff --git a/Makefile b/Makefile index 10ef96d..749c5a7 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,13 @@ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. 
CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread +ifeq ($(shell uname), Darwin) + CPPFLAGS += -I/usr/include/lua5.1 + LDFLAGS += -L/usr/local/lib -llua +else + CFLAGS += $(strip $(shell pkg-config --silence-errors --cflags lua || pkg-config --cflags lua5.1)) + LDFLAGS += $(strip $(shell pkg-config --silence-errors --libs lua || pkg-config --libs lua5.1)) +endif LIBUPB=core/libupb.a LIBUPB_PIC=core/libupb_pic.a @@ -59,6 +66,9 @@ core/upb_def.o: core/upb_def.c core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC +lang_ext/lua/upb.so: lang_ext/lua/upb.lo + $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a + STATICOBJ=$(patsubst %.c,%.o,$(SRC)) SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC)) diff --git a/core/upb_def.c b/core/upb_def.c index fd00895..0d97982 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -190,7 +190,7 @@ void _upb_def_cyclic_ref(upb_def *def) { upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true); } -static void upb_def_init(upb_def *def, upb_def_type type) { +static void upb_def_init(upb_def *def, upb_deftype type) { def->type = type; def->is_cyclic = 0; // We detect this later, after resolving refs. def->search_depth = 0; @@ -779,7 +779,7 @@ void _upb_symtab_free(upb_symtab *s) free(s); } -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) { upb_rwlock_rdlock(&s->lock); int total = upb_strtable_count(&s->symtab); diff --git a/core/upb_def.h b/core/upb_def.h index 9cdc54d..ae9e0fa 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -48,15 +48,15 @@ typedef enum { // For specifying that defs of any type are requsted from getdefs. UPB_DEF_ANY = -1 -} upb_def_type; +} upb_deftype; // This typedef is more space-efficient than declaring an enum var directly. 
-typedef int8_t upb_def_type_t; +typedef int8_t upb_deftype_t; typedef struct { upb_string *fqname; // Fully qualified. upb_atomic_refcount_t refcount; - upb_def_type_t type; + upb_deftype_t type; // The is_cyclic flag could go in upb_msgdef instead of here, because only // messages can be involved in cycles. However, putting them here is free @@ -265,7 +265,7 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); // caller owns the returned array (which is of length *count) as well as a ref // to each symbol inside. If type is UPB_DEF_ANY then defs of all types are // returned, otherwise only defs of the required type are returned. -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); +upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); // "fds" is a upb_src that will yield data from the // google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds diff --git a/core/upb_string.h b/core/upb_string.h index 65ba404..bd89f67 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -197,7 +197,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) #define UPB_STATIC_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STACK_STRING(str) _UPB_STRING_INIT(str, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STACK_STRING(str) \ + _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) +#define UPB_STACK_STRING_LEN(str, len) \ + _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c new file mode 100644 index 0000000..ac7f188 --- /dev/null +++ b/lang_ext/lua/upb.c @@ -0,0 +1,254 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. 
See LICENSE for details. + * + * A Lua extension for upb. + */ + +#include "lauxlib.h" +#include "upb_def.h" + +/* lupb_def *******************************************************************/ + +// All the def types share the same C layout, even though they are differen Lua +// types with different metatables. +typedef struct { + upb_def *def; +} lupb_def; + +static void lupb_pushnewdef(lua_State *L, upb_def *def) { + lupb_def *ldef = lua_newuserdata(L, sizeof(lupb_def)); + ldef->def = def; + const char *type_name; + switch(def->type) { + case UPB_DEF_MSG: + type_name = "upb.msgdef"; + break; + case UPB_DEF_ENUM: + type_name = "upb.enumdef"; + break; + default: + luaL_error(L, "unknown deftype %d", def->type); + } + luaL_getmetatable(L, type_name); + lua_setmetatable(L, -2); +} + +static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.msgdef"); +} + +static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.enumdef"); +} + +static int lupb_msgdef_gc(lua_State *L) { + lupb_def *ldef = lupb_msgdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_def *ldef = lupb_enumdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + +static const struct luaL_Reg lupb_msgdef_methods[] = { + {"__gc", lupb_msgdef_gc}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_enumdef_methods[] = { + {"__gc", lupb_enumdef_gc}, + {NULL, NULL} +}; + + +/* lupb_symtab ****************************************************************/ + +// lupb_symtab caches the Lua objects it vends (defs) via lookup or resolve. +// It does this (instead of creating a new Lua object every time) for two +// reasons: +// * it uses less memory, because we can reuse existing objects. +// * it gives the expected equality semantics, eg. symtab[sym] == symtab[sym]. +// +// The downside is a bit of complexity. 
We need a place to store these +// cached defs; the only good answer is in the metatable. This means we need +// a new metatable for every symtab instance (instead of one shared by all +// instances). Since this is different than the regular pattern, we can't +// use luaL_checkudata(), we have to implement it ourselves. +typedef struct { + upb_symtab *symtab; +} lupb_symtab; + +static int lupb_symtab_gc(lua_State *L); + +// Inherits a ref on the symtab. +static void lupb_pushnewsymtab(lua_State *L, upb_symtab *symtab) { + lupb_symtab *lsymtab = lua_newuserdata(L, sizeof(lupb_symtab)); + lsymtab->symtab = symtab; + // Create its metatable (see note above about mt-per-object). + lua_createtable(L, 0, 1); + luaL_getmetatable(L, "upb.symtab"); + lua_setfield(L, -2, "__index"); // Uses the type metatable to find methods. + lua_pushcfunction(L, lupb_symtab_gc); + lua_setfield(L, -2, "__gc"); + + // Put this metatable in the registry so we can find it for type validation. + lua_pushlightuserdata(L, lsymtab); + lua_pushvalue(L, -2); + lua_rawset(L, LUA_REGISTRYINDEX); + + // Set the symtab's metatable. + lua_setmetatable(L, -2); +} + +// Checks that narg is a proper lupb_symtab object. If it is, leaves its +// metatable on the stack for cache lookups/updates. +lupb_symtab *lupb_symtab_check(lua_State *L, int narg) { + lupb_symtab *symtab = lua_touserdata(L, narg); + if (symtab != NULL) { + if (lua_getmetatable(L, narg)) { + // We use a metatable-per-object to support memoization of defs. + lua_pushlightuserdata(L, symtab); + lua_rawget(L, LUA_REGISTRYINDEX); + if (lua_rawequal(L, -1, -2)) { // Does it have the correct mt? + lua_pop(L, 1); // Remove one copy of the mt, keep the other. + return symtab; + } + } + } + luaL_typerror(L, narg, "upb.symtab"); + return NULL; // Placate the compiler; luaL_typerror will longjmp out of here. 
+} + +static int lupb_symtab_gc(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab_unref(s->symtab); + + // Remove its metatable from the registry. + lua_pushlightuserdata(L, s); + lua_pushnil(L); + lua_rawset(L, LUA_REGISTRYINDEX); + return 0; +} + +// "mt" is the index of the metatable, -1 is the fqname of this def. +// Leaves the Lua object for the def at the top of the stack. +// Inherits a ref on "def". +static void lupb_symtab_getorcreate(lua_State *L, upb_def *def, int mt) { + // We may have this def cached, in which case we should return the same Lua + // object (as long as the value in the underlying symtab has not changed. + lua_rawget(L, mt); + if (!lua_isnil(L, -1)) { + // Def is cached, make sure it hasn't changed. + lupb_def *ldef = lua_touserdata(L, -1); + if (!ldef) luaL_error(L, "upb's internal cache is corrupt."); + if (ldef->def == def) { + // Cache is good, we can just return the cached value. + upb_def_unref(def); + return; + } + } + // Cached entry didn't exist or wasn't good. + lua_pop(L, 1); // Remove bad cached value. + lupb_pushnewdef(L, def); + + // Set it in the cache. + lua_pushvalue(L, 2); // push name (arg to this function). + lua_pushvalue(L, -2); // push the new def. + lua_rawset(L, mt); // set in the cache (the mt). +} + +static int lupb_symtab_lookup(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_def *def = upb_symtab_lookup(s->symtab, &namestr); + if (!def) { + // There shouldn't be a value in our cache either because the symtab + // currently provides no API for deleting syms from a table. In case + // this changes in the future, we explicitly delete from the cache here. + lua_pushvalue(L, 2); // push name (arg to this function). + lua_pushnil(L); + lua_rawset(L, -3); // lupb_symtab_check() left our mt on the stack. + + // Return nil because the symbol was not found. 
+ lua_pushnil(L); + return 1; + } else { + lua_pushvalue(L, 2); + lupb_symtab_getorcreate(L, def, 3); + return 1; + } +} + +static int lupb_symtab_getdefs(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_deftype_t type = luaL_checkint(L, 2); + int count; + upb_def **defs = upb_symtab_getdefs(s->symtab, &count, type); + + // Create the table in which we will return the defs. + lua_createtable(L, 0, count); + int ret = lua_gettop(L); + + for (int i = 0; i < count; i++) { + upb_def *def = defs[i]; + // Look it up in the cache by name. + upb_string *name = def->fqname; + lua_pushlstring(L, upb_string_getrobuf(name), upb_string_len(name)); + lua_pushvalue(L, -1); // Push it again since the getorcreate consumes one. + lupb_symtab_getorcreate(L, def, 3); + + // Add it to our return table. + lua_settable(L, ret); + } + return 1; +} + +static int lupb_symtab_add_descriptorproto(lua_State *L) { + lupb_symtab *s = lupb_symtab_check(L, 1); + upb_symtab_add_descriptorproto(s->symtab); + return 0; // No args to return. 
+} + +static const struct luaL_Reg lupb_symtab_methods[] = { + {"add_descriptorproto", lupb_symtab_add_descriptorproto}, + //{"addfds", lupb_symtab_addfds}, + {"getdefs", lupb_symtab_getdefs}, + {"lookup", lupb_symtab_lookup}, + //{"resolve", lupb_symtab_resolve}, + {NULL, NULL} +}; + + +/* lupb toplevel **************************************************************/ + +static int lupb_symtab_new(lua_State *L) { + upb_symtab *s = upb_symtab_new(); + lupb_pushnewsymtab(L, s); + return 1; +} + +static const struct luaL_Reg lupb_toplevel_methods[] = { + {"symtab", lupb_symtab_new}, + {NULL, NULL} +}; + +int luaopen_upb(lua_State *L) { + luaL_newmetatable(L, "upb.msgdef"); + luaL_register(L, NULL, lupb_msgdef_methods); + + luaL_newmetatable(L, "upb.enumdef"); + luaL_register(L, NULL, lupb_enumdef_methods); + + luaL_newmetatable(L, "upb.symtab"); + luaL_register(L, NULL, lupb_symtab_methods); + + luaL_register(L, "upb", lupb_toplevel_methods); + return 1; // Return package table. +} -- cgit v1.2.3 From 904a79cec3e77efc0253e4412ab766d602ae126d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 19 Jul 2010 08:50:43 -0700 Subject: Fix bugs in lookup and getdefs. --- lang_ext/lua/upb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index ac7f188..6f50c67 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -6,6 +6,7 @@ * A Lua extension for upb. */ +#include <stdlib.h> #include "lauxlib.h" #include "upb_def.h" @@ -141,6 +142,7 @@ static int lupb_symtab_gc(lua_State *L) { static void lupb_symtab_getorcreate(lua_State *L, upb_def *def, int mt) { // We may have this def cached, in which case we should return the same Lua // object (as long as the value in the underlying symtab has not changed. + lua_pushvalue(L, -1); // Copy the name for cache insertion later. lua_rawget(L, mt); if (!lua_isnil(L, -1)) { // Def is cached, make sure it hasn't changed.
@@ -148,6 +150,8 @@ static void lupb_symtab_getorcreate(lua_State *L, upb_def *def, int mt) { if (!ldef) luaL_error(L, "upb's internal cache is corrupt."); if (ldef->def == def) { // Cache is good, we can just return the cached value. + lua_insert(L, -2); // Move our cached def before the copy of the name. + lua_pop(L, 1); // Our extra copy of the name. upb_def_unref(def); return; } @@ -155,11 +159,13 @@ static void lupb_symtab_getorcreate(lua_State *L, upb_def *def, int mt) { // Cached entry didn't exist or wasn't good. lua_pop(L, 1); // Remove bad cached value. lupb_pushnewdef(L, def); + lua_insert(L, -2); // Move new def before the name, so stack is [def, name] // Set it in the cache. - lua_pushvalue(L, 2); // push name (arg to this function). - lua_pushvalue(L, -2); // push the new def. + lua_pushvalue(L, -2); // push def. lua_rawset(L, mt); // set in the cache (the mt). + + // Def is left at the top of the stack. } static int lupb_symtab_lookup(lua_State *L) { @@ -207,6 +213,7 @@ static int lupb_symtab_getdefs(lua_State *L) { // Add it to our return table. lua_settable(L, ret); } + free(defs); return 1; } -- cgit v1.2.3 From f6bc538f31d705f58aceac38726b9515291db24f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 19 Jul 2010 09:48:08 -0700 Subject: Prevent abort() if add_descriptorproto() is called twice. --- core/upb_def.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/core/upb_def.c b/core/upb_def.c index 0d97982..1feaf9d 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -1023,7 +1023,16 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // TODO: allow upb_strings to be static or on the stack. + // For the moment we silently decline to perform the operation if the symbols + // already exist in the symtab. Revisit this when we have a better story + // about whether syms in a table can be replaced. 
+ upb_def *def = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + if(def) { + upb_def_unref(def); + return; + } + upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); -- cgit v1.2.3 From 4a38d38f96c561d1e0797e23ed97fd2657c21da0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 19 Jul 2010 17:42:56 -0700 Subject: Use a weak table to cache objects. This simplifies things considerably, and is more in line with common practice. --- lang_ext/lua/upb.c | 211 ++++++++++++++++++++++------------------------------- 1 file changed, 88 insertions(+), 123 deletions(-) diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index 6f50c67..7241dc5 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -10,17 +10,57 @@ #include "lauxlib.h" #include "upb_def.h" +/* object cache ***************************************************************/ + +// We cache all the lua objects (userdata) we vend in a weak table, indexed by +// the C pointer of the object they are caching. + +typedef void (*lupb_unref)(void *cobj); + +static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, + lupb_unref unref) { + // Lookup our cache in the registry (we don't put our objects in the registry + // directly because we need our cache to be a weak table). + lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache"); + assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. + lua_pushlightuserdata(L, cobj); + lua_rawget(L, -2); + // Stack: objcache, cached value. + if (lua_isnil(L, -1)) { + // Remove bad cached value and push new value. + lua_pop(L, 1); + // We take advantage of the fact that all of our objects are currently a + // single pointer, and thus have the same layout. 
+ void **obj = lua_newuserdata(L, sizeof(void*)); + *obj = cobj; + luaL_getmetatable(L, type); + lua_setmetatable(L, -2); + + // Set it in the cache. + lua_pushlightuserdata(L, cobj); + lua_pushvalue(L, -2); + lua_rawset(L, -4); + } else { + unref(cobj); + } + lua_insert(L, -2); + lua_pop(L, 1); +} + + /* lupb_def *******************************************************************/ -// All the def types share the same C layout, even though they are differen Lua +// All the def types share the same C layout, even though they are different Lua // types with different metatables. typedef struct { upb_def *def; } lupb_def; -static void lupb_pushnewdef(lua_State *L, upb_def *def) { - lupb_def *ldef = lua_newuserdata(L, sizeof(lupb_def)); - ldef->def = def; +static void lupb_def_unref(void *cobj) { + upb_def_unref((upb_def*)cobj); +} + +static void lupb_def_getorcreate(lua_State *L, upb_def *def) { const char *type_name; switch(def->type) { case UPB_DEF_MSG: @@ -32,8 +72,7 @@ static void lupb_pushnewdef(lua_State *L, upb_def *def) { default: luaL_error(L, "unknown deftype %d", def->type); } - luaL_getmetatable(L, type_name); - lua_setmetatable(L, -2); + return lupb_cache_getorcreate(L, def, type_name, lupb_def_unref); } static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { @@ -56,116 +95,46 @@ static int lupb_enumdef_gc(lua_State *L) { return 0; } -static const struct luaL_Reg lupb_msgdef_methods[] = { +static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} }; -static const struct luaL_Reg lupb_enumdef_methods[] = { +static const struct luaL_Reg lupb_msgdef_m[] = { + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_enumdef_mm[] = { {"__gc", lupb_enumdef_gc}, {NULL, NULL} }; +static const struct luaL_Reg lupb_enumdef_m[] = { + {NULL, NULL} +}; + /* lupb_symtab ****************************************************************/ -// lupb_symtab caches the Lua objects it vends (defs) via lookup or resolve. 
-// It does this (instead of creating a new Lua object every time) for two -// reasons: -// * it uses less memory, because we can reuse existing objects. -// * it gives the expected equality semantics, eg. symtab[sym] == symtab[sym]. -// -// The downside is a bit of complexity. We need a place to store these -// cached defs; the only good answer is in the metatable. This means we need -// a new metatable for every symtab instance (instead of one shared by all -// instances). Since this is different than the regular pattern, we can't -// use luaL_checkudata(), we have to implement it ourselves. typedef struct { upb_symtab *symtab; } lupb_symtab; -static int lupb_symtab_gc(lua_State *L); - // Inherits a ref on the symtab. -static void lupb_pushnewsymtab(lua_State *L, upb_symtab *symtab) { - lupb_symtab *lsymtab = lua_newuserdata(L, sizeof(lupb_symtab)); - lsymtab->symtab = symtab; - // Create its metatable (see note above about mt-per-object). - lua_createtable(L, 0, 1); - luaL_getmetatable(L, "upb.symtab"); - lua_setfield(L, -2, "__index"); // Uses the type metatable to find methods. - lua_pushcfunction(L, lupb_symtab_gc); - lua_setfield(L, -2, "__gc"); - - // Put this metatable in the registry so we can find it for type validation. - lua_pushlightuserdata(L, lsymtab); - lua_pushvalue(L, -2); - lua_rawset(L, LUA_REGISTRYINDEX); - - // Set the symtab's metatable. - lua_setmetatable(L, -2); -} - // Checks that narg is a proper lupb_symtab object. If it is, leaves its // metatable on the stack for cache lookups/updates. lupb_symtab *lupb_symtab_check(lua_State *L, int narg) { - lupb_symtab *symtab = lua_touserdata(L, narg); - if (symtab != NULL) { - if (lua_getmetatable(L, narg)) { - // We use a metatable-per-object to support memoization of defs. - lua_pushlightuserdata(L, symtab); - lua_rawget(L, LUA_REGISTRYINDEX); - if (lua_rawequal(L, -1, -2)) { // Does it have the correct mt? - lua_pop(L, 1); // Remove one copy of the mt, keep the other. 
- return symtab; - } - } - } - luaL_typerror(L, narg, "upb.symtab"); - return NULL; // Placate the compiler; luaL_typerror will longjmp out of here. + return luaL_checkudata(L, narg, "upb.symtab"); } static int lupb_symtab_gc(lua_State *L) { lupb_symtab *s = lupb_symtab_check(L, 1); upb_symtab_unref(s->symtab); - - // Remove its metatable from the registry. - lua_pushlightuserdata(L, s); - lua_pushnil(L); - lua_rawset(L, LUA_REGISTRYINDEX); return 0; } -// "mt" is the index of the metatable, -1 is the fqname of this def. -// Leaves the Lua object for the def at the top of the stack. -// Inherits a ref on "def". -static void lupb_symtab_getorcreate(lua_State *L, upb_def *def, int mt) { - // We may have this def cached, in which case we should return the same Lua - // object (as long as the value in the underlying symtab has not changed. - lua_pushvalue(L, -1); // Copy the name for cache insertion later. - lua_rawget(L, mt); - if (!lua_isnil(L, -1)) { - // Def is cached, make sure it hasn't changed. - lupb_def *ldef = lua_touserdata(L, -1); - if (!ldef) luaL_error(L, "upb's internal cache is corrupt."); - if (ldef->def == def) { - // Cache is good, we can just return the cached value. - lua_insert(L, -2); // Move our cached def before the copy of the name. - lua_pop(L, 1); // Our extra copy of the name. - upb_def_unref(def); - return; - } - } - // Cached entry didn't exist or wasn't good. - lua_pop(L, 1); // Remove bad cached value. - lupb_pushnewdef(L, def); - lua_insert(L, -2); // Move new def before the name, so stack is [def, name] - - // Set it in the cache. - lua_pushvalue(L, -2); // push def. - lua_rawset(L, mt); // set in the cache (the mt). - - // Def is left at the top of the stack. 
+static void lupb_symtab_unref(void *cobj) { + upb_symtab_unref((upb_symtab*)cobj); } static int lupb_symtab_lookup(lua_State *L) { @@ -174,22 +143,8 @@ static int lupb_symtab_lookup(lua_State *L) { const char *name = luaL_checklstring(L, 2, &len); upb_string namestr = UPB_STACK_STRING_LEN(name, len); upb_def *def = upb_symtab_lookup(s->symtab, &namestr); - if (!def) { - // There shouldn't be a value in our cache either because the symtab - // currently provides no API for deleting syms from a table. In case - // this changes in the future, we explicitly delete from the cache here. - lua_pushvalue(L, 2); // push name (arg to this function). - lua_pushnil(L); - lua_rawset(L, -3); // lupb_symtab_check() left our mt on the stack. - - // Return nil because the symbol was not found. - lua_pushnil(L); - return 1; - } else { - lua_pushvalue(L, 2); - lupb_symtab_getorcreate(L, def, 3); - return 1; - } + lupb_def_getorcreate(L, def); + return 1; } static int lupb_symtab_getdefs(lua_State *L) { @@ -200,18 +155,13 @@ static int lupb_symtab_getdefs(lua_State *L) { // Create the table in which we will return the defs. lua_createtable(L, 0, count); - int ret = lua_gettop(L); - for (int i = 0; i < count; i++) { upb_def *def = defs[i]; - // Look it up in the cache by name. upb_string *name = def->fqname; lua_pushlstring(L, upb_string_getrobuf(name), upb_string_len(name)); - lua_pushvalue(L, -1); // Push it again since the getorcreate consumes one. - lupb_symtab_getorcreate(L, def, 3); - + lupb_def_getorcreate(L, def); // Add it to our return table. - lua_settable(L, ret); + lua_settable(L, -3); } free(defs); return 1; @@ -223,7 +173,7 @@ static int lupb_symtab_add_descriptorproto(lua_State *L) { return 0; // No args to return. 
} -static const struct luaL_Reg lupb_symtab_methods[] = { +static const struct luaL_Reg lupb_symtab_m[] = { {"add_descriptorproto", lupb_symtab_add_descriptorproto}, //{"addfds", lupb_symtab_addfds}, {"getdefs", lupb_symtab_getdefs}, @@ -232,30 +182,45 @@ static const struct luaL_Reg lupb_symtab_methods[] = { {NULL, NULL} }; +static const struct luaL_Reg lupb_symtab_mm[] = { + {"__gc", lupb_symtab_gc}, + {NULL, NULL} +}; + /* lupb toplevel **************************************************************/ static int lupb_symtab_new(lua_State *L) { upb_symtab *s = upb_symtab_new(); - lupb_pushnewsymtab(L, s); + lupb_cache_getorcreate(L, s, "upb.symtab", lupb_symtab_unref); return 1; } -static const struct luaL_Reg lupb_toplevel_methods[] = { +static const struct luaL_Reg lupb_toplevel_m[] = { {"symtab", lupb_symtab_new}, {NULL, NULL} }; -int luaopen_upb(lua_State *L) { - luaL_newmetatable(L, "upb.msgdef"); - luaL_register(L, NULL, lupb_msgdef_methods); +// Register the given type with the given methods and metamethods. +static void lupb_register_type(lua_State *L, const char *name, + const luaL_Reg *m, const luaL_Reg *mm) { + luaL_newmetatable(L, name); + luaL_register(L, NULL, mm); + lua_createtable(L, 0, 0); + luaL_register(L, NULL, m); + lua_setfield(L, -2, "__index"); + lua_pop(L, 1); // The mt. +} - luaL_newmetatable(L, "upb.enumdef"); - luaL_register(L, NULL, lupb_enumdef_methods); +int luaopen_upb(lua_State *L) { + lupb_register_type(L, "upb.msgdef", lupb_msgdef_m, lupb_msgdef_mm); + lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm); + lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm); - luaL_newmetatable(L, "upb.symtab"); - luaL_register(L, NULL, lupb_symtab_methods); + // Create our object cache. TODO: need to make this table weak! 
+ lua_createtable(L, 0, 0); + lua_setfield(L, LUA_REGISTRYINDEX, "upb.objcache"); - luaL_register(L, "upb", lupb_toplevel_methods); + luaL_register(L, "upb", lupb_toplevel_m); return 1; // Return package table. } -- cgit v1.2.3 From 71ac83fe7a15b27f34e9da452eaeee8df460a2aa Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 11:58:32 -0700 Subject: Make object cache weak. --- lang_ext/lua/upb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index 7241dc5..bfc1355 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -71,6 +71,7 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { break; default: luaL_error(L, "unknown deftype %d", def->type); + type_name = NULL; // Placate the compiler. } return lupb_cache_getorcreate(L, def, type_name, lupb_def_unref); } @@ -219,6 +220,9 @@ int luaopen_upb(lua_State *L) { // Create our object cache. TODO: need to make this table weak! lua_createtable(L, 0, 0); + lua_createtable(L, 0, 1); // Cache metatable. + lua_pushstring(L, "v"); // Values are weak. + lua_setfield(L, -2, "__mode"); lua_setfield(L, LUA_REGISTRYINDEX, "upb.objcache"); luaL_register(L, "upb", lupb_toplevel_m); -- cgit v1.2.3 From 21ee24a7300dbdabef707457d2407b4f9187603b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 18:59:01 -0700 Subject: Updated Lua extension to handle fielddefs. 
--- core/upb_def.c | 1 + core/upb_def.h | 22 ++++++++++------- core/upb_table.c | 2 +- lang_ext/lua/upb.c | 71 +++++++++++++++++++++++++++++++++++++++++++----------- tests/test_def.c | 2 ++ 5 files changed, 74 insertions(+), 24 deletions(-) diff --git a/core/upb_def.c b/core/upb_def.c index 1feaf9d..e40e1f0 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -355,6 +355,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->name = NULL; f->def = NULL; f->owned = false; + f->msgdef = m; upb_fielddef *parsed_f; int32_t tmp; while((parsed_f = upb_src_getdef(src))) { diff --git a/core/upb_def.h b/core/upb_def.h index ae9e0fa..5c19a7a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -87,23 +87,27 @@ INLINE void upb_def_unref(upb_def *def) { // is either a field of a upb_msgdef or contained inside a upb_extensiondef. // It is also reference-counted. typedef struct _upb_fielddef { - upb_atomic_refcount_t refcount; - upb_string *name; - upb_field_number_t number; - upb_field_type_t type; - upb_label_t label; upb_value default_value; + upb_string *name; + + struct _upb_msgdef *msgdef; + // For the case of an enum or a submessage, points to the def for that type. upb_def *def; - // True if we own a ref on "def" (above). This is true unless this edge is - // part of a cycle. - bool owned; + upb_atomic_refcount_t refcount; + uint32_t byte_offset; // Where in a upb_msg to find the data. // These are set only when this fielddef is part of a msgdef. - uint32_t byte_offset; // Where in a upb_msg to find the data. upb_field_count_t field_index; // Indicates set bit. + + upb_field_number_t number; + upb_field_type_t type; + upb_label_t label; + // True if we own a ref on "def" (above). This is true unless this edge is + // part of a cycle. + bool owned; } upb_fielddef; // A variety of tests about the type of a field. 
diff --git a/core/upb_table.c b/core/upb_table.c index b860204..a6e0a56 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -28,7 +28,7 @@ void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) { t->count = 0; t->entry_size = entry_size; - t->size_lg2 = 1; + t->size_lg2 = 0; while(size >>= 1) t->size_lg2++; size_t bytes = upb_table_size(t) * t->entry_size; t->mask = upb_table_size(t) - 1; diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index bfc1355..a16a187 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -15,10 +15,14 @@ // We cache all the lua objects (userdata) we vend in a weak table, indexed by // the C pointer of the object they are caching. -typedef void (*lupb_unref)(void *cobj); +typedef void (*lupb_cb)(void *cobj); + +static void lupb_nop(void *foo) { + (void)foo; +} static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, - lupb_unref unref) { + lupb_cb ref, lupb_cb unref) { // Lookup our cache in the registry (we don't put our objects in the registry // directly because we need our cache to be a weak table). lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache"); @@ -40,6 +44,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, lua_pushlightuserdata(L, cobj); lua_pushvalue(L, -2); lua_rawset(L, -4); + ref(cobj); } else { unref(cobj); } @@ -73,29 +78,21 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { luaL_error(L, "unknown deftype %d", def->type); type_name = NULL; // Placate the compiler. 
} - return lupb_cache_getorcreate(L, def, type_name, lupb_def_unref); + return lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref); } +// msgdef + static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { return luaL_checkudata(L, narg, "upb.msgdef"); } -static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.enumdef"); -} - static int lupb_msgdef_gc(lua_State *L) { lupb_def *ldef = lupb_msgdef_check(L, 1); upb_def_unref(ldef->def); return 0; } -static int lupb_enumdef_gc(lua_State *L) { - lupb_def *ldef = lupb_enumdef_check(L, 1); - upb_def_unref(ldef->def); - return 0; -} - static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} @@ -105,6 +102,18 @@ static const struct luaL_Reg lupb_msgdef_m[] = { {NULL, NULL} }; +// enumdef + +static lupb_def *lupb_enumdef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.enumdef"); +} + +static int lupb_enumdef_gc(lua_State *L) { + lupb_def *ldef = lupb_enumdef_check(L, 1); + upb_def_unref(ldef->def); + return 0; +} + static const struct luaL_Reg lupb_enumdef_mm[] = { {"__gc", lupb_enumdef_gc}, {NULL, NULL} @@ -115,6 +124,40 @@ static const struct luaL_Reg lupb_enumdef_m[] = { }; +/* lupb_fielddef **************************************************************/ + +typedef struct { + upb_fielddef *field; +} lupb_fielddef; + +static void lupb_fielddef_ref(void *cobj) { + upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef)); +} + +static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) { + lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop); +} + +static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { + return luaL_checkudata(L, narg, "upb.fielddef"); +} + +static int lupb_fielddef_gc(lua_State *L) { + lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); + upb_def_unref(UPB_UPCAST(lfielddef->field->msgdef)); + return 0; +} + +static const struct luaL_Reg 
lupb_fielddef_mm[] = { + {"__gc", lupb_fielddef_gc}, + {NULL, NULL} +}; + +static const struct luaL_Reg lupb_fielddef_m[] = { + {NULL, NULL} +}; + + /* lupb_symtab ****************************************************************/ typedef struct { @@ -193,7 +236,7 @@ static const struct luaL_Reg lupb_symtab_mm[] = { static int lupb_symtab_new(lua_State *L) { upb_symtab *s = upb_symtab_new(); - lupb_cache_getorcreate(L, s, "upb.symtab", lupb_symtab_unref); + lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref); return 1; } diff --git a/tests/test_def.c b/tests/test_def.c index e6f95d7..732835d 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -14,6 +14,8 @@ int main() { } free(defs); + printf("Size: %zd\n", sizeof(upb_ntof_ent)); + upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); -- cgit v1.2.3 From d3d939ab7fc14f73d1bb20a6e84a4428e6cde24a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 21:54:36 -0700 Subject: Fix the case where no def is found for lookup. --- lang_ext/lua/upb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index a16a187..a8165c7 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -187,7 +187,11 @@ static int lupb_symtab_lookup(lua_State *L) { const char *name = luaL_checklstring(L, 2, &len); upb_string namestr = UPB_STACK_STRING_LEN(name, len); upb_def *def = upb_symtab_lookup(s->symtab, &namestr); - lupb_def_getorcreate(L, def); + if (def) { + lupb_def_getorcreate(L, def); + } else { + lua_pushnil(L); + } return 1; } -- cgit v1.2.3 From 672f4617e2ab7923806c6d6a44d16e128e16b3a4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 21 Jul 2010 22:36:31 -0700 Subject: Lua support for fielddefs and getting their properties. 
--- core/upb_def.h | 4 +-- lang_ext/lua/upb.c | 79 +++++++++++++++++++++++++++++++++++++++++++++------- stream/upb_decoder.c | 2 +- 3 files changed, 72 insertions(+), 13 deletions(-) diff --git a/core/upb_def.h b/core/upb_def.h index 5c19a7a..3294a8d 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -158,13 +158,13 @@ typedef struct { // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. -INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { +INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) { upb_itof_ent *e = (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); return e ? e->f : NULL; } -INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { +INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); return e ? e->f : NULL; } diff --git a/lang_ext/lua/upb.c b/lang_ext/lua/upb.c index a8165c7..5ab07ba 100644 --- a/lang_ext/lua/upb.c +++ b/lang_ext/lua/upb.c @@ -10,6 +10,10 @@ #include "lauxlib.h" #include "upb_def.h" +void lupb_pushstring(lua_State *L, upb_string *str) { + lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str)); +} + /* object cache ***************************************************************/ // We cache all the lua objects (userdata) we vend in a weak table, indexed by @@ -38,6 +42,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type, void **obj = lua_newuserdata(L, sizeof(void*)); *obj = cobj; luaL_getmetatable(L, type); + assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb. lua_setmetatable(L, -2); // Set it in the cache. 
@@ -83,22 +88,53 @@ static void lupb_def_getorcreate(lua_State *L, upb_def *def) { // msgdef -static lupb_def *lupb_msgdef_check(lua_State *L, int narg) { - return luaL_checkudata(L, narg, "upb.msgdef"); +static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) { + lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef"); + return upb_downcast_msgdef(ldef->def); } static int lupb_msgdef_gc(lua_State *L) { - lupb_def *ldef = lupb_msgdef_check(L, 1); + lupb_def *ldef = luaL_checkudata(L, 1, "upb.msgdef"); upb_def_unref(ldef->def); return 0; } +static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f); + +static int lupb_msgdef_fieldbyname(lua_State *L) { + upb_msgdef *m = lupb_msgdef_check(L, 1); + size_t len; + const char *name = luaL_checklstring(L, 2, &len); + upb_string namestr = UPB_STACK_STRING_LEN(name, len); + upb_fielddef *f = upb_msgdef_ntof(m, &namestr); + if (f) { + lupb_fielddef_getorcreate(L, f); + } else { + lua_pushnil(L); + } + return 1; +} + +static int lupb_msgdef_fieldbynum(lua_State *L) { + upb_msgdef *m = lupb_msgdef_check(L, 1); + int num = luaL_checkint(L, 2); + upb_fielddef *f = upb_msgdef_itof(m, num); + if (f) { + lupb_fielddef_getorcreate(L, f); + } else { + lua_pushnil(L); + } + return 1; +} + static const struct luaL_Reg lupb_msgdef_mm[] = { {"__gc", lupb_msgdef_gc}, {NULL, NULL} }; static const struct luaL_Reg lupb_msgdef_m[] = { + {"fieldbyname", lupb_msgdef_fieldbyname}, + {"fieldbynum", lupb_msgdef_fieldbynum}, {NULL, NULL} }; @@ -142,6 +178,29 @@ static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) { return luaL_checkudata(L, narg, "upb.fielddef"); } +static int lupb_fielddef_index(lua_State *L) { + lupb_fielddef *f = lupb_fielddef_check(L, 1); + const char *str = luaL_checkstring(L, 2); + if (strcmp(str, "name") == 0) { + lupb_pushstring(L, f->field->name); + } else if (strcmp(str, "number") == 0) { + lua_pushinteger(L, f->field->number); + } else if (strcmp(str, "type") == 0) { + lua_pushinteger(L, 
f->field->type); + } else if (strcmp(str, "label") == 0) { + lua_pushinteger(L, f->field->label); + } else if (strcmp(str, "def") == 0) { + upb_def_ref(f->field->def); + lupb_def_getorcreate(L, f->field->def); + } else if (strcmp(str, "msgdef") == 0) { + upb_def_ref(UPB_UPCAST(f->field->msgdef)); + lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef)); + } else { + lua_pushnil(L); + } + return 1; +} + static int lupb_fielddef_gc(lua_State *L) { lupb_fielddef *lfielddef = lupb_fielddef_check(L, 1); upb_def_unref(UPB_UPCAST(lfielddef->field->msgdef)); @@ -150,10 +209,7 @@ static int lupb_fielddef_gc(lua_State *L) { static const struct luaL_Reg lupb_fielddef_mm[] = { {"__gc", lupb_fielddef_gc}, - {NULL, NULL} -}; - -static const struct luaL_Reg lupb_fielddef_m[] = { + {"__index", lupb_fielddef_index}, {NULL, NULL} }; @@ -206,7 +262,7 @@ static int lupb_symtab_getdefs(lua_State *L) { for (int i = 0; i < count; i++) { upb_def *def = defs[i]; upb_string *name = def->fqname; - lua_pushlstring(L, upb_string_getrobuf(name), upb_string_len(name)); + lupb_pushstring(L, name); lupb_def_getorcreate(L, def); // Add it to our return table. lua_settable(L, -3); @@ -255,14 +311,17 @@ static void lupb_register_type(lua_State *L, const char *name, luaL_newmetatable(L, name); luaL_register(L, NULL, mm); lua_createtable(L, 0, 0); - luaL_register(L, NULL, m); - lua_setfield(L, -2, "__index"); + if (m) { + luaL_register(L, NULL, m); + lua_setfield(L, -2, "__index"); + } lua_pop(L, 1); // The mt. } int luaopen_upb(lua_State *L) { lupb_register_type(L, "upb.msgdef", lupb_msgdef_m, lupb_msgdef_mm); lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm); + lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm); lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm); // Create our object cache. TODO: need to make this table weak! 
diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 949ce2d..74ef5c5 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -342,7 +342,7 @@ again: } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number); + upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, field_number); if (!f) { // Unknown field. If/when the upb_src interface supports reporting -- cgit v1.2.3 From 678799082b9775e601a09af9aa68e59fc1c64f6f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:23:52 -0700 Subject: Stream decoding benchmark. --- Makefile | 24 ++++++-- benchmarks/parsestream.upb_table.c | 113 +++++++++++++++++++++++++++++++++++++ core/upb_stream.h | 3 +- core/upb_string.c | 18 ++++++ stream/upb_byteio.h | 43 -------------- 5 files changed, 152 insertions(+), 49 deletions(-) create mode 100644 benchmarks/parsestream.upb_table.c delete mode 100644 stream/upb_byteio.h diff --git a/Makefile b/Makefile index 749c5a7..203bed6 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_textprinter.c \ + core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
@@ -154,10 +154,10 @@ tests/tests: core/libupb.a tools/upbc: core/libupb.a # Benchmarks -UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byref +#UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table \ +# benchmarks/b.parsetostruct_googlemessage2.upb_table +UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \ + benchmarks/b.parsestream_googlemessage2.upb_table BENCHMARKS=$(UPB_BENCHMARKS) \ benchmarks/b.parsetostruct_googlemessage1.proto2_table \ @@ -204,6 +204,20 @@ benchmarks/b.parsetostruct_googlemessage2.upb_table_byref: \ -DMESSAGE_FILE=\"google_message2.dat\" \ -DBYREF=true $(LIBUPB) +benchmarks/b.parsestream_googlemessage1.upb_table \ +benchmarks/b.parsestream_googlemessage2.upb_table: \ + benchmarks/parsestream.upb_table.c $(LIBUPB) benchmarks/google_messages.proto.pb + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage1.upb_table $< \ + -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message1.dat\" \ + $(LIBUPB) + $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage2.upb_table $< \ + -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \ + -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ + -DMESSAGE_FILE=\"google_message2.dat\" \ + $(LIBUPB) + benchmarks/b.parsetostruct_googlemessage1.proto2_table \ benchmarks/b.parsetostruct_googlemessage2.proto2_table: \ benchmarks/parsetostruct.proto2_table.cc benchmarks/google_messages.pb.cc diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c new file mode 100644 index 0000000..c6acad9 --- /dev/null +++ b/benchmarks/parsestream.upb_table.c @@ -0,0 +1,113 @@ + +#include "main.c" + +#include "upb_def.h" +#include "upb_decoder.h" +#include 
"upb_strstream.h" + +static upb_stringsrc *stringsrc; +static upb_string *input_str; +static upb_string *tmp_str; +static upb_msgdef *def; +static upb_decoder *decoder; + +static bool initialize() +{ + // Initialize upb state, decode descriptor. + upb_status status = UPB_STATUS_INIT; + upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); + upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds_str == NULL) { + fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"), + upb_printerr(&status); + return false; + } + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds_str); + upb_def *fds_def = upb_symtab_lookup( + s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def)); + upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_symtab_addfds(s, upb_decoder_src(d), &status); + + if(!upb_ok(&status)) { + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":"); + upb_printerr(&status); + return false; + } + + upb_string_unref(fds_str); + upb_decoder_free(d); + upb_stringsrc_free(ssrc); + upb_def_unref(fds_def); + + def = upb_downcast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME))); + if(!def) { + fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", + UPB_STRARG(UPB_STRLIT(MESSAGE_NAME))); + return false; + } + upb_symtab_unref(s); + + // Read the message data itself. 
+ input_str = upb_strreadfile(MESSAGE_FILE); + if(input_str == NULL) { + fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); + return false; + } + tmp_str = NULL; + decoder = upb_decoder_new(def); + stringsrc = upb_stringsrc_new(); + return true; +} + +static void cleanup() +{ + upb_string_unref(input_str); + upb_string_unref(tmp_str); + upb_def_unref(UPB_UPCAST(def)); + upb_decoder_free(decoder); + upb_stringsrc_free(stringsrc); +} + +static size_t run(int i) +{ + (void)i; + upb_status status = UPB_STATUS_INIT; + upb_stringsrc_reset(stringsrc, input_str); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc)); + upb_src *src = upb_decoder_src(decoder); + upb_fielddef *f; + upb_string *str = NULL; + int depth = 0; + while(1) { + while((f = upb_src_getdef(src)) != NULL) { + if(upb_issubmsg(f)) { + upb_src_startmsg(src); + ++depth; + } else if(upb_isstring(f)) { + tmp_str = upb_string_tryrecycle(str); + upb_src_getstr(src, tmp_str); + } else { + // Primitive type. + upb_value val; + upb_src_getval(src, upb_value_addrof(&val)); + } + } + // If we're not EOF now, the loop terminated due to an error. + if (!upb_src_eof(src)) goto err; + if (depth == 0) break; + --depth; + upb_src_endmsg(src); + } + if(!upb_ok(&status)) goto err; + return upb_string_len(input_str); + +err: + fprintf(stderr, "Decode error"); + upb_printerr(&status); + return 0; +} diff --git a/core/upb_stream.h b/core/upb_stream.h index b7400c5..861bd1c 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -128,7 +128,8 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. +// data in "str" with more data. The call fails if fewer than len bytes are +// available in the stream. 
bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. diff --git a/core/upb_string.c b/core/upb_string.c index 93686f5..847a3ee 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -131,3 +131,21 @@ upb_string *upb_strdup(upb_string *s) { upb_strcpy(str, s); return str; } + +upb_string *upb_strreadfile(const char *filename) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + if(fseek(f, 0, SEEK_END) != 0) goto error; + long size = ftell(f); + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + upb_string *s = upb_string_new(); + char *buf = upb_string_getrwbuf(s, size); + if(fread(buf, size, 1, f) != 1) goto error; + fclose(f); + return s; + +error: + fclose(f); + return NULL; +} diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h deleted file mode 100644 index 69a28b3..0000000 --- a/stream/upb_byteio.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file contains upb_bytesrc and upb_bytesink implementations for common - * interfaces like strings, UNIX fds, and FILE*. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_BYTEIO_H -#define UPB_BYTEIO_H - -#include "upb_srcsink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_stringsrc **************************************************************/ - -struct upb_stringsrc; -typedef struct upb_stringsrc upb_stringsrc; - -// Create/free a stringsrc. -upb_stringsrc *upb_stringsrc_new(); -void upb_stringsrc_free(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); - -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesrc *upb_stringsrc_bytesrc(); - - -/* upb_fdsrc ******************************************************************/ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif -- cgit v1.2.3 From e30260bb0af98fa1d6d829fa9ad2fbd95d7dff95 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:25:57 -0700 Subject: upb_stringsrc: upb_bytesrc for strings. --- stream/upb_strstream.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 stream/upb_strstream.c diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c new file mode 100644 index 0000000..65f33d9 --- /dev/null +++ b/stream/upb_strstream.c @@ -0,0 +1,62 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_strstream.h" + +#include +#include "upb_string.h" + +struct upb_stringsrc { + upb_bytesrc bytesrc; + upb_string *str; +}; + +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { + if (str != s->str) { + if (s->str) upb_string_unref(s->str); + s->str = upb_string_getref(str); + } + s->bytesrc.eof = false; +} + +void upb_stringsrc_free(upb_stringsrc *s) { + if (s->str) upb_string_unref(s->str); + free(s); +} + +static bool upb_stringsrc_get(upb_stringsrc *src, upb_string *str, + upb_strlen_t minlen) { + // We ignore "minlen" since we always return the entire string. + (void)minlen; + upb_string_substr(str, src->str, 0, upb_string_len(src->str)); + src->bytesrc.eof = true; + return true; +} + +static bool upb_stringsrc_append(upb_stringsrc *src, upb_string *str, + upb_strlen_t len) { + // Unimplemented; since we return the string via "get" all in one go, + // this method probably isn't very useful. 
+ (void)src; + (void)str; + (void)len; + return false; +} + +static upb_bytesrc_vtable upb_stringsrc_vtbl = { + (upb_bytesrc_get_fptr)upb_stringsrc_get, + (upb_bytesrc_append_fptr)upb_stringsrc_append, +}; + +upb_stringsrc *upb_stringsrc_new() { + upb_stringsrc *s = malloc(sizeof(*s)); + upb_bytesrc_init(&s->bytesrc, &upb_stringsrc_vtbl); + return s; +} + +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { + return &s->bytesrc; +} -- cgit v1.2.3 From 851c6a6915b55842809c8622d0fb941bc911be37 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 16:27:29 -0700 Subject: strstream header file also. --- stream/upb_strstream.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 stream/upb_strstream.h diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h new file mode 100644 index 0000000..fa9bace --- /dev/null +++ b/stream/upb_strstream.h @@ -0,0 +1,61 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file contains upb_bytesrc and upb_bytesink implementations for + * upb_string. + * + * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_STRSTREAM_H +#define UPB_STRSTREAM_H + +#include "upb_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stringsrc **************************************************************/ + +struct upb_stringsrc; +typedef struct upb_stringsrc upb_stringsrc; + +// Create/free a stringsrc. +upb_stringsrc *upb_stringsrc_new(); +void upb_stringsrc_free(upb_stringsrc *s); + +// Resets the stringsrc to a state where it will vend the given string. The +// stringsrc will take a reference on the string, so the caller need not ensure +// that it outlives the stringsrc. A stringsrc can be reset multiple times. +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
+upb_bytesrc *upb_stringsrc_bytesrc(); + + +/* upb_stringsink *************************************************************/ + +struct upb_stringsink; +typedef struct upb_stringsink upb_stringsink; + +// Create/free a stringsrc. +upb_stringsink *upb_stringsink_new(); +void upb_stringsink_free(upb_stringsink *s); + +// Gets a string containing the data that has been written to this stringsink. +// The caller does *not* own any references to this string. +upb_string *upb_stringsink_getstring(upb_stringsink *s); + +// Clears the internal string of accumulated data, resetting it to empty. +void upb_stringsink_reset(upb_stringsink *s); + +// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. +upb_bytesink *upb_stringsrc_bytesink(); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From 372c8f0487a666c3fb36edc18accba0fba9a2680 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 17:01:45 -0700 Subject: Fixes to benchmark. --- benchmarks/parsestream.upb_table.c | 8 ++++++-- core/upb_def.c | 2 ++ stream/upb_decoder.c | 12 +++++++++--- stream/upb_strstream.c | 1 + 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index c6acad9..1e18119 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -17,6 +17,12 @@ static bool initialize() upb_status status = UPB_STATUS_INIT; upb_symtab *s = upb_symtab_new(); upb_symtab_add_descriptorproto(s); + upb_def *fds_def = upb_symtab_lookup( + s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + if (!fds_def) { + fprintf(stderr, "Couldn't load FileDescriptorSet def"); + } + upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); if(fds_str == NULL) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"), @@ -26,8 +32,6 @@ static bool initialize() upb_stringsrc *ssrc = upb_stringsrc_new(); upb_stringsrc_reset(ssrc, fds_str); - upb_def 
*fds_def = upb_symtab_lookup( - s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def)); upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); diff --git a/core/upb_def.c b/core/upb_def.c index e40e1f0..e117455 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -384,6 +384,8 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->owned = true; break; } + default: + upb_src_skipval(src); } } CHECKSRC(upb_src_eof(src)); diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 74ef5c5..46cfb3f 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -318,7 +318,9 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d) } // Handles the packed field case. - if(d->field) return d->field; + if(d->field) { + return d->field; + } uint32_t key = 0; again: @@ -457,12 +459,15 @@ bool upb_decoder_startmsg(upb_decoder *d) { return false; } upb_decoder_frame *frame = d->top; - frame->msgdef = upb_downcast_msgdef(d->field->def); if(d->field->type == UPB_TYPE(GROUP)) { frame->end_offset = UPB_GROUP_END_OFFSET; - } else { + } else if (d->field->type == UPB_TYPE(MESSAGE)) { frame->end_offset = upb_decoder_offset(d) + d->delimited_len; + } else { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Tried to startmsg a non-msg field."); } + frame->msgdef = upb_downcast_msgdef(d->field->def); d->field = NULL; return true; } @@ -485,6 +490,7 @@ bool upb_decoder_endmsg(upb_decoder *d) { bool upb_decoder_skipval(upb_decoder *d) { upb_strlen_t bytes_to_skip; + d->field = NULL; switch(d->wire_type) { case UPB_WIRE_TYPE_VARINT: { return upb_decoder_skipv64(d); diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c index 65f33d9..7ed761b 100644 --- a/stream/upb_strstream.c +++ b/stream/upb_strstream.c @@ -53,6 +53,7 @@ static upb_bytesrc_vtable upb_stringsrc_vtbl = { upb_stringsrc *upb_stringsrc_new() { upb_stringsrc *s = malloc(sizeof(*s)); + s->str = NULL; upb_bytesrc_init(&s->bytesrc, 
&upb_stringsrc_vtbl); return s; } -- cgit v1.2.3 From 2a617bf12c8e1f7f689e3767bf7e4582d76c4f39 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 24 Jul 2010 18:18:09 -0700 Subject: Optimizations and bugfix to benchmark. --- benchmarks/parsestream.upb_table.c | 5 ++--- stream/upb_decoder.c | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index 1e18119..16979b0 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -85,15 +85,14 @@ static size_t run(int i) upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc)); upb_src *src = upb_decoder_src(decoder); upb_fielddef *f; - upb_string *str = NULL; int depth = 0; while(1) { - while((f = upb_src_getdef(src)) != NULL) { + while(!upb_src_eof(src) && (f = upb_src_getdef(src)) != NULL) { if(upb_issubmsg(f)) { upb_src_startmsg(src); ++depth; } else if(upb_isstring(f)) { - tmp_str = upb_string_tryrecycle(str); + tmp_str = upb_string_tryrecycle(tmp_str); upb_src_getstr(src, tmp_str); } else { // Primitive type. diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 46cfb3f..7591f78 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -177,6 +177,12 @@ static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); } assert(d->buf_bytesleft >= 0); + + // Detect end-of-submessage. + if(upb_decoder_offset(d) >= d->top->end_offset) { + d->src.eof = true; + } + return true; } @@ -187,6 +193,12 @@ static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) while(d->buf_bytesleft < 0) { if(!upb_decoder_nextbuf(d)) return false; } + + // Detect end-of-submessage. 
+ if(upb_decoder_offset(d) >= d->top->end_offset) { + d->src.eof = true; + } + return true; } @@ -311,12 +323,7 @@ bool upb_decoder_skipval(upb_decoder *d); upb_fielddef *upb_decoder_getdef(upb_decoder *d) { - // Detect end-of-submessage. - if(upb_decoder_offset(d) >= d->top->end_offset) { - d->src.eof = true; - return NULL; - } - + if (d->src.eof) return NULL; // Handles the packed field case. if(d->field) { return d->field; @@ -481,7 +488,8 @@ bool upb_decoder_endmsg(upb_decoder *d) { else upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); } - d->src.eof = false; + // Detect end-of-submessage. + d->src.eof = upb_decoder_offset(d) >= d->top->end_offset; return true; } else { return false; @@ -571,6 +579,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) // indefinitely), so we set the end offset as high as possible, but not equal // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. d->top->end_offset = UINT32_MAX - 1; + d->src.eof = false; d->bytesrc = bytesrc; d->field = NULL; d->buf = NULL; -- cgit v1.2.3 From 4e7dc9d8b6baa598ec63a9991e8b11aede576ac1 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 25 Jul 2010 23:30:25 -0700 Subject: Re-add a simplified upb_msg. --- core/upb_msg.h | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 core/upb_msg.h diff --git a/core/upb_msg.h b/core/upb_msg.h new file mode 100644 index 0000000..5215bd9 --- /dev/null +++ b/core/upb_msg.h @@ -0,0 +1,99 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * Data structure for storing a message of protobuf data. 
+ */ + +#ifndef UPB_MSG_H +#define UPB_MSG_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + upb_atomic_refcount_t refcount; + uint32_t len; + uint32_t size; + upb_valueptr elements; +}; + +upb_array *upb_array_new(void); + +INLINE uint32_t upb_array_len(upb_array *a) { + return a->len; +} + +void _upb_array_free(upb_array *a, upb_fielddef *f); +INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { + if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); +} + +INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { + assert(elem < upb_array_len(a)); + return upb_value_read(_upb_array_getptr(a, f, elem), f->type); +} + +// For string or submessages, will release a ref on the previously set value. +INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, + upb_value val) { +} + +// Append an element with the default value, returning it. For strings or +// submessages, this will try to reuse previously allocated memory. +INLINE upb_value upb_array_append_mutable(upb_array *a, upb_fielddef *f) { +} + +typedef struct { + upb_atomic_refcount_t refcount; + uint8_t data[4]; // We allocate the appropriate amount per message. +} upb_msg; + +// Creates a new msg of the given type. +upb_msg *upb_msg_new(upb_msgdef *md); + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md); +INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { + if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); +} + +// Tests whether the given field is explicitly set, or whether it will return a +// default. +INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { + return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; +} + +// Returns the current value of the given field if set, or the default value if +// not set. 
+INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { + if (upb_msg_has(msg, f)) { + return upb_value_read(_upb_msg_getptr(msg, f), f->type); + } else { + return f->default_value; + } +} + +// If the given string, submessage, or array is already set, returns it. +// Otherwise sets it and returns an empty instance, attempting to reuse any +// previously allocated memory. +INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) { +} + +// Sets the current value of the field. If this is a string, array, or +// submessage field, releases a ref on the value (if any) that was previously +// set. +INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { +} + +// Unsets all field values back to their defaults. +INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { + memset(msg->data, 0, md->set_flags_bytes); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif -- cgit v1.2.3 From a9e998159c5ac8c4f2644b5ed0eda2e8ff1f8706 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 2 Aug 2010 10:25:24 -0700 Subject: Fleshed out upb_msg: test_vs_proto2 compiles but fails. 
--- Makefile | 10 ++-- core/upb.h | 98 ++++++++++++++++++++++++++++++++++---- core/upb_atomic.h | 4 ++ core/upb_def.c | 65 ++++++++++++++++++++++++- core/upb_def.h | 28 +++++++++-- core/upb_msg.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ core/upb_msg.h | 114 ++++++++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 8 ++-- stream/upb_strstream.h | 2 +- tests/test_vs_proto2.cc | 54 ++++++++++++--------- 10 files changed, 452 insertions(+), 54 deletions(-) create mode 100644 core/upb_msg.c diff --git a/Makefile b/Makefile index 203bed6..131b3c0 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ + core/upb_msg.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -101,14 +102,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ tests/test_def \ - tests/test_decoder -tests: $(TESTS) - -OTHER_TESTS=tests/tests \ - tests/test_table \ + tests/test_decoder \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ $(TESTS): core/libupb.a VALGRIND=valgrind --leak-check=full --error-exitcode=1 diff --git a/core/upb.h b/core/upb.h index b605fd9..7ee0469 100644 --- a/core/upb.h +++ b/core/upb.h @@ -80,24 +80,16 @@ enum upb_wire_type { typedef uint8_t upb_wire_type_t; -// Value type as defined in a .proto file. eg. string, int32, etc. The +// Type of a field as defined in a .proto file. eg. string, int32, etc. The // integers that represent this are defined by descriptor.proto. Note that // descriptor.proto reserves "0" for errors, and we use it to represent // exceptional circumstances. 
-typedef uint8_t upb_field_type_t; +typedef uint8_t upb_fieldtype_t; // For referencing the type constants tersely. #define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type #define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} - // Info for a given field type. typedef struct { uint8_t align; @@ -129,6 +121,10 @@ typedef union { struct _upb_string; typedef struct _upb_string upb_string; +struct _upb_array; +typedef struct _upb_array upb_array; +struct _upb_msg; +typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; @@ -142,6 +138,11 @@ typedef union { uint32_t uint32; uint64_t uint64; bool _bool; + upb_string *str; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; } upb_value; // A pointer to a .proto value. The owner must have an out-of-band way of @@ -155,13 +156,90 @@ typedef union { uint32_t *uint32; uint64_t *uint64; bool *_bool; + upb_string **str; + upb_msg **msg; + upb_array **arr; + void *_void; } upb_valueptr; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + INLINE upb_valueptr upb_value_addrof(upb_value *val) { upb_valueptr ptr = {&val->_double}; return ptr; } +// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to +// know the value type to perform this operation, because we need to know how +// much memory to copy. 
+INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { + upb_value val; + +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + return val; + +#undef CASE +} + +// Writes a upb_value to a upb_value_ptr location. We need to know the value +// type to perform this operation, because we need to know how much memory to +// copy. +INLINE void upb_value_write(upb_valueptr ptr, upb_value val, + upb_fieldtype_t ft) { +#define CASE(t, member_name) \ + case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + +#undef CASE +} + // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { diff --git a/core/upb_atomic.h b/core/upb_atomic.h index 01fc8a2..1cd848b 100644 --- a/core/upb_atomic.h +++ b/core/upb_atomic.h @@ -127,6 +127,10 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { Implement them or compile with UPB_THREAD_UNSAFE. #endif +INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) { + return upb_atomic_read(a) == 1; +} + /* Reader/Writer lock. 
********************************************************/ #ifdef UPB_THREAD_UNSAFE diff --git a/core/upb_def.c b/core/upb_def.c index e117455..1c8fbdc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -12,6 +12,16 @@ #define CHECKSRC(x) if(!(x)) goto src_err #define CHECK(x) if(!(x)) goto err +/* Rounds p up to the next multiple of t. */ +static size_t upb_align_up(size_t val, size_t align) { + return val % align == 0 ? val : val + align - (val % align); +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; +} + // A little dynamic array for storing a growing list of upb_defs. typedef struct { upb_def **defs; @@ -409,6 +419,19 @@ src_err: /* upb_msgdef *****************************************************************/ +static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { + // Sort by data size (ascending) to reduce padding. + size_t size1 = upb_types[f1->type].size; + size_t size2 = upb_types[f2->type].size; + if (size1 != size2) return size1 - size2; + // Otherwise return in number order (just so we get a reproduceable order. + return f1->number - f2->number; +} + +static int upb_compare_fields(const void *f1, const void *f2) { + return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +} + // Processes a google.protobuf.DescriptorProto, adding defs to "defs." 
static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) { @@ -418,7 +441,6 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); int32_t start_count = defs->len; - upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { @@ -451,6 +473,45 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); goto err; } + + + // Create an ordering over the fields. + upb_field_count_t n = upb_msgdef_numfields(m); + upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); + upb_field_count_t field = 0; + upb_msg_iter i; + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + sorted_fields[field++]= upb_msg_iter_field(i); + } + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); + + // Assign offsets in the msg. + m->set_flags_bytes = upb_div_round_up(n, 8); + m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes; + + size_t max_align = 0; + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + upb_type_info *type_info = &upb_types[f->type]; + + // This identifies the set bit. When we implement is_initialized (a + // general check about whether all required bits are set) we will probably + // want to use a different ordering that puts all the required bits + // together. + f->field_index = i; + + // General alignment rules are: each member must be at an address that is a + // multiple of that type's alignment. Also, the size of the structure as a + // whole must be a multiple of the greatest alignment of any member. + size_t offset = upb_align_up(m->size, type_info->align); + // Offsets are relative to the end of the refcount. 
+ f->byte_offset = offset - sizeof(upb_atomic_refcount_t); + m->size = offset + type_info->size; + max_align = UPB_MAX(max_align, type_info->align); + } + free(sorted_fields); // The sorted view is only needed while assigning offsets. + if (max_align > 0) m->size = upb_align_up(m->size, max_align); + upb_deflist_qualify(defs, m->base.fqname, start_count); upb_deflist_push(defs, UPB_UPCAST(m)); return true; @@ -664,7 +725,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, } // Check the type of the found def. - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); return false; diff --git a/core/upb_def.h b/core/upb_def.h index 3294a8d..9eb961a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -103,7 +103,7 @@ typedef struct _upb_fielddef { upb_field_count_t field_index; // Indicates set bit. upb_field_number_t number; - upb_field_type_t type; + upb_fieldtype_t type; upb_label_t label; // True if we own a ref on "def" (above). This is true unless this edge is // part of a cycle. @@ -112,10 +112,10 @@ typedef struct _upb_fielddef { // A variety of tests about the type of a field. 
INLINE bool upb_issubmsg(upb_fielddef *f) { - return upb_issubmsgtype(f->type); + return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE); } INLINE bool upb_isstring(upb_fielddef *f) { - return upb_isstringtype(f->type); + return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES); } INLINE bool upb_isarray(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); @@ -125,6 +125,19 @@ INLINE bool upb_hasdef(upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } +INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { + if (upb_isarray(f)) { + return UPB_VALUETYPE_ARRAY; + } else { + return f->type; + } +} + +INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { + assert(upb_isarray(f)); + return f->type; +} + INLINE bool upb_field_ismm(upb_fielddef *f) { return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); } @@ -139,6 +152,8 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; + uint32_t size; + uint32_t set_flags_bytes; // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -169,9 +184,14 @@ INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) { + return upb_strtable_count(&m->ntof); +} + // Iteration over fields. The order is undefined. // upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { +// upb_fielddef *f = upb_msg_iter_field(i); // // ... // } typedef upb_itof_ent *upb_msg_iter; diff --git a/core/upb_msg.c b/core/upb_msg.c new file mode 100644 index 0000000..75f7a35 --- /dev/null +++ b/core/upb_msg.c @@ -0,0 +1,123 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ * + * Data structure for storing a message of protobuf data. + */ + +#include "upb_msg.h" + +void _upb_elem_free(upb_value v, upb_fielddef *f) { + switch(f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + _upb_msg_free(v.msg, upb_downcast_msgdef(f->def)); + break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + _upb_string_free(v.str); + break; + default: + abort(); + } +} + +void _upb_field_free(upb_value v, upb_fielddef *f) { + if (upb_isarray(f)) { + _upb_array_free(v.arr, f); + } else { + _upb_elem_free(v, f); + } +} + +upb_msg *upb_msg_new(upb_msgdef *md) { + upb_msg *msg = malloc(md->size); + // Clear all set bits and cached pointers. + memset(msg, 0, md->size); + upb_atomic_refcount_init(&msg->refcount, 1); + return msg; +} + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { + // Need to release refs on all sub-objects. + upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f); + } + free(msg); +} + +upb_array *upb_array_new(void) { + upb_array *arr = malloc(sizeof(*arr)); + upb_atomic_refcount_init(&arr->refcount, 1); + arr->size = 0; + arr->len = 0; + arr->elements._void = NULL; + return arr; +} + +void _upb_array_free(upb_array *arr, upb_fielddef *f) { + if (upb_elem_ismm(f)) { + // Need to release refs on sub-objects. 
+ upb_valuetype_t type = upb_elem_valuetype(f); + for (upb_arraylen_t i = 0; i < arr->size; i++) { + upb_valueptr p = _upb_array_getptr(arr, f, i); + _upb_elem_unref(upb_value_read(p, type), f); + } + } + if (arr->elements._void) free(arr->elements._void); + free(arr); +} + +upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) { + upb_value v; + switch(type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + v.msg = upb_msg_new(upb_downcast_msgdef(f->def)); break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + v.str = upb_string_new(); break; + case UPB_VALUETYPE_ARRAY: + v.arr = upb_array_new(); break; + default: + abort(); // No allocation is defined for any other value type. + } + return v; +} + +static void upb_field_recycle(upb_value val) { + (void)val; +} + +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, + upb_valuetype_t type) { + if (val._void == NULL || !upb_atomic_only(val.refcount)) { + if (val._void != NULL) upb_atomic_unref(val.refcount); + val = upb_field_new(f, type); + upb_value_write(p, val, type); + } else { + upb_field_recycle(val); + } + return val; +} + +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} + +void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} diff --git a/core/upb_msg.h b/core/upb_msg.h index 5215bd9..2db67c0 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -9,14 +9,39 @@ #ifndef UPB_MSG_H #define UPB_MSG_H +#include "upb.h" +#include "upb_def.h" +#include + #ifdef __cplusplus extern "C" { #endif -typedef struct { +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f, + upb_valuetype_t type); + +INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); } + +void _upb_field_free(upb_value v, upb_fielddef *f); +void _upb_elem_free(upb_value v, upb_fielddef *f); +INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) { + 
assert(upb_field_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_field_free(v, f); +} +INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_elem_free(v, f); +} + +/* upb_array ******************************************************************/ + +typedef uint32_t upb_arraylen_t; +struct _upb_array { upb_atomic_refcount_t refcount; - uint32_t len; - uint32_t size; + upb_arraylen_t len; + upb_arraylen_t size; upb_valueptr elements; }; @@ -31,29 +56,70 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); } +INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, + uint32_t elem) { + upb_valueptr p; + p._void = &a->elements.uint8[elem * upb_types[f->type].size]; + return p; +} + INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { assert(elem < upb_array_len(a)); return upb_value_read(_upb_array_getptr(a, f, elem), f->type); } // For string or submessages, will release a ref on the previously set value. +// and take a ref on the new value. The array must already be at least "elem" +// long; to append use append_mutable. INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, upb_value val) { + assert(elem < upb_array_len(a)); + upb_valueptr p = _upb_array_getptr(a, f, elem); + if (upb_elem_ismm(f)) { + _upb_elem_unref(upb_value_read(p, f->type), f); + _upb_value_ref(val); + } + upb_value_write(p, val, f->type); } -// Append an element with the default value, returning it. For strings or -// submessages, this will try to reuse previously allocated memory. 
-INLINE upb_value upb_array_append_mutable(upb_array *a, upb_fielddef *f) { +INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { + size_t esize = upb_types[f->type].size, grown = a->size ? a->size * 2 : 4; + if (a->len < a->size) return; // Spare capacity remains; nothing to do. + a->elements._void = realloc(a->elements._void, grown * esize); + memset(&a->elements.uint8[a->size * esize], 0, (grown - a->size) * esize); + a->size = (upb_arraylen_t)grown; // New slots are zeroed so upb_field_tryrecycle() sees NULL. } -typedef struct { +// Append an element to an array of string or submsg with the default value, +// returning it. This will try to reuse previously allocated memory. +INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_array_resize(a, f); + upb_valueptr p = _upb_array_getptr(a, f, a->len++); + upb_valuetype_t type = upb_elem_valuetype(f); + upb_value val = upb_value_read(p, type); + val = upb_field_tryrecycle(p, val, f, type); + return val; +} + + +/* upb_msg ********************************************************************/ + +struct _upb_msg { upb_atomic_refcount_t refcount; uint8_t data[4]; // We allocate the appropriate amount per message. -} upb_msg; +}; // Creates a new msg of the given type. upb_msg *upb_msg_new(upb_msgdef *md); +// Returns a pointer to the given field. +INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p; + p._void = &msg->data[f->byte_offset]; + return p; +} + void _upb_msg_free(upb_msg *msg, upb_msgdef *md); INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); @@ -65,6 +131,10 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; } +INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { + msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); +} + // Returns the current value of the given field if set, or the default value if // not set. 
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { @@ -79,12 +149,29 @@ INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { // Otherwise sets it and returns an empty instance, attempting to reuse any // previously allocated memory. INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + upb_value val = upb_value_read(p, type); + if (!upb_msg_has(msg, f)) { + upb_msg_sethas(msg, f); + val = upb_field_tryrecycle(p, val, f, type); + } + return val; } // Sets the current value of the field. If this is a string, array, or // submessage field, releases a ref on the value (if any) that was previously // set. INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) { + _upb_field_unref(upb_value_read(p, type), f); + _upb_value_ref(val); + } + upb_msg_sethas(msg, f); + upb_value_write(p, val, upb_field_valuetype(f)); } // Unsets all field values back to their defaults. @@ -92,6 +179,17 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } +// A convenience function for decoding an entire protobuf all at once, without +// having to worry about setting up the appropriate objects. +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + +// A convenience function for encoding an entire protobuf all at once. If an +// error occurs, the null string is returned and the status object contains +// the error. 
+void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 7591f78..c35212e 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,8 +14,10 @@ // Returns true if the give wire type and field type combination is valid, // taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - return (1 << wt) & upb_types[ft].allowed_wire_types; +static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) { + // TODO: need to take into account the label; only repeated fields are + // allowed to use packed encoding. + return (1 << wt) & upb_types[f->type].allowed_wire_types; } // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -358,7 +360,7 @@ again: // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; - } else if (!upb_check_type(wire_type, f->type)) { + } else if (!upb_check_type(wire_type, f)) { // This is a recoverable error condition. We skip the value but also // return NULL and report the error. upb_decoder_skipval(d); diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h index fa9bace..d01d21f 100644 --- a/stream/upb_strstream.h +++ b/stream/upb_strstream.h @@ -31,7 +31,7 @@ void upb_stringsrc_free(upb_stringsrc *s); void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); // Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesrc *upb_stringsrc_bytesrc(); +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); /* upb_stringsink *************************************************************/ diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 9083788..9446b8f 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -4,9 +4,10 @@ #include #include #include -#include "upb_data.h" +#include "upb_msg.h" #include "upb_def.h" #include "upb_decoder.h" +#include "upb_strstream.h" int num_assertions = 0; #define ASSERT(expr) do { \ @@ -25,7 +26,7 @@ void compare_arrays(const google::protobuf::Reflection *r, upb_msg *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); - upb_arrayptr arr = upb_msg_get(upb_msg, upb_f).arr; + upb_array *arr = upb_msg_get(upb_msg, upb_f).arr; ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { upb_value v = upb_array_get(arr, upb_f, i); @@ -63,7 +64,7 @@ void compare_arrays(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -116,7 +117,7 @@ void compare_values(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetString(proto2_msg, proto2_f); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -133,9 +134,10 @@ void compare(const google::protobuf::Message& proto2_msg, const google::protobuf::Reflection *r = proto2_msg.GetReflection(); const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor(); - ASSERT((upb_field_count_t)d->field_count() == upb_md->num_fields); - for(upb_field_count_t 
i = 0; i < upb_md->num_fields; i++) { - upb_fielddef *upb_f = &upb_md->fields[i]; + ASSERT((upb_field_count_t)d->field_count() == upb_msgdef_numfields(upb_md)); + upb_msg_iter i; + for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) { + upb_fielddef *upb_f = upb_msg_iter_field(i); const google::protobuf::FieldDescriptor *proto2_f = d->FindFieldByNumber(upb_f->number); // Make sure the definitions are equal. @@ -143,7 +145,7 @@ void compare(const google::protobuf::Message& proto2_msg, ASSERT(proto2_f); ASSERT(upb_f->number == proto2_f->number()); ASSERT(std::string(upb_string_getrobuf(upb_f->name), - upb_strlen(upb_f->name)) == + upb_string_len(upb_f->name)) == proto2_f->name()); ASSERT(upb_f->type == proto2_f->type()); ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated()); @@ -166,10 +168,10 @@ void compare(const google::protobuf::Message& proto2_msg, void parse_and_compare(MESSAGE_CIDENT *proto2_msg, upb_msg *upb_msg, upb_msgdef *upb_md, - upb_strptr str) + upb_string *str) { // Parse to both proto2 and upb. - ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_strlen(str))); + ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str))); upb_status status = UPB_STATUS_INIT; upb_msg_decodestr(upb_msg, upb_md, str, &status); ASSERT(upb_ok(&status)); @@ -194,22 +196,32 @@ int main(int argc, char *argv[]) // Initialize upb state, parse descriptor. 
upb_status status = UPB_STATUS_INIT; - upb_symtab *c = upb_symtab_new(); - upb_strptr fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); - if(upb_string_isnull(fds)) { + upb_symtab *symtab = upb_symtab_new(); + upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds == NULL) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return 1; } - upb_symtab_add_desc(c, fds, &status); + upb_symtab_add_descriptorproto(symtab); + upb_def *fds_msgdef = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds); + upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef)); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc)); + upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status); if(!upb_ok(&status)) { - fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": %s.\n", - status.msg); + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); + upb_printerr(&status); return 1; } upb_string_unref(fds); + upb_decoder_free(decoder); + upb_stringsrc_free(ssrc); - upb_strptr proto_name = upb_strdupc(MESSAGE_NAME); - upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(c, proto_name)); + upb_string *proto_name = upb_strdupc(MESSAGE_NAME); + upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name)); if(!def) { fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", UPB_STRARG(proto_name)); @@ -218,8 +230,8 @@ int main(int argc, char *argv[]) upb_string_unref(proto_name); // Read the message data itself. 
- upb_strptr str = upb_strreadfile(MESSAGE_FILE); - if(upb_string_isnull(str)) { + upb_string *str = upb_strreadfile(MESSAGE_FILE); + if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return 1; } @@ -234,7 +246,7 @@ int main(int argc, char *argv[]) upb_msg_unref(upb_msg, def); upb_def_unref(UPB_UPCAST(def)); upb_string_unref(str); - upb_symtab_unref(c); + upb_symtab_unref(symtab); return 0; } -- cgit v1.2.3 From 2a7f51f3fd534b3e9e098c522cffbb96e1551474 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 6 Oct 2010 08:19:34 -0700 Subject: Change upb_src to use push-based interface. Unfortunately my previous detailed commit message was lost somehow by git or vi. Will have to explain in more detail at a later date the rationale for this change. The build will be broken until I port the old decoder to this new interface. --- core/upb.h | 4 ++ core/upb_stream.h | 124 +++++++++++++++++++++--------------------------------- 2 files changed, 51 insertions(+), 77 deletions(-) diff --git a/core/upb.h b/core/upb.h index 7ee0469..6ecc2a0 100644 --- a/core/upb.h +++ b/core/upb.h @@ -261,6 +261,10 @@ enum upb_status_code { UPB_ERROR_MAX_NESTING_EXCEEDED = -3 }; +// TODO: consider making this a single word: a upb_string* where we use the low +// bits as flags indicating whether there is an error and whether it is +// resumable. This would improve efficiency, because the code would not need +// to be loaded after a call to a function returning a status. typedef struct { enum upb_status_code code; upb_string *str; diff --git a/core/upb_stream.h b/core/upb_stream.h index 861bd1c..cd00c1e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -28,98 +28,64 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -// Note! The "eof" flags work like feof() in C; they cannot report end-of-file -// until a read has failed due to eof. 
They cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. - -/* upb_src ********************************************************************/ +/* upb_sink *******************************************************************/ -// A upb_src is a pull parser for protobuf data. Sample usage: -// -// #define CHECK(x) if(!x) goto err; +// A upb_sink is a component that receives a stream of protobuf data. +// It is an abstract interface that is implemented either by the system or +// by users. // -// bool parse_msg(upb_src *src, int indent) { -// upb_fielddef *f; -// while ((f = upb_src_getdef(src)) != NULL) { -// for (int i = 0; i < indent; i++) putchar(' '); -// printf("Parsed field; name=" UPB_STRFMT ", num=%d", -// UPB_STRARG(d->name), d->number); -// if (upb_issubmsg(f)) { -// CHECK(upb_src_startmsg(src)); -// CHECK(parse_msg(src, indent + 2)); -// CHECK(upb_src_endmsg(src)); -// } else { -// CHECK(upb_src_skipval(src)); -// } -// } -// // We should be EOF now, otherwise there was an error. -// CHECK(upb_src_eof(src)); -// return true; -// -// err: -// return false; -// } -// -// TODO: decide how to handle unknown fields. - -// Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. End of stream can simply mean end of submessage. -struct _upb_fielddef *upb_src_getdef(upb_src *src); - -// Retrieves and stores the next value in "val". upb_src_getval() is for all -// numeric types and upb_src_getstr() is for strings. For string types "str" -// must be a newly-recycled string. Returns false on error. -bool upb_src_getval(upb_src *src, upb_valueptr val); -bool upb_src_getstr(upb_src *src, upb_string *val); - -// Like upb_src_getval() but skips the value. -bool upb_src_skipval(upb_src *src); - -// Descends into a submessage. 
May only be called when upb_issubmsg(f) is true -// for an f = upb_src_getdef(src) that was just parsed. -bool upb_src_startmsg(upb_src *src); - -// Stops reading a submessage. May be called before the stream is EOF, in -// which case the rest of the submessage is skipped. -bool upb_src_endmsg(upb_src *src); - -// Returns the current error/eof status for the stream. If a stream is eof -// but we are inside a submessage, calling upb_src_endmsg(src) will reset -// the eof marker. -INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } -INLINE bool upb_src_eof(upb_src *src) { return src->eof; } - -// The following functions are equivalent to upb_src_getval(), but take -// pointers to specific types. In debug mode this may check that the type -// is compatible with the type being read. This check will *not* be performed -// in non-debug mode, and if you get the type wrong the behavior is undefined. -bool upb_src_getbool(upb_src *src, bool *val); -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); +// TODO: unknown fields. -/* upb_sink *******************************************************************/ +// Constants that a sink returns to indicate to its caller whether it should +// continue or not. +typedef enum { + // Caller should continue sending values to the sink. + UPB_SINK_CONTINUE, + + // Return from upb_sink_putdef() to skip the next value (which may be a + // submessage). + UPB_SINK_SKIP, + + // Caller should stop sending values; check sink status for details. + // If processing resumes later, it should resume with the next value. + UPB_SINK_STOP, +} upb_sinkret_t; // Puts the given fielddef into the stream. 
-bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); +upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); // Puts the given value into the stream. -bool upb_sink_putval(upb_sink *sink, upb_value val); -bool upb_sink_putstr(upb_sink *sink, upb_string *str); +upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); +upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); // Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a // client could have a submessage already serialized, and therefore put it // as a string instead of its individual elements. -bool upb_sink_startmsg(upb_sink *sink); -bool upb_sink_endmsg(upb_sink *sink); +upb_sinkret_t upb_sink_startmsg(upb_sink *sink); +upb_sinkret_t upb_sink_endmsg(upb_sink *sink); // Returns the current error status for the stream. upb_status *upb_sink_status(upb_sink *sink); + +/* upb_src ********************************************************************/ + +// A upb_src is a resumable push parser for protobuf data. It works by first +// accepting registration of a upb_sink to which it will push data, then +// in a second phase it parses the actual data. +// + +// Sets the given sink as the target of this src. It will be called when +// upb_src_run() is invoked. +void upb_src_setsink(upb_src *src, upb_sink *sink); + +// Pushes data from this src to the previously registered sink, returning +// true if all data was processed. If false is returned, check +// upb_src_status() for details; if it is a resumable status, upb_src_run +// may be called again to resume processing. +bool upb_src_run(upb_src *src); + + /* upb_bytesrc ****************************************************************/ // Returns the next string in the stream. false is returned on error or eof. 
@@ -133,6 +99,10 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. +// Note! The "eof" flag works like feof() in C; it cannot report end-of-file +// until a read has failed due to eof. It cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } -- cgit v1.2.3 From b471ca6b81b88dc23aae6a53345d94d9a2714a7c Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 6 Dec 2010 15:52:40 -0800 Subject: The last major revision to the upb_stream protocol. Sources and sinks communicate by means of a upb_handlers object, which encapsulates a set of handler callbacks and will possibly offer richer semantics in the future like giving specific fields different callbacks. The upb_handlers protocol supports delegation, so sets of handlers can be written in reusable ways. For example, if a set of handlers is written to handle a specific .proto type, those handlers can be used whether that type is at the top level or whether it is a sub-message of a higher-level type. Delegation allows the streaming protocol to properly compose. --- Makefile | 41 +++++---- core/upb_stream.c | 55 ------------ core/upb_stream.h | 167 +++++++++++++++++++++++------------ core/upb_stream_vtbl.h | 235 +++++++++++++++++++++---------------------------- core/upb_string.c | 9 ++ core/upb_string.h | 7 +- 6 files changed, 249 insertions(+), 265 deletions(-) delete mode 100644 core/upb_stream.c diff --git a/Makefile b/Makefile index 131b3c0..5c6598c 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ CXX=g++ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. 
CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) -LDLIBS=-lpthread +LDLIBS=-lpthread core/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 LDFLAGS += -L/usr/local/lib -llua @@ -47,16 +47,27 @@ clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* - rm -rf tests/tests tests/t.* tests/test_table + rm -rf $(TESTS) tests/t.* rm -rf descriptor/descriptor.pb rm -rf tools/upbc deps cd lang_ext/python && python setup.py clean --all +-include deps +deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) + @./gen-deps.sh $(SRC) + # The core library (core/libupb.a) -SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ - core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ - core/upb_msg.c \ - descriptor/descriptor.c +SRC=core/upb.c \ + core/upb_table.c \ + core/upb_string.c \ + descriptor/descriptor.c \ +# core/upb_def.c \ +# core/upb_msg.c \ +# stream/upb_decoder.c \ +# stream/upb_stdio.c \ +# stream/upb_strstream.c \ +# stream/upb_textprinter.c + $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
OTHERSRC=src/upb_encoder.c src/upb_text.c @@ -101,15 +112,16 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_def \ - tests/test_decoder \ - tests/t.test_vs_proto2.googlemessage1 \ - tests/t.test_vs_proto2.googlemessage2 \ - tests/test.proto.pb + tests/test_stream \ +# tests/test_def \ +# tests/test_decoder \ +# tests/t.test_vs_proto2.googlemessage1 \ +# tests/t.test_vs_proto2.googlemessage2 \ +# tests/test.proto.pb tests: $(TESTS) OTHER_TESTS=tests/tests \ -$(TESTS): core/libupb.a +$(TESTS): $(LIBUPB) VALGRIND=valgrind --leak-check=full --error-exitcode=1 #VALGRIND= @@ -118,7 +130,7 @@ test: tests @set -e # Abort on error. # Needs to be rewritten to separate the benchmark. # valgrind --error-exitcode=1 ./tests/test_table - @for test in tests/*; do \ + @for test in $(TESTS); do \ if [ -x ./$$test ] ; then \ echo !!! $(VALGRIND) ./$$test; \ $(VALGRIND) ./$$test || exit 1; \ @@ -247,6 +259,3 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \ -DMESSAGE_HFILE=\"google_messages.pb.h\" \ benchmarks/google_messages.pb.cc -lprotobuf -lpthread --include deps -deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) - @./gen-deps.sh $(SRC) diff --git a/core/upb_stream.c b/core/upb_stream.c deleted file mode 100644 index 0d47392..0000000 --- a/core/upb_stream.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
- */ - -#include "upb_stream.h" - -#include "upb_def.h" - -#define CHECKSRC(x) if(!x) goto src_err -#define CHECKSINK(x) if(!x) goto sink_err - -void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status) { - upb_fielddef *f; - upb_string *str = NULL; - int depth = 0; - while(1) { - while((f = upb_src_getdef(src)) != NULL) { - CHECKSINK(upb_sink_putdef(sink, f)); - if(upb_issubmsg(f)) { - upb_src_startmsg(src); - upb_sink_startmsg(sink); - ++depth; - } else if(upb_isstring(f)) { - str = upb_string_tryrecycle(str); - CHECKSRC(upb_src_getstr(src, str)); - CHECKSINK(upb_sink_putstr(sink, str)); - } else { - // Primitive type. - upb_value val; - CHECKSRC(upb_src_getval(src, upb_value_addrof(&val))); - CHECKSINK(upb_sink_putval(sink, val)); - } - } - // If we're not EOF now, the loop terminated due to an error. - CHECKSRC(upb_src_eof(src)); - if (depth == 0) break; - --depth; - upb_src_endmsg(src); - upb_sink_endmsg(sink); - } - upb_string_unref(str); - return; - -src_err: - upb_string_unref(str); - upb_copyerr(status, upb_src_status(src)); - return; - -sink_err: - upb_string_unref(str); - upb_copyerr(status, upb_sink_status(sink)); - return; -} diff --git a/core/upb_stream.h b/core/upb_stream.h index cd00c1e..1eb111e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -19,7 +19,7 @@ #ifndef UPB_SRCSINK_H #define UPB_SRCSINK_H -#include "upb_stream_vtbl.h" +#include "upb.h" #ifdef __cplusplus extern "C" { @@ -28,98 +28,149 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -/* upb_sink *******************************************************************/ +/* upb_handlers ***************************************************************/ -// A upb_sink is a component that receives a stream of protobuf data. -// It is an abstract interface that is implemented either by the system or -// by users. -// -// TODO: unknown fields. 
+// upb_handlers define the interface by which a upb_src passes data to a +// upb_sink. -// Constants that a sink returns to indicate to its caller whether it should +// Constants that a handler returns to indicate to its caller whether it should // continue or not. typedef enum { // Caller should continue sending values to the sink. - UPB_SINK_CONTINUE, + UPB_CONTINUE, - // Return from upb_sink_putdef() to skip the next value (which may be a - // submessage). - UPB_SINK_SKIP, + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIP, // Caller should stop sending values; check sink status for details. // If processing resumes later, it should resume with the next value. - UPB_SINK_STOP, -} upb_sinkret_t; - -// Puts the given fielddef into the stream. -upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); - -// Puts the given value into the stream. -upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); -upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); - -// Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a -// client could have a submessage already serialized, and therefore put it -// as a string instead of its individual elements. -upb_sinkret_t upb_sink_startmsg(upb_sink *sink); -upb_sinkret_t upb_sink_endmsg(upb_sink *sink); - -// Returns the current error status for the stream. -upb_status *upb_sink_status(upb_sink *sink); - - -/* upb_src ********************************************************************/ - -// A upb_src is a resumable push parser for protobuf data. It works by first -// accepting registration of a upb_sink to which it will push data, then -// in a second phase is parses the actual data. + UPB_STOP, + + // When returned from a startsubmsg handler, indicates that the submessage + // should be handled by a different set of handlers, which have been + // registered on the provided upb_handlers object. 
May not be returned + // from any other callback. + UPB_DELEGATE, +} upb_flow_t; + +// upb_handlers +struct _upb_handlers; +typedef struct _upb_handlers upb_handlers; + +typedef void (*upb_startmsg_handler_t)(void *closure); +typedef void (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_value_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_value val); +typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure, + struct _upb_fielddef *f, + upb_handlers *delegate_to); +typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, + upb_field_number_t fieldnum, + upb_value val); + +// An empty set of handlers, for convenient copy/paste: // - -// Sets the given sink as the target of this src. It will be called when the -// upb_src_parse() is run. -void upb_src_setsink(upb_src *src, upb_sink *sink); - -// Pushes data from this src to the previously registered sink, returning -// true if all data was processed. If false is returned, check -// upb_src_status() for details; if it is a resumable status, upb_src_run -// may be called again to resume processing. -bool upb_src_run(upb_src *src); +// static void startmsg(void *closure) { +// // Called when the top-level message begins. +// } +// +// static void endmsg(void *closure) { +// // Called when the top-level message ends. +// } +// +// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { +// // Called for every value in the stream. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f, +// upb_handlers *delegate_to) { +// // Called when a submessage begins; can delegate by returning UPB_DELEGATE. +// return UPB_CONTINUE; +// } +// +// static upb_flow_t endsubmsg(void *closure) { +// // Called when a submessage ends. 
+// return UPB_CONTINUE; +// } +// +// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, +// upb_value val) { +// // Called when an unknown value is encountered. +// return UPB_CONTINUE; +// } +typedef struct { + upb_startmsg_handler_t startmsg; + upb_endmsg_handler_t endmsg; + upb_value_handler_t value; + upb_startsubmsg_handler_t startsubmsg; + upb_endsubmsg_handler_t endsubmsg; + upb_unknownval_handler_t unknownval; +} upb_handlerset; + +// Functions to register handlers on a upb_handlers object. +INLINE void upb_handlers_init(upb_handlers *h); +INLINE void upb_handlers_uninit(upb_handlers *h); +INLINE void upb_handlers_reset(upb_handlers *h); +INLINE bool upb_handlers_isempty(upb_handlers *h); +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); + +// An object that transparently handles delegation so that the caller needs +// only follow the protocol as if delegation did not exist. +struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE void upb_dispatch_startmsg(upb_dispatcher *d); +INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, upb_value val); /* upb_bytesrc ****************************************************************/ +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; + // Returns the next string in the stream. false is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof.
-bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing // data in "str" with more data. The call fails if fewer than len bytes are // available in the stream. -bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file // until a read has failed due to eof. It cannot preemptively tell you that // the next call will fail due to eof. Since these are the semantics that C // and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); +INLINE bool upb_bytesrc_eof(upb_bytesrc *src); /* upb_bytesink ***************************************************************/ +struct _upb_bytesink; +typedef struct _upb_bytesink upb_bytesink; + // Puts the given string. Returns the number of bytes that were actually, // consumed, which may be fewer than were in the string, or <0 on error. -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. -upb_status *upb_bytesink_status(upb_bytesink *sink); - -/* Utility functions **********************************************************/ - -// Streams data from src to sink until EOF or error. 
-void upb_streamdata(upb_src *src, upb_sink *sink, upb_status *status); +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); +#include "upb_stream_vtbl.h" #ifdef __cplusplus } /* extern "C" */ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 96f6cfe..91464a7 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -5,59 +5,21 @@ * interfaces. Only components that are implementing these interfaces need * to worry about this file. * - * This is tedious; this is the place in upb where I most wish I had a C++ - * feature. In C++ the compiler would generate this all for me. If there's - * any consolation, it's that I have a bit of flexibility you don't have in - * C++: I could, with preprocessor magic alone "de-virtualize" this interface - * for a particular source file. Say I had a C file that called a upb_src, - * but didn't want to pay the virtual function overhead. I could define: - * - * #define upb_src_getdef(src) upb_decoder_getdef((upb_decoder*)src) - * #define upb_src_stargmsg(src) upb_decoder_startmsg(upb_decoder*)src) - * // etc. - * - * The source file is compatible with the regular upb_src interface, but here - * we bind it to a particular upb_src (upb_decoder), which could lead to - * improved performance at a loss of flexibility for this one upb_src client. - * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. */ #ifndef UPB_SRCSINK_VTBL_H_ #define UPB_SRCSINK_VTBL_H_ -#include "upb.h" +#include +#include "upb_stream.h" #ifdef __cplusplus extern "C" { #endif -struct upb_src; -typedef struct upb_src upb_src; -struct upb_sink; -typedef struct upb_sink upb_sink; -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - // Typedefs for function pointers to all of the virtual functions. -// upb_src. 
-typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_getstr_fptr)(upb_src *src, upb_string *str); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); - -// upb_sink. -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_putstr_fptr)(upb_sink *sink, upb_string *str); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); - // upb_bytesrc. typedef bool (*upb_bytesrc_get_fptr)( upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); @@ -68,23 +30,6 @@ typedef bool (*upb_bytesrc_append_fptr)( typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); // Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_getstr_fptr getstr; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; -} upb_src_vtable; - -typedef struct { - upb_sink_putdef_fptr putdef; - upb_sink_putval_fptr putval; - upb_sink_putstr_fptr putstr; - upb_sink_startmsg_fptr startmsg; - upb_sink_endmsg_fptr endmsg; -} upb_sink_vtable; - typedef struct { upb_bytesrc_get_fptr get; upb_bytesrc_append_fptr append; @@ -97,42 +42,18 @@ typedef struct { // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
-struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_sink { - upb_sink_vtable *vtbl; - upb_status status; - bool eof; -}; - -struct upb_bytesrc { +struct _upb_bytesrc { upb_bytesrc_vtable *vtbl; upb_status status; bool eof; }; -struct upb_bytesink { +struct _upb_bytesink { upb_bytesink_vtable *vtbl; upb_status status; bool eof; }; -INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - -INLINE void upb_sink_init(upb_sink *s, upb_sink_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); -} - INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; @@ -146,46 +67,6 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { } // Implementation of virtual function dispatch. -INLINE struct _upb_fielddef *upb_src_getdef(upb_src *src) { - return src->vtbl->getdef(src); -} -INLINE bool upb_src_getval(upb_src *src, upb_valueptr val) { - return src->vtbl->getval(src, val); -} -INLINE bool upb_src_getstr(upb_src *src, upb_string *str) { - return src->vtbl->getstr(src, str); -} -INLINE bool upb_src_skipval(upb_src *src) { return src->vtbl->skipval(src); } -INLINE bool upb_src_startmsg(upb_src *src) { return src->vtbl->startmsg(src); } -INLINE bool upb_src_endmsg(upb_src *src) { return src->vtbl->endmsg(src); } - -// Implementation of type-specific upb_src accessors. If we encounter a upb_src -// where these can be implemented directly in a measurably more efficient way, -// we can make these part of the vtable also. -// -// For <64-bit types we have to use a temporary to accommodate baredecoder, -// which does not know the actual width of the type. 
-INLINE bool upb_src_getbool(upb_src *src, bool *_bool) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *_bool = val._bool; - return ret; -} - -INLINE bool upb_src_getint32(upb_src *src, int32_t *i32) { - upb_value val; - bool ret = upb_src_getval(src, upb_value_addrof(&val)); - *i32 = val.int32; - return ret; -} - -// TODO. -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); // upb_bytesrc INLINE bool upb_bytesrc_get( @@ -198,24 +79,108 @@ INLINE bool upb_bytesrc_append( return bytesrc->vtbl->append(bytesrc, str, len); } -// upb_sink -INLINE bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def) { - return sink->vtbl->putdef(sink, def); +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_handlers +struct _upb_handlers { + upb_handlerset *set; + void *closure; +}; + +INLINE void upb_handlers_init(upb_handlers *h) { + (void)h; +} +INLINE void upb_handlers_uninit(upb_handlers *h) { + (void)h; +} + +INLINE void upb_handlers_reset(upb_handlers *h) { + h->set = NULL; + h->closure = NULL; +} + +INLINE bool upb_handlers_isempty(upb_handlers *h) { + return !h->set && !h->closure; +} + +INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + h->set = set; +} + +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure) { + h->closure = closure; +} + +// upb_dispatcher +typedef struct { + upb_handlers handlers; + int depth; +} upb_dispatcher_frame; + +struct _upb_dispatcher { + upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit; +}; + +INLINE void upb_dispatcher_init(upb_dispatcher *d) { + d->limit = d->stack + sizeof(d->stack); } 
-INLINE bool upb_sink_putval(upb_sink *sink, upb_value val) { - return sink->vtbl->putval(sink, val); + +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { + d->top = d->stack; + d->top->depth = 1; // Never want to trigger end-of-delegation. + d->top->handlers = *h; } -INLINE bool upb_sink_putstr(upb_sink *sink, upb_string *str) { - return sink->vtbl->putstr(sink, str); + +INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE bool upb_sink_startmsg(upb_sink *sink) { - return sink->vtbl->startmsg(sink); + +INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { + assert(d->stack == d->top); + d->top->handlers.set->endmsg(d->top->handlers.closure); } -INLINE bool upb_sink_endmsg(upb_sink *sink) { - return sink->vtbl->endmsg(sink); + +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f) { + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_handlers_reset(&handlers); + upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers); + assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers)); + if (ret == UPB_DELEGATE) { + ++d->top; + d->top->handlers = handlers; + d->top->depth = 0; + d->top->handlers.set->startmsg(d->top->handlers.closure); + ret = UPB_CONTINUE; + } + ++d->top->depth; + upb_handlers_uninit(&handlers); + return ret; +} + +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + if (--d->top->depth == 0) { + d->top->handlers.set->endmsg(d->top->handlers.closure); + --d->top; + } + return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } -INLINE upb_status *upb_sink_status(upb_sink *sink) { return &sink->status; } +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, + struct _upb_fielddef *f, + upb_value val) { + return d->top->handlers.set->value(d->top->handlers.closure, f, val); +} + +INLINE upb_flow_t 
upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val) { + return d->top->handlers.set->unknownval(d->top->handlers.closure, + fieldnum, val); +} // upb_bytesink INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { diff --git a/core/upb_string.c b/core/upb_string.c index 847a3ee..4f5f5c2 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -29,6 +29,7 @@ upb_string *upb_string_new() { upb_string *str = malloc(sizeof(*str)); str->ptr = NULL; str->cached_mem = NULL; + str->len = 0; #ifndef UPB_HAVE_MSIZE str->size = 0; #endif @@ -132,6 +133,14 @@ upb_string *upb_strdup(upb_string *s) { return str; } +void upb_strcat(upb_string *s, upb_string *append) { + uint32_t old_size = upb_string_len(s); + uint32_t append_size = upb_string_len(append); + uint32_t new_size = old_size + append_size; + char *buf = upb_string_getrwbuf(s, new_size); + memcpy(buf + old_size, upb_string_getrobuf(append), append_size); +} + upb_string *upb_strreadfile(const char *filename) { FILE *f = fopen(filename, "rb"); if(!f) return NULL; diff --git a/core/upb_string.h b/core/upb_string.h index bd89f67..ee345e3 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -18,6 +18,11 @@ * string). * - strings are not thread-safe by default, but can be made so by calling a * function. This is not the default because it causes extra CPU overhead. + * + * Reference-counted strings have recently fallen out of favor because of the + * performance impacts of doing thread-safe reference counting with atomic + * operations. We side-step this issue by not performing atomic operations + * unless the string has been marked thread-safe. */ #ifndef UPB_STRING_H @@ -34,7 +39,7 @@ extern "C" { #endif // All members of this struct are private, and may only be read/written through -// the associated functions. Also, strings may *only* be allocated on the heap. +// the associated functions. struct _upb_string { // The pointer to our currently active data. 
This may be memory we own // or a pointer into memory we don't own. -- cgit v1.2.3 From db512df98e0fac208a716c7807d037f0b0d309f1 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 4 Jan 2011 15:47:25 -0800 Subject: A bunch of work on upb_def and upb_value. --- core/upb.h | 51 +++-- core/upb_def.c | 642 +++++++++++++++++++++++++++++---------------------------- 2 files changed, 370 insertions(+), 323 deletions(-) diff --git a/core/upb.h b/core/upb.h index 6ecc2a0..7bed779 100644 --- a/core/upb.h +++ b/core/upb.h @@ -130,21 +130,46 @@ typedef uint32_t upb_strlen_t; // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. -typedef union { - double _double; - float _float; - int32_t int32; - int64_t int64; - uint32_t uint32; - uint64_t uint64; - bool _bool; - upb_string *str; - upb_msg *msg; - upb_array *arr; - upb_atomic_refcount_t *refcount; - void *_void; +typedef struct { + union { + double _double; + float _float; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + bool _bool; + upb_string *str; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; + } val; + + // In debug mode we carry the value type around also so we can check accesses + // to be sure the right member is being read. 
+#ifndef NDEBUG + upb_valuetype_t type; +#endif } upb_value; +#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ + ctype upb_value_get ## name(upb_value val) { \ + assert(val.type == UPB_TYPE(proto_type)); \ + return val.membername; \ + } \ + void upb_value_ ## name(upb_value *val, ctype cval) { \ + val.type = UPB_TYPE(proto_type); \ + val.membername = cval; \ + } +UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); +UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, INT32); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); +UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); + // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. typedef union { diff --git a/core/upb_def.c b/core/upb_def.c index 1c8fbdc..cc771dc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -22,33 +22,6 @@ static int upb_div_round_up(int numerator, int denominator) { return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; } -// A little dynamic array for storing a growing list of upb_defs. 
-typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; -} - -static void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) - if(l->defs[i]) upb_def_unref(l->defs[i]); - free(l->defs); -} - -static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); - } - l->defs[l->len++] = d; -} - /* Joins strings together, for example: * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" * join("", "Baz") -> "Baz" @@ -62,14 +35,12 @@ static upb_string *upb_join(upb_string *base, upb_string *name) { } } -// Qualify the defname for all defs starting with offset "start" with "str". -static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } +/* Search for a character in a string, in reverse. */ +static int my_memrchr(char *data, char c, size_t len) +{ + int off = len-1; + while(off > 0 && data[off] != c) --off; + return off; } /* upb_def ********************************************************************/ @@ -256,26 +227,27 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) -{ - int32_t number = -1; - upb_string *name = NULL; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); - break; - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); - break; - default: - CHECKSRC(upb_src_skipval(src)); - break; - } +static void upb_enumdef_startmsg(upb_defbuilder *b) { + b->number = -1; + name = NULL; +} + +static upb_flow_t upb_enumdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: + name = upb_string_tryrecycle(name); + CHECKSRC(upb_src_getstr(src, name)); + break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &number)); + break; + default: + CHECKSRC(upb_src_skipval(src)); + break; } +} +static void upb_enumdef_endmsg(upb_defbuilder *b) { if(name == NULL || number == -1) { upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; @@ -287,48 +259,7 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. - return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_string_unref(name); - return false; -} - -// google.protobuf.EnumDescriptorProto. 
-static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: - e->base.fqname = upb_string_tryrecycle(e->base.fqname); - CHECKSRC(upb_src_getstr(src, e->base.fqname)); - break; - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum_val(src, e, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - upb_src_skipval(src); - break; - } - } - assert(e->base.fqname); - upb_deflist_push(defs, UPB_UPCAST(e)); - return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_enumdef_free(e); - return false; + return UPB_CONTINUE; } upb_enum_iter upb_enum_begin(upb_enumdef *e) { @@ -358,47 +289,17 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) -{ +static void upb_fielddef_startmsg(upb_defbuilder *b) { upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; f->name = NULL; f->def = NULL; f->owned = false; f->msgdef = m; - upb_fielddef *parsed_f; - int32_t tmp; - while((parsed_f = upb_src_getdef(src))) { - switch(parsed_f->number) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->type = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->label = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->number = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); - 
break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; - break; - } - default: - upb_src_skipval(src); - } - } - CHECKSRC(upb_src_eof(src)); + b->f = f; +} + +static void upb_fielddef_endmsg(upb_defbuilder *b) { // TODO: verify that all required fields were present. assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); @@ -409,11 +310,33 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); return true; +} -src_err: - upb_copyerr(status, upb_src_status(src)); - upb_fielddef_free(f); - return false; +static upb_flow_t upb_fielddef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { + switch(parsed_f->number) { + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: + f->type = upb_value_getint32(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: + f->label = upb_value_getint32(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: + f->number = upb_value_getint32(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: + f->name = upb_string_tryrecycle(f->name); + CHECKSRC(upb_src_getstr(src, f->name)); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { + upb_string *str = upb_string_new(); + CHECKSRC(upb_src_getstr(src, str)); + if(f->def) upb_def_unref(f->def); + f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + f->owned = true; + break; + } + } + return UPB_CONTINUE; } @@ -433,48 +356,23 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // Processes a google.protobuf.DescriptorProto, adding defs to "defs." 
-static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) -{ +static void upb_msgdef_startmsg(upb_defbuilder *b) { upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); - int32_t start_count = defs->len; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - m->base.fqname = upb_string_tryrecycle(m->base.fqname); - CHECKSRC(upb_src_getstr(src, m->base.fqname)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addfield(src, m, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addmsg(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - // TODO: extensions. - CHECKSRC(upb_src_skipval(src)); - } - } - CHECK(upb_src_eof(src)); + upb_deflist_push(&b->defs, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b, UPB_UPCAST(m)); +} + +static void upb_msgdef_endmsg(upb_defbuilder *b) { + upb_msgdef *m = upb_downcast_msgdef(upb_deflist_stacktop(&m->defs)); if(!m->base.fqname) { upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - goto err; + return UPB_ERROR; } - // Create an ordering over the fields. 
upb_field_count_t n = upb_msgdef_numfields(m); upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); @@ -512,15 +410,43 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) if (max_align > 0) m->size = upb_align_up(m->size, max_align); - upb_deflist_qualify(defs, m->base.fqname, start_count); - upb_deflist_push(defs, UPB_UPCAST(m)); - return true; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_msgdef_free(m); - return false; +static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: + // XXX + m->base.fqname = upb_string_tryrecycle(m->base.fqname); + m->base.fqname = upb_value_getstr(val); + upb_defbuilder_setscopename(upb_value_getstr(val)); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + // TODO: extensions. 
+ return UPB_SKIP; + } +} + +static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + upb_register_FieldDescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + upb_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + break; + default: + return UPB_SKIP; + } } static void upb_msgdef_free(upb_msgdef *m) @@ -551,55 +477,171 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { return upb_inttable_next(&m->itof, &iter->e); } -/* symtab internal ***********************************************************/ +/* upb_defbuilder ************************************************************/ -// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". -static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_string *package = NULL; - int32_t start_count = defs->len; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: - package = upb_string_tryrecycle(package); - CHECKSRC(upb_src_getstr(src, package)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addmsg(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - // TODO: services and extensions. - CHECKSRC(upb_src_skipval(src)); - } +// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in +// the format defined in descriptor.proto. 
The output of a upb_defbuilder is +// a list of upb_def* that possibly contain unresolved references. +// +// We use a separate object (upb_defbuilder) instead of having the defs handle +// the parse themselves because we need to store state that is only necessary +// during the building process itself. + +// When we are bootstrapping descriptor.proto, we must help the bare decoder out +// by telling it when to descend into a submessage, because with the wire format +// alone we cannot tell the difference between a submessage and a string. +#define BEGIN_SUBMSG 100 + +// upb_deflist: A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); } - CHECK(upb_src_eof(src)); - upb_deflist_qualify(defs, package, start_count); - upb_string_unref(package); - return true; + l->defs[l->len++] = d; +} -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_string_unref(package); - return false; +// Qualify the defname for all defs starting with offset "start" with "str". +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } } -/* Search for a character in a string, in reverse. 
*/ -static int my_memrchr(char *data, char c, size_t len) -{ - int off = len-1; - while(off > 0 && data[off] != c) --off; - return off; +typedef struct { + upb_deflist defs; + struct { + upb_string *name; + int start; + } upb_defbuilder_frame; + upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; +} upb_defbuilder; + +// Start/end handlers for FileDescriptorProto and DescriptorProto (the two +// entities that have names and can contain sub-definitions. +upb_defbuilder_startcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = b->stack[b->stack_len++]; + f->start = b->defs.len; + f->name = NULL; +} + +upb_defbuilder_endcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = b->stack[--b->stack_len]; + upb_deflist_qualify(&b->defs, f->name, f->start); + upb_string_unref(f->name); +} + +upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { +} + +// Handlers for google.protobuf.FileDescriptorProto. +static bool upb_defbuilder_FileDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: + upb_defbuilder_setscopename(b, val.str); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static bool upb_defbuilder_FileDescriptorProto_startsubmsg(upb_defbuilder *b, + upb_fielddef *f, + upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + upb_defbuilder_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_defbuilder_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + default: + // TODO: services and extensions. 
+ return UPB_SKIP; + } +} + +static upb_handlers upb_defbuilder_FileDescriptorProto_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_value, + &upb_defbuilder_FileDescriptorProto_startsubmsg, } +upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// Handlers for google.protobuf.FileDescriptorSet. +upb_defbuilder_FileDescriptorSet_value(upb_defbuilder *b, upb_fielddef *f, + upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +upb_defbuilder_FileDescriptorSet_startsubmsg(upb_defbuilder *b, + upb_fielddef *f, upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + upb_defbuilder_register_FileDescriptorProto(b, h); + return UPB_DELEGATE; + default: + return UPB_SKIP; + } +} + +static upb_handlers upb_defbuilder_FileDescriptorSet_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorSet_value, + &upb_defbuilder_FileDescriptorSet_startsubmsg, +} + +upb_defbuilder_register_FileDescriptorSet(upb_defbuilder *b, upb_handlers *h) { + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_set_handler_closure(h, b); +} + + + +/* upb_symtab adding defs *****************************************************/ + +// This is a self-contained group of functions that, given a list of upb_defs +// whose references are not yet resolved, resolves references and adds them +// atomically to a upb_symtab. + typedef struct { upb_strtable_entry e; upb_def *def; @@ -751,8 +793,8 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either // succeeds or fails. 
Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, - upb_status *status) +bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, + bool allow_redef, upb_status *status) { upb_rwlock_wrlock(&s->lock); @@ -817,7 +859,7 @@ err: } -/* upb_symtab *****************************************************************/ +/* upb_symtab public interface ************************************************/ upb_symtab *upb_symtab_new() { @@ -893,22 +935,13 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { - upb_deflist defs; - upb_deflist_init(&defs); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addfd(src, &defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - CHECKSRC(upb_src_skipval(src)); - } + upb_defbuilder *b = upb_defbuilder_new(); + upb_defbuilder_register_handlers(b, upb_src_gethandlers(src)); + if(!upb_src_run(src)) { + upb_copyerr(status, upb_src_status(src)); + return; } - CHECKSRC(upb_src_eof(src)); - CHECK(upb_symtab_add_defs(s, &defs, false, status)); + upb_symtab_add_defs(s, b->defs, b->defs_len, false, status); upb_deflist_uninit(&defs); return; @@ -937,17 +970,21 @@ err: // * groups. // * zig-zag-encoded types like sint32 and sint64. // -// If descriptor.proto ever changed to use any of these features, this decoder -// would need to be extended to support them. +// Since it cannot tell the difference between submessages and strings, it +// always reports them as strings first, but if the value callback returns +// UPB_TREAT_AS_SUBMSG this signals to the baredecoder that it should be +// treated like a submessage instead. 
+// +// TODO: for bootstrapping we should define a slightly different wire format +// that includes enough information to know the precise integer types and +// that distinguishes between strings and submessages. This will allow +// us to get rid of the UPB_TREAT_AS_SUBMSG hack. It will also allow us +// to get rid of the upb_value_setraw() scheme, which would be more +// complicated to support on big-endian machines. typedef struct { - upb_src src; upb_string *input; upb_strlen_t offset; - upb_fielddef field; - upb_wire_type_t wire_type; - upb_strlen_t delimited_len; - upb_strlen_t stack[UPB_MAX_NESTING], *top; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -983,75 +1020,62 @@ static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) return val; } -static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) -{ - // Detect end-of-submessage. - if(d->offset >= *d->top) { - d->src.eof = true; - return NULL; - } - - uint32_t key; - key = upb_baredecoder_readv32(d); - d->wire_type = key & 0x7; - d->field.number = key >> 3; - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - // For delimited wire values we parse the length now, since we need it in - // all cases. 
- d->delimited_len = upb_baredecoder_readv32(d); - } - return &d->field; -} - -static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { - upb_string_substr(str, d->input, d->offset, d->delimited_len); - d->offset += d->delimited_len; - return true; -} +bool upb_baredecoder_run(upb_baredecoder *d) { + upb_string *str = NULL; + upb_strlen_t stack[UPB_MAX_NESTING]; + upb_strlen_t *top = &stack[0]; + *top = upb_string_len(d->input); + d->offset = 0; -static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) -{ - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: - *val.uint64 = upb_baredecoder_readv64(d); - break; - case UPB_WIRE_TYPE_32BIT_VARINT: - *val.uint32 = upb_baredecoder_readv32(d); - break; - case UPB_WIRE_TYPE_64BIT: - *val.uint64 = upb_baredecoder_readf64(d); - break; - case UPB_WIRE_TYPE_32BIT: - *val.uint32 = upb_baredecoder_readf32(d); - break; - default: - *(char*)0 = 0; - assert(false); - } - return true; -} + upb_dispatch_startmsg(&d->dispatcher); + while(d->offset < upb_string_len(d->input)) { + // Detect end-of-submessage. + while(d->offset >= *d->top) { + upb_dispatch_endsubmsg(&d->dispatcher); + d->offset = *(d->top--); + } -static bool upb_baredecoder_skipval(upb_baredecoder *d) -{ - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - d->offset += d->delimited_len; - return true; - } else { - upb_value val; - return upb_baredecoder_getval(d, upb_value_addrof(&val)); + uint32_t key = upb_baredecoder_readv64(d); + upb_fielddef f; + f.number = key >> 3; + upb_wire_type_t wt = key & 0x7; + if(wt == UPB_WIRE_TYPE_DELIMITED) { + uint32_t delim_len = upb_baredecoder_readv32(d); + // We don't know if it's a string or a submessage; deliver first as + // string. + str = upb_string_tryrecycle(str); + upb_string_substr(str, d->input, d->offset, d->delimited_len); + upb_value v; + upb_value_setstr(&v, str); + if(upb_dispatch_value(&d->dispatcher, &f, v) == UPB_TREAT_AS_SUBMSG) { + // Should deliver as a submessage instead. 
+ upb_dispatch_startsubmsg(&d->dispatcher, &f); + *(++d->top) = d->offset + delimited_len; + } else { + d->offset += delimited_len; + } + } else { + upb_value v; + switch(wt) { + case UPB_WIRE_TYPE_VARINT: + upb_value_setraw(&v, upb_baredecoder_readv64(d)); + upb_dispatch_value(&d->dispatcher, &f, v); + break; + case UPB_WIRE_TYPE_64BIT: + upb_value_setraw(&v, upb_baredecoder_readf64(d)); + upb_dispatch_value(&d->dispatcher, &f, v); + break; + case UPB_WIRE_TYPE_32BIT: + upb_value_setraw(&v, upb_baredecoder_readf32(d)); + break; + default: + assert(false); + abort(); + } + upb_dispatch_value(&d->dispatcher, &f, v); + } } -} - -static bool upb_baredecoder_startmsg(upb_baredecoder *d) -{ - *(++d->top) = d->offset + d->delimited_len; - return true; -} - -static bool upb_baredecoder_endmsg(upb_baredecoder *d) -{ - d->offset = *(d->top--); - return true; + upb_dispatch_endmsg(&d->dispatcher); } static upb_src_vtable upb_baredecoder_src_vtbl = { @@ -1068,8 +1092,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); d->offset = 0; - d->top = &d->stack[0]; - *(d->top) = upb_string_len(d->input); upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; } -- cgit v1.2.3 From 45599180905d45a882970f6ca8b6007436ac3f97 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 09:43:28 -0800 Subject: More work on upb_src. --- Makefile | 4 +- core/upb.h | 43 +++--- core/upb_def.c | 445 ++++++++++++++++++++++++++++++++---------------------- core/upb_stream.h | 6 + 4 files changed, 302 insertions(+), 196 deletions(-) diff --git a/Makefile b/Makefile index 5c6598c..42c7d41 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ CC=gcc CXX=g++ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. 
-CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) +CPPFLAGS=-Wall -Wextra -Wno-missing-field-initializers -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread core/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 @@ -61,7 +61,7 @@ SRC=core/upb.c \ core/upb_table.c \ core/upb_string.c \ descriptor/descriptor.c \ -# core/upb_def.c \ + core/upb_def.c \ # core/upb_msg.c \ # stream/upb_decoder.c \ # stream/upb_stdio.c \ diff --git a/core/upb.h b/core/upb.h index 7bed779..2057d60 100644 --- a/core/upb.h +++ b/core/upb.h @@ -12,6 +12,7 @@ #include #include #include // only for size_t. +#include #include "descriptor_const.h" #include "upb_atomic.h" @@ -128,6 +129,11 @@ typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
typedef struct { @@ -153,14 +159,20 @@ typedef struct { #endif } upb_value; +#ifdef NDEBUG +#define SET_TYPE(dest, val) +#else +#define SET_TYPE(dest, val) dest = val +#endif + #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ assert(val.type == UPB_TYPE(proto_type)); \ - return val.membername; \ + return val.val.membername; \ } \ void upb_value_ ## name(upb_value *val, ctype cval) { \ - val.type = UPB_TYPE(proto_type); \ - val.membername = cval; \ + SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + val->val.membername = cval; \ } UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); @@ -169,6 +181,7 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); +UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. @@ -187,24 +200,23 @@ typedef union { void *_void; } upb_valueptr; -// The type of a upb_value. This is like a upb_fieldtype_t, but adds the -// constant UPB_VALUETYPE_ARRAY to represent an array. -typedef uint8_t upb_valuetype_t; -#define UPB_VALUETYPE_ARRAY 32 - INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->_double}; + upb_valueptr ptr = {&val->val._double}; return ptr; } -// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to -// know the value type to perform this operation, because we need to know how -// much memory to copy. +// Reads or writes a upb_value from an address represented by a upb_value_ptr. +// We need to know the value type to perform this operation, because we need to +// know how much memory to copy (and for big-endian machines, we need to know +// where in the upb_value the data goes). 
+// +// For little endian-machines where we didn't mind overreading, we could make +// upb_value_read simply use memcpy(). INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { upb_value val; #define CASE(t, member_name) \ - case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; switch(ft) { CASE(DOUBLE, _double) @@ -232,13 +244,10 @@ INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { #undef CASE } -// Writes a upb_value to a upb_value_ptr location. We need to know the value -// type to perform this operation, because we need to know how much memory to -// copy. INLINE void upb_value_write(upb_valueptr ptr, upb_value val, upb_fieldtype_t ft) { #define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; switch(ft) { CASE(DOUBLE, _double) diff --git a/core/upb_def.c b/core/upb_def.c index cc771dc..4320fb6 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -9,9 +9,6 @@ #include "descriptor.h" #include "upb_def.h" -#define CHECKSRC(x) if(!(x)) goto src_err -#define CHECK(x) if(!(x)) goto err - /* Rounds p up to the next multiple of t. */ static size_t upb_align_up(size_t val, size_t align) { return val % align == 0 ? val : val + align - (val % align); @@ -184,6 +181,188 @@ static void upb_def_uninit(upb_def *def) { } +/* upb_defbuilder ************************************************************/ + +// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in +// the format defined in descriptor.proto. The output of a upb_defbuilder is +// a list of upb_def* that possibly contain unresolved references. +// +// We use a separate object (upb_defbuilder) instead of having the defs handle +// the parse themselves because we need to store state that is only necessary +// during the building process itself. 
+ +// When we are bootstrapping descriptor.proto, we must help the bare decoder out +// by telling it when to descend into a submessage, because with the wire format +// alone we cannot tell the difference between a submessage and a string. +// +// TODO: In the long-term, we should bootstrap from a serialization format that +// contains this information, so we can remove this special-case code. This +// would involve defining a serialization format very similar to the existing +// protobuf format, but that contains more information about the wire type. +#define BEGIN_SUBMSG 100 + +// upb_deflist: A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); + } + l->defs[l->len++] = d; +} + +// Qualify the defname for all defs starting with offset "start" with "str". +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } +} + +typedef struct { + upb_string *name; + int start; +} upb_defbuilder_frame; + +struct _upb_defbuilder { + upb_deflist defs; + upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; + + uint32_t number; + upb_string *name; +}; +typedef struct _upb_defbuilder upb_defbuilder; + +// Forward declares for top-level file descriptors. 
+static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h); + + +// Start/end handlers for FileDescriptorProto and DescriptorProto (the two +// entities that have names and can contain sub-definitions. +void upb_defbuilder_startcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[b->stack_len++]; + f->start = b->defs.len; + f->name = NULL; +} + +void upb_defbuilder_endcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[--b->stack_len]; + upb_deflist_qualify(&b->defs, f->name, f->start); + upb_string_unref(f->name); +} + +void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { + upb_defbuilder_frame *f = &b->stack[b->stack_len-1]; + upb_string_unref(f->name); + f->name = upb_string_getref(str); +} + +// Handlers for google.protobuf.FileDescriptorProto. +static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, + upb_fielddef *f, + upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + upb_msgdef_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_enumdef_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + default: + // TODO: services and extensions. 
+ return UPB_SKIP; + } +} + +static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_value, + &upb_defbuilder_FileDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// Handlers for google.protobuf.FileDescriptorSet. +static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, + upb_fielddef *f, + upb_value val) { + (void)b; + (void)val; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + upb_defbuilder_register_FileDescriptorProto(b, h); + return UPB_DELEGATE; + default: + return UPB_SKIP; + } +} + +static void upb_defbuilder_register_FileDescriptorSet( + upb_defbuilder *b, upb_handlers *h) { + static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorSet_value, + &upb_defbuilder_FileDescriptorSet_startsubmsg, + }; + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_set_handler_closure(h, b); +} + + /* upb_unresolveddef **********************************************************/ // Unresolved defs are used as temporary placeholders for a def whose name has @@ -227,28 +406,30 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_startmsg(upb_defbuilder *b) { +static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { b->number = -1; - name = NULL; + b->name = NULL; } -static upb_flow_t upb_enumdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - name = upb_string_tryrecycle(name); + b->name = upb_string_tryrecycle(name); CHECKSRC(upb_src_getstr(src, name)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); + b->number = upb_value_getint32(val); break; default: - CHECKSRC(upb_src_skipval(src)); break; } + return UPB_CONTINUE; } -static void upb_enumdef_endmsg(upb_defbuilder *b) { - if(name == NULL || number == -1) { +static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { + if(b->name == NULL || b->number == -1) { upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } @@ -262,7 +443,66 @@ static void upb_enumdef_endmsg(upb_defbuilder *b) { return UPB_CONTINUE; } -upb_enum_iter upb_enum_begin(upb_enumdef *e) { +static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + &upb_enumdef_EnumValueDescriptorProto_startmsg, + &upb_enumdef_EnumValueDescriptorProto_endmsg, + &upb_enumdef_EnumValueDescriptorProto_value, + } + upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// google.protobuf.EnumDescriptorProto. 
+void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + upb_deflist_push(&b->defs, UPB_UPCAST(e)); +} + +void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { + assert(e->base.fqname); +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(e->base.fqname); + e->base.fqname = upb_value_getstr(val); + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + return BEGIN_SUBMSG; + } + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, + upb_fielddef *f, + upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + upb_enumdef_register_EnumValueDescriptorProto(b, h); + return UPB_DELEGATE; + } + return UPB_SKIP; +} + +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + &upb_enumdef_EnumDescriptorProto_startmsg, + &upb_enumdef_EnumDescriptorProto_endmsg, + &upb_enumdef_EnumDescriptorProto_value, + } + upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. return upb_inttable_begin(&e->iton); } @@ -355,7 +595,7 @@ static int upb_compare_fields(const void *f1, const void *f2) { return upb_compare_typed_fields(*(void**)f1, *(void**)f2); } -// Processes a google.protobuf.DescriptorProto, adding defs to "defs." +// google.protobuf.DescriptorProto. 
static void upb_msgdef_startmsg(upb_defbuilder *b) { upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); @@ -417,9 +657,6 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - // XXX - m->base.fqname = upb_string_tryrecycle(m->base.fqname); - m->base.fqname = upb_value_getstr(val); upb_defbuilder_setscopename(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: @@ -432,13 +669,14 @@ static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb_handlers *h) { +static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, + upb_handlers *h) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: upb_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - upb_register_DescriptorProto(b, h); + upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: upb_register_EnumDescriptorProto(b, h); @@ -449,6 +687,18 @@ static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb } } +static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + &upb_msgdef_startmsg, + &upb_msgdef_endmsg, + &upb_msgdef_value, + &upb_msgdef_startsubmsg, + } + upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + static void upb_msgdef_free(upb_msgdef *m) { upb_msg_iter i; @@ -477,165 +727,6 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { return upb_inttable_next(&m->itof, &iter->e); } -/* upb_defbuilder 
************************************************************/ - -// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in -// the format defined in descriptor.proto. The output of a upb_defbuilder is -// a list of upb_def* that possibly contain unresolved references. -// -// We use a separate object (upb_defbuilder) instead of having the defs handle -// the parse themselves because we need to store state that is only necessary -// during the building process itself. - -// When we are bootstrapping descriptor.proto, we must help the bare decoder out -// by telling it when to descend into a submessage, because with the wire format -// alone we cannot tell the difference between a submessage and a string. -#define BEGIN_SUBMSG 100 - -// upb_deflist: A little dynamic array for storing a growing list of upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; -} - -static void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) - if(l->defs[i]) upb_def_unref(l->defs[i]); - free(l->defs); -} - -static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); - } - l->defs[l->len++] = d; -} - -// Qualify the defname for all defs starting with offset "start" with "str". 
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } -} - -typedef struct { - upb_deflist defs; - struct { - upb_string *name; - int start; - } upb_defbuilder_frame; - upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; - int stack_len; -} upb_defbuilder; - -// Start/end handlers for FileDescriptorProto and DescriptorProto (the two -// entities that have names and can contain sub-definitions. -upb_defbuilder_startcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = b->stack[b->stack_len++]; - f->start = b->defs.len; - f->name = NULL; -} - -upb_defbuilder_endcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = b->stack[--b->stack_len]; - upb_deflist_qualify(&b->defs, f->name, f->start); - upb_string_unref(f->name); -} - -upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { -} - -// Handlers for google.protobuf.FileDescriptorProto. -static bool upb_defbuilder_FileDescriptorProto_value(upb_defbuilder *b, - upb_fielddef *f, - upb_value val) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: - upb_defbuilder_setscopename(b, val.str); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - return BEGIN_SUBMSG; - default: - return UPB_SKIP; - } -} - -static bool upb_defbuilder_FileDescriptorProto_startsubmsg(upb_defbuilder *b, - upb_fielddef *f, - upb_handlers *h) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - upb_defbuilder_register_DescriptorProto(b, h); - return UPB_DELEGATE; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_defbuilder_register_EnumDescriptorProto(b, h); - return UPB_DELEGATE; - default: - // TODO: services and extensions. 
- return UPB_SKIP; - } -} - -static upb_handlers upb_defbuilder_FileDescriptorProto_handlers = { - NULL, // startmsg - NULL, // endmsg - &upb_defbuilder_FileDescriptorProto_value, - &upb_defbuilder_FileDescriptorProto_startsubmsg, -} - -upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); - upb_set_handler_closure(h, b); -} - -// Handlers for google.protobuf.FileDescriptorSet. -upb_defbuilder_FileDescriptorSet_value(upb_defbuilder *b, upb_fielddef *f, - upb_value val) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - return BEGIN_SUBMSG; - default: - return UPB_SKIP; - } -} - -upb_defbuilder_FileDescriptorSet_startsubmsg(upb_defbuilder *b, - upb_fielddef *f, upb_handlers *h) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - upb_defbuilder_register_FileDescriptorProto(b, h); - return UPB_DELEGATE; - default: - return UPB_SKIP; - } -} - -static upb_handlers upb_defbuilder_FileDescriptorSet_handlers = { - NULL, // startmsg - NULL, // endmsg - &upb_defbuilder_FileDescriptorSet_value, - &upb_defbuilder_FileDescriptorSet_startsubmsg, -} - -upb_defbuilder_register_FileDescriptorSet(upb_defbuilder *b, upb_handlers *h) { - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); - upb_set_handler_closure(h, b); -} - - - /* upb_symtab adding defs *****************************************************/ // This is a self-contained group of functions that, given a list of upb_defs diff --git a/core/upb_stream.h b/core/upb_stream.h index 1eb111e..c96c544 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -135,6 +135,12 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, upb_field_number_t fieldnum, upb_value val); +/* upb_src ********************************************************************/ + +struct _upb_src; +typedef struct _upb_src upb_src; + + /* upb_bytesrc 
****************************************************************/ struct _upb_bytesrc; -- cgit v1.2.3 From bcc688a303439c758a47da9f0eb1c064ece6ce09 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 20:37:04 -0800 Subject: upb_def compiles again! --- core/upb.c | 2 +- core/upb.h | 37 ++++--- core/upb_def.c | 283 +++++++++++++++++++++++++++++++------------------ core/upb_msg.c | 13 ++- core/upb_stream.h | 62 +++++++---- core/upb_stream_vtbl.h | 88 ++++++++++----- core/upb_string.c | 2 +- core/upb_string.h | 13 +-- 8 files changed, 325 insertions(+), 175 deletions(-) diff --git a/core/upb.c b/core/upb.c index c396323..2f715d0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -45,7 +45,7 @@ void upb_seterr(upb_status *status, enum upb_status_code code, { if(upb_ok(status)) { // The first error is the most interesting. status->code = code; - status->str = upb_string_tryrecycle(status->str); + upb_string_recycle(&status->str); va_list args; va_start(args, msg); upb_string_vprintf(status->str, msg, args); diff --git a/core/upb.h b/core/upb.h index 2057d60..64bc88c 100644 --- a/core/upb.h +++ b/core/upb.h @@ -126,14 +126,20 @@ struct _upb_array; typedef struct _upb_array upb_array; struct _upb_msg; typedef struct _upb_msg upb_msg; +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; -typedef uint32_t upb_strlen_t; +typedef int32_t upb_strlen_t; +#define UPB_STRLEN_MAX INT32_MAX // The type of a upb_value. This is like a upb_fieldtype_t, but adds the // constant UPB_VALUETYPE_ARRAY to represent an array. typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_ARRAY 32 +#define UPB_VALUETYPE_BYTESRC 32 +#define UPB_VALUETYPE_RAW 33 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
typedef struct { @@ -146,6 +152,7 @@ typedef struct { uint64_t uint64; bool _bool; upb_string *str; + upb_bytesrc *bytesrc; upb_msg *msg; upb_array *arr; upb_atomic_refcount_t *refcount; @@ -167,21 +174,27 @@ typedef struct { #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == UPB_TYPE(proto_type)); \ + assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ return val.val.membername; \ } \ - void upb_value_ ## name(upb_value *val, ctype cval) { \ - SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + void upb_value_set ## name(upb_value *val, ctype cval) { \ + SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } -UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); -UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, INT32); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); -UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); -UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); +UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); +UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); + +void upb_value_setraw(upb_value *val, uint64_t cval) { + SET_TYPE(val->type, UPB_VALUETYPE_RAW); + val->val.uint64 = cval; +} // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. 
diff --git a/core/upb_def.c b/core/upb_def.c index 4320fb6..4f12dbe 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -228,6 +228,10 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } +static upb_def *upb_deflist_last(upb_deflist *l) { + return l->defs[l->len-1]; +} + // Qualify the defname for all defs starting with offset "start" with "str". static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { @@ -238,8 +242,14 @@ static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) } } +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). typedef struct { upb_string *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. 
int start; } upb_defbuilder_frame; @@ -250,6 +260,10 @@ struct _upb_defbuilder { uint32_t number; upb_string *name; + bool saw_number; + bool saw_name; + + upb_fielddef *f; }; typedef struct _upb_defbuilder upb_defbuilder; @@ -259,6 +273,28 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_defbuilder_init(upb_defbuilder *b) { + upb_deflist_init(&b->defs); + b->stack_len = 0; + b->name = NULL; +} + +static void upb_defbuilder_uninit(upb_defbuilder *b) { + upb_string_unref(b->name); + upb_deflist_uninit(&b->defs); +} + +static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { + if (b->stack_len <= 1) return NULL; + int index = b->stack[b->stack_len-1].start - 1; + assert(index >= 0); + return upb_downcast_msgdef(b->defs.defs[index]); +} + +static upb_def *upb_defbuilder_last(upb_defbuilder *b) { + return upb_deflist_last(&b->defs); +} + // Start/end handlers for FileDescriptorProto and DescriptorProto (the two // entities that have names and can contain sub-definitions. void upb_defbuilder_startcontainer(upb_defbuilder *b) { @@ -291,9 +327,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( @@ -308,19 +343,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( return UPB_DELEGATE; default: // TODO: services and extensions. 
- return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -333,9 +368,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( @@ -345,20 +379,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: upb_defbuilder_register_FileDescriptorProto(b, h); return UPB_DELEGATE; - default: - return UPB_SKIP; } + return UPB_SKIPSUBMSG; } static void upb_defbuilder_register_FileDescriptorSet( upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorSet_value, &upb_defbuilder_FileDescriptorSet_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -406,18 +439,20 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { - b->number = -1; - b->name = NULL; +static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + b->saw_number = false; + b->saw_name = false; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - b->name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); + upb_string_unref(b->name); + upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); @@ -428,34 +463,37 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { - if(b->name == NULL || b->number == -1) { - upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - goto err; +static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + if(!b->saw_number || !b->saw_name) { + //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); + //goto err; + return; } - upb_ntoi_ent ntoi_ent = {{name, 0}, number}; - upb_iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; + upb_iton_ent iton_ent = {{b->number, 0}, b->name}; + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. 
- return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumValueDescriptorProto_startmsg, &upb_enumdef_EnumValueDescriptorProto_endmsg, &upb_enumdef_EnumValueDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } // google.protobuf.EnumDescriptorProto. -void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { +void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); @@ -463,42 +501,51 @@ void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { upb_deflist_push(&b->defs, UPB_UPCAST(e)); } -void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { - assert(e->base.fqname); +void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + assert(upb_defbuilder_last(b)->fqname != NULL); } -static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: { + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_string_unref(e->base.fqname); - e->base.fqname = upb_value_getstr(val); + e->base.fqname = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: return BEGIN_SUBMSG; + default: + return UPB_CONTINUE; } - return UPB_CONTINUE; } -static upb_flow_t 
upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: upb_enumdef_register_EnumValueDescriptorProto(b, h); return UPB_DELEGATE; + default: + return UPB_SKIPSUBMSG; } - return UPB_SKIP; } static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumDescriptorProto_startmsg, &upb_enumdef_EnumDescriptorProto_endmsg, &upb_enumdef_EnumDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + &upb_enumdef_EnumDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -529,56 +576,71 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(upb_defbuilder *b) { +static void upb_fielddef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; f->name = NULL; f->def = NULL; f->owned = false; - f->msgdef = m; + f->msgdef = upb_defbuilder_top(b); b->f = f; } -static void upb_fielddef_endmsg(upb_defbuilder *b) { +static void upb_fielddef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_fielddef *f = b->f; // TODO: verify that all required fields were present. assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. 
+ upb_msgdef *m = upb_defbuilder_top(b); upb_itof_ent itof_ent = {{f->number, 0}, f}; upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); - return true; } -static upb_flow_t upb_fielddef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { - switch(parsed_f->number) { +static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - f->type = upb_value_getint32(val); + b->f->type = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - f->label = upb_value_getint32(val); + b->f->label = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - f->number = upb_value_getint32(val); + b->f->number = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); + upb_string_unref(b->f->name); + b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if(b->f->def) upb_def_unref(b->f->def); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->owned = true; break; } } return UPB_CONTINUE; } +static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_fielddef_startmsg, + &upb_fielddef_endmsg, + &upb_fielddef_value, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b); +} + /* upb_msgdef *****************************************************************/ @@ -596,21 
+658,24 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(upb_defbuilder *b) { +static void upb_msgdef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b); } -static void upb_msgdef_endmsg(upb_defbuilder *b) { - upb_msgdef *m = upb_downcast_msgdef(upb_deflist_stacktop(&m->defs)); +static void upb_msgdef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return UPB_ERROR; + //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + //return UPB_ERROR; + return; } // Create an ordering over the fields. 
@@ -651,51 +716,57 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - return UPB_CONTINUE; + //return UPB_CONTINUE; } -static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_defbuilder_setscopename(upb_value_getstr(val)); - break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: { + upb_msgdef *m = upb_defbuilder_top(b); + upb_string_unref(m->base.fqname); + m->base.fqname = upb_string_getref(upb_value_getstr(val)); + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; default: // TODO: extensions. 
- return UPB_SKIP; + return UPB_CONTINUE; } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, +static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - upb_register_FieldDescriptorProto(b, h); + upb_fielddef_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_register_EnumDescriptorProto(b, h); + upb_enumdef_register_EnumDescriptorProto(b, h); return UPB_DELEGATE; break; default: - return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_msgdef_startmsg, &upb_msgdef_endmsg, &upb_msgdef_value, &upb_msgdef_startsubmsg, - } - upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -884,7 +955,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either // succeeds or fails. Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, +bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, bool allow_redef, upb_status *status) { upb_rwlock_wrlock(&s->lock); @@ -892,9 +963,9 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Build a table of the defs we mean to add, for duplicate detection and name // resolution. 
upb_strtable tmptab; - upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); - for (uint32_t i = 0; i < defs->len; i++) { - upb_def *def = defs->defs[i]; + upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); + for (int i = 0; i < num_defs; i++) { + upb_def *def = defs[i]; upb_symtab_ent e = {{def->fqname, 0}, def}; // Redefinition is never allowed within a single FileDescriptorSet. @@ -909,13 +980,13 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Pass ownership from the deflist to the strtable. upb_strtable_insert(&tmptab, &e.e); - defs->defs[i] = NULL; + defs[i] = NULL; } // TODO: process the list of extensions by modifying entries from // tmptab in-place (copying them from the symtab first if necessary). - CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); + if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; // The defs in tmptab have been vetted, and can be added to the symtab // without causing errors. Now add all tmptab defs to the symtab, @@ -946,6 +1017,7 @@ err: upb_def_unref(e->def); } upb_strtable_free(&tmptab); + for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]); return false; } @@ -1026,20 +1098,18 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { - upb_defbuilder *b = upb_defbuilder_new(); - upb_defbuilder_register_handlers(b, upb_src_gethandlers(src)); + upb_defbuilder b; + upb_defbuilder_init(&b); + //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); + upb_defbuilder_register_FileDescriptorSet(&b, NULL); if(!upb_src_run(src)) { upb_copyerr(status, upb_src_status(src)); + upb_defbuilder_uninit(&b); return; } - upb_symtab_add_defs(s, b->defs, b->defs_len, false, status); - upb_deflist_uninit(&defs); + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_defbuilder_uninit(&b); return; - -src_err: - upb_copyerr(status, upb_src_status(src)); 
-err: - upb_deflist_uninit(&defs); } @@ -1074,8 +1144,10 @@ err: // complicated to support on big-endian machines. typedef struct { + upb_src src; upb_string *input; upb_strlen_t offset; + upb_dispatcher dispatcher; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -1121,9 +1193,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_dispatch_startmsg(&d->dispatcher); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. - while(d->offset >= *d->top) { + while(d->offset >= *top) { upb_dispatch_endsubmsg(&d->dispatcher); - d->offset = *(d->top--); + d->offset = *(top--); } uint32_t key = upb_baredecoder_readv64(d); @@ -1134,16 +1206,16 @@ bool upb_baredecoder_run(upb_baredecoder *d) { uint32_t delim_len = upb_baredecoder_readv32(d); // We don't know if it's a string or a submessage; deliver first as // string. - str = upb_string_tryrecycle(str); - upb_string_substr(str, d->input, d->offset, d->delimited_len); + upb_string_recycle(&str); + upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == UPB_TREAT_AS_SUBMSG) { + if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { // Should deliver as a submessage instead. 
upb_dispatch_startsubmsg(&d->dispatcher, &f); - *(++d->top) = d->offset + delimited_len; + *(++top) = d->offset + delim_len; } else { - d->offset += delimited_len; + d->offset += delim_len; } } else { upb_value v; @@ -1167,23 +1239,24 @@ bool upb_baredecoder_run(upb_baredecoder *d) { } } upb_dispatch_endmsg(&d->dispatcher); + return true; } -static upb_src_vtable upb_baredecoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_baredecoder_getdef, - (upb_src_getval_fptr)&upb_baredecoder_getval, - (upb_src_getstr_fptr)&upb_baredecoder_getstr, - (upb_src_skipval_fptr)&upb_baredecoder_skipval, - (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, -}; - static upb_baredecoder *upb_baredecoder_new(upb_string *str) { + //static upb_src_vtable vtbl = { + // (upb_src_getdef_fptr)&upb_baredecoder_getdef, + // (upb_src_getval_fptr)&upb_baredecoder_getval, + // (upb_src_getstr_fptr)&upb_baredecoder_getstr, + // (upb_src_skipval_fptr)&upb_baredecoder_skipval, + // (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, + // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, + //}; upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); d->offset = 0; - upb_src_init(&d->src, &upb_baredecoder_src_vtbl); + upb_dispatcher_init(&d->dispatcher); + //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_msg.c b/core/upb_msg.c index 75f7a35..a0a5196 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -7,6 +7,8 @@ */ #include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" void _upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { @@ -108,10 +110,13 @@ upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, upb_status *status) { - (void)msg; - (void)md; - (void)str; - (void)status; + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, str); + upb_decoder *d = upb_decoder_new(md); + 
upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_decoder_free(d); + upb_stringsrc_free(ssrc); } void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, diff --git a/core/upb_stream.h b/core/upb_stream.h index c96c544..9ae69de 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,13 +39,16 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). - UPB_SKIP, + // An error occurred; check status for details. + UPB_ERROR, - // Caller should stop sending values; check sink status for details. + // Processing should stop for now, but could be resumed later. // If processing resumes later, it should resume with the next value. - UPB_STOP, + UPB_SUSPEND, + + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIPSUBMSG, // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been @@ -117,6 +120,9 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +// TODO: for clients that want to increase efficiency by preventing bytesrcs +// from automatically being converted to strings in the value callback. 
+// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); // An object that transparently handles delegation so that the caller needs @@ -140,21 +146,30 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; +bool upb_src_run(upb_src *src); +upb_status *upb_src_status(upb_src *src); -/* upb_bytesrc ****************************************************************/ - -struct _upb_bytesrc; -typedef struct _upb_bytesrc upb_bytesrc; -// Returns the next string in the stream. false is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +/* upb_bytesrc ****************************************************************/ -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. The call fails if fewer than len bytes are -// available in the stream. -INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// Reads up to "count" bytes into "buf", returning the total number of bytes +// read. If <0, indicates error (check upb_bytesrc_status for details). +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count); + +// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing +// existing string data (which avoids a copy). +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count); + +// A convenience function for getting all the remaining data in a upb_bytesrc +// as a upb_string. Returns false and sets "status" if the operation fails. 
+INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status); +INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, + upb_status *status) { + return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); +} // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file @@ -164,14 +179,21 @@ INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t l INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); INLINE bool upb_bytesrc_eof(upb_bytesrc *src); + /* upb_bytesink ***************************************************************/ struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +// Writes up to "count" bytes from "buf", returning the total number of bytes +// written. If <0, indicates error (check upb_bytesink_status() for details). +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count); + +// Puts the given string, which may alias the string data (which avoids a +// copy). Returns the number of bytes that were actually, consumed, which may +// be fewer than were in the string, or <0 on error. +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 91464a7..c0cf04f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -20,23 +20,33 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. +// upb_src +struct _upb_src { +}; +typedef struct { +} upb_src_vtbl; + // upb_bytesrc. 
-typedef bool (*upb_bytesrc_get_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); +typedef upb_strlen_t (*upb_bytesrc_read_fptr)( + upb_bytesrc *src, void *buf, upb_strlen_t count); +typedef bool (*upb_bytesrc_getstr_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t count); // upb_bytesink. -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); +typedef upb_strlen_t (*upb_bytesink_write_fptr)( + upb_bytesink *bytesink, void *buf, upb_strlen_t count); +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str); // Vtables for the above interfaces. typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; + upb_bytesrc_read_fptr read; + upb_bytesrc_getstr_fptr getstr; } upb_bytesrc_vtable; typedef struct { - upb_bytesink_put_fptr put; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; } upb_bytesink_vtable; // "Base Class" definitions; components that implement these interfaces should @@ -69,19 +79,56 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { // Implementation of virtual function dispatch. // upb_bytesrc -INLINE bool upb_bytesrc_get( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { - return bytesrc->vtbl->get(bytesrc, str, minlen); -} +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count) { + return src->vtbl->read(src, buf, count); +} + +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count) { + return src->vtbl->getstr(src, str, count); +} + +INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + // We start with a getstr, because that could possibly alias data instead of + // copying. 
+ if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + // Trade-off between number of read calls and amount of overallocation. + const size_t bufsize = 4096; + while (!upb_bytesrc_eof(src)) { + upb_strlen_t len = upb_string_len(str); + char *buf = upb_string_getrwbuf(str, len + bufsize); + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); + if (read < 0) goto error; + // Resize to proper size. + upb_string_getrwbuf(str, len + read); + } + return true; -INLINE bool upb_bytesrc_append( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { - return bytesrc->vtbl->append(bytesrc, str, len); +error: + upb_copyerr(status, upb_bytesrc_status(src)); + return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_bytesink +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count) { + return sink->vtbl->write(sink, buf, count); +} + +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} + +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; @@ -182,17 +229,6 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, fieldnum, val); } -// upb_bytesink -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { - return sink->vtbl->put(sink, str); -} -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { - return &sink->status; -} - -// upb_bytesink - - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/core/upb_string.c b/core/upb_string.c index 4f5f5c2..b243dfd 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -73,7 +73,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { // assert(str->ptr == NULL); - uint32_t size = 
upb_string_size(str); + upb_strlen_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); str->cached_mem = realloc(str->cached_mem, size); diff --git a/core/upb_string.h b/core/upb_string.h index ee345e3..f82603b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -119,20 +119,21 @@ INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The returned string is either "str" if it could be -// recycled or a newly created string if "str" has other references. +// data written to it. After the function returns, "str" points to a writable +// string, which is either the original string if it had no other references +// or a newly created string if it did have other references. // -// As a special case, passing NULL will allocate a new string. This is -// convenient for the pattern: +// As a special case, passing a pointer to NULL will allocate a new string. +// This is convenient for the pattern: // // upb_string *str = NULL; // while (x) { // if (y) { -// str = upb_string_tryrecycle(str); +// upb_string_recycle(&str); // upb_src_getstr(str); // } // } -upb_string *upb_string_tryrecycle(upb_string *str); +upb_string *upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been -- cgit v1.2.3 From e9b9bbf216fdcbc86114b074dba6d0f51e4a438e Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 21:01:07 -0800 Subject: Add INLINE to a few identifiers. 
--- core/upb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/upb.h b/core/upb.h index 64bc88c..764e9ba 100644 --- a/core/upb.h +++ b/core/upb.h @@ -173,11 +173,11 @@ typedef struct { #endif #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ - ctype upb_value_get ## name(upb_value val) { \ + INLINE ctype upb_value_get ## name(upb_value val) { \ assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ return val.val.membername; \ } \ - void upb_value_set ## name(upb_value *val, ctype cval) { \ + INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } @@ -191,7 +191,7 @@ UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); -void upb_value_setraw(upb_value *val, uint64_t cval) { +INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { SET_TYPE(val->type, UPB_VALUETYPE_RAW); val->val.uint64 = cval; } -- cgit v1.2.3 From a38742bbe1cbc037f15edc053f5cf4dd53c5457a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 18 Jan 2011 22:33:05 -0800 Subject: A few minor changes to the streaming protocol. 1. the start and end callbacks can now return a upb_flow_t and set a status message. 2. clarified some semantics around passing an error status back from the callbacks. 
--- core/upb.c | 4 +++ core/upb.h | 27 ++++++++++--------- core/upb_def.c | 70 ++++++++++++++++++++++++++++---------------------- core/upb_stream.h | 29 +++++++++++---------- core/upb_stream_vtbl.h | 5 +++- 5 files changed, 76 insertions(+), 59 deletions(-) diff --git a/core/upb.c b/core/upb.c index 2f715d0..05e9b7d 100644 --- a/core/upb.c +++ b/core/upb.c @@ -73,3 +73,7 @@ void upb_printerr(upb_status *status) { fprintf(stderr, "code: %d, no msg\n", status->code); } } + +void upb_status_uninit(upb_status *status) { + upb_string_unref(status->str); +} diff --git a/core/upb.h b/core/upb.h index 764e9ba..fb6d9ea 100644 --- a/core/upb.h +++ b/core/upb.h @@ -290,30 +290,27 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { + // The operation completed successfully. UPB_STATUS_OK = 0, - // A read or write from a streaming src/sink could not be completed right now. - UPB_STATUS_TRYAGAIN = 1, + // The bytesrc is at EOF and all data was read successfully. + UPB_STATUS_EOF = 1, - // A value had an incorrect wire type and will be skipped. - UPB_STATUS_BADWIRETYPE = 2, + // A read or write from a streaming src/sink could not be completed right now. + UPB_STATUS_TRYAGAIN = 2, // An unrecoverable error occurred. UPB_STATUS_ERROR = -1, - // A varint went for 10 bytes without terminating. - UPB_ERROR_UNTERMINATED_VARINT = -2, - - // The max nesting level (UPB_MAX_NESTING) was exceeded. - UPB_ERROR_MAX_NESTING_EXCEEDED = -3 + // A recoverable error occurred (for example, data of the wrong type was + // encountered which we can skip over). + // UPB_STATUS_RECOVERABLE_ERROR = -2 }; -// TODO: consider making this a single word: a upb_string* where we use the low -// bits as flags indicating whether there is an error and whether it is -// resumable. 
This would improve efficiency, because the code would not need -// to be loaded after a call to a function returning a status. +// TODO: consider adding error space and code, to let ie. errno be stored +// as a proper code. typedef struct { - enum upb_status_code code; + char code; upb_string *str; } upb_status; @@ -329,6 +326,8 @@ INLINE void upb_status_init(upb_status *status) { status->str = NULL; } +void upb_status_uninit(upb_status *status); + void upb_printerr(upb_status *status); void upb_clearerr(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, diff --git a/core/upb_def.c b/core/upb_def.c index 4f12dbe..0176dc9 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -257,6 +257,7 @@ struct _upb_defbuilder { upb_deflist defs; upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; int stack_len; + upb_status status; uint32_t number; upb_string *name; @@ -275,12 +276,14 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, static void upb_defbuilder_init(upb_defbuilder *b) { upb_deflist_init(&b->defs); + upb_status_init(&b->status); b->stack_len = 0; b->name = NULL; } static void upb_defbuilder_uninit(upb_defbuilder *b) { upb_string_unref(b->name); + upb_status_uninit(&b->status); upb_deflist_uninit(&b->defs); } @@ -356,7 +359,7 @@ static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } // Handlers for google.protobuf.FileDescriptorSet. @@ -392,7 +395,7 @@ static void upb_defbuilder_register_FileDescriptorSet( &upb_defbuilder_FileDescriptorSet_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } @@ -439,10 +442,11 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { upb_defbuilder *b = _b; b->saw_number = false; b->saw_name = false; + return UPB_CONTINUE; } static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, @@ -463,12 +467,12 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; if(!b->saw_number || !b->saw_name) { - //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - //goto err; - return; + upb_seterr(&b->status, UPB_STATUS_ERROR, + "Enum value missing name or number."); + return UPB_STOP; } upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; upb_iton_ent iton_ent = {{b->number, 0}, b->name}; @@ -478,6 +482,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. + return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, @@ -488,22 +493,24 @@ static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, &upb_enumdef_EnumValueDescriptorProto_value, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } // google.protobuf.EnumDescriptorProto. 
-void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(e)); + return UPB_CONTINUE; } -void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; assert(upb_defbuilder_last(b)->fqname != NULL); + return UPB_CONTINUE; } static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, @@ -546,7 +553,7 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, &upb_enumdef_EnumDescriptorProto_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } upb_enum_iter upb_enum_begin(upb_enumdef *e) { @@ -576,7 +583,7 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(void *_b) { +static upb_flow_t upb_fielddef_startmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; @@ -585,9 +592,10 @@ static void upb_fielddef_startmsg(void *_b) { f->owned = false; f->msgdef = upb_defbuilder_top(b); b->f = f; + return UPB_CONTINUE; } -static void upb_fielddef_endmsg(void *_b) { +static upb_flow_t upb_fielddef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = b->f; // TODO: verify that all required fields were present. 
@@ -600,6 +608,7 @@ static void upb_fielddef_endmsg(void *_b) { upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); + return UPB_CONTINUE; } static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { @@ -620,7 +629,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_STOP; if(b->f->def) upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); b->f->owned = true; @@ -638,7 +647,7 @@ static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, &upb_fielddef_value, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } @@ -658,7 +667,7 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(void *_b) { +static upb_flow_t upb_msgdef_startmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); @@ -667,15 +676,16 @@ static void upb_msgdef_startmsg(void *_b) { upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; } -static void upb_msgdef_endmsg(void *_b) { +static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - //return UPB_ERROR; - return; + upb_seterr(&b->status, UPB_STATUS_ERROR, + "Encountered message with no name."); + return UPB_STOP; } // Create an ordering over the fields. 
@@ -716,7 +726,7 @@ static void upb_msgdef_endmsg(void *_b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - //return UPB_CONTINUE; + return UPB_CONTINUE; } static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { @@ -767,7 +777,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, &upb_msgdef_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } static void upb_msgdef_free(upb_msgdef *m) @@ -1100,16 +1110,14 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { upb_defbuilder b; upb_defbuilder_init(&b); - //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); - upb_defbuilder_register_FileDescriptorSet(&b, NULL); - if(!upb_src_run(src)) { - upb_copyerr(status, upb_src_status(src)); - upb_defbuilder_uninit(&b); - return; - } - upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_defbuilder_register_FileDescriptorSet(&b, &handlers); + upb_src_sethandlers(src, &handlers); + upb_src_run(src, status); + if (upb_ok(status)) + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); upb_defbuilder_uninit(&b); - return; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 9ae69de..40836e9 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,12 +39,8 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // An error occurred; check status for details. - UPB_ERROR, - - // Processing should stop for now, but could be resumed later. - // If processing resumes later, it should resume with the next value. - UPB_SUSPEND, + // Stop processing for now; check status for details. + UPB_STOP, // Skips to the end of the current submessage (or if we are at the top // level, skips to the end of the entire message). 
@@ -61,8 +57,8 @@ typedef enum { struct _upb_handlers; typedef struct _upb_handlers upb_handlers; -typedef void (*upb_startmsg_handler_t)(void *closure); -typedef void (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_endmsg_handler_t)(void *closure); typedef upb_flow_t (*upb_value_handler_t)(void *closure, struct _upb_fielddef *f, upb_value val); @@ -76,12 +72,14 @@ typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, // An empty set of handlers, for convenient copy/paste: // -// static void startmsg(void *closure) { +// static upb_flow_t startmsg(void *closure) { // // Called when the top-level message begins. +// return UPB_CONTINUE; // } // -// static void endmsg(void *closure) { +// static upb_flow_t endmsg(void *closure) { // // Called when the top-level message ends. +// return UPB_CONTINUE; // } // // static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { @@ -120,10 +118,15 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); + // TODO: for clients that want to increase efficiency by preventing bytesrcs // from automatically being converted to strings in the value callback. // INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); -INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); + +// The closure will be passed to every handler. The status will be used +// only immediately after a handler has returned UPB_STOP. +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status); // An object that transparently handles delegation so that the caller needs // only follow the protocol as if delegation did not exist. 
@@ -146,8 +149,8 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -bool upb_src_run(upb_src *src); -upb_status *upb_src_status(upb_src *src); +void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); +void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index c0cf04f..d017177 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -133,6 +133,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { struct _upb_handlers { upb_handlerset *set; void *closure; + upb_status *status; // We don't own this. }; INLINE void upb_handlers_init(upb_handlers *h) { @@ -155,8 +156,10 @@ INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { h->set = set; } -INLINE void upb_set_handler_closure(upb_handlers *h, void *closure) { +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status) { h->closure = closure; + h->status = status; } // upb_dispatcher -- cgit v1.2.3 From 1dea81b1c244d357a6e46ee22c14b36280bf2100 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 17:29:16 -0800 Subject: Interface refinement: rename some constants. * UPB_STOP -> UPB_BREAK, better represents breaking out of a parsing loop. * UPB_STATUS_OK -> UPB_OK, for all status codes, more concise at no readability cost (perhaps an improvement). 
--- core/upb.c | 5 ++--- core/upb.h | 16 ++++++++-------- core/upb_def.c | 22 ++++++++++------------ core/upb_stream.h | 6 ++++-- 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/core/upb.c b/core/upb.c index 05e9b7d..da2a0f0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -60,9 +60,8 @@ void upb_copyerr(upb_status *to, upb_status *from) } void upb_clearerr(upb_status *status) { - status->code = UPB_STATUS_OK; - upb_string_unref(status->str); - status->str = NULL; + status->code = UPB_OK; + upb_string_recycle(&status->str); } void upb_printerr(upb_status *status) { diff --git a/core/upb.h b/core/upb.h index fb6d9ea..d394a08 100644 --- a/core/upb.h +++ b/core/upb.h @@ -291,16 +291,16 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, // resumed. enum upb_status_code { // The operation completed successfully. - UPB_STATUS_OK = 0, + UPB_OK = 0, // The bytesrc is at EOF and all data was read successfully. - UPB_STATUS_EOF = 1, + UPB_EOF = 1, // A read or write from a streaming src/sink could not be completed right now. - UPB_STATUS_TRYAGAIN = 2, + UPB_TRYAGAIN = 2, // An unrecoverable error occurred. - UPB_STATUS_ERROR = -1, + UPB_ERROR = -1, // A recoverable error occurred (for example, data of the wrong type was // encountered which we can skip over). @@ -308,21 +308,21 @@ enum upb_status_code { }; // TODO: consider adding error space and code, to let ie. errno be stored -// as a proper code. +// as a proper code, or application-specific error codes. 
typedef struct { char code; upb_string *str; } upb_status; -#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL} +#define UPB_STATUS_INIT {UPB_OK, NULL} #define UPB_ERRORMSG_MAXLEN 256 INLINE bool upb_ok(upb_status *status) { - return status->code == UPB_STATUS_OK; + return status->code == UPB_OK; } INLINE void upb_status_init(upb_status *status) { - status->code = UPB_STATUS_OK; + status->code = UPB_OK; status->str = NULL; } diff --git a/core/upb_def.c b/core/upb_def.c index 0176dc9..79b6632 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -470,9 +470,8 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; if(!b->saw_number || !b->saw_name) { - upb_seterr(&b->status, UPB_STATUS_ERROR, - "Enum value missing name or number."); - return UPB_STOP; + upb_seterr(&b->status, UPB_ERROR, "Enum value missing name or number."); + return UPB_BREAK; } upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; upb_iton_ent iton_ent = {{b->number, 0}, b->name}; @@ -629,7 +628,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_STOP; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); b->f->owned = true; @@ -683,9 +682,8 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(&b->status, UPB_STATUS_ERROR, - "Encountered message with no name."); - return UPB_STOP; + upb_seterr(&b->status, UPB_ERROR, "Encountered message with no name."); + return UPB_BREAK; } // Create an ordering over the fields. 
@@ -864,7 +862,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) // where we recurse over the type tree (like for example, right now) and an // absurdly deep tree could cause us to stack overflow on systems with very // limited stacks. - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at " "depth %d in the type graph, which exceeds the maximum type " "depth of %d.", UPB_UPCAST(m)->fqname, depth, UPB_MAX_TYPE_DEPTH); @@ -873,7 +871,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) // Cycle! int cycle_len = depth - 1; if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved " "in a cycle of length %d, which exceeds the maximum type " "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, UPB_MAX_TYPE_CYCLE_LEN); @@ -931,7 +929,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_symtab_ent *found; if(!(found = upb_resolve(tmptab, base, name)) && !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_STATUS_ERROR, + upb_seterr(status, UPB_ERROR, "could not resolve symbol '" UPB_STRFMT "'" " in context '" UPB_STRFMT "'", UPB_STRARG(name), UPB_STRARG(base)); @@ -941,7 +939,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Check the type of the found def. upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { - upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); + upb_seterr(status, UPB_ERROR, "Unexpected type"); return false; } upb_msgdef_resolve(m, f, found->def); @@ -983,7 +981,7 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, // allow_redef is set. 
if (upb_strtable_lookup(&tmptab, def->fqname) || (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { - upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT, + upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT, UPB_STRARG(def->fqname)); goto err; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 40836e9..66bfec2 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,8 +39,10 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Stop processing for now; check status for details. - UPB_STOP, + // Stop processing for now; check status for details. If no status was set, + // a generic error will be returned. If the error is resumable, processing + // will resume by delivering this callback again. + UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top // level, skips to the end of the entire message). -- cgit v1.2.3 From a695b92ccea4b82180ae45d21d7ed4445f7d0769 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 19:18:22 -0800 Subject: Debugging test_def, it's close to working again! --- Makefile | 10 +++--- core/upb_def.c | 80 +++++++++++++++++++++++++++-------------- core/upb_stream.h | 21 ++++++++--- core/upb_stream_vtbl.h | 96 ++++++++++++++++++++++++++++++++++++++++---------- core/upb_string.c | 8 ++--- core/upb_string.h | 2 +- tests/test_def.c | 1 + tests/test_string.c | 19 +++++----- 8 files changed, 171 insertions(+), 66 deletions(-) diff --git a/Makefile b/Makefile index 42c7d41..af79363 100644 --- a/Makefile +++ b/Makefile @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a @@ -112,13 +112,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ - tests/test_stream \ -# tests/test_def \ + tests/test_def \ +# tests/test_stream \ # tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb -tests: $(TESTS) +tests: $(LIBUPB) $(TESTS) OTHER_TESTS=tests/tests \ $(TESTS): $(LIBUPB) diff --git a/core/upb_def.c b/core/upb_def.c index 79b6632..a935930 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -319,6 +319,18 @@ void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { } // Handlers for google.protobuf.FileDescriptorProto. 
+static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} + static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { @@ -353,8 +365,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_startmsg, + &upb_defbuilder_FileDescriptorProto_endmsg, &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; @@ -457,9 +469,11 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); upb_string_getref(upb_value_getstr(val)); + b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); + b->saw_number = true; break; default: break; @@ -507,8 +521,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { } static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { - upb_defbuilder *b = _b; - assert(upb_defbuilder_last(b)->fqname != NULL); + (void)_b; + assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL); return UPB_CONTINUE; } @@ -627,10 +641,8 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); - b->f->def = 
UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; } @@ -720,6 +732,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { m->size = offset + type_info->size; max_align = UPB_MAX(max_align, type_info->align); } + free(sorted_fields); if (max_align > 0) m->size = upb_align_up(m->size, max_align); @@ -1131,6 +1144,7 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) // * keeping a pointer to the upb_fielddef* and reading it later (the same // upb_fielddef is reused over and over). // * detecting errors in the input (we trust that our input is known-good). +// * skipping the rest of the submessage (UPB_SKIPSUBMSG). // // It also does not support any of the follow protobuf features: // * packed fields. @@ -1189,18 +1203,27 @@ static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) return val; } -bool upb_baredecoder_run(upb_baredecoder *d) { +static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_baredecoder *d = (upb_baredecoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers); +} + +static void upb_baredecoder_run(upb_src *src, upb_status *status) { + upb_baredecoder *d = (upb_baredecoder*)src; + assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; upb_strlen_t stack[UPB_MAX_NESTING]; upb_strlen_t *top = &stack[0]; *top = upb_string_len(d->input); d->offset = 0; - upb_dispatch_startmsg(&d->dispatcher); +#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; + + CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. 
while(d->offset >= *top) { - upb_dispatch_endsubmsg(&d->dispatcher); + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); d->offset = *(top--); } @@ -1216,9 +1239,11 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { + upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(ret); + if(ret == BEGIN_SUBMSG) { // Should deliver as a submessage instead. - upb_dispatch_startsubmsg(&d->dispatcher, &f); + CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f)); *(++top) = d->offset + delim_len; } else { d->offset += delim_len; @@ -1228,11 +1253,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { switch(wt) { case UPB_WIRE_TYPE_VARINT: upb_value_setraw(&v, upb_baredecoder_readv64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_64BIT: upb_value_setraw(&v, upb_baredecoder_readf64(d)); - upb_dispatch_value(&d->dispatcher, &f, v); break; case UPB_WIRE_TYPE_32BIT: upb_value_setraw(&v, upb_baredecoder_readf32(d)); @@ -1241,28 +1264,33 @@ bool upb_baredecoder_run(upb_baredecoder *d) { assert(false); abort(); } - upb_dispatch_value(&d->dispatcher, &f, v); + CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } } - upb_dispatch_endmsg(&d->dispatcher); - return true; + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + printf("SUCCESS!!\n"); + upb_string_unref(str); + return; + +err: + upb_copyerr(status, d->dispatcher.top->handlers.status); + upb_printerr(d->dispatcher.top->handlers.status); + upb_printerr(status); + upb_string_unref(str); + printf("ERROR!!\n"); } static upb_baredecoder *upb_baredecoder_new(upb_string *str) { - //static upb_src_vtable vtbl = { - // (upb_src_getdef_fptr)&upb_baredecoder_getdef, - // (upb_src_getval_fptr)&upb_baredecoder_getval, - // (upb_src_getstr_fptr)&upb_baredecoder_getstr, - // (upb_src_skipval_fptr)&upb_baredecoder_skipval, - // 
(upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, - //}; + static upb_src_vtbl vtbl = { + &upb_baredecoder_sethandlers, + &upb_baredecoder_run, + }; upb_baredecoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); d->input = upb_string_getref(str); d->offset = 0; upb_dispatcher_init(&d->dispatcher); - //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 66bfec2..cf01a5f 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -136,8 +136,8 @@ struct _upb_dispatcher; typedef struct _upb_dispatcher upb_dispatcher; INLINE void upb_dispatcher_init(upb_dispatcher *d); INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE void upb_dispatch_startmsg(upb_dispatcher *d); -INLINE void upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, @@ -151,8 +151,21 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); -void upb_src_run(upb_src *src, upb_status *status); +// upb_src_sethandlers() must be called once and only once before upb_src_run() +// is called. This sets up the callbacks that will handle the parse. A +// upb_src that is fully initialized except for the call to +// upb_src_sethandlers() is called "prepared" -- this is useful for library +// functions that want to consume the output of a generic upb_src. +// Calling sethandlers() multiple times is an error and will trigger an abort(). 
+INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); + +// Runs the src, calling the callbacks that were registered with +// upb_src_sethandlers(), and returning the status of the operation in +// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a +// non-blocking socket) or a resumable error; in both cases upb_src_run can be +// called again later. TRYAGAIN could come from either the src (input buffers +// are empty) or the handlers (output buffers are full). +INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index d017177..e462122 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -13,6 +13,7 @@ #include #include "upb_stream.h" +#include "upb_string.h" #ifdef __cplusplus extern "C" { @@ -21,10 +22,8 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. // upb_src -struct _upb_src { -}; -typedef struct { -} upb_src_vtbl; +typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers); +typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. typedef upb_strlen_t (*upb_bytesrc_read_fptr)( @@ -42,42 +41,65 @@ typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( typedef struct { upb_bytesrc_read_fptr read; upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtable; +} upb_bytesrc_vtbl; typedef struct { upb_bytesink_write_fptr write; upb_bytesink_putstr_fptr putstr; -} upb_bytesink_vtable; +} upb_bytesink_vtbl; + +typedef struct { + upb_src_sethandlers_fptr sethandlers; + upb_src_run_fptr run; +} upb_src_vtbl; + // "Base Class" definitions; components that implement these interfaces should // contain one of these structures. 
struct _upb_bytesrc { - upb_bytesrc_vtable *vtbl; + upb_bytesrc_vtbl *vtbl; upb_status status; bool eof; }; struct _upb_bytesink { - upb_bytesink_vtable *vtbl; + upb_bytesink_vtbl *vtbl; upb_status status; bool eof; }; -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtable *vtbl) { +struct _upb_src { + upb_src_vtbl *vtbl; +}; + +INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { +INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { s->vtbl = vtbl; s->eof = false; upb_status_init(&s->status); } +INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { + s->vtbl = vtbl; +} + // Implementation of virtual function dispatch. +// upb_src +INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) { + src->vtbl->sethandlers(src, handlers); +} + +INLINE void upb_src_run(upb_src *src, upb_status *status) { + src->vtbl->run(src, status); +} + // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count) { @@ -152,7 +174,41 @@ INLINE bool upb_handlers_isempty(upb_handlers *h) { return !h->set && !h->closure; } +INLINE upb_flow_t upb_nop(void *closure) { + (void)closure; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)closure; + (void)f; + (void)val; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)closure; + (void)f; + (void)delegate_to; + return UPB_CONTINUE; +} + +INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)closure; + (void)fieldnum; + (void)val; + return UPB_CONTINUE; +} + INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { + if (!set->startmsg) set->startmsg = &upb_nop; + if 
(!set->endmsg) set->endmsg = &upb_nop; + if (!set->value) set->value = &upb_value_nop; + if (!set->startsubmsg) set->startsubmsg = &upb_startsubmsg_nop; + if (!set->endsubmsg) set->endsubmsg = &upb_nop; + if (!set->unknownval) set->unknownval = &upb_unknownval_nop; h->set = set; } @@ -182,16 +238,19 @@ INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) { d->top->handlers = *h; } -INLINE void upb_dispatch_startmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->startmsg(d->top->handlers.closure); + return d->top->handlers.set->startmsg(d->top->handlers.closure); } -INLINE void upb_dispatch_endmsg(upb_dispatcher *d) { +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) { assert(d->stack == d->top); - d->top->handlers.set->endmsg(d->top->handlers.closure); + return d->top->handlers.set->endmsg(d->top->handlers.closure); } +// TODO: several edge cases to fix: +// - delegated start returns UPB_BREAK, should replay the start on resume. +// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg. 
INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f) { upb_handlers handlers; @@ -203,17 +262,18 @@ INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, ++d->top; d->top->handlers = handlers; d->top->depth = 0; - d->top->handlers.set->startmsg(d->top->handlers.closure); - ret = UPB_CONTINUE; + ret = d->top->handlers.set->startmsg(d->top->handlers.closure); } - ++d->top->depth; + if (ret == UPB_CONTINUE) ++d->top->depth; upb_handlers_uninit(&handlers); return ret; } INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { + upb_flow_t ret; if (--d->top->depth == 0) { - d->top->handlers.set->endmsg(d->top->handlers.closure); + ret = d->top->handlers.set->endmsg(d->top->handlers.closure); + if (ret != UPB_CONTINUE) return ret; --d->top; } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); diff --git a/core/upb_string.c b/core/upb_string.c index b243dfd..e9ff0d9 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -61,13 +61,13 @@ void _upb_string_free(upb_string *str) { free(str); } -upb_string *upb_string_tryrecycle(upb_string *str) { +void upb_string_recycle(upb_string **_str) { + upb_string *str = *_str; if(str && upb_atomic_read(&str->refcount) == 1) { str->ptr = NULL; upb_string_release(str); - return str; } else { - return upb_string_new(); + *_str = upb_string_new(); } } @@ -111,7 +111,7 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { // We don't care about the terminating NULL, but snprintf might // bail out of printing even other characters if it doesn't have // enough space to write the NULL also. 
- str = upb_string_tryrecycle(str); + upb_string_recycle(&str); buf = upb_string_getrwbuf(str, true_size + 1); vsnprintf(buf, true_size + 1, format, args); } diff --git a/core/upb_string.h b/core/upb_string.h index f82603b..1f4b20c 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -133,7 +133,7 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // upb_src_getstr(str); // } // } -upb_string *upb_string_recycle(upb_string **str); +void upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been diff --git a/tests/test_def.c b/tests/test_def.c index 732835d..5be0672 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,6 +10,7 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { + printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); diff --git a/tests/test_string.c b/tests/test_string.c index 7c9ed02..6446806 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -23,7 +23,8 @@ static void test_static() { upb_string_unref(&static_upbstr); // Recycling a static string returns a new string (that can be modified). - upb_string *str = upb_string_tryrecycle(&static_upbstr); + upb_string *str = &static_upbstr; + upb_string_recycle(&str); assert(str != &static_upbstr); upb_string_unref(str); @@ -34,8 +35,9 @@ static void test_dynamic() { assert(str != NULL); upb_string_unref(str); - // Can also create a string by tryrecycle(NULL). - str = upb_string_tryrecycle(NULL); + // Can also create a string by recycle(NULL). 
+ str = NULL; + upb_string_recycle(&str); assert(str != NULL); upb_strcpyc(str, static_str); @@ -45,7 +47,8 @@ static void test_dynamic() { assert(upb_streqlc(str, static_str)); upb_string_endread(str); - upb_string *str2 = upb_string_tryrecycle(str); + upb_string *str2 = str; + upb_string_recycle(&str2); // No other referents, so should return the same string. assert(str2 == str); @@ -58,7 +61,7 @@ static void test_dynamic() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -70,7 +73,7 @@ static void test_dynamic() { assert(upb_atomic_read(&str2->refcount) == 2); // Recycling str should eliminate the extra ref. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); assert(upb_atomic_read(&str2->refcount) == 1); // Resetting str should reuse its old data. @@ -80,7 +83,7 @@ static void test_dynamic() { // Resetting str to something very long should require new data to be // allocated. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; upb_strcpyc(str, longstring); const char *robuf6 = upb_string_getrobuf(str); @@ -88,7 +91,7 @@ static void test_dynamic() { assert(upb_streqlc(str, longstring)); // Test printf. - str = upb_string_tryrecycle(str); + upb_string_recycle(&str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); assert(upb_streqlc(str, "Number: 5, String: YO!")); -- cgit v1.2.3 From c9df91b04a429f9324afeefece28f21e7078e3ac Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 01:03:02 -0800 Subject: upb bootstraps again! and with no memory leaks! 
--- core/upb_def.c | 40 +++++++++++++++++----------------------- core/upb_def.h | 2 +- core/upb_stream_vtbl.h | 1 + core/upb_string.c | 1 + tests/test_def.c | 4 +--- tests/test_string.c | 11 +++++++++++ 6 files changed, 32 insertions(+), 27 deletions(-) diff --git a/core/upb_def.c b/core/upb_def.c index a935930..c21843e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -429,7 +429,7 @@ typedef struct _upb_unresolveddef { static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = str; + def->name = upb_string_getref(str); return def; } @@ -445,6 +445,7 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { static void upb_enumdef_free(upb_enumdef *e) { upb_enum_iter i; for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + // Frees the ref taken when the string was parsed. upb_string_unref(upb_enum_iter_name(i)); } upb_strtable_free(&e->ntoi); @@ -468,7 +469,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: upb_string_unref(b->name); - upb_string_getref(upb_value_getstr(val)); + b->name = upb_string_getref(upb_value_getstr(val)); b->saw_name = true; break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: @@ -495,6 +496,7 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. 
+ b->name = NULL; return UPB_CONTINUE; } @@ -641,7 +643,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - if(b->f->def) upb_def_unref(b->f->def); + upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val))); b->f->owned = true; break; @@ -847,6 +849,7 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, return e; } else { // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. upb_string *sym_str = upb_string_new(); int baselen = upb_string_len(base); while(1) { @@ -1212,21 +1215,14 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { upb_baredecoder *d = (upb_baredecoder*)src; assert(!upb_handlers_isempty(&d->dispatcher.top->handlers)); upb_string *str = NULL; - upb_strlen_t stack[UPB_MAX_NESTING]; + upb_strlen_t stack[UPB_MAX_NESTING] = {UPB_STRLEN_MAX}; upb_strlen_t *top = &stack[0]; - *top = upb_string_len(d->input); d->offset = 0; #define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err; CHECK(upb_dispatch_startmsg(&d->dispatcher)); while(d->offset < upb_string_len(d->input)) { - // Detect end-of-submessage. - while(d->offset >= *top) { - CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); - d->offset = *(top--); - } - uint32_t key = upb_baredecoder_readv64(d); upb_fielddef f; f.number = key >> 3; @@ -1266,22 +1262,22 @@ static void upb_baredecoder_run(upb_src *src, upb_status *status) { } CHECK(upb_dispatch_value(&d->dispatcher, &f, v)); } + // Detect end-of-submessage. 
+ while(d->offset >= *top) { + CHECK(upb_dispatch_endsubmsg(&d->dispatcher)); + d->offset = *(top--); + } } CHECK(upb_dispatch_endmsg(&d->dispatcher)); - printf("SUCCESS!!\n"); upb_string_unref(str); return; err: upb_copyerr(status, d->dispatcher.top->handlers.status); - upb_printerr(d->dispatcher.top->handlers.status); - upb_printerr(status); upb_string_unref(str); - printf("ERROR!!\n"); } -static upb_baredecoder *upb_baredecoder_new(upb_string *str) -{ +static upb_baredecoder *upb_baredecoder_new(upb_string *str) { static upb_src_vtbl vtbl = { &upb_baredecoder_sethandlers, &upb_baredecoder_run, @@ -1294,19 +1290,16 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) return d; } -static void upb_baredecoder_free(upb_baredecoder *d) -{ +static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); free(d); } -static upb_src *upb_baredecoder_src(upb_baredecoder *d) -{ +static upb_src *upb_baredecoder_src(upb_baredecoder *d) { return &d->src; } -void upb_symtab_add_descriptorproto(upb_symtab *symtab) -{ +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { // For the moment we silently decline to perform the operation if the symbols // already exist in the symtab. Revisit this when we have a better story // about whether syms in a table can be replaced. 
@@ -1329,4 +1322,5 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) upb_symtab_unref(symtab); abort(); } + upb_status_uninit(&status); } diff --git a/core/upb_def.h b/core/upb_def.h index 9eb961a..d9bab97 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -77,7 +77,7 @@ INLINE void upb_def_ref(upb_def *def) { if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def); } INLINE void upb_def_unref(upb_def *def) { - if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); + if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def); } /* upb_fielddef ***************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index e462122..fd71b2d 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -275,6 +275,7 @@ INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) { ret = d->top->handlers.set->endmsg(d->top->handlers.closure); if (ret != UPB_CONTINUE) return ret; --d->top; + assert(d->top >= d->stack); } return d->top->handlers.set->endsubmsg(d->top->handlers.closure); } diff --git a/core/upb_string.c b/core/upb_string.c index e9ff0d9..c599728 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -67,6 +67,7 @@ void upb_string_recycle(upb_string **_str) { str->ptr = NULL; upb_string_release(str); } else { + upb_string_unref(str); *_str = upb_string_new(); } } diff --git a/tests/test_def.c b/tests/test_def.c index 5be0672..2d2658f 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -10,13 +10,10 @@ int main() { int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { - printf("Def with name: " UPB_STRFMT "\n", UPB_STRARG(defs[i]->fqname)); upb_def_unref(defs[i]); } free(defs); - printf("Size: %zd\n", sizeof(upb_ntof_ent)); - upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); upb_def *fds = upb_symtab_lookup(s, str); assert(fds != NULL); @@ -24,4 +21,5 @@ int main() { 
upb_def_unref(fds); upb_string_unref(str); upb_symtab_unref(s); + return 0; } diff --git a/tests/test_string.c b/tests/test_string.c index 6446806..ef0e2a9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -40,6 +40,17 @@ static void test_dynamic() { upb_string_recycle(&str); assert(str != NULL); + // Take a ref and recycle; should create a new string and release a ref + // on the old one. + upb_string *strcp = upb_string_getref(str); + assert(strcp == str); + assert(upb_atomic_read(&str->refcount) == 2); + upb_string_recycle(&str); + assert(strcp != str); + assert(upb_atomic_read(&str->refcount) == 1); + assert(upb_atomic_read(&strcp->refcount) == 1); + upb_string_unref(strcp); + upb_strcpyc(str, static_str); assert(upb_string_len(str) == (sizeof(static_str) - 1)); const char *robuf = upb_string_getrobuf(str); -- cgit v1.2.3 From 2ea9737e5d5b085eef6fe762c7e86a7161b9d231 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 01:06:19 -0800 Subject: Added test_stream.c for testing upb_stream.h. --- Makefile | 2 +- tests/test_stream.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 tests/test_stream.c diff --git a/Makefile b/Makefile index af79363..04779c0 100644 --- a/Makefile +++ b/Makefile @@ -113,7 +113,7 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ tests/test_def \ -# tests/test_stream \ + tests/test_stream \ # tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ diff --git a/tests/test_stream.c b/tests/test_stream.c new file mode 100644 index 0000000..b6d511c --- /dev/null +++ b/tests/test_stream.c @@ -0,0 +1,127 @@ + +#undef NDEBUG /* ensure tests always assert. 
*/ +#include "upb_stream.h" +#include "upb_string.h" + +typedef struct { + upb_string *str; + bool should_delegate; +} test_data; + +extern upb_handlerset test_handlers; + +static void strappendf(upb_string *s, const char *format, ...) { + upb_string *str = upb_string_new(); + va_list args; + va_start(args, format); + upb_string_vprintf(str, format, args); + va_end(args); + upb_strcat(s, str); + upb_string_unref(str); +} + +static upb_flow_t startmsg(void *closure) { + test_data *d = closure; + strappendf(d->str, "startmsg\n"); + return UPB_CONTINUE; +} + +static upb_flow_t endmsg(void *closure) { + test_data *d = closure; + strappendf(d->str, "endmsg\n"); + return UPB_CONTINUE; +} + +static upb_flow_t value(void *closure, struct _upb_fielddef *f, upb_value val) { + (void)f; + test_data *d = closure; + strappendf(d->str, "value, %lld\n", upb_value_getint64(val)); + return UPB_CONTINUE; +} + +static upb_flow_t startsubmsg(void *closure, struct _upb_fielddef *f, + upb_handlers *delegate_to) { + (void)f; + test_data *d = closure; + strappendf(d->str, "startsubmsg\n"); + if (d->should_delegate) { + upb_register_handlerset(delegate_to, &test_handlers); + upb_set_handler_closure(delegate_to, closure, NULL); + return UPB_DELEGATE; + } else { + return UPB_CONTINUE; + } +} + +static upb_flow_t endsubmsg(void *closure) { + test_data *d = closure; + strappendf(d->str, "endsubmsg\n"); + return UPB_CONTINUE; +} + +static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, + upb_value val) { + (void)val; + test_data *d = closure; + strappendf(d->str, "unknownval, %d\n", fieldnum); + return UPB_CONTINUE; +} + +upb_handlerset test_handlers = { + &startmsg, + &endmsg, + &value, + &startsubmsg, + &endsubmsg, + &unknownval, +}; + +static void test_dispatcher() { + test_data data; + data.should_delegate = false; + data.str = upb_string_new(); + upb_handlers h; + upb_handlers_init(&h); + upb_handlers_reset(&h); + upb_register_handlerset(&h, &test_handlers); + 
upb_set_handler_closure(&h, &data, NULL); + upb_dispatcher d; + upb_dispatcher_init(&d); + upb_dispatcher_reset(&d, &h); + + upb_dispatch_startmsg(&d); + upb_value val; + upb_value_setint64(&val, 5); + upb_dispatch_value(&d, NULL, val); + upb_dispatch_startsubmsg(&d, NULL); + data.should_delegate = true; + upb_dispatch_startsubmsg(&d, NULL); + data.should_delegate = false; + upb_dispatch_startsubmsg(&d, NULL); + upb_dispatch_value(&d, NULL, val); + upb_dispatch_endsubmsg(&d); + upb_dispatch_endsubmsg(&d); + upb_dispatch_endsubmsg(&d); + upb_dispatch_endmsg(&d); + + upb_string expected = UPB_STACK_STRING( + "startmsg\n" + "value, 5\n" + "startsubmsg\n" + "startsubmsg\n" + "startmsg\n" // Because of the delegation. + "startsubmsg\n" + "value, 5\n" + "endsubmsg\n" + "endmsg\n" // Because of the delegation. + "endsubmsg\n" + "endsubmsg\n" + "endmsg\n"); + assert(upb_streql(data.str, &expected)); + upb_string_unref(data.str); +} + +int main() { + test_dispatcher(); + return 0; +} -- cgit v1.2.3 From 5511aa16b02fd5fc1688b87f06ee09f4c8649f06 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 22 Jan 2011 23:31:33 -0800 Subject: Begin porting old decoder to new interfaces. Doesn't build yet. --- stream/upb_decoder.c | 815 +++++++++++++++++++-------------------------------- 1 file changed, 306 insertions(+), 509 deletions(-) diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index c35212e..b820b08 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -9,587 +9,384 @@ #include #include #include +#include "upb_def.h" -#define UPB_GROUP_END_OFFSET UINT32_MAX - -// Returns true if the give wire type and field type combination is valid, -// taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) { - // TODO: need to take into account the label; only repeated fields are - // allowed to use packed encoding. 
- return (1 << wt) & upb_types[f->type].allowed_wire_types; -} - -// Performs zig-zag decoding, which is used by sint32 and sint64. -static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - - -/* upb_decoder ****************************************************************/ - -// The decoder keeps a stack with one entry per level of recursion. -// upb_decoder_frame is one frame of that stack. -typedef struct { - upb_msgdef *msgdef; - upb_strlen_t end_offset; // For groups, UPB_GROUP_END_OFFSET. -} upb_decoder_frame; - -struct upb_decoder { - upb_src src; // upb_decoder is a upb_src. - - upb_msgdef *toplevel_msgdef; - upb_bytesrc *bytesrc; - - // The buffer of input data. NULL is equivalent to the empty string. - upb_string *buf; - - // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; - - // The number of bytes we have yet to consume from "buf" or tmpbuf. This is - // always >= 0 unless we were just reset or are eof. - int32_t buf_bytesleft; - - // The offset within "buf" from where we are currently reading. This can be - // <0 if we are reading some residual bytes from the previous buffer, which - // are stored in tmpbuf and combined with bytes from "buf". - int32_t buf_offset; - - // The overall stream offset of the beginning of "buf". - uint32_t buf_stream_offset; - - // Wire type of the key we just read. - upb_wire_type_t wire_type; - - // Delimited length of the string field we are reading. - upb_strlen_t delimited_len; - - upb_strlen_t packed_end_offset; - - // Fielddef for the key we just read. - upb_fielddef *field; - - // We keep a stack of messages we have recursed into. - upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; -}; +/* Functions to read wire values. 
*********************************************/ +// These functions are internal to the decode, but might be moved into an +// internal header file if we at some point in the future opt to do code +// generation, because the generated code would want to inline these functions. +// The same applies to the functions to read .proto values below. -/* upb_decoder buffering. *****************************************************/ +const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, + uint64_t *val, upb_status *status); -static upb_strlen_t upb_decoder_offset(upb_decoder *d) +// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). +INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, + uint64_t *val, upb_status *status) { - return d->buf_stream_offset + d->buf_offset; -} - -static bool upb_decoder_nextbuf(upb_decoder *d) -{ - assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE); - - // Copy residual bytes to temporary buffer. - if(d->buf_bytesleft > 0) { - memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset, - d->buf_bytesleft); - } - - // Recycle old buffer. - if(d->buf) { - d->buf_offset -= upb_string_len(d->buf); - d->buf_stream_offset += upb_string_len(d->buf); - } - d->buf = upb_string_tryrecycle(d->buf); - - // Pull next buffer. - if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { - d->buf_bytesleft += upb_string_len(d->buf); - return true; + // We inline this common case (1-byte varints), if that fails we dispatch to + // the full (non-inlined) version. + if((*buf & 0x80) == 0) { + *val = *buf & 0x7f; + return buf + 1; } else { - return false; + return upb_get_v_uint64_t_full(buf, end, val, status); } } -static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) +// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit +// varint is not a true wire type. 
+INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, + uint32_t *val, upb_status *status) { - if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE && !upb_bytesrc_eof(d->bytesrc)) - upb_decoder_nextbuf(d); - - if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) { - if(upb_bytesrc_eof(d->bytesrc) && d->buf_bytesleft > 0) { - // We're working through the last few bytes of the buffer. - } else if(upb_bytesrc_eof(d->bytesrc)) { - // End of stream, no more bytes left. - assert(d->buf_bytesleft == 0); - d->src.eof = true; - return NULL; - } else { - // We are short of bytes even though the bytesrc isn't EOF; must be error. - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return NULL; - } - } - - if(d->buf_offset >= 0) { - // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE - // contiguous bytes, so we can read directly out of it. - *bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; - } else { - // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we - // have so far. - upb_strlen_t len = -d->buf_offset; - if(d->buf) { - upb_strlen_t to_copy = - UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf)); - memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy); - len += to_copy; - } - // Pad the buffer out to UPB_MAX_ENCODED_SIZE. - memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len); - *bytes = len; - return d->tmpbuf; - } + uint64_t val64; + const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); + *val = (uint32_t)val64; // Discard the high bits. + return ret; } -// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE -// bytes long. This buffer contains the next bytes in the stream (even if -// those bytes span multiple buffers). *bytes is set to the number of actual -// stream bytes that are available in the returned buffer. If -// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes. 
-// -// After the data has been read, upb_decoder_consume() should be called to -// indicate how many bytes were consumed. -static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes) +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). +INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, + uint32_t *val, upb_status *status) { - if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) { - // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE - // contiguous bytes, so we can read directly out of it. - *bytes = d->buf_bytesleft; - return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset; - } else { - return upb_decoder_getbuf_full(d, bytes); + const uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } + memcpy(val, buf, sizeof(uint32_t)); + return uint32_end; } -static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes) +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). +INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, + uint64_t *val, upb_status *status) { - assert(bytes <= UPB_MAX_ENCODED_SIZE); - d->buf_offset += bytes; - d->buf_bytesleft -= bytes; - if(d->buf_offset < 0) { - // We still have residual bytes we have not consumed. - memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset); - } - assert(d->buf_bytesleft >= 0); - - // Detect end-of-submessage. 
- if(upb_decoder_offset(d) >= d->top->end_offset) { - d->src.eof = true; + const uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } - - return true; + memcpy(val, buf, sizeof(uint64_t)); + return uint64_end; } -static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes) +INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, + const uint8_t *end, + upb_status *status) { - d->buf_offset += bytes; - d->buf_bytesleft -= bytes; - while(d->buf_bytesleft < 0) { - if(!upb_decoder_nextbuf(d)) return false; - } - - // Detect end-of-submessage. - if(upb_decoder_offset(d) >= d->top->end_offset) { - d->src.eof = true; + const uint8_t *const maxend = buf + 10; + uint8_t last = 0x80; + for(; buf < (uint8_t*)end && (last & 0x80); buf++) + last = *buf; + + if(buf >= end && buf <= maxend && (last & 0x80)) { + status->code = UPB_STATUS_NEED_MORE_DATA; + buf = end; + } else if(buf > maxend) { + status->code = UPB_ERROR_UNTERMINATED_VARINT; + buf = end; } - - return true; + return buf; } - -/* Functions to read wire values. *********************************************/ - -// Parses remining bytes of a 64-bit varint that has already had its first byte -// parsed. 
-INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high) +INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, + const uint8_t *end, + upb_status *status) { - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - const uint8_t *start = buf; - if(!buf) return false; - - *high = 0; - uint32_t b; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - } else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle " - "of a varint, stream offset: %u", upb_decoder_offset(d)); + const uint8_t *uint32_end = buf + sizeof(uint32_t); + if(uint32_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } - return false; - -done: - return upb_decoder_consume(d, buf - start); + return uint32_end; } -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. 
-static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val) +INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, + const uint8_t *end, + upb_status *status) { - uint32_t high; - if(!upb_decoder_readv64(d, val, &high)) return false; - - // We expect the high bits to be zero, except that signed 32-bit values are - // first sign-extended to be wire-compatible with 64 bits, in which case we - // expect the high bits to be all one. - // - // We could perform a slightly more sophisticated check by having the caller - // indicate whether a signed or unsigned value is being read. We could check - // that the high bits are all zeros for unsigned, and properly sign-extended - // for signed. - if(high != 0 && ~high != 0) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but " - "the high bits contained data we should not truncate: " - "%ux, stream offset: %u", high, upb_decoder_offset(d)); - return false; + const uint8_t *uint64_end = buf + sizeof(uint64_t); + if(uint64_end > end) { + status->code = UPB_STATUS_NEED_MORE_DATA; + return end; } - return true; + return uint64_end; } -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). Caller -// promises that 4 bytes are available at buf. -static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val) -{ - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - if(bytes_available < 4) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Stream ended in the middle of a 32-bit value"); - return false; - } - memcpy(val, buf, 4); - // TODO: byte swap if big-endian. - return upb_decoder_consume(d, 4); -} +/* Functions to read .proto values. *******************************************/ -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller -// promises that 8 bytes are available at buf. 
-static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val) -{ - upb_strlen_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - if(bytes_available < 8) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Stream ended in the middle of a 64-bit value"); - return false; - } - memcpy(val, buf, 8); - // TODO: byte swap if big-endian. - return upb_decoder_consume(d, 8); -} +// Performs zig-zag decoding, which is used by sint32 and sint64. +INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } +INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Returns the length of a varint (wire type: UPB_WIRE_TYPE_VARINT), allowing -// it to be easily skipped. Caller promises that 10 bytes are available at -// "buf". The function will return a maximum of 11 bytes before quitting. -static uint8_t upb_decoder_skipv64(upb_decoder *d) +// Parses a tag, places the result in *tag. +INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, + upb_tag *tag, upb_status *status) { - uint32_t bytes_available; - const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available); - if(!buf) return false; - uint8_t i; - for(i = 0; i < 10 && buf[i] & 0x80; i++) - ; // empty loop body. - if(i > 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint."); - return false; - } - return upb_decoder_consume(d, i); + uint32_t tag_int; + const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return ret; } +// The decoder keeps a stack with one entry per level of recursion. +// upb_decoder_frame is one frame of that stack. +typedef struct { + upb_msgdef *msgdef; + upb_fielddef *field; + size_t end_offset; // For groups, 0. +} upb_decoder_frame; -/* upb_src implementation for upb_decoder. 
************************************/ +struct upb_decoder { + // Immutable state of the decoder. + upb_src src; + upb_dispatcher dispatcher; + upb_msgdef *toplevel_msgdef; + upb_decoder_frame stack[UPB_MAX_NESTING]; -bool upb_decoder_skipval(upb_decoder *d); + // Mutable state of the decoder. -upb_fielddef *upb_decoder_getdef(upb_decoder *d) -{ - if (d->src.eof) return NULL; - // Handles the packed field case. - if(d->field) { - return d->field; - } + // Where we will store any errors that occur. + upb_status *status; + + // Stack entries store the offset where the submsg ends (for groups, 0). + upb_decoder_frame *top, *limit; + + // Current input buffer. + upb_string *buf; + + // The offset within the overall stream represented by the *beginning* of buf. + upb_strlen_t buf_stream_offset; + + // Our current offset *within* buf. Will be negative if we are buffering + // from previous buffers in tmpbuf. + upb_strlen_t buf_offset; + + // Holds any bytes we have from previous buffers. The number of bytes we + // have encoded here is -buf_offset, if buf_offset<0, 0 otherwise. + uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; +}; + +upb_flow_t upb_decode_varint(upb_decoder *d, ptrs *p, + uint32_t *low, uint32_t *high) { + if (p->end - p->ptr > UPB_MAX_ENCODED_SIZE) { + // Fast path; we know we have a complete varint in our existing buffer. 
+ *high = 0; + uint32_t b; + uint8_t *ptr = p->ptr; + b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + if(bytes_available >= 10) { + upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " + "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); + return false; + } - uint32_t key = 0; -again: - if(!upb_decoder_readv32(d, &key)) return NULL; - upb_wire_type_t wire_type = key & 0x7; - int32_t field_number = key >> 3; - - if(wire_type == UPB_WIRE_TYPE_DELIMITED) { - // For delimited wire values we parse the length now, since we need it in - // all cases. - if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL; - } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) { - if(d->top->end_offset == UPB_GROUP_END_OFFSET) { - d->src.eof = true; + done: + p->ptr = ptr; + } else { + // Slow path: we may have to combine one or more buffers to get a whole + // varint worth of data. 
+ uint8_t buf[UPB_MAX_ENCODED_SIZE]; + uint8_t *p = buf, *end = buf + sizeof(buf); + for(ing bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) + *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; + + if(d->status->code == UPB_EOF && (last & 0x80)) { + upb_seterr(status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + } else if(buf == maxend) { + upb_seterr(status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); } else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - upb_decoder_offset(d)); + // Success. + return; } - return NULL; + ungetbytes(d, buf, p - buf); } +} - // Look up field by tag number. - upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, field_number); - - if (!f) { - // Unknown field. If/when the upb_src interface supports reporting - // unknown fields we will implement that here. - upb_decoder_skipval(d); - goto again; - } else if (!upb_check_type(wire_type, f)) { - // This is a recoverable error condition. We skip the value but also - // return NULL and report the error. - upb_decoder_skipval(d); - // TODO: better error message. - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n"); - return NULL; - } - d->field = f; - d->wire_type = wire_type; - return f; +static const void *get_msgend(upb_decoder *d) +{ + if(d->top->end_offset > 0) + return upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); + else + return (void*)UINTPTR_MAX; // group. 
} -bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) +static bool isgroup(const void *submsg_end) { - switch(upb_types[d->field->type].native_wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint32_t low, high; - if(!upb_decoder_readv64(d, &low, &high)) return false; - uint64_t u64 = ((uint64_t)high << 32) | low; - if(d->field->type == UPB_TYPE(SINT64)) - *val.int64 = upb_zzdec_64(u64); - else - *val.uint64 = u64; - break; - } - case UPB_WIRE_TYPE_32BIT_VARINT: { - uint32_t u32; - if(!upb_decoder_readv32(d, &u32)) return false; - if(d->field->type == UPB_TYPE(SINT32)) - *val.int32 = upb_zzdec_32(u32); - else - *val.uint32 = u32; - break; - } - case UPB_WIRE_TYPE_64BIT: - if(!upb_decoder_readf64(d, val.uint64)) return false; - break; - case UPB_WIRE_TYPE_32BIT: - if(!upb_decoder_readf32(d, val.uint32)) return false; - break; - default: - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Attempted to call getval on a group."); - return false; - } - // For a packed field where we have not reached the end, we leave the field - // in the decoder so we will return it again without parsing a key. - if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_decoder_offset(d) >= d->packed_end_offset) { - d->field = NULL; - } - return true; + return submsg_end == (void*)UINTPTR_MAX; } -bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { - // A string, bytes, or a length-delimited submessage. The latter isn't - // technically a string, but can be gotten as one to perform lazy parsing. - const int32_t total_len = d->delimited_len; - if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { - // The entire string is inside our current buffer, so we can just - // return a substring of the buffer without copying. 
- upb_string_substr(str, d->buf, - upb_string_len(d->buf) - d->buf_bytesleft, - total_len); - upb_decoder_skipbytes(d, total_len); - } else { - // The string spans buffers, so we must copy from the residual buffer - // (if any bytes are there), then the buffer, and finally from the bytesrc. - uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( - str, UPB_MIN(total_len, d->buf_bytesleft)); - int32_t len = 0; - if(d->buf_offset < 0) { - // Residual bytes we need to copy from tmpbuf. - memcpy(ptr, d->tmpbuf, -d->buf_offset); - len += -d->buf_offset; - } - if(d->buf) { - // Bytes from the buffer. - memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, - upb_string_len(str) - len); - } - upb_decoder_skipbytes(d, upb_string_len(str)); - if(len < total_len) { - // Bytes from the bytesrc. - if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } - // Have to advance this since the buffering layer of the decoder will - // never see these bytes. - d->buf_stream_offset += total_len - len; - } - } - d->field = NULL; - return true; +extern upb_wire_type_t upb_expected_wire_types[]; +// Returns true if wt is the correct on-the-wire type for ft. +INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { + // This doesn't currently support packed arrays. + return upb_types[ft].expected_wire_type == wt; } -static bool upb_decoder_skipgroup(upb_decoder *d); -bool upb_decoder_startmsg(upb_decoder *d) { - if(++d->top >= d->limit) { - upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED, +// Pushes a new stack frame for a submessage with the given len (which will +// be zero if the submessage is a group). 
+static const uint8_t *push(upb_decoder *d, const uint8_t *start, + uint32_t submsg_len, upb_fielddef *f, + upb_status *status) +{ + d->top->field = f; + d->top++; + if(d->top >= d->limit) { + upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, "Nesting exceeded maximum (%d levels)\n", UPB_MAX_NESTING); - return false; + return NULL; } upb_decoder_frame *frame = d->top; - if(d->field->type == UPB_TYPE(GROUP)) { - frame->end_offset = UPB_GROUP_END_OFFSET; - } else if (d->field->type == UPB_TYPE(MESSAGE)) { - frame->end_offset = upb_decoder_offset(d) + d->delimited_len; - } else { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Tried to startmsg a non-msg field."); - } - frame->msgdef = upb_downcast_msgdef(d->field->def); - d->field = NULL; - return true; -} - -bool upb_decoder_endmsg(upb_decoder *d) { - if(d->top > d->stack) { - --d->top; - if(!d->src.eof) { - if(d->top->end_offset == UPB_GROUP_END_OFFSET) - upb_decoder_skipgroup(d); - else - upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d)); - } - // Detect end-of-submessage. - d->src.eof = upb_decoder_offset(d) >= d->top->end_offset; - return true; - } else { - return false; - } -} + frame->end_offset = d->completed_offset + submsg_len; + frame->msgdef = upb_downcast_msgdef(f->def); -bool upb_decoder_skipval(upb_decoder *d) { - upb_strlen_t bytes_to_skip; - d->field = NULL; - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: { - return upb_decoder_skipv64(d); - } - case UPB_WIRE_TYPE_START_GROUP: - if(!upb_decoder_startmsg(d)) return false; - if(!upb_decoder_skipgroup(d)) return false; - if(!upb_decoder_endmsg(d)) return false; - return true; - default: - // Including UPB_WIRE_TYPE_END_GROUP. 
- assert(false); - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group"); - return false; - case UPB_WIRE_TYPE_64BIT: - bytes_to_skip = 8; - break; - case UPB_WIRE_TYPE_32BIT: - bytes_to_skip = 4; - break; - case UPB_WIRE_TYPE_DELIMITED: - // Works for both string/bytes *and* submessages. - bytes_to_skip = d->delimited_len; - break; - } - return upb_decoder_skipbytes(d, bytes_to_skip); + upb_dispatch_startsubmsg(&d->dispatcher, f); + return get_msgend(d); } -static bool upb_decoder_skipgroup(upb_decoder *d) +// Pops a stack frame, returning a pointer for where the next submsg should +// end (or a pointer that is out of range for a group). +static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) { - // This will be mututally recursive with upb_decoder_skipval() if the group - // has sub-groups. If we wanted to handle EAGAIN in the future, this - // approach would not work; we would need to track the group depth - // explicitly. - while(upb_decoder_getdef(d)) { - if(!upb_decoder_skipval(d)) return false; - } - // If we are at the end of the group like we want to be, then - // upb_decoder_getdef() returned NULL because of eof, not error. - if(!&d->src.eof) return false; - return true; + d->top--; + upb_dispatch_endsubmsg(&d->dispatcher); + return get_msgend(d); } -upb_src_vtable upb_decoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_decoder_getdef, - (upb_src_getval_fptr)&upb_decoder_getval, - (upb_src_getstr_fptr)&upb_decoder_getstr, - (upb_src_skipval_fptr)&upb_decoder_skipval, - (upb_src_startmsg_fptr)&upb_decoder_startmsg, - (upb_src_endmsg_fptr)&upb_decoder_endmsg, -}; +void upb_decoder_run(upb_src *src, upb_status *status) { + // buf is our current offset, moves from start to end. 
+ const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str) + d->buf_offset; + const uint8_t *end = (uint8_t*)upb_string_getrobuf(str) + upb_string_len(str); + const uint8_t *submsg_end = get_msgend(d, start); + upb_msgdef *msgdef = d->top->msgdef; + upb_string *str = NULL; + + // Main loop: executed once per tag/field pair. + while(1) { + // Parse/handle tag. + upb_tag tag; + CHECK(decode_tag(d, &buf, &end, &tag)); + + // Decode wire data. Hopefully this branch will predict pretty well + // since most types will read a varint here. + upb_value val; + switch (tag.wire_type) { + case UPB_WIRE_TYPE_END_GROUP: + if(!isgroup(submsg_end)) { + upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " + "message is not a group, byte offset: %zd", + d->completed_offset + (completed - start)); + goto err; + } + submsg_end = pop(d, start, status, &msgdef); + completed = buf; + goto check_msgend; + case UPB_WIRE_TYPE_VARINT: + case UPB_WIRE_TYPE_DELIMITED: + // For the delimited case we are parsing the length. + CHECK(upb_decode_varint(d, &buf, &end, &val)); + break; + case UPB_WIRE_TYPE_32BIT: + CHECK(upb_decode_32bit(d, &buf, &end, &val)); + break; + case UPB_WIRE_TYPE_64BIT: + CHECK(upb_decode_64bit(d, &buf, &end, &val)); + break; + } + // Look up field by tag number. + upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); -/* upb_decoder construction/destruction. **************************************/ + if (!f) { + // Unknown field. + } else if (!upb_check_type(tag.wire_type, f->type)) { + // Field has incorrect type. + } -upb_decoder *upb_decoder_new(upb_msgdef *msgdef) -{ - upb_decoder *d = malloc(sizeof(*d)); - d->toplevel_msgdef = msgdef; - d->limit = &d->stack[UPB_MAX_NESTING]; - d->buf = NULL; - upb_src_init(&d->src, &upb_decoder_src_vtbl); - return d; -} + // Perform any further massaging of the data now that we have the fielddef. + // Now we can distinguish strings from submessages, and we know about + // zig-zag-encoded types. 
+ // TODO: handle packed encoding. + switch (f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + CHECK(push(d, start, upb_value_getint32(val), f, status, &msgdef)); + goto check_msgend; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + CHECK(upb_decode_string(d, str, upb_value_getint32(val))); + upb_value_setstr(&val, str); + break; + case UPB_TYPE(SINT32): + upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); + break; + case UPB_TYPE(SINT64): + upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); + break; + default: + // Other types need no further processing at this point. + } + CHECK(upb_dispatch_value(d->sink, f, val, status)); + +check_msgend: + while(buf >= submsg_end) { + if(buf > submsg_end) { + upb_seterr(status, UPB_ERROR, "Expected submsg end offset " + "did not lie on a tag/value boundary."); + goto err; + } + submsg_end = pop(d, start, status, &msgdef); + } + completed = buf; + } -void upb_decoder_free(upb_decoder *d) -{ - upb_string_unref(d->buf); - free(d); +err: + read = (char*)completed - (char*)start; + d->completed_offset += read; + return read; } -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) -{ - upb_string_unref(d->buf); +void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { + upb_decoder *d = (upb_decoder*)src; + upb_dispatcher_reset(&d->dispatcher, handlers); d->top = d->stack; + d->completed_offset = 0; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it - // indefinitely), so we set the end offset as high as possible, but not equal - // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET. - d->top->end_offset = UINT32_MAX - 1; - d->src.eof = false; - d->bytesrc = bytesrc; - d->field = NULL; - d->buf = NULL; - d->buf_bytesleft = 0; - d->buf_stream_offset = 0; - d->buf_offset = 0; + // indefinitely), so we treat it like a group. 
+ d->top->end_offset = 0; } -upb_src *upb_decoder_src(upb_decoder *d) { - return &d->src; +upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { + static upb_src_vtbl vtbl = { + &upb_decoder_sethandlers, + &upb_decoder_run, + }; + upb_decoder *d = malloc(sizeof(*d)); + upb_src_init(&d->src, &vtbl); + upb_dispatcher_init(&d->dispatcher); + d->toplevel_msgdef = msgdef; + d->limit = &d->stack[UPB_MAX_NESTING]; + return d; +} + +void upb_decoder_free(upb_decoder *d) { + free(d); } -- cgit v1.2.3 From 58a70b55c62cfefcbe7a55a2fd41ee6b87c7256f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 16:29:10 -0800 Subject: Decoder code structure is mostly in-place. --- core/upb_stream.h | 20 ++- core/upb_string.h | 57 ++++++-- stream/upb_decoder.c | 363 ++++++++++++++++++++++----------------------------- 3 files changed, 212 insertions(+), 228 deletions(-) diff --git a/core/upb_stream.h b/core/upb_stream.h index cf01a5f..54fd930 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -171,14 +171,18 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If <0, indicates error (check upb_bytesrc_status for details). +// read. If 0, indicates error and puts details in "status". INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count); + upb_strlen_t count, upb_status *status); // Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). +// existing string data (which avoids a copy). On the other hand, if +// the data was *not* already in an existing string, this copies it into +// a upb_string, and if the data needs to be put in a specific range of +// memory (because eg. you need to put it into a different kind of string +// object) then upb_bytesrc_get() could be better. 
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count); + upb_status *status); // A convenience function for getting all the remaining data in a upb_bytesrc // as a upb_string. Returns false and sets "status" if the operation fails. @@ -189,14 +193,6 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); } -// Returns the current error status for the stream. -// Note! The "eof" flag works like feof() in C; it cannot report end-of-file -// until a read has failed due to eof. It cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); -INLINE bool upb_bytesrc_eof(upb_bytesrc *src); - /* upb_bytesink ***************************************************************/ diff --git a/core/upb_string.h b/core/upb_string.h index 1f4b20c..04c0ae9 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -3,26 +3,39 @@ * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. * - * This file defines a simple string type. The overriding goal of upb_string - * is to avoid memcpy(), malloc(), and free() wheverever possible, while - * keeping both CPU and memory overhead low. Throughout upb there are - * situations where one wants to reference all or part of another string - * without copying. upb_string provides APIs for doing this. + * This file defines a simple string type which is length-delimited instead + * of NULL-terminated, and which has useful sharing semantics. + * + * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() + * wheverever possible, while keeping both CPU and memory overhead low. + * Throughout upb there are situations where one wants to reference all or part + * of another string without copying. 
upb_string provides APIs for doing this. * * Characteristics of upb_string: * - strings are reference-counted. - * - strings are logically immutable. + * - strings are immutable (can be mutated only when first created or recycled). * - if a string has no other referents, it can be "recycled" into a new string * without having to reallocate the upb_string. * - strings can be substrings of other strings (owning a ref on the source * string). - * - strings are not thread-safe by default, but can be made so by calling a - * function. This is not the default because it causes extra CPU overhead. * * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations * unless the string has been marked thread-safe. + * + * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched + * idiom that we go with it instead of making our pointers uint8_t*. + * + * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE + * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if + * you are logically passing a reference to a upb_string to another thread + * (which implies that the other thread must eventually call unref or recycle), + * you have two options: + * + * - create a copy of the string that will be used in the other thread only. + * - call upb_string_get_synchronized_ref(), which will make getref, unref, and + * recycle thread-safe for this upb_string. */ #ifndef UPB_STRING_H @@ -83,10 +96,12 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); +// Internal-only; clients should call upb_string_unref(). void _upb_string_free(upb_string *str); // Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. +// can be NULL, in which case this is a no-op.
WARNING: NOT THREAD_SAFE +// UNLESS THE STRING IS SYNCHRONIZED. INLINE void upb_string_unref(upb_string *str) { if (str && upb_atomic_read(&str->refcount) > 0 && upb_atomic_unref(&str->refcount)) { @@ -98,6 +113,7 @@ upb_string *upb_strdup(upb_string *s); // Forward-declare. // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. +// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED! INLINE upb_string *upb_string_getref(upb_string *str) { int refcount = upb_atomic_read(&str->refcount); if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); @@ -163,8 +179,11 @@ void upb_string_substr(upb_string *str, upb_string *target_str, // data. Waiting for a clear use case before actually implementing it. // // Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until -// a matching call to upb_string_detach(). +// guarantees that the given string data will not change or be deleted until a +// matching call to upb_string_detach(), which may block until any concurrent +// readers have finished reading. upb_string_detach() preserves the contents +// of the string by copying the referenced data if there are any other +// referents. // void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); // void upb_string_detach(upb_string *str); @@ -207,6 +226,22 @@ void upb_string_substr(upb_string *str, upb_string *target_str, _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) #define UPB_STACK_STRING_LEN(str, len) \ _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) + +// A convenient way of specifying upb_strings as literals, like: +// +// upb_streql(UPB_STRLIT("expected"), other_str); +// +// However, this requires either C99 compound initializers or C++. +// Must ONLY be called with a string literal as its argument! 
+//#ifdef __cplusplus +//namespace upb { +//class String : public upb_string { +// // This constructor must ONLY be called with a string literal. +// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {} +//}; +//} +//#define UPB_STRLIT(str) upb::String(str) +//#endif #define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) /* upb_string library functions ***********************************************/ diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b820b08..fbd7eba 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -11,127 +11,39 @@ #include #include "upb_def.h" -/* Functions to read wire values. *********************************************/ - -// These functions are internal to the decode, but might be moved into an -// internal header file if we at some point in the future opt to do code -// generation, because the generated code would want to inline these functions. -// The same applies to the functions to read .proto values below. - -const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). -INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); - } +/* Pure Decoding **************************************************************/ + +// The key fast-path varint-decoding routine. There are a lot of possibilities +// for optimization/experimentation here. 
+INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, + upb_status *status) { + *high = 0; + uint32_t b; + uint8_t *ptr = p->ptr; + b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*buf++); *low |= (b & 0x7f) << 28; + *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + + upb_seterr(status, UPB_ERROR, "Unterminated varint"); + return false; +done: + return true; } -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - uint64_t val64; - const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint32_t)); - return uint32_end; -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). 
-INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - memcpy(val, buf, sizeof(uint64_t)); - return uint64_end; -} - -INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) - last = *buf; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } - return buf; -} - -INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - -/* Functions to read .proto values. *******************************************/ +/* Decoding/Buffering of individual values ************************************/ // Performs zig-zag decoding, which is used by sint32 and sint64. INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// Parses a tag, places the result in *tag. 
-INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - // The decoder keeps a stack with one entry per level of recursion. // upb_decoder_frame is one frame of that stack. typedef struct { @@ -144,6 +56,7 @@ struct upb_decoder { // Immutable state of the decoder. upb_src src; upb_dispatcher dispatcher; + upb_bytesrc *bytesrc; upb_msgdef *toplevel_msgdef; upb_decoder_frame stack[UPB_MAX_NESTING]; @@ -158,66 +71,108 @@ struct upb_decoder { // Current input buffer. upb_string *buf; + // Our current offset *within* buf. + upb_strlen_t buf_offset; + // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; +}; - // Our current offset *within* buf. Will be negative if we are buffering - // from previous buffers in tmpbuf. - upb_strlen_t buf_offset; +// Called only from the slow path, this function copies the next "len" bytes +// from the stream to "data", adjusting "buf" and "end" appropriately. +INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, + uint8_t **buf, uint8_t **end) { + while (len > 0) { + memcpy(data, *buf, *end-*buf); + len -= (*end-*buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *buf = upb_string_getrobuf(d->buf); + *end = *buf + upb_string_len(d->buf); + } +} - // Holds any bytes we have from previous buffers. The number of bytes we - // have encoded here is -buf_offset, if buf_offset<0, 0 otherwise. - uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE]; -}; +// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes +// available in our current buffer. We don't inline this because we accept +// that it will be slow and we don't want to pay for two copies of it. 
+static bool upb_decode_varint_slow(upb_decoder *d) { + uint8_t buf[UPB_MAX_ENCODED_SIZE]; + uint8_t *p = buf, *end = buf + sizeof(buf); + for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) + *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; + + if(d->status->code == UPB_EOF && (last & 0x80)) { + upb_seterr(status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + } else if(buf == maxend) { + upb_seterr(status, UPB_ERROR, + "Varint was unterminated after 10 bytes.\n"); + } else { + // Success. + return; + } +} -upb_flow_t upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->end - p->ptr > UPB_MAX_ENCODED_SIZE) { - // Fast path; we know we have a complete varint in our existing buffer. - *high = 0; - uint32_t b; - uint8_t *ptr = p->ptr; - b = *(buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(buf++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; - - if(bytes_available >= 10) { - upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated " - "after 10 bytes, stream offset: %u", upb_decoder_offset(d)); - return false; - } +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, + const uint8_t **end, upb_tag *tag) { + const uint8_t *buf = *_buf, *end = *_end; + uint32_t tag_int; + // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). + if (end - buf < 2) goto slow; // unlikely. 
+ tag_int = *buf & 0x7f; + if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*buf & 0x7f) << 7; + if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. +slow: + if (!upb_decode_varint_slow(d, _buf, _end)) return false; + buf = *_buf; // Trick the next line into not overwriting us. +done: + *_buf = buf; + tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); + tag->field_number = tag_int >> 3; + return true; +} + +INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, + uint32_t *low, uint32_t *high) { + if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + return upb_decode_varint_fast(d); + else + return upb_decode_varint_slow(d); +} - done: - p->ptr = ptr; +INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, + uint8_t **buf, uint8_t **end, upb_value *val) { + static const char table = {0, 8, 0, 0, 0, 4}; + size_t bytes = table[wt]; + if (*end - *buf >= bytes) { + // Common (fast) case. + memcpy(&val, *buf, bytes); + *buf += bytes; } else { - // Slow path: we may have to combine one or more buffers to get a whole - // varint worth of data. - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(ing bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, - "Varint was unterminated after 10 bytes.\n"); - } else { - // Success. - return; - } - ungetbytes(d, buf, p - buf); + if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + } + return true; +} + +// "val" initially holds the length of the string, this is replaced by the +// contents of the string. 
+INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { + upb_string_recycle(str); + upb_strlen_t len = upb_valu_getint32(*val); + if (*end - *buf >= len) { + // Common (fast) case. + upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); + *buf += len; + } else { + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + return false; } + return true; } + +/* The main decoding loop *****************************************************/ + static const void *get_msgend(upb_decoder *d) { if(d->top->end_offset > 0) @@ -238,36 +193,29 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } - -// Pushes a new stack frame for a submessage with the given len (which will -// be zero if the submessage is a group). -static const uint8_t *push(upb_decoder *d, const uint8_t *start, +static bool upb_push(upb_decoder *d, const uint8_t *start, uint32_t submsg_len, upb_fielddef *f, upb_status *status) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED, - "Nesting exceeded maximum (%d levels)\n", - UPB_MAX_NESTING); - return NULL; + upb_seterr(status, UPB_ERROR, "Nesting too deep."); + return false; } - upb_decoder_frame *frame = d->top; - frame->end_offset = d->completed_offset + submsg_len; - frame->msgdef = upb_downcast_msgdef(f->def); - - upb_dispatch_startsubmsg(&d->dispatcher, f); - return get_msgend(d); + d->top->end_offset = d->completed_offset + submsg_len; + d->top->msgdef = upb_downcast_msgdef(f->def); + *submsg_end = get_msgend(d); + if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; + return true; } -// Pops a stack frame, returning a pointer for where the next submsg should -// end (or a pointer that is out of range for a group). 
-static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status) +static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) { d->top--; upb_dispatch_endsubmsg(&d->dispatcher); - return get_msgend(d); + *submsg_end = get_msgend(d); + return true; } void upb_decoder_run(upb_src *src, upb_status *status) { @@ -278,11 +226,13 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_msgdef *msgdef = d->top->msgdef; upb_string *str = NULL; + upb_dispatch_startmsg(&d->dispatcher); + // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &buf, &end, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -290,24 +240,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: if(!isgroup(submsg_end)) { - upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current " - "message is not a group, byte offset: %zd", - d->completed_offset + (completed - start)); + upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - submsg_end = pop(d, start, status, &msgdef); - completed = buf; - goto check_msgend; + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); + goto check_msgend; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. CHECK(upb_decode_varint(d, &buf, &end, &val)); break; case UPB_WIRE_TYPE_32BIT: - CHECK(upb_decode_32bit(d, &buf, &end, &val)); - break; case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_64bit(d, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); break; } @@ -315,24 +260,31 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); if (!f) { - // Unknown field. 
+ if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) + CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { - // Field has incorrect type. + // TODO: put more details in this error msg. + upb_seterr(status, UPB_ERROR, "Field had incorrect type."); + goto err; } // Perform any further massaging of the data now that we have the fielddef. // Now we can distinguish strings from submessages, and we know about // zig-zag-encoded types. // TODO: handle packed encoding. + // TODO: if we were being paranoid, we could check for 32-bit-varint types + // that the top 32 bits all match the highest bit of the low 32 bits. + // If this is not true we are losing data. But the main protobuf library + // doesn't check this, and it would slow us down, so pass for now. switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; + CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + goto check_msgend; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, str, upb_value_getint32(val))); - upb_value_setstr(&val, str); + CHECK(upb_decode_string(d, &val, &str)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -341,26 +293,27 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); break; default: - // Other types need no further processing at this point. + break; // Other types need no further processing at this point. 
} CHECK(upb_dispatch_value(d->sink, f, val, status)); check_msgend: while(buf >= submsg_end) { if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Expected submsg end offset " - "did not lie on a tag/value boundary."); + upb_seterr(status, UPB_ERROR, "Bad submessage end.") goto err; } - submsg_end = pop(d, start, status, &msgdef); + CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); } - completed = buf; } + CHECK(upb_dispatch_endmsg(&d->dispatcher)); + return; + err: - read = (char*)completed - (char*)start; - d->completed_offset += read; - return read; + if (upb_ok(status)) { + upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); + } } void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { -- cgit v1.2.3 From fe659c8c93c464fcbcfb5739935a2e4341d01fd4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 23 Jan 2011 18:59:31 -0800 Subject: Getting closer to a decoder that could actually compile and work. --- core/upb_stream.h | 7 +- core/upb_string.h | 6 ++ stream/upb_decoder.c | 207 +++++++++++++++++++++++++++------------------------ 3 files changed, 119 insertions(+), 101 deletions(-) diff --git a/core/upb_stream.h b/core/upb_stream.h index 54fd930..bf312a8 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -40,8 +40,11 @@ typedef enum { UPB_CONTINUE, // Stop processing for now; check status for details. If no status was set, - // a generic error will be returned. If the error is resumable, processing - // will resume by delivering this callback again. + // a generic error will be returned. If the error is resumable, it is not + // (yet) defined where processing will resume -- waiting for real-world + // examples of resumable decoders and resume-requiring clients. upb_src + // implementations that are not capable of resuming will override the return + // status to be non-resumable if a resumable status was set by the handlers. 
UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top diff --git a/core/upb_string.h b/core/upb_string.h index 04c0ae9..1a7e06b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -134,6 +134,12 @@ INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } +// Convenience method for getting the end of the string. Calls +// upb_string_getrobuf() so inherits the caveats of calling that function. +INLINE const char *upb_string_getbufend(upb_string *str) { + return upb_string_getrobuf(str) + upb_string_len(str); +} + // Attempts to recycle the string "str" so it may be reused and have different // data written to it. After the function returns, "str" points to a writable // string, which is either the original string if it had no other references diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index fbd7eba..9a17451 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -13,23 +13,24 @@ /* Pure Decoding **************************************************************/ -// The key fast-path varint-decoding routine. There are a lot of possibilities -// for optimization/experimentation here. -INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, +// The key fast-path varint-decoding routine. Here we can assume we have at +// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of +// possibilities for optimization/experimentation here. 
+INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, upb_status *status) { *high = 0; uint32_t b; uint8_t *ptr = p->ptr; - b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*buf++); *low |= (b & 0x7f) << 28; + b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*ptr++); *low |= (b & 0x7f) << 28; *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; @@ -71,23 +72,51 @@ struct upb_decoder { // Current input buffer. upb_string *buf; - // Our current offset *within* buf. - upb_strlen_t buf_offset; - // The offset within the overall stream represented by the *beginning* of buf. upb_strlen_t buf_stream_offset; }; // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "end" appropriately. 
-INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, - uint8_t **buf, uint8_t **end) { - while (len > 0) { - memcpy(data, *buf, *end-*buf); - len -= (*end-*buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - *buf = upb_string_getrobuf(d->buf); - *end = *buf + upb_string_len(d->buf); +// from the stream to "data", adjusting "buf" and "len" appropriately. +static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, + uint8_t **ptr, size_t *len) { + while (1) { + memcpy(data, *ptr, *len); + bytes_wanted -= *len; + *ptr += *len; + if (bytes_wanted == 0) return true; + + // Did "len" indicate end-of-submessage or end-of-buffer? + size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; + if (d->top->end_offset > 0 && + d->top->end_offset == d->buf_stream_offset + buf_offset) { + // End-of-submessage. + if (bytes_wanted > 0) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") + return false; + } + if (upb_pop(d) != UPB_CONTINUE) return false; + } else { + // End-of-buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + *ptr = upb_string_getrobuf(d->buf); + } + + // Wait for end-of-submessage or end-of-buffer, whichever comes first. + size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; + size_t submsg_remaining = + d->top->end_offset - d->buf_stream_offset - offset_in_buf; + if (d->top->end_offset == UPB_GROUP_END_OFFSET || + buf_remaining > submsg_remaining) { + *len = buf_remaining; + } else { + // Check that non of our subtraction overflowed. 
+ assert(d->top->end_offset > d->buf_stream_offset); + assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); + *len = submsg_remaining; + } } } @@ -112,21 +141,21 @@ static bool upb_decode_varint_slow(upb_decoder *d) { } } -INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, - const uint8_t **end, upb_tag *tag) { - const uint8_t *buf = *_buf, *end = *_end; +INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, + const uint8_t **len, upb_tag *tag) { + const uint8_t *ptr = *_ptr, *len = *_end; uint32_t tag_int; // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (end - buf < 2) goto slow; // unlikely. - tag_int = *buf & 0x7f; - if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*buf & 0x7f) << 7; - if ((*(buf++) & 0x80) != 0) goto slow; // unlikely. + if (len - ptr < 2) goto slow; // unlikely. + tag_int = *ptr & 0x7f; + if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*ptr & 0x7f) << 7; + if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely. slow: - if (!upb_decode_varint_slow(d, _buf, _end)) return false; - buf = *_buf; // Trick the next line into not overwriting us. + if (!upb_decode_varint_slow(d, _ptr, _end)) return false; + ptr = *_ptr; // Trick the next line into not overwriting us. 
done: - *_buf = buf; + *_ptr = ptr; tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return true; @@ -134,22 +163,22 @@ done: INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, uint32_t *low, uint32_t *high) { - if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) + if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) return upb_decode_varint_fast(d); else return upb_decode_varint_slow(d); } INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - uint8_t **buf, uint8_t **end, upb_value *val) { + uint8_t **ptr, uint8_t **len, upb_value *val) { static const char table = {0, 8, 0, 0, 0, 4}; size_t bytes = table[wt]; - if (*end - *buf >= bytes) { + if (*len - *ptr >= bytes) { // Common (fast) case. - memcpy(&val, *buf, bytes); - *buf += bytes; + memcpy(&val, *ptr, bytes); + *ptr += bytes; } else { - if (!upb_getbuf(d, &val, bytes, buf, end)) return false; + if (!upb_getptr(d, &val, bytes, ptr, len)) return false; } return true; } @@ -159,12 +188,12 @@ INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { upb_string_recycle(str); upb_strlen_t len = upb_valu_getint32(*val); - if (*end - *buf >= len) { + if (*len - *ptr >= len) { // Common (fast) case. 
- upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); - *buf += len; + upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); + *ptr += len; } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) + if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) return false; } return true; @@ -173,19 +202,6 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) /* The main decoding loop *****************************************************/ -static const void *get_msgend(upb_decoder *d) -{ - if(d->top->end_offset > 0) - return upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); - else - return (void*)UINTPTR_MAX; // group. -} - -static bool isgroup(const void *submsg_end) -{ - return submsg_end == (void*)UINTPTR_MAX; -} - extern upb_wire_type_t upb_expected_wire_types[]; // Returns true if wt is the correct on-the-wire type for ft. INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { @@ -193,76 +209,78 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { return upb_types[ft].expected_wire_type == wt; } -static bool upb_push(upb_decoder *d, const uint8_t *start, - uint32_t submsg_len, upb_fielddef *f, - upb_status *status) -{ +static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, + upb_strlen_t submsg_len, upb_field_type_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(status, UPB_ERROR, "Nesting too deep."); - return false; + return UPB_ERROR; } - d->top->end_offset = d->completed_offset + submsg_len; + d->top->end_offset = type == UPB_TYPE(GROUP) ? 
+ UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; d->top->msgdef = upb_downcast_msgdef(f->def); - *submsg_end = get_msgend(d); - if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; - return true; + return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) -{ +static upb_flow_t upb_pop(upb_decoder *d) { d->top--; - upb_dispatch_endsubmsg(&d->dispatcher); - *submsg_end = get_msgend(d); - return true; + return upb_dispatch_endsubmsg(&d->dispatcher); } void upb_decoder_run(upb_src *src, upb_status *status) { - // buf is our current offset, moves from start to end. - const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str) + d->buf_offset; - const uint8_t *end = (uint8_t*)upb_string_getrobuf(str) + upb_string_len(str); - const uint8_t *submsg_end = get_msgend(d, start); - upb_msgdef *msgdef = d->top->msgdef; + // We use stack variables for our frequently used vars so the compiler knows + // they can't be changed by external code (like when we dispatch a callback). + + // Our current position in the data buffer. + uint8_t *ptr = NULL; + // Number of bytes available at ptr, until either end-of-buf or + // end-of-submessage (whichever is smaller). + size_t len = 0; + upb_string *str = NULL; - upb_dispatch_startmsg(&d->dispatcher); +// TODO: handle UPB_SKIPSUBMSG +#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err +#define CHECK(expr) if (!expr) goto err; + + CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); // Main loop: executed once per tag/field pair. while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &buf, &end, &tag)); + CHECK(upb_decode_tag(d, &ptr, &len, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. 
upb_value val; switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: - if(!isgroup(submsg_end)) { + if(d->top->end_offset != UPB_GROUP_END_OFFSET) upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_pop(d)); + continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &buf, &end, &val)); + CHECK(upb_decode_varint(d, &ptr, &len, &val)); break; case UPB_WIRE_TYPE_32BIT: case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); break; } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); + upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) CHECK(upb_decode_string(d, &val, &str)); - CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); + CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { // TODO: put more details in this error msg. upb_seterr(status, UPB_ERROR, "Field had incorrect type."); @@ -280,8 +298,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); - goto check_msgend; // We have no value to dispatch. + CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): CHECK(upb_decode_string(d, &val, &str)); @@ -295,19 +313,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) { default: break; // Other types need no further processing at this point. 
} - CHECK(upb_dispatch_value(d->sink, f, val, status)); - -check_msgend: - while(buf >= submsg_end) { - if(buf > submsg_end) { - upb_seterr(status, UPB_ERROR, "Bad submessage end.") - goto err; - } - CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); - } + CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); } - CHECK(upb_dispatch_endmsg(&d->dispatcher)); + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; err: -- cgit v1.2.3 From 93381f1411def0dba5677b71cd4df859d99777f3 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 Jan 2011 21:15:44 -0800 Subject: Decoder compiles again! But probably doesn't work. --- Makefile | 6 +- core/upb.c | 2 +- core/upb.h | 2 +- core/upb_def.c | 2 +- core/upb_stream_vtbl.h | 26 +++--- stream/upb_decoder.c | 241 +++++++++++++++++++++++++++++-------------------- 6 files changed, 160 insertions(+), 119 deletions(-) diff --git a/Makefile b/Makefile index 04779c0..46cb836 100644 --- a/Makefile +++ b/Makefile @@ -62,8 +62,8 @@ SRC=core/upb.c \ core/upb_string.c \ descriptor/descriptor.c \ core/upb_def.c \ + stream/upb_decoder.c \ # core/upb_msg.c \ -# stream/upb_decoder.c \ # stream/upb_stdio.c \ # stream/upb_strstream.c \ # stream/upb_textprinter.c @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a diff --git a/core/upb.c b/core/upb.c index da2a0f0..ff2d47e 100644 --- a/core/upb.c +++ b/core/upb.c @@ -18,7 +18,7 @@ (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ #ctype}, -upb_type_info upb_types[] = { +const upb_type_info upb_types[] = { {0, 0, 0, 0, ""}, // There is no type 0. TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT diff --git a/core/upb.h b/core/upb.h index d394a08..7b228a0 100644 --- a/core/upb.h +++ b/core/upb.h @@ -101,7 +101,7 @@ typedef struct { } upb_type_info; // A static array of info about all of the field types, indexed by type number. -extern upb_type_info upb_types[]; +extern const upb_type_info upb_types[]; // The number of a field, eg. "optional string foo = 3". typedef int32_t upb_field_number_t; diff --git a/core/upb_def.c b/core/upb_def.c index c21843e..2eda89f 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -717,7 +717,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { size_t max_align = 0; for (int i = 0; i < n; i++) { upb_fielddef *f = sorted_fields[i]; - upb_type_info *type_info = &upb_types[f->type]; + const upb_type_info *type_info = &upb_types[f->type]; // This identifies the set bit. When we implement is_initialized (a // general check about whether all required bits are set) we will probably diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index fd71b2d..ddefba9 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -27,9 +27,9 @@ typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. 
typedef upb_strlen_t (*upb_bytesrc_read_fptr)( - upb_bytesrc *src, void *buf, upb_strlen_t count); + upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); typedef bool (*upb_bytesrc_getstr_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t count); + upb_bytesrc *src, upb_string *str, upb_status *status); // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( @@ -102,35 +102,31 @@ INLINE void upb_src_run(upb_src *src, upb_status *status) { // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count) { - return src->vtbl->read(src, buf, count); + upb_strlen_t count, upb_status *status) { + return src->vtbl->read(src, buf, count, status); } INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count) { - return src->vtbl->getstr(src, str, count); + upb_status *status) { + return src->vtbl->getstr(src, str, status); } INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, upb_status *status) { // We start with a getstr, because that could possibly alias data instead of // copying. - if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + if (!upb_bytesrc_getstr(src, str, status)) return false; // Trade-off between number of read calls and amount of overallocation. const size_t bufsize = 4096; - while (!upb_bytesrc_eof(src)) { + do { upb_strlen_t len = upb_string_len(str); char *buf = upb_string_getrwbuf(str, len + bufsize); - upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); - if (read < 0) goto error; + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize, status); + if (read < 0) return false; // Resize to proper size. 
upb_string_getrwbuf(str, len + read); - } + } while (!status->code != UPB_EOF); return true; - -error: - upb_copyerr(status, upb_bytesrc_status(src)); - return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 9a17451..b4b32ff 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,27 +14,27 @@ /* Pure Decoding **************************************************************/ // The key fast-path varint-decoding routine. Here we can assume we have at -// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of +// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of // possibilities for optimization/experimentation here. -INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, +INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *status) { - *high = 0; + uint32_t low, high = 0; uint32_t b; - uint8_t *ptr = p->ptr; - b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 3; if(!(b & 
0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; done: + *val = ((uint64_t)high << 32) | low; return true; } @@ -50,7 +50,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - size_t end_offset; // For groups, 0. + ssize_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -76,23 +76,50 @@ struct upb_decoder { upb_strlen_t buf_stream_offset; }; +typedef struct { + // Our current position in the data buffer. + const char *ptr; + + // Number of bytes available at ptr, until either end-of-buf or + // end-of-submessage (whichever is smaller). + size_t len; + + // Msgdef for the current level. + upb_msgdef *msgdef; +} upb_dstate; + +INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { + s->ptr += len; + s->len -= len; +} + +static upb_flow_t upb_pop(upb_decoder *d); + +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. +#define UPB_GROUP_END_OFFSET -1 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + // Called only from the slow path, this function copies the next "len" bytes // from the stream to "data", adjusting "buf" and "len" appropriately. 
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, - uint8_t **ptr, size_t *len) { + upb_dstate *s) { while (1) { - memcpy(data, *ptr, *len); - bytes_wanted -= *len; - *ptr += *len; + size_t to_copy = UPB_MIN(bytes_wanted, s->len); + memcpy(data, s->ptr, to_copy); + upb_dstate_advance(s, to_copy); + bytes_wanted -= to_copy; if (bytes_wanted == 0) return true; // Did "len" indicate end-of-submessage or end-of-buffer? - size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; + ssize_t buf_offset = + d->buf ? ((const char*)s->ptr - upb_string_getrobuf(d->buf)) : 0; if (d->top->end_offset > 0 && d->top->end_offset == d->buf_stream_offset + buf_offset) { // End-of-submessage. if (bytes_wanted > 0) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); return false; } if (upb_pop(d) != UPB_CONTINUE) return false; @@ -100,100 +127,121 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, // End-of-buffer. if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - *ptr = upb_string_getrobuf(d->buf); + s->ptr = upb_string_getrobuf(d->buf); } // Wait for end-of-submessage or end-of-buffer, whichever comes first. - size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); - size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; - size_t submsg_remaining = + ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); + ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; + ssize_t submsg_remaining = d->top->end_offset - d->buf_stream_offset - offset_in_buf; if (d->top->end_offset == UPB_GROUP_END_OFFSET || buf_remaining > submsg_remaining) { - *len = buf_remaining; + s->len = buf_remaining; } else { // Check that non of our subtraction overflowed. 
assert(d->top->end_offset > d->buf_stream_offset); assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); - *len = submsg_remaining; + s->len = submsg_remaining; } } } -// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes -// available in our current buffer. We don't inline this because we accept -// that it will be slow and we don't want to pay for two copies of it. -static bool upb_decode_varint_slow(upb_decoder *d) { - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, +// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous +// bytes available in our current buffer. We don't inline this because we +// accept that it will be slow and we don't want to pay for two copies of it. +static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, + upb_value *val) { + char byte = 0x80; + uint64_t val64 = 0; + int bitpos; + for(bitpos = 0; + bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1, s); + bitpos += 7) + val64 |= ((uint64_t)byte & 0x7F) << bitpos; + + if(bitpos == 70) { + upb_seterr(d->status, UPB_ERROR, "Varint was unterminated after 10 bytes.\n"); + return false; + } else if (d->status->code == UPB_EOF && (byte & 0x80)) { + upb_seterr(d->status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + return false; } else { // Success. 
- return; + upb_value_setint64(val, val64); + return true; } } -INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, - const uint8_t **len, upb_tag *tag) { - const uint8_t *ptr = *_ptr, *len = *_end; +typedef struct { + upb_wire_type_t wire_type; + upb_field_number_t field_number; +} upb_tag; + +INLINE bool upb_decode_tag(upb_decoder *d, upb_dstate *s, upb_tag *tag) { + const char *p = s->ptr; uint32_t tag_int; + upb_value val; // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (len - ptr < 2) goto slow; // unlikely. - tag_int = *ptr & 0x7f; - if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*ptr & 0x7f) << 7; - if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely. + if (s->len < 2) goto slow; // unlikely. + tag_int = *p & 0x7f; + if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*p & 0x7f) << 7; + if ((*(p++) & 0x80) == 0) goto done; // likely slow: - if (!upb_decode_varint_slow(d, _ptr, _end)) return false; - ptr = *_ptr; // Trick the next line into not overwriting us. + // Decode a full varint starting over from ptr. + if (!upb_decode_varint_slow(d, s, &val)) return false; + tag_int = upb_value_getint64(val); + p = s->ptr; // Trick the next line into not overwriting us. done: - *_ptr = ptr; + upb_dstate_advance(s, p - s->ptr); tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return true; } -INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) - return upb_decode_varint_fast(d); - else - return upb_decode_varint_slow(d); +INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { + if (s->len >= UPB_MAX_VARINT_ENCODED_SIZE) { + // Common (fast) case. 
+ uint64_t val64; + const char *p = s->ptr; + if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; + upb_dstate_advance(s, p - s->ptr); + upb_value_setint64(val, val64); + return true; + } else { + return upb_decode_varint_slow(d, s, val); + } } INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - uint8_t **ptr, uint8_t **len, upb_value *val) { - static const char table = {0, 8, 0, 0, 0, 4}; + upb_dstate *s, upb_value *val) { + static const char table[] = {0, 8, 0, 0, 0, 4}; size_t bytes = table[wt]; - if (*len - *ptr >= bytes) { + if (s->len >= bytes) { // Common (fast) case. - memcpy(&val, *ptr, bytes); - *ptr += bytes; + memcpy(&val, s->ptr, bytes); + upb_dstate_advance(s, bytes); } else { - if (!upb_getptr(d, &val, bytes, ptr, len)) return false; + if (!upb_getbuf(d, &val, bytes, s)) return false; } return true; } // "val" initially holds the length of the string, this is replaced by the // contents of the string. -INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { +INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, + upb_dstate *s) { upb_string_recycle(str); - upb_strlen_t len = upb_valu_getint32(*val); - if (*len - *ptr >= len) { + uint32_t strlen = upb_value_getint32(*val); + if (s->len >= strlen) { // Common (fast) case. - upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); - *ptr += len; + upb_string_substr(*str, d->buf, s->ptr - upb_string_getrobuf(d->buf), strlen); + upb_dstate_advance(s, strlen); } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) + if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) return false; } return true; @@ -204,21 +252,22 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) extern upb_wire_type_t upb_expected_wire_types[]; // Returns true if wt is the correct on-the-wire type for ft. 
-INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { +INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { // This doesn't currently support packed arrays. - return upb_types[ft].expected_wire_type == wt; + return upb_types[ft].native_wire_type == wt; } -static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, - upb_strlen_t submsg_len, upb_field_type_t type) { +static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, + upb_strlen_t submsg_len, upb_fieldtype_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR, "Nesting too deep."); + upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); return UPB_ERROR; } - d->top->end_offset = type == UPB_TYPE(GROUP) ? - UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; + d->top->end_offset = (type == UPB_TYPE(GROUP)) ? + UPB_GROUP_END_OFFSET : + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; d->top->msgdef = upb_downcast_msgdef(f->def); return upb_dispatch_startsubmsg(&d->dispatcher, f); } @@ -229,15 +278,11 @@ static upb_flow_t upb_pop(upb_decoder *d) { } void upb_decoder_run(upb_src *src, upb_status *status) { - // We use stack variables for our frequently used vars so the compiler knows - // they can't be changed by external code (like when we dispatch a callback). - - // Our current position in the data buffer. - uint8_t *ptr = NULL; - // Number of bytes available at ptr, until either end-of-buf or - // end-of-submessage (whichever is smaller). - size_t len = 0; - + upb_decoder *d = (upb_decoder*)src; + // We put our dstate on the stack so the compiler knows they can't be changed + // by external code (like when we dispatch a callback). We must be sure not + // to let its address escape this source file. 
+ upb_dstate state = {NULL, 0, d->top->msgdef}; upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG @@ -250,14 +295,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &ptr, &len, &tag)); + CHECK(upb_decode_tag(d, &state, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. upb_value val; switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: - if(d->top->end_offset != UPB_GROUP_END_OFFSET) + if(d->top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } @@ -266,21 +311,21 @@ void upb_decoder_run(upb_src *src, upb_status *status) { case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &ptr, &len, &val)); + CHECK(upb_decode_varint(d, &state, &val)); break; case UPB_WIRE_TYPE_32BIT: case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &state, &val)); break; } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); + upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, tag.field_number); if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) - CHECK(upb_decode_string(d, &val, &str)); - CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); + CHECK(upb_decode_string(d, &val, &str, &state)); + CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { // TODO: put more details in this error msg. 
upb_seterr(status, UPB_ERROR, "Field had incorrect type."); @@ -298,11 +343,11 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_decode_string(d, &val, &str, &state)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -313,7 +358,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { default: break; // Other types need no further processing at this point. } - CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); + CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); } CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); @@ -329,7 +374,7 @@ void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { upb_decoder *d = (upb_decoder*)src; upb_dispatcher_reset(&d->dispatcher, handlers); d->top = d->stack; - d->completed_offset = 0; + d->buf_stream_offset = 0; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it // indefinitely), so we treat it like a group. -- cgit v1.2.3 From 2c24cbb108bbda296f01e7628028b1dcb2b9516b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 25 Jan 2011 10:07:47 -0800 Subject: More work on decoder and stdio bytesrc/bytesink. 
--- Makefile | 6 ++--- core/upb.c | 17 +++++-------- core/upb_stream.h | 16 +++++++----- stream/upb_decoder.c | 14 +++++++--- stream/upb_stdio.c | 66 +++++++++++++++++++++++------------------------- stream/upb_textprinter.c | 1 - stream/upb_textprinter.h | 3 +-- 7 files changed, 63 insertions(+), 60 deletions(-) diff --git a/Makefile b/Makefile index 46cb836..1dfd79d 100644 --- a/Makefile +++ b/Makefile @@ -63,10 +63,10 @@ SRC=core/upb.c \ descriptor/descriptor.c \ core/upb_def.c \ stream/upb_decoder.c \ + stream/upb_stdio.c \ + stream/upb_textprinter.c # core/upb_msg.c \ -# stream/upb_stdio.c \ # stream/upb_strstream.c \ -# stream/upb_textprinter.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. @@ -114,7 +114,7 @@ TESTS=tests/test_string \ tests/test_table \ tests/test_def \ tests/test_stream \ -# tests/test_decoder \ + tests/test_decoder \ # tests/t.test_vs_proto2.googlemessage1 \ # tests/t.test_vs_proto2.googlemessage2 \ # tests/test.proto.pb diff --git a/core/upb.c b/core/upb.c index ff2d47e..525c8a8 100644 --- a/core/upb.c +++ b/core/upb.c @@ -41,16 +41,13 @@ const upb_type_info upb_types[] = { }; void upb_seterr(upb_status *status, enum upb_status_code code, - const char *msg, ...) -{ - if(upb_ok(status)) { // The first error is the most interesting. - status->code = code; - upb_string_recycle(&status->str); - va_list args; - va_start(args, msg); - upb_string_vprintf(status->str, msg, args); - va_end(args); - } + const char *msg, ...) 
{ + status->code = code; + upb_string_recycle(&status->str); + va_list args; + va_start(args, msg); + upb_string_vprintf(status->str, msg, args); + va_end(args); } void upb_copyerr(upb_status *to, upb_status *from) diff --git a/core/upb_stream.h b/core/upb_stream.h index bf312a8..d0045cc 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -178,12 +178,16 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing -// existing string data (which avoids a copy). On the other hand, if -// the data was *not* already in an existing string, this copies it into -// a upb_string, and if the data needs to be put in a specific range of -// memory (because eg. you need to put it into a different kind of string -// object) then upb_bytesrc_get() could be better. +// Like upb_bytesrc_read(), but modifies "str" in-place. "str" MUST be newly +// created or just recycled. Returns "false" if no data was returned, either +// due to error or EOF (check status for details). +// +// In comparison to upb_bytesrc_read(), this call can possibly alias existing +// string data (which avoids a copy). On the other hand, if the data was *not* +// already in an existing string, this copies it into a upb_string, and if the +// data needs to be put in a specific range of memory (because eg. you need to +// put it into a different kind of string object) then upb_bytesrc_get() could +// be better. INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, upb_status *status); diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index b4b32ff..e60915f 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -126,6 +126,7 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, } else { // End-of-buffer. 
if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; s->ptr = upb_string_getrobuf(d->buf); } @@ -295,7 +296,15 @@ void upb_decoder_run(upb_src *src, upb_status *status) { while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &state, &tag)); + if (!upb_decode_tag(d, &state, &tag)) { + if (status->code == UPB_EOF && d->top == d->stack) { + // Normal end-of-file. + CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + return; + } else { + goto err; + } + } // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. @@ -361,9 +370,6 @@ void upb_decoder_run(upb_src *src, upb_status *status) { CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); } - CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); - return; - err: if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 820399b..7923664 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -23,44 +23,42 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) { stdio->file = file; } -static bool upb_stdio_read(upb_stdio *stdio, upb_string *str, - int offset, size_t bytes_to_read) { - char *buf = upb_string_getrwbuf(str, offset + bytes_to_read) + offset; - size_t read = fread(buf, 1, bytes_to_read, stdio->file); - if(read < bytes_to_read) { +static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, + upb_strlen_t count, upb_status *status) { + upb_stdio *stdio = (upb_stdio*)src; + assert(count > 0); + size_t read = fread(buf, 1, count, stdio->file); + if(read < (size_t)count) { // Error or EOF. 
- stdio->bytesrc.eof = feof(stdio->file); - if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesrc.status, UPB_STATUS_ERROR, - "Error reading from stdio stream."); - return false; + if(feof(stdio->file)) { + upb_seterr(status, UPB_EOF, ""); + return read; + } else if(ferror(stdio->file)) { + upb_seterr(status, UPB_ERROR, "Error reading from stdio stream."); + return -1; } - // Resize to actual read size. - upb_string_getrwbuf(str, offset + read); } - return true; + return read; } -bool upb_stdio_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen) { - // We ignore "minlen" since the stdio interfaces always return a full read - // unless they are at EOF. - (void)minlen; - return upb_stdio_read((upb_stdio*)src, str, 0, BLOCK_SIZE); -} - -bool upb_stdio_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len) { - return upb_stdio_read((upb_stdio*)src, str, upb_string_len(str), len); +static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + upb_strlen_t read = upb_stdio_read( + src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status); + if (read <= 0) return false; + upb_string_getrwbuf(str, read); + return true; } int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); - size_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { // Error or EOF. 
stdio->bytesink.eof = feof(stdio->file); if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesink.status, UPB_STATUS_ERROR, + upb_seterr(&stdio->bytesink.status, UPB_ERROR, "Error writing to stdio stream."); return 0; } @@ -68,19 +66,19 @@ int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { return written; } -static upb_bytesrc_vtable upb_stdio_bytesrc_vtbl = { - (upb_bytesrc_get_fptr)upb_stdio_get, - (upb_bytesrc_append_fptr)upb_stdio_append, -}; +upb_stdio *upb_stdio_new() { + static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stdio_read, + upb_stdio_getstr, + }; -static upb_bytesink_vtable upb_stdio_bytesink_vtbl = { - upb_stdio_put -}; + //static upb_bytesink_vtbl bytesink_vtbl = { + // upb_stdio_put + //}; -upb_stdio *upb_stdio_new() { upb_stdio *stdio = malloc(sizeof(*stdio)); - upb_bytesrc_init(&stdio->bytesrc, &upb_stdio_bytesrc_vtbl); - upb_bytesink_init(&stdio->bytesink, &upb_stdio_bytesink_vtbl); + upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); + //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); return stdio; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 2d2e237..3a77ab1 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -12,7 +12,6 @@ #include "upb_string.h" struct _upb_textprinter { - upb_sink sink; upb_bytesink *bytesink; upb_string *str; int indent_depth; diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h index 7e35412..b40d9fa 100644 --- a/stream/upb_textprinter.h +++ b/stream/upb_textprinter.h @@ -20,8 +20,7 @@ upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, bool single_line); - -upb_sink *upb_textprinter_sink(upb_textprinter *p); +void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3 From 5af1ade5435807da065197c0c558947b34628d58 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 28 Jan 2011 
10:11:25 -0800 Subject: More work on textprinter. --- stream/upb_textprinter.c | 143 +++++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 79 deletions(-) diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 3a77ab1..2209173 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -13,119 +13,104 @@ struct _upb_textprinter { upb_bytesink *bytesink; - upb_string *str; int indent_depth; bool single_line; upb_fielddef *f; }; -static void upb_textprinter_endfield(upb_textprinter *p) +static void upb_textprinter_indent(upb_textprinter *p) { + if(!p->single_line) + for(int i = 0; i < p->indent_depth; i++) + upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); +} + +static void upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); else upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); } -static bool upb_textprinter_putval(upb_textprinter *p, upb_value val) { - upb_bytesink_put(p->bytesink, UPB_STRLIT(": ")); - upb_enumdef *enum_def; - upb_string *enum_label; - if(p->f->type == UPB_TYPE(ENUM) && - (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && - (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { - // This is an enum value for which we found a corresponding string. 
- upb_bytesink_put(p->bytesink, enum_label); - } else { - p->str = upb_string_tryrecycle(p->str); -#define CASE(fmtstr, member) upb_string_printf(p->str, fmtstr, val.member); break; - switch(p->f->type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); +static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, + upb_value val) { + upb_textprinter *p = _p; + upb_textprinter_indent(p); + upb_bytesink_printf(p->bytesink, UPB_STRFMT ": ", UPB_STRARG(f->name)); +#define CASE(fmtstr, member) upb_bytesink_printf(p->bytesink, fmtstr, val.member); break; + switch(p->f->type) { + case UPB_TYPE(DOUBLE): + CASE("%0.f", _double); + case UPB_TYPE(FLOAT): + CASE("%0.f", _float) + case UPB_TYPE(INT64): + case UPB_TYPE(SFIXED64): + case UPB_TYPE(SINT64): + CASE("%" PRId64, int64) + case UPB_TYPE(UINT64): + case UPB_TYPE(FIXED64): + CASE("%" PRIu64, uint64) + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) + case UPB_TYPE(UINT32): + case UPB_TYPE(FIXED32): + CASE("%" PRIu32, uint32); + case UPB_TYPE(ENUM): { + upb_enumdef *enum_def; + upb_string *enum_label; + (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && + (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { + // This is an enum value for which we found a corresponding string. 
+ upb_bytesink_put(p->bytesink, enum_label); + CASE("%" PRIu32, uint32); } - upb_bytesink_put(p->bytesink, p->str); + case UPB_TYPE(BOOL): + CASE("%hhu", _bool); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); + upb_bytesink_put(p->bytesink, str); + upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + break; } upb_textprinter_endfield(p); - return upb_ok(upb_bytesink_status(p->bytesink)); -} - -static bool upb_textprinter_putstr(upb_textprinter *p, upb_string *str) { - upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); - // TODO: escaping. - upb_bytesink_put(p->bytesink, str); - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); - upb_textprinter_endfield(p); - return upb_ok(upb_bytesink_status(p->bytesink)); -} - -static void upb_textprinter_indent(upb_textprinter *p) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); + return UPB_CONTINUE; } -static bool upb_textprinter_putdef(upb_textprinter *p, upb_fielddef *f) -{ - upb_textprinter_indent(p); - upb_bytesink_put(p->bytesink, f->name); - p->f = f; - return upb_ok(upb_bytesink_status(p->bytesink)); -} - -static bool upb_textprinter_startmsg(upb_textprinter *p) -{ +static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f) { + upb_textprinter *p = _p; + p->indent_depth++; upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); - p->indent_depth++; - return upb_ok(upb_bytesink_status(p->bytesink)); + return UPB_CONTINUE; } -static bool upb_textprinter_endmsg(upb_textprinter *p) +static upb_flow_t upb_textprinter_endsubmsg(void *_p) { + upb_textprinter *p = _p; p->indent_depth--; upb_textprinter_indent(p); upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); upb_textprinter_endfield(p); - return upb_ok(upb_bytesink_status(p->bytesink)); + return UPB_CONTINUE; } -upb_sink_vtable upb_textprinter_vtbl = { - 
(upb_sink_putdef_fptr)upb_textprinter_putdef, - (upb_sink_putval_fptr)upb_textprinter_putval, - (upb_sink_putstr_fptr)upb_textprinter_putstr, - (upb_sink_startmsg_fptr)upb_textprinter_startmsg, - (upb_sink_endmsg_fptr)upb_textprinter_endmsg, -}; - upb_textprinter *upb_textprinter_new() { + static upb_handlerset handlers = { + NULL, // startmsg + NULL, // endmsg + upb_textprinter_putval, + upb_textprinter_startsubmsg, + upb_textprinter_endsubmsg, + }; upb_textprinter *p = malloc(sizeof(*p)); - upb_sink_init(&p->sink, &upb_textprinter_vtbl); - p->str = NULL; + upb_byte_init(&p->sink, &upb_textprinter_vtbl); return p; } void upb_textprinter_free(upb_textprinter *p) { - upb_string_unref(p->str); free(p); } -- cgit v1.2.3 From fbb9fd35e05b88908beeca2c2b88b15aec1fca01 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 28 Jan 2011 10:11:48 -0800 Subject: Improve comments in headers, to better explain core interfaces. --- core/upb_def.h | 9 ++-- core/upb_stream.h | 123 ++++++++++++++++++++++++++++++++----------------- core/upb_stream_vtbl.h | 2 +- core/upb_string.h | 7 ++- 4 files changed, 91 insertions(+), 50 deletions(-) diff --git a/core/upb_def.h b/core/upb_def.h index d9bab97..e95aec3 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -1,17 +1,18 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details. * - * Provides definitions of .proto constructs: + * Provides a mechanism for loading proto definitions from descriptors, and + * data structures to represent those definitions. These form the protobuf + * schema, and are used extensively throughout upb: * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. * (TODO: definitions of extensions and services). * * Defs are obtained from a upb_symtab object. 
A upb_symtab is empty when - * constructed, and definitions can be added by supplying serialized - * descriptors. + * constructed, and definitions can be added by supplying descriptors. * * Defs are immutable and reference-counted. Symbol tables reference any defs * that are the "current" definitions. If an extension is loaded that adds a diff --git a/core/upb_stream.h b/core/upb_stream.h index d0045cc..09e4025 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -1,23 +1,46 @@ /* * upb - a minimalist implementation of protocol buffers. * - * This file defines four general-purpose streaming interfaces for protobuf - * data or bytes: + * This file defines four general-purpose streaming data interfaces. * - * - upb_src: pull interface for protobuf data. - * - upb_sink: push interface for protobuf data. - * - upb_bytesrc: pull interface for bytes. - * - upb_bytesink: push interface for bytes. + * - upb_handlers: represents a set of callbacks, very much like in XML's SAX + * API, that a client can register to do a streaming tree traversal over a + * stream of structured protobuf data, without knowing where that data is + * coming from. There is only one upb_handlers type (it is not a virtual + * base class), but the object lets you register any set of handlers. * - * These interfaces are used as general-purpose glue in upb. For example, the - * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * The upb_handlers interface supports delegation: when entering a submessage, + * you can delegate to another set of upb_handlers instead of handling the + * submessage yourself. This allows upb_handlers objects to *compose* -- you + * can implement a set of upb_handlers without knowing or caring whether this + * is the top-level message or not. * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ * The other interfaces are the C equivalent of "virtual base classes" that + * anyone can implement: + * + * - upb_src: an interface that represents a source of streaming protobuf data. + * It lets you register a set of upb_handlers, and then call upb_src_run(), + * which pulls the protobuf data from somewhere and then calls the handlers. + * + * - upb_bytesrc: a pull interface for streams of bytes, basically an + * abstraction of read()/fread(), but it avoids copies where possible. + * + * - upb_bytesink: push interface for streams of bytes, basically an + * abstraction of write()/fwrite(), but it avoids copies where possible. + * + * All of the encoders and decoders are based on these generic interfaces, + * which lets you write streaming algorithms that do not depend on a specific + * serialization format; for example, you can write a pretty printer that works + * with input that came from protobuf binary format, protobuf text format, or + * even an in-memory upb_msg -- the pretty printer will not know the + * difference. + * + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. * */ -#ifndef UPB_SRCSINK_H -#define UPB_SRCSINK_H +#ifndef UPB_STREAM_H +#define UPB_STREAM_H #include "upb.h" @@ -53,8 +76,10 @@ typedef enum { // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been - // registered on the provided upb_handlers object. May not be returned - // from any other callback. + // registered on the provided upb_handlers object. This allows upb_handlers + // objects to compose; a set of upb_handlers need not know whether it is the + // top-level message or a sub-message. May not be returned from any other + // callback. 
UPB_DELEGATE, } upb_flow_t; @@ -105,9 +130,19 @@ typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, // // static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum, // upb_value val) { -// Called with an unknown value is encountered. +// // Called with an unknown value is encountered. // return UPB_CONTINUE; // } +// +// // Any handlers you don't need can be set to NULL. +// static upb_handlerset handlers = { +// startmsg, +// endmsg, +// value, +// startsubmsg, +// endsubmsg, +// unknownval, +// }; typedef struct { upb_startmsg_handler_t startmsg; upb_endmsg_handler_t endmsg; @@ -128,26 +163,12 @@ INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); // from automatically being converted to strings in the value callback. // INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); -// The closure will be passed to every handler. The status will be used -// only immediately after a handler has returned UPB_STOP. +// The closure will be passed to every handler. The status will be read by the +// upb_src immediately after a handler has returned UPB_BREAK and used as the +// overall upb_src status; it will not be referenced at any other time. INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, upb_status *status); -// An object that transparently handles delegation so that the caller needs -// only follow the protocol as if delegation did not exist. 
-struct _upb_dispatcher; -typedef struct _upb_dispatcher upb_dispatcher; -INLINE void upb_dispatcher_init(upb_dispatcher *d); -INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); -INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, struct _upb_fielddef *f); -INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); -INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, - upb_value val); -INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, - upb_field_number_t fieldnum, upb_value val); - /* upb_src ********************************************************************/ @@ -171,6 +192,24 @@ INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); INLINE void upb_src_run(upb_src *src, upb_status *status); +// A convenience object that a upb_src can use to invoke handlers. It +// transparently handles delegation so that the upb_src needs only follow the +// protocol as if delegation did not exist. 
+struct _upb_dispatcher; +typedef struct _upb_dispatcher upb_dispatcher; +INLINE void upb_dispatcher_init(upb_dispatcher *d); +INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h); +INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, + struct _upb_fielddef *f); +INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); +INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f, + upb_value val); +INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, + upb_field_number_t fieldnum, + upb_value val); + /* upb_bytesrc ****************************************************************/ // Reads up to "count" bytes into "buf", returning the total number of bytes @@ -178,16 +217,16 @@ INLINE void upb_src_run(upb_src *src, upb_status *status); INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -// Like upb_bytesrc_read(), but modifies "str" in-place. "str" MUST be newly -// created or just recycled. Returns "false" if no data was returned, either -// due to error or EOF (check status for details). +// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure +// that "str" is created or just recycled. Returns "false" if no data was +// returned, either due to error or EOF (check status for details). // // In comparison to upb_bytesrc_read(), this call can possibly alias existing // string data (which avoids a copy). On the other hand, if the data was *not* // already in an existing string, this copies it into a upb_string, and if the // data needs to be put in a specific range of memory (because eg. you need to // put it into a different kind of string object) then upb_bytesrc_get() could -// be better. +// save you a copy. 
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, upb_status *status); @@ -206,15 +245,13 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Writes up to "count" bytes from "buf", returning the total number of bytes -// written. If <0, indicates error (check upb_bytesink_status() for details). -INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, - upb_strlen_t count); +INLINE bool upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...); -// Puts the given string, which may alias the string data (which avoids a -// copy). Returns the number of bytes that were actually, consumed, which may -// be fewer than were in the string, or <0 on error. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); +// Puts the given string, returning true if the operation was successful, otherwise +// check "status" for details. Ownership of the string is *not* passed; if +// the callee wants a reference he must call upb_string_getref() on it. +INLINE bool upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); // Returns the current error status for the stream. 
INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ddefba9..ef655fd 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -139,7 +139,7 @@ INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, return sink->vtbl->write(sink, buf, count); } -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { return sink->vtbl->putstr(sink, str); } diff --git a/core/upb_string.h b/core/upb_string.h index 1a7e06b..7d0ae87 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -9,7 +9,9 @@ * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() * wheverever possible, while keeping both CPU and memory overhead low. * Throughout upb there are situations where one wants to reference all or part - * of another string without copying. upb_string provides APIs for doing this. + * of another string without copying. upb_string provides APIs for doing this, + * and allows the referenced string to be kept alive for as long as anyone is + * referencing it. * * Characteristics of upb_string: * - strings are reference-counted. @@ -22,7 +24,8 @@ * Reference-counted strings have recently fallen out of favor because of the * performance impacts of doing thread-safe reference counting with atomic * operations. We side-step this issue by not performing atomic operations - * unless the string has been marked thread-safe. + * unless the string has been marked thread-safe. Time will tell whether this + * scheme is easy and convenient enough to be practical. * * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched * idiom that we go with it instead of making our pointers uint8_t*. 
-- cgit v1.2.3 From d98db7cb567f17a3bb56e2af8499d2e3aef03b3b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 12:07:09 -0800 Subject: Textprinter is compiling again. --- core/upb_stream.h | 22 ++++++--- core/upb_stream_vtbl.h | 21 ++++++--- stream/upb_textprinter.c | 114 ++++++++++++++++++++++++++++------------------- stream/upb_textprinter.h | 4 +- 4 files changed, 103 insertions(+), 58 deletions(-) diff --git a/core/upb_stream.h b/core/upb_stream.h index 09e4025..aa23549 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -245,16 +245,26 @@ INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -INLINE bool upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...); +// TODO: Figure out how buffering should be handled. Should the caller buffer +// data and only call these functions when a buffer is full? Seems most +// efficient, but then buffering has to be configured in the caller, which +// could be anything, which makes it hard to have a standard interface for +// controlling buffering. +// +// The downside of having the bytesink buffer is efficiency: the caller is +// making more (virtual) function calls, and the caller can't arrange to have +// a big contiguous buffer. The bytesink can do this, but will have to copy +// to make the data contiguous. + +// Returns the number of bytes written. +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, + const char *fmt, ...); // Puts the given string, returning true if the operation was successful, otherwise // check "status" for details. Ownership of the string is *not* passed; if // the callee wants a reference he must call upb_string_getref() on it. -INLINE bool upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, - upb_status *status); - -// Returns the current error status for the stream. 
-INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, + upb_status *status); #include "upb_stream_vtbl.h" diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index ef655fd..8e8971f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -34,8 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)( // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( - upb_bytesink *bytesink, upb_string *str); +typedef bool (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str, upb_status *status); +typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( + upb_status *status, const char *fmt, va_list args); // Vtables for the above interfaces. typedef struct { @@ -44,8 +46,9 @@ typedef struct { } upb_bytesrc_vtbl; typedef struct { - upb_bytesink_write_fptr write; - upb_bytesink_putstr_fptr putstr; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; + upb_bytesink_vprintf_fptr vprintf; } upb_bytesink_vtbl; typedef struct { @@ -140,13 +143,21 @@ INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, } INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { - return sink->vtbl->putstr(sink, str); + return sink->vtbl->putstr(sink, str, status); } INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { return &sink->status; } +INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) 
{ + va_list args; + va_start(args, fmt); + upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); + va_end(args); + return ret; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 2209173..7025494 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -15,34 +15,51 @@ struct _upb_textprinter { upb_bytesink *bytesink; int indent_depth; bool single_line; - upb_fielddef *f; + upb_status status; }; -static void upb_textprinter_indent(upb_textprinter *p) +#define CHECK(x) if ((x) < 0) goto err; + +static int upb_textprinter_indent(upb_textprinter *p) { if(!p->single_line) for(int i = 0; i < p->indent_depth; i++) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + return 0; +err: + return -1; } -static void upb_textprinter_endfield(upb_textprinter *p) { - if(p->single_line) - upb_bytesink_put(p->bytesink, UPB_STRLIT(" ")); - else - upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); +static int upb_textprinter_startfield(upb_textprinter *p, upb_fielddef *f) { + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); + return 0; +err: + return -1; +} + +static int upb_textprinter_endfield(upb_textprinter *p) { + if(p->single_line) { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + } else { + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status)); + } + return 0; +err: + return -1; } static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, upb_value val) { upb_textprinter *p = _p; - upb_textprinter_indent(p); - upb_bytesink_printf(p->bytesink, UPB_STRFMT ": ", UPB_STRARG(f->name)); -#define CASE(fmtstr, member) upb_bytesink_printf(p->bytesink, fmtstr, val.member); break; - switch(p->f->type) { + upb_textprinter_startfield(p, f); +#define CASE(fmtstr, member) \ + 
CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; + switch(f->type) { case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); + CASE("%0.f", double); case UPB_TYPE(FLOAT): - CASE("%0.f", _float) + CASE("%0.f", float) case UPB_TYPE(INT64): case UPB_TYPE(SFIXED64): case UPB_TYPE(SINT64): @@ -50,40 +67,48 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, case UPB_TYPE(UINT64): case UPB_TYPE(FIXED64): CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) case UPB_TYPE(UINT32): case UPB_TYPE(FIXED32): CASE("%" PRIu32, uint32); case UPB_TYPE(ENUM): { - upb_enumdef *enum_def; - upb_string *enum_label; - (enum_def = upb_downcast_enumdef(p->f->def)) != NULL && - (enum_label = upb_enumdef_iton(enum_def, val.int32)) != NULL) { - // This is an enum value for which we found a corresponding string. - upb_bytesink_put(p->bytesink, enum_label); - CASE("%" PRIu32, uint32); + upb_enumdef *enum_def = upb_downcast_enumdef(f->def); + upb_string *enum_label = + upb_enumdef_iton(enum_def, upb_value_getint32(val)); + if (enum_label) { + // We found a corresponding string for this enum. Otherwise we fall + // through to the int32 code path. + CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status)); + break; + } } + case UPB_TYPE(INT32): + case UPB_TYPE(SFIXED32): + case UPB_TYPE(SINT32): + CASE("%" PRId32, int32) case UPB_TYPE(BOOL): - CASE("%hhu", _bool); + CASE("%hhu", bool); case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - upb_bytesink_put(p->bytesink, UPB_STRLIT(": \"")); - upb_bytesink_put(p->bytesink, str); - upb_bytesink_put(p->bytesink, UPB_STRLIT("\"")); + // TODO: escaping. 
+ CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); break; } upb_textprinter_endfield(p); return UPB_CONTINUE; +err: + return UPB_BREAK; } -static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f) { +static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, + upb_handlers *delegate_to) { + (void)delegate_to; upb_textprinter *p = _p; + upb_textprinter_startfield(p, f); p->indent_depth++; - upb_bytesink_put(p->bytesink, UPB_STRLIT(" {")); - if(!p->single_line) upb_bytesink_put(p->bytesink, UPB_STRLIT("\n")); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); + if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); return UPB_CONTINUE; } @@ -92,21 +117,13 @@ static upb_flow_t upb_textprinter_endsubmsg(void *_p) upb_textprinter *p = _p; p->indent_depth--; upb_textprinter_indent(p); - upb_bytesink_put(p->bytesink, UPB_STRLIT("}")); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status); upb_textprinter_endfield(p); return UPB_CONTINUE; } upb_textprinter *upb_textprinter_new() { - static upb_handlerset handlers = { - NULL, // startmsg - NULL, // endmsg - upb_textprinter_putval, - upb_textprinter_startsubmsg, - upb_textprinter_endsubmsg, - }; upb_textprinter *p = malloc(sizeof(*p)); - upb_byte_init(&p->sink, &upb_textprinter_vtbl); return p; } @@ -114,11 +131,18 @@ void upb_textprinter_free(upb_textprinter *p) { free(p); } -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line) { +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line) { + static upb_handlerset handlerset = { + NULL, // startmsg + NULL, // endmsg + upb_textprinter_value, + upb_textprinter_startsubmsg, + upb_textprinter_endsubmsg, + }; p->bytesink = sink; 
p->single_line = single_line; p->indent_depth = 0; + upb_register_handlerset(handlers, &handlerset); + upb_set_handler_closure(handlers, p, &p->status); } - -upb_sink *upb_textprinter_sink(upb_textprinter *p) { return &p->sink; } diff --git a/stream/upb_textprinter.h b/stream/upb_textprinter.h index b40d9fa..a880626 100644 --- a/stream/upb_textprinter.h +++ b/stream/upb_textprinter.h @@ -18,8 +18,8 @@ typedef struct _upb_textprinter upb_textprinter; upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line); +void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers, + upb_bytesink *sink, bool single_line); void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h); #ifdef __cplusplus -- cgit v1.2.3 From 8536bbc5f88a4d2bb98d6875cf317154c263d473 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 20:16:12 -0800 Subject: Some work on upb_msg, but it has a long way to go. 
--- core/upb_msg.c | 44 +++++++++++++++++++++++++++ core/upb_msg.h | 94 +++++++++++++++++++++++++--------------------------------- 2 files changed, 84 insertions(+), 54 deletions(-) diff --git a/core/upb_msg.c b/core/upb_msg.c index a0a5196..83191d2 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -53,6 +53,50 @@ void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { free(msg); } +void upb_msg_recycle(upb_msg **_msg, upb_msgdef *md); + upb_msg *msg = *_msg; + if(msg && upb_atomic_only(&msg->refcount)) { + upb_msg_clear(msg); + } else { + upb_msg_unref(msg); + *_msg = upb_msg_new(); + } +} + +void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr ptr; + if (upb_isarray(f)) { + } +} + +INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f); + assert(upb_field_ismm(f)); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + upb_value val = upb_value_read(p, type); + if (!upb_msg_has(msg, f)) { + upb_msg_sethas(msg, f); + val = upb_field_tryrecycle(p, val, f, type); + } + return val; +} + +INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) { + _upb_field_unref(upb_value_read(p, type), f); + _upb_value_ref(val); + } + upb_msg_sethas(msg, f); + upb_value_write(p, val, upb_field_valuetype(f)); +} + +INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { + msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); +} + + upb_array *upb_array_new(void) { upb_array *arr = malloc(sizeof(*arr)); upb_atomic_refcount_init(&arr->refcount, 1); diff --git a/core/upb_msg.h b/core/upb_msg.h index 2db67c0..815a7cb 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -45,17 +45,7 @@ struct _upb_array { upb_valueptr elements; }; -upb_array *upb_array_new(void); - -INLINE uint32_t upb_array_len(upb_array *a) { - return a->len; -} - void _upb_array_free(upb_array *a, 
upb_fielddef *f); -INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { - if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); -} - INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, uint32_t elem) { upb_valueptr p; @@ -63,6 +53,16 @@ INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, return p; } +upb_array *upb_array_new(void); + +INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { + if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); +} + +INLINE uint32_t upb_array_len(upb_array *a) { + return a->len; +} + INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { assert(elem < upb_array_len(a)); return upb_value_read(_upb_array_getptr(a, f, elem), f->type); @@ -93,6 +93,7 @@ INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { // Append an element to an array of string or submsg with the default value, // returning it. This will try to reuse previously allocated memory. INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { + assert(upb_elem_ismm(f)); upb_array_resize(a, f); upb_valueptr p = _upb_array_getptr(a, f, a->len++); @@ -110,8 +111,9 @@ struct _upb_msg { uint8_t data[4]; // We allocate the appropriate amount per message. }; -// Creates a new msg of the given type. -upb_msg *upb_msg_new(upb_msgdef *md); +// INTERNAL-ONLY FUNCTIONS. + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md); // Returns a pointer to the given field. INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { @@ -120,9 +122,14 @@ INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { return p; } -void _upb_msg_free(upb_msg *msg, upb_msgdef *md); +// PUBLIC FUNCTIONS. + +// Creates a new msg of the given type. +upb_msg *upb_msg_new(upb_msgdef *md); + +// Unrefs the given message. 
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { - if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); + if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); } // Tests whether the given field is explicitly set, or whether it will return a @@ -131,12 +138,26 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; } -INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { - msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); +// Unsets all field values back to their defaults. +INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { + memset(msg->data, 0, md->set_flags_bytes); } +// Used to obtain an empty message of the given type, attempting to reuse the +// memory pointed to by msg if it has no other referents. +void upb_msg_recycle(upb_msg **_msg, upb_msgdef *md); + +// For a repeated field, appends the given scalar value (ie. not a message or +// array) to the field's array; for non-repeated fields, overwrites the +// existing value with this one. +// REQUIRES: !upb_issubmsg(f) +void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val); + +upb_msg *upb_msg_append_emptymsg(upb_msg *msg, upb_fielddef *f); + // Returns the current value of the given field if set, or the default value if -// not set. +// not set. The returned value is not mutable! (In practice this only matters +// for submessages and arrays). INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { if (upb_msg_has(msg, f)) { return upb_value_read(_upb_msg_getptr(msg, f), f->type); @@ -148,47 +169,12 @@ INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { // If the given string, submessage, or array is already set, returns it. // Otherwise sets it and returns an empty instance, attempting to reuse any // previously allocated memory. 
-INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) { - assert(upb_field_ismm(f)); - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - upb_value val = upb_value_read(p, type); - if (!upb_msg_has(msg, f)) { - upb_msg_sethas(msg, f); - val = upb_field_tryrecycle(p, val, f, type); - } - return val; -} +INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f); // Sets the current value of the field. If this is a string, array, or // submessage field, releases a ref on the value (if any) that was previously // set. -INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - if (upb_field_ismm(f)) { - _upb_field_unref(upb_value_read(p, type), f); - _upb_value_ref(val); - } - upb_msg_sethas(msg, f); - upb_value_write(p, val, upb_field_valuetype(f)); -} - -// Unsets all field values back to their defaults. -INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { - memset(msg->data, 0, md->set_flags_bytes); -} - -// A convenience function for decoding an entire protobuf all at once, without -// having to worry about setting up the appropriate objects. -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, - upb_status *status); - -// A convenience function for encoding an entire protobuf all at once. If an -// error occurs, the null string is returned and the status object contains -// the error. -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, - upb_status *status); +INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3 From 93099cccd1e6428d6be45553c7dd7746bbd65e93 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 20:16:34 -0800 Subject: upb_strstream compiles again. That covers all source files except upb_msg! 
--- Makefile | 4 ++-- stream/upb_strstream.c | 50 +++++++++++++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 1dfd79d..26e036e 100644 --- a/Makefile +++ b/Makefile @@ -64,9 +64,9 @@ SRC=core/upb.c \ core/upb_def.c \ stream/upb_decoder.c \ stream/upb_stdio.c \ - stream/upb_textprinter.c + stream/upb_textprinter.c \ + stream/upb_strstream.c \ # core/upb_msg.c \ -# stream/upb_strstream.c \ $(SRC): perf-cppflags # Parts of core that are yet to be converted. diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c index 7ed761b..d3fd4e0 100644 --- a/stream/upb_strstream.c +++ b/stream/upb_strstream.c @@ -12,6 +12,7 @@ struct upb_stringsrc { upb_bytesrc bytesrc; upb_string *str; + upb_strlen_t offset; }; void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { @@ -27,34 +28,41 @@ void upb_stringsrc_free(upb_stringsrc *s) { free(s); } -static bool upb_stringsrc_get(upb_stringsrc *src, upb_string *str, - upb_strlen_t minlen) { - // We ignore "minlen" since we always return the entire string. - (void)minlen; - upb_string_substr(str, src->str, 0, upb_string_len(src->str)); - src->bytesrc.eof = true; - return true; +static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, + upb_strlen_t count, upb_status *status) { + upb_stringsrc *src = (upb_stringsrc*)_src; + if (src->offset == upb_string_len(src->str)) { + upb_seterr(status, UPB_EOF, ""); + return -1; + } else { + upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset); + memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read); + src->offset += to_read; + return to_read; + } } -static bool upb_stringsrc_append(upb_stringsrc *src, upb_string *str, - upb_strlen_t len) { - // Unimplemented; since we return the string via "get" all in one go, - // this method probably isn't very useful. 
- (void)src; - (void)str; - (void)len; - return false; +static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, + upb_status *status) { + upb_stringsrc *src = (upb_stringsrc*)_src; + if (src->offset == upb_string_len(str)) { + upb_seterr(status, UPB_EOF, ""); + return false; + } else { + upb_string_substr(str, src->str, 0, upb_string_len(src->str)); + return true; + } } -static upb_bytesrc_vtable upb_stringsrc_vtbl = { - (upb_bytesrc_get_fptr)upb_stringsrc_get, - (upb_bytesrc_append_fptr)upb_stringsrc_append, -}; - upb_stringsrc *upb_stringsrc_new() { + static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stringsrc_read, + upb_stringsrc_getstr, + }; + upb_stringsrc *s = malloc(sizeof(*s)); s->str = NULL; - upb_bytesrc_init(&s->bytesrc, &upb_stringsrc_vtbl); + upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl); return s; } -- cgit v1.2.3 From 02a8cdfff29d6a17836847490a06dfe535855d52 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 29 Jan 2011 23:22:33 -0800 Subject: Fixes to decoder, stdio, textprinter. --- core/upb_stream_vtbl.h | 6 ++--- stream/upb_decoder.c | 68 ++++++++++++++++++++++++++++++++---------------- stream/upb_stdio.c | 38 ++++++++++++++++++--------- stream/upb_textprinter.c | 4 +-- tests/test_decoder.c | 10 +++++-- 5 files changed, 85 insertions(+), 41 deletions(-) diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 8e8971f..a6990bc 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -34,10 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)( // upb_bytesink. 
typedef upb_strlen_t (*upb_bytesink_write_fptr)( upb_bytesink *bytesink, void *buf, upb_strlen_t count); -typedef bool (*upb_bytesink_putstr_fptr)( +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( upb_bytesink *bytesink, upb_string *str, upb_status *status); typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_status *status, const char *fmt, va_list args); + upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); // Vtables for the above interfaces. typedef struct { @@ -153,7 +153,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { va_list args; va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); + upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); va_end(args); return ret; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index e60915f..a7a2c76 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -18,22 +18,24 @@ // possibilities for optimization/experimentation here. 
INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *status) { + const char *p = *ptr; uint32_t low, high = 0; uint32_t b; - b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*ptr++); low |= (b & 0x7f) << 28; - high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(p++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(p++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; done: + *ptr = p; *val = ((uint64_t)high << 32) | low; return true; } @@ -50,7 +52,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - ssize_t end_offset; // For groups, 0. + size_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -73,7 +75,7 @@ struct upb_decoder { upb_string *buf; // The offset within the overall stream represented by the *beginning* of buf. 
- upb_strlen_t buf_stream_offset; + size_t buf_stream_offset; }; typedef struct { @@ -98,7 +100,7 @@ static upb_flow_t upb_pop(upb_decoder *d); // Constant used to signal that the submessage is a group and therefore we // don't know its end offset. This cannot be the offset of a real submessage // end because it takes at least one byte to begin a submessage. -#define UPB_GROUP_END_OFFSET -1 +#define UPB_GROUP_END_OFFSET 0 #define UPB_MAX_VARINT_ENCODED_SIZE 10 // Called only from the slow path, this function copies the next "len" bytes @@ -132,12 +134,12 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, } // Wait for end-of-submessage or end-of-buffer, whichever comes first. - ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - ssize_t submsg_remaining = + size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); + size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; + size_t submsg_remaining = d->top->end_offset - d->buf_stream_offset - offset_in_buf; if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining > submsg_remaining) { + buf_remaining < submsg_remaining) { s->len = buf_remaining; } else { // Check that non of our subtraction overflowed. @@ -165,13 +167,16 @@ static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, upb_seterr(d->status, UPB_ERROR, "Varint was unterminated after 10 bytes.\n"); return false; + } else if (d->status->code == UPB_EOF && bitpos == 0) { + // Regular EOF. + return false; } else if (d->status->code == UPB_EOF && (byte & 0x80)) { upb_seterr(d->status, UPB_ERROR, "Provided data ended in the middle of a varint.\n"); return false; } else { // Success. 
- upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } } @@ -210,7 +215,7 @@ INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { const char *p = s->ptr; if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; upb_dstate_advance(s, p - s->ptr); - upb_value_setint64(val, val64); + upb_value_setraw(val, val64); return true; } else { return upb_decode_varint_slow(d, s, val); @@ -245,6 +250,7 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) return false; } + upb_value_setstr(val, *str); return true; } @@ -259,7 +265,7 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { } static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, - upb_strlen_t submsg_len, upb_fieldtype_t type) { + upb_value submsg_len, upb_fieldtype_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { @@ -268,7 +274,7 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); return upb_dispatch_startsubmsg(&d->dispatcher, f); } @@ -280,6 +286,7 @@ static upb_flow_t upb_pop(upb_decoder *d) { void upb_decoder_run(upb_src *src, upb_status *status) { upb_decoder *d = (upb_decoder*)src; + d->status = status; // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. 
@@ -299,9 +306,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { if (!upb_decode_tag(d, &state, &tag)) { if (status->code == UPB_EOF && d->top == d->stack) { // Normal end-of-file. + upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); return; } else { + if (status->code == UPB_EOF) { + upb_seterr(status, UPB_ERROR, + "Input ended in the middle of a submessage."); + } goto err; } } @@ -352,7 +364,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); + CHECK_FLOW(upb_push(d, &state, f, val, f->type)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): @@ -397,9 +409,21 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { upb_dispatcher_init(&d->dispatcher); d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; + d->buf = NULL; return d; } +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { + d->bytesrc = bytesrc; + d->top = &d->stack[0]; + d->top->msgdef = d->toplevel_msgdef; + d->top->end_offset = SIZE_MAX; // never want to end top-level message. 
+ upb_string_unref(d->buf); + d->buf = NULL; +} + void upb_decoder_free(upb_decoder *d) { free(d); } + +upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } diff --git a/stream/upb_stdio.c b/stream/upb_stdio.c index 7923664..8857677 100644 --- a/stream/upb_stdio.c +++ b/stream/upb_stdio.c @@ -23,6 +23,9 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) { stdio->file = file; } + +/* upb_bytesrc methods ********************************************************/ + static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status) { upb_stdio *stdio = (upb_stdio*)src; @@ -50,18 +53,27 @@ static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, return true; } -int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) { + +/* upb_bytesink methods *******************************************************/ + +upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_strlen_t len = upb_string_len(str); upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { - // Error or EOF. 
- stdio->bytesink.eof = feof(stdio->file); - if(ferror(stdio->file)) { - upb_seterr(&stdio->bytesink.status, UPB_ERROR, - "Error writing to stdio stream."); - return 0; - } + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); + upb_strlen_t written = vfprintf(stdio->file, fmt, args); + if (written < 0) { + upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; } return written; } @@ -72,13 +84,15 @@ upb_stdio *upb_stdio_new() { upb_stdio_getstr, }; - //static upb_bytesink_vtbl bytesink_vtbl = { - // upb_stdio_put - //}; + static upb_bytesink_vtbl bytesink_vtbl = { + NULL, + upb_stdio_putstr, + upb_stdio_vprintf + }; upb_stdio *stdio = malloc(sizeof(*stdio)); upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); - //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); + upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); return stdio; } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 7025494..531da12 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -90,7 +90,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): // TODO: escaping. 
- CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); + CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); break; @@ -107,7 +107,7 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_textprinter *p = _p; upb_textprinter_startfield(p, f); p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); + upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); return UPB_CONTINUE; } diff --git a/tests/test_decoder.c b/tests/test_decoder.c index 0e6f19c..ed5a77e 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -16,13 +16,19 @@ int main() { upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); upb_decoder_reset(d, upb_stdio_bytesrc(in)); upb_textprinter *p = upb_textprinter_new(); - upb_textprinter_reset(p, upb_stdio_bytesink(out), false); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false); + upb_src *src = upb_decoder_src(d); + upb_src_sethandlers(src, &handlers); upb_status status = UPB_STATUS_INIT; - upb_streamdata(upb_decoder_src(d), upb_textprinter_sink(p), &status); + upb_src_run(src, &status); + upb_printerr(&status); assert(upb_ok(&status)); + upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); -- cgit v1.2.3 From 9aa7e559d634a3ecf087ee376f82704e2290f478 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 30 Jan 2011 16:28:37 -0800 Subject: Fixes to decoder and textprinter: it works (for some input)! A protobuf -> text stream for descriptor.proto now outputs the same text as proto2. 
--- stream/upb_decoder.c | 98 ++++++++++++++++++++++++------------------------ stream/upb_textprinter.c | 19 ++++------ tests/test_decoder.c | 8 +++- 3 files changed, 62 insertions(+), 63 deletions(-) diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index a7a2c76..3a279a1 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -51,7 +51,6 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } // upb_decoder_frame is one frame of that stack. typedef struct { upb_msgdef *msgdef; - upb_fielddef *field; size_t end_offset; // For groups, 0. } upb_decoder_frame; @@ -82,29 +81,37 @@ typedef struct { // Our current position in the data buffer. const char *ptr; - // Number of bytes available at ptr, until either end-of-buf or - // end-of-submessage (whichever is smaller). + // End of this submessage, relative to *ptr. + const char *submsg_end; + + // Number of bytes available at ptr. size_t len; // Msgdef for the current level. upb_msgdef *msgdef; } upb_dstate; +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. +#define UPB_GROUP_END_OFFSET 0 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { s->ptr += len; s->len -= len; } -static upb_flow_t upb_pop(upb_decoder *d); +INLINE void upb_dstate_setmsgend(upb_decoder *d, upb_dstate *s) { + s->submsg_end = (d->top->end_offset == UPB_GROUP_END_OFFSET) ? + (void*)UINTPTR_MAX : + upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); +} -// Constant used to signal that the submessage is a group and therefore we -// don't know its end offset. This cannot be the offset of a real submessage -// end because it takes at least one byte to begin a submessage. 
-#define UPB_GROUP_END_OFFSET 0 -#define UPB_MAX_VARINT_ENCODED_SIZE 10 +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s); // Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting "buf" and "len" appropriately. +// from the stream to "data", adjusting the dstate appropriately. static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, upb_dstate *s) { while (1) { @@ -112,41 +119,17 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, memcpy(data, s->ptr, to_copy); upb_dstate_advance(s, to_copy); bytes_wanted -= to_copy; - if (bytes_wanted == 0) return true; - - // Did "len" indicate end-of-submessage or end-of-buffer? - ssize_t buf_offset = - d->buf ? ((const char*)s->ptr - upb_string_getrobuf(d->buf)) : 0; - if (d->top->end_offset > 0 && - d->top->end_offset == d->buf_stream_offset + buf_offset) { - // End-of-submessage. - if (bytes_wanted > 0) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); - return false; - } - if (upb_pop(d) != UPB_CONTINUE) return false; - } else { - // End-of-buffer. - if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); - upb_string_recycle(&d->buf); - if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - s->ptr = upb_string_getrobuf(d->buf); + if (bytes_wanted == 0) { + upb_dstate_setmsgend(d, s); + return true; } - // Wait for end-of-submessage or end-of-buffer, whichever comes first. - size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); - size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; - size_t submsg_remaining = - d->top->end_offset - d->buf_stream_offset - offset_in_buf; - if (d->top->end_offset == UPB_GROUP_END_OFFSET || - buf_remaining < submsg_remaining) { - s->len = buf_remaining; - } else { - // Check that non of our subtraction overflowed. 
- assert(d->top->end_offset > d->buf_stream_offset); - assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); - s->len = submsg_remaining; - } + // Get next buffer. + if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); + upb_string_recycle(&d->buf); + if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; + s->ptr = upb_string_getrobuf(d->buf); + s->len = upb_string_len(d->buf); } } @@ -266,7 +249,6 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, upb_value submsg_len, upb_fieldtype_t type) { - d->top->field = f; d->top++; if(d->top >= d->limit) { upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); @@ -274,13 +256,16 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, } d->top->end_offset = (type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET : - d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len); + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + + upb_value_getint32(submsg_len); d->top->msgdef = upb_downcast_msgdef(f->def); + upb_dstate_setmsgend(d, s); return upb_dispatch_startsubmsg(&d->dispatcher, f); } -static upb_flow_t upb_pop(upb_decoder *d) { +static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s) { d->top--; + upb_dstate_setmsgend(d, s); return upb_dispatch_endsubmsg(&d->dispatcher); } @@ -290,7 +275,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // We put our dstate on the stack so the compiler knows they can't be changed // by external code (like when we dispatch a callback). We must be sure not // to let its address escape this source file. 
- upb_dstate state = {NULL, 0, d->top->msgdef}; + upb_dstate state = {NULL, (void*)0x1, 0, d->top->msgdef}; upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG @@ -301,6 +286,15 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Main loop: executed once per tag/field pair. while(1) { + // Check for end-of-submessage. + while (state.ptr >= state.submsg_end) { + if (state.ptr > state.submsg_end) { + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); + goto err; + } + CHECK_FLOW(upb_pop(d, &state)); + } + // Parse/handle tag. upb_tag tag; if (!upb_decode_tag(d, &state, &tag)) { @@ -308,6 +302,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // Normal end-of-file. upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); + upb_string_unref(str); return; } else { if (status->code == UPB_EOF) { @@ -322,12 +317,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { // since most types will read a varint here. upb_value val; switch (tag.wire_type) { + case UPB_WIRE_TYPE_START_GROUP: + break; // Nothing to do now, below we will push appropriately. case UPB_WIRE_TYPE_END_GROUP: if(d->top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } - CHECK_FLOW(upb_pop(d)); + CHECK_FLOW(upb_pop(d, &state)); continue; // We have no value to dispatch. case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: @@ -383,6 +380,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { } err: + upb_string_unref(str); if (upb_ok(status)) { upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); } @@ -417,12 +415,14 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { d->bytesrc = bytesrc; d->top = &d->stack[0]; d->top->msgdef = d->toplevel_msgdef; - d->top->end_offset = SIZE_MAX; // never want to end top-level message. + // Never want to end top-level message, so treat it like a group. 
+ d->top->end_offset = UPB_GROUP_END_OFFSET; upb_string_unref(d->buf); d->buf = NULL; } void upb_decoder_free(upb_decoder *d) { + upb_string_unref(d->buf); free(d); } diff --git a/stream/upb_textprinter.c b/stream/upb_textprinter.c index 531da12..894a1ea 100644 --- a/stream/upb_textprinter.c +++ b/stream/upb_textprinter.c @@ -30,14 +30,6 @@ err: return -1; } -static int upb_textprinter_startfield(upb_textprinter *p, upb_fielddef *f) { - upb_textprinter_indent(p); - CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); - return 0; -err: - return -1; -} - static int upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) { CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); @@ -52,7 +44,8 @@ err: static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f, upb_value val) { upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); #define CASE(fmtstr, member) \ CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; switch(f->type) { @@ -105,11 +98,13 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f, upb_handlers *delegate_to) { (void)delegate_to; upb_textprinter *p = _p; - upb_textprinter_startfield(p, f); - p->indent_depth++; - upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status); + upb_textprinter_indent(p); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT " {", UPB_STRARG(f->name))); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); + p->indent_depth++; return UPB_CONTINUE; +err: + return UPB_BREAK; } static upb_flow_t upb_textprinter_endsubmsg(void *_p) diff --git a/tests/test_decoder.c b/tests/test_decoder.c index ed5a77e..f48472d 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -25,14 +25,18 @@ int main() { upb_status status = 
UPB_STATUS_INIT; upb_src_run(src, &status); - upb_printerr(&status); assert(upb_ok(&status)); - + upb_status_uninit(&status); upb_stdio_free(in); upb_stdio_free(out); upb_decoder_free(d); upb_textprinter_free(p); upb_def_unref(fds); upb_symtab_unref(symtab); + + // Prevent C library from holding buffers open, so Valgrind doesn't see + // memory leaks. + fclose(stdin); + fclose(stdout); } -- cgit v1.2.3 From 8465e5e65014ac080d62855f8abfd44acdf7beb2 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 2 Feb 2011 10:00:30 -0800 Subject: Gutted upb_msg a bit, re-adding only the essentials. --- Makefile | 15 +++++-- core/upb.h | 10 ++++- core/upb_msg.c | 121 ++++++++++++--------------------------------------------- core/upb_msg.h | 109 ++++++--------------------------------------------- 4 files changed, 56 insertions(+), 199 deletions(-) diff --git a/Makefile b/Makefile index 26e036e..bea6980 100644 --- a/Makefile +++ b/Makefile @@ -56,17 +56,24 @@ clean: deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) @./gen-deps.sh $(SRC) -# The core library (core/libupb.a) -SRC=core/upb.c \ +# The core library -- the absolute minimum you must compile in to successfully +# bootstrap. +CORE= \ + core/upb.c \ core/upb_table.c \ core/upb_string.c \ - descriptor/descriptor.c \ core/upb_def.c \ + descriptor/descriptor.c + +# Common encoders/decoders and upb_msg -- you're almost certain to want these. +STREAM= \ stream/upb_decoder.c \ stream/upb_stdio.c \ stream/upb_textprinter.c \ stream/upb_strstream.c \ -# core/upb_msg.c \ + core/upb_msg.c \ + +SRC=$(CORE) $(STREAM) $(SRC): perf-cppflags # Parts of core that are yet to be converted. diff --git a/core/upb.h b/core/upb.h index 7b228a0..243c7bc 100644 --- a/core/upb.h +++ b/core/upb.h @@ -136,7 +136,6 @@ typedef int32_t upb_strlen_t; // constant UPB_VALUETYPE_ARRAY to represent an array. 
typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_ARRAY 32 - #define UPB_VALUETYPE_BYTESRC 32 #define UPB_VALUETYPE_RAW 33 @@ -189,6 +188,8 @@ UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE)); +UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY); UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { @@ -196,6 +197,13 @@ INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { val->val.uint64 = cval; } +INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) { + assert(val.type == UPB_TYPE(MESSAGE) || + val.type == UPB_TYPE(STRING) || + val.type == UPB_VALUETYPE_ARRAY); + return val.val.refcount; +} + // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. 
typedef union { diff --git a/core/upb_msg.c b/core/upb_msg.c index 83191d2..e9f863d 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -10,29 +10,43 @@ #include "upb_decoder.h" #include "upb_strstream.h" -void _upb_elem_free(upb_value v, upb_fielddef *f) { +static void upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - _upb_msg_free(v.msg, upb_downcast_msgdef(f->def)); + _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def)); break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - _upb_string_free(v.str); + _upb_string_free(upb_value_getstr(v)); break; default: abort(); } } -void _upb_field_free(upb_value v, upb_fielddef *f) { +static void upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); + if (refcount && upb_atomic_unref(refcount)) + upb_elem_free(v, f); +} + +static void upb_field_free(upb_value v, upb_fielddef *f) { if (upb_isarray(f)) { - _upb_array_free(v.arr, f); + _upb_array_free(upb_value_getarr(v), f); } else { - _upb_elem_free(v, f); + upb_elem_free(v, f); } } +static void upb_field_unref(upb_value v, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); + if (refcount && upb_atomic_unref(refcount)) + upb_field_free(v, f); +} + upb_msg *upb_msg_new(upb_msgdef *md) { upb_msg *msg = malloc(md->size); // Clear all set bits and cached pointers. 
@@ -48,50 +62,11 @@ void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { upb_fielddef *f = upb_msg_iter_field(i); upb_valueptr p = _upb_msg_getptr(msg, f); upb_valuetype_t type = upb_field_valuetype(f); - if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f); + if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f); } free(msg); } -void upb_msg_recycle(upb_msg **_msg, upb_msgdef *md); - upb_msg *msg = *_msg; - if(msg && upb_atomic_only(&msg->refcount)) { - upb_msg_clear(msg); - } else { - upb_msg_unref(msg); - *_msg = upb_msg_new(); - } -} - -void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) { - upb_valueptr ptr; - if (upb_isarray(f)) { - } -} - -INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f); - assert(upb_field_ismm(f)); - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - upb_value val = upb_value_read(p, type); - if (!upb_msg_has(msg, f)) { - upb_msg_sethas(msg, f); - val = upb_field_tryrecycle(p, val, f, type); - } - return val; -} - -INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - if (upb_field_ismm(f)) { - _upb_field_unref(upb_value_read(p, type), f); - _upb_value_ref(val); - } - upb_msg_sethas(msg, f); - upb_value_write(p, val, upb_field_valuetype(f)); -} - INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); } @@ -112,61 +87,15 @@ void _upb_array_free(upb_array *arr, upb_fielddef *f) { upb_valuetype_t type = upb_elem_valuetype(f); for (upb_arraylen_t i = 0; i < arr->size; i++) { upb_valueptr p = _upb_array_getptr(arr, f, i); - _upb_elem_unref(upb_value_read(p, type), f); + upb_elem_unref(upb_value_read(p, type), f); } } if (arr->elements._void) free(arr->elements._void); free(arr); } -upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) { - upb_value v; - 
switch(type) { - case UPB_TYPE(MESSAGE): - case UPB_TYPE(GROUP): - v.msg = upb_msg_new(upb_downcast_msgdef(f->def)); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - v.str = upb_string_new(); - case UPB_VALUETYPE_ARRAY: - v.arr = upb_array_new(); - default: - abort(); - } - return v; -} - -static void upb_field_recycle(upb_value val) { - (void)val; -} - -upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, - upb_valuetype_t type) { - if (val._void == NULL || !upb_atomic_only(val.refcount)) { - if (val._void != NULL) upb_atomic_unref(val.refcount); - val = upb_field_new(f, type); - upb_value_write(p, val, type); - } else { - upb_field_recycle(val); +void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md, + upb_handlers *handlers, bool merge) { + static upb_handlerset handlerset = { } - return val; -} - -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, - upb_status *status) { - upb_stringsrc *ssrc = upb_stringsrc_new(); - upb_stringsrc_reset(ssrc, str); - upb_decoder *d = upb_decoder_new(md); - upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); - - upb_decoder_free(d); - upb_stringsrc_free(ssrc); -} - -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, - upb_status *status) { - (void)msg; - (void)md; - (void)str; - (void)status; } diff --git a/core/upb_msg.h b/core/upb_msg.h index 815a7cb..0569039 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -1,9 +1,15 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. * - * Data structure for storing a message of protobuf data. + * Data structure for storing a message of protobuf data. Unlike Google's + * protobuf, upb_msg and upb_array are reference counted instead of having + * exclusive ownership of their fields. 
This is a better match for dynamic + * languages where statements like a.b = other_b are normal. + * + * upb's parsers and serializers could also be used to populate and serialize + * other kinds of message objects (even one generated by Google's protobuf). */ #ifndef UPB_MSG_H @@ -17,24 +23,6 @@ extern "C" { #endif -upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f, - upb_valuetype_t type); - -INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); } - -void _upb_field_free(upb_value v, upb_fielddef *f); -void _upb_elem_free(upb_value v, upb_fielddef *f); -INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) { - assert(upb_field_ismm(f)); - if (v.refcount && upb_atomic_unref(v.refcount)) - _upb_field_free(v, f); -} -INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) { - assert(upb_elem_ismm(f)); - if (v.refcount && upb_atomic_unref(v.refcount)) - _upb_elem_free(v, f); -} - /* upb_array ******************************************************************/ typedef uint32_t upb_arraylen_t; @@ -63,47 +51,6 @@ INLINE uint32_t upb_array_len(upb_array *a) { return a->len; } -INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { - assert(elem < upb_array_len(a)); - return upb_value_read(_upb_array_getptr(a, f, elem), f->type); -} - -// For string or submessages, will release a ref on the previously set value. -// and take a ref on the new value. The array must already be at least "elem" -// long; to append use append_mutable. 
-INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, - upb_value val) { - assert(elem < upb_array_len(a)); - upb_valueptr p = _upb_array_getptr(a, f, elem); - if (upb_elem_ismm(f)) { - _upb_elem_unref(upb_value_read(p, f->type), f); - _upb_value_ref(val); - } - upb_value_write(p, val, f->type); -} - -INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { - if (a->len == a->size) { - a->len *= 2; - a->elements._void = realloc(a->elements._void, - a->len * upb_types[f->type].size); - } -} - -// Append an element to an array of string or submsg with the default value, -// returning it. This will try to reuse previously allocated memory. -INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { - - assert(upb_elem_ismm(f)); - upb_array_resize(a, f); - upb_valueptr p = _upb_array_getptr(a, f, a->len++); - upb_valuetype_t type = upb_elem_valuetype(f); - upb_value val = upb_value_read(p, type); - val = upb_field_tryrecycle(p, val, f, type); - return val; -} - - /* upb_msg ********************************************************************/ struct _upb_msg { @@ -111,19 +58,14 @@ struct _upb_msg { uint8_t data[4]; // We allocate the appropriate amount per message. }; -// INTERNAL-ONLY FUNCTIONS. - void _upb_msg_free(upb_msg *msg, upb_msgdef *md); -// Returns a pointer to the given field. INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { upb_valueptr p; p._void = &msg->data[f->byte_offset]; return p; } -// PUBLIC FUNCTIONS. - // Creates a new msg of the given type. upb_msg *upb_msg_new(upb_msgdef *md); @@ -143,38 +85,9 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } -// Used to obtain an empty message of the given type, attempting to reuse the -// memory pointed to by msg if it has no other referents. -void upb_msg_recycle(upb_msg **_msg, upb_msgdef *md); - -// For a repeated field, appends the given scalar value (ie. 
not a message or -// array) to the field's array; for non-repeated fields, overwrites the -// existing value with this one. -// REQUIRES: !upb_issubmsg(f) -void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val); - -upb_msg *upb_msg_append_emptymsg(upb_msg *msg, upb_fielddef *f); - -// Returns the current value of the given field if set, or the default value if -// not set. The returned value is not mutable! (In practice this only matters -// for submessages and arrays). -INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - if (upb_msg_has(msg, f)) { - return upb_value_read(_upb_msg_getptr(msg, f), f->type); - } else { - return f->default_value; - } -} - -// If the given string, submessage, or array is already set, returns it. -// Otherwise sets it and returns an empty instance, attempting to reuse any -// previously allocated memory. -INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f); - -// Sets the current value of the field. If this is a string, array, or -// submessage field, releases a ref on the value (if any) that was previously -// set. -INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); +// Registers a set of handlers that will populate this msgdef. +void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md, + upb_handlers *handlers); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3