From f6ee9454093bb83c60ba62bfbcc104bb504b1b3d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 27 Aug 2009 22:05:49 -0700 Subject: Renamed upb_stream_parser -> upb_cbparser, and API cleanup. --- src/upb_msg.c | 30 ++++---- src/upb_parse.c | 60 +++++++++++---- src/upb_parse.h | 222 +++++++++++++++++++++++++++----------------------------- 3 files changed, 167 insertions(+), 145 deletions(-) (limited to 'src') diff --git a/src/upb_msg.c b/src/upb_msg.c index f977527..7bacc89 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -130,14 +130,14 @@ struct upb_msg_parser_frame { }; struct upb_msg_parser { - struct upb_stream_parser s; + struct upb_cbparser *s; bool merge; bool byref; struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top; }; -void upb_msg_parser_reset(struct upb_msg_parser *p, - struct upb_msg *msg, bool byref); +void upb_msgparser_init(struct upb_msg_parser *p, + struct upb_msg *msg, bool byref); /* Parses protocol buffer data out of data which has length of len. The data * need not be a complete protocol buffer. The number of bytes parsed is @@ -223,7 +223,7 @@ static void str_cb(void *udata, uint8_t *str, //} } -static void submsg_start_cb(void *udata, void *user_field_desc) +static void start_cb(void *udata, void *user_field_desc) { struct upb_msg_parser *mp = udata; struct upb_msg_fielddef *f = user_field_desc; @@ -244,7 +244,7 @@ static void submsg_start_cb(void *udata, void *user_field_desc) mp->top->msg = *p.msg; } -static void submsg_end_cb(void *udata) +static void end_cb(void *udata) { struct upb_msg_parser *mp = udata; struct upb_msg *msg = mp->top->msg; @@ -258,30 +258,32 @@ static void submsg_end_cb(void *udata) upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len) { struct upb_msg_parser mp; - upb_msg_parser_reset(&mp, msg, false); + upb_msgparser_init(&mp, msg, false); size_t read; upb_msg_clear(msg); upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read); + upb_msgparser_free(&mp); return ret; } -void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref) +void upb_msgparser_init(struct upb_msg_parser *s, struct upb_msg *msg, bool byref) { - upb_stream_parser_reset(&s->s, s); + s->s = upb_cbparser_new(); + upb_cbparser_reset(s->s, s, tag_cb, value_cb, str_cb, start_cb, end_cb); s->byref = byref; s->top = s->stack; s->top->msg = msg; - s->s.tag_cb = tag_cb; - s->s.value_cb = value_cb; - s->s.str_cb = str_cb; - s->s.submsg_start_cb = submsg_start_cb; - s->s.submsg_end_cb = submsg_end_cb; +} + +void upb_msgparser_free(struct upb_msg_parser *s) +{ + upb_cbparser_free(s->s); } upb_status_t upb_msg_parser_parse(struct upb_msg_parser *s, void *data, size_t len, size_t *read) { - return upb_stream_parser_parse(&s->s, data, len, read); + return upb_cbparser_parse(s->s, data, len, read); } /* Serialization. ************************************************************/ diff --git a/src/upb_parse.c b/src/upb_parse.c index baaeb99..bd511e4 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -98,23 +98,55 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, #undef CASE } -void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata) +struct upb_cbparser { + // Stack entries store the offset where the submsg ends (for groups, 0). + size_t stack[UPB_MAX_NESTING], *top, *limit; + size_t completed_offset; + void *udata; + upb_tag_cb tag_cb; + upb_value_cb value_cb; + upb_str_cb str_cb; + upb_start_cb start_cb; + upb_end_cb end_cb; +}; + +struct upb_cbparser *upb_cbparser_new(void) { - state->top = state->stack; - state->limit = &state->stack[UPB_MAX_NESTING]; - state->completed_offset = 0; - state->udata = udata; + return malloc(sizeof(struct upb_cbparser)); +} + +void upb_cbparser_free(struct upb_cbparser *p) +{ + free(p); +} + +void upb_cbparser_reset(struct upb_cbparser *p, void *udata, + upb_tag_cb tagcb, + upb_value_cb valuecb, + upb_str_cb strcb, + upb_start_cb startcb, + upb_end_cb endcb) +{ + p->top = p->stack; + p->limit = &p->stack[UPB_MAX_NESTING]; + p->completed_offset = 0; + p->udata = udata; + p->tag_cb = tagcb; + p->value_cb = valuecb; + p->str_cb = strcb; + p->start_cb = startcb; + p->end_cb = endcb; // The top-level message is not delimited (we can keep receiving data for it // indefinitely), so we treat it like a group. - *state->top = 0; + *p->top = 0; } /** * Pushes a new stack frame for a submessage with the given len (which will * be zero if the submessage is a group). */ -static upb_status_t push(struct upb_stream_parser *s, uint8_t *start, +static upb_status_t push(struct upb_cbparser *s, uint8_t *start, uint32_t submsg_len, void *user_field_desc, uint8_t **submsg_end) { @@ -123,8 +155,8 @@ static upb_status_t push(struct upb_stream_parser *s, uint8_t *start, return UPB_ERROR_STACK_OVERFLOW; *s->top = s->completed_offset + submsg_len; - if(s->submsg_start_cb) - s->submsg_start_cb(s->udata, user_field_desc); + if(s->start_cb) + s->start_cb(s->udata, user_field_desc); *submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0); return UPB_STATUS_OK; @@ -134,10 +166,10 @@ static upb_status_t push(struct upb_stream_parser *s, uint8_t *start, * Pops a stack frame, returning a pointer for where the next submsg should * end (or a pointer that is out of range for a group). */ -static void *pop(struct upb_stream_parser *s, uint8_t *start) +static void *pop(struct upb_cbparser *s, uint8_t *start) { - if(s->submsg_end_cb) - s->submsg_end_cb(s->udata); + if(s->end_cb) + s->end_cb(s->udata); s->top--; @@ -148,8 +180,8 @@ static void *pop(struct upb_stream_parser *s, uint8_t *start) } -upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s, - void *_buf, size_t len, size_t *read) +upb_status_t upb_cbparser_parse(struct upb_cbparser *s, void *_buf, size_t len, + size_t *read) { uint8_t *buf = _buf; uint8_t *completed = buf; diff --git a/src/upb_parse.h b/src/upb_parse.h index a8f4294..df26089 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -21,125 +21,114 @@ extern "C" { #endif -/* High-level parsing interface. **********************************************/ - -/* The general scheme is that the client registers callbacks that will be - * called at the appropriate times. These callbacks provide the client with - * data and let the client make decisions (like whether to parse or to skip - * a value). - * - * After initializing the parse state, the client can repeatedly call upb_parse - * as data becomes available. The parser is fully streaming-capable, so the - * data need not all be available at the same time. */ - -struct upb_stream_parser; - -/* Resets the internal state of an already-allocated parser. udata will be - * passed to callbacks as appropriate. */ -void upb_stream_parser_reset(struct upb_stream_parser *p, void *udata); - -/* The callback that is called immediately after a tag has been parsed. The - * client should determine whether it wants to parse or skip the corresponding - * value. If it wants to parse it, it must discover and return the correct - * .proto type (the tag only contains the wire type) and check that the wire - * type is appropriate for the .proto type. To skip the value (which means - * skipping all submessages, in the case of a submessage), the callback should - * return zero. */ -typedef upb_field_type_t (*upb_tag_cb)(void *udata, - struct upb_tag *tag, +/* Event Callbacks. ***********************************************************/ + +// The tag callback is called immediately after a tag has been parsed. The +// client should determine whether it wants to parse or skip the corresponding +// value. If it wants to parse it, it must discover and return the correct +// .proto type (the tag only contains the wire type) and check that the wire +// type is appropriate for the .proto type. To skip the value (which means +// skipping all submessages, in the case of a submessage), the callback should +// return zero. +// +// The client can store a void* in *user_field_desc; this will be passed to +// the value callback or the string callback. +typedef upb_field_type_t (*upb_tag_cb)(void *udata, struct upb_tag *tag, void **user_field_desc); -/* The callback that is called when a regular value (ie. not a string or - * submessage) is encountered which the client has opted to parse (by not - * returning 0 from the tag_cb). The client must parse the value and update - * buf accordingly, returning success or failure. - * - * Note that this callback can be called several times in a row for a single - * call to tag_cb in the case of packed arrays. */ +// The value callback is called when a regular value (ie. not a string or +// submessage) is encountered which the client has opted to parse (by not +// returning 0 from the tag_cb). The client must parse the value by calling +// upb_parse_value(), returning success or failure accordingly. +// +// Note that this callback can be called several times in a row for a single +// call to tag_cb in the case of packed arrays. typedef upb_status_t (*upb_value_cb)(void *udata, uint8_t *buf, uint8_t *end, void *user_field_desc, uint8_t **outbuf); -/* The callback that is called when a string is parsed. Note that the data - * for the string might not all be available -- we could be streaming, and - * the current buffer might end right in the middle of the string. So we - * pass both the available length and the total length. */ -typedef void (*upb_str_cb)(void *udata, uint8_t *str, - size_t avail_len, size_t total_len, - void *user_field_desc); - -/* Callbacks that are called when a submessage begins and ends, respectively. - * Both are called with the submessage's stack frame at the top of the stack. */ -typedef void (*upb_submsg_start_cb)(void *udata, - void *user_field_desc); -typedef void (*upb_submsg_end_cb)(void *udata); - -struct upb_stream_parser { - // Stack entries store the offset where the submsg ends (for groups, 0). - size_t stack[UPB_MAX_NESTING], *top, *limit; - size_t completed_offset; - void *udata; - upb_tag_cb tag_cb; - upb_value_cb value_cb; - upb_str_cb str_cb; - upb_submsg_start_cb submsg_start_cb; - upb_submsg_end_cb submsg_end_cb; -}; - -/* Parses up to len bytes of protobuf data out of buf, calling the appropriate - * callbacks as values are parsed. - * - * The function returns a status indicating the success of the operation. Data - * is parsed until no more data can be read from buf, or the callback returns an - * error like UPB_STATUS_USER_CANCELLED, or an error occurs. - * - * *read is set to the number of bytes consumed. Note that this can be greater - * than len in the case that a string was recognized that spans beyond the end - * of the currently provided data. - * - * The next call to upb_parse must be the first byte after buf + *read, even in - * the case that *read > len. - * - * TODO: see if we can provide the following guarantee efficiently: - * *read will always be >= len. */ -upb_status_t upb_stream_parser_parse(struct upb_stream_parser *p, - void *buf, size_t len, size_t *read); +// The string callback is called when a string is parsed. avail_len is the +// number of bytes that are currently available at str. If the client is +// streaming and the current buffer ends in the middle of the string, this +// number could be less than total_len. +typedef void (*upb_str_cb)(void *udata, uint8_t *str, size_t avail_len, + size_t total_len, void *user_field_desc); + +// The start and end callbacks are called when a submessage begins and ends, +// respectively. +typedef void (*upb_start_cb)(void *udata, void *user_field_desc); +typedef void (*upb_end_cb)(void *udata); + +/* Callback parser interface. *************************************************/ + +// Allocates and frees a upb_cbparser, respectively. +struct upb_cbparser *upb_cbparser_new(void); +void upb_cbparser_free(struct upb_cbparser *p); + +// Resets the internal state of an already-allocated parser. Parsers must be +// reset before they can be used. A parser can be reset multiple times. udata +// will be passed as the first argument to callbacks. +void upb_cbparser_reset(struct upb_cbparser *p, void *udata, + upb_tag_cb tagcb, + upb_value_cb valuecb, + upb_str_cb strcb, + upb_start_cb startcb, + upb_end_cb endcb); + + +// Parses up to len bytes of protobuf data out of buf, calling the appropriate +// callbacks as values are parsed. +// +// The function returns a status indicating the success of the operation. Data +// is parsed until no more data can be read from buf, or the callback returns an +// error like UPB_STATUS_USER_CANCELLED, or an error occurs. +// +// *read is set to the number of bytes consumed. Note that this can be greater +// than len in the case that a string was recognized that spans beyond the end +// of the currently provided data. +// +// The next call to upb_parse must be the first byte after buf + *read, even in +// the case that *read > len. +// +// TODO: see if we can provide the following guarantee efficiently: +// *read will always be >= len. */ +upb_status_t upb_cbparser_parse(struct upb_cbparser *p, void *buf, size_t len, + size_t *read); extern upb_wire_type_t upb_expected_wire_types[]; -/* Returns true if wt is the correct on-the-wire type for ft. */ +// Returns true if wt is the correct on-the-wire type for ft. INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - /* This doesn't currently support packed arrays. */ + // This doesn't currently support packed arrays. return upb_type_info[ft].expected_wire_type == wt; } /* Data-consuming functions (to be called from value cb). *********************/ -/* Parses and converts a value from the character data starting at buf. The - * caller must have previously checked that the wire type is appropriate for - * this field type. */ +// Parses and converts a value from the character data starting at buf (but not +// past end). *outbuf will be set to one past the data that was read. The +// caller must have previously checked that the wire type is appropriate for +// this field type. upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, union upb_value_ptr v, uint8_t **outbuf); -/* Parses a wire value with the given type (which must have been obtained from - * a tag that was just parsed) and adds the number of bytes that were consumed - * to *offset. */ +// Parses a wire value with the given type (which must have been obtained from +// a tag that was just parsed) and sets *outbuf to one past the data that was +// read. upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, union upb_wire_value *wv, uint8_t **outbuf); /* Functions to read wire values. *********************************************/ -/* In general, these should never be called directly from any code outside upb. - * They are included here only because we expect them to get inlined inside the - * value-reading functions below. */ +// Most clients will not want to use these directly. upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, uint8_t **outbuf); -/* Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). */ +// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val, uint8_t **outbuf) { - /* We inline this common case (1-byte varints), if that fails we dispatch to - * the full (non-inlined) version. */ + // We inline this common case (1-byte varints), if that fails we dispatch to + // the full (non-inlined) version. if((*buf & 0x80) == 0) { *val = *buf & 0x7f; *outbuf = buf + 1; @@ -149,17 +138,17 @@ INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val } } -/* Gets a varint -- called when we only need 32 bits of it. */ +// Gets a varint -- called when we only need 32 bits of it. INLINE upb_status_t upb_get_v_uint32_t(uint8_t *buf, uint8_t *end, uint32_t *val, uint8_t **outbuf) { uint64_t val64; UPB_CHECK(upb_get_v_uint64_t(buf, end, &val64, outbuf)); - *val = (uint32_t)val64; /* Discard the high bits. */ + *val = (uint32_t)val64; // Discard the high bits. return UPB_STATUS_OK; } -/* Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). */ +// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end, uint32_t *val, uint8_t **outbuf) { @@ -176,7 +165,7 @@ INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end, return UPB_STATUS_OK; } -/* Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). */ +// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val, uint8_t **outbuf) { @@ -207,8 +196,8 @@ INLINE upb_status_t upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end, return UPB_STATUS_OK; } -INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t - **outbuf) +INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, + uint8_t **outbuf) { uint8_t *uint32_end = buf + sizeof(uint32_t); if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; @@ -216,8 +205,8 @@ INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t return UPB_STATUS_OK; } -INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t - **outbuf) +INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, + uint8_t **outbuf) { uint8_t *uint64_end = buf + sizeof(uint64_t); if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; @@ -228,27 +217,26 @@ INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t /* Functions to read .proto values. *******************************************/ -/* These functions read the appropriate wire value for a given .proto type - * and then convert it based on the .proto type. These are the most efficient - * functions to call if you want to decode a value for a known type. */ -/* Performs zig-zag decoding, which is used by sint32 and sint64. */ +// Performs zig-zag decoding, which is used by sint32 and sint64. INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -/* Use macros to define a set of two functions for each .proto type: - * - * // Reads and converts a .proto value from buf, placing it in d. - * // "end" indicates the end of the current buffer (if the buffer does - * // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). - * // On success, *outbuf will point to the first byte that was not consumed. - * upb_status_t upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d, - * uint8_t **outbuf); - * - * // Given an already read wire value s (source), convert it to a .proto - * // value and return it. - * int32_t upb_wvtov_INT32(uint32_t s); - */ +// Use macros to define a set of two functions for each .proto type: +// +// // Reads and converts a .proto value from buf, placing it in d. +// // "end" indicates the end of the current buffer (if the buffer does +// // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). +// // On success, *outbuf will point to the first byte that was not consumed. +// upb_status_t upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d, +// uint8_t **outbuf); +// +// // Given an already read wire value s (source), convert it to a .proto +// // value and return it. +// int32_t upb_wvtov_INT32(uint32_t s); +// +// These are the most efficient functions to call if you want to decode a value +// for a known type. #define WVTOV(type, wire_t, val_t) \ INLINE val_t upb_wvtov_ ## type(wire_t s) @@ -294,7 +282,7 @@ T(FLOAT, f, uint32_t, float, _float) { #undef GET #undef T -/* Parses a tag, places the result in *tag. */ +// Parses a tag, places the result in *tag. INLINE upb_status_t parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag, uint8_t **outbuf) { -- cgit v1.2.3