From 527c0945000315ed8589436d2807bd5f8ad7a543 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 8 Jan 2010 18:42:24 -0800 Subject: Reduced the complexity of the cbparser interface. --- TODO | 16 ++++++++++++---- src/upb_data.c | 14 +++++--------- src/upb_parse.c | 24 +++++++++++++++--------- src/upb_parse.h | 27 +++++++++++++++------------ 4 files changed, 47 insertions(+), 34 deletions(-) diff --git a/TODO b/TODO index 28eb813..c8544b0 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,15 @@ - functionality - finish serialization - - text format to string (not just FILE*) - default values - - enums with 0 + - extensions + - MessageSet + - create msgdef with subset of fields + + - lazy parsing + - strings by reference + + - enums with 0 (is this already fixed?) + - text format to string (not just FILE*) - python - _list() accessor - expose enums @@ -15,8 +22,6 @@ - low-level encoding/decoding - event parsing - python -- performance - - strings by reference - checkin suite - benchmark diff (done) - memory footprint diff @@ -25,3 +30,6 @@ - -Werror no compiler warnings - test suite - verify that all headers compile with C++. +- misc + - handle streaming where strings span buffer boundaries + - expose an unknown field callback diff --git a/src/upb_data.c b/src/upb_data.c index a0a588a..3dc39f5 100644 --- a/src/upb_data.c +++ b/src/upb_data.c @@ -324,10 +324,8 @@ static union upb_value_ptr get_value_ptr(upb_msg *msg, struct upb_fielddef *f) // Callbacks for the stream parser. // TODO: implement these in terms of public interfaces. -static bool value_cb(void *udata, struct upb_msgdef *msgdef, - struct upb_fielddef *f, union upb_value val) +static bool value_cb(void *udata, struct upb_fielddef *f, union upb_value val) { - (void)msgdef; struct upb_msgparser *mp = udata; upb_msg *msg = mp->top->msg; union upb_value_ptr p = get_value_ptr(msg, f); @@ -336,22 +334,20 @@ static bool value_cb(void *udata, struct upb_msgdef *msgdef, return true; } -static bool str_cb(void *udata, struct upb_msgdef *msgdef, - struct upb_fielddef *f, const uint8_t *str, size_t avail_len, - size_t total_len) +static bool str_cb(void *udata, struct upb_fielddef *f, upb_strptr str, + int32_t start, uint32_t end) { - (void)msgdef; struct upb_msgparser *mp = udata; upb_msg *msg = mp->top->msg; union upb_value_ptr p = get_value_ptr(msg, f); upb_msg_sethas(msg, f); - if(avail_len != total_len) abort(); /* TODO: support streaming. */ + if(end > upb_strlen(str)) abort(); /* TODO: support streaming. */ if(upb_string_isnull(*p.str) || !upb_data_only(*p.data)) { if(!upb_string_isnull(*p.str)) upb_string_unref(*p.str); *p.str = upb_string_new(); } - upb_strcpylen(*p.str, str, avail_len); + upb_strcpylen(*p.str, upb_string_getrobuf(str) + start, end - start); return true; } diff --git a/src/upb_parse.c b/src/upb_parse.c index c4790c6..101792a 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -432,17 +432,19 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, bool keep_going = true; // Make local copies so optimizer knows they won't change. - upb_str_cb str_cb = p->str_cb; - upb_value_cb value_cb = p->value_cb; - void *udata = p->udata; + const upb_str_cb str_cb = p->str_cb; + const upb_value_cb value_cb = p->value_cb; + void *const udata = p->udata; // We need to check the status of operations that can fail, but we do so as // late as possible to avoid introducing branches that have to wait on - // (status->code) which must be loaded from memory. + // (status->code) which must be loaded from memory. We must always check + // before calling a user callback. #define CHECK_STATUS() do { if(!upb_ok(status)) goto err; } while(0) - // Main loop: parse a tag, find the appropriate fielddef. + // Main loop: executed once per tag/field pair. while(keep_going && buf < end) { + // Parse/handle tag. struct upb_tag tag; buf = parse_tag(buf, end, &tag, status); if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) { @@ -459,20 +461,23 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, continue; } + // Look up field by tag number. struct upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); + + // Parse/handle field. if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { int32_t delim_len; buf = upb_get_INT32(buf, end, &delim_len, status); - CHECK_STATUS(); + CHECK_STATUS(); // Checking parse_tag() and upb_get_INT32(). const uint8_t *delim_end = buf + delim_len; if(f && f->type == UPB_TYPE(MESSAGE)) { submsg_end = push(p, start, delim_end - start, f, status); msgdef = p->top->msgdef; } else { if(f && upb_isstringtype(f->type)) { - size_t avail_len = UPB_MIN(delim_end, end) - buf; + int32_t str_start = buf - start; keep_going = - str_cb(udata, msgdef, f, buf, avail_len, delim_end - buf); + str_cb(udata, f, str, str_start, str_start + delim_len); } // else { TODO: packed arrays } // If field was not found, it is skipped silently. buf = delim_end; // Could be >end. @@ -487,7 +492,8 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, union upb_value val; buf = upb_parse_value(buf, end, f->type, upb_value_addrof(&val), status); - keep_going = value_cb(udata, msgdef, f, val); + CHECK_STATUS(); // Checking upb_parse_value(). + keep_going = value_cb(udata, f, val); } } CHECK_STATUS(); diff --git a/src/upb_parse.h b/src/upb_parse.h index 056750f..9cc9974 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -32,16 +32,20 @@ extern "C" { // // Note that this callback can be called several times in a row for a single // call to tag_cb in the case of packed arrays. -typedef bool (*upb_value_cb)(void *udata, struct upb_msgdef *msgdef, - struct upb_fielddef *f, union upb_value val); +typedef bool (*upb_value_cb)(void *udata, struct upb_fielddef *f, + union upb_value val); // The string callback is called when a string that was defined in the -// upb_msgdef is parsed. avail_len is the number of bytes that are currently -// available at str. If the client is streaming and the current buffer ends in -// the middle of the string, this number could be less than total_len. -typedef bool (*upb_str_cb)(void *udata, struct upb_msgdef *msgdef, - struct upb_fielddef *f, const uint8_t *str, - size_t avail_len, size_t total_len); +// upb_msgdef is parsed. "str" is the protobuf data that is being parsed (NOT +// the string in question); "start" and "end" are the start and end offset of +// the string we parsed *within* str. The data is supplied this way to give +// you the opportunity to reference this data instead of copying it (perhaps +// using upb_strslice), or to minimize copying if it is unavoidable. +// +// Note that if you are parsing in a streaming fashion, start could be <0 and +// "end" could be >upb_strlen(str). +typedef bool (*upb_str_cb)(void *udata, struct upb_fielddef *f, upb_strptr str, + int32_t start, uint32_t end); // The start and end callbacks are called when a submessage begins and ends, // respectively. @@ -114,11 +118,10 @@ typedef void (*upb_pp_str_cb)(void *udata, int fieldnum, uint8_t *str, // new function will return NULL if any of the field names are invalid, or are // repeated fields. struct upb_pickparser *upb_pickparser_new(struct upb_msgdef *msgdef, - char *fields[], - upb_pp_value_cb value_cb, - upb_pp_str_cb str_cb); + char *fields[]); void upb_pickparser_free(struct upb_pickparser *p); -void upb_pickparser_reset(struct upb_pickparser *p, void *udata); +void upb_pickparser_reset(struct upb_pickparser *p, + bool found[], union upb_value vals[]); size_t upb_pickparser_parse(struct upb_pickparser *p, upb_strptr str, struct upb_status *status); -- cgit v1.2.3