summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2010-01-08 18:42:24 -0800
committerJoshua Haberman <joshua@reverberate.org>2010-01-08 18:42:24 -0800
commit527c0945000315ed8589436d2807bd5f8ad7a543 (patch)
tree1b1b214f99b294b8d47248a542e41694fbc8a671
parent5864888c6e7424af70d84802b55879f406d2d7b2 (diff)
Reduced the complexity of the cbparser interface.
-rw-r--r--TODO16
-rw-r--r--src/upb_data.c14
-rw-r--r--src/upb_parse.c24
-rw-r--r--src/upb_parse.h27
4 files changed, 47 insertions, 34 deletions
diff --git a/TODO b/TODO
index 28eb813..c8544b0 100644
--- a/TODO
+++ b/TODO
@@ -1,8 +1,15 @@
- functionality
- finish serialization
- - text format to string (not just FILE*)
- default values
- - enums with 0
+ - extensions
+ - MessageSet
+ - create msgdef with subset of fields
+
+ - lazy parsing
+ - strings by reference
+
+ - enums with 0 (is this already fixed?)
+ - text format to string (not just FILE*)
- python
- _list() accessor
- expose enums
@@ -15,8 +22,6 @@
- low-level encoding/decoding
- event parsing
- python
-- performance
- - strings by reference
- checkin suite
- benchmark diff (done)
- memory footprint diff
@@ -25,3 +30,6 @@
- -Werror no compiler warnings
- test suite
- verify that all headers compile with C++.
+- misc
+ - handle streaming where strings span buffer boundaries
+ - expose an unknown field callback
diff --git a/src/upb_data.c b/src/upb_data.c
index a0a588a..3dc39f5 100644
--- a/src/upb_data.c
+++ b/src/upb_data.c
@@ -324,10 +324,8 @@ static union upb_value_ptr get_value_ptr(upb_msg *msg, struct upb_fielddef *f)
// Callbacks for the stream parser.
// TODO: implement these in terms of public interfaces.
-static bool value_cb(void *udata, struct upb_msgdef *msgdef,
- struct upb_fielddef *f, union upb_value val)
+static bool value_cb(void *udata, struct upb_fielddef *f, union upb_value val)
{
- (void)msgdef;
struct upb_msgparser *mp = udata;
upb_msg *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, f);
@@ -336,22 +334,20 @@ static bool value_cb(void *udata, struct upb_msgdef *msgdef,
return true;
}
-static bool str_cb(void *udata, struct upb_msgdef *msgdef,
- struct upb_fielddef *f, const uint8_t *str, size_t avail_len,
- size_t total_len)
+static bool str_cb(void *udata, struct upb_fielddef *f, upb_strptr str,
+ int32_t start, uint32_t end)
{
- (void)msgdef;
struct upb_msgparser *mp = udata;
upb_msg *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_sethas(msg, f);
- if(avail_len != total_len) abort(); /* TODO: support streaming. */
+ if(end > upb_strlen(str)) abort(); /* TODO: support streaming. */
if(upb_string_isnull(*p.str) || !upb_data_only(*p.data)) {
if(!upb_string_isnull(*p.str))
upb_string_unref(*p.str);
*p.str = upb_string_new();
}
- upb_strcpylen(*p.str, str, avail_len);
+ upb_strcpylen(*p.str, upb_string_getrobuf(str) + start, end - start);
return true;
}
diff --git a/src/upb_parse.c b/src/upb_parse.c
index c4790c6..101792a 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -432,17 +432,19 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str,
bool keep_going = true;
// Make local copies so optimizer knows they won't change.
- upb_str_cb str_cb = p->str_cb;
- upb_value_cb value_cb = p->value_cb;
- void *udata = p->udata;
+ const upb_str_cb str_cb = p->str_cb;
+ const upb_value_cb value_cb = p->value_cb;
+ void *const udata = p->udata;
// We need to check the status of operations that can fail, but we do so as
// late as possible to avoid introducing branches that have to wait on
- // (status->code) which must be loaded from memory.
+ // (status->code) which must be loaded from memory. We must always check
+ // before calling a user callback.
#define CHECK_STATUS() do { if(!upb_ok(status)) goto err; } while(0)
- // Main loop: parse a tag, find the appropriate fielddef.
+ // Main loop: executed once per tag/field pair.
while(keep_going && buf < end) {
+ // Parse/handle tag.
struct upb_tag tag;
buf = parse_tag(buf, end, &tag, status);
if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
@@ -459,20 +461,23 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str,
continue;
}
+ // Look up field by tag number.
struct upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number);
+
+ // Parse/handle field.
if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
int32_t delim_len;
buf = upb_get_INT32(buf, end, &delim_len, status);
- CHECK_STATUS();
+ CHECK_STATUS(); // Checking parse_tag() and upb_get_INT32().
const uint8_t *delim_end = buf + delim_len;
if(f && f->type == UPB_TYPE(MESSAGE)) {
submsg_end = push(p, start, delim_end - start, f, status);
msgdef = p->top->msgdef;
} else {
if(f && upb_isstringtype(f->type)) {
- size_t avail_len = UPB_MIN(delim_end, end) - buf;
+ int32_t str_start = buf - start;
keep_going =
- str_cb(udata, msgdef, f, buf, avail_len, delim_end - buf);
+ str_cb(udata, f, str, str_start, str_start + delim_len);
} // else { TODO: packed arrays }
// If field was not found, it is skipped silently.
buf = delim_end; // Could be >end.
@@ -487,7 +492,8 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str,
union upb_value val;
buf = upb_parse_value(buf, end, f->type, upb_value_addrof(&val),
status);
- keep_going = value_cb(udata, msgdef, f, val);
+ CHECK_STATUS(); // Checking upb_parse_value().
+ keep_going = value_cb(udata, f, val);
}
}
CHECK_STATUS();
diff --git a/src/upb_parse.h b/src/upb_parse.h
index 056750f..9cc9974 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -32,16 +32,20 @@ extern "C" {
//
// Note that this callback can be called several times in a row for a single
// call to tag_cb in the case of packed arrays.
-typedef bool (*upb_value_cb)(void *udata, struct upb_msgdef *msgdef,
- struct upb_fielddef *f, union upb_value val);
+typedef bool (*upb_value_cb)(void *udata, struct upb_fielddef *f,
+ union upb_value val);
// The string callback is called when a string that was defined in the
-// upb_msgdef is parsed. avail_len is the number of bytes that are currently
-// available at str. If the client is streaming and the current buffer ends in
-// the middle of the string, this number could be less than total_len.
-typedef bool (*upb_str_cb)(void *udata, struct upb_msgdef *msgdef,
- struct upb_fielddef *f, const uint8_t *str,
- size_t avail_len, size_t total_len);
+// upb_msgdef is parsed. "str" is the protobuf data that is being parsed (NOT
+// the string in question); "start" and "end" are the start and end offset of
+// the string we parsed *within* str. The data is supplied this way to give
+// you the opportunity to reference this data instead of copying it (perhaps
+// using upb_strslice), or to minimize copying if it is unavoidable.
+//
+// Note that if you are parsing in a streaming fashion, start could be <0 and
+// "end" could be >upb_strlen(str).
+typedef bool (*upb_str_cb)(void *udata, struct upb_fielddef *f, upb_strptr str,
+ int32_t start, uint32_t end);
// The start and end callbacks are called when a submessage begins and ends,
// respectively.
@@ -114,11 +118,10 @@ typedef void (*upb_pp_str_cb)(void *udata, int fieldnum, uint8_t *str,
// new function will return NULL if any of the field names are invalid, or are
// repeated fields.
struct upb_pickparser *upb_pickparser_new(struct upb_msgdef *msgdef,
- char *fields[],
- upb_pp_value_cb value_cb,
- upb_pp_str_cb str_cb);
+ char *fields[]);
void upb_pickparser_free(struct upb_pickparser *p);
-void upb_pickparser_reset(struct upb_pickparser *p, void *udata);
+void upb_pickparser_reset(struct upb_pickparser *p,
+ bool found[], union upb_value vals[]);
size_t upb_pickparser_parse(struct upb_pickparser *p, upb_strptr str,
struct upb_status *status);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback