summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-11-14 21:59:31 -0800
committerJoshua Haberman <joshua@reverberate.org>2009-11-14 21:59:31 -0800
commit868f118797969cd0178d38207330e410267e6c46 (patch)
tree650c891eae81fc946e24e3a1cf0e9ee3e3c09c55 /src
parent7cde43ea0abf2022a0c800c7af1d5f1ec2033bea (diff)
Changed parse API to know about msgdefs.
This should make it both easier to use and easier to optimize, in exchange for a small amount of generality. In practice, any remotely normal case is still very natural.
Diffstat (limited to 'src')
-rw-r--r--src/upb.h11
-rw-r--r--src/upb_msg.c38
-rw-r--r--src/upb_parse.c189
-rw-r--r--src/upb_parse.h107
4 files changed, 165 insertions, 180 deletions
diff --git a/src/upb.h b/src/upb.h
index e8ec001..cc09ab1 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -140,13 +140,10 @@ union upb_value_ptr {
void *_void;
};
-// Unfortunately there is no way to define this so that it can be used as a
-// generic expression, a la:
-// foo(UPB_VALUE_ADDROF(bar));
-// ...you have to use it as the initializer of a upb_value_ptr:
-// union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
-// foo(p);
-#define UPB_VALUE_ADDROF(val) {(void*)&val._double}
+INLINE union upb_value_ptr upb_value_addrof(union upb_value *val) {
+ union upb_value_ptr ptr = {&val->_double};
+ return ptr;
+}
/**
* Converts upb_value_ptr -> upb_value by reading from the pointer. We need to
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 926eda0..3786a63 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -50,35 +50,24 @@ static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
/* Callbacks for the stream parser. */
-static upb_field_type_t tag_cb(void *udata, struct upb_tag *tag,
- void **user_field_desc)
+static bool value_cb(void *udata, struct upb_msgdef *msgdef,
+ struct upb_fielddef *f, union upb_value val)
{
+ (void)msgdef;
struct upb_msgparser *mp = udata;
- struct upb_fielddef *f =
- upb_msg_fieldbynum(mp->top->msg->def, tag->field_number);
- if(!f || !upb_check_type(tag->wire_type, f->type))
- return 0; /* Skip unknown or fields of the wrong type. */
- *user_field_desc = f;
- return f->type;
-}
-
-static void *value_cb(void *udata, uint8_t *buf, uint8_t *end,
- void *user_field_desc, struct upb_status *status)
-{
- struct upb_msgparser *mp = udata;
- struct upb_fielddef *f = user_field_desc;
struct upb_msg *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
- return upb_parse_value(buf, end, f->type, p, status);
+ upb_value_write(p, val, f->type);
+ return true;
}
-static void str_cb(void *udata, uint8_t *str,
- size_t avail_len, size_t total_len,
- void *udesc)
+static bool str_cb(void *udata, struct upb_msgdef *msgdef,
+ struct upb_fielddef *f, uint8_t *str, size_t avail_len,
+ size_t total_len)
{
+ (void)msgdef;
struct upb_msgparser *mp = udata;
- struct upb_fielddef *f = udesc;
struct upb_msg *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
@@ -98,12 +87,12 @@ static void str_cb(void *udata, uint8_t *str,
memcpy((*p.str)->ptr, str, avail_len);
(*p.str)->byte_len = avail_len;
//}
+ return true;
}
-static void start_cb(void *udata, void *user_field_desc)
+static void start_cb(void *udata, struct upb_fielddef *f)
{
struct upb_msgparser *mp = udata;
- struct upb_fielddef *f = user_field_desc;
struct upb_msg *oldmsg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(oldmsg, f);
@@ -131,15 +120,14 @@ static void end_cb(void *udata)
struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def)
{
- (void)def; // Not used atm.
struct upb_msgparser *mp = malloc(sizeof(struct upb_msgparser));
- mp->s = upb_cbparser_new();
+ mp->s = upb_cbparser_new(def, value_cb, str_cb, start_cb, end_cb);
return mp;
}
void upb_msgparser_reset(struct upb_msgparser *s, struct upb_msg *msg, bool byref)
{
- upb_cbparser_reset(s->s, s, tag_cb, value_cb, str_cb, start_cb, end_cb);
+ upb_cbparser_reset(s->s, s);
s->byref = byref;
s->top = s->stack;
s->top->msg = msg;
diff --git a/src/upb_parse.c b/src/upb_parse.c
index d1d535a..2e910f2 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -6,8 +6,10 @@
#include "upb_parse.h"
+#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
+#include "upb_def.h"
/* Functions to read wire values. *********************************************/
@@ -297,21 +299,38 @@ uint8_t *upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
#undef CASE
}
+struct upb_cbparser_frame {
+ struct upb_msgdef *msgdef;
+ size_t end_offset; // For groups, 0.
+};
+
struct upb_cbparser {
- // Stack entries store the offset where the submsg ends (for groups, 0).
- size_t stack[UPB_MAX_NESTING], *top, *limit;
- size_t completed_offset;
- void *udata;
- upb_tag_cb tag_cb;
+ // Immutable state of the parser.
+ struct upb_msgdef *toplevel_msgdef;
upb_value_cb value_cb;
upb_str_cb str_cb;
upb_start_cb start_cb;
upb_end_cb end_cb;
+
+ // State pertaining to a particular parse (resettable).
+ // Stack entries store the offset where the submsg ends (for groups, 0).
+ struct upb_cbparser_frame stack[UPB_MAX_NESTING], *top, *limit;
+ size_t completed_offset;
+ void *udata;
};
-struct upb_cbparser *upb_cbparser_new(void)
+struct upb_cbparser *upb_cbparser_new(struct upb_msgdef *msgdef,
+ upb_value_cb valuecb, upb_str_cb strcb,
+ upb_start_cb startcb, upb_end_cb endcb)
{
- return malloc(sizeof(struct upb_cbparser));
+ struct upb_cbparser *p = malloc(sizeof(struct upb_cbparser));
+ p->toplevel_msgdef = msgdef;
+ p->value_cb = valuecb;
+ p->str_cb = strcb;
+ p->start_cb = startcb;
+ p->end_cb = endcb;
+ p->limit = &p->stack[UPB_MAX_NESTING];
+ return p;
}
void upb_cbparser_free(struct upb_cbparser *p)
@@ -319,145 +338,165 @@ void upb_cbparser_free(struct upb_cbparser *p)
free(p);
}
-void upb_cbparser_reset(struct upb_cbparser *p, void *udata,
- upb_tag_cb tagcb,
- upb_value_cb valuecb,
- upb_str_cb strcb,
- upb_start_cb startcb,
- upb_end_cb endcb)
+void upb_cbparser_reset(struct upb_cbparser *p, void *udata)
{
p->top = p->stack;
- p->limit = &p->stack[UPB_MAX_NESTING];
p->completed_offset = 0;
p->udata = udata;
- p->tag_cb = tagcb;
- p->value_cb = valuecb;
- p->str_cb = strcb;
- p->start_cb = startcb;
- p->end_cb = endcb;
-
+ p->top->msgdef = p->toplevel_msgdef;
// The top-level message is not delimited (we can keep receiving data for it
// indefinitely), so we treat it like a group.
- *p->top = 0;
+ p->top->end_offset = 0;
}
+static void *get_msgend(struct upb_cbparser *p, uint8_t *start)
+{
+ if(p->top->end_offset > 0)
+ return start + (p->top->end_offset - p->completed_offset);
+ else
+ return (void*)UINTPTR_MAX; // group.
+}
+
+static bool isgroup(void *submsg_end)
+{
+ return submsg_end == (void*)UINTPTR_MAX;
+}
+
+extern upb_wire_type_t upb_expected_wire_types[];
+// Returns true if wt is the correct on-the-wire type for ft.
+INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
+ // This doesn't currently support packed arrays.
+ return upb_type_info[ft].expected_wire_type == wt;
+}
+
+
/**
* Pushes a new stack frame for a submessage with the given len (which will
* be zero if the submessage is a group).
*/
-static uint8_t *push(struct upb_cbparser *s, uint8_t *start,
- uint32_t submsg_len, void *user_field_desc,
+static uint8_t *push(struct upb_cbparser *p, uint8_t *start,
+ uint32_t submsg_len, struct upb_fielddef *f,
struct upb_status *status)
{
- s->top++;
- if(s->top >= s->limit) {
+ p->top++;
+ if(p->top >= p->limit) {
upb_seterr(status, UPB_STATUS_ERROR,
"Nesting exceeded maximum (%d levels)\n",
UPB_MAX_NESTING);
return NULL;
}
- *s->top = s->completed_offset + submsg_len;
+ struct upb_cbparser_frame *frame = p->top;
+ frame->end_offset = p->completed_offset + submsg_len;
+ frame->msgdef = f->ref.msg;
- if(s->start_cb)
- s->start_cb(s->udata, user_field_desc);
-
- if(*s->top > 0)
- return start + (*s->top - s->completed_offset);
- else
- return (void*)UINTPTR_MAX;
+ if(p->start_cb) p->start_cb(p->udata, f);
+ return get_msgend(p, start);
}
/**
* Pops a stack frame, returning a pointer for where the next submsg should
* end (or a pointer that is out of range for a group).
*/
-static void *pop(struct upb_cbparser *s, uint8_t *start)
+static void *pop(struct upb_cbparser *p, uint8_t *start)
{
- if(s->end_cb)
- s->end_cb(s->udata);
-
- s->top--;
-
- if(*s->top > 0)
- return (char*)start + (*s->top - s->completed_offset);
- else
- return (void*)UINTPTR_MAX; // group.
+ if(p->end_cb) p->end_cb(p->udata);
+ p->top--;
+ return get_msgend(p, start);
}
-size_t upb_cbparser_parse(struct upb_cbparser *s, void *_buf, size_t len,
+size_t upb_cbparser_parse(struct upb_cbparser *p, void *_buf, size_t len,
struct upb_status *status)
{
+ // buf is our current offset, moves from start to end.
uint8_t *buf = _buf;
- uint8_t *completed = buf;
- uint8_t *const start = buf; // ptr equivalent of s->completed_offset
+ uint8_t *const start = buf; // ptr equivalent of p->completed_offset
uint8_t *end = buf + len;
- uint8_t *submsg_end = *s->top > 0 ? buf + *s->top : (uint8_t*)UINTPTR_MAX;
+
+ // When we have fully parsed a tag/value pair, we advance this.
+ uint8_t *completed = buf;
+
+ uint8_t *submsg_end = get_msgend(p, start);
+ struct upb_msgdef *msgdef = p->top->msgdef;
+ bool keep_going = true;
// Make local copies so optimizer knows they won't change.
- upb_tag_cb tag_cb = s->tag_cb;
- upb_str_cb str_cb = s->str_cb;
- upb_value_cb value_cb = s->value_cb;
- void *udata = s->udata;
+ upb_str_cb str_cb = p->str_cb;
+ upb_value_cb value_cb = p->value_cb;
+ void *udata = p->udata;
+ // We need to check the status of operations that can fail, but we do so as
+ // late as possible to avoid introducing branches that have to wait on
+ // (status->code) which must be loaded from memory.
#define CHECK_STATUS() do { if(!upb_ok(status)) goto err; } while(0)
- // Main loop: parse a tag, then handle the value.
- while(buf < end) {
+ // Main loop: parse a tag, find the appropriate fielddef.
+ while(keep_going && buf < end) {
struct upb_tag tag;
buf = parse_tag(buf, end, &tag, status);
if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
CHECK_STATUS();
- submsg_end = pop(s, start);
+ if(!isgroup(submsg_end)) {
+ upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current "
+ "message is not a group, byte offset: %zd",
+ p->completed_offset + (completed - start));
+ goto err;
+ }
+ submsg_end = pop(p, start);
+ msgdef = p->top->msgdef;
completed = buf;
continue;
}
- void *udesc;
- upb_field_type_t ft = tag_cb(udata, &tag, &udesc);
+ struct upb_fielddef *f = upb_msg_fieldbynum(msgdef, tag.field_number);
if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
int32_t delim_len;
buf = upb_get_INT32(buf, end, &delim_len, status);
CHECK_STATUS();
uint8_t *delim_end = buf + delim_len;
- if(ft == UPB_TYPENUM(MESSAGE)) {
- submsg_end = push(s, start, delim_end - start, udesc, status);
+ if(f && f->type == UPB_TYPENUM(MESSAGE)) {
+ submsg_end = push(p, start, delim_end - start, f, status);
+ msgdef = p->top->msgdef;
} else {
- if(upb_isstringtype(ft)) {
+ if(f && upb_isstringtype(f->type)) {
size_t avail_len = UPB_MIN(delim_end, end) - buf;
- str_cb(udata, buf, avail_len, delim_end - buf, udesc);
+ keep_going =
+ str_cb(udata, msgdef, f, buf, avail_len, delim_end - buf);
} // else { TODO: packed arrays }
+ // If field was not found, it is skipped silently.
buf = delim_end; // Could be >end.
}
} else {
- // Scalar (non-delimited) value.
- switch(ft) {
- case 0: // Client elected to skip.
- buf = skip_wire_value(buf, end, tag.wire_type, status);
- break;
- case UPB_TYPENUM(GROUP):
- submsg_end = push(s, start, 0, udesc, status);
- break;
- default:
- buf = value_cb(udata, buf, end, udesc, status);
- break;
+ if(!f || !upb_check_type(tag.wire_type, f->type)) {
+ buf = skip_wire_value(buf, end, tag.wire_type, status);
+ } else if (f->type == UPB_TYPENUM(GROUP)) {
+ submsg_end = push(p, start, 0, f, status);
+ msgdef = p->top->msgdef;
+ } else {
+ union upb_value val;
+ buf = upb_parse_value(buf, end, f->type, upb_value_addrof(&val),
+ status);
+ keep_going = value_cb(udata, msgdef, f, val);
}
}
CHECK_STATUS();
while(buf >= submsg_end) {
if(buf > submsg_end) {
- return UPB_STATUS_ERROR; // Bad submessage end.
+ upb_seterr(status, UPB_STATUS_ERROR, "Expected submsg end offset "
+ "did not lie on a tag/value boundary.");
+ goto err;
}
- submsg_end = pop(s, start);
+ submsg_end = pop(p, start);
+ msgdef = p->top->msgdef;
}
- // while(buf < s->packed_end) { TODO: packed arrays }
+ // while(buf < p->packed_end) { TODO: packed arrays }
completed = buf;
}
size_t read;
err:
read = (char*)completed - (char*)start;
- s->completed_offset += read;
+ p->completed_offset += read;
return read;
}
diff --git a/src/upb_parse.h b/src/upb_parse.h
index 9e64a5b..6c26b83 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -6,6 +6,9 @@
* into in-memory messages (a more DOM-like model), see the routines in
* upb_msg.h, which are layered on top of this parser.
*
+ * TODO: the parser currently does not support returning unknown values. This
+ * can easily be added when it is needed.
+ *
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
@@ -23,105 +26,63 @@ extern "C" {
/* Event Callbacks. ***********************************************************/
-// The tag callback is called immediately after a tag has been parsed. The
-// client should determine whether it wants to parse or skip the corresponding
-// value. If it wants to parse it, it must discover and return the correct
-// .proto type (the tag only contains the wire type) and check that the wire
-// type is appropriate for the .proto type. Returning a type for which
-// upb_check_type(tag->wire_type, type) == false invokes undefined behavior.
-//
-// To skip the value (which means skipping all submessages, in the case of a
-// submessage), the callback should return zero.
-//
-// The client can store a void* in *user_field_desc; this will be passed to
-// the value callback or the string callback.
-typedef upb_field_type_t (*upb_tag_cb)(void *udata, struct upb_tag *tag,
- void **user_field_desc);
-
// The value callback is called when a regular value (ie. not a string or
-// submessage) is encountered which the client has opted to parse (by not
-// returning 0 from the tag_cb). The client must parse the value by calling
-// upb_parse_value(), returning success or failure accordingly.
+// submessage) is encountered which was defined in the upb_msgdef. The client
+// returns true to continue the parse or false to halt it.
//
// Note that this callback can be called several times in a row for a single
// tag in the case of packed arrays.
-typedef void *(*upb_value_cb)(void *udata, uint8_t *buf, uint8_t *end,
- void *user_field_desc, struct upb_status *status);
+typedef bool (*upb_value_cb)(void *udata, struct upb_msgdef *msgdef,
+ struct upb_fielddef *f, union upb_value val);
-// The string callback is called when a string is parsed. avail_len is the
-// number of bytes that are currently available at str. If the client is
-// streaming and the current buffer ends in the middle of the string, this
-// number could be less than total_len.
-typedef void (*upb_str_cb)(void *udata, uint8_t *str, size_t avail_len,
- size_t total_len, void *user_field_desc);
+// The string callback is called when a string that was defined in the
+// upb_msgdef is parsed. avail_len is the number of bytes that are currently
+// available at str. If the client is streaming and the current buffer ends in
+// the middle of the string, this number could be less than total_len.
+typedef bool (*upb_str_cb)(void *udata, struct upb_msgdef *msgdef,
+ struct upb_fielddef *f, uint8_t *str,
+ size_t avail_len, size_t total_len);
// The start and end callbacks are called when a submessage begins and ends,
// respectively.
-typedef void (*upb_start_cb)(void *udata, void *user_field_desc);
+typedef void (*upb_start_cb)(void *udata, struct upb_fielddef *f);
typedef void (*upb_end_cb)(void *udata);
/* Callback parser interface. *************************************************/
-// Allocates and frees a upb_cbparser, respectively.
-struct upb_cbparser *upb_cbparser_new(void);
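+// Allocates and frees a upb_cbparser, respectively. Callbacks may be NULL,
+// in which case they will be skipped. (NOTE: the parser currently NULL-checks
+// only startcb and endcb before calling them; valuecb and strcb are not.)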
+struct upb_cbparser *upb_cbparser_new(struct upb_msgdef *md,
+ upb_value_cb valuecb, upb_str_cb strcb,
+ upb_start_cb startcb, upb_end_cb endcb);
void upb_cbparser_free(struct upb_cbparser *p);
-// Resets the internal state of an already-allocated parser. Parsers must be
-// reset before they can be used. A parser can be reset multiple times. udata
-// will be passed as the first argument to callbacks.
-//
-// tagcb must be set, but all other callbacks can be NULL, in which case they
-// will just be skipped.
-void upb_cbparser_reset(struct upb_cbparser *p, void *udata,
- upb_tag_cb tagcb,
- upb_value_cb valuecb,
- upb_str_cb strcb,
- upb_start_cb startcb,
- upb_end_cb endcb);
-
+// Resets the internal state of an already-allocated parser. This puts it in a
+// state where it has not seen any data, and expects the next data to be from
+// the beginning of a new protobuf. Parsers must be reset before they can be
+// used. A parser can be reset multiple times. udata will be passed as the
+// first argument to callbacks.
+void upb_cbparser_reset(struct upb_cbparser *p, void *udata);
// Parses up to len bytes of protobuf data out of buf, calling the appropriate
// callbacks as values are parsed.
//
// Success or failure of the operation is reported through *status. Data
-// is parsed until no more data can be read from buf, or the callback returns an
-// error like UPB_STATUS_USER_CANCELLED, or an error occurs.
+// is parsed until no more data can be read from buf, or a user callback
+// returns false, or an error occurs.
//
-// *read is set to the number of bytes consumed. Note that this can be greater
-// than len in the case that a string was recognized that spans beyond the end
-// of the currently provided data.
+// The function returns the number of bytes consumed. Note that this can be
+// greater than len in the case that a string was recognized that spans beyond
+// the end of the currently provided data.
//
-// The next call to upb_parse must be the first byte after buf + *read, even in
-// the case that *read > len.
+// The next call to upb_cbparser_parse must be given data that begins at
+// buf + retval, even in the case that retval > len.
//
// TODO: see if we can provide the following guarantee efficiently:
-// *read will always be >= len. */
+// retval will always be >= len.
size_t upb_cbparser_parse(struct upb_cbparser *p, void *buf, size_t len,
struct upb_status *status);
-extern upb_wire_type_t upb_expected_wire_types[];
-// Returns true if wt is the correct on-the-wire type for ft.
-INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
- // This doesn't currently support packed arrays.
- return upb_type_info[ft].expected_wire_type == wt;
-}
-
-/* Data-consuming functions (to be called from value cb). *********************/
-
-// Parses and converts a value from the character data starting at buf (but not
-// past end). Returns a pointer that is one past the data that was read. The
-// caller must have previously checked that the wire type is appropriate for
-// this field type.
-uint8_t *upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
- union upb_value_ptr v, struct upb_status *status);
-
-// Parses a wire value with the given type (which must have been obtained from
-// a tag that was just parsed) and returns a pointer to one past the data that
-// was read.
-uint8_t *upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
- union upb_wire_value *wv,
- struct upb_status *status);
-
#ifdef __cplusplus
} /* extern "C" */
#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback