summary | refs | log | tree | commit | diff
path: root/src/upb_parse.c
diff options
context:
space:
mode:
author: Joshua Haberman <joshua@reverberate.org> 2009-08-27 11:10:13 -0700
committer: Joshua Haberman <joshua@reverberate.org> 2009-08-27 11:10:13 -0700
commit: f17ed90f7704d77e3eb59a6f6b693ab4c598936a (patch)
tree: 48c0b69db3984577d77219eb861783b6bed16d24 /src/upb_parse.c
parent: b276aa78b6d04a35420ce699ae51771550b97d11 (diff)
Some cleanup and reformatting, fixed the benchmarks.
Diffstat (limited to 'src/upb_parse.c')
-rw-r--r-- src/upb_parse.c 180
1 files changed, 100 insertions, 80 deletions
diff --git a/src/upb_parse.c b/src/upb_parse.c
index 7c1ad66..baaeb99 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -9,32 +9,10 @@
#include <stddef.h>
#include <stdlib.h>
-/* May want to move this to upb.c if enough other things warrant it. */
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype},
-struct upb_type_info upb_type_info[] = {
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64, UPB_WIRE_TYPE_VARINT, int64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64, UPB_WIRE_TYPE_VARINT, uint64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32, UPB_WIRE_TYPE_VARINT, int32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL, UPB_WIRE_TYPE_VARINT, bool)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP, void*)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32, UPB_WIRE_TYPE_VARINT, uint32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM, UPB_WIRE_TYPE_VARINT, uint32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32, UPB_WIRE_TYPE_VARINT, int32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64, UPB_WIRE_TYPE_VARINT, int64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
-};
-
-/* This is called by the inline version of the function if the varint turns out
- * to be >= 2 bytes. */
+/**
+ * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
+ * handles 1 and 2 byte varints).
+ */
upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t **outbuf)
{
@@ -42,10 +20,15 @@ upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t last = 0x80;
*val = 0;
int bitpos;
+
for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
*val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
- if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA;
- if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT;
+
+ if(buf >= end && buf <= maxend && (last & 0x80))
+ return UPB_STATUS_NEED_MORE_DATA;
+ if(buf > maxend)
+ return UPB_ERROR_UNTERMINATED_VARINT;
+
*outbuf = buf;
return UPB_STATUS_OK;
}
@@ -54,23 +37,37 @@ upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt
union upb_wire_value *wv, uint8_t **outbuf)
{
switch(wt) {
- case UPB_WIRE_TYPE_VARINT: return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
- case UPB_WIRE_TYPE_64BIT: return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
- case UPB_WIRE_TYPE_32BIT: return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
- default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */
+ case UPB_WIRE_TYPE_VARINT:
+ return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
+ case UPB_WIRE_TYPE_64BIT:
+ return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
+ case UPB_WIRE_TYPE_32BIT:
+ return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
+ default:
+ return UPB_ERROR_ILLEGAL; // Doesn't handle delimited, groups.
}
}
+/**
+ * Advances buf past the current wire value (of type wt), saving the result in
+ * outbuf.
+ */
static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
uint8_t **outbuf)
{
switch(wt) {
- case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_64BIT: return upb_skip_f_uint64_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_32BIT: return upb_skip_f_uint32_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
- case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK;
- default: return UPB_ERROR_ILLEGAL;
+ case UPB_WIRE_TYPE_VARINT:
+ return upb_skip_v_uint64_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_64BIT:
+ return upb_skip_f_uint64_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_32BIT:
+ return upb_skip_f_uint32_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_START_GROUP:
+ // TODO: skip to matching end group.
+ case UPB_WIRE_TYPE_END_GROUP:
+ return UPB_STATUS_OK;
+ default:
+ return UPB_ERROR_ILLEGAL;
}
}
@@ -78,8 +75,8 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
union upb_value_ptr v, uint8_t **outbuf)
{
#define CASE(t, member_name) \
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
- return upb_get_ ## t(buf, end, v.member_name, outbuf);
+ case UPB_TYPENUM(t): return upb_get_ ## t(buf, end, v.member_name, outbuf);
+
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@@ -97,6 +94,7 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
CASE(ENUM, int32)
default: return UPB_ERROR_ILLEGAL;
}
+
#undef CASE
}
@@ -104,55 +102,76 @@ void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata)
{
state->top = state->stack;
state->limit = &state->stack[UPB_MAX_NESTING];
- /* The top-level message is not delimited (we can keep receiving data for
- * it indefinitely), so we treat it like a group. */
- *state->top = 0;
state->completed_offset = 0;
state->udata = udata;
-}
-static void *pop_stack_frame(struct upb_stream_parser *s, uint8_t *buf)
-{
- if(s->submsg_end_cb) s->submsg_end_cb(s->udata);
- s->top--;
- return (char*)buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
+ // The top-level message is not delimited (we can keep receiving data for it
+ // indefinitely), so we treat it like a group.
+ *state->top = 0;
}
-/* Returns the next end offset. */
-static upb_status_t push_stack_frame(struct upb_stream_parser *s,
- uint8_t *buf, uint32_t len,
- void *user_field_desc, uint8_t **submsg_end)
+/**
+ * Pushes a new stack frame for a submessage with the given len (which will
+ * be zero if the submessage is a group).
+ */
+static upb_status_t push(struct upb_stream_parser *s, uint8_t *start,
+ uint32_t submsg_len, void *user_field_desc,
+ uint8_t **submsg_end)
{
s->top++;
- if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW;
- *s->top = s->completed_offset + len;
- if(s->submsg_start_cb) s->submsg_start_cb(s->udata, user_field_desc);
- *submsg_end = buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
+ if(s->top >= s->limit)
+ return UPB_ERROR_STACK_OVERFLOW;
+ *s->top = s->completed_offset + submsg_len;
+
+ if(s->submsg_start_cb)
+ s->submsg_start_cb(s->udata, user_field_desc);
+
+ *submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
return UPB_STATUS_OK;
}
+/**
+ * Pops a stack frame, returning a pointer for where the next submsg should
+ * end (or a pointer that is out of range for a group).
+ */
+static void *pop(struct upb_stream_parser *s, uint8_t *start)
+{
+ if(s->submsg_end_cb)
+ s->submsg_end_cb(s->udata);
+
+ s->top--;
+
+ if(*s->top > 0)
+ return (char*)start + (*s->top - s->completed_offset);
+ else
+ return (char*)start; // group.
+}
+
+
upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
void *_buf, size_t len, size_t *read)
{
uint8_t *buf = _buf;
uint8_t *completed = buf;
- uint8_t *const start = buf;
+ uint8_t *const start = buf; // ptr equivalent of s->completed_offset
uint8_t *end = buf + len;
uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0);
upb_status_t status = UPB_STATUS_OK;
- /* Make local copies so optimizer knows they won't change. */
+ // Make local copies so optimizer knows they won't change.
upb_tag_cb tag_cb = s->tag_cb;
upb_str_cb str_cb = s->str_cb;
upb_value_cb value_cb = s->value_cb;
void *udata = s->udata;
- /* Main loop: parse a tag, then handle the value. */
+#define CHECK(exp) do { if((status = exp) != UPB_STATUS_OK) goto err; } while(0)
+
+ // Main loop: parse a tag, then handle the value.
while(buf < end) {
struct upb_tag tag;
- UPB_CHECK(parse_tag(buf, end, &tag, &buf));
+ CHECK(parse_tag(buf, end, &tag, &buf));
if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
- submsg_end = pop_stack_frame(s, start);
+ submsg_end = pop(s, start);
completed = buf;
continue;
}
@@ -161,38 +180,39 @@ upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
upb_field_type_t ft = tag_cb(udata, &tag, &udesc);
if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
int32_t delim_len;
- UPB_CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
+ CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
uint8_t *delim_end = buf + delim_len;
- if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
- UPB_CHECK(push_stack_frame(
- s, start, delim_end - start, udesc, &submsg_end));
+ if(ft == UPB_TYPENUM(MESSAGE)) {
+ CHECK(push(s, start, delim_end - start, udesc, &submsg_end));
} else {
- if(upb_isstringtype(ft))
- str_cb(udata, buf, UPB_MIN(delim_end, end) - buf, delim_end - buf, udesc);
- //else
- // /* Set a marker for packed arrays. */
- buf = delim_end; /* Note that this could be greater than end. */
+ if(upb_isstringtype(ft)) {
+ size_t avail_len = UPB_MIN(delim_end, end) - buf;
+ str_cb(udata, buf, avail_len, delim_end - buf, udesc);
+ } // else { TODO: packed arrays }
+ buf = delim_end; // Could be >end.
}
- } else { /* Scalar (non-delimited) value. */
+ } else {
+ // Scalar (non-delimited) value.
switch(ft) {
- case 0: /* Client elected to skip. */
- UPB_CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
+ case 0: // Client elected to skip.
+ CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
break;
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
- UPB_CHECK(push_stack_frame(s, start, 0, udesc, &submsg_end));
+ case UPB_TYPENUM(GROUP):
+ CHECK(push(s, start, 0, udesc, &submsg_end));
break;
default:
- UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
+ CHECK(value_cb(udata, buf, end, udesc, &buf));
break;
}
}
- while(buf == submsg_end) submsg_end = pop_stack_frame(s, start);
- //while(buf < s->packed_end) /* packed arrays. */
- // UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
+ while(buf == submsg_end)
+ submsg_end = pop(s, start);
+ // while(buf < s->packed_end) { TODO: packed arrays }
completed = buf;
}
+err:
*read = (char*)completed - (char*)start;
s->completed_offset += *read;
return status;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback