summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-07-29 20:37:32 -0700
committerJoshua Haberman <joshua@reverberate.org>2009-07-29 20:37:32 -0700
commit4240e0e5989fadd367c29651e0a0b01d499927b0 (patch)
treebbcd5db02000e662e040725600c9674e37e4d667 /src
parent5fa6912da86d8f264a23c9545c1a877a31603a49 (diff)
Updated parser semantics to better support streaming.
Diffstat (limited to 'src')
-rw-r--r--src/upb_msg.c15
-rw-r--r--src/upb_parse.c100
-rw-r--r--src/upb_parse.h37
-rw-r--r--src/upb_string.h6
4 files changed, 68 insertions, 90 deletions
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 6d6d934..bdebe0d 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -291,18 +291,23 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end,
return UPB_STATUS_OK;
}
-static void str_cb(void *udata, struct upb_string *str, void *user_field_desc)
+static void str_cb(void *udata, uint8_t *str,
+ size_t avail_len, size_t total_len,
+ void *udesc)
{
struct upb_msg_parse_state *s = udata;
- struct upb_msg_field *f = user_field_desc;
+ struct upb_msg_field *f = udesc;
union upb_value_ptr p = get_value_ptr(s->top->data, f);
upb_msg_set(s->top->data, f);
+ if(avail_len != total_len) abort(); /* TODO: support streaming. */
if(s->byref) {
upb_msg_reuse_strref(p.str);
- **p.str = *str;
+ (*p.str)->ptr = (char*)str;
+ (*p.str)->byte_len = avail_len;
} else {
- upb_msg_reuse_str(p.str, str->byte_len);
- upb_strcpy(*p.str, str);
+ upb_msg_reuse_str(p.str, avail_len);
+ memcpy((*p.str)->ptr, str, avail_len);
+ (*p.str)->byte_len = avail_len;
}
//google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, s->top->m);
//upb_text_printfield(&s->p, *fd->name, f->type, upb_deref(p, fd->type), stdout);
diff --git a/src/upb_parse.c b/src/upb_parse.c
index 4e1f4a5..a8fa3a6 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -39,36 +39,12 @@ upb_status_t upb_get_v_uint64_t_full(uint8_t *restrict buf, uint8_t *end,
uint64_t *restrict val,
uint8_t **outbuf)
{
- if(buf + 10 <= end) {
- /* >2-byte varint, fast path. */
- uint64_t cont = *(uint64_t*)(buf+2) | 0x7f7f7f7f7f7f7f7fULL;
- int num_bytes = __builtin_ffsll(~cont) / 8;
- uint32_t part0 = 0, part1 = 0, part2 = 0;
-
- switch(num_bytes) {
- default: return UPB_ERROR_UNTERMINATED_VARINT;
- case 8: part2 |= (buf[9] & 0x7F) << 7;
- case 7: part2 |= (buf[8] & 0x7F);
- case 6: part1 |= (buf[7] & 0x7F) << 21;
- case 5: part1 |= (buf[6] & 0x7F) << 14;
- case 4: part1 |= (buf[5] & 0x7F) << 7;
- case 3: part1 |= (buf[4] & 0x7F);
- case 2: part0 |= (buf[3] & 0x7F) << 21;
- case 1: part0 |= (buf[2] & 0x7F) << 14;
- part0 |= (buf[1] & 0x7F) << 7;
- part0 |= (buf[0] & 0x7F);
- }
- *val = (uint64_t)part0 | ((uint64_t)part1 << 28) | ((uint64_t)part2 << 56);
- *outbuf = buf + num_bytes + 2;
- } else {
- /* >2-byte varint, slow path. */
- uint8_t last = 0x80;
- *val = 0;
- for(int bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
- *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
- if(last & 0x80) return UPB_STATUS_NEED_MORE_DATA;
- *outbuf = buf;
- }
+ uint8_t last = 0x80;
+ *val = 0;
+ for(int bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
+ *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
+ if(last & 0x80) return UPB_STATUS_NEED_MORE_DATA;
+ *outbuf = buf;
return UPB_STATUS_OK;
}
@@ -201,15 +177,17 @@ upb_status_t upb_parse(struct upb_parse_state *s, void *_buf, size_t len,
uint8_t *buf = _buf;
uint8_t *completed = buf;
uint8_t *const start = buf;
-
+ uint8_t *end = buf + len;
+ uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0);
upb_status_t status = UPB_STATUS_OK;
+
+ /* Make local copies so optimizer knows they won't change. */
upb_tag_cb tag_cb = s->tag_cb;
upb_str_cb str_cb = s->str_cb;
upb_value_cb value_cb = s->value_cb;
void *udata = s->udata;
- uint8_t *end = buf + len;
- uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0);
+ /* Main loop: parse a tag, then handle the value. */
while(buf < end) {
struct upb_tag tag;
UPB_CHECK(parse_tag(buf, end, &tag, &buf));
@@ -218,53 +196,43 @@ upb_status_t upb_parse(struct upb_parse_state *s, void *_buf, size_t len,
completed = buf;
continue;
}
- /* Don't handle START_GROUP here, so client can skip group via tag_cb. */
- void *user_field_desc;
- upb_field_type_t ft = tag_cb(udata, &tag, &user_field_desc);
+ void *udesc;
+ upb_field_type_t ft = tag_cb(udata, &tag, &udesc);
if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
int32_t delim_len;
UPB_CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
uint8_t *delim_end = buf + delim_len;
-
- if(delim_end > end) { /* String ends beyond the data we have. */
- if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
- /* Streaming the body of a message is ok. */
- } else {
- /* String, bytes, and packed arrays must have all data present. */
- status = UPB_STATUS_NEED_MORE_DATA;
- goto done;
- }
- }
-
if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
- UPB_CHECK(push_stack_frame(s, start, delim_end - start, user_field_desc, &submsg_end));
- } else { /* Delimited data for which we require (and have) all data. */
- if(ft == 0) {
- /* Do nothing -- client has elected to skip. */
- } else if(upb_isstringtype(ft)) {
- struct upb_string str = {.ptr = (char*)buf, .byte_len = delim_len};
- str_cb(udata, &str, user_field_desc);
- } else { /* Packed Array. */
- while(buf < delim_end)
- UPB_CHECK(value_cb(udata, buf, end, user_field_desc, &buf));
- }
- buf = delim_end;
+ UPB_CHECK(push_stack_frame(
+ s, start, delim_end - start, udesc, &submsg_end));
+ } else {
+ if(upb_isstringtype(ft))
+ str_cb(udata, buf, UPB_MIN(delim_end, end) - buf, delim_end - buf, udesc);
+ else
+ ;/* Set a marker for packed arrays. */
+ buf = delim_end; /* Note that this could be greater than end. */
}
} else { /* Scalar (non-delimited) value. */
- if(ft == 0) /* Client elected to skip. */
- UPB_CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
- else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP)
- UPB_CHECK(push_stack_frame(s, start, 0, user_field_desc, &submsg_end));
- else
- UPB_CHECK(value_cb(udata, buf, end, user_field_desc, &buf));
+ switch(ft) {
+ case 0: /* Client elected to skip. */
+ UPB_CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
+ break;
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
+ UPB_CHECK(push_stack_frame(s, start, 0, udesc, &submsg_end));
+ break;
+ default:
+ UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
+ break;
+ }
}
while(buf == submsg_end) submsg_end = pop_stack_frame(s, start);
+ //while(buf < s->packed_end) /* packed arrays. */
+ // UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
completed = buf;
}
-done:
*read = (char*)completed - (char*)start;
s->completed_offset += *read;
return status;
diff --git a/src/upb_parse.h b/src/upb_parse.h
index 09ac4f7..ca18937 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -72,8 +72,12 @@ typedef upb_field_type_t (*upb_tag_cb)(void *udata,
typedef upb_status_t (*upb_value_cb)(void *udata, uint8_t *buf, uint8_t *end,
void *user_field_desc, uint8_t **outbuf);
-/* The callback that is called when a string is parsed. */
-typedef void (*upb_str_cb)(void *udata, struct upb_string *str,
+/* The callback that is called when a string is parsed. Note that the data
+ * for the string might not all be available -- we could be streaming, and
+ * the current buffer might end right in the middle of the string. So we
+ * pass both the available length and the total length. */
+typedef void (*upb_str_cb)(void *udata, uint8_t *str,
+ size_t avail_len, size_t total_len,
void *user_field_desc);
/* Callbacks that are called when a submessage begins and ends, respectively.
@@ -96,9 +100,16 @@ struct upb_parse_state {
};
/* Parses up to len bytes of protobuf data out of buf, calling cb as needed.
- * The function returns how many bytes were consumed from buf. Data is parsed
- * until no more data can be read from buf, or the callback sets *done=true,
- * or an error occured. Sets *read to the number of bytes consumed. */
+ * The function returns a status indicating the success of the operation. Data
+ * is parsed until no more data can be read from buf, or the callback returns an
+ * error like UPB_STATUS_USER_CANCELLED, or an error occurs.
+ *
+ * *read is set to the number of bytes consumed. Note that this can be greater
+ * than len in the case that a string was recognized that spans beyond the end
+ * of the currently provided data.
+ *
+ * The buffer passed to the next call to upb_parse must begin at buf + *read
+ * (the first unconsumed byte), even in the case that *read > len. */
upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len,
size_t *read);
@@ -136,18 +147,12 @@ upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t **outbuf)
{
- /* We inline these two common cases (short varints), if that fails we
- * dispatch to the full (non-inlined) version. */
+ /* We inline this common case (1-byte varints); if that fails, we dispatch to
+ * the full (non-inlined) version. */
if((*buf & 0x80) == 0) {
- /* Single-byte varint -- very common case. */
*val = *buf & 0x7f;
*outbuf = buf + 1;
return UPB_STATUS_OK;
- } else if(buf <= end && (*(buf+1) & 0x80) == 0) {
- /* Two-byte varint. */
- *val = (buf[0] & 0x7f) | ((buf[1] & 0x7f) << 7);
- *outbuf = buf + 2;
- return UPB_STATUS_OK;
} else {
return upb_get_v_uint64_t_full(buf, end, val, outbuf);
}
@@ -174,7 +179,7 @@ INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end,
*val = *(uint32_t*)buf;
#else
#define SHL(val, bits) ((uint32_t)val << bits)
- *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24);
+ *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24);
#undef SHL
#endif
*outbuf = uint32_end;
@@ -191,8 +196,8 @@ INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end,
*val = *(uint64_t*)buf;
#else
#define SHL(val, bits) ((uint64_t)val << bits)
- *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24) |
- SHL(b[4], 32) | SHL(b[5], 40) | SHL(b[6], 48) | SHL(b[7], 56) |
+ *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) |
+ SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56);
#undef SHL
#endif
*outbuf = uint64_end;
diff --git a/src/upb_string.h b/src/upb_string.h
index 8e56daa..528c8c8 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -37,6 +37,8 @@ extern "C" {
#define INLINE static inline
#endif
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+
struct upb_string {
/* We expect the data to be 8-bit clean (uint8_t), but char* is such an
* ingrained convention that we follow it. */
@@ -44,15 +46,13 @@ struct upb_string {
uint32_t byte_len;
};
-INLINE uint32_t min(uint32_t a, uint32_t b) { return a < b ? a : b; }
-
INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) {
return s1->byte_len == s2->byte_len &&
memcmp(s1->ptr, s2->ptr, s1->byte_len) == 0;
}
INLINE int upb_strcmp(struct upb_string s1, struct upb_string s2) {
- size_t common_length = min(s1.byte_len, s2.byte_len);
+ size_t common_length = UPB_MIN(s1.byte_len, s2.byte_len);
int common_diff = memcmp(s1.ptr, s2.ptr, common_length);
if(common_diff == 0) return s1.byte_len - s2.byte_len;
else return common_diff;
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback