summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author: Joshua Haberman <joshua@reverberate.org> 2009-07-20 10:52:37 -0700
committer: Joshua Haberman <joshua@reverberate.org> 2009-07-20 10:52:37 -0700
commit: 1e388b0af3d7c2384cd350a349ba756914620466 (patch)
tree: d2b515f706dc15ba18b74c0cfd2da5a7052be468 /src
parent: 293cbe5135d64a1c85bb5125d9d4a1f6f39886f8 (diff)
Significant, experimental changes (setjmp/longjmp, group handling).
Diffstat (limited to 'src')
-rw-r--r-- src/upb_msg.c 63
-rw-r--r-- src/upb_msg.h 6
-rw-r--r-- src/upb_parse.c 440
-rw-r--r-- src/upb_parse.h 46
4 files changed, 253 insertions, 302 deletions
diff --git a/src/upb_msg.c b/src/upb_msg.c
index ed2a851..5b50541 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -256,26 +256,12 @@ void upb_msg_reuse_submsg(void **msg, struct upb_msg *m)
/* Serialization/Deserialization. ********************************************/
-/* We use this as our "user_data" for each frame of the parsing stack. */
-struct parse_frame_data {
- struct upb_msg *m;
- void *data;
-};
-
-static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m,
- void *data)
-{
- struct parse_frame_data *frame = (void*)&s->top->user_data;
- frame->m = m;
- frame->data = data;
-}
-
-static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag,
+static upb_field_type_t tag_cb(void *udata, struct upb_tag tag,
void **user_field_desc)
{
- struct parse_frame_data *frame = (void*)&s->top->user_data;
- struct upb_msg_field *f = upb_msg_fieldbynum(frame->m, tag->field_number);
- if(!f || !upb_check_type(tag->wire_type, f->type))
+ struct upb_msg_parse_state *s = udata;
+ struct upb_msg_field *f = upb_msg_fieldbynum(s->top->m, tag.field_number);
+ if(!f || !upb_check_type(tag.wire_type, f->type))
return 0; /* Skip unknown or fields of the wrong type. */
*user_field_desc = f;
return f->type;
@@ -299,23 +285,21 @@ static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f)
return p;
}
-static upb_status_t value_cb(struct upb_parse_state *s, void **buf, void *end,
- void *user_field_desc)
+static void *value_cb(void *udata, void *buf, void *end,
+ void *user_field_desc, jmp_buf errjmp)
{
- struct parse_frame_data *frame = (void*)&s->top->user_data;
+ struct upb_msg_parse_state *s = udata;
struct upb_msg_field *f = user_field_desc;
- union upb_value_ptr p = get_value_ptr(frame->data, f);
- UPB_CHECK(upb_parse_value(buf, end, f->type, p));
- return UPB_STATUS_OK;
+ union upb_value_ptr p = get_value_ptr(s->top->data, f);
+ return upb_parse_value(buf, end, f->type, p, errjmp);
}
-static upb_status_t str_cb(struct upb_parse_state *_s, struct upb_string *str,
+static upb_status_t str_cb(void *udata, struct upb_string *str,
void *user_field_desc)
{
- struct upb_msg_parse_state *s = (void*)_s;
- struct parse_frame_data *frame = (void*)&s->s.top->user_data;
+ struct upb_msg_parse_state *s = udata;
struct upb_msg_field *f = user_field_desc;
- union upb_value_ptr p = get_value_ptr(frame->data, f);
+ union upb_value_ptr p = get_value_ptr(s->top->data, f);
if(s->byref) {
upb_msg_reuse_strref(p.str);
**p.str = *str;
@@ -326,29 +310,30 @@ static upb_status_t str_cb(struct upb_parse_state *_s, struct upb_string *str,
return UPB_STATUS_OK;
}
-static void submsg_start_cb(struct upb_parse_state *_s, void *user_field_desc)
+static void submsg_start_cb(void *udata, void *user_field_desc)
{
- struct upb_msg_parse_state *s = (void*)_s;
+ struct upb_msg_parse_state *s = udata;
struct upb_msg_field *f = user_field_desc;
- struct parse_frame_data *frame = (void*)&s->s.top->user_data;
- // TODO: find a non-hacky way to get a pointer to the old frame.
- struct parse_frame_data *oldframe = (void*)((char*)s->s.top - s->s.udata_size);
- union upb_value_ptr p = get_value_ptr(oldframe->data, f);
+ union upb_value_ptr p = get_value_ptr(s->top->data, f);
assert(f->ref.msg);
upb_msg_reuse_submsg(p.msg, f->ref.msg);
- set_frame_data(&s->s, f->ref.msg, *p.msg);
- if(!s->merge) upb_msg_clear(frame->data, f->ref.msg);
+ s->top++;
+ s->top->m = f->ref.msg;
+ s->top->data = *p.msg;
+ if(!s->merge) upb_msg_clear(s->top->data, s->top->m);
}
void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg,
struct upb_msg *m, bool merge, bool byref)
{
- upb_parse_reset(&s->s);
+ upb_parse_reset(&s->s, s);
s->merge = merge;
s->byref = byref;
if(!merge && msg == NULL) msg = upb_msgdata_new(m);
upb_msg_clear(msg, m);
- set_frame_data(&s->s, m, msg);
+ s->top = s->stack;
+ s->top->m = m;
+ s->top->data = msg;
s->s.tag_cb = tag_cb;
s->s.value_cb = value_cb;
s->s.str_cb = str_cb;
@@ -358,7 +343,7 @@ void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg,
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
struct upb_msg *m, bool merge, bool byref)
{
- upb_parse_init(&s->s, sizeof(struct parse_frame_data));
+ upb_parse_init(&s->s, s);
upb_msg_parse_reset(s, msg, m, merge, byref);
}
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 1d41805..14fc870 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -331,11 +331,17 @@ void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
/* This is all just a layer on top of the stream-oriented facility in
* upb_parse.h. */
+struct upb_msg_parse_frame {
+ struct upb_msg *m;
+ void *data;
+};
+
struct upb_msg_parse_state {
struct upb_parse_state s;
bool merge;
bool byref;
struct upb_msg *m;
+ struct upb_msg_parse_frame stack[UPB_MAX_NESTING], *top;
};
/* Initializes/frees a message parser. The parser will write the data to the
diff --git a/src/upb_parse.c b/src/upb_parse.c
index ca28ccc..fa37a56 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -6,147 +6,149 @@
#include "upb_parse.h"
-#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
-#include <string.h>
+
+/* May want to move this to upb.c if enough other things warrant it. */
#include "descriptor.h"
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+struct upb_type_info upb_type_info[] = {
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = {alignof(double), sizeof(double), UPB_WIRE_TYPE_64BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = {alignof(float), sizeof(float), UPB_WIRE_TYPE_32BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_START_GROUP},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32]= {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_32BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED},
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED},
+};
/* Lowest-level functions -- these read integers from the input buffer. */
-static void *check_end(uint8_t *buf, void *end, size_t maxlen,
- upb_status_t *bound_error)
+inline
+static void *get_v_uint64_t(void *restrict _buf, void *_end,
+ uint64_t *restrict val, jmp_buf errjmp)
{
- void *maxend = buf + maxlen;
- if(end < maxend) {
- *bound_error = UPB_STATUS_NEED_MORE_DATA;
- return end;
- } else {
- *bound_error = UPB_ERROR_UNTERMINATED_VARINT;
- return maxend;
- }
-}
-
-inline static upb_status_t get_v_uint64_t(void *restrict *buf, void *end,
- uint64_t *restrict val)
-{
- uint8_t *b = *buf;
-
- if((*b & 0x80) == 0) {
+ uint8_t *buf = _buf, *end = _end;
+ if((*buf & 0x80) == 0) {
/* Single-byte varint -- very common case. */
- *buf = b + 1;
- *val = *b & 0x7f;
- return UPB_STATUS_OK;
- } else if(b <= (uint8_t*)end && (*(b+1) & 0x80) == 0) {
+ *val = *buf & 0x7f;
+ return buf + 1;
+ } else if(buf <= end && (*(buf+1) & 0x80) == 0) {
/* Two-byte varint. */
- *buf = b + 2;
- *val = (b[0] & 0x7f) | ((b[1] & 0x7f) << 7);
- return UPB_STATUS_OK;
- } else if(b + 10 <= (uint8_t*)end) {
+ *val = (buf[0] & 0x7f) | ((buf[1] & 0x7f) << 7);
+ return buf + 2;
+ } else if(buf + 10 <= end) {
/* >2-byte varint, fast path. */
- uint64_t cont = *(uint64_t*)(b+2) | 0x7f7f7f7f7f7f7f7fULL;
+ uint64_t cont = *(uint64_t*)(buf+2) | 0x7f7f7f7f7f7f7f7fULL;
int num_bytes = __builtin_ffsll(~cont) / 8;
uint32_t part0 = 0, part1 = 0, part2 = 0;
switch(num_bytes) {
- default: return UPB_ERROR_UNTERMINATED_VARINT;
- case 8: part2 |= (b[9] & 0x7F) << 7;
- case 7: part2 |= (b[8] & 0x7F);
- case 6: part1 |= (b[7] & 0x7F) << 21;
- case 5: part1 |= (b[6] & 0x7F) << 14;
- case 4: part1 |= (b[5] & 0x7F) << 7;
- case 3: part1 |= (b[4] & 0x7F);
- case 2: part0 |= (b[3] & 0x7F) << 21;
- case 1: part0 |= (b[2] & 0x7F) << 14;
- part0 |= (b[1] & 0x7F) << 7;
- part0 |= (b[0] & 0x7F);
+ default: longjmp(errjmp, UPB_ERROR_UNTERMINATED_VARINT);
+ case 8: part2 |= (buf[9] & 0x7F) << 7;
+ case 7: part2 |= (buf[8] & 0x7F);
+ case 6: part1 |= (buf[7] & 0x7F) << 21;
+ case 5: part1 |= (buf[6] & 0x7F) << 14;
+ case 4: part1 |= (buf[5] & 0x7F) << 7;
+ case 3: part1 |= (buf[4] & 0x7F);
+ case 2: part0 |= (buf[3] & 0x7F) << 21;
+ case 1: part0 |= (buf[2] & 0x7F) << 14;
+ part0 |= (buf[1] & 0x7F) << 7;
+ part0 |= (buf[0] & 0x7F);
}
- *buf = b + num_bytes + 2;
*val = (uint64_t)part0 | ((uint64_t)part1 << 28) | ((uint64_t)part2 << 56);
- return UPB_STATUS_OK;
+ return buf + num_bytes + 2;
} else {
/* >2-byte varint, slow path. */
uint8_t last = 0x80;
*val = 0;
- for(int bitpos = 0; b < (uint8_t*)end && (last & 0x80); b++, bitpos += 7)
- *val |= ((uint64_t)((last = *b) & 0x7F)) << bitpos;
- if(last & 0x80) return UPB_STATUS_NEED_MORE_DATA;
- *buf = b;
- return UPB_STATUS_OK;
+ for(int bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
+ *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
+ if(last & 0x80) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA);
+ return buf;
}
}
-static upb_status_t skip_v_uint64_t(void **buf, void *end)
+static void *skip_v_uint64_t(void *_buf, void *_end, jmp_buf errjmp)
{
- uint8_t *b = *buf;
- upb_status_t bound_error;
- end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */
+ /* TODO: optimize. */
+ uint8_t *buf = _buf, *end = _end;
uint8_t last = 0x80;
- for(; b < (uint8_t*)end && (last & 0x80); b++)
- last = *b;
+ for(; buf < end && (last & 0x80); buf++)
+ last = *buf;
- if(last & 0x80) return bound_error;
- *buf = b;
- return UPB_STATUS_OK;
+ if(last & 0x80) {
+ upb_status_t err =
+ buf == end ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT;
+ longjmp(errjmp, err);
+ }
+ return buf;
}
-static upb_status_t get_v_uint32_t(void *restrict *buf, void *end,
- uint32_t *restrict val)
+static void *get_v_uint32_t(void *restrict buf, void *end,
+ uint32_t *restrict val, jmp_buf errjmp)
{
uint64_t val64;
- UPB_CHECK(get_v_uint64_t(buf, end, &val64));
+ void *outbuf = get_v_uint64_t(buf, end, &val64, errjmp);
+ /* TODO: should we throw an error if any of the high bits in val64 are set? */
*val = (uint32_t)val64;
- return UPB_STATUS_OK;
+ return outbuf;
}
-static upb_status_t get_f_uint32_t(void *restrict *buf, void *end,
- uint32_t *restrict val)
+static void *get_f_uint32_t(void *restrict buf, void *end,
+ uint32_t *restrict val, jmp_buf errjmp)
{
- uint8_t *b = *buf;
- void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t);
- if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA;
+ void *uint32_end = (uint8_t*)buf + sizeof(uint32_t);
+ if(uint32_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA);
#if UPB_UNALIGNED_READS_OK
- *val = *(uint32_t*)b;
+ *val = *(uint32_t*)buf;
#else
#define SHL(val, bits) ((uint32_t)val << bits)
*val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24);
#undef SHL
#endif
- *buf = uint32_end;
- return UPB_STATUS_OK;
+ return uint32_end;
}
-static upb_status_t get_f_uint64_t(void *restrict *buf, void *end,
- uint64_t *restrict val)
+static void *get_f_uint64_t(void *restrict buf, void *end,
+ uint64_t *restrict val, jmp_buf errjmp)
{
- void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t);
- if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA;
+ void *uint64_end = (uint8_t*)buf + sizeof(uint64_t);
+ if(uint64_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA);
#if UPB_UNALIGNED_READS_OK
- *val = *(uint64_t*)*buf;
- *buf = uint64_end;
+ *val = *(uint64_t*)buf;
#else
- uint32_t lo32, hi32;
- get_f_uint32_t(buf, &lo32, end);
- get_f_uint32_t(buf, &hi32, end);
- *val = lo32 | ((uint64_t)hi32 << 32);
+#define SHL(val, bits) ((uint64_t)val << bits)
+ *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24) |
+ SHL(b[4], 32) | SHL(b[5], 40) | SHL(b[6], 48) | SHL(b[7], 56) |
+#undef SHL
#endif
- return UPB_STATUS_OK;
+ return uint64_end;
}
-static upb_status_t skip_f_uint32_t(void **buf, void *end)
+static void *skip_f_uint32_t(void *buf, void *end, jmp_buf errjmp)
{
- void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t);
- if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA;
- *buf = uint32_end;
- return UPB_STATUS_OK;
+ void *uint32_end = (uint8_t*)buf + sizeof(uint32_t);
+ if(uint32_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA);
+ return uint32_end;
}
-static upb_status_t skip_f_uint64_t(void **buf, void *end)
+static void *skip_f_uint64_t(void *buf, void *end, jmp_buf errjmp)
{
- void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t);
- if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA;
- *buf = uint64_end;
- return UPB_STATUS_OK;
+ void *uint64_end = (uint8_t*)buf + sizeof(uint64_t);
+ if(uint64_end > end) longjmp(errjmp, UPB_STATUS_NEED_MORE_DATA);
+ return uint64_end;
}
/* Undoes protobuf ZigZag encoding for 32-bit values: even inputs map to n/2
 * and odd inputs map to -(n+1)/2. */
static int32_t zz_decode_32(uint32_t n)
{
  uint32_t magnitude = n >> 1;
  /* All-ones when the low (sign) bit is set, zero otherwise. */
  uint32_t sign_mask = (uint32_t)-(int32_t)(n & 1);
  return (int32_t)(magnitude ^ sign_mask);
}
@@ -159,11 +161,11 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
static void wvtov_ ## type(wire_t s, val_t *d)
#define GET(type, v_or_f, wire_t, val_t, member_name) \
- static upb_status_t get_ ## type(void **buf, void *end, val_t *d) { \
+ static void *get_ ## type(void *buf, void *end, val_t *d, jmp_buf errjmp) { \
wire_t tmp; \
- UPB_CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp)); \
+ void *outbuf = get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, errjmp); \
wvtov_ ## type(tmp, d); \
- return UPB_STATUS_OK; \
+ return outbuf; \
}
#define T(type, v_or_f, wire_t, val_t, member_name) \
@@ -189,70 +191,46 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
#undef GET
#undef T
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-
-/* May want to move this to upb.c if enough other things warrant it. */
-struct upb_type_info upb_type_info[] = {
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = {alignof(double), sizeof(double), UPB_WIRE_TYPE_64BIT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = {alignof(float), sizeof(float), UPB_WIRE_TYPE_32BIT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_START_GROUP},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32]= {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_32BIT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED},
- [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string*), sizeof(struct upb_string*), UPB_WIRE_TYPE_DELIMITED},
-};
-
-static upb_status_t parse_tag(void **buf, void *end, struct upb_tag *tag)
+static void *parse_tag(void *buf, void *end, struct upb_tag *tag, jmp_buf errjmp)
{
uint32_t tag_int;
- UPB_CHECK(get_v_uint32_t(buf, end, &tag_int));
+ void *outbuf = get_v_uint32_t(buf, end, &tag_int, errjmp);
tag->wire_type = (upb_wire_type_t)(tag_int & 0x07);
tag->field_number = tag_int >> 3;
- return UPB_STATUS_OK;
+ return outbuf;
}
-upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt,
- union upb_wire_value *wv)
+void *upb_parse_wire_value(void *buf, void *end, upb_wire_type_t wt,
+ union upb_wire_value *wv, jmp_buf errjmp)
{
switch(wt) {
- case UPB_WIRE_TYPE_VARINT: UPB_CHECK(get_v_uint64_t(buf, end, &wv->varint)); break;
- case UPB_WIRE_TYPE_64BIT: UPB_CHECK(get_f_uint64_t(buf, end, &wv->_64bit)); break;
- case UPB_WIRE_TYPE_32BIT: UPB_CHECK(get_f_uint32_t(buf, end, &wv->_32bit)); break;
- default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */
+ case UPB_WIRE_TYPE_VARINT: return get_v_uint64_t(buf, end, &wv->varint, errjmp);
+ case UPB_WIRE_TYPE_64BIT: return get_f_uint64_t(buf, end, &wv->_64bit, errjmp);
+ case UPB_WIRE_TYPE_32BIT: return get_f_uint32_t(buf, end, &wv->_32bit, errjmp);
+ default: longjmp(errjmp, UPB_ERROR_ILLEGAL); /* Doesn't handle delimited, groups. */
}
- return UPB_STATUS_OK;
}
-static upb_status_t skip_wire_value(void **buf, void *end, upb_wire_type_t wt)
+static void *skip_wire_value(void *buf, void *end, upb_wire_type_t wt,
+ jmp_buf errjmp)
{
switch(wt) {
- case UPB_WIRE_TYPE_VARINT: UPB_CHECK(skip_v_uint64_t(buf, end)); break;
- case UPB_WIRE_TYPE_64BIT: UPB_CHECK(skip_f_uint64_t(buf, end)); break;
- case UPB_WIRE_TYPE_32BIT: UPB_CHECK(skip_f_uint32_t(buf, end)); break;
+ case UPB_WIRE_TYPE_VARINT: return skip_v_uint64_t(buf, end, errjmp);
+ case UPB_WIRE_TYPE_64BIT: return skip_f_uint64_t(buf, end, errjmp);
+ case UPB_WIRE_TYPE_32BIT: return skip_f_uint32_t(buf, end, errjmp);
case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
case UPB_WIRE_TYPE_END_GROUP: break;
- default: return UPB_ERROR_ILLEGAL;
+ default: longjmp(errjmp, UPB_ERROR_ILLEGAL);
}
- return UPB_STATUS_OK;
+ return buf;
}
-upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft,
- union upb_value_ptr v)
+void *upb_parse_value(void *buf, void *end, upb_field_type_t ft,
+ union upb_value_ptr v, jmp_buf errjmp)
{
#define CASE(t, member_name) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
- return get_ ## t(buf, end, v.member_name);
+ return get_ ## t(buf, end, v.member_name, errjmp);
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@@ -268,28 +246,25 @@ upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft,
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
- default: return UPB_ERROR_ILLEGAL;
+ default: longjmp(errjmp, UPB_ERROR_ILLEGAL);
}
#undef CASE
}
-void upb_parse_reset(struct upb_parse_state *state)
+void upb_parse_reset(struct upb_parse_state *state, void *udata)
{
- state->offset = 0;
state->top = state->stack;
+ state->limit = &state->stack[UPB_MAX_NESTING];
/* The top-level message is not delimited (we can keep receiving data for
- * it indefinitely). */
- state->top->end_offset = SIZE_MAX;
+ * it indefinitely), so we treat it like a group. */
+ *state->top = 0;
+ state->udata = udata;
}
-void upb_parse_init(struct upb_parse_state *state, size_t udata_size)
+void upb_parse_init(struct upb_parse_state *state, void *udata)
{
memset(state, 0, sizeof(struct upb_parse_state)); /* Clear all callbacks. */
- size_t stack_bytes = (sizeof(*state->stack) + udata_size) * UPB_MAX_NESTING;
- state->stack = malloc(stack_bytes);
- state->limit = (struct upb_parse_stack_frame*)((char*)state->stack + stack_bytes);
- state->udata_size = udata_size;
- upb_parse_reset(state);
+ upb_parse_reset(state, udata);
}
void upb_parse_free(struct upb_parse_state *state)
@@ -297,112 +272,105 @@ void upb_parse_free(struct upb_parse_state *state)
free(state->stack);
}
-static size_t pop_stack_frame(struct upb_parse_state *s)
+static void *pop_stack_frame(struct upb_parse_state *s,
+ uint8_t *buf, uint8_t *submsg_end)
{
- if(s->submsg_end_cb) s->submsg_end_cb(s);
+ if(s->submsg_end_cb) s->submsg_end_cb(s->udata);
+ uint32_t final_submsg_len = *s->top - (buf - submsg_end);
s->top--;
- s->top = (struct upb_parse_stack_frame*)((char*)s->top - s->udata_size);
- return s->top->end_offset;
+ *s->top -= final_submsg_len;
+ return (char*)buf + (*s->top > 0 ? *s->top : 0);
}
-static upb_status_t push_stack_frame(struct upb_parse_state *s, size_t end,
- void *user_field_desc, size_t *end_offset)
+/* Returns the next end offset. */
+static void *push_stack_frame(struct upb_parse_state *s,
+ uint8_t *buf, uint8_t *submsg_end, uint32_t len,
+ void *user_field_desc, jmp_buf errjmp)
{
+ *s->top -= len;
+ if(*s->top < 0) *s->top -= (buf - submsg_end);
s->top++;
- s->top = (struct upb_parse_stack_frame*)((char*)s->top + s->udata_size);
- if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW;
- s->top->end_offset = end;
- *end_offset = end;
- if(s->submsg_start_cb) s->submsg_start_cb(s, user_field_desc);
- return UPB_STATUS_OK;
-}
-
-static upb_status_t parse_delimited(struct upb_parse_state *s,
- struct upb_tag *tag,
- void **buf, void *end,
- size_t base_offset, size_t *end_offset)
-{
- int32_t delim_len;
- void *user_field_desc;
- void *bufstart = *buf;
-
- /* Whether we are parsing or skipping the field, we always need to parse
- * the length. */
- UPB_CHECK(get_INT32(buf, end, &delim_len));
- upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc);
- if(*buf < bufstart) return UPB_ERROR_OVERFLOW;
- if(*buf > end && ft != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
- /* Streaming submessages is ok, but for other delimited types (string,
- * bytes, and packed arrays) we require that all the delimited data is
- * available. This could be relaxed if desired. */
- return UPB_STATUS_NEED_MORE_DATA;
- }
-
- if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
- base_offset += ((char*)*buf - (char*)bufstart);
- UPB_CHECK(push_stack_frame(s, base_offset + delim_len, user_field_desc, end_offset));
- } else {
- void *delim_end = (char*)*buf + delim_len;
- if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
- ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) {
- struct upb_string str = {.ptr = *buf, .byte_len = delim_len};
- s->str_cb(s, &str, user_field_desc);
- *buf = delim_end;
- } else {
- /* Packed Array. */
- while(*buf < delim_end)
- UPB_CHECK(s->value_cb(s, buf, end, user_field_desc));
- }
- }
- return UPB_STATUS_OK;
+ if(s->top > s->limit) longjmp(errjmp, UPB_ERROR_STACK_OVERFLOW);
+ *s->top = len;
+ if(s->submsg_start_cb) s->submsg_start_cb(s->udata, user_field_desc);
+ return (char*)buf + *s->top;
}
-static upb_status_t parse_nondelimited(struct upb_parse_state *s,
- struct upb_tag *tag,
- void **buf, void *end,
- size_t *end_offset)
+upb_status_t upb_isstringtype(upb_field_type_t type)
{
- /* Simple value or begin group. */
- void *user_field_desc;
- upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc);
- if(ft == 0) {
- UPB_CHECK(skip_wire_value(buf, end, tag->wire_type));
- } else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) {
- /* No length specified, an "end group" tag will mark the end. */
- UPB_CHECK(push_stack_frame(s, UINT32_MAX, user_field_desc, end_offset));
- } else {
- UPB_CHECK(s->value_cb(s, buf, end, user_field_desc));
- }
- return UPB_STATUS_OK;
+ return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
+ type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
}
-upb_status_t upb_parse(struct upb_parse_state *restrict s, void *buf, size_t len,
- size_t *restrict read)
+upb_status_t upb_parse(struct upb_parse_state *s, void *_buf, size_t len,
+ size_t *read)
{
- void *end = (char*)buf + len;
- size_t offset = s->offset;
- size_t end_offset = s->top->end_offset;
+ uint8_t *buf = _buf;
+ uint8_t *volatile completed = buf;
+ uint8_t *const start = buf;
+ /* Error handling with setjmp/longjmp (saves repeated error code checks, and
+ * lets us use function return values for something more useful). */
+ jmp_buf errjmp;
+ upb_status_t status = UPB_STATUS_OK;
+ if((status = setjmp(errjmp)) != 0) goto done;
+
+ uint8_t *end = buf + len;
+ uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0);
while(buf < end) {
struct upb_tag tag;
- void *bufstart = buf;
- UPB_CHECK(parse_tag(&buf, end, &tag));
+ buf = parse_tag(buf, end, &tag, errjmp);
if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
- if(end_offset != UINT32_MAX)
- return UPB_ERROR_SPURIOUS_END_GROUP;
- end_offset = pop_stack_frame(s);
- } else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
- UPB_CHECK(parse_delimited(
- s, &tag, &buf, end, offset + (char*)buf - (char*)bufstart, &end_offset));
- } else {
- UPB_CHECK(parse_nondelimited(s, &tag, &buf, end, &end_offset));
+ submsg_end = pop_stack_frame(s, buf, submsg_end);
+ completed = buf;
+ continue;
}
- offset += ((char*)buf - (char*)bufstart);
- while(offset >= end_offset) {
- if(offset != end_offset) return UPB_ERROR_BAD_SUBMESSAGE_END;
- end_offset = pop_stack_frame(s);
+ /* Don't handle START_GROUP here, so client can skip group via tag_cb. */
+ void *user_field_desc;
+
+ upb_field_type_t ft = s->tag_cb(s->udata, tag, &user_field_desc);
+ if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
+ int32_t delim_len;
+ buf = get_INT32(buf, end, &delim_len, errjmp);
+ uint8_t *delim_end = buf + delim_len;
+
+ if(delim_end > end) { /* String ends beyond the data we have. */
+ if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
+ /* Streaming the body of a message is ok. */
+ } else {
+ /* String, bytes, and packed arrays must have all data present. */
+ status = UPB_STATUS_NEED_MORE_DATA;
+ goto done;
+ }
+ }
+
+ if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
+ submsg_end = push_stack_frame(s, buf, submsg_end, delim_len, user_field_desc, errjmp);
+ } else { /* Delimited data for which we require (and have) all data. */
+ if(ft == 0) {
+ /* Do nothing -- client has elected to skip. */
+ } else if(upb_isstringtype(ft)) {
+ struct upb_string str = {.ptr = (char*)buf, .byte_len = delim_len};
+ s->str_cb(s->udata, &str, user_field_desc);
+ } else { /* Packed Array. */
+ while(buf < delim_end)
+ buf = s->value_cb(s->udata, buf, end, user_field_desc, errjmp);
+ }
+ buf = delim_end;
+ }
+ } else { /* Scalar (non-delimited) value. */
+ if(ft == 0) /* Client elected to skip. */
+ buf = skip_wire_value(buf, end, tag.wire_type, errjmp);
+ else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP)
+ submsg_end = push_stack_frame(s, buf, submsg_end, 0, user_field_desc, errjmp);
+ else
+ buf = s->value_cb(s->udata, buf, end, user_field_desc, errjmp);
}
+
+ while(buf == submsg_end) submsg_end = pop_stack_frame(s, buf, submsg_end);
+ completed = buf;
}
- *read = offset - s->offset;
- s->offset = offset;
- return UPB_STATUS_OK;
+
+done:
+ *read = (char*)completed - (char*)start;
+ return status;
}
diff --git a/src/upb_parse.h b/src/upb_parse.h
index 4600c04..4a432d8 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -12,8 +12,9 @@
#ifndef UPB_PARSE_H_
#define UPB_PARSE_H_
-#include <stdint.h>
+#include <setjmp.h>
#include <stdbool.h>
+#include <stdint.h>
#include "upb.h"
#ifdef __cplusplus
@@ -63,8 +64,8 @@ struct upb_parse_state;
/* Initialize and free (respectively) the given parse state, which must have
* been previously allocated. udata is an opaque pointer that the parser stores
* and passes back as the first argument of every callback. */
-void upb_parse_init(struct upb_parse_state *state, size_t udata_size);
-void upb_parse_reset(struct upb_parse_state *state);
+void upb_parse_init(struct upb_parse_state *state, void *udata);
+void upb_parse_reset(struct upb_parse_state *state, void *udata);
void upb_parse_free(struct upb_parse_state *state);
/* The callback that is called immediately after a tag has been parsed. The
@@ -74,8 +75,8 @@ void upb_parse_free(struct upb_parse_state *state);
* type is appropriate for the .proto type. To skip the value (which means
* skipping all submessages, in the case of a submessage), the callback should
* return zero. */
-typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s,
- struct upb_tag *tag,
+typedef upb_field_type_t (*upb_tag_cb)(void *udata,
+ struct upb_tag tag,
void **user_field_desc);
/* The callback that is called when a regular value (ie. not a string or
@@ -85,34 +86,25 @@ typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s,
*
* Note that this callback can be called several times in a row for a single
* call to tag_cb in the case of packed arrays. */
-typedef upb_status_t (*upb_value_cb)(struct upb_parse_state *s,
- void **buf, void *end,
- void *user_field_desc);
+typedef void *(*upb_value_cb)(void *udata, void *buf, void *end,
+ void *user_field_desc, jmp_buf errjmp);
/* The callback that is called when a string is parsed. */
-typedef upb_status_t (*upb_str_cb)(struct upb_parse_state *s,
+typedef upb_status_t (*upb_str_cb)(void *udata,
struct upb_string *str,
void *user_field_desc);
/* Callbacks that are called when a submessage begins and ends, respectively.
* Both are called with the submessage's stack frame at the top of the stack. */
-typedef void (*upb_submsg_start_cb)(struct upb_parse_state *s,
+typedef void (*upb_submsg_start_cb)(void *udata,
void *user_field_desc);
-typedef void (*upb_submsg_end_cb)(struct upb_parse_state *s);
-
-/* Each stack frame (one for each level of submessages/groups) has this format,
- * where user_data has as many bytes allocated as specified when initialized. */
-struct upb_parse_stack_frame {
- size_t end_offset; /* 0 indicates that this is a group. */
-#ifndef __cplusplus /* Temporary hack since C++ doesn't support flex arrays. */
- char user_data[];
-#endif
-};
+typedef void (*upb_submsg_end_cb)(void *udata);
struct upb_parse_state {
- size_t offset;
- struct upb_parse_stack_frame *stack, *top, *limit;
- size_t udata_size; /* How many bytes the user gets in each frame. */
+ /* For delimited submsgs, counts from the submsg len down to zero.
+ * For group submsgs, counts from zero down to the negative len. */
+ int32_t stack[UPB_MAX_NESTING], *top, *limit;
+ void *udata;
upb_tag_cb tag_cb;
upb_value_cb value_cb;
upb_str_cb str_cb;
@@ -144,14 +136,14 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
/* Parses and converts a value from the character data starting at buf. The
* caller must have previously checked that the wire type is appropriate for
* this field type. */
-upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft,
- union upb_value_ptr v);
+void *upb_parse_value(void *buf, void *end, upb_field_type_t ft,
+ union upb_value_ptr v, jmp_buf errjmp);
/* Parses a wire value with the given type (which must have been obtained from
* a tag that was just parsed) and returns a pointer to the first byte after the
* consumed data. Failures are reported by longjmp(errjmp, status). */
-upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt,
- union upb_wire_value *wv);
+void *upb_parse_wire_value(void *buf, void *end, upb_wire_type_t wt,
+ union upb_wire_value *wv, jmp_buf errjmp);
#ifdef __cplusplus
} /* extern "C" */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback