summary | refs | log | tree | commit | diff
path: root/upb_parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'upb_parse.c')
-rw-r--r-- upb_parse.c 189
1 files changed, 47 insertions, 142 deletions
diff --git a/upb_parse.c b/upb_parse.c
index c0fc007..458876e 100644
--- a/upb_parse.c
+++ b/upb_parse.c
@@ -8,6 +8,7 @@
#include <assert.h>
#include <string.h>
+#include "descriptor.h"
/* Branch prediction hints for GCC. */
#ifdef __GNUC__
@@ -138,14 +139,10 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
static void wvtov_ ## type(wire_t s, val_t *d)
#define GET(type, v_or_f, wire_t, val_t, member_name) \
- static upb_status_t get_ ## type(struct upb_parse_state *s, \
- uint8_t *buf, \
- struct upb_tagged_value *d) { \
+ static upb_status_t get_ ## type(uint8_t **buf, union upb_value *d) { \
wire_t tmp; \
- uint8_t *b = buf; \
- CHECK(get_ ## v_or_f ## _ ## wire_t(&b, &tmp)); \
- wvtov_ ## type(tmp, &d->v.member_name); \
- s->offset += (b-buf); \
+ CHECK(get_ ## v_or_f ## _ ## wire_t(buf, &tmp)); \
+ wvtov_ ## type(tmp, &d->member_name); \
return UPB_STATUS_OK; \
}
@@ -172,65 +169,25 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
#undef GET
#undef T
-static void wvtov_delimited(uint32_t s, struct upb_delimited *d, size_t o)
-{
- d->offset = o;
- d->len = s;
-}
-
-/* Use BYTES version for both STRING and BYTES, leave UTF-8 checks to client. */
-static upb_status_t get_BYTES(struct upb_parse_state *s, uint8_t *buf,
- struct upb_tagged_value *d) {
- uint32_t tmp;
- uint8_t *b = buf;
- CHECK(get_v_uint32_t(&b, &tmp));
- s->offset += (b-buf); /* advance past length varint. */
- wvtov_delimited(tmp, &d->v.delimited, s->offset);
- size_t new_offset = s->offset + d->v.delimited.len; /* skip bytes */
- if (unlikely(new_offset < s->offset)) return UPB_ERROR_OVERFLOW;
- s->offset = new_offset;
- return UPB_STATUS_OK;
-}
-
-static upb_status_t get_MESSAGE(struct upb_parse_state *s, uint8_t *buf,
- struct upb_tagged_value *d) {
- /* We're entering a sub-message. */
- uint32_t tmp;
- uint8_t *b = buf;
- CHECK(get_v_uint32_t(&b, &tmp));
- s->offset += (b-buf); /* advance past length varint. */
- wvtov_delimited(tmp, &d->v.delimited, s->offset);
- /* Unlike STRING and BYTES, we *don't* advance past delimited here. */
- if (unlikely(++s->top == s->limit)) return UPB_ERROR_STACK_OVERFLOW;
- s->top->fieldset = d->field->fieldset;
- s->top->end_offset = d->v.delimited.offset + d->v.delimited.len;
- if (unlikely(s->top->end_offset < s->offset)) return UPB_ERROR_OVERFLOW;
- return UPB_STATUS_OK;
-}
-
-struct upb_type_info {
- upb_wire_type_t expected_wire_type;
- upb_status_t (*get)(struct upb_parse_state *s, uint8_t *buf,
- struct upb_tagged_value *d);
-};
-static struct upb_type_info type_info[] = {
- {UPB_WIRE_TYPE_64BIT, get_DOUBLE},
- {UPB_WIRE_TYPE_32BIT, get_FLOAT},
- {UPB_WIRE_TYPE_VARINT, get_INT32},
- {UPB_WIRE_TYPE_VARINT, get_INT64},
- {UPB_WIRE_TYPE_VARINT, get_UINT32},
- {UPB_WIRE_TYPE_VARINT, get_UINT64},
- {UPB_WIRE_TYPE_VARINT, get_SINT32},
- {UPB_WIRE_TYPE_VARINT, get_SINT64},
- {UPB_WIRE_TYPE_32BIT, get_FIXED32},
- {UPB_WIRE_TYPE_64BIT, get_FIXED64},
- {UPB_WIRE_TYPE_32BIT, get_SFIXED32},
- {UPB_WIRE_TYPE_64BIT, get_SFIXED64},
- {UPB_WIRE_TYPE_VARINT, get_BOOL},
- {UPB_WIRE_TYPE_DELIMITED, get_BYTES},
- {UPB_WIRE_TYPE_DELIMITED, get_BYTES},
- {UPB_WIRE_TYPE_VARINT, get_ENUM},
- {UPB_WIRE_TYPE_DELIMITED, get_MESSAGE}
+upb_wire_type_t upb_expected_wire_types[] = {
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = UPB_WIRE_TYPE_64BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = UPB_WIRE_TYPE_32BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = UPB_WIRE_TYPE_64BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = UPB_WIRE_TYPE_32BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = UPB_WIRE_TYPE_DELIMITED,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = UPB_WIRE_TYPE_DELIMITED,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = -1, /* TODO */
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = UPB_WIRE_TYPE_DELIMITED,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32] = UPB_WIRE_TYPE_32BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64] = UPB_WIRE_TYPE_64BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = UPB_WIRE_TYPE_VARINT,
};
upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag)
@@ -249,22 +206,17 @@ upb_status_t parse_wire_value(uint8_t *buf, size_t *offset,
#define READ(expr) CHECK(expr); *offset += (b-buf)
uint8_t *b = buf;
switch(wt) {
- case UPB_WIRE_TYPE_VARINT:
- READ(get_v_uint64_t(&b, &wv->varint)); break;
- case UPB_WIRE_TYPE_64BIT:
- READ(get_f_uint64_t(&b, &wv->_64bit)); break;
- case UPB_WIRE_TYPE_32BIT:
- READ(get_f_uint32_t(&b, &wv->_32bit)); break;
+ case UPB_WIRE_TYPE_VARINT: READ(get_v_uint64_t(&b, &wv->varint)); break;
+ case UPB_WIRE_TYPE_64BIT: READ(get_f_uint64_t(&b, &wv->_64bit)); break;
+ case UPB_WIRE_TYPE_32BIT: READ(get_f_uint32_t(&b, &wv->_32bit)); break;
case UPB_WIRE_TYPE_DELIMITED:
- wv->delimited.offset = *offset;
- READ(get_v_uint32_t(&b, &wv->delimited.len));
- size_t new_offset = *offset + wv->delimited.len;
+ READ(get_v_uint32_t(&b, &wv->_32bit));
+ size_t new_offset = *offset + wv->_32bit;
if (new_offset < *offset) return UPB_ERROR_OVERFLOW;
*offset += new_offset;
break;
case UPB_WIRE_TYPE_START_GROUP:
- case UPB_WIRE_TYPE_END_GROUP:
- return UPB_ERROR_GROUP; /* deprecated, no plans to support. */
+ case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */
}
return UPB_STATUS_OK;
}
@@ -274,12 +226,9 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
{
uint8_t *b = buf;
switch(wt) {
- case UPB_WIRE_TYPE_VARINT:
- READ(skip_v_uint64_t(&b)); break;
- case UPB_WIRE_TYPE_64BIT:
- READ(skip_f_uint64_t(&b)); break;
- case UPB_WIRE_TYPE_32BIT:
- READ(skip_f_uint32_t(&b)); break;
+ case UPB_WIRE_TYPE_VARINT: READ(skip_v_uint64_t(&b)); break;
+ case UPB_WIRE_TYPE_64BIT: READ(skip_f_uint64_t(&b)); break;
+ case UPB_WIRE_TYPE_32BIT: READ(skip_f_uint32_t(&b)); break;
case UPB_WIRE_TYPE_DELIMITED: {
/* Have to get (not skip) the length to skip the bytes. */
uint32_t len;
@@ -290,71 +239,27 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
break;
}
case UPB_WIRE_TYPE_START_GROUP:
- case UPB_WIRE_TYPE_END_GROUP:
- return UPB_ERROR_GROUP; /* deprecated, no plans to support. */
+ case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */
}
return UPB_STATUS_OK;
#undef READ
}
-/* Parses and processes the next value from buf. */
-upb_status_t upb_parse_field(struct upb_parse_state *s,
- uint8_t *buf,
- upb_field_number_t *fieldnum,
- struct upb_tagged_value *val,
- struct upb_tagged_wire_value *wv)
+upb_status_t upb_parse_value(uint8_t **b, upb_field_type_t ft,
+ union upb_value *v)
{
- /* Check for end-of-message at the current stack depth. */
- if(unlikely(s->offset >= s->top->end_offset)) {
- /* If the end offset isn't an exact field boundary, the pb is corrupt. */
- if(unlikely(s->offset != s->top->end_offset))
- return UPB_ERROR_BAD_SUBMESSAGE_END;
- s->top--;
- return UPB_STATUS_SUBMESSAGE_END;
+#define CASE(t) \
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: return get_ ## t(b, v);
+ switch(ft) {
+ CASE(DOUBLE) CASE(FLOAT) CASE(INT64) CASE(UINT64) CASE(INT32) CASE(FIXED64)
+ CASE(FIXED32) CASE(BOOL) CASE(UINT32) CASE(ENUM) CASE(SFIXED32)
+ CASE(SFIXED64) CASE(SINT32) CASE(SINT64)
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
+ return get_UINT32(b, v);
+ default: return UPB_ERROR; /* Including GROUP. */
}
-
- struct upb_tag tag;
- uint8_t *b = buf;
- CHECK(parse_tag(&b, &tag));
- s->offset += (b-buf);
- struct upb_field *fd = upb_find_field(s->top->fieldset,
- tag.field_number);
- upb_status_t unknown_value_status;
- if(unlikely(!fd)) {
- unknown_value_status = UPB_ERROR_UNKNOWN_VALUE;
- goto unknown_value;
- }
- struct upb_type_info *info = &type_info[fd->type];
- if(unlikely(tag.wire_type != info->expected_wire_type)) {
- unknown_value_status = UPB_ERROR_MISMATCHED_TYPE;
- goto unknown_value;
- }
-
- *fieldnum = tag.field_number;
- val->field = fd;
- CHECK(info->get(s, b, val));
- return UPB_STATUS_OK;
-
-unknown_value:
- wv->type = tag.wire_type;
- CHECK(parse_wire_value(buf, &s->offset, tag.wire_type, &wv->v));
- return unknown_value_status;
-}
-
-void upb_init_parser(
- struct upb_parse_state *state,
- struct upb_fieldset *toplevel_fieldset)
-{
- state->offset = 0;
- state->top = state->stack;
- state->limit = state->top + UPB_MAX_STACK;
- state->top->fieldset = toplevel_fieldset;
- state->top->end_offset = SIZE_MAX;
-}
-
-static int compare_fields(const void *f1, const void *f2)
-{
- return ((struct upb_field*)f1)->field_number -
- ((struct upb_field*)f2)->field_number;
+#undef CASE
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback