summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2010-07-03 19:19:36 -0700
committerJoshua Haberman <joshua@reverberate.org>2010-07-03 19:19:36 -0700
commit9d051254b35b2bf838f1753a24fe490fb448e428 (patch)
tree362cf3a0ff040b2eeccbbb530b9e60aeb13b05ab /src
parent5ea7f943f9fd70fa1ada694b4532b71af55f8861 (diff)
Implemented upb_baredecoder, for bootstrapping.
Diffstat (limited to 'src')
-rw-r--r--src/upb_decoder.c11
-rw-r--r--src/upb_def.c154
-rw-r--r--src/upb_table.c14
3 files changed, 167 insertions, 12 deletions
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index 6f1e437..dd8ffcd 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -316,9 +316,6 @@ bool upb_decoder_skipval(upb_decoder *d);
upb_fielddef *upb_decoder_getdef(upb_decoder *d)
{
- uint32_t key;
- upb_wire_type_t wire_type;
-
// Detect end-of-submessage.
if(upb_decoder_offset(d) >= d->top->end_offset) {
d->src.eof = true;
@@ -328,9 +325,11 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d)
// Handles the packed field case.
if(d->field) return d->field;
+ uint32_t key = 0;
again:
if(!upb_decoder_readv32(d, &key)) return NULL;
- wire_type = key & 0x7;
+ upb_wire_type_t wire_type = key & 0x7;
+ int32_t field_number = key >> 3;
if(wire_type == UPB_WIRE_TYPE_DELIMITED) {
// For delimited wire values we parse the length now, since we need it in
@@ -348,7 +347,7 @@ again:
}
// Look up field by tag number.
- upb_fielddef *f = upb_msg_itof(d->top->msgdef, key >> 3);
+ upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number);
if (!f) {
// Unknown field. If/when the upb_src interface supports reporting
@@ -557,6 +556,8 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
+// Destroys a decoder: drops the decoder's reference on d->str, drops the
+// reference on d->buf only when one is present, then frees the struct itself.
void upb_decoder_free(upb_decoder *d)
{
+ upb_string_unref(d->str);
+ if(d->buf) upb_string_unref(d->buf);
free(d);
}
diff --git a/src/upb_def.c b/src/upb_def.c
index 1eaaeef..31f14fa 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -839,3 +839,157 @@ src_err:
err:
upb_deflist_uninit(&defs);
}
+
+/* upb_baredecoder ************************************************************/
+
+// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer
+// binary format. It is only used for bootstrapping. It can parse without
+// having a upb_msgdef, which is why it is useful for bootstrapping the first
+// msgdef. On the downside, it does not support:
+//
+// * having its input span multiple upb_strings.
+// * reading any field of the returned upb_fielddef's except f->number.
+// * keeping a pointer to the upb_fielddef* and reading it later (the same
+// upb_fielddef is reused over and over).
+// * detecting errors in the input (we trust that our input is known-good).
+//
+// It also does not support any of the following protobuf features:
+// * packed fields.
+// * groups.
+// * zig-zag-encoded types like sint32 and sint64.
+//
+// If descriptor.proto ever changed to use any of these features, this decoder
+// would need to be extended to support them.
+
+// State for the bootstrapping decoder.
+typedef struct {
+ // Generic source state; initialized with upb_baredecoder_src_vtbl and
+ // carries the eof flag set by getdef.
+ // NOTE(review): the vtable casts presumably rely on this being the first
+ // member -- confirm before reordering fields.
+ upb_src src;
+ // The string being parsed; a reference is held for the decoder's lifetime.
+ upb_string *input;
+ // Byte offset into input of the next unread byte.
+ upb_strlen_t offset;
+ // The single fielddef handed back by getdef; only its number is filled in.
+ upb_fielddef field;
+ // Wire type taken from the most recently read key.
+ upb_wire_type_t wire_type;
+ // Payload length read for the most recent delimited field.
+ upb_strlen_t delimited_len;
+ // End offsets of the submessages currently open, pushed by startmsg and
+ // popped by endmsg; top is the stack cursor.
+ upb_strlen_t stack[UPB_MAX_NESTING], *top;
+ // String object recycled by getval to reference delimited payloads.
+ upb_string *str;
+} upb_baredecoder;
+
+// Decodes one varint starting at d->offset and advances d->offset past it.
+// Every byte supplies its low seven bits, least-significant group first; a
+// byte whose high bit is clear terminates the value.  No bounds or length
+// checks are made here.
+static uint64_t upb_baredecoder_readv64(upb_baredecoder *d)
+{
+  const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset;
+  const uint8_t *p = start;
+  uint64_t result = 0;
+  int shift = 0;
+  uint8_t byte;
+  do {
+    byte = *p++;
+    result |= ((uint64_t)(byte & 0x7F)) << shift;
+    shift += 7;
+  } while(byte & 0x80);
+  d->offset += p - start;
+  return result;
+}
+
+// Decodes one varint at d->offset (advancing past all of its bytes) and
+// returns only the low 32 bits of the decoded value.
+static uint32_t upb_baredecoder_readv32(upb_baredecoder *d)
+{
+  uint64_t wide = upb_baredecoder_readv64(d);
+  return (uint32_t)wide;  // Upper bits are deliberately discarded.
+}
+
+// Reads a fixed 64-bit value at d->offset and advances d->offset by 8.
+// The protobuf wire format stores fixed-width values little-endian, so the
+// bytes are assembled explicitly rather than copied in host byte order; this
+// gives the same result on little-endian hosts and the correct one on
+// big-endian hosts.  No bounds check is made here.
+static uint64_t upb_baredecoder_readf64(upb_baredecoder *d)
+{
+  const uint8_t *bytes =
+      (const uint8_t*)upb_string_getrobuf(d->input) + d->offset;
+  uint64_t val = 0;
+  for(int i = 0; i < 8; i++) {
+    val |= (uint64_t)bytes[i] << (8 * i);
+  }
+  d->offset += 8;
+  return val;
+}
+
+// Reads a fixed 32-bit value at d->offset and advances d->offset by 4.
+// The protobuf wire format stores fixed-width values little-endian, so the
+// bytes are assembled explicitly rather than copied in host byte order; this
+// gives the same result on little-endian hosts and the correct one on
+// big-endian hosts.  No bounds check is made here.
+static uint32_t upb_baredecoder_readf32(upb_baredecoder *d)
+{
+  const uint8_t *bytes =
+      (const uint8_t*)upb_string_getrobuf(d->input) + d->offset;
+  uint32_t val = 0;
+  for(int i = 0; i < 4; i++) {
+    val |= (uint32_t)bytes[i] << (8 * i);
+  }
+  d->offset += 4;
+  return val;
+}
+
+// Reads the next field key and returns the decoder's reusable fielddef with
+// only its number filled in.  The key's wire type is remembered in
+// d->wire_type, and for delimited fields the payload length is read into
+// d->delimited_len.  Returns NULL and sets d->src.eof once the current
+// (sub)message has been fully consumed.
+static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d)
+{
+  // Detect end-of-(sub)message.  startmsg pushes with a post-increment, so
+  // d->top is the next free slot and the innermost open submessage's end
+  // offset lives at d->top[-1].  With nothing pushed we are in the top-level
+  // message, which ends where the input ends.
+  upb_strlen_t end_offset =
+      (d->top == &d->stack[0]) ? upb_string_len(d->input) : d->top[-1];
+  if(d->offset >= end_offset) {
+    d->src.eof = true;
+    return NULL;
+  }
+
+  uint32_t key = upb_baredecoder_readv32(d);
+  d->wire_type = key & 0x7;    // Low three bits: wire type.
+  d->field.number = key >> 3;  // Remaining bits: field number.
+  if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) {
+    // For delimited wire values we parse the length now, since we need it in
+    // all cases.
+    d->delimited_len = upb_baredecoder_readv32(d);
+  }
+  return &d->field;
+}
+
+// Reads the value of the field most recently returned by getdef and advances
+// d->offset past it.  Numeric wire types are stored through val; a delimited
+// value is exposed as a substring of the input via d->str.  Always returns
+// true (input is not validated).
+static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val)
+{
+  if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) {
+    d->str = upb_string_tryrecycle(d->str);
+    upb_string_substr(d->str, d->input, d->offset, d->delimited_len);
+    // Consume the payload so the next key is read from the byte after it;
+    // without this the decoder would re-parse the string's contents as fields
+    // and skipval could not skip delimited values.
+    d->offset += d->delimited_len;
+    // NOTE(review): nothing is stored through val on this path -- confirm how
+    // callers are expected to obtain d->str.
+    return true;
+  }
+
+  switch(d->wire_type) {
+    case UPB_WIRE_TYPE_VARINT:
+      *val.uint64 = upb_baredecoder_readv64(d);
+      break;
+    case UPB_WIRE_TYPE_32BIT_VARINT:
+      *val.uint32 = upb_baredecoder_readv32(d);
+      break;
+    case UPB_WIRE_TYPE_64BIT:
+      *val.uint64 = upb_baredecoder_readf64(d);
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      *val.uint32 = upb_baredecoder_readf32(d);
+      break;
+    default:
+      // Any other wire type is unsupported by this decoder.
+      assert(false);
+  }
+  return true;
+}
+
+// Skips the current value by decoding it into a throwaway upb_value and
+// returning whatever getval reports.
+static bool upb_baredecoder_skipval(upb_baredecoder *d)
+{
+  upb_value discarded;
+  upb_valueptr sink = upb_value_addrof(&discarded);
+  return upb_baredecoder_getval(d, sink);
+}
+
+// Enters the delimited submessage whose length was just read: records the
+// offset at which it ends in the next free stack slot and moves the stack
+// cursor past that slot.  Always returns true; nesting depth is not checked.
+static bool upb_baredecoder_startmsg(upb_baredecoder *d)
+{
+  upb_strlen_t end_offset = d->offset + d->delimited_len;
+  *d->top = end_offset;
+  d->top++;
+  return true;
+}
+
+// Leaves the current submessage: pops the most recently pushed end offset and
+// jumps d->offset to it, which also skips any bytes of the submessage that
+// were not read.  Always returns true.
+static bool upb_baredecoder_endmsg(upb_baredecoder *d)
+{
+  d->top--;
+  d->offset = *d->top;
+  return true;
+}
+
+// Dispatch table that exposes the bare decoder through the upb_src
+// interface.  Entries are positional, so their order must match the
+// declaration of upb_src_vtable.  Each function takes a upb_baredecoder* and
+// is cast to the corresponding upb_src function-pointer type.
+static upb_src_vtable upb_baredecoder_src_vtbl = {
+ (upb_src_getdef_fptr)&upb_baredecoder_getdef,
+ (upb_src_getval_fptr)&upb_baredecoder_getval,
+ (upb_src_skipval_fptr)&upb_baredecoder_skipval,
+ (upb_src_startmsg_fptr)&upb_baredecoder_startmsg,
+ (upb_src_endmsg_fptr)&upb_baredecoder_endmsg,
+};
+
+// Creates a bare decoder that parses the contents of str.  The decoder takes
+// its own reference on str and releases it in upb_baredecoder_free().
+// Returns NULL if the decoder itself cannot be allocated.
+upb_baredecoder *upb_baredecoder_new(upb_string *str)
+{
+  upb_baredecoder *d = malloc(sizeof(*d));
+  if(!d) return NULL;
+  d->input = upb_string_getref(str);
+  d->str = upb_string_new();
+  // malloc() leaves the struct uninitialized, so the read position and the
+  // pending delimited length must be set explicitly: decoding starts at the
+  // first byte of the input with no delimited value pending.
+  d->offset = 0;
+  d->delimited_len = 0;
+  // Empty nesting stack: top points at the first free slot.
+  d->top = &d->stack[0];
+  upb_src_init(&d->src, &upb_baredecoder_src_vtbl);
+  return d;
+}
+
+// Destroys a bare decoder: drops its references on the input string and on
+// the recycled value string, then frees the decoder.  Passing NULL is a
+// no-op, mirroring free().
+void upb_baredecoder_free(upb_baredecoder *d)
+{
+  if(!d) return;
+  upb_string_unref(d->input);
+  upb_string_unref(d->str);
+  free(d);
+}
diff --git a/src/upb_table.c b/src/upb_table.c
index 51a9f21..6fd2c20 100644
--- a/src/upb_table.c
+++ b/src/upb_table.c
@@ -57,19 +57,19 @@ void upb_strtable_free(upb_strtable *t) {
upb_table_free(&t->t);
}
+// Returns the main bucket for key: MurmurHash2 over the key's bytes (third
+// argument 0), masked with (table size - 1) and offset by one so the result
+// is a 1-based bucket index.
+// NOTE(review): the mask presumably relies on the table size being a power
+// of two -- confirm against the table's sizing code.
-static uint32_t strtable_bucket(upb_strtable *t, upb_strptr key)
+static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
{
- uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_strlen(key), 0);
+ uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
return (hash & (upb_strtable_size(t)-1)) + 1;
}
+// Looks up key in the table.  Starts at the key's main bucket and follows
+// each entry's next link until UPB_END_OF_CHAIN, returning the first entry
+// whose key is set and compares equal under upb_streql(), or NULL when the
+// chain ends without a match.
-void *upb_strtable_lookup(upb_strtable *t, upb_strptr key)
+void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
{
uint32_t bucket = strtable_bucket(t, key);
upb_strtable_entry *e;
do {
e = strent(t, bucket);
- if(!upb_string_isnull(e->key) && upb_streql(e->key, key)) return e;
+ if(e->key && upb_streql(e->key, key)) return e;
} while((bucket = e->next) != UPB_END_OF_CHAIN);
return NULL;
}
@@ -149,7 +149,7 @@ static uint32_t empty_strbucket(upb_strtable *table)
/* TODO: does it matter that this is biased towards the front of the table? */
for(uint32_t i = 1; i <= upb_strtable_size(table); i++) {
upb_strtable_entry *e = strent(table, i);
- if(upb_string_isnull(e->key)) return i;
+ if(!e->key) return i;
}
assert(false);
return 0;
@@ -158,11 +158,11 @@ static uint32_t empty_strbucket(upb_strtable *table)
static void strinsert(upb_strtable *t, upb_strtable_entry *e)
{
assert(upb_strtable_lookup(t, e->key) == NULL);
- e->key = upb_string_getref(e->key, UPB_REF_FROZEN);
+ e->key = upb_string_getref(e->key);
t->t.count++;
uint32_t bucket = strtable_bucket(t, e->key);
upb_strtable_entry *table_e = strent(t, bucket);
- if(!upb_string_isnull(table_e->key)) { /* Collision. */
+ if(table_e->key) { /* Collision. */
if(bucket == strtable_bucket(t, table_e->key)) {
/* Existing element is in its main position. Find an empty slot to
* place our new element and append it to this key's chain. */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback