summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2009-06-03 22:06:24 -0700
committerJoshua Haberman <joshua@reverberate.org>2009-06-03 22:06:24 -0700
commitd1f78c88faafea7e672c7c45e20f6f040942a92a (patch)
tree67d6f959815b464a241d5c6b7c7df1a6755e4f55
parent0c80c384756a48d5f731eeafa62b6cd5f3861749 (diff)
A bunch more work, a fast table for field lookup.
-rw-r--r--Makefile8
-rw-r--r--upb.h56
-rw-r--r--upb_fieldmap.c50
-rw-r--r--upb_fieldmap.h53
-rw-r--r--upb_parse.c189
-rw-r--r--upb_parse.h110
-rw-r--r--upb_struct.c1
-rw-r--r--upb_struct.h4
-rw-r--r--upb_table.c89
-rw-r--r--upb_table.h75
10 files changed, 279 insertions, 356 deletions
diff --git a/Makefile b/Makefile
index eb2f10d..4d1efd7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,16 @@
.PHONY: all clean
CFLAGS=-std=c99 -O3 -Wall -Wextra -pedantic
-OBJ=upb_parse.o upb_fieldmap.o upb_struct.o
-all: $(OBJ) tests
+OBJ=upb_parse.o upb_table.o upb_struct.o
+all: $(OBJ)
clean:
rm -f $(OBJ) tests
upb_parse.o: upb_parse.c upb_parse.h
gcc $(CFLAGS) -o upb_parse.o -c upb_parse.c
-upb_fieldmap.o: upb_fieldmap.c upb_fieldmap.h
- gcc $(CFLAGS) -o upb_fieldmap.o -c upb_fieldmap.c
+upb_table.o: upb_table.c upb_table.h
+ gcc $(CFLAGS) -o upb_table.o -c upb_table.c
upb_struct.o: upb_struct.c upb_struct.h
gcc $(CFLAGS) -o upb_struct.o -c upb_struct.c
diff --git a/upb.h b/upb.h
index d63e207..445975b 100644
--- a/upb.h
+++ b/upb.h
@@ -19,29 +19,30 @@ extern "C" {
#define UPB_MAX_NESTING 64
/* A list of types as they are encoded on-the-wire. */
-typedef enum upb_wire_type {
+enum upb_wire_type {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5,
-} upb_wire_type_t;
-
-struct upb_delimited {
- size_t offset; /* relative to the beginning of the stream. */
- uint32_t len;
};
+typedef int8_t upb_wire_type_t;
-/* A value as it is encoded on-the-wire. */
+/* A value as it is encoded on-the-wire, except delimited, which is handled
+ * separately. */
union upb_wire_value {
uint64_t varint;
uint64_t _64bit;
uint32_t _32bit;
- struct upb_delimited delimited;
};
-/* A value as described in a .proto file. */
+/* Value type as defined in a .proto file. The values of this are defined by
+ * google_protobuf_FieldDescriptorProto_Type (from descriptor.proto). */
+typedef int32_t upb_field_type_t;
+
+/* A value as described in a .proto file, except delimited, which is handled
+ * separately. */
union upb_value {
double _double;
float _float;
@@ -50,9 +51,10 @@ union upb_value {
uint32_t uint32;
uint64_t uint64;
bool _bool;
- struct upb_delimited delimited;
+ uint32_t delim_len;
};
+/* The number of a field, eg. "optional string foo = 3". */
typedef int32_t upb_field_number_t;
/* A tag occurs before each value on-the-wire. */
@@ -61,6 +63,40 @@ struct upb_tag {
upb_wire_type_t wire_type;
};
+/* Status codes used as a return value. */
+typedef enum upb_status {
+ UPB_STATUS_OK = 0,
+ UPB_STATUS_SUBMESSAGE_END = 1,
+
+ /** FATAL ERRORS: these indicate corruption, and cannot be recovered. */
+
+ // A varint did not terminate before hitting 64 bits.
+ UPB_ERROR_UNTERMINATED_VARINT = -1,
+
+ // A submessage ended in the middle of data.
+ UPB_ERROR_BAD_SUBMESSAGE_END = -2,
+
+ // Encountered a "group" on the wire (deprecated and unsupported).
+ UPB_ERROR_GROUP = -3,
+
+ // Input was nested more than UPB_MAX_NESTING deep.
+ UPB_ERROR_STACK_OVERFLOW = -4,
+
+ // The input data caused the pb's offset (a size_t) to overflow.
+ UPB_ERROR_OVERFLOW = -5,
+
+ // Generic error.
+ UPB_ERROR = -6,
+
+ /** NONFATAL ERRORS: the input was invalid, but we can continue if desired. */
+
+ // A value was encountered that was not defined in the .proto file.
+ UPB_ERROR_UNKNOWN_VALUE = 2,
+
+ // A field was encoded with the wrong wire type.
+ UPB_ERROR_MISMATCHED_TYPE = 3,
+} upb_status_t;
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/upb_fieldmap.c b/upb_fieldmap.c
deleted file mode 100644
index 015b2e1..0000000
--- a/upb_fieldmap.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- */
-
-#include "upb_fieldmap.h"
-
-#include <stdlib.h>
-
-void pbstream_init_fieldmap(struct pbstream_fieldmap *fieldmap,
- struct pbstream_field *fields,
- int num_fields)
-{
- qsort(fields, num_fields, sizeof(*fields), compare_fields);
-
- /* Find the largest n for which at least half the fieldnums <n are used.
- * Start at 8 to avoid noise of small numbers. */
- pbstream_field_number_t n = 0, maybe_n;
- for(int i = 0; i < num_fields; i++) {
- maybe_n = fields[i].field_number;
- if(maybe_n > 8 && maybe_n/(i+1) >= 2) break;
- n = maybe_n;
- }
-
- fieldmap->num_fields = num_fields;
- fieldmap->fields = malloc(sizeof(*fieldmap->fields)*num_fields);
- memcpy(fieldmap->fields, fields, sizeof(*fields)*num_fields);
-
- fieldmap->array_size = n;
- fieldmap->array = malloc(sizeof(*fieldmap->array)*n);
- memset(fieldmap->array, 0, sizeof(*fieldmap->array)*n);
-
- for (int i = 0; i < num_fields && fields[i].field_number <= n; i++)
- fieldmap->array[fields[i].field_number-1] = &fieldmap->fields[i];
-
- /* Until we support the hashtable part... */
- assert(n == fields[num_fields-1].field_number);
-}
-
-void pbstream_free_fieldmap(struct pbstream_fieldmap *fieldmap)
-{
- free(fieldmap->fields);
- free(fieldmap->array);
-}
-
-/* Emit definition for inline function. */
-extern void *upb_fieldmap_find(struct upb_fieldmap *fm,
- pbstream_field_number_t num,
- size_t info_size);
diff --git a/upb_fieldmap.h b/upb_fieldmap.h
deleted file mode 100644
index 0fb5a3e..0000000
--- a/upb_fieldmap.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * A fieldmap is a data structure that supports fast lookup of fields by
- * number. It is logically a map of {field_number -> <field info>}, where
- * <field info> is any struct that begins with the field number. Fast lookup
- * is important, because it is in the critical path of parsing. */
-
-#ifndef UPB_FIELDMAP_H_
-#define UPB_FIELDMAP_H_
-
-#include "upb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct upb_fieldmap {
- int array_size;
- void *array;
- /* TODO: the hashtable part. */
-};
-
-/* Takes an array of num_fields fields and builds an optimized table for fast
- * lookup of fields by number. The input fields need not be sorted. This
- * fieldmap must be freed with upb_free_fieldmap(). */
-void upb_init_fieldmap(struct upb_fieldmap *fieldmap,
- void *fields,
- int num_fields,
- int field_size);
-void upb_free_fieldmap(struct upb_fieldmap *fieldmap);
-
-/* Looks the given field number up in the fieldmap, and returns the
- * corresponding field definition (or NULL if this field number does not exist
- * in this fieldmap). */
-inline void *upb_fieldmap_find(struct upb_fieldmap *fm,
- upb_field_number_t num,
- size_t info_size)
-{
- if (num < array_size) {
- return (char*)fs->array + (num*info_size);
- } else {
- /* TODO: the hashtable part. */
- }
-}
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_PARSE_H_ */
diff --git a/upb_parse.c b/upb_parse.c
index c0fc007..458876e 100644
--- a/upb_parse.c
+++ b/upb_parse.c
@@ -8,6 +8,7 @@
#include <assert.h>
#include <string.h>
+#include "descriptor.h"
/* Branch prediction hints for GCC. */
#ifdef __GNUC__
@@ -138,14 +139,10 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
static void wvtov_ ## type(wire_t s, val_t *d)
#define GET(type, v_or_f, wire_t, val_t, member_name) \
- static upb_status_t get_ ## type(struct upb_parse_state *s, \
- uint8_t *buf, \
- struct upb_tagged_value *d) { \
+ static upb_status_t get_ ## type(uint8_t **buf, union upb_value *d) { \
wire_t tmp; \
- uint8_t *b = buf; \
- CHECK(get_ ## v_or_f ## _ ## wire_t(&b, &tmp)); \
- wvtov_ ## type(tmp, &d->v.member_name); \
- s->offset += (b-buf); \
+ CHECK(get_ ## v_or_f ## _ ## wire_t(buf, &tmp)); \
+ wvtov_ ## type(tmp, &d->member_name); \
return UPB_STATUS_OK; \
}
@@ -172,65 +169,25 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
#undef GET
#undef T
-static void wvtov_delimited(uint32_t s, struct upb_delimited *d, size_t o)
-{
- d->offset = o;
- d->len = s;
-}
-
-/* Use BYTES version for both STRING and BYTES, leave UTF-8 checks to client. */
-static upb_status_t get_BYTES(struct upb_parse_state *s, uint8_t *buf,
- struct upb_tagged_value *d) {
- uint32_t tmp;
- uint8_t *b = buf;
- CHECK(get_v_uint32_t(&b, &tmp));
- s->offset += (b-buf); /* advance past length varint. */
- wvtov_delimited(tmp, &d->v.delimited, s->offset);
- size_t new_offset = s->offset + d->v.delimited.len; /* skip bytes */
- if (unlikely(new_offset < s->offset)) return UPB_ERROR_OVERFLOW;
- s->offset = new_offset;
- return UPB_STATUS_OK;
-}
-
-static upb_status_t get_MESSAGE(struct upb_parse_state *s, uint8_t *buf,
- struct upb_tagged_value *d) {
- /* We're entering a sub-message. */
- uint32_t tmp;
- uint8_t *b = buf;
- CHECK(get_v_uint32_t(&b, &tmp));
- s->offset += (b-buf); /* advance past length varint. */
- wvtov_delimited(tmp, &d->v.delimited, s->offset);
- /* Unlike STRING and BYTES, we *don't* advance past delimited here. */
- if (unlikely(++s->top == s->limit)) return UPB_ERROR_STACK_OVERFLOW;
- s->top->fieldset = d->field->fieldset;
- s->top->end_offset = d->v.delimited.offset + d->v.delimited.len;
- if (unlikely(s->top->end_offset < s->offset)) return UPB_ERROR_OVERFLOW;
- return UPB_STATUS_OK;
-}
-
-struct upb_type_info {
- upb_wire_type_t expected_wire_type;
- upb_status_t (*get)(struct upb_parse_state *s, uint8_t *buf,
- struct upb_tagged_value *d);
-};
-static struct upb_type_info type_info[] = {
- {UPB_WIRE_TYPE_64BIT, get_DOUBLE},
- {UPB_WIRE_TYPE_32BIT, get_FLOAT},
- {UPB_WIRE_TYPE_VARINT, get_INT32},
- {UPB_WIRE_TYPE_VARINT, get_INT64},
- {UPB_WIRE_TYPE_VARINT, get_UINT32},
- {UPB_WIRE_TYPE_VARINT, get_UINT64},
- {UPB_WIRE_TYPE_VARINT, get_SINT32},
- {UPB_WIRE_TYPE_VARINT, get_SINT64},
- {UPB_WIRE_TYPE_32BIT, get_FIXED32},
- {UPB_WIRE_TYPE_64BIT, get_FIXED64},
- {UPB_WIRE_TYPE_32BIT, get_SFIXED32},
- {UPB_WIRE_TYPE_64BIT, get_SFIXED64},
- {UPB_WIRE_TYPE_VARINT, get_BOOL},
- {UPB_WIRE_TYPE_DELIMITED, get_BYTES},
- {UPB_WIRE_TYPE_DELIMITED, get_BYTES},
- {UPB_WIRE_TYPE_VARINT, get_ENUM},
- {UPB_WIRE_TYPE_DELIMITED, get_MESSAGE}
+upb_wire_type_t upb_expected_wire_types[] = {
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = UPB_WIRE_TYPE_64BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = UPB_WIRE_TYPE_32BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = UPB_WIRE_TYPE_64BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = UPB_WIRE_TYPE_32BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = UPB_WIRE_TYPE_DELIMITED,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = UPB_WIRE_TYPE_DELIMITED,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = -1, /* TODO */
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = UPB_WIRE_TYPE_DELIMITED,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32] = UPB_WIRE_TYPE_32BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64] = UPB_WIRE_TYPE_64BIT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = UPB_WIRE_TYPE_VARINT,
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = UPB_WIRE_TYPE_VARINT,
};
upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag)
@@ -249,22 +206,17 @@ upb_status_t parse_wire_value(uint8_t *buf, size_t *offset,
#define READ(expr) CHECK(expr); *offset += (b-buf)
uint8_t *b = buf;
switch(wt) {
- case UPB_WIRE_TYPE_VARINT:
- READ(get_v_uint64_t(&b, &wv->varint)); break;
- case UPB_WIRE_TYPE_64BIT:
- READ(get_f_uint64_t(&b, &wv->_64bit)); break;
- case UPB_WIRE_TYPE_32BIT:
- READ(get_f_uint32_t(&b, &wv->_32bit)); break;
+ case UPB_WIRE_TYPE_VARINT: READ(get_v_uint64_t(&b, &wv->varint)); break;
+ case UPB_WIRE_TYPE_64BIT: READ(get_f_uint64_t(&b, &wv->_64bit)); break;
+ case UPB_WIRE_TYPE_32BIT: READ(get_f_uint32_t(&b, &wv->_32bit)); break;
case UPB_WIRE_TYPE_DELIMITED:
- wv->delimited.offset = *offset;
- READ(get_v_uint32_t(&b, &wv->delimited.len));
- size_t new_offset = *offset + wv->delimited.len;
+ READ(get_v_uint32_t(&b, &wv->_32bit));
+ size_t new_offset = *offset + wv->_32bit;
if (new_offset < *offset) return UPB_ERROR_OVERFLOW;
*offset += new_offset;
break;
case UPB_WIRE_TYPE_START_GROUP:
- case UPB_WIRE_TYPE_END_GROUP:
- return UPB_ERROR_GROUP; /* deprecated, no plans to support. */
+ case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */
}
return UPB_STATUS_OK;
}
@@ -274,12 +226,9 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
{
uint8_t *b = buf;
switch(wt) {
- case UPB_WIRE_TYPE_VARINT:
- READ(skip_v_uint64_t(&b)); break;
- case UPB_WIRE_TYPE_64BIT:
- READ(skip_f_uint64_t(&b)); break;
- case UPB_WIRE_TYPE_32BIT:
- READ(skip_f_uint32_t(&b)); break;
+ case UPB_WIRE_TYPE_VARINT: READ(skip_v_uint64_t(&b)); break;
+ case UPB_WIRE_TYPE_64BIT: READ(skip_f_uint64_t(&b)); break;
+ case UPB_WIRE_TYPE_32BIT: READ(skip_f_uint32_t(&b)); break;
case UPB_WIRE_TYPE_DELIMITED: {
/* Have to get (not skip) the length to skip the bytes. */
uint32_t len;
@@ -290,71 +239,27 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
break;
}
case UPB_WIRE_TYPE_START_GROUP:
- case UPB_WIRE_TYPE_END_GROUP:
- return UPB_ERROR_GROUP; /* deprecated, no plans to support. */
+ case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */
}
return UPB_STATUS_OK;
#undef READ
}
-/* Parses and processes the next value from buf. */
-upb_status_t upb_parse_field(struct upb_parse_state *s,
- uint8_t *buf,
- upb_field_number_t *fieldnum,
- struct upb_tagged_value *val,
- struct upb_tagged_wire_value *wv)
+upb_status_t upb_parse_value(uint8_t **b, upb_field_type_t ft,
+ union upb_value *v)
{
- /* Check for end-of-message at the current stack depth. */
- if(unlikely(s->offset >= s->top->end_offset)) {
- /* If the end offset isn't an exact field boundary, the pb is corrupt. */
- if(unlikely(s->offset != s->top->end_offset))
- return UPB_ERROR_BAD_SUBMESSAGE_END;
- s->top--;
- return UPB_STATUS_SUBMESSAGE_END;
+#define CASE(t) \
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: return get_ ## t(b, v);
+ switch(ft) {
+ CASE(DOUBLE) CASE(FLOAT) CASE(INT64) CASE(UINT64) CASE(INT32) CASE(FIXED64)
+ CASE(FIXED32) CASE(BOOL) CASE(UINT32) CASE(ENUM) CASE(SFIXED32)
+ CASE(SFIXED64) CASE(SINT32) CASE(SINT64)
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
+ case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
+ return get_UINT32(b, v);
+ default: return UPB_ERROR; /* Including GROUP. */
}
-
- struct upb_tag tag;
- uint8_t *b = buf;
- CHECK(parse_tag(&b, &tag));
- s->offset += (b-buf);
- struct upb_field *fd = upb_find_field(s->top->fieldset,
- tag.field_number);
- upb_status_t unknown_value_status;
- if(unlikely(!fd)) {
- unknown_value_status = UPB_ERROR_UNKNOWN_VALUE;
- goto unknown_value;
- }
- struct upb_type_info *info = &type_info[fd->type];
- if(unlikely(tag.wire_type != info->expected_wire_type)) {
- unknown_value_status = UPB_ERROR_MISMATCHED_TYPE;
- goto unknown_value;
- }
-
- *fieldnum = tag.field_number;
- val->field = fd;
- CHECK(info->get(s, b, val));
- return UPB_STATUS_OK;
-
-unknown_value:
- wv->type = tag.wire_type;
- CHECK(parse_wire_value(buf, &s->offset, tag.wire_type, &wv->v));
- return unknown_value_status;
-}
-
-void upb_init_parser(
- struct upb_parse_state *state,
- struct upb_fieldset *toplevel_fieldset)
-{
- state->offset = 0;
- state->top = state->stack;
- state->limit = state->top + UPB_MAX_STACK;
- state->top->fieldset = toplevel_fieldset;
- state->top->end_offset = SIZE_MAX;
-}
-
-static int compare_fields(const void *f1, const void *f2)
-{
- return ((struct upb_field*)f1)->field_number -
- ((struct upb_field*)f2)->field_number;
+#undef CASE
}
diff --git a/upb_parse.h b/upb_parse.h
index 75752a0..829e97c 100644
--- a/upb_parse.h
+++ b/upb_parse.h
@@ -18,99 +18,6 @@
extern "C" {
#endif
-/* A deserialized value as described in a .proto file. */
-struct upb_tagged_value {
- struct upb_field *field;
- union upb_value v;
-};
-
-/* A value as it is encoded on-the-wire, before it has been interpreted as
- * any particular .proto type. */
-struct upb_tagged_wire_value {
- upb_wire_type_t type;
- union upb_wire_value v;
-};
-
-/* Definition of a single field in a message, for the purposes of the parser's
- * fieldmap. Note that this does not include nearly all of the information
- * that can be specified about a field in a .proto file. For example, we don't
- * even know the field's name. We keep only the information necessary to parse
- * the field. */
-struct upb_field {
- upb_field_number_t field_number;
- int32_t type; /* google_protobuf_FieldDescriptorProto_Type */
- struct upb_fieldset *fieldset; /* if type == MESSAGE */
-};
-
-struct upb_parse_stack_frame {
- struct upb_fieldset *fieldset;
- size_t end_offset; /* unknown for the top frame, so we set to SIZE_MAX */
-};
-
-/* The stream parser's state. */
-struct upb_parse_state {
- size_t offset;
- struct upb_parse_stack_frame stack[UPB_MAX_STACK];
- struct upb_parse_stack_frame *top, *limit;
-};
-
-/* Call this once before parsing to initialize the data structures.
- * message_type can be NULL, in which case all fields will be reported as
- * unknown. */
-void upb_init_parser(struct upb_parse_state *state,
- struct upb_fieldset *toplevel_fieldset);
-
-/* Status as returned by upb_parse(). Status codes <0 are fatal errors
- * that cannot be recovered. Status codes >0 are unusual but nonfatal events,
- * which nonetheless must be handled differently since they do not return data
- * in val. */
-typedef enum upb_status {
- UPB_STATUS_OK = 0,
- UPB_STATUS_SUBMESSAGE_END = 1, // No data is stored in val or wv.
-
- /** FATAL ERRORS: these indicate corruption, and cannot be recovered. */
-
- // A varint did not terminate before hitting 64 bits.
- UPB_ERROR_UNTERMINATED_VARINT = -1,
-
- // A submessage ended in the middle of data.
- UPB_ERROR_BAD_SUBMESSAGE_END = -2,
-
- // Encountered a "group" on the wire (deprecated and unsupported).
- UPB_ERROR_GROUP = -3,
-
- // Input was nested more than UPB_MAX_NESTING deep.
- UPB_ERROR_STACK_OVERFLOW = -4,
-
- // The input data caused the pb's offset (a size_t) to overflow.
- UPB_ERROR_OVERFLOW = -5,
-
- /** NONFATAL ERRORS: the input was invalid, but we can continue if desired. */
-
- // A value was encountered that was not defined in the .proto file. The
- // unknown value is stored in wv.
- UPB_ERROR_UNKNOWN_VALUE = 2,
-
- // A field was encoded with the wrong wire type. The wire value is stored in
- // wv.
- UPB_ERROR_MISMATCHED_TYPE = 3,
-} upb_status_t;
-struct upb_parse_state;
-
-/* The main parsing function. Parses the next value from buf, storing the
- * parsed value in val. If val is of type UPB_TYPE_MESSAGE, then a
- * submessage was entered.
- *
- * IMPORTANT NOTE: for efficiency, the parsing routines do not do bounds checks,
- * and may read as much as far as buf+10. So the caller must ensure that buf is
- * not within 10 bytes of unmapped memory, or the program will segfault. Clients
- * are encouraged to overallocate their buffers by ten bytes to compensate. */
-upb_status_t upb_parse_field(struct upb_parse_state *s,
- uint8_t *buf,
- upb_field_number_t *fieldnum,
- struct upb_tagged_value *val,
- struct upb_tagged_wire_value *wv);
-
/* Low-level parsing functions. ***********************************************/
/* Parses a single tag from the character data starting at buf, and updates
@@ -118,6 +25,19 @@ upb_status_t upb_parse_field(struct upb_parse_state *s,
* by at most ten bytes. */
upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag);
+/* Expected wire type for each field type; indexed by upb_field_type_t. */
+extern upb_wire_type_t upb_expected_wire_types[];
+/* Returns true if wt is the correct on-the-wire type for ft. */
+inline bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
+ /* NOTE(review): ft is used as an array index without a range check, so the
+ * caller presumably must pass a valid field type -- confirm. */
+ return upb_expected_wire_types[ft] == wt;
+}
+
+/* Parses and converts a value from the character data starting at buf. The
+ * caller must have previously checked that the wire type is appropriate for
+ * this field type. For delimited data, buf is advanced to the beginning of
+ * the delimited data, not the end. */
+upb_status_t upb_parse_value(uint8_t **buf, upb_field_type_t ft,
+ union upb_value *value);
+
/* Parses a wire value with the given type (which must have been obtained from
* a tag that was just parsed) and adds the number of bytes that were consumed
* to *offset. For delimited types, offset is advanced past the delimited
@@ -127,8 +47,8 @@ upb_status_t upb_parse_wire_value(uint8_t *buf, size_t *offset,
union upb_wire_value *wv);
/* Like the above, but discards the wire value instead of saving it. */
-upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
- upb_wire_type_t wt);
+upb_status_t upb_skip_wire_value(uint8_t *buf, size_t *offset,
+ upb_wire_type_t wt);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/upb_struct.c b/upb_struct.c
index b0b244a..3284796 100644
--- a/upb_struct.c
+++ b/upb_struct.c
@@ -42,3 +42,4 @@ extern bool upb_struct_is_set(uint8_t *s, struct upb_struct_field *f);
extern bool upb_struct_all_required_fields_set(
uint8_t *s, struct upb_struct_definition *d);
extern void upb_struct_clear(uint8_t *s, struct upb_struct_definition *d);
+
diff --git a/upb_struct.h b/upb_struct.h
index a64b3d4..7d5c219 100644
--- a/upb_struct.h
+++ b/upb_struct.h
@@ -57,8 +57,8 @@ struct upb_struct_definition {
};
/* While these are written to be as fast as possible, it will still be faster
- * cache the results of this lookup if possible. These return NULL if no such
- * field is found. */
+ * to cache the results of this lookup if possible. These return NULL if no
+ * such field is found. */
struct upb_struct_field *upb_struct_find_field_by_name(
struct upb_struct_definition *d, char *name);
struct upb_struct_field *upb_struct_find_field_by_number(
diff --git a/upb_table.c b/upb_table.c
new file mode 100644
index 0000000..656da24
--- /dev/null
+++ b/upb_table.c
@@ -0,0 +1,89 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ */
+
+#include "upb_table.h"
+
+#include <stdlib.h>
+
+/* qsort() comparator: orders table entries by ascending key.  The keys are
+ * compared rather than subtracted, because the difference of two int32_t
+ * keys can overflow (undefined behavior, and a wrong sort order in
+ * practice).  Returns <0, 0 or >0 as qsort() requires. */
+static int compare_entries(const void *f1, const void *f2)
+{
+  const struct upb_inttable_entry *e1 = f1;
+  const struct upb_inttable_entry *e2 = f2;
+  return (e1->key > e2->key) - (e1->key < e2->key);
+}
+
+/* Returns the larger of a and b. */
+static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
+
+/* Rounds v up to the nearest power of two; a v that is already a power of two
+ * is returned unchanged.  Note that v == 0 yields 0 (v-- wraps to all ones and
+ * v++ wraps back to zero), as does any v greater than 2^31. */
+static uint32_t round_up_to_pow2(uint32_t v)
+{
+ /* cf. Bit Twiddling Hacks:
+ * http://www-graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
+ v--;
+ /* Smear the highest set bit into every lower bit position. */
+ v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
+ v++;
+ return v;
+}
+
+/* Builds table from the num_entries entries (each entry_size bytes) stored in
+ * the array entries.  The input array is sorted in place by key, a
+ * power-of-two table size is chosen, and the table storage is allocated with
+ * every slot marked empty.
+ *
+ * NOTE(review): the insertion loop at the end only detects collisions; no
+ * path (colliding or not) copies an entry into the table yet, so the table is
+ * left empty.  This looks unfinished.
+ * NOTE(review): with num_entries == 0, min_size is 0 and round_up_to_pow2(0)
+ * returns 0, so table->size ends up 0. */
+void upb_inttable_init(struct upb_inttable *table, void *entries,
+ int num_entries, int entry_size)
+{
+ qsort(entries, num_entries, entry_size, compare_entries);
+
+ /* Find the largest n for which at least half the keys <n are used. We
+ * make sure our table size is at least n. This allows all keys <n to be
+ * in their main position (as if it were an array) and only numbers >n might
+ * possibly have collisions. Start at 8 to avoid noise of small numbers. */
+ upb_inttable_key_t n = 0, maybe_n;
+ bool all_in_array = true;
+ for(int i = 0; i < num_entries; i++) {
+ struct upb_inttable_entry *e =
+ upb_inttable_entry_get(entries, i, entry_size);
+ maybe_n = e->key;
+ /* Stop at the first key that is at least twice its 1-based rank in the
+ * sorted input; n keeps the last key seen before that point. */
+ if(maybe_n > 8 && maybe_n/(i+1) >= 2) {
+ all_in_array = false;
+ break;
+ }
+ n = maybe_n;
+ }
+
+ /* TODO: measure, tweak, optimize this choice of table size. Possibly test
+ * (at runtime) maximum chain length for each proposed size. */
+ /* If the scan above never stopped early, n slots are requested; otherwise
+ * the size is chosen so that num_entries fills at most 85% of the table. */
+ uint32_t min_size_by_load = all_in_array ? n : (double)num_entries / 0.85;
+ uint32_t min_size = max(n, min_size_by_load);
+ table->size = round_up_to_pow2(min_size);
+
+ /* NOTE(review): the malloc() result is not checked before the loop below
+ * writes through it. */
+ table->entries = malloc(table->size * entry_size);
+ /* Initialize to empty. */
+ for(size_t i = 0; i < table->size; i++) {
+ struct upb_inttable_entry *e = upb_inttable_get(table, i, entry_size);
+ e->key = UPB_END_OF_CHAIN;
+ e->next = UPB_END_OF_CHAIN;
+ }
+
+ /* Insert the elements. */
+ for(int i = 0; i < num_entries; i++) {
+ struct upb_inttable_entry *e =
+ upb_inttable_entry_get(entries, i, entry_size);
+ int32_t hash = upb_inttable_hash(table, e->key);
+ struct upb_inttable_entry *table_e =
+ upb_inttable_get(table, hash, entry_size);
+ if(table_e->key != UPB_END_OF_CHAIN) { /* Collision. */
+ if(hash == upb_inttable_hash(table, table_e->key)) {
+ /* Existing element is in its main position. Find an empty slot to
+ * place our new element. */
+ } else {
+ /* Existing element is not in its main position. Move it to an empty
+ * slot and put our element in its main position. */
+ }
+ }
+ }
+}
+
+/* Releases the entry storage held in table->entries.  The upb_inttable
+ * structure itself is not freed. */
+void upb_inttable_free(struct upb_inttable *table)
+{
+ free(table->entries);
+}
+
+/* Emit definitions for the inline functions declared in upb_table.h.  In C99
+ * an inline function declared without "extern" provides only an inline
+ * definition; these declarations make this file supply the external
+ * definitions needed whenever the compiler chooses not to inline a call. */
+extern int32_t upb_inttable_hash(struct upb_inttable *table,
+                                 upb_inttable_key_t key);
+extern struct upb_inttable_entry *upb_inttable_entry_get(
+    void *entries, int32_t pos, int entry_size);
+extern struct upb_inttable_entry *upb_inttable_get(
+    struct upb_inttable *table, int32_t pos, int entry_size);
+extern void *upb_inttable_lookup(struct upb_inttable *table, int32_t key,
+                                 int entry_size);
diff --git a/upb_table.h b/upb_table.h
new file mode 100644
index 0000000..684dbff
--- /dev/null
+++ b/upb_table.h
@@ -0,0 +1,75 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ */
+
+#ifndef UPB_TABLE_H_
+#define UPB_TABLE_H_
+
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int32_t upb_inttable_key_t;
+
+/* Sentinel value: stored in an entry's key to mark an empty slot and in its
+ * next field to mark the end of a collision chain.  Fully parenthesized so
+ * the macro expands safely inside larger expressions. */
+#define UPB_END_OF_CHAIN ((upb_inttable_key_t)-1)
+
+/* Fields through which the table code accesses every entry.  Entries are
+ * entry_size bytes each, so caller-defined entry types presumably begin with
+ * these fields -- confirm against callers. */
+struct upb_inttable_entry {
+ upb_inttable_key_t key; /* UPB_END_OF_CHAIN marks an empty slot. */
+ int32_t next; /* Position of the next entry to probe, or UPB_END_OF_CHAIN. */
+};
+
+/* The table itself: size slots of caller-specified entry size. */
+struct upb_inttable {
+ uint32_t size; /* Is a power of two. */
+ void *entries; /* Slot storage allocated by upb_inttable_init. */
+};
+
+/* Builds an int32_t -> <entry> table, optimized for very fast lookup by
+ * number. table is a pointer to a previously allocated upb_inttable.
+ * entries points to an array of the desired entries themselves, each of size
+ * entry_size. The table is allocated in dynamic memory, and does not reference
+ * the data in entries. Entries may be modified by the function.
+ *
+ * The table must be freed with upb_inttable_free. */
+void upb_inttable_init(struct upb_inttable *table, void *entries,
+ int num_entries, int entry_size);
+
+/* Returns the main position of key in table: the low bits of key selected by
+ * masking with size-1, which relies on size being a power of two. */
+inline int32_t upb_inttable_hash(struct upb_inttable * table,
+ upb_inttable_key_t key) {
+ return key & (table->size-1);
+}
+
+/* Frees any data that was allocated by upb_inttable_init. */
+void upb_inttable_free(struct upb_inttable *table);
+
+/* Returns a pointer to element number pos of entries, an array whose elements
+ * are entry_size bytes apart.  The byte offset is added to the char pointer
+ * before converting to the entry type; a cast binds tighter than "+", so
+ * casting first would scale the offset by sizeof(struct upb_inttable_entry)
+ * a second time and yield the wrong address. */
+inline struct upb_inttable_entry *upb_inttable_entry_get(
+    void *entries, int32_t pos, int entry_size) {
+  return (struct upb_inttable_entry*)((char*)entries + pos*entry_size);
+}
+
+/* Returns the entry stored at slot pos of table, where each slot is
+ * entry_size bytes. */
+inline struct upb_inttable_entry *upb_inttable_get(
+ struct upb_inttable *table, int32_t pos, int entry_size) {
+ return upb_inttable_entry_get(table->entries, pos, entry_size);
+}
+
+/* Looks up key in this table. Inlined because this is in the critical path
+ * of parsing.  Starts at the key's main position and follows each entry's
+ * next link until the key matches or the chain ends.  Returns a pointer to
+ * the matching entry, or NULL if the key is not present.
+ *
+ * NOTE(review): empty slots carry the key UPB_END_OF_CHAIN, so looking up
+ * that value would match an empty slot -- presumably it is never a valid
+ * key; confirm. */
+inline void *upb_inttable_lookup(struct upb_inttable *table, int32_t key,
+ int entry_size) {
+ int32_t pos = upb_inttable_hash(table, key);
+ do {
+ struct upb_inttable_entry *e = upb_inttable_get(table, pos, entry_size);
+ if (key == e->key) return e;
+ pos = e->next;
+ } while (pos != UPB_END_OF_CHAIN);
+ return NULL; /* Not found. */
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_TABLE_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback