summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 8
-rw-r--r-- benchmarks/parsetostruct.upb_table.c 51
-rw-r--r-- src/upb.c 37
-rw-r--r-- src/upb.h 131
-rw-r--r-- src/upb_array.h 39
-rw-r--r-- src/upb_atomic.h 1
-rw-r--r-- src/upb_mm.c 4
-rw-r--r-- src/upb_msg.c 45
-rw-r--r-- src/upb_parse.c 180
-rw-r--r-- src/upb_parse.h 5
-rw-r--r-- src/upb_struct.h 6
-rw-r--r-- src/upb_text.c 2
12 files changed, 295 insertions, 214 deletions
diff --git a/Makefile b/Makefile
index 0aaae32..110a263 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@ LDLIBS=-lpthread
LIBUPB=src/libupb.a
LIBUPB_PIC=src/libupb_pic.a
LIBUPB_SHARED=src/libupb.so
-ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) $(LIBUPB_SHARED) tests/test_table tests/tests tools/upbc
+ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) tests/test_table tests/tests tools/upbc
all: $(ALL)
clean:
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
@@ -42,8 +42,9 @@ clean:
cd lang_ext/python && python setup.py clean --all
# The core library (src/libupb.a)
-SRC=src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c src/upb_enum.c src/upb_context.c \
- src/upb_string.c src/upb_text.c src/upb_serialize.c descriptor/descriptor.c
+SRC=src/upb.c src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c \
+ src/upb_enum.c src/upb_context.c src/upb_string.c src/upb_text.c \
+ src/upb_serialize.c descriptor/descriptor.c
STATICOBJ=$(patsubst %.c,%.o,$(SRC))
SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC))
# building shared objects is like building static ones, except -fPIC is added.
@@ -91,6 +92,7 @@ upb_benchmarks: $(UPB_BENCHMARKS)
benchmarks: $(BENCHMARKS)
benchmark:
@rm -f benchmarks/results
+ @rm -rf benchmarks/*.dSYM
@for test in benchmarks/b.* ; do ./$$test ; done
benchmarks/google_messages.proto.pb: benchmarks/google_messages.proto
diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c
index 751c982..9daa8e0 100644
--- a/benchmarks/parsetostruct.upb_table.c
+++ b/benchmarks/parsetostruct.upb_table.c
@@ -3,45 +3,47 @@
#include "upb_context.h"
#include "upb_msg.h"
+#include "upb_mm.h"
-static struct upb_context c;
-static struct upb_string str;
-static struct upb_msg_parse_state s;
-static struct upb_msg *m;
-static void *data[NUM_MESSAGES];
+static struct upb_context *c;
+static struct upb_string *str;
+static struct upb_msgdef *def;
+static struct upb_msg *msgs[NUM_MESSAGES];
static bool initialize()
{
/* Initialize upb state, parse descriptor. */
- upb_context_init(&c);
- struct upb_string fds;
- if(!upb_strreadfile(MESSAGE_DESCRIPTOR_FILE, &fds)) {
+ c = upb_context_new();
+ struct upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
+ if(!fds) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
return false;
}
- if(!upb_context_parsefds(&c, &fds)) {
+ if(!upb_context_parsefds(c, fds)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ".\n");
return false;
}
- upb_strfree(fds);
+ upb_string_unref(fds);
char class_name[] = MESSAGE_NAME;
struct upb_string proto_name;
proto_name.ptr = class_name;
proto_name.byte_len = sizeof(class_name)-1;
- struct upb_symtab_entry *e = upb_context_lookup(&c, &proto_name);
- if(!e || e->type != UPB_SYM_MESSAGE) {
+ struct upb_symtab_entry e;
+ upb_status_t success = upb_context_lookup(c, &proto_name, &e);
+ if(!success || e.type != UPB_SYM_MESSAGE) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
- UPB_STRARG(proto_name));
+ UPB_STRARG(&proto_name));
return false;
}
- m = e->ref.msg;
+ def = e.ref.msg;
for(int i = 0; i < 32; i++)
- data[i] = upb_msgdata_new(m);
+ msgs[i] = upb_msg_new(def);
/* Read the message data itself. */
- if(!upb_strreadfile(MESSAGE_FILE, &str)) {
+ str = upb_strreadfile(MESSAGE_FILE);
+ if(!str) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
}
@@ -51,19 +53,18 @@ static bool initialize()
static void cleanup()
{
for(int i = 0; i < 32; i++)
- upb_msgdata_free(data[i], m, true);
- upb_strfree(str);
- upb_context_free(&c);
+ upb_msg_unref(msgs[i]);
+ upb_string_unref(str);
+ upb_context_unref(c);
}
static size_t run(int i)
{
- size_t read;
- upb_msg_parse_reset(&s, data[i%NUM_MESSAGES], m, false, BYREF);
- upb_status_t status = upb_msg_parse(&s, str.ptr, str.byte_len, &read);
- if(status != UPB_STATUS_OK && read != str.byte_len) {
- fprintf(stderr, "Error. :( error=%d, read=%zu\n", status, read);
+ upb_status_t status;
+ status = upb_msg_parsestr(msgs[i%NUM_MESSAGES], str->ptr, str->byte_len);
+ if(status != UPB_STATUS_OK) {
+ fprintf(stderr, "Error. :( error=%d\n", status);
return 0;
}
- return read;
+ return str->byte_len;
}
diff --git a/src/upb.c b/src/upb.c
new file mode 100644
index 0000000..e82a8e4
--- /dev/null
+++ b/src/upb.c
@@ -0,0 +1,37 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
+ *
+ */
+
+#include <stddef.h>
+
+#include "upb.h"
+
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+#define TYPE_INFO(proto_type, wire_type, ctype) \
+ [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \
+ {alignof(ctype), sizeof(ctype), wire_type, #ctype},
+
+struct upb_type_info upb_type_info[] = {
+ TYPE_INFO(DOUBLE, UPB_WIRE_TYPE_64BIT, double)
+ TYPE_INFO(FLOAT, UPB_WIRE_TYPE_32BIT, float)
+ TYPE_INFO(INT64, UPB_WIRE_TYPE_VARINT, int64_t)
+ TYPE_INFO(UINT64, UPB_WIRE_TYPE_VARINT, uint64_t)
+ TYPE_INFO(INT32, UPB_WIRE_TYPE_VARINT, int32_t)
+ TYPE_INFO(FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t)
+ TYPE_INFO(FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t)
+ TYPE_INFO(BOOL, UPB_WIRE_TYPE_VARINT, bool)
+ TYPE_INFO(MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*)
+ TYPE_INFO(GROUP, UPB_WIRE_TYPE_START_GROUP, void*)
+ TYPE_INFO(UINT32, UPB_WIRE_TYPE_VARINT, uint32_t)
+ TYPE_INFO(ENUM, UPB_WIRE_TYPE_VARINT, uint32_t)
+ TYPE_INFO(SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t)
+ TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t)
+ TYPE_INFO(SINT32, UPB_WIRE_TYPE_VARINT, int32_t)
+ TYPE_INFO(SINT64, UPB_WIRE_TYPE_VARINT, int64_t)
+ TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
+ TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
+};
+
diff --git a/src/upb.h b/src/upb.h
index af026f5..1112fe1 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -1,6 +1,6 @@
/*
* upb - a minimalist implementation of protocol buffers.
-
+ *
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file contains shared definitions that are widely used across upb.
@@ -11,14 +11,14 @@
#include <stdbool.h>
#include <stdint.h>
-#include <stdio.h> /* for size_t. */
+#include <stdio.h> // only for size_t.
#include "descriptor_const.h"
#ifdef __cplusplus
extern "C" {
#endif
-/* inline if possible, emit standalone code if required. */
+// inline if possible, emit standalone code if required.
#ifndef INLINE
#define INLINE static inline
#endif
@@ -26,21 +26,22 @@ extern "C" {
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
-/* The maximum that any submessages can be nested. Matches proto2's limit. */
+// The maximum that any submessages can be nested. Matches proto2's limit.
#define UPB_MAX_NESTING 64
-/* The maximum number of fields that any one .proto type can have. */
+// The maximum number of fields that any one .proto type can have.
#define UPB_MAX_FIELDS (1<<16)
-/* Nested type names are separated by periods. */
+// Nested type names are separated by periods.
#define UPB_SYMBOL_SEPARATOR '.'
#define UPB_SYMBOL_MAX_LENGTH 256
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
+
/* Fundamental types and type constants. **************************************/
-/* A list of types as they are encoded on-the-wire. */
+// A list of types as they are encoded on-the-wire.
enum upb_wire_type {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
@@ -49,26 +50,27 @@ enum upb_wire_type {
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
};
+
typedef uint8_t upb_wire_type_t;
-/* Value type as defined in a .proto file. eg. string, int32, etc.
- *
- * The values of this are defined by google_protobuf_FieldDescriptorProto_Type
- * (from descriptor.proto). Note that descriptor.proto reserves "0" for
- * errors, and we use it to represent exceptional circumstances. */
+// Value type as defined in a .proto file, e.g. string, int32, etc. The
+// integers that represent this are defined by descriptor.proto. Note that
+// descriptor.proto reserves "0" for errors, and we use it to represent
+// exceptional circumstances.
typedef uint8_t upb_field_type_t;
+// For referencing the type constants tersely.
+#define UPB_TYPENUM(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
+
INLINE bool upb_issubmsgtype(upb_field_type_t type) {
- return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ||
- type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE;
+ return type == UPB_TYPENUM(GROUP) || type == UPB_TYPENUM(MESSAGE);
}
INLINE bool upb_isstringtype(upb_field_type_t type) {
- return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
- type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
+ return type == UPB_TYPENUM(STRING) || type == UPB_TYPENUM(BYTES);
}
-/* Information about a given value type (upb_field_type_t). */
+// Info for a given field type.
struct upb_type_info {
uint8_t align;
uint8_t size;
@@ -76,87 +78,88 @@ struct upb_type_info {
char *ctype;
};
-/* Contains information for all .proto types. Indexed by upb_field_type_t. */
+// A static array of info about all of the field types, indexed by type number.
extern struct upb_type_info upb_type_info[];
-/* The number of a field, eg. "optional string foo = 3". */
+// The number of a field, e.g. "optional string foo = 3".
typedef int32_t upb_field_number_t;
-/* Label (optional, repeated, required) as defined in a .proto file. The values
- * of this are defined by google.protobuf.FieldDescriptorProto.Label (from
- * descriptor.proto). */
+// Label (optional, repeated, required) as defined in a .proto file. The
+// values of this are defined by google.protobuf.FieldDescriptorProto.Label
+// (from descriptor.proto).
typedef uint8_t upb_label_t;
-/* A value as it is encoded on-the-wire, except delimited, which is handled
- * separately. */
+// A scalar (non-string) wire value. Used only for parsing unknown fields.
union upb_wire_value {
uint64_t varint;
uint64_t _64bit;
uint32_t _32bit;
};
-/* A tag occurs before each value on-the-wire. */
+// A tag occurs before each value on-the-wire.
struct upb_tag {
upb_field_number_t field_number;
upb_wire_type_t wire_type;
};
+
/* Polymorphic values of .proto types *****************************************/
struct upb_string;
struct upb_array;
struct upb_msg;
-/* A single .proto value. The owner must have an out-of-band way of knowing
- * the type, so that it knows which union member to use. */
+// A single .proto value. The owner must have an out-of-band way of knowing
+// the type, so that it knows which union member to use.
union upb_value {
- double _double;
- float _float;
- int32_t int32;
- int64_t int64;
+ double _double;
+ float _float;
+ int32_t int32;
+ int64_t int64;
uint32_t uint32;
uint64_t uint64;
- bool _bool;
+ bool _bool;
struct upb_string *str;
struct upb_array *arr;
struct upb_msg *msg;
};
-/* A pointer to a .proto value. The owner must have an out-of-band way of
- * knowing the type, so it knows which union member to use. */
+// A pointer to a .proto value. The owner must have an out-of-band way of
+// knowing the type, so it knows which union member to use.
union upb_value_ptr {
- double *_double;
- float *_float;
- int32_t *int32;
- int64_t *int64;
+ double *_double;
+ float *_float;
+ int32_t *int32;
+ int64_t *int64;
uint32_t *uint32;
uint64_t *uint64;
- bool *_bool;
+ bool *_bool;
struct upb_string **str;
struct upb_array **arr;
struct upb_msg **msg;
- void *_void;
+ void *_void;
};
-/* Unfortunately there is no way to define this so that it can be used as a
- * generic expression, a la:
- * foo(UPB_VALUE_ADDROF(bar));
- * ...you have to use it as the initializer of a upb_value_ptr:
- * union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
- * foo(p);
- */
+// Unfortunately there is no way to define this so that it can be used as a
+// generic expression, a la:
+// foo(UPB_VALUE_ADDROF(bar));
+// ...you have to use it as the initializer of a upb_value_ptr:
+// union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
+// foo(p);
#define UPB_VALUE_ADDROF(val) {(void*)&val._double}
-/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need
- * to know the field type to perform this operation, because we need to know
- * how much memory to copy. */
+/**
+ * Converts upb_value_ptr -> upb_value by reading from the pointer. We need to
+ * know the field type to perform this operation, because we need to know how
+ * much memory to copy.
+ */
INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
upb_field_type_t ft) {
union upb_value val;
+
#define CASE(t, member_name) \
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
- val.member_name = *ptr.member_name; \
- break;
+ case UPB_TYPENUM(t): val.member_name = *ptr.member_name; break;
+
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@@ -178,19 +181,21 @@ INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
CASE(GROUP, msg)
default: break;
}
-#undef CASE
return val;
+
+#undef CASE
}
-/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need
- * to know the field type to perform this operation, because we need to know
- * how much memory to copy. */
+/**
+ * Writes a upb_value to a upb_value_ptr location. We need to know the field
+ * type to perform this operation, because we need to know how much memory to
+ * copy.
+ */
INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
upb_field_type_t ft) {
#define CASE(t, member_name) \
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
- *ptr.member_name = val.member_name; \
- break;
+ case UPB_TYPENUM(t): *ptr.member_name = val.member_name; break;
+
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@@ -212,17 +217,19 @@ INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
CASE(GROUP, msg)
default: break;
}
+
#undef CASE
}
+// All the different definitions that can occur in .proto files.
union upb_symbol_ref {
struct upb_msgdef *msg;
struct upb_enum *_enum;
struct upb_svc *svc;
};
-/* Status codes used as a return value. Codes >0 are not fatal and can be
- * resumed. */
+// Status codes used as a return value. Codes >0 are not fatal and can be
+// resumed.
typedef enum upb_status {
UPB_STATUS_OK = 0,
diff --git a/src/upb_array.h b/src/upb_array.h
index 732c4aa..b5eb22d 100644
--- a/src/upb_array.h
+++ b/src/upb_array.h
@@ -32,11 +32,12 @@ extern "C" {
struct upb_string;
/* Returns a pointer to an array element. Does not perform a bounds check! */
-INLINE union upb_value_ptr upb_array_getelementptr(
- struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
+INLINE union upb_value_ptr upb_array_getelementptr(struct upb_array *arr,
+ upb_arraylen_t n)
{
union upb_value_ptr ptr;
- ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size);
+ ptr._void = UPB_INDEX(arr->elements._void, n,
+ upb_type_info[arr->fielddef->type].size);
return ptr;
}
@@ -66,26 +67,22 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
return v;
}
-/* Resizes array to be "len" elements long (reallocating if necessary). */
-INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen)
+INLINE union upb_value_ptr upb_array_append(struct upb_array *arr)
{
- size_t type_size = upb_type_info[arr->fielddef->type].size;
- bool dropped = false;
- bool ref = arr->size == 0; /* Ref'ing external memory. */
- void *data = arr->elements._void;
- if(arr->size < newlen) {
- /* Need to resize. */
- arr->size = UPB_MAX(4, upb_round_up_to_pow2(newlen));
- arr->elements._void = realloc(ref ? NULL : data, arr->size * type_size);
+ size_t size = upb_type_info[arr->fielddef->type].size;
+ upb_arraylen_t oldlen = arr->len;
+ if(oldlen == arr->size) {
+ arr->size = UPB_MAX(4, upb_round_up_to_pow2(oldlen+1));
+ arr->elements._void = realloc(arr->elements._void, arr->size * size);
+ memset((char*)arr->elements._void + (arr->len*size), 0, (arr->size - arr->len) * size);
}
- if(ref) {
- /* Need to take referenced data and copy it to memory we own. */
- memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size);
- dropped = true;
- }
- /* TODO: fill with defaults. */
- arr->len = newlen;
- return dropped;
+ arr->len++;
+ return upb_array_getelementptr(arr, oldlen);
+}
+
+INLINE void upb_array_truncate(struct upb_array *arr)
+{
+ arr->len = 0;
}
#ifdef __cplusplus
diff --git a/src/upb_atomic.h b/src/upb_atomic.h
index c1a60b9..85ec582 100644
--- a/src/upb_atomic.h
+++ b/src/upb_atomic.h
@@ -29,6 +29,7 @@ extern "C" {
#define INLINE static inline
#endif
+#define UPB_THREAD_UNSAFE
#ifdef UPB_THREAD_UNSAFE
/* Non-thread-safe implementations. ******************************************/
diff --git a/src/upb_mm.c b/src/upb_mm.c
index 853d572..769db96 100644
--- a/src/upb_mm.c
+++ b/src/upb_mm.c
@@ -27,7 +27,7 @@ void upb_array_destroy(struct upb_array *arr)
upb_arraylen_t i;
/* Unref elements. */
for(i = 0; i < arr->len; i++) {
- union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type);
+ union upb_value_ptr p = upb_array_getelementptr(arr, i);
upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef);
union upb_mmptr mmptr = upb_mmptr_read(p, type);
upb_mm_unref(mmptr, type);
@@ -120,7 +120,7 @@ struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i
struct upb_msg_fielddef *f = arr->fielddef;
assert(upb_elem_ismm(f));
assert(i < arr->len);
- union upb_value_ptr p = upb_array_getelementptr(arr, i, f->type);
+ union upb_value_ptr p = upb_array_getelementptr(arr, i);
upb_mm_ptrtype type = upb_elem_ptrtype(f);
union upb_mmptr val = upb_mmptr_read(p, type);
return find_or_create_ref(arrref, arrref->mm, val, type, refcreated);
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 80602dd..f977527 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -156,11 +156,16 @@ static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
{
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
- bool isset = upb_msg_isset(msg, f);
- size_t len = isset ? (*p.arr)->len : 0;
- if(!isset) *p.arr = upb_array_new(f);
- upb_array_resize(*p.arr, len+1);
- p = upb_array_getelementptr(*p.arr, len, f->type);
+ if(!upb_msg_isset(msg, f)) {
+ if(!*p.arr || !upb_mmhead_only(&((*p.arr)->mmhead))) {
+ if(*p.arr)
+ upb_array_unref(*p.arr);
+ *p.arr = upb_array_new(f);
+ }
+ upb_array_truncate(*p.arr);
+ upb_msg_set(msg, f);
+ }
+ p = upb_array_append(*p.arr);
}
return p;
}
@@ -202,7 +207,11 @@ static void str_cb(void *udata, uint8_t *str,
upb_msg_set(msg, f);
if(avail_len != total_len) abort(); /* TODO: support streaming. */
//bool byref = avail_len == total_len && mp->byref;
- *p.str = upb_string_new();
+ if(!*p.str || !upb_mmhead_only(&((*p.str)->mmhead))) {
+ if(*p.str)
+ upb_string_unref(*p.str);
+ *p.str = upb_string_new();
+ }
//if(byref) {
// upb_strdrop(*p.str);
// (*p.str)->ptr = (char*)str;
@@ -220,16 +229,19 @@ static void submsg_start_cb(void *udata, void *user_field_desc)
struct upb_msg_fielddef *f = user_field_desc;
struct upb_msg *oldmsg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(oldmsg, f);
- struct upb_msg **submsg = p.msg;
- //if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) {
- // /* We can reuse the existing submsg. */
- //} else {
- *submsg = upb_msg_new(f->ref.msg);
- //}
- upb_msg_clear(*submsg);
- upb_msg_set(oldmsg, f);
+
+ if(upb_isarray(f) || !upb_msg_isset(oldmsg, f)) {
+ if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) {
+ if(*p.msg)
+ upb_msg_unref(*p.msg);
+ *p.msg = upb_msg_new(f->ref.msg);
+ }
+ upb_msg_clear(*p.msg);
+ upb_msg_set(oldmsg, f);
+ }
+
mp->top++;
- mp->top->msg = *submsg;
+ mp->top->msg = *p.msg;
}
static void submsg_end_cb(void *udata)
@@ -248,6 +260,7 @@ upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len)
struct upb_msg_parser mp;
upb_msg_parser_reset(&mp, msg, false);
size_t read;
+ upb_msg_clear(msg);
upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read);
return ret;
}
@@ -337,7 +350,7 @@ static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m)
union upb_value_ptr p = upb_msg_getptr(m, f);
if(upb_isarray(f)) {
for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) {
- union upb_value_ptr elem = upb_array_getelementptr((*p.arr), j, f->type);
+ union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j);
/* TODO: for packed arrays tag size goes outside the loop. */
size += upb_get_tag_size(fd->number);
size += get_valuesize(sizes, elem, f, fd);
diff --git a/src/upb_parse.c b/src/upb_parse.c
index 7c1ad66..baaeb99 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -9,32 +9,10 @@
#include <stddef.h>
#include <stdlib.h>
-/* May want to move this to upb.c if enough other things warrant it. */
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype},
-struct upb_type_info upb_type_info[] = {
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64, UPB_WIRE_TYPE_VARINT, int64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64, UPB_WIRE_TYPE_VARINT, uint64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32, UPB_WIRE_TYPE_VARINT, int32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL, UPB_WIRE_TYPE_VARINT, bool)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP, void*)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32, UPB_WIRE_TYPE_VARINT, uint32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM, UPB_WIRE_TYPE_VARINT, uint32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32, UPB_WIRE_TYPE_VARINT, int32_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64, UPB_WIRE_TYPE_VARINT, int64_t)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
- TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
-};
-
-/* This is called by the inline version of the function if the varint turns out
- * to be >= 2 bytes. */
+/**
+ * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
+ * handles 1-byte varints).
+ */
upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t **outbuf)
{
@@ -42,10 +20,15 @@ upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t last = 0x80;
*val = 0;
int bitpos;
+
for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
*val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
- if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA;
- if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT;
+
+ if(buf >= end && buf <= maxend && (last & 0x80))
+ return UPB_STATUS_NEED_MORE_DATA;
+ if(buf > maxend)
+ return UPB_ERROR_UNTERMINATED_VARINT;
+
*outbuf = buf;
return UPB_STATUS_OK;
}
@@ -54,23 +37,37 @@ upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt
union upb_wire_value *wv, uint8_t **outbuf)
{
switch(wt) {
- case UPB_WIRE_TYPE_VARINT: return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
- case UPB_WIRE_TYPE_64BIT: return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
- case UPB_WIRE_TYPE_32BIT: return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
- default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */
+ case UPB_WIRE_TYPE_VARINT:
+ return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
+ case UPB_WIRE_TYPE_64BIT:
+ return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
+ case UPB_WIRE_TYPE_32BIT:
+ return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
+ default:
+ return UPB_ERROR_ILLEGAL; // Doesn't handle delimited, groups.
}
}
+/**
+ * Advances buf past the current wire value (of type wt), saving the result in
+ * outbuf.
+ */
static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
uint8_t **outbuf)
{
switch(wt) {
- case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_64BIT: return upb_skip_f_uint64_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_32BIT: return upb_skip_f_uint32_t(buf, end, outbuf);
- case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
- case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK;
- default: return UPB_ERROR_ILLEGAL;
+ case UPB_WIRE_TYPE_VARINT:
+ return upb_skip_v_uint64_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_64BIT:
+ return upb_skip_f_uint64_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_32BIT:
+ return upb_skip_f_uint32_t(buf, end, outbuf);
+ case UPB_WIRE_TYPE_START_GROUP:
+ // TODO: skip to matching end group.
+ case UPB_WIRE_TYPE_END_GROUP:
+ return UPB_STATUS_OK;
+ default:
+ return UPB_ERROR_ILLEGAL;
}
}
@@ -78,8 +75,8 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
union upb_value_ptr v, uint8_t **outbuf)
{
#define CASE(t, member_name) \
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
- return upb_get_ ## t(buf, end, v.member_name, outbuf);
+ case UPB_TYPENUM(t): return upb_get_ ## t(buf, end, v.member_name, outbuf);
+
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@@ -97,6 +94,7 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
CASE(ENUM, int32)
default: return UPB_ERROR_ILLEGAL;
}
+
#undef CASE
}
@@ -104,55 +102,76 @@ void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata)
{
state->top = state->stack;
state->limit = &state->stack[UPB_MAX_NESTING];
- /* The top-level message is not delimited (we can keep receiving data for
- * it indefinitely), so we treat it like a group. */
- *state->top = 0;
state->completed_offset = 0;
state->udata = udata;
-}
-static void *pop_stack_frame(struct upb_stream_parser *s, uint8_t *buf)
-{
- if(s->submsg_end_cb) s->submsg_end_cb(s->udata);
- s->top--;
- return (char*)buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
+ // The top-level message is not delimited (we can keep receiving data for it
+ // indefinitely), so we treat it like a group.
+ *state->top = 0;
}
-/* Returns the next end offset. */
-static upb_status_t push_stack_frame(struct upb_stream_parser *s,
- uint8_t *buf, uint32_t len,
- void *user_field_desc, uint8_t **submsg_end)
+/**
+ * Pushes a new stack frame for a submessage with the given len (which will
+ * be zero if the submessage is a group).
+ */
+static upb_status_t push(struct upb_stream_parser *s, uint8_t *start,
+ uint32_t submsg_len, void *user_field_desc,
+ uint8_t **submsg_end)
{
s->top++;
- if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW;
- *s->top = s->completed_offset + len;
- if(s->submsg_start_cb) s->submsg_start_cb(s->udata, user_field_desc);
- *submsg_end = buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
+ if(s->top >= s->limit)
+ return UPB_ERROR_STACK_OVERFLOW;
+ *s->top = s->completed_offset + submsg_len;
+
+ if(s->submsg_start_cb)
+ s->submsg_start_cb(s->udata, user_field_desc);
+
+ *submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
return UPB_STATUS_OK;
}
+/**
+ * Pops a stack frame, returning a pointer for where the enclosing submsg
+ * should end (or start itself when the enclosing frame is a group).
+ */
+static void *pop(struct upb_stream_parser *s, uint8_t *start)
+{
+ if(s->submsg_end_cb)
+ s->submsg_end_cb(s->udata);
+
+ s->top--;
+
+ if(*s->top > 0)
+ return (char*)start + (*s->top - s->completed_offset);
+ else
+ return (char*)start; // group.
+}
+
+
upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
void *_buf, size_t len, size_t *read)
{
uint8_t *buf = _buf;
uint8_t *completed = buf;
- uint8_t *const start = buf;
+ uint8_t *const start = buf; // ptr equivalent of s->completed_offset
uint8_t *end = buf + len;
uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0);
upb_status_t status = UPB_STATUS_OK;
- /* Make local copies so optimizer knows they won't change. */
+ // Make local copies so optimizer knows they won't change.
upb_tag_cb tag_cb = s->tag_cb;
upb_str_cb str_cb = s->str_cb;
upb_value_cb value_cb = s->value_cb;
void *udata = s->udata;
- /* Main loop: parse a tag, then handle the value. */
+#define CHECK(exp) do { if((status = exp) != UPB_STATUS_OK) goto err; } while(0)
+
+ // Main loop: parse a tag, then handle the value.
while(buf < end) {
struct upb_tag tag;
- UPB_CHECK(parse_tag(buf, end, &tag, &buf));
+ CHECK(parse_tag(buf, end, &tag, &buf));
if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
- submsg_end = pop_stack_frame(s, start);
+ submsg_end = pop(s, start);
completed = buf;
continue;
}
@@ -161,38 +180,39 @@ upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
upb_field_type_t ft = tag_cb(udata, &tag, &udesc);
if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
int32_t delim_len;
- UPB_CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
+ CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
uint8_t *delim_end = buf + delim_len;
- if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
- UPB_CHECK(push_stack_frame(
- s, start, delim_end - start, udesc, &submsg_end));
+ if(ft == UPB_TYPENUM(MESSAGE)) {
+ CHECK(push(s, start, delim_end - start, udesc, &submsg_end));
} else {
- if(upb_isstringtype(ft))
- str_cb(udata, buf, UPB_MIN(delim_end, end) - buf, delim_end - buf, udesc);
- //else
- // /* Set a marker for packed arrays. */
- buf = delim_end; /* Note that this could be greater than end. */
+ if(upb_isstringtype(ft)) {
+ size_t avail_len = UPB_MIN(delim_end, end) - buf;
+ str_cb(udata, buf, avail_len, delim_end - buf, udesc);
+ } // else { TODO: packed arrays }
+ buf = delim_end; // Could be >end.
}
- } else { /* Scalar (non-delimited) value. */
+ } else {
+ // Scalar (non-delimited) value.
switch(ft) {
- case 0: /* Client elected to skip. */
- UPB_CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
+ case 0: // Client elected to skip.
+ CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
break;
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
- UPB_CHECK(push_stack_frame(s, start, 0, udesc, &submsg_end));
+ case UPB_TYPENUM(GROUP):
+ CHECK(push(s, start, 0, udesc, &submsg_end));
break;
default:
- UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
+ CHECK(value_cb(udata, buf, end, udesc, &buf));
break;
}
}
- while(buf == submsg_end) submsg_end = pop_stack_frame(s, start);
- //while(buf < s->packed_end) /* packed arrays. */
- // UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
+ while(buf == submsg_end)
+ submsg_end = pop(s, start);
+ // while(buf < s->packed_end) { TODO: packed arrays }
completed = buf;
}
+err:
*read = (char*)completed - (char*)start;
s->completed_offset += *read;
return status;
diff --git a/src/upb_parse.h b/src/upb_parse.h
index 1454dd5..a8f4294 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -74,9 +74,8 @@ typedef void (*upb_submsg_start_cb)(void *udata,
typedef void (*upb_submsg_end_cb)(void *udata);
struct upb_stream_parser {
- /* For delimited submsgs, counts from the submsg len down to zero.
- * For group submsgs, counts from zero down to the negative len. */
- uint32_t stack[UPB_MAX_NESTING], *top, *limit;
+ // Stack entries store the offset where the submsg ends (for groups, 0).
+ size_t stack[UPB_MAX_NESTING], *top, *limit;
size_t completed_offset;
void *udata;
upb_tag_cb tag_cb;
diff --git a/src/upb_struct.h b/src/upb_struct.h
index 9c1bb2e..c83978f 100644
--- a/src/upb_struct.h
+++ b/src/upb_struct.h
@@ -29,6 +29,10 @@ INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) {
return head->refcount == 0 && head->refs == NULL;
}
+INLINE bool upb_mmhead_only(struct upb_mmhead *head) {
+ return head->refcount == 1 && head->refs == NULL;
+}
+
INLINE bool upb_mmhead_unref(struct upb_mmhead *head) {
head->refcount--;
return upb_mmhead_norefs(head);
@@ -57,7 +61,7 @@ struct upb_array {
struct upb_msg_fielddef *fielddef; /* Defines the type of the array. */
union upb_value_ptr elements;
upb_arraylen_t len; /* Number of elements in "elements". */
- upb_arraylen_t size; /* Memory we own (0 if by reference). */
+ upb_arraylen_t size; /* Memory we own. */
};
struct upb_string {
diff --git a/src/upb_text.c b/src/upb_text.c
index 6d43152..bed4b43 100644
--- a/src/upb_text.c
+++ b/src/upb_text.c
@@ -99,7 +99,7 @@ static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
if(upb_isarray(f)) {
struct upb_array *arr = *p.arr;
for(uint32_t j = 0; j < arr->len; j++) {
- union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
+ union upb_value_ptr elem_p = upb_array_getelementptr(arr, j);
printval(printer, elem_p, f, fd, stream);
}
} else {
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback