From f17ed90f7704d77e3eb59a6f6b693ab4c598936a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 27 Aug 2009 11:10:13 -0700 Subject: Some cleanup and reformatting, fixed the benchmarks. --- Makefile | 8 +- benchmarks/parsetostruct.upb_table.c | 51 +++++----- src/upb.c | 37 +++++++ src/upb.h | 131 +++++++++++++------------ src/upb_array.h | 39 ++++---- src/upb_atomic.h | 1 + src/upb_mm.c | 4 +- src/upb_msg.c | 45 +++++---- src/upb_parse.c | 180 +++++++++++++++++++---------------- src/upb_parse.h | 5 +- src/upb_struct.h | 6 +- src/upb_text.c | 2 +- 12 files changed, 295 insertions(+), 214 deletions(-) create mode 100644 src/upb.c diff --git a/Makefile b/Makefile index 0aaae32..110a263 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ LDLIBS=-lpthread LIBUPB=src/libupb.a LIBUPB_PIC=src/libupb_pic.a LIBUPB_SHARED=src/libupb.so -ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) $(LIBUPB_SHARED) tests/test_table tests/tests tools/upbc +ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) tests/test_table tests/tests tools/upbc all: $(ALL) clean: rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* @@ -42,8 +42,9 @@ clean: cd lang_ext/python && python setup.py clean --all # The core library (src/libupb.a) -SRC=src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c src/upb_enum.c src/upb_context.c \ - src/upb_string.c src/upb_text.c src/upb_serialize.c descriptor/descriptor.c +SRC=src/upb.c src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c \ + src/upb_enum.c src/upb_context.c src/upb_string.c src/upb_text.c \ + src/upb_serialize.c descriptor/descriptor.c STATICOBJ=$(patsubst %.c,%.o,$(SRC)) SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC)) # building shared objects is like building static ones, except -fPIC is added. @@ -91,6 +92,7 @@ upb_benchmarks: $(UPB_BENCHMARKS) benchmarks: $(BENCHMARKS) benchmark: @rm -f benchmarks/results + @rm -rf benchmarks/*.dSYM @for test in benchmarks/b.* ; do ./$$test ; done benchmarks/google_messages.proto.pb: benchmarks/google_messages.proto diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c index 751c982..9daa8e0 100644 --- a/benchmarks/parsetostruct.upb_table.c +++ b/benchmarks/parsetostruct.upb_table.c @@ -3,45 +3,47 @@ #include "upb_context.h" #include "upb_msg.h" +#include "upb_mm.h" -static struct upb_context c; -static struct upb_string str; -static struct upb_msg_parse_state s; -static struct upb_msg *m; -static void *data[NUM_MESSAGES]; +static struct upb_context *c; +static struct upb_string *str; +static struct upb_msgdef *def; +static struct upb_msg *msgs[NUM_MESSAGES]; static bool initialize() { /* Initialize upb state, parse descriptor. */ - upb_context_init(&c); - struct upb_string fds; - if(!upb_strreadfile(MESSAGE_DESCRIPTOR_FILE, &fds)) { + c = upb_context_new(); + struct upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(!fds) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return false; } - if(!upb_context_parsefds(&c, &fds)) { + if(!upb_context_parsefds(c, fds)) { fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ".\n"); return false; } - upb_strfree(fds); + upb_string_unref(fds); char class_name[] = MESSAGE_NAME; struct upb_string proto_name; proto_name.ptr = class_name; proto_name.byte_len = sizeof(class_name)-1; - struct upb_symtab_entry *e = upb_context_lookup(&c, &proto_name); - if(!e || e->type != UPB_SYM_MESSAGE) { + struct upb_symtab_entry e; + upb_status_t success = upb_context_lookup(c, &proto_name, &e); + if(!success || e.type != UPB_SYM_MESSAGE) { fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", - UPB_STRARG(proto_name)); + UPB_STRARG(&proto_name)); return false; } - m = e->ref.msg; + def = e.ref.msg; for(int i = 0; i < 32; i++) - data[i] = upb_msgdata_new(m); + msgs[i] = upb_msg_new(def); /* Read the message data itself. */ - if(!upb_strreadfile(MESSAGE_FILE, &str)) { + str = upb_strreadfile(MESSAGE_FILE); + if(!str) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return false; } @@ -51,19 +53,18 @@ static bool initialize() static void cleanup() { for(int i = 0; i < 32; i++) - upb_msgdata_free(data[i], m, true); - upb_strfree(str); - upb_context_free(&c); + upb_msg_unref(msgs[i]); + upb_string_unref(str); + upb_context_unref(c); } static size_t run(int i) { - size_t read; - upb_msg_parse_reset(&s, data[i%NUM_MESSAGES], m, false, BYREF); - upb_status_t status = upb_msg_parse(&s, str.ptr, str.byte_len, &read); - if(status != UPB_STATUS_OK && read != str.byte_len) { - fprintf(stderr, "Error. :( error=%d, read=%zu\n", status, read); + upb_status_t status; + status = upb_msg_parsestr(msgs[i%NUM_MESSAGES], str->ptr, str->byte_len); + if(status != UPB_STATUS_OK) { + fprintf(stderr, "Error. :( error=%d\n", status); return 0; } - return read; + return str->byte_len; } diff --git a/src/upb.c b/src/upb.c new file mode 100644 index 0000000..e82a8e4 --- /dev/null +++ b/src/upb.c @@ -0,0 +1,37 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + */ + +#include + +#include "upb.h" + +#define alignof(t) offsetof(struct { char c; t x; }, x) +#define TYPE_INFO(proto_type, wire_type, ctype) \ + [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \ + {alignof(ctype), sizeof(ctype), wire_type, #ctype}, + +struct upb_type_info upb_type_info[] = { + TYPE_INFO(DOUBLE, UPB_WIRE_TYPE_64BIT, double) + TYPE_INFO(FLOAT, UPB_WIRE_TYPE_32BIT, float) + TYPE_INFO(INT64, UPB_WIRE_TYPE_VARINT, int64_t) + TYPE_INFO(UINT64, UPB_WIRE_TYPE_VARINT, uint64_t) + TYPE_INFO(INT32, UPB_WIRE_TYPE_VARINT, int32_t) + TYPE_INFO(FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t) + TYPE_INFO(FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t) + TYPE_INFO(BOOL, UPB_WIRE_TYPE_VARINT, bool) + TYPE_INFO(MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*) + TYPE_INFO(GROUP, UPB_WIRE_TYPE_START_GROUP, void*) + TYPE_INFO(UINT32, UPB_WIRE_TYPE_VARINT, uint32_t) + TYPE_INFO(ENUM, UPB_WIRE_TYPE_VARINT, uint32_t) + TYPE_INFO(SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t) + TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t) + TYPE_INFO(SINT32, UPB_WIRE_TYPE_VARINT, int32_t) + TYPE_INFO(SINT64, UPB_WIRE_TYPE_VARINT, int64_t) + TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*) + TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*) +}; + diff --git a/src/upb.h b/src/upb.h index af026f5..1112fe1 100644 --- a/src/upb.h +++ b/src/upb.h @@ -1,6 +1,6 @@ /* * upb - a minimalist implementation of protocol buffers. - + * * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. * * This file contains shared definitions that are widely used across upb. @@ -11,14 +11,14 @@ #include #include -#include /* for size_t. */ +#include // only for size_t. #include "descriptor_const.h" #ifdef __cplusplus extern "C" { #endif -/* inline if possible, emit standalone code if required. */ +// inline if possible, emit standalone code if required. #ifndef INLINE #define INLINE static inline #endif @@ -26,21 +26,22 @@ extern "C" { #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) -/* The maximum that any submessages can be nested. Matches proto2's limit. */ +// The maximum that any submessages can be nested. Matches proto2's limit. #define UPB_MAX_NESTING 64 -/* The maximum number of fields that any one .proto type can have. */ +// The maximum number of fields that any one .proto type can have. #define UPB_MAX_FIELDS (1<<16) -/* Nested type names are separated by periods. */ +// Nested type names are separated by periods. #define UPB_SYMBOL_SEPARATOR '.' #define UPB_SYMBOL_MAX_LENGTH 256 #define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) + /* Fundamental types and type constants. **************************************/ -/* A list of types as they are encoded on-the-wire. */ +// A list of types as they are encoded on-the-wire. enum upb_wire_type { UPB_WIRE_TYPE_VARINT = 0, UPB_WIRE_TYPE_64BIT = 1, @@ -49,26 +50,27 @@ enum upb_wire_type { UPB_WIRE_TYPE_END_GROUP = 4, UPB_WIRE_TYPE_32BIT = 5 }; + typedef uint8_t upb_wire_type_t; -/* Value type as defined in a .proto file. eg. string, int32, etc. - * - * The values of this are defined by google_protobuf_FieldDescriptorProto_Type - * (from descriptor.proto). Note that descriptor.proto reserves "0" for - * errors, and we use it to represent exceptional circumstances. */ +// Value type as defined in a .proto file. eg. string, int32, etc. The +// integers that represent this are defined by descriptor.proto. Note that +// descriptor.proto reserves "0" for errors, and we use it to represent +// exceptional circumstances. typedef uint8_t upb_field_type_t; +// For referencing the type constants tersely. +#define UPB_TYPENUM(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type + INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP || - type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE; + return type == UPB_TYPENUM(GROUP) || type == UPB_TYPENUM(MESSAGE); } INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING || - type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES; + return type == UPB_TYPENUM(STRING) || type == UPB_TYPENUM(BYTES); } -/* Information about a given value type (upb_field_type_t). */ +// Info for a given field type. struct upb_type_info { uint8_t align; uint8_t size; @@ -76,87 +78,88 @@ struct upb_type_info { char *ctype; }; -/* Contains information for all .proto types. Indexed by upb_field_type_t. */ +// A static array of info about all of the field types, indexed by type number. extern struct upb_type_info upb_type_info[]; -/* The number of a field, eg. "optional string foo = 3". */ +// The number of a field, eg. "optional string foo = 3". typedef int32_t upb_field_number_t; -/* Label (optional, repeated, required) as defined in a .proto file. The values - * of this are defined by google.protobuf.FieldDescriptorProto.Label (from - * descriptor.proto). */ +// Label (optional, repeated, required) as defined in a .proto file. The +// values of this are defined by google.protobuf.FieldDescriptorProto.Label +// (from descriptor.proto). typedef uint8_t upb_label_t; -/* A value as it is encoded on-the-wire, except delimited, which is handled - * separately. */ +// A scalar (non-string) wire value. Used only for parsing unknown fields. union upb_wire_value { uint64_t varint; uint64_t _64bit; uint32_t _32bit; }; -/* A tag occurs before each value on-the-wire. */ +// A tag occurs before each value on-the-wire. struct upb_tag { upb_field_number_t field_number; upb_wire_type_t wire_type; }; + /* Polymorphic values of .proto types *****************************************/ struct upb_string; struct upb_array; struct upb_msg; -/* A single .proto value. The owner must have an out-of-band way of knowing - * the type, so that it knows which union member to use. */ +// A single .proto value. The owner must have an out-of-band way of knowing +// the type, so that it knows which union member to use. union upb_value { - double _double; - float _float; - int32_t int32; - int64_t int64; + double _double; + float _float; + int32_t int32; + int64_t int64; uint32_t uint32; uint64_t uint64; - bool _bool; + bool _bool; struct upb_string *str; struct upb_array *arr; struct upb_msg *msg; }; -/* A pointer to a .proto value. The owner must have an out-of-band way of - * knowing the type, so it knows which union member to use. */ +// A pointer to a .proto value. The owner must have an out-of-band way of +// knowing the type, so it knows which union member to use. union upb_value_ptr { - double *_double; - float *_float; - int32_t *int32; - int64_t *int64; + double *_double; + float *_float; + int32_t *int32; + int64_t *int64; uint32_t *uint32; uint64_t *uint64; - bool *_bool; + bool *_bool; struct upb_string **str; struct upb_array **arr; struct upb_msg **msg; - void *_void; + void *_void; }; -/* Unfortunately there is no way to define this so that it can be used as a - * generic expression, a la: - * foo(UPB_VALUE_ADDROF(bar)); - * ...you have to use it as the initializer of a upb_value_ptr: - * union upb_value_ptr p = UPB_VALUE_ADDROF(bar); - * foo(p); - */ +// Unfortunately there is no way to define this so that it can be used as a +// generic expression, a la: +// foo(UPB_VALUE_ADDROF(bar)); +// ...you have to use it as the initializer of a upb_value_ptr: +// union upb_value_ptr p = UPB_VALUE_ADDROF(bar); +// foo(p); #define UPB_VALUE_ADDROF(val) {(void*)&val._double} -/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need - * to know the field type to perform this operation, because we need to know - * how much memory to copy. */ +/** + * Converts upb_value_ptr -> upb_value by reading from the pointer. We need to + * know the field type to perform this operation, because we need to know how + * much memory to copy. + */ INLINE union upb_value upb_value_read(union upb_value_ptr ptr, upb_field_type_t ft) { union upb_value val; + #define CASE(t, member_name) \ - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ - val.member_name = *ptr.member_name; \ - break; + case UPB_TYPENUM(t): val.member_name = *ptr.member_name; break; + switch(ft) { CASE(DOUBLE, _double) CASE(FLOAT, _float) @@ -178,19 +181,21 @@ INLINE union upb_value upb_value_read(union upb_value_ptr ptr, CASE(GROUP, msg) default: break; } -#undef CASE return val; + +#undef CASE } -/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need - * to know the field type to perform this operation, because we need to know - * how much memory to copy. */ +/** + * Writes a upb_value to a upb_value_ptr location. We need to know the field + * type to perform this operation, because we need to know how much memory to + * copy. + */ INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val, upb_field_type_t ft) { #define CASE(t, member_name) \ - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ - *ptr.member_name = val.member_name; \ - break; + case UPB_TYPENUM(t): *ptr.member_name = val.member_name; break; + switch(ft) { CASE(DOUBLE, _double) CASE(FLOAT, _float) @@ -212,17 +217,19 @@ INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val, CASE(GROUP, msg) default: break; } + #undef CASE } +// All the different definitions that can occur in .proto files. union upb_symbol_ref { struct upb_msgdef *msg; struct upb_enum *_enum; struct upb_svc *svc; }; -/* Status codes used as a return value. Codes >0 are not fatal and can be - * resumed. */ +// Status codes used as a return value. Codes >0 are not fatal and can be +// resumed. typedef enum upb_status { UPB_STATUS_OK = 0, diff --git a/src/upb_array.h b/src/upb_array.h index 732c4aa..b5eb22d 100644 --- a/src/upb_array.h +++ b/src/upb_array.h @@ -32,11 +32,12 @@ extern "C" { struct upb_string; /* Returns a pointer to an array element. Does not perform a bounds check! */ -INLINE union upb_value_ptr upb_array_getelementptr( - struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type) +INLINE union upb_value_ptr upb_array_getelementptr(struct upb_array *arr, + upb_arraylen_t n) { union upb_value_ptr ptr; - ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size); + ptr._void = UPB_INDEX(arr->elements._void, n, + upb_type_info[arr->fielddef->type].size); return ptr; } @@ -66,26 +67,22 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v) return v; } -/* Resizes array to be "len" elements long (reallocating if necessary). */ -INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen) +INLINE union upb_value_ptr upb_array_append(struct upb_array *arr) { - size_t type_size = upb_type_info[arr->fielddef->type].size; - bool dropped = false; - bool ref = arr->size == 0; /* Ref'ing external memory. */ - void *data = arr->elements._void; - if(arr->size < newlen) { - /* Need to resize. */ - arr->size = UPB_MAX(4, upb_round_up_to_pow2(newlen)); - arr->elements._void = realloc(ref ? NULL : data, arr->size * type_size); + size_t size = upb_type_info[arr->fielddef->type].size; + upb_arraylen_t oldlen = arr->len; + if(oldlen == arr->size) { + arr->size = UPB_MAX(4, upb_round_up_to_pow2(oldlen+1)); + arr->elements._void = realloc(arr->elements._void, arr->size * size); + memset((char*)arr->elements._void + (arr->len*size), 0, (arr->size - arr->len) * size); } - if(ref) { - /* Need to take referenced data and copy it to memory we own. */ - memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size); - dropped = true; - } - /* TODO: fill with defaults. */ - arr->len = newlen; - return dropped; + arr->len++; + return upb_array_getelementptr(arr, oldlen); +} + +INLINE void upb_array_truncate(struct upb_array *arr) +{ + arr->len = 0; } #ifdef __cplusplus diff --git a/src/upb_atomic.h b/src/upb_atomic.h index c1a60b9..85ec582 100644 --- a/src/upb_atomic.h +++ b/src/upb_atomic.h @@ -29,6 +29,7 @@ extern "C" { #define INLINE static inline #endif +#define UPB_THREAD_UNSAFE #ifdef UPB_THREAD_UNSAFE /* Non-thread-safe implementations. ******************************************/ diff --git a/src/upb_mm.c b/src/upb_mm.c index 853d572..769db96 100644 --- a/src/upb_mm.c +++ b/src/upb_mm.c @@ -27,7 +27,7 @@ void upb_array_destroy(struct upb_array *arr) upb_arraylen_t i; /* Unref elements. */ for(i = 0; i < arr->len; i++) { - union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type); + union upb_value_ptr p = upb_array_getelementptr(arr, i); upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef); union upb_mmptr mmptr = upb_mmptr_read(p, type); upb_mm_unref(mmptr, type); @@ -120,7 +120,7 @@ struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i struct upb_msg_fielddef *f = arr->fielddef; assert(upb_elem_ismm(f)); assert(i < arr->len); - union upb_value_ptr p = upb_array_getelementptr(arr, i, f->type); + union upb_value_ptr p = upb_array_getelementptr(arr, i); upb_mm_ptrtype type = upb_elem_ptrtype(f); union upb_mmptr val = upb_mmptr_read(p, type); return find_or_create_ref(arrref, arrref->mm, val, type, refcreated); diff --git a/src/upb_msg.c b/src/upb_msg.c index 80602dd..f977527 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -156,11 +156,16 @@ static union upb_value_ptr get_value_ptr(struct upb_msg *msg, { union upb_value_ptr p = upb_msg_getptr(msg, f); if(upb_isarray(f)) { - bool isset = upb_msg_isset(msg, f); - size_t len = isset ? (*p.arr)->len : 0; - if(!isset) *p.arr = upb_array_new(f); - upb_array_resize(*p.arr, len+1); - p = upb_array_getelementptr(*p.arr, len, f->type); + if(!upb_msg_isset(msg, f)) { + if(!*p.arr || !upb_mmhead_only(&((*p.arr)->mmhead))) { + if(*p.arr) + upb_array_unref(*p.arr); + *p.arr = upb_array_new(f); + } + upb_array_truncate(*p.arr); + upb_msg_set(msg, f); + } + p = upb_array_append(*p.arr); } return p; } @@ -202,7 +207,11 @@ static void str_cb(void *udata, uint8_t *str, upb_msg_set(msg, f); if(avail_len != total_len) abort(); /* TODO: support streaming. */ //bool byref = avail_len == total_len && mp->byref; - *p.str = upb_string_new(); + if(!*p.str || !upb_mmhead_only(&((*p.str)->mmhead))) { + if(*p.str) + upb_string_unref(*p.str); + *p.str = upb_string_new(); + } //if(byref) { // upb_strdrop(*p.str); // (*p.str)->ptr = (char*)str; @@ -220,16 +229,19 @@ static void submsg_start_cb(void *udata, void *user_field_desc) struct upb_msg_fielddef *f = user_field_desc; struct upb_msg *oldmsg = mp->top->msg; union upb_value_ptr p = get_value_ptr(oldmsg, f); - struct upb_msg **submsg = p.msg; - //if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) { - // /* We can reuse the existing submsg. */ - //} else { - *submsg = upb_msg_new(f->ref.msg); - //} - upb_msg_clear(*submsg); - upb_msg_set(oldmsg, f); + + if(upb_isarray(f) || !upb_msg_isset(oldmsg, f)) { + if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) { + if(*p.msg) + upb_msg_unref(*p.msg); + *p.msg = upb_msg_new(f->ref.msg); + } + upb_msg_clear(*p.msg); + upb_msg_set(oldmsg, f); + } + mp->top++; - mp->top->msg = *submsg; + mp->top->msg = *p.msg; } static void submsg_end_cb(void *udata) @@ -248,6 +260,7 @@ upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len) struct upb_msg_parser mp; upb_msg_parser_reset(&mp, msg, false); size_t read; + upb_msg_clear(msg); upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read); return ret; } @@ -337,7 +350,7 @@ static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m) union upb_value_ptr p = upb_msg_getptr(m, f); if(upb_isarray(f)) { for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) { - union upb_value_ptr elem = upb_array_getelementptr((*p.arr), j, f->type); + union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j); /* TODO: for packed arrays tag size goes outside the loop. */ size += upb_get_tag_size(fd->number); size += get_valuesize(sizes, elem, f, fd); diff --git a/src/upb_parse.c b/src/upb_parse.c index 7c1ad66..baaeb99 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -9,32 +9,10 @@ #include #include -/* May want to move this to upb.c if enough other things warrant it. */ -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype}, -struct upb_type_info upb_type_info[] = { - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64, UPB_WIRE_TYPE_VARINT, int64_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64, UPB_WIRE_TYPE_VARINT, uint64_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32, UPB_WIRE_TYPE_VARINT, int32_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL, UPB_WIRE_TYPE_VARINT, bool) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP, void*) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32, UPB_WIRE_TYPE_VARINT, uint32_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM, UPB_WIRE_TYPE_VARINT, uint32_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32, UPB_WIRE_TYPE_VARINT, int32_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64, UPB_WIRE_TYPE_VARINT, int64_t) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*) - TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*) -}; - -/* This is called by the inline version of the function if the varint turns out - * to be >= 2 bytes. */ +/** + * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version + * handles 1 and 2 byte varints). + */ upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, uint8_t **outbuf) { @@ -42,10 +20,15 @@ upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, uint8_t last = 0x80; *val = 0; int bitpos; + for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7) *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; - if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA; - if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT; + + if(buf >= end && buf <= maxend && (last & 0x80)) + return UPB_STATUS_NEED_MORE_DATA; + if(buf > maxend) + return UPB_ERROR_UNTERMINATED_VARINT; + *outbuf = buf; return UPB_STATUS_OK; } @@ -54,23 +37,37 @@ upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt union upb_wire_value *wv, uint8_t **outbuf) { switch(wt) { - case UPB_WIRE_TYPE_VARINT: return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf); - case UPB_WIRE_TYPE_64BIT: return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf); - case UPB_WIRE_TYPE_32BIT: return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf); - default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */ + case UPB_WIRE_TYPE_VARINT: + return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf); + case UPB_WIRE_TYPE_64BIT: + return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf); + case UPB_WIRE_TYPE_32BIT: + return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf); + default: + return UPB_ERROR_ILLEGAL; // Doesn't handle delimited, groups. } } +/** + * Advances buf past the current wire value (of type wt), saving the result in + * outbuf. + */ static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, uint8_t **outbuf) { switch(wt) { - case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf); - case UPB_WIRE_TYPE_64BIT: return upb_skip_f_uint64_t(buf, end, outbuf); - case UPB_WIRE_TYPE_32BIT: return upb_skip_f_uint32_t(buf, end, outbuf); - case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */ - case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK; - default: return UPB_ERROR_ILLEGAL; + case UPB_WIRE_TYPE_VARINT: + return upb_skip_v_uint64_t(buf, end, outbuf); + case UPB_WIRE_TYPE_64BIT: + return upb_skip_f_uint64_t(buf, end, outbuf); + case UPB_WIRE_TYPE_32BIT: + return upb_skip_f_uint32_t(buf, end, outbuf); + case UPB_WIRE_TYPE_START_GROUP: + // TODO: skip to matching end group. + case UPB_WIRE_TYPE_END_GROUP: + return UPB_STATUS_OK; + default: + return UPB_ERROR_ILLEGAL; } } @@ -78,8 +75,8 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, union upb_value_ptr v, uint8_t **outbuf) { #define CASE(t, member_name) \ - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \ - return upb_get_ ## t(buf, end, v.member_name, outbuf); + case UPB_TYPENUM(t): return upb_get_ ## t(buf, end, v.member_name, outbuf); + switch(ft) { CASE(DOUBLE, _double) CASE(FLOAT, _float) @@ -97,6 +94,7 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, CASE(ENUM, int32) default: return UPB_ERROR_ILLEGAL; } + #undef CASE } @@ -104,55 +102,76 @@ void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata) { state->top = state->stack; state->limit = &state->stack[UPB_MAX_NESTING]; - /* The top-level message is not delimited (we can keep receiving data for - * it indefinitely), so we treat it like a group. */ - *state->top = 0; state->completed_offset = 0; state->udata = udata; -} -static void *pop_stack_frame(struct upb_stream_parser *s, uint8_t *buf) -{ - if(s->submsg_end_cb) s->submsg_end_cb(s->udata); - s->top--; - return (char*)buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0); + // The top-level message is not delimited (we can keep receiving data for it + // indefinitely), so we treat it like a group. + *state->top = 0; } -/* Returns the next end offset. */ -static upb_status_t push_stack_frame(struct upb_stream_parser *s, - uint8_t *buf, uint32_t len, - void *user_field_desc, uint8_t **submsg_end) +/** + * Pushes a new stack frame for a submessage with the given len (which will + * be zero if the submessage is a group). + */ +static upb_status_t push(struct upb_stream_parser *s, uint8_t *start, + uint32_t submsg_len, void *user_field_desc, + uint8_t **submsg_end) { s->top++; - if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW; - *s->top = s->completed_offset + len; - if(s->submsg_start_cb) s->submsg_start_cb(s->udata, user_field_desc); - *submsg_end = buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0); + if(s->top >= s->limit) + return UPB_ERROR_STACK_OVERFLOW; + *s->top = s->completed_offset + submsg_len; + + if(s->submsg_start_cb) + s->submsg_start_cb(s->udata, user_field_desc); + + *submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0); return UPB_STATUS_OK; } +/** + * Pops a stack frame, returning a pointer for where the next submsg should + * end (or a pointer that is out of range for a group). + */ +static void *pop(struct upb_stream_parser *s, uint8_t *start) +{ + if(s->submsg_end_cb) + s->submsg_end_cb(s->udata); + + s->top--; + + if(*s->top > 0) + return (char*)start + (*s->top - s->completed_offset); + else + return (char*)start; // group. +} + + upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s, void *_buf, size_t len, size_t *read) { uint8_t *buf = _buf; uint8_t *completed = buf; - uint8_t *const start = buf; + uint8_t *const start = buf; // ptr equivalent of s->completed_offset uint8_t *end = buf + len; uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0); upb_status_t status = UPB_STATUS_OK; - /* Make local copies so optimizer knows they won't change. */ + // Make local copies so optimizer knows they won't change. upb_tag_cb tag_cb = s->tag_cb; upb_str_cb str_cb = s->str_cb; upb_value_cb value_cb = s->value_cb; void *udata = s->udata; - /* Main loop: parse a tag, then handle the value. */ +#define CHECK(exp) do { if((status = exp) != UPB_STATUS_OK) goto err; } while(0) + + // Main loop: parse a tag, then handle the value. while(buf < end) { struct upb_tag tag; - UPB_CHECK(parse_tag(buf, end, &tag, &buf)); + CHECK(parse_tag(buf, end, &tag, &buf)); if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) { - submsg_end = pop_stack_frame(s, start); + submsg_end = pop(s, start); completed = buf; continue; } @@ -161,38 +180,39 @@ upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s, upb_field_type_t ft = tag_cb(udata, &tag, &udesc); if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) { int32_t delim_len; - UPB_CHECK(upb_get_INT32(buf, end, &delim_len, &buf)); + CHECK(upb_get_INT32(buf, end, &delim_len, &buf)); uint8_t *delim_end = buf + delim_len; - if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) { - UPB_CHECK(push_stack_frame( - s, start, delim_end - start, udesc, &submsg_end)); + if(ft == UPB_TYPENUM(MESSAGE)) { + CHECK(push(s, start, delim_end - start, udesc, &submsg_end)); } else { - if(upb_isstringtype(ft)) - str_cb(udata, buf, UPB_MIN(delim_end, end) - buf, delim_end - buf, udesc); - //else - // /* Set a marker for packed arrays. */ - buf = delim_end; /* Note that this could be greater than end. */ + if(upb_isstringtype(ft)) { + size_t avail_len = UPB_MIN(delim_end, end) - buf; + str_cb(udata, buf, avail_len, delim_end - buf, udesc); + } // else { TODO: packed arrays } + buf = delim_end; // Could be >end. } - } else { /* Scalar (non-delimited) value. */ + } else { + // Scalar (non-delimited) value. switch(ft) { - case 0: /* Client elected to skip. */ - UPB_CHECK(skip_wire_value(buf, end, tag.wire_type, &buf)); + case 0: // Client elected to skip. + CHECK(skip_wire_value(buf, end, tag.wire_type, &buf)); break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP: - UPB_CHECK(push_stack_frame(s, start, 0, udesc, &submsg_end)); + case UPB_TYPENUM(GROUP): + CHECK(push(s, start, 0, udesc, &submsg_end)); break; default: - UPB_CHECK(value_cb(udata, buf, end, udesc, &buf)); + CHECK(value_cb(udata, buf, end, udesc, &buf)); break; } } - while(buf == submsg_end) submsg_end = pop_stack_frame(s, start); - //while(buf < s->packed_end) /* packed arrays. */ - // UPB_CHECK(value_cb(udata, buf, end, udesc, &buf)); + while(buf == submsg_end) + submsg_end = pop(s, start); + // while(buf < s->packed_end) { TODO: packed arrays } completed = buf; } +err: *read = (char*)completed - (char*)start; s->completed_offset += *read; return status; diff --git a/src/upb_parse.h b/src/upb_parse.h index 1454dd5..a8f4294 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -74,9 +74,8 @@ typedef void (*upb_submsg_start_cb)(void *udata, typedef void (*upb_submsg_end_cb)(void *udata); struct upb_stream_parser { - /* For delimited submsgs, counts from the submsg len down to zero. - * For group submsgs, counts from zero down to the negative len. */ - uint32_t stack[UPB_MAX_NESTING], *top, *limit; + // Stack entries store the offset where the submsg ends (for groups, 0). + size_t stack[UPB_MAX_NESTING], *top, *limit; size_t completed_offset; void *udata; upb_tag_cb tag_cb; diff --git a/src/upb_struct.h b/src/upb_struct.h index 9c1bb2e..c83978f 100644 --- a/src/upb_struct.h +++ b/src/upb_struct.h @@ -29,6 +29,10 @@ INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) { return head->refcount == 0 && head->refs == NULL; } +INLINE bool upb_mmhead_only(struct upb_mmhead *head) { + return head->refcount == 1 && head->refs == NULL; +} + INLINE bool upb_mmhead_unref(struct upb_mmhead *head) { head->refcount--; return upb_mmhead_norefs(head); @@ -57,7 +61,7 @@ struct upb_array { struct upb_msg_fielddef *fielddef; /* Defines the type of the array. */ union upb_value_ptr elements; upb_arraylen_t len; /* Number of elements in "elements". */ - upb_arraylen_t size; /* Memory we own (0 if by reference). */ + upb_arraylen_t size; /* Memory we own. */ }; struct upb_string { diff --git a/src/upb_text.c b/src/upb_text.c index 6d43152..bed4b43 100644 --- a/src/upb_text.c +++ b/src/upb_text.c @@ -99,7 +99,7 @@ static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg, if(upb_isarray(f)) { struct upb_array *arr = *p.arr; for(uint32_t j = 0; j < arr->len; j++) { - union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type); + union upb_value_ptr elem_p = upb_array_getelementptr(arr, j); printval(printer, elem_p, f, fd, stream); } } else { -- cgit v1.2.3