From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 20:20:33 -0700 Subject: Split src/ into core/ and stream/. --- core/upb.h | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 core/upb.h (limited to 'core/upb.h') diff --git a/core/upb.h b/core/upb.h new file mode 100644 index 0000000..230e638 --- /dev/null +++ b/core/upb.h @@ -0,0 +1,207 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * + * This file contains shared definitions that are widely used across upb. + */ + +#ifndef UPB_H_ +#define UPB_H_ + +#include +#include +#include // only for size_t. +#include "descriptor_const.h" +#include "upb_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// inline if possible, emit standalone code if required. +#ifndef INLINE +#define INLINE static inline +#endif + +#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) +#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) +#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) + +// The maximum that any submessages can be nested. Matches proto2's limit. +#define UPB_MAX_NESTING 64 + +// The maximum number of fields that any one .proto type can have. Note that +// this is very different than the max field number. It is hard to imagine a +// scenario where more than 32k fields makes sense. +#define UPB_MAX_FIELDS (1<<15) +typedef int16_t upb_field_count_t; + +// Nested type names are separated by periods. +#define UPB_SYMBOL_SEPARATOR '.' + +// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType +#define UPB_SYMBOL_MAXLEN 128 + +// The longest chain that mutually-recursive types are allowed to form. 
For +// example, this is a type cycle of length 2: +// message A { +// B b = 1; +// } +// message B { +// A a = 1; +// } +#define UPB_MAX_TYPE_CYCLE_LEN 16 + +// The maximum depth that the type graph can have. Note that this setting does +// not automatically constrain UPB_MAX_NESTING, because type cycles allow for +// unlimited nesting if we do not limit it. +#define UPB_MAX_TYPE_DEPTH 64 + +// The biggest possible single value is a 10-byte varint. +#define UPB_MAX_ENCODED_SIZE 10 + + +/* Fundamental types and type constants. **************************************/ + +// A list of types as they are encoded on-the-wire. +enum upb_wire_type { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5, + + // This isn't a real wire type, but we use this constant to describe varints + // that are expected to be a maximum of 32 bits. + UPB_WIRE_TYPE_32BIT_VARINT = 8 +}; + +typedef uint8_t upb_wire_type_t; + +// Value type as defined in a .proto file. eg. string, int32, etc. The +// integers that represent this are defined by descriptor.proto. Note that +// descriptor.proto reserves "0" for errors, and we use it to represent +// exceptional circumstances. +typedef uint8_t upb_field_type_t; + +// For referencing the type constants tersely. +#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type +#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type + +INLINE bool upb_issubmsgtype(upb_field_type_t type) { + return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); +} + +INLINE bool upb_isstringtype(upb_field_type_t type) { + return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); +} + +// Info for a given field type. +typedef struct { + uint8_t align; + uint8_t size; + upb_wire_type_t native_wire_type; + uint8_t allowed_wire_types; // For packable fields, also allows delimited. 
+ char *ctype; +} upb_type_info; + +// A static array of info about all of the field types, indexed by type number. +extern upb_type_info upb_types[]; + +// The number of a field, eg. "optional string foo = 3". +typedef int32_t upb_field_number_t; + +// Label (optional, repeated, required) as defined in a .proto file. The +// values of this are defined by google.protobuf.FieldDescriptorProto.Label +// (from descriptor.proto). +typedef uint8_t upb_label_t; + +// A scalar (non-string) wire value. Used only for parsing unknown fields. +typedef union { + uint64_t varint; + uint64_t _64bit; + uint32_t _32bit; +} upb_wire_value; + +/* Polymorphic values of .proto types *****************************************/ + +struct _upb_string; +typedef struct _upb_string upb_string; + +typedef uint32_t upb_strlen_t; + +// A single .proto value. The owner must have an out-of-band way of knowing +// the type, so that it knows which union member to use. +typedef union { + double _double; + float _float; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + bool _bool; +} upb_value; + +// A pointer to a .proto value. The owner must have an out-of-band way of +// knowing the type, so it knows which union member to use. +typedef union { + double *_double; + float *_float; + int32_t *int32; + int64_t *int64; + uint8_t *uint8; + uint32_t *uint32; + uint64_t *uint64; + bool *_bool; +} upb_valueptr; + +INLINE upb_valueptr upb_value_addrof(upb_value *val) { + upb_valueptr ptr = {&val->_double}; + return ptr; +} + +// Status codes used as a return value. Codes >0 are not fatal and can be +// resumed. +enum upb_status_code { + UPB_STATUS_OK = 0, + + // A read or write from a streaming src/sink could not be completed right now. + UPB_STATUS_TRYAGAIN = 1, + + // A value had an incorrect wire type and will be skipped. + UPB_STATUS_BADWIRETYPE = 2, + + // An unrecoverable error occurred. + UPB_STATUS_ERROR = -1, + + // A varint went for 10 bytes without terminating. 
+ UPB_ERROR_UNTERMINATED_VARINT = -2, + + // The max nesting level (UPB_MAX_NESTING) was exceeded. + UPB_ERROR_MAX_NESTING_EXCEEDED = -3 +}; + +typedef struct { + enum upb_status_code code; + upb_string *str; +} upb_status; + +#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL} +#define UPB_ERRORMSG_MAXLEN 256 + +INLINE bool upb_ok(upb_status *status) { + return status->code == UPB_STATUS_OK; +} + +void upb_reset(upb_status *status); +void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, + ...); +void upb_copyerr(upb_status *to, upb_status *from); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_H_ */ -- cgit v1.2.3 From db6c7387bc1df49deac41155a173e33017a75ed8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 18:11:24 -0700 Subject: Incremental progress towards getting upb_def to bootstrap. --- Makefile | 3 +- core/upb.c | 9 ++--- core/upb.h | 7 +++- core/upb_def.c | 102 ++++++++++++++++++++++--------------------------- core/upb_def.h | 62 +++++++++++++++++++++--------- core/upb_stream_vtbl.h | 1 + core/upb_table.c | 2 +- tests/test_string.c | 3 ++ 8 files changed, 105 insertions(+), 84 deletions(-) (limited to 'core/upb.h') diff --git a/Makefile b/Makefile index 568dcad..2b2a269 100644 --- a/Makefile +++ b/Makefile @@ -48,12 +48,13 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ descriptor/descriptor.c +$(SRC): perf-cppflags # Parts of core that are yet to be converted. OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< core/upb_def.lo: core/upb_def.c $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC diff --git a/core/upb.c b/core/upb.c index a98512d..9ed5617 100644 --- a/core/upb.c +++ b/core/upb.c @@ -44,12 +44,11 @@ void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...) { if(upb_ok(status)) { // The first error is the most interesting. - status->str = upb_string_new(); - char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN); status->code = code; + status->str = upb_string_tryrecycle(status->str); va_list args; va_start(args, msg); - vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args); + upb_string_vprintf(status->str, msg, args); va_end(args); } } @@ -57,10 +56,10 @@ void upb_seterr(upb_status *status, enum upb_status_code code, void upb_copyerr(upb_status *to, upb_status *from) { to->code = from->code; - to->str = upb_string_getref(from->str); + if(from->str) to->str = upb_string_getref(from->str); } -void upb_reset(upb_status *status) { +void upb_status_reset(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 230e638..630d9e1 100644 --- a/core/upb.h +++ b/core/upb.h @@ -195,7 +195,12 @@ INLINE bool upb_ok(upb_status *status) { return status->code == UPB_STATUS_OK; } -void upb_reset(upb_status *status); +INLINE void upb_status_init(upb_status *status) { + status->code = UPB_STATUS_OK; + status->str = NULL; +} + +void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index cc4fd80..0f48559 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -155,8 +155,9 @@ static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base, } else { 
open_defs[num_open_defs++] = m; } - for(int i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter iter = upb_msg_begin(m); + for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) { + upb_fielddef *f = upb_msg_iter_field(iter); upb_def *def = f->def; if(upb_issubmsg(f) && def->is_cyclic) { upb_msgdef *sub_m = upb_downcast_msgdef(def); @@ -230,16 +231,6 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ -typedef struct { - upb_strtable_entry e; - uint32_t value; -} ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_string *string; -} iton_ent; - static void upb_enumdef_free(upb_enumdef *e) { upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -271,8 +262,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } - ntoi_ent ntoi_ent = {{name, 0}, number}; - iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{name, 0}, number}; + upb_iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the @@ -291,11 +282,14 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(iton_ent)); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + e->base.fqname = upb_string_tryrecycle(e->base.fqname); + CHECKSRC(upb_src_getstr(src, e->base.fqname)); case 
GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: CHECK(upb_addenum_val(src, e, status)); break; @@ -304,37 +298,25 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) break; } } + assert(e->base.fqname); upb_deflist_push(defs, UPB_UPCAST(e)); return true; +src_err: + upb_copyerr(status, upb_src_status(src)); err: upb_enumdef_free(e); return false; } -static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { - iter->state = ent; - iter->name = ent->e.key; - iter->val = ent->value; -} - -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. - ntoi_ent *ent = upb_strtable_begin(&e->ntoi); - iter->e = e; - fill_iter(iter, ent); + return upb_inttable_begin(&e->iton); } -void upb_enum_next(upb_enum_iter *iter) { - ntoi_ent *ent = iter->state; - assert(ent); - ent = upb_strtable_next(&iter->e->ntoi, &ent->e); - iter->state = ent; - if(ent) fill_iter(iter, ent); -} - -bool upb_enum_done(upb_enum_iter *iter) { - return iter->state == NULL; +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { + assert(iter); + return upb_inttable_next(&e->iton, &iter->e); } @@ -346,7 +328,7 @@ static void upb_fielddef_free(upb_fielddef *f) { static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); - if(upb_hasdef(f) && f->owned) { + if(f->owned) { upb_def_unref(f->def); } } @@ -354,6 +336,8 @@ static void upb_fielddef_uninit(upb_fielddef *f) { static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) { upb_fielddef *f = malloc(sizeof(*f)); + f->number = -1; + f->name = NULL; f->def = NULL; f->owned = false; upb_fielddef *parsed_f; @@ -388,6 +372,7 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) } CHECKSRC(upb_src_eof(src)); // TODO: verify that all required fields were present. 
+ assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. @@ -461,9 +446,9 @@ err: static void upb_msgdef_free(upb_msgdef *m) { - for (upb_field_count_t i = 0; i < m->num_fields; i++) - upb_fielddef_uninit(&m->fields[i]); - free(m->fields); + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) + upb_fielddef_uninit(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -479,6 +464,13 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { upb_def_ref(def); } +upb_msg_iter upb_msg_begin(upb_msgdef *m) { + return upb_inttable_begin(&m->itof); +} + +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { + return upb_inttable_next(&m->itof, &iter->e); +} /* symtab internal ***********************************************************/ @@ -601,8 +593,9 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) } else { UPB_UPCAST(m)->search_depth = ++depth; bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_issubmsg(f)) continue; upb_def *sub_def = f->def; upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); @@ -632,8 +625,9 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Type names are resolved relative to the message in which they appear. upb_string *base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; + upb_msg_iter i; + for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + upb_fielddef *f = upb_msg_iter_field(i); if(!upb_hasdef(f)) continue; // No resolving necessary. 
upb_string *name = upb_downcast_unresolveddef(f->def)->name; @@ -873,7 +867,6 @@ typedef struct { upb_wire_type_t wire_type; upb_strlen_t delimited_len; upb_strlen_t stack[UPB_MAX_NESTING], *top; - upb_string *str; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -929,6 +922,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) return &d->field; } +static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { + upb_string_substr(str, d->input, d->offset, d->delimited_len); + d->offset += d->delimited_len; + return true; +} + static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) { switch(d->wire_type) { @@ -950,11 +949,6 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) return true; } -static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { - upb_string_substr(str, d->input, d->offset, d->delimited_len); - return true; -} - static bool upb_baredecoder_skipval(upb_baredecoder *d) { upb_value val; @@ -986,7 +980,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); - d->str = upb_string_new(); d->top = &d->stack[0]; upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; @@ -995,7 +988,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) static void upb_baredecoder_free(upb_baredecoder *d) { upb_string_unref(d->input); - upb_string_unref(d->str); free(d); } @@ -1004,11 +996,8 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) return &d->src; } -upb_symtab *upb_get_descriptor_symtab() +void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // TODO: implement sharing of symtabs, so that successive calls to this - // function will return the same symtab. - upb_symtab *symtab = upb_symtab_new(); // TODO: allow upb_strings to be static or on the stack. 
upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); @@ -1017,5 +1006,4 @@ upb_symtab *upb_get_descriptor_symtab() assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); - return symtab; } diff --git a/core/upb_def.h b/core/upb_def.h index 5c8c11e..82d8520 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -135,11 +135,6 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; - size_t size; - upb_field_count_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; // Required fields have the lowest set bytemasks. - upb_fielddef *fields; // We have exclusive ownership of these. // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -170,6 +165,21 @@ INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +// Iteration over fields. The order is undefined. +// upb_msg_iter i; +// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// // ... +// } +typedef upb_itof_ent *upb_msg_iter; + +upb_msg_iter upb_msg_begin(upb_msgdef *m); +upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter); +INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; } + +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { + return iter->f; +} + /* upb_enumdef ****************************************************************/ typedef struct _upb_enumdef { @@ -178,6 +188,16 @@ typedef struct _upb_enumdef { upb_inttable iton; } upb_enumdef; +typedef struct { + upb_strtable_entry e; + uint32_t value; +} upb_ntoi_ent; + +typedef struct { + upb_inttable_entry e; + upb_string *string; +} upb_iton_ent; + typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. 
@@ -186,18 +206,22 @@ upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { +// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { // // ... // } -typedef struct { - upb_enumdef *e; - void *state; // Internal iteration state. - upb_string *name; - upb_enumval_t val; -} upb_enum_iter; -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); -void upb_enum_next(upb_enum_iter *iter); -bool upb_enum_done(upb_enum_iter *iter); +typedef upb_iton_ent *upb_enum_iter; + +upb_enum_iter upb_enum_begin(upb_enumdef *e); +upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); +INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; } + +INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { + return iter->string; +} +INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { + return iter->e.key; +} + /* upb_symtab *****************************************************************/ @@ -252,10 +276,10 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); -// Returns a symtab that defines google.protobuf.DescriptorProto and all other -// types that are defined in descriptor.proto. This allows you to load other -// proto types. The caller owns a ref on the returned symtab. -upb_symtab *upb_get_descriptor_symtab(); +// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. +// This is necessary for bootstrapping, since these are the upb_defs that +// specify other defs and allow them to be loaded. 
+void upb_symtab_add_descriptorproto(upb_symtab *s); /* upb_def casts **************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 52172d2..ba2670e 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -88,6 +88,7 @@ struct upb_bytesrc { INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { s->vtbl = vtbl; s->eof = false; + upb_status_init(&s->status); #ifndef DEBUG // TODO: initialize debug-mode checking. #endif diff --git a/core/upb_table.c b/core/upb_table.c index b91776c..b860204 100644 --- a/core/upb_table.c +++ b/core/upb_table.c @@ -179,7 +179,7 @@ static void strinsert(upb_strtable *t, upb_strtable_entry *e) memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ upb_strtable_entry *evictee_e = strent(t, evictee_bucket); while(1) { - assert(!upb_string_isnull(evictee_e->key)); + assert(evictee_e->key); assert(evictee_e->next != UPB_END_OF_CHAIN); if(evictee_e->next == bucket) { evictee_e->next = empty_bucket; diff --git a/tests/test_string.c b/tests/test_string.c index 5e6e2a9..5869b70 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -66,4 +66,7 @@ int main() { upb_string_unref(str); upb_string_unref(str2); + + // Unref of NULL is harmless. + upb_string_unref(NULL); } -- cgit v1.2.3 From ae0beee2854b977f472d48cd149b880b074b59c5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 19:37:47 -0700 Subject: Fixed upb_string error with strange vsnprintf() behavior. 
--- core/upb.c | 9 +++++++++ core/upb.h | 1 + core/upb_def.c | 49 +++++++++++++++++++++++++++++++++++++------------ core/upb_string.c | 13 +++++++++---- tests/test_string.c | 9 +++++++++ 5 files changed, 65 insertions(+), 16 deletions(-) (limited to 'core/upb.h') diff --git a/core/upb.c b/core/upb.c index 9ed5617..d581bbe 100644 --- a/core/upb.c +++ b/core/upb.c @@ -64,3 +64,12 @@ void upb_status_reset(upb_status *status) { upb_string_unref(status->str); status->str = NULL; } + +void upb_printerr(upb_status *status) { + if(status->str) { + fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", + status->code, UPB_STRARG(status->str)); + } else { + fprintf(stderr, "code: %d, no msg\n", status->code); + } +} diff --git a/core/upb.h b/core/upb.h index 630d9e1..13317bb 100644 --- a/core/upb.h +++ b/core/upb.h @@ -200,6 +200,7 @@ INLINE void upb_status_init(upb_status *status) { status->str = NULL; } +void upb_printerr(upb_status *status); void upb_status_reset(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); diff --git a/core/upb_def.c b/core/upb_def.c index 0f48559..2b2916e 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -21,7 +21,7 @@ typedef struct { static void upb_deflist_init(upb_deflist *l) { l->size = 8; - l->defs = malloc(l->size); + l->defs = malloc(l->size * sizeof(void*)); l->len = 0; } @@ -34,7 +34,7 @@ static void upb_deflist_uninit(upb_deflist *l) { static void upb_deflist_push(upb_deflist *l, upb_def *d) { if(l->len == l->size) { l->size *= 2; - l->defs = realloc(l->defs, l->size); + l->defs = realloc(l->defs, l->size * sizeof(void*)); } l->defs[l->len++] = d; } @@ -238,6 +238,7 @@ static void upb_enumdef_free(upb_enumdef *e) { free(e); } +// google.protobuf.EnumValueDescriptorProto. 
static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) { int32_t number = -1; @@ -245,13 +246,13 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); - break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: name = upb_string_tryrecycle(name); CHECKSRC(upb_src_getstr(src, name)); break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &number)); + break; default: CHECKSRC(upb_src_skipval(src)); break; @@ -278,6 +279,7 @@ err: return false; } +// google.protobuf.EnumDescriptorProto. static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) { upb_enumdef *e = malloc(sizeof(*e)); @@ -290,8 +292,11 @@ static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: e->base.fqname = upb_string_tryrecycle(e->base.fqname); CHECKSRC(upb_src_getstr(src, e->base.fqname)); + break; case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + CHECKSRC(upb_src_startmsg(src)); CHECK(upb_addenum_val(src, e, status)); + CHECKSRC(upb_src_endmsg(src)); break; default: upb_src_skipval(src); @@ -729,8 +734,10 @@ err: // We need to free all defs from "tmptab." 
upb_rwlock_unlock(&s->lock); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; - e = upb_strtable_next(&tmptab, &e->e)) + e = upb_strtable_next(&tmptab, &e->e)) { + fprintf(stderr, "Unreffing def: '" UPB_STRFMT "'\n", UPB_STRARG(e->e.key)); upb_def_unref(e->def); + } upb_strtable_free(&tmptab); return false; } @@ -914,10 +921,12 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) key = upb_baredecoder_readv32(d); d->wire_type = key & 0x7; d->field.number = key >> 3; + fprintf(stderr, "field num: %d, wire_type: %d\n", d->field.number, d->wire_type); if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. d->delimited_len = upb_baredecoder_readv32(d); + fprintf(stderr, "delimited size: %d\n", d->delimited_len); } return &d->field; } @@ -944,6 +953,7 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) *val.uint32 = upb_baredecoder_readf32(d); break; default: + *(char*)0 = 0; assert(false); } return true; @@ -951,19 +961,24 @@ static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) static bool upb_baredecoder_skipval(upb_baredecoder *d) { - upb_value val; - return upb_baredecoder_getval(d, upb_value_addrof(&val)); + if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { + d->offset += d->delimited_len; + return true; + } else { + upb_value val; + return upb_baredecoder_getval(d, upb_value_addrof(&val)); + } } static bool upb_baredecoder_startmsg(upb_baredecoder *d) { - *(d->top++) = d->offset + d->delimited_len; + *(++d->top) = d->offset + d->delimited_len; return true; } static bool upb_baredecoder_endmsg(upb_baredecoder *d) { - d->offset = *(--d->top); + d->offset = *(d->top--); return true; } @@ -980,7 +995,9 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) { upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); + d->offset = 0; d->top = &d->stack[0]; + *(d->top) = upb_string_len(d->input); 
upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; } @@ -1001,9 +1018,17 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) // TODO: allow upb_strings to be static or on the stack. upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len); upb_baredecoder *decoder = upb_baredecoder_new(descriptor); - upb_status status; + upb_status status = UPB_STATUS_INIT; upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); - assert(upb_ok(&status)); upb_baredecoder_free(decoder); upb_string_unref(descriptor); + + if(!upb_ok(&status)) { + // upb itself is corrupt. + upb_printerr(&status); + upb_symtab_unref(symtab); + abort(); + } + fprintf(stderr, "Claims to have succeeded\n"); + upb_printerr(&status); } diff --git a/core/upb_string.c b/core/upb_string.c index 2f487aa..3563c9e 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -87,6 +87,7 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { void upb_string_substr(upb_string *str, upb_string *target_str, upb_strlen_t start, upb_strlen_t len) { + if(str->ptr) *(char*)0 = 0; assert(str->ptr == NULL); str->src = upb_string_getref(target_str); str->ptr = upb_string_getrobuf(target_str) + start; @@ -103,11 +104,15 @@ void upb_string_vprintf(upb_string *str, const char *format, va_list args) { uint32_t true_size = vsnprintf(buf, size, format, args_copy); va_end(args_copy); - if (true_size > size) { - // Need to reallocate. + if (true_size >= size) { + // Need to reallocate. We reallocate even if the sizes were equal, + // because snprintf excludes the terminating NULL from its count. + // We don't care about the terminating NULL, but snprintf might + // bail out of printing even other characters if it doesn't have + // enough space to write the NULL also. 
str = upb_string_tryrecycle(str); - buf = upb_string_getrwbuf(str, true_size); - vsnprintf(buf, true_size, format, args); + buf = upb_string_getrwbuf(str, true_size + 1); + vsnprintf(buf, true_size + 1, format, args); } str->len = true_size; } diff --git a/tests/test_string.c b/tests/test_string.c index 5869b70..46f35b9 100644 --- a/tests/test_string.c +++ b/tests/test_string.c @@ -32,6 +32,7 @@ int main() { // Make string alias part of another string. str2 = upb_strdupc("WXYZ"); + str = upb_string_tryrecycle(str); upb_string_substr(str, str2, 1, 2); assert(upb_string_len(str) == 2); assert(upb_string_len(str2) == 4); @@ -63,9 +64,17 @@ int main() { // Test printf. str = upb_string_tryrecycle(str); upb_string_printf(str, "Number: %d, String: %s", 5, "YO!"); + assert(upb_streqlc(str, "Number: 5, String: YO!")); + + // Test asprintf + upb_string *str3 = upb_string_asprintf("Yo %s: " UPB_STRFMT "\n", + "Josh", UPB_STRARG(str)); + const char expected[] = "Yo Josh: Number: 5, String: YO!\n"; + assert(upb_streqlc(str3, expected)); upb_string_unref(str); upb_string_unref(str2); + upb_string_unref(str3); // Unref of NULL is harmless. upb_string_unref(NULL); -- cgit v1.2.3 From c7a95061a7c02ffeebd71eeb56bf19fc1c1797dd Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 10 Jul 2010 20:13:06 -0700 Subject: Successfully bootstraps!! 
--- core/upb.c | 2 +- core/upb.h | 2 +- core/upb_def.c | 27 ++++++++++++++------------- tests/test_def.c | 24 ++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) create mode 100644 tests/test_def.c (limited to 'core/upb.h') diff --git a/core/upb.c b/core/upb.c index d581bbe..c396323 100644 --- a/core/upb.c +++ b/core/upb.c @@ -59,7 +59,7 @@ void upb_copyerr(upb_status *to, upb_status *from) if(from->str) to->str = upb_string_getref(from->str); } -void upb_status_reset(upb_status *status) { +void upb_clearerr(upb_status *status) { status->code = UPB_STATUS_OK; upb_string_unref(status->str); status->str = NULL; diff --git a/core/upb.h b/core/upb.h index 13317bb..b605fd9 100644 --- a/core/upb.h +++ b/core/upb.h @@ -201,7 +201,7 @@ INLINE void upb_status_init(upb_status *status) { } void upb_printerr(upb_status *status); -void upb_status_reset(upb_status *status); +void upb_clearerr(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, ...); void upb_copyerr(upb_status *to, upb_status *from); diff --git a/core/upb_def.c b/core/upb_def.c index 2b2916e..b9402c5 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -211,7 +211,9 @@ static void upb_def_uninit(upb_def *def) { typedef struct _upb_unresolveddef { upb_def base; - // The target type name. This may or may not be fully qualified. + // The target type name. This may or may not be fully qualified. It is + // tempting to want to use base.fqname for this, but that will be qualified + // which is inappropriate for a name we still have to resolve. 
upb_string *name; } upb_unresolveddef; @@ -224,6 +226,7 @@ static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { } static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { + upb_string_unref(def->name); upb_def_uninit(&def->base); free(def); } @@ -232,6 +235,10 @@ static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { /* upb_enumdef ****************************************************************/ static void upb_enumdef_free(upb_enumdef *e) { + upb_enum_iter i; + for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { + upb_string_unref(upb_enum_iter_name(i)); + } upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); upb_def_uninit(&e->base); @@ -328,14 +335,11 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { /* upb_fielddef ***************************************************************/ static void upb_fielddef_free(upb_fielddef *f) { - free(f); -} - -static void upb_fielddef_uninit(upb_fielddef *f) { upb_string_unref(f->name); if(f->owned) { upb_def_unref(f->def); } + free(f); } static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) @@ -453,7 +457,7 @@ static void upb_msgdef_free(upb_msgdef *m) { upb_msg_iter i; for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_uninit(upb_msg_iter_field(i)); + upb_fielddef_free(upb_msg_iter_field(i)); upb_strtable_free(&m->ntof); upb_inttable_free(&m->itof); upb_def_uninit(&m->base); @@ -487,7 +491,7 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: package = upb_string_tryrecycle(package); CHECKSRC(upb_src_getstr(src, package)); break; @@ -589,6 +593,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) "in a cycle of length %d, which exceeds 
the maximum type " "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, UPB_MAX_TYPE_CYCLE_LEN); + return false; } return true; } else if(UPB_UPCAST(m)->search_depth > 0) { @@ -664,7 +669,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; // The findcycles() call will decrement the external refcount of the - if(!upb_symtab_findcycles(m, 0, status)) return false; + upb_symtab_findcycles(m, 0, status); upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true); } @@ -735,7 +740,6 @@ err: upb_rwlock_unlock(&s->lock); for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e; e = upb_strtable_next(&tmptab, &e->e)) { - fprintf(stderr, "Unreffing def: '" UPB_STRFMT "'\n", UPB_STRARG(e->e.key)); upb_def_unref(e->def); } upb_strtable_free(&tmptab); @@ -921,12 +925,10 @@ static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) key = upb_baredecoder_readv32(d); d->wire_type = key & 0x7; d->field.number = key >> 3; - fprintf(stderr, "field num: %d, wire_type: %d\n", d->field.number, d->wire_type); if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { // For delimited wire values we parse the length now, since we need it in // all cases. d->delimited_len = upb_baredecoder_readv32(d); - fprintf(stderr, "delimited size: %d\n", d->delimited_len); } return &d->field; } @@ -1026,9 +1028,8 @@ void upb_symtab_add_descriptorproto(upb_symtab *symtab) if(!upb_ok(&status)) { // upb itself is corrupt. upb_printerr(&status); + upb_clearerr(&status); upb_symtab_unref(symtab); abort(); } - fprintf(stderr, "Claims to have succeeded\n"); - upb_printerr(&status); } diff --git a/tests/test_def.c b/tests/test_def.c new file mode 100644 index 0000000..e6f95d7 --- /dev/null +++ b/tests/test_def.c @@ -0,0 +1,24 @@ + +#undef NDEBUG /* ensure tests always assert. 
*/ +#include "upb_def.h" +#include + +int main() { + upb_symtab *s = upb_symtab_new(); + upb_symtab_add_descriptorproto(s); + + int count; + upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); + for (int i = 0; i < count; i++) { + upb_def_unref(defs[i]); + } + free(defs); + + upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); + upb_def *fds = upb_symtab_lookup(s, str); + assert(fds != NULL); + assert(upb_dyncast_msgdef(fds) != NULL); + upb_def_unref(fds); + upb_string_unref(str); + upb_symtab_unref(s); +} -- cgit v1.2.3 From a9e998159c5ac8c4f2644b5ed0eda2e8ff1f8706 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 2 Aug 2010 10:25:24 -0700 Subject: Fleshed out upb_msg: test_vs_proto2 compiles but fails. --- Makefile | 10 ++-- core/upb.h | 98 ++++++++++++++++++++++++++++++++++---- core/upb_atomic.h | 4 ++ core/upb_def.c | 65 ++++++++++++++++++++++++- core/upb_def.h | 28 +++++++++-- core/upb_msg.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ core/upb_msg.h | 114 ++++++++++++++++++++++++++++++++++++++++---- stream/upb_decoder.c | 8 ++-- stream/upb_strstream.h | 2 +- tests/test_vs_proto2.cc | 54 ++++++++++++--------- 10 files changed, 452 insertions(+), 54 deletions(-) create mode 100644 core/upb_msg.c (limited to 'core/upb.h') diff --git a/Makefile b/Makefile index 203bed6..131b3c0 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,7 @@ clean: # The core library (core/libupb.a) SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \ core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \ + core/upb_msg.c \ descriptor/descriptor.c $(SRC): perf-cppflags # Parts of core that are yet to be converted. 
@@ -101,14 +102,13 @@ tests/test.proto.pb: tests/test.proto TESTS=tests/test_string \ tests/test_table \ tests/test_def \ - tests/test_decoder -tests: $(TESTS) - -OTHER_TESTS=tests/tests \ - tests/test_table \ + tests/test_decoder \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb +tests: $(TESTS) + +OTHER_TESTS=tests/tests \ $(TESTS): core/libupb.a VALGRIND=valgrind --leak-check=full --error-exitcode=1 diff --git a/core/upb.h b/core/upb.h index b605fd9..7ee0469 100644 --- a/core/upb.h +++ b/core/upb.h @@ -80,24 +80,16 @@ enum upb_wire_type { typedef uint8_t upb_wire_type_t; -// Value type as defined in a .proto file. eg. string, int32, etc. The +// Type of a field as defined in a .proto file. eg. string, int32, etc. The // integers that represent this are defined by descriptor.proto. Note that // descriptor.proto reserves "0" for errors, and we use it to represent // exceptional circumstances. -typedef uint8_t upb_field_type_t; +typedef uint8_t upb_fieldtype_t; // For referencing the type constants tersely. #define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type #define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} - // Info for a given field type. 
typedef struct { uint8_t align; @@ -129,6 +121,10 @@ typedef union { struct _upb_string; typedef struct _upb_string upb_string; +struct _upb_array; +typedef struct _upb_array upb_array; +struct _upb_msg; +typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; @@ -142,6 +138,11 @@ typedef union { uint32_t uint32; uint64_t uint64; bool _bool; + upb_string *str; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; } upb_value; // A pointer to a .proto value. The owner must have an out-of-band way of @@ -155,13 +156,90 @@ typedef union { uint32_t *uint32; uint64_t *uint64; bool *_bool; + upb_string **str; + upb_msg **msg; + upb_array **arr; + void *_void; } upb_valueptr; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + INLINE upb_valueptr upb_value_addrof(upb_value *val) { upb_valueptr ptr = {&val->_double}; return ptr; } +// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to +// know the value type to perform this operation, because we need to know how +// much memory to copy. +INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { + upb_value val; + +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + return val; + +#undef CASE +} + +// Writes a upb_value to a upb_value_ptr location. 
We need to know the value +// type to perform this operation, because we need to know how much memory to +// copy. +INLINE void upb_value_write(upb_valueptr ptr, upb_value val, + upb_fieldtype_t ft) { +#define CASE(t, member_name) \ + case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: break; + } + +#undef CASE +} + // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { diff --git a/core/upb_atomic.h b/core/upb_atomic.h index 01fc8a2..1cd848b 100644 --- a/core/upb_atomic.h +++ b/core/upb_atomic.h @@ -127,6 +127,10 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { Implement them or compile with UPB_THREAD_UNSAFE. #endif +INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) { + return upb_atomic_read(a) == 1; +} + /* Reader/Writer lock. ********************************************************/ #ifdef UPB_THREAD_UNSAFE diff --git a/core/upb_def.c b/core/upb_def.c index e117455..1c8fbdc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -12,6 +12,16 @@ #define CHECKSRC(x) if(!(x)) goto src_err #define CHECK(x) if(!(x)) goto err +/* Rounds p up to the next multiple of t. */ +static size_t upb_align_up(size_t val, size_t align) { + return val % align == 0 ? val : val + align - (val % align); +} + +static int upb_div_round_up(int numerator, int denominator) { + /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ + return numerator > 0 ? 
(numerator - 1) / denominator + 1 : 0; +} + // A little dynamic array for storing a growing list of upb_defs. typedef struct { upb_def **defs; @@ -409,6 +419,19 @@ src_err: /* upb_msgdef *****************************************************************/ +static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) { + // Sort by data size (ascending) to reduce padding. + size_t size1 = upb_types[f1->type].size; + size_t size2 = upb_types[f2->type].size; + if (size1 != size2) return size1 - size2; + // Otherwise return in number order (just so we get a reproduceable order. + return f1->number - f2->number; +} + +static int upb_compare_fields(const void *f1, const void *f2) { + return upb_compare_typed_fields(*(void**)f1, *(void**)f2); +} + // Processes a google.protobuf.DescriptorProto, adding defs to "defs." static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) { @@ -418,7 +441,6 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); int32_t start_count = defs->len; - upb_fielddef *f; while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { @@ -451,6 +473,45 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); goto err; } + + + // Create an ordering over the fields. + upb_field_count_t n = upb_msgdef_numfields(m); + upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); + upb_field_count_t field = 0; + upb_msg_iter i; + for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { + sorted_fields[field++]= upb_msg_iter_field(i); + } + qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields); + + // Assign offsets in the msg. 
+ m->set_flags_bytes = upb_div_round_up(n, 8); + m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes; + + size_t max_align = 0; + for (int i = 0; i < n; i++) { + upb_fielddef *f = sorted_fields[i]; + upb_type_info *type_info = &upb_types[f->type]; + + // This identifies the set bit. When we implement is_initialized (a + // general check about whether all required bits are set) we will probably + // want to use a different ordering that puts all the required bits + // together. + f->field_index = i; + + // General alignment rules are: each member must be at an address that is a + // multiple of that type's alignment. Also, the size of the structure as a + // whole must be a multiple of the greatest alignment of any member. + size_t offset = upb_align_up(m->size, type_info->align); + // Offsets are relative to the end of the refcount. + f->byte_offset = offset - sizeof(upb_atomic_refcount_t); + m->size = offset + type_info->size; + max_align = UPB_MAX(max_align, type_info->align); + } + + if (max_align > 0) m->size = upb_align_up(m->size, max_align); + upb_deflist_qualify(defs, m->base.fqname, start_count); upb_deflist_push(defs, UPB_UPCAST(m)); return true; @@ -664,7 +725,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, } // Check the type of the found def. - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); return false; diff --git a/core/upb_def.h b/core/upb_def.h index 3294a8d..9eb961a 100644 --- a/core/upb_def.h +++ b/core/upb_def.h @@ -103,7 +103,7 @@ typedef struct _upb_fielddef { upb_field_count_t field_index; // Indicates set bit. upb_field_number_t number; - upb_field_type_t type; + upb_fieldtype_t type; upb_label_t label; // True if we own a ref on "def" (above). This is true unless this edge is // part of a cycle. 
@@ -112,10 +112,10 @@ typedef struct _upb_fielddef { // A variety of tests about the type of a field. INLINE bool upb_issubmsg(upb_fielddef *f) { - return upb_issubmsgtype(f->type); + return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE); } INLINE bool upb_isstring(upb_fielddef *f) { - return upb_isstringtype(f->type); + return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES); } INLINE bool upb_isarray(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); @@ -125,6 +125,19 @@ INLINE bool upb_hasdef(upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } +INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) { + if (upb_isarray(f)) { + return UPB_VALUETYPE_ARRAY; + } else { + return f->type; + } +} + +INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) { + assert(upb_isarray(f)); + return f->type; +} + INLINE bool upb_field_ismm(upb_fielddef *f) { return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); } @@ -139,6 +152,8 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) { typedef struct _upb_msgdef { upb_def base; upb_atomic_refcount_t cycle_refcount; + uint32_t size; + uint32_t set_flags_bytes; // Tables for looking up fields by number and name. upb_inttable itof; // int to field @@ -169,9 +184,14 @@ INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { return e ? e->f : NULL; } +INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) { + return upb_strtable_count(&m->ntof); +} + // Iteration over fields. The order is undefined. // upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) { +// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { +// upb_fielddef *f = upb_msg_iter_field(i); // // ... 
// } typedef upb_itof_ent *upb_msg_iter; diff --git a/core/upb_msg.c b/core/upb_msg.c new file mode 100644 index 0000000..75f7a35 --- /dev/null +++ b/core/upb_msg.c @@ -0,0 +1,123 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * Data structure for storing a message of protobuf data. + */ + +#include "upb_msg.h" + +void _upb_elem_free(upb_value v, upb_fielddef *f) { + switch(f->type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + _upb_msg_free(v.msg, upb_downcast_msgdef(f->def)); + break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + _upb_string_free(v.str); + break; + default: + abort(); + } +} + +void _upb_field_free(upb_value v, upb_fielddef *f) { + if (upb_isarray(f)) { + _upb_array_free(v.arr, f); + } else { + _upb_elem_free(v, f); + } +} + +upb_msg *upb_msg_new(upb_msgdef *md) { + upb_msg *msg = malloc(md->size); + // Clear all set bits and cached pointers. + memset(msg, 0, md->size); + upb_atomic_refcount_init(&msg->refcount, 1); + return msg; +} + +void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { + // Need to release refs on all sub-objects. + upb_msg_iter i; + for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { + upb_fielddef *f = upb_msg_iter_field(i); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f); + } + free(msg); +} + +upb_array *upb_array_new(void) { + upb_array *arr = malloc(sizeof(*arr)); + upb_atomic_refcount_init(&arr->refcount, 1); + arr->size = 0; + arr->len = 0; + arr->elements._void = NULL; + return arr; +} + +void _upb_array_free(upb_array *arr, upb_fielddef *f) { + if (upb_elem_ismm(f)) { + // Need to release refs on sub-objects. 
+ upb_valuetype_t type = upb_elem_valuetype(f); + for (upb_arraylen_t i = 0; i < arr->size; i++) { + upb_valueptr p = _upb_array_getptr(arr, f, i); + _upb_elem_unref(upb_value_read(p, type), f); + } + } + if (arr->elements._void) free(arr->elements._void); + free(arr); +} + +upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) { + upb_value v; + switch(type) { + case UPB_TYPE(MESSAGE): + case UPB_TYPE(GROUP): + v.msg = upb_msg_new(upb_downcast_msgdef(f->def)); + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): + v.str = upb_string_new(); + case UPB_VALUETYPE_ARRAY: + v.arr = upb_array_new(); + default: + abort(); + } + return v; +} + +static void upb_field_recycle(upb_value val) { + (void)val; +} + +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, + upb_valuetype_t type) { + if (val._void == NULL || !upb_atomic_only(val.refcount)) { + if (val._void != NULL) upb_atomic_unref(val.refcount); + val = upb_field_new(f, type); + upb_value_write(p, val, type); + } else { + upb_field_recycle(val); + } + return val; +} + +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} + +void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status) { + (void)msg; + (void)md; + (void)str; + (void)status; +} diff --git a/core/upb_msg.h b/core/upb_msg.h index 5215bd9..2db67c0 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -9,14 +9,39 @@ #ifndef UPB_MSG_H #define UPB_MSG_H +#include "upb.h" +#include "upb_def.h" +#include + #ifdef __cplusplus extern "C" { #endif -typedef struct { +upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f, + upb_valuetype_t type); + +INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); } + +void _upb_field_free(upb_value v, upb_fielddef *f); +void _upb_elem_free(upb_value v, upb_fielddef *f); +INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) { + 
assert(upb_field_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_field_free(v, f); +} +INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + if (v.refcount && upb_atomic_unref(v.refcount)) + _upb_elem_free(v, f); +} + +/* upb_array ******************************************************************/ + +typedef uint32_t upb_arraylen_t; +struct _upb_array { upb_atomic_refcount_t refcount; - uint32_t len; - uint32_t size; + upb_arraylen_t len; + upb_arraylen_t size; upb_valueptr elements; }; @@ -31,29 +56,70 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); } +INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, + uint32_t elem) { + upb_valueptr p; + p._void = &a->elements.uint8[elem * upb_types[f->type].size]; + return p; +} + INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { assert(elem < upb_array_len(a)); return upb_value_read(_upb_array_getptr(a, f, elem), f->type); } // For string or submessages, will release a ref on the previously set value. +// and take a ref on the new value. The array must already be at least "elem" +// long; to append use append_mutable. INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, upb_value val) { + assert(elem < upb_array_len(a)); + upb_valueptr p = _upb_array_getptr(a, f, elem); + if (upb_elem_ismm(f)) { + _upb_elem_unref(upb_value_read(p, f->type), f); + _upb_value_ref(val); + } + upb_value_write(p, val, f->type); } -// Append an element with the default value, returning it. For strings or -// submessages, this will try to reuse previously allocated memory. 
-INLINE upb_value upb_array_append_mutable(upb_array *a, upb_fielddef *f) { +INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { + if (a->len == a->size) { + a->len *= 2; + a->elements._void = realloc(a->elements._void, + a->len * upb_types[f->type].size); + } } -typedef struct { +// Append an element to an array of string or submsg with the default value, +// returning it. This will try to reuse previously allocated memory. +INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_array_resize(a, f); + upb_valueptr p = _upb_array_getptr(a, f, a->len++); + upb_valuetype_t type = upb_elem_valuetype(f); + upb_value val = upb_value_read(p, type); + val = upb_field_tryrecycle(p, val, f, type); + return val; +} + + +/* upb_msg ********************************************************************/ + +struct _upb_msg { upb_atomic_refcount_t refcount; uint8_t data[4]; // We allocate the appropriate amount per message. -} upb_msg; +}; // Creates a new msg of the given type. upb_msg *upb_msg_new(upb_msgdef *md); +// Returns a pointer to the given field. +INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p; + p._void = &msg->data[f->byte_offset]; + return p; +} + void _upb_msg_free(upb_msg *msg, upb_msgdef *md); INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); @@ -65,6 +131,10 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; } +INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { + msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); +} + // Returns the current value of the given field if set, or the default value if // not set. 
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { @@ -79,12 +149,29 @@ INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { // Otherwise sets it and returns an empty instance, attempting to reuse any // previously allocated memory. INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + upb_value val = upb_value_read(p, type); + if (!upb_msg_has(msg, f)) { + upb_msg_sethas(msg, f); + val = upb_field_tryrecycle(p, val, f, type); + } + return val; } // Sets the current value of the field. If this is a string, array, or // submessage field, releases a ref on the value (if any) that was previously // set. INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = _upb_msg_getptr(msg, f); + upb_valuetype_t type = upb_field_valuetype(f); + if (upb_field_ismm(f)) { + _upb_field_unref(upb_value_read(p, type), f); + _upb_value_ref(val); + } + upb_msg_sethas(msg, f); + upb_value_write(p, val, upb_field_valuetype(f)); } // Unsets all field values back to their defaults. @@ -92,6 +179,17 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } +// A convenience function for decoding an entire protobuf all at once, without +// having to worry about setting up the appropriate objects. +void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + +// A convenience function for encoding an entire protobuf all at once. If an +// error occurs, the null string is returned and the status object contains +// the error. 
+void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, + upb_status *status); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 7591f78..c35212e 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,8 +14,10 @@ // Returns true if the give wire type and field type combination is valid, // taking into account both packed and non-packed encodings. -static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - return (1 << wt) & upb_types[ft].allowed_wire_types; +static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) { + // TODO: need to take into account the label; only repeated fields are + // allowed to use packed encoding. + return (1 << wt) & upb_types[f->type].allowed_wire_types; } // Performs zig-zag decoding, which is used by sint32 and sint64. @@ -358,7 +360,7 @@ again: // unknown fields we will implement that here. upb_decoder_skipval(d); goto again; - } else if (!upb_check_type(wire_type, f->type)) { + } else if (!upb_check_type(wire_type, f)) { // This is a recoverable error condition. We skip the value but also // return NULL and report the error. upb_decoder_skipval(d); diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h index fa9bace..d01d21f 100644 --- a/stream/upb_strstream.h +++ b/stream/upb_strstream.h @@ -31,7 +31,7 @@ void upb_stringsrc_free(upb_stringsrc *s); void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); // Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. 
-upb_bytesrc *upb_stringsrc_bytesrc(); +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); /* upb_stringsink *************************************************************/ diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 9083788..9446b8f 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -4,9 +4,10 @@ #include #include #include -#include "upb_data.h" +#include "upb_msg.h" #include "upb_def.h" #include "upb_decoder.h" +#include "upb_strstream.h" int num_assertions = 0; #define ASSERT(expr) do { \ @@ -25,7 +26,7 @@ void compare_arrays(const google::protobuf::Reflection *r, upb_msg *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); - upb_arrayptr arr = upb_msg_get(upb_msg, upb_f).arr; + upb_array *arr = upb_msg_get(upb_msg, upb_f).arr; ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { upb_value v = upb_array_get(arr, upb_f, i); @@ -63,7 +64,7 @@ void compare_arrays(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -116,7 +117,7 @@ void compare_values(const google::protobuf::Reflection *r, case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetString(proto2_msg, proto2_f); - std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str)); + std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); ASSERT(str == str2); break; } @@ -133,9 +134,10 @@ void compare(const google::protobuf::Message& proto2_msg, const google::protobuf::Reflection *r = proto2_msg.GetReflection(); const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor(); - ASSERT((upb_field_count_t)d->field_count() == upb_md->num_fields); - for(upb_field_count_t 
i = 0; i < upb_md->num_fields; i++) { - upb_fielddef *upb_f = &upb_md->fields[i]; + ASSERT((upb_field_count_t)d->field_count() == upb_msgdef_numfields(upb_md)); + upb_msg_iter i; + for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) { + upb_fielddef *upb_f = upb_msg_iter_field(i); const google::protobuf::FieldDescriptor *proto2_f = d->FindFieldByNumber(upb_f->number); // Make sure the definitions are equal. @@ -143,7 +145,7 @@ void compare(const google::protobuf::Message& proto2_msg, ASSERT(proto2_f); ASSERT(upb_f->number == proto2_f->number()); ASSERT(std::string(upb_string_getrobuf(upb_f->name), - upb_strlen(upb_f->name)) == + upb_string_len(upb_f->name)) == proto2_f->name()); ASSERT(upb_f->type == proto2_f->type()); ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated()); @@ -166,10 +168,10 @@ void compare(const google::protobuf::Message& proto2_msg, void parse_and_compare(MESSAGE_CIDENT *proto2_msg, upb_msg *upb_msg, upb_msgdef *upb_md, - upb_strptr str) + upb_string *str) { // Parse to both proto2 and upb. - ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_strlen(str))); + ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str))); upb_status status = UPB_STATUS_INIT; upb_msg_decodestr(upb_msg, upb_md, str, &status); ASSERT(upb_ok(&status)); @@ -194,22 +196,32 @@ int main(int argc, char *argv[]) // Initialize upb state, parse descriptor. 
upb_status status = UPB_STATUS_INIT; - upb_symtab *c = upb_symtab_new(); - upb_strptr fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); - if(upb_string_isnull(fds)) { + upb_symtab *symtab = upb_symtab_new(); + upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); + if(fds == NULL) { fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return 1; } - upb_symtab_add_desc(c, fds, &status); + upb_symtab_add_descriptorproto(symtab); + upb_def *fds_msgdef = upb_symtab_lookup( + symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, fds); + upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef)); + upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc)); + upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status); if(!upb_ok(&status)) { - fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": %s.\n", - status.msg); + fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); + upb_printerr(&status); return 1; } upb_string_unref(fds); + upb_decoder_free(decoder); + upb_stringsrc_free(ssrc); - upb_strptr proto_name = upb_strdupc(MESSAGE_NAME); - upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(c, proto_name)); + upb_string *proto_name = upb_strdupc(MESSAGE_NAME); + upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name)); if(!def) { fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", UPB_STRARG(proto_name)); @@ -218,8 +230,8 @@ int main(int argc, char *argv[]) upb_string_unref(proto_name); // Read the message data itself. 
- upb_strptr str = upb_strreadfile(MESSAGE_FILE); - if(upb_string_isnull(str)) { + upb_string *str = upb_strreadfile(MESSAGE_FILE); + if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return 1; } @@ -234,7 +246,7 @@ int main(int argc, char *argv[]) upb_msg_unref(upb_msg, def); upb_def_unref(UPB_UPCAST(def)); upb_string_unref(str); - upb_symtab_unref(c); + upb_symtab_unref(symtab); return 0; } -- cgit v1.2.3 From 2a7f51f3fd534b3e9e098c522cffbb96e1551474 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 6 Oct 2010 08:19:34 -0700 Subject: Change upb_src to use push-based interface. Unfortunately my previous detailed commit message was lost somehow by git or vi. Will have to explain in more detail at a later date the rationale for this change. The build will be broken until I port the old decoder to this new interface. --- core/upb.h | 4 ++ core/upb_stream.h | 124 +++++++++++++++++++++--------------------------------- 2 files changed, 51 insertions(+), 77 deletions(-) (limited to 'core/upb.h') diff --git a/core/upb.h b/core/upb.h index 7ee0469..6ecc2a0 100644 --- a/core/upb.h +++ b/core/upb.h @@ -261,6 +261,10 @@ enum upb_status_code { UPB_ERROR_MAX_NESTING_EXCEEDED = -3 }; +// TODO: consider making this a single word: a upb_string* where we use the low +// bits as flags indicating whether there is an error and whether it is +// resumable. This would improve efficiency, because the code would not need +// to be loaded after a call to a function returning a status. typedef struct { enum upb_status_code code; upb_string *str; diff --git a/core/upb_stream.h b/core/upb_stream.h index 861bd1c..cd00c1e 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -28,98 +28,64 @@ extern "C" { // Forward-declare. We can't include upb_def.h; it would be circular. struct _upb_fielddef; -// Note! The "eof" flags work like feof() in C; they cannot report end-of-file -// until a read has failed due to eof. 
They cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. - -/* upb_src ********************************************************************/ +/* upb_sink *******************************************************************/ -// A upb_src is a pull parser for protobuf data. Sample usage: -// -// #define CHECK(x) if(!x) goto err; +// A upb_sink is a component that receives a stream of protobuf data. +// It is an abstract interface that is implemented either by the system or +// by users. // -// bool parse_msg(upb_src *src, int indent) { -// upb_fielddef *f; -// while ((f = upb_src_getdef(src)) != NULL) { -// for (int i = 0; i < indent; i++) putchar(' '); -// printf("Parsed field; name=" UPB_STRFMT ", num=%d", -// UPB_STRARG(d->name), d->number); -// if (upb_issubmsg(f)) { -// CHECK(upb_src_startmsg(src)); -// CHECK(parse_msg(src, indent + 2)); -// CHECK(upb_src_endmsg(src)); -// } else { -// CHECK(upb_src_skipval(src)); -// } -// } -// // We should be EOF now, otherwise there was an error. -// CHECK(upb_src_eof(src)); -// return true; -// -// err: -// return false; -// } -// -// TODO: decide how to handle unknown fields. - -// Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. End of stream can simply mean end of submessage. -struct _upb_fielddef *upb_src_getdef(upb_src *src); - -// Retrieves and stores the next value in "val". upb_src_getval() is for all -// numeric types and upb_src_getstr() is for strings. For string types "str" -// must be a newly-recycled string. Returns false on error. -bool upb_src_getval(upb_src *src, upb_valueptr val); -bool upb_src_getstr(upb_src *src, upb_string *val); - -// Like upb_src_getval() but skips the value. -bool upb_src_skipval(upb_src *src); - -// Descends into a submessage. 
May only be called when upb_issubmsg(f) is true -// for an f = upb_src_getdef(src) that was just parsed. -bool upb_src_startmsg(upb_src *src); - -// Stops reading a submessage. May be called before the stream is EOF, in -// which case the rest of the submessage is skipped. -bool upb_src_endmsg(upb_src *src); - -// Returns the current error/eof status for the stream. If a stream is eof -// but we are inside a submessage, calling upb_src_endmsg(src) will reset -// the eof marker. -INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } -INLINE bool upb_src_eof(upb_src *src) { return src->eof; } - -// The following functions are equivalent to upb_src_getval(), but take -// pointers to specific types. In debug mode this may check that the type -// is compatible with the type being read. This check will *not* be performed -// in non-debug mode, and if you get the type wrong the behavior is undefined. -bool upb_src_getbool(upb_src *src, bool *val); -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); +// TODO: unknown fields. -/* upb_sink *******************************************************************/ +// Constants that a sink returns to indicate to its caller whether it should +// continue or not. +typedef enum { + // Caller should continue sending values to the sink. + UPB_SINK_CONTINUE, + + // Return from upb_sink_putdef() to skip the next value (which may be a + // submessage). + UPB_SINK_SKIP, + + // Caller should stop sending values; check sink status for details. + // If processing resumes later, it should resume with the next value. + UPB_SINK_STOP, +} upb_sinkret_t; // Puts the given fielddef into the stream. 
-bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); +upb_sinkret_t upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); // Puts the given value into the stream. -bool upb_sink_putval(upb_sink *sink, upb_value val); -bool upb_sink_putstr(upb_sink *sink, upb_string *str); +upb_sinkret_t upb_sink_putval(upb_sink *sink, upb_value val); +upb_sinkret_t upb_sink_putstr(upb_sink *sink, upb_string *str); // Starts/ends a submessage. upb_sink_startmsg may seem redundant, but a // client could have a submessage already serialized, and therefore put it // as a string instead of its individual elements. -bool upb_sink_startmsg(upb_sink *sink); -bool upb_sink_endmsg(upb_sink *sink); +upb_sinkret_t upb_sink_startmsg(upb_sink *sink); +upb_sinkret_t upb_sink_endmsg(upb_sink *sink); // Returns the current error status for the stream. upb_status *upb_sink_status(upb_sink *sink); + +/* upb_src ********************************************************************/ + +// A upb_src is a resumable push parser for protobuf data. It works by first +// accepting registration of a upb_sink to which it will push data, then +// in a second phase it parses the actual data. +// + +// Sets the given sink as the target of this src. It will be called when the +// upb_src_parse() is run. +void upb_src_setsink(upb_src *src, upb_sink *sink); + +// Pushes data from this src to the previously registered sink, returning +// true if all data was processed. If false is returned, check +// upb_src_status() for details; if it is a resumable status, upb_src_run +// may be called again to resume processing. +bool upb_src_run(upb_src *src); + + /* upb_bytesrc ****************************************************************/ // Returns the next string in the stream. false is returned on error or eof.
@@ -133,6 +99,10 @@ bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); // Returns the current error status for the stream. +// Note! The "eof" flag works like feof() in C; it cannot report end-of-file +// until a read has failed due to eof. It cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } -- cgit v1.2.3 From db512df98e0fac208a716c7807d037f0b0d309f1 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 4 Jan 2011 15:47:25 -0800 Subject: A bunch of work on upb_def and upb_value. --- core/upb.h | 51 +++-- core/upb_def.c | 642 +++++++++++++++++++++++++++++---------------------------- 2 files changed, 370 insertions(+), 323 deletions(-) (limited to 'core/upb.h') diff --git a/core/upb.h b/core/upb.h index 6ecc2a0..7bed779 100644 --- a/core/upb.h +++ b/core/upb.h @@ -130,21 +130,46 @@ typedef uint32_t upb_strlen_t; // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. -typedef union { - double _double; - float _float; - int32_t int32; - int64_t int64; - uint32_t uint32; - uint64_t uint64; - bool _bool; - upb_string *str; - upb_msg *msg; - upb_array *arr; - upb_atomic_refcount_t *refcount; - void *_void; +typedef struct { + union { + double _double; + float _float; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + bool _bool; + upb_string *str; + upb_msg *msg; + upb_array *arr; + upb_atomic_refcount_t *refcount; + void *_void; + } val; + + // In debug mode we carry the value type around also so we can check accesses + // to be sure the right member is being read. 
+#ifndef NDEBUG + upb_valuetype_t type; +#endif } upb_value; +#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ + ctype upb_value_get ## name(upb_value val) { \ + assert(val.type == UPB_TYPE(proto_type)); \ + return val.membername; \ + } \ + void upb_value_ ## name(upb_value *val, ctype cval) { \ + val.type = UPB_TYPE(proto_type); \ + val.membername = cval; \ + } +UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); +UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, INT32); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); +UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); + // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. typedef union { diff --git a/core/upb_def.c b/core/upb_def.c index 1c8fbdc..cc771dc 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -22,33 +22,6 @@ static int upb_div_round_up(int numerator, int denominator) { return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; } -// A little dynamic array for storing a growing list of upb_defs. 
-typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; -} - -static void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) - if(l->defs[i]) upb_def_unref(l->defs[i]); - free(l->defs); -} - -static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); - } - l->defs[l->len++] = d; -} - /* Joins strings together, for example: * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" * join("", "Baz") -> "Baz" @@ -62,14 +35,12 @@ static upb_string *upb_join(upb_string *base, upb_string *name) { } } -// Qualify the defname for all defs starting with offset "start" with "str". -static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } +/* Search for a character in a string, in reverse. */ +static int my_memrchr(char *data, char c, size_t len) +{ + int off = len-1; + while(off > 0 && data[off] != c) --off; + return off; } /* upb_def ********************************************************************/ @@ -256,26 +227,27 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) -{ - int32_t number = -1; - upb_string *name = NULL; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); - break; - case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); - break; - default: - CHECKSRC(upb_src_skipval(src)); - break; - } +static void upb_enumdef_startmsg(upb_defbuilder *b) { + b->number = -1; + name = NULL; +} + +static upb_flow_t upb_enumdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: + name = upb_string_tryrecycle(name); + CHECKSRC(upb_src_getstr(src, name)); + break; + case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: + CHECKSRC(upb_src_getint32(src, &number)); + break; + default: + CHECKSRC(upb_src_skipval(src)); + break; } +} +static void upb_enumdef_endmsg(upb_defbuilder *b) { if(name == NULL || number == -1) { upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; @@ -287,48 +259,7 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. - return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_string_unref(name); - return false; -} - -// google.protobuf.EnumDescriptorProto. 
-static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: - e->base.fqname = upb_string_tryrecycle(e->base.fqname); - CHECKSRC(upb_src_getstr(src, e->base.fqname)); - break; - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum_val(src, e, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - upb_src_skipval(src); - break; - } - } - assert(e->base.fqname); - upb_deflist_push(defs, UPB_UPCAST(e)); - return true; - -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_enumdef_free(e); - return false; + return UPB_CONTINUE; } upb_enum_iter upb_enum_begin(upb_enumdef *e) { @@ -358,47 +289,17 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) -{ +static void upb_fielddef_startmsg(upb_defbuilder *b) { upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; f->name = NULL; f->def = NULL; f->owned = false; f->msgdef = m; - upb_fielddef *parsed_f; - int32_t tmp; - while((parsed_f = upb_src_getdef(src))) { - switch(parsed_f->number) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->type = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->label = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &tmp)); - f->number = tmp; - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); - 
break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; - break; - } - default: - upb_src_skipval(src); - } - } - CHECKSRC(upb_src_eof(src)); + b->f = f; +} + +static void upb_fielddef_endmsg(upb_defbuilder *b) { // TODO: verify that all required fields were present. assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); @@ -409,11 +310,33 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); return true; +} -src_err: - upb_copyerr(status, upb_src_status(src)); - upb_fielddef_free(f); - return false; +static upb_flow_t upb_fielddef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { + switch(parsed_f->number) { + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: + f->type = upb_value_getint32(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: + f->label = upb_value_getint32(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: + f->number = upb_value_getint32(val); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: + f->name = upb_string_tryrecycle(f->name); + CHECKSRC(upb_src_getstr(src, f->name)); + break; + case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { + upb_string *str = upb_string_new(); + CHECKSRC(upb_src_getstr(src, str)); + if(f->def) upb_def_unref(f->def); + f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + f->owned = true; + break; + } + } + return UPB_CONTINUE; } @@ -433,48 +356,23 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // Processes a google.protobuf.DescriptorProto, adding defs to "defs." 
-static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) -{ +static void upb_msgdef_startmsg(upb_defbuilder *b) { upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); - int32_t start_count = defs->len; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - m->base.fqname = upb_string_tryrecycle(m->base.fqname); - CHECKSRC(upb_src_getstr(src, m->base.fqname)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addfield(src, m, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addmsg(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - // TODO: extensions. - CHECKSRC(upb_src_skipval(src)); - } - } - CHECK(upb_src_eof(src)); + upb_deflist_push(&b->defs, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b, UPB_UPCAST(m)); +} + +static void upb_msgdef_endmsg(upb_defbuilder *b) { + upb_msgdef *m = upb_downcast_msgdef(upb_deflist_stacktop(&m->defs)); if(!m->base.fqname) { upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - goto err; + return UPB_ERROR; } - // Create an ordering over the fields. 
upb_field_count_t n = upb_msgdef_numfields(m); upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n); @@ -512,15 +410,43 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) if (max_align > 0) m->size = upb_align_up(m->size, max_align); - upb_deflist_qualify(defs, m->base.fqname, start_count); - upb_deflist_push(defs, UPB_UPCAST(m)); - return true; + upb_defbuilder_endcontainer(b); + return UPB_CONTINUE; +} -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_msgdef_free(m); - return false; +static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: + // XXX + m->base.fqname = upb_string_tryrecycle(m->base.fqname); + m->base.fqname = upb_value_getstr(val); + upb_defbuilder_setscopename(upb_value_getstr(val)); + break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + // TODO: extensions. 
+ return UPB_SKIP; + } +} + +static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: + upb_register_FieldDescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: + upb_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + break; + default: + return UPB_SKIP; + } } static void upb_msgdef_free(upb_msgdef *m) @@ -551,55 +477,171 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { return upb_inttable_next(&m->itof, &iter->e); } -/* symtab internal ***********************************************************/ +/* upb_defbuilder ************************************************************/ -// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs". -static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) -{ - upb_string *package = NULL; - int32_t start_count = defs->len; - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: - package = upb_string_tryrecycle(package); - CHECKSRC(upb_src_getstr(src, package)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addmsg(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addenum(src, defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - // TODO: services and extensions. - CHECKSRC(upb_src_skipval(src)); - } +// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in +// the format defined in descriptor.proto. 
The output of a upb_defbuilder is +// a list of upb_def* that possibly contain unresolved references. +// +// We use a separate object (upb_defbuilder) instead of having the defs handle +// the parse themselves because we need to store state that is only necessary +// during the building process itself. + +// When we are bootstrapping descriptor.proto, we must help the bare decoder out +// by telling it when to descend into a submessage, because with the wire format +// alone we cannot tell the difference between a submessage and a string. +#define BEGIN_SUBMSG 100 + +// upb_deflist: A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); } - CHECK(upb_src_eof(src)); - upb_deflist_qualify(defs, package, start_count); - upb_string_unref(package); - return true; + l->defs[l->len++] = d; +} -src_err: - upb_copyerr(status, upb_src_status(src)); -err: - upb_string_unref(package); - return false; +// Qualify the defname for all defs starting with offset "start" with "str". +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } } -/* Search for a character in a string, in reverse. 
*/ -static int my_memrchr(char *data, char c, size_t len) -{ - int off = len-1; - while(off > 0 && data[off] != c) --off; - return off; +typedef struct { + upb_deflist defs; + struct { + upb_string *name; + int start; + } upb_defbuilder_frame; + upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; +} upb_defbuilder; + +// Start/end handlers for FileDescriptorProto and DescriptorProto (the two +// entities that have names and can contain sub-definitions). +upb_defbuilder_startcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = b->stack[b->stack_len++]; + f->start = b->defs.len; + f->name = NULL; +} + +upb_defbuilder_endcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = b->stack[--b->stack_len]; + upb_deflist_qualify(&b->defs, f->name, f->start); + upb_string_unref(f->name); +} + +upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { +} + +// Handlers for google.protobuf.FileDescriptorProto. +static bool upb_defbuilder_FileDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: + upb_defbuilder_setscopename(b, val.str); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static bool upb_defbuilder_FileDescriptorProto_startsubmsg(upb_defbuilder *b, + upb_fielddef *f, + upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + upb_defbuilder_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_defbuilder_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + default: + // TODO: services and extensions.
+ return UPB_SKIP; + } +} + +static upb_handlers upb_defbuilder_FileDescriptorProto_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_value, + &upb_defbuilder_FileDescriptorProto_startsubmsg, } +upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// Handlers for google.protobuf.FileDescriptorSet. +upb_defbuilder_FileDescriptorSet_value(upb_defbuilder *b, upb_fielddef *f, + upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +upb_defbuilder_FileDescriptorSet_startsubmsg(upb_defbuilder *b, + upb_fielddef *f, upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + upb_defbuilder_register_FileDescriptorProto(b, h); + return UPB_DELEGATE; + default: + return UPB_SKIP; + } +} + +static upb_handlers upb_defbuilder_FileDescriptorSet_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorSet_value, + &upb_defbuilder_FileDescriptorSet_startsubmsg, +} + +upb_defbuilder_register_FileDescriptorSet(upb_defbuilder *b, upb_handlers *h) { + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_set_handler_closure(h, b); +} + + + +/* upb_symtab adding defs *****************************************************/ + +// This is a self-contained group of functions that, given a list of upb_defs +// whose references are not yet resolved, resolves references and adds them +// atomically to a upb_symtab. + typedef struct { upb_strtable_entry e; upb_def *def; @@ -751,8 +793,8 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either // succeeds or fails. 
Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef, - upb_status *status) +bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, + bool allow_redef, upb_status *status) { upb_rwlock_wrlock(&s->lock); @@ -817,7 +859,7 @@ err: } -/* upb_symtab *****************************************************************/ +/* upb_symtab public interface ************************************************/ upb_symtab *upb_symtab_new() { @@ -893,22 +935,13 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { - upb_deflist defs; - upb_deflist_init(&defs); - upb_fielddef *f; - while((f = upb_src_getdef(src)) != NULL) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - CHECKSRC(upb_src_startmsg(src)); - CHECK(upb_addfd(src, &defs, status)); - CHECKSRC(upb_src_endmsg(src)); - break; - default: - CHECKSRC(upb_src_skipval(src)); - } + upb_defbuilder *b = upb_defbuilder_new(); + upb_defbuilder_register_handlers(b, upb_src_gethandlers(src)); + if(!upb_src_run(src)) { + upb_copyerr(status, upb_src_status(src)); + return; } - CHECKSRC(upb_src_eof(src)); - CHECK(upb_symtab_add_defs(s, &defs, false, status)); + upb_symtab_add_defs(s, b->defs, b->defs_len, false, status); upb_deflist_uninit(&defs); return; @@ -937,17 +970,21 @@ err: // * groups. // * zig-zag-encoded types like sint32 and sint64. // -// If descriptor.proto ever changed to use any of these features, this decoder -// would need to be extended to support them. +// Since it cannot tell the difference between submessages and strings, it +// always reports them as strings first, but if the value callback returns +// UPB_TREAT_AS_SUBMSG this signals to the baredecoder that it should be +// treated like a submessage instead. 
+// +// TODO: for bootstrapping we should define a slightly different wire format +// that includes enough information to know the precise integer types and +// that distinguishes between strings and submessages. This will allow +// us to get rid of the UPB_TREAT_AS_SUBMSG hack. It will also allow us +// to get rid of the upb_value_setraw() scheme, which would be more +// complicated to support on big-endian machines. typedef struct { - upb_src src; upb_string *input; upb_strlen_t offset; - upb_fielddef field; - upb_wire_type_t wire_type; - upb_strlen_t delimited_len; - upb_strlen_t stack[UPB_MAX_NESTING], *top; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -983,75 +1020,62 @@ static uint32_t upb_baredecoder_readf32(upb_baredecoder *d) return val; } -static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d) -{ - // Detect end-of-submessage. - if(d->offset >= *d->top) { - d->src.eof = true; - return NULL; - } - - uint32_t key; - key = upb_baredecoder_readv32(d); - d->wire_type = key & 0x7; - d->field.number = key >> 3; - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - // For delimited wire values we parse the length now, since we need it in - // all cases. 
- d->delimited_len = upb_baredecoder_readv32(d); - } - return &d->field; -} - -static bool upb_baredecoder_getstr(upb_baredecoder *d, upb_string *str) { - upb_string_substr(str, d->input, d->offset, d->delimited_len); - d->offset += d->delimited_len; - return true; -} +bool upb_baredecoder_run(upb_baredecoder *d) { + upb_string *str = NULL; + upb_strlen_t stack[UPB_MAX_NESTING]; + upb_strlen_t *top = &stack[0]; + *top = upb_string_len(d->input); + d->offset = 0; -static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val) -{ - switch(d->wire_type) { - case UPB_WIRE_TYPE_VARINT: - *val.uint64 = upb_baredecoder_readv64(d); - break; - case UPB_WIRE_TYPE_32BIT_VARINT: - *val.uint32 = upb_baredecoder_readv32(d); - break; - case UPB_WIRE_TYPE_64BIT: - *val.uint64 = upb_baredecoder_readf64(d); - break; - case UPB_WIRE_TYPE_32BIT: - *val.uint32 = upb_baredecoder_readf32(d); - break; - default: - *(char*)0 = 0; - assert(false); - } - return true; -} + upb_dispatch_startmsg(&d->dispatcher); + while(d->offset < upb_string_len(d->input)) { + // Detect end-of-submessage. + while(d->offset >= *d->top) { + upb_dispatch_endsubmsg(&d->dispatcher); + d->offset = *(d->top--); + } -static bool upb_baredecoder_skipval(upb_baredecoder *d) -{ - if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) { - d->offset += d->delimited_len; - return true; - } else { - upb_value val; - return upb_baredecoder_getval(d, upb_value_addrof(&val)); + uint32_t key = upb_baredecoder_readv64(d); + upb_fielddef f; + f.number = key >> 3; + upb_wire_type_t wt = key & 0x7; + if(wt == UPB_WIRE_TYPE_DELIMITED) { + uint32_t delim_len = upb_baredecoder_readv32(d); + // We don't know if it's a string or a submessage; deliver first as + // string. + str = upb_string_tryrecycle(str); + upb_string_substr(str, d->input, d->offset, d->delimited_len); + upb_value v; + upb_value_setstr(&v, str); + if(upb_dispatch_value(&d->dispatcher, &f, v) == UPB_TREAT_AS_SUBMSG) { + // Should deliver as a submessage instead. 
+ upb_dispatch_startsubmsg(&d->dispatcher, &f); + *(++d->top) = d->offset + delimited_len; + } else { + d->offset += delimited_len; + } + } else { + upb_value v; + switch(wt) { + case UPB_WIRE_TYPE_VARINT: + upb_value_setraw(&v, upb_baredecoder_readv64(d)); + upb_dispatch_value(&d->dispatcher, &f, v); + break; + case UPB_WIRE_TYPE_64BIT: + upb_value_setraw(&v, upb_baredecoder_readf64(d)); + upb_dispatch_value(&d->dispatcher, &f, v); + break; + case UPB_WIRE_TYPE_32BIT: + upb_value_setraw(&v, upb_baredecoder_readf32(d)); + break; + default: + assert(false); + abort(); + } + upb_dispatch_value(&d->dispatcher, &f, v); + } } -} - -static bool upb_baredecoder_startmsg(upb_baredecoder *d) -{ - *(++d->top) = d->offset + d->delimited_len; - return true; -} - -static bool upb_baredecoder_endmsg(upb_baredecoder *d) -{ - d->offset = *(d->top--); - return true; + upb_dispatch_endmsg(&d->dispatcher); } static upb_src_vtable upb_baredecoder_src_vtbl = { @@ -1068,8 +1092,6 @@ static upb_baredecoder *upb_baredecoder_new(upb_string *str) upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); d->offset = 0; - d->top = &d->stack[0]; - *(d->top) = upb_string_len(d->input); upb_src_init(&d->src, &upb_baredecoder_src_vtbl); return d; } -- cgit v1.2.3 From 45599180905d45a882970f6ca8b6007436ac3f97 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 09:43:28 -0800 Subject: More work on upb_src. --- Makefile | 4 +- core/upb.h | 43 +++--- core/upb_def.c | 445 ++++++++++++++++++++++++++++++++---------------------- core/upb_stream.h | 6 + 4 files changed, 302 insertions(+), 196 deletions(-) (limited to 'core/upb.h') diff --git a/Makefile b/Makefile index 5c6598c..42c7d41 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ CC=gcc CXX=g++ CFLAGS=-std=c99 INCLUDE=-Idescriptor -Icore -Itests -Istream -I. 
-CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) +CPPFLAGS=-Wall -Wextra -Wno-missing-field-initializers -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags)) LDLIBS=-lpthread core/libupb.a ifeq ($(shell uname), Darwin) CPPFLAGS += -I/usr/include/lua5.1 @@ -61,7 +61,7 @@ SRC=core/upb.c \ core/upb_table.c \ core/upb_string.c \ descriptor/descriptor.c \ -# core/upb_def.c \ + core/upb_def.c \ # core/upb_msg.c \ # stream/upb_decoder.c \ # stream/upb_stdio.c \ diff --git a/core/upb.h b/core/upb.h index 7bed779..2057d60 100644 --- a/core/upb.h +++ b/core/upb.h @@ -12,6 +12,7 @@ #include #include #include // only for size_t. +#include #include "descriptor_const.h" #include "upb_atomic.h" @@ -128,6 +129,11 @@ typedef struct _upb_msg upb_msg; typedef uint32_t upb_strlen_t; +// The type of a upb_value. This is like a upb_fieldtype_t, but adds the +// constant UPB_VALUETYPE_ARRAY to represent an array. +typedef uint8_t upb_valuetype_t; +#define UPB_VALUETYPE_ARRAY 32 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
typedef struct { @@ -153,14 +159,20 @@ typedef struct { #endif } upb_value; +#ifdef NDEBUG +#define SET_TYPE(dest, val) +#else +#define SET_TYPE(dest, val) dest = val +#endif + #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ assert(val.type == UPB_TYPE(proto_type)); \ - return val.membername; \ + return val.val.membername; \ } \ void upb_value_ ## name(upb_value *val, ctype cval) { \ - val.type = UPB_TYPE(proto_type); \ - val.membername = cval; \ + SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + val->val.membername = cval; \ } UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); @@ -169,6 +181,7 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); +UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. @@ -187,24 +200,23 @@ typedef union { void *_void; } upb_valueptr; -// The type of a upb_value. This is like a upb_fieldtype_t, but adds the -// constant UPB_VALUETYPE_ARRAY to represent an array. -typedef uint8_t upb_valuetype_t; -#define UPB_VALUETYPE_ARRAY 32 - INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->_double}; + upb_valueptr ptr = {&val->val._double}; return ptr; } -// Converts upb_value_ptr -> upb_value by reading from the pointer. We need to -// know the value type to perform this operation, because we need to know how -// much memory to copy. +// Reads or writes a upb_value from an address represented by a upb_value_ptr. +// We need to know the value type to perform this operation, because we need to +// know how much memory to copy (and for big-endian machines, we need to know +// where in the upb_value the data goes). 
+// +// For little endian-machines where we didn't mind overreading, we could make +// upb_value_read simply use memcpy(). INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { upb_value val; #define CASE(t, member_name) \ - case UPB_TYPE(t): val.member_name = *ptr.member_name; break; + case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; switch(ft) { CASE(DOUBLE, _double) @@ -232,13 +244,10 @@ INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { #undef CASE } -// Writes a upb_value to a upb_value_ptr location. We need to know the value -// type to perform this operation, because we need to know how much memory to -// copy. INLINE void upb_value_write(upb_valueptr ptr, upb_value val, upb_fieldtype_t ft) { #define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.member_name; break; + case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; switch(ft) { CASE(DOUBLE, _double) diff --git a/core/upb_def.c b/core/upb_def.c index cc771dc..4320fb6 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -9,9 +9,6 @@ #include "descriptor.h" #include "upb_def.h" -#define CHECKSRC(x) if(!(x)) goto src_err -#define CHECK(x) if(!(x)) goto err - /* Rounds p up to the next multiple of t. */ static size_t upb_align_up(size_t val, size_t align) { return val % align == 0 ? val : val + align - (val % align); @@ -184,6 +181,188 @@ static void upb_def_uninit(upb_def *def) { } +/* upb_defbuilder ************************************************************/ + +// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in +// the format defined in descriptor.proto. The output of a upb_defbuilder is +// a list of upb_def* that possibly contain unresolved references. +// +// We use a separate object (upb_defbuilder) instead of having the defs handle +// the parse themselves because we need to store state that is only necessary +// during the building process itself. 
+ +// When we are bootstrapping descriptor.proto, we must help the bare decoder out +// by telling it when to descend into a submessage, because with the wire format +// alone we cannot tell the difference between a submessage and a string. +// +// TODO: In the long-term, we should bootstrap from a serialization format that +// contains this information, so we can remove this special-case code. This +// would involve defining a serialization format very similar to the existing +// protobuf format, but that contains more information about the wire type. +#define BEGIN_SUBMSG 100 + +// upb_deflist: A little dynamic array for storing a growing list of upb_defs. +typedef struct { + upb_def **defs; + uint32_t len; + uint32_t size; +} upb_deflist; + +static void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; +} + +static void upb_deflist_uninit(upb_deflist *l) { + for(uint32_t i = 0; i < l->len; i++) + if(l->defs[i]) upb_def_unref(l->defs[i]); + free(l->defs); +} + +static void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); + } + l->defs[l->len++] = d; +} + +// Qualify the defname for all defs starting with offset "start" with "str". +static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { + for(uint32_t i = start; i < l->len; i++) { + upb_def *def = l->defs[i]; + upb_string *name = def->fqname; + def->fqname = upb_join(str, name); + upb_string_unref(name); + } +} + +typedef struct { + upb_string *name; + int start; +} upb_defbuilder_frame; + +struct _upb_defbuilder { + upb_deflist defs; + upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; + + uint32_t number; + upb_string *name; +}; +typedef struct _upb_defbuilder upb_defbuilder; + +// Forward declares for top-level file descriptors. 
+static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h); + + +// Start/end handlers for FileDescriptorProto and DescriptorProto (the two +// entities that have names and can contain sub-definitions. +void upb_defbuilder_startcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[b->stack_len++]; + f->start = b->defs.len; + f->name = NULL; +} + +void upb_defbuilder_endcontainer(upb_defbuilder *b) { + upb_defbuilder_frame *f = &b->stack[--b->stack_len]; + upb_deflist_qualify(&b->defs, f->name, f->start); + upb_string_unref(f->name); +} + +void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { + upb_defbuilder_frame *f = &b->stack[b->stack_len-1]; + upb_string_unref(f->name); + f->name = upb_string_getref(str); +} + +// Handlers for google.protobuf.FileDescriptorProto. +static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, + upb_fielddef *f, + upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + break; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: + upb_msgdef_register_DescriptorProto(b, h); + return UPB_DELEGATE; + case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: + upb_enumdef_register_EnumDescriptorProto(b, h); + return UPB_DELEGATE; + default: + // TODO: services and extensions. 
+ return UPB_SKIP; + } +} + +static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorProto_value, + &upb_defbuilder_FileDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// Handlers for google.protobuf.FileDescriptorSet. +static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, + upb_fielddef *f, + upb_value val) { + (void)b; + (void)val; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + return BEGIN_SUBMSG; + default: + return UPB_SKIP; + } +} + +static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( + void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; + switch(f->number) { + case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: + upb_defbuilder_register_FileDescriptorProto(b, h); + return UPB_DELEGATE; + default: + return UPB_SKIP; + } +} + +static void upb_defbuilder_register_FileDescriptorSet( + upb_defbuilder *b, upb_handlers *h) { + static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + NULL, // startmsg + NULL, // endmsg + &upb_defbuilder_FileDescriptorSet_value, + &upb_defbuilder_FileDescriptorSet_startsubmsg, + }; + upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_set_handler_closure(h, b); +} + + /* upb_unresolveddef **********************************************************/ // Unresolved defs are used as temporary placeholders for a def whose name has @@ -227,28 +406,30 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_startmsg(upb_defbuilder *b) { +static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { b->number = -1; - name = NULL; + b->name = NULL; } -static upb_flow_t upb_enumdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - name = upb_string_tryrecycle(name); + b->name = upb_string_tryrecycle(name); CHECKSRC(upb_src_getstr(src, name)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: - CHECKSRC(upb_src_getint32(src, &number)); + b->number = upb_value_getint32(val); break; default: - CHECKSRC(upb_src_skipval(src)); break; } + return UPB_CONTINUE; } -static void upb_enumdef_endmsg(upb_defbuilder *b) { - if(name == NULL || number == -1) { +static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { + if(b->name == NULL || b->number == -1) { upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); goto err; } @@ -262,7 +443,66 @@ static void upb_enumdef_endmsg(upb_defbuilder *b) { return UPB_CONTINUE; } -upb_enum_iter upb_enum_begin(upb_enumdef *e) { +static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + &upb_enumdef_EnumValueDescriptorProto_startmsg, + &upb_enumdef_EnumValueDescriptorProto_endmsg, + &upb_enumdef_EnumValueDescriptorProto_value, + } + upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +// google.protobuf.EnumDescriptorProto. 
+void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { + upb_enumdef *e = malloc(sizeof(*e)); + upb_def_init(&e->base, UPB_DEF_ENUM); + upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); + upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + upb_deflist_push(&b->defs, UPB_UPCAST(e)); +} + +void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { + assert(e->base.fqname); +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, + upb_fielddef *f, + upb_value val) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + upb_string_unref(e->base.fqname); + e->base.fqname = upb_value_getstr(val); + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + return BEGIN_SUBMSG; + } + return UPB_CONTINUE; +} + +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, + upb_fielddef *f, + upb_handlers *h) { + switch(f->number) { + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: + upb_enumdef_register_EnumValueDescriptorProto(b, h); + return UPB_DELEGATE; + } + return UPB_SKIP; +} + +static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + &upb_enumdef_EnumDescriptorProto_startmsg, + &upb_enumdef_EnumDescriptorProto_endmsg, + &upb_enumdef_EnumDescriptorProto_value, + } + upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + +upb_enum_iter upb_enum_begin(upb_enumdef *e) { // We could iterate over either table here; the choice is arbitrary. return upb_inttable_begin(&e->iton); } @@ -355,7 +595,7 @@ static int upb_compare_fields(const void *f1, const void *f2) { return upb_compare_typed_fields(*(void**)f1, *(void**)f2); } -// Processes a google.protobuf.DescriptorProto, adding defs to "defs." +// google.protobuf.DescriptorProto. 
static void upb_msgdef_startmsg(upb_defbuilder *b) { upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); @@ -417,9 +657,6 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - // XXX - m->base.fqname = upb_string_tryrecycle(m->base.fqname); - m->base.fqname = upb_value_getstr(val); upb_defbuilder_setscopename(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: @@ -432,13 +669,14 @@ static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb_handlers *h) { +static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, + upb_handlers *h) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: upb_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: - upb_register_DescriptorProto(b, h); + upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: upb_register_EnumDescriptorProto(b, h); @@ -449,6 +687,18 @@ static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, upb } } +static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + &upb_msgdef_startmsg, + &upb_msgdef_endmsg, + &upb_msgdef_value, + &upb_msgdef_startsubmsg, + } + upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + upb_set_handler_closure(h, b); +} + static void upb_msgdef_free(upb_msgdef *m) { upb_msg_iter i; @@ -477,165 +727,6 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { return upb_inttable_next(&m->itof, &iter->e); } -/* upb_defbuilder 
************************************************************/ - -// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in -// the format defined in descriptor.proto. The output of a upb_defbuilder is -// a list of upb_def* that possibly contain unresolved references. -// -// We use a separate object (upb_defbuilder) instead of having the defs handle -// the parse themselves because we need to store state that is only necessary -// during the building process itself. - -// When we are bootstrapping descriptor.proto, we must help the bare decoder out -// by telling it when to descend into a submessage, because with the wire format -// alone we cannot tell the difference between a submessage and a string. -#define BEGIN_SUBMSG 100 - -// upb_deflist: A little dynamic array for storing a growing list of upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -static void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; -} - -static void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) - if(l->defs[i]) upb_def_unref(l->defs[i]); - free(l->defs); -} - -static void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); - } - l->defs[l->len++] = d; -} - -// Qualify the defname for all defs starting with offset "start" with "str". 
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { - for(uint32_t i = start; i < l->len; i++) { - upb_def *def = l->defs[i]; - upb_string *name = def->fqname; - def->fqname = upb_join(str, name); - upb_string_unref(name); - } -} - -typedef struct { - upb_deflist defs; - struct { - upb_string *name; - int start; - } upb_defbuilder_frame; - upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; - int stack_len; -} upb_defbuilder; - -// Start/end handlers for FileDescriptorProto and DescriptorProto (the two -// entities that have names and can contain sub-definitions. -upb_defbuilder_startcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = b->stack[b->stack_len++]; - f->start = b->defs.len; - f->name = NULL; -} - -upb_defbuilder_endcontainer(upb_defbuilder *b) { - upb_defbuilder_frame *f = b->stack[--b->stack_len]; - upb_deflist_qualify(&b->defs, f->name, f->start); - upb_string_unref(f->name); -} - -upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) { -} - -// Handlers for google.protobuf.FileDescriptorProto. -static bool upb_defbuilder_FileDescriptorProto_value(upb_defbuilder *b, - upb_fielddef *f, - upb_value val) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM: - upb_defbuilder_setscopename(b, val.str); - break; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - return BEGIN_SUBMSG; - default: - return UPB_SKIP; - } -} - -static bool upb_defbuilder_FileDescriptorProto_startsubmsg(upb_defbuilder *b, - upb_fielddef *f, - upb_handlers *h) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: - upb_defbuilder_register_DescriptorProto(b, h); - return UPB_DELEGATE; - case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_defbuilder_register_EnumDescriptorProto(b, h); - return UPB_DELEGATE; - default: - // TODO: services and extensions. 
- return UPB_SKIP; - } -} - -static upb_handlers upb_defbuilder_FileDescriptorProto_handlers = { - NULL, // startmsg - NULL, // endmsg - &upb_defbuilder_FileDescriptorProto_value, - &upb_defbuilder_FileDescriptorProto_startsubmsg, -} - -upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); - upb_set_handler_closure(h, b); -} - -// Handlers for google.protobuf.FileDescriptorSet. -upb_defbuilder_FileDescriptorSet_value(upb_defbuilder *b, upb_fielddef *f, - upb_value val) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - return BEGIN_SUBMSG; - default: - return UPB_SKIP; - } -} - -upb_defbuilder_FileDescriptorSet_startsubmsg(upb_defbuilder *b, - upb_fielddef *f, upb_handlers *h) { - switch(f->number) { - case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: - upb_defbuilder_register_FileDescriptorProto(b, h); - return UPB_DELEGATE; - default: - return UPB_SKIP; - } -} - -static upb_handlers upb_defbuilder_FileDescriptorSet_handlers = { - NULL, // startmsg - NULL, // endmsg - &upb_defbuilder_FileDescriptorSet_value, - &upb_defbuilder_FileDescriptorSet_startsubmsg, -} - -upb_defbuilder_register_FileDescriptorSet(upb_defbuilder *b, upb_handlers *h) { - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); - upb_set_handler_closure(h, b); -} - - - /* upb_symtab adding defs *****************************************************/ // This is a self-contained group of functions that, given a list of upb_defs diff --git a/core/upb_stream.h b/core/upb_stream.h index 1eb111e..c96c544 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -135,6 +135,12 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, upb_field_number_t fieldnum, upb_value val); +/* upb_src ********************************************************************/ + +struct _upb_src; +typedef struct _upb_src upb_src; + + /* upb_bytesrc 
****************************************************************/ struct _upb_bytesrc; -- cgit v1.2.3 From bcc688a303439c758a47da9f0eb1c064ece6ce09 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 20:37:04 -0800 Subject: upb_def compiles again! --- core/upb.c | 2 +- core/upb.h | 37 ++++--- core/upb_def.c | 283 +++++++++++++++++++++++++++++++------------------ core/upb_msg.c | 13 ++- core/upb_stream.h | 62 +++++++---- core/upb_stream_vtbl.h | 88 ++++++++++----- core/upb_string.c | 2 +- core/upb_string.h | 13 +-- 8 files changed, 325 insertions(+), 175 deletions(-) (limited to 'core/upb.h') diff --git a/core/upb.c b/core/upb.c index c396323..2f715d0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -45,7 +45,7 @@ void upb_seterr(upb_status *status, enum upb_status_code code, { if(upb_ok(status)) { // The first error is the most interesting. status->code = code; - status->str = upb_string_tryrecycle(status->str); + upb_string_recycle(&status->str); va_list args; va_start(args, msg); upb_string_vprintf(status->str, msg, args); diff --git a/core/upb.h b/core/upb.h index 2057d60..64bc88c 100644 --- a/core/upb.h +++ b/core/upb.h @@ -126,14 +126,20 @@ struct _upb_array; typedef struct _upb_array upb_array; struct _upb_msg; typedef struct _upb_msg upb_msg; +struct _upb_bytesrc; +typedef struct _upb_bytesrc upb_bytesrc; -typedef uint32_t upb_strlen_t; +typedef int32_t upb_strlen_t; +#define UPB_STRLEN_MAX INT32_MAX // The type of a upb_value. This is like a upb_fieldtype_t, but adds the // constant UPB_VALUETYPE_ARRAY to represent an array. typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_ARRAY 32 +#define UPB_VALUETYPE_BYTESRC 32 +#define UPB_VALUETYPE_RAW 33 + // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. 
typedef struct { @@ -146,6 +152,7 @@ typedef struct { uint64_t uint64; bool _bool; upb_string *str; + upb_bytesrc *bytesrc; upb_msg *msg; upb_array *arr; upb_atomic_refcount_t *refcount; @@ -167,21 +174,27 @@ typedef struct { #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == UPB_TYPE(proto_type)); \ + assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ return val.val.membername; \ } \ - void upb_value_ ## name(upb_value *val, ctype cval) { \ - SET_TYPE(val->type, UPB_TYPE(proto_type)); \ + void upb_value_set ## name(upb_value *val, ctype cval) { \ + SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } -UPB_VALUE_ACCESSORS(double, _double, double, DOUBLE); -UPB_VALUE_ACCESSORS(float, _float, float, FLOAT); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, INT32); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, INT64); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UINT32); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UINT64); -UPB_VALUE_ACCESSORS(bool, _bool, bool, BOOL); -UPB_VALUE_ACCESSORS(str, str, upb_string*, STRING); +UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); +UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); + +void upb_value_setraw(upb_value *val, uint64_t cval) { + SET_TYPE(val->type, UPB_VALUETYPE_RAW); + val->val.uint64 = cval; +} // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. 
diff --git a/core/upb_def.c b/core/upb_def.c index 4320fb6..4f12dbe 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -228,6 +228,10 @@ static void upb_deflist_push(upb_deflist *l, upb_def *d) { l->defs[l->len++] = d; } +static upb_def *upb_deflist_last(upb_deflist *l) { + return l->defs[l->len-1]; +} + // Qualify the defname for all defs starting with offset "start" with "str". static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { @@ -238,8 +242,14 @@ static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) } } +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). typedef struct { upb_string *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. 
int start; } upb_defbuilder_frame; @@ -250,6 +260,10 @@ struct _upb_defbuilder { uint32_t number; upb_string *name; + bool saw_number; + bool saw_name; + + upb_fielddef *f; }; typedef struct _upb_defbuilder upb_defbuilder; @@ -259,6 +273,28 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h); +static void upb_defbuilder_init(upb_defbuilder *b) { + upb_deflist_init(&b->defs); + b->stack_len = 0; + b->name = NULL; +} + +static void upb_defbuilder_uninit(upb_defbuilder *b) { + upb_string_unref(b->name); + upb_deflist_uninit(&b->defs); +} + +static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) { + if (b->stack_len <= 1) return NULL; + int index = b->stack[b->stack_len-1].start - 1; + assert(index >= 0); + return upb_downcast_msgdef(b->defs.defs[index]); +} + +static upb_def *upb_defbuilder_last(upb_defbuilder *b) { + return upb_deflist_last(&b->defs); +} + // Start/end handlers for FileDescriptorProto and DescriptorProto (the two // entities that have names and can contain sub-definitions. void upb_defbuilder_startcontainer(upb_defbuilder *b) { @@ -291,9 +327,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b, case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( @@ -308,19 +343,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg( return UPB_DELEGATE; default: // TODO: services and extensions. 
- return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorProto_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorProto_value, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorProto_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -333,9 +368,8 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b, switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: return BEGIN_SUBMSG; - default: - return UPB_SKIP; } + return UPB_CONTINUE; } static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( @@ -345,20 +379,19 @@ static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg( case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM: upb_defbuilder_register_FileDescriptorProto(b, h); return UPB_DELEGATE; - default: - return UPB_SKIP; } + return UPB_SKIPSUBMSG; } static void upb_defbuilder_register_FileDescriptorSet( upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_defbuilder_FileDescriptorSet_handlers = { + static upb_handlerset handlers = { NULL, // startmsg NULL, // endmsg &upb_defbuilder_FileDescriptorSet_value, &upb_defbuilder_FileDescriptorSet_startsubmsg, }; - upb_register_handlerset(h, &upb_defbuilder_FileDescriptorSet_handlers); + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -406,18 +439,20 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_EnumValueDescriptorProto_startmsg(upb_defbuilder *b) { - b->number = -1; - b->name = NULL; +static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; + b->saw_number = false; + b->saw_name = false; } -static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - b->name = upb_string_tryrecycle(name); - CHECKSRC(upb_src_getstr(src, name)); + upb_string_unref(b->name); + upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM: b->number = upb_value_getint32(val); @@ -428,34 +463,37 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(upb_defbuilder *b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(upb_defbuilder *b) { - if(b->name == NULL || b->number == -1) { - upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - goto err; +static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + if(!b->saw_number || !b->saw_name) { + //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); + //goto err; + return; } - upb_ntoi_ent ntoi_ent = {{name, 0}, number}; - upb_iton_ent iton_ent = {{number, 0}, name}; + upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; + upb_iton_ent iton_ent = {{b->number, 0}, b->name}; + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. 
- return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumValueDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumValueDescriptorProto_startmsg, &upb_enumdef_EnumValueDescriptorProto_endmsg, &upb_enumdef_EnumValueDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumValueDescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } // google.protobuf.EnumDescriptorProto. -void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { +void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); @@ -463,42 +501,51 @@ void upb_enumdef_EnumDescriptorProto_startmsg(upb_defbuilder *b) { upb_deflist_push(&b->defs, UPB_UPCAST(e)); } -void upb_enumdef_EnumDescriptorProto_endmsg(upb_defbuilder *b) { - assert(e->base.fqname); +void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { + upb_defbuilder *b = _b; + assert(upb_defbuilder_last(b)->fqname != NULL); } -static upb_flow_t upb_enumdef_EnumDescriptorProto_value(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: + case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: { + upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b)); upb_string_unref(e->base.fqname); - e->base.fqname = upb_value_getstr(val); + e->base.fqname = upb_string_getref(upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: return BEGIN_SUBMSG; + default: + return UPB_CONTINUE; } - return UPB_CONTINUE; } -static upb_flow_t 
upb_enumdef_EnumDescriptorProto_startsubmsg(upb_defbuilder *b, +static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM: upb_enumdef_register_EnumValueDescriptorProto(b, h); return UPB_DELEGATE; + default: + return UPB_SKIPSUBMSG; } - return UPB_SKIP; } static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_enumdef_EnumDescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_enumdef_EnumDescriptorProto_startmsg, &upb_enumdef_EnumDescriptorProto_endmsg, &upb_enumdef_EnumDescriptorProto_value, - } - upb_register_handlerset(h, &upb_enumdef_EnumDescriptorProto_handlers); + &upb_enumdef_EnumDescriptorProto_startsubmsg, + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -529,56 +576,71 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(upb_defbuilder *b) { +static void upb_fielddef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; f->name = NULL; f->def = NULL; f->owned = false; - f->msgdef = m; + f->msgdef = upb_defbuilder_top(b); b->f = f; } -static void upb_fielddef_endmsg(upb_defbuilder *b) { +static void upb_fielddef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_fielddef *f = b->f; // TODO: verify that all required fields were present. assert(f->number != -1 && f->name != NULL); assert((f->def != NULL) == upb_hasdef(f)); // Field was successfully read, add it as a field of the msgdef. 
+ upb_msgdef *m = upb_defbuilder_top(b); upb_itof_ent itof_ent = {{f->number, 0}, f}; upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); - return true; } -static upb_flow_t upb_fielddef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { - switch(parsed_f->number) { +static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; + switch(f->number) { case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM: - f->type = upb_value_getint32(val); + b->f->type = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM: - f->label = upb_value_getint32(val); + b->f->label = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM: - f->number = upb_value_getint32(val); + b->f->number = upb_value_getint32(val); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - f->name = upb_string_tryrecycle(f->name); - CHECKSRC(upb_src_getstr(src, f->name)); + upb_string_unref(b->f->name); + b->f->name = upb_string_getref(upb_value_getstr(val)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - CHECKSRC(upb_src_getstr(src, str)); - if(f->def) upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(str)); - f->owned = true; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if(b->f->def) upb_def_unref(b->f->def); + b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); + b->f->owned = true; break; } } return UPB_CONTINUE; } +static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, + upb_handlers *h) { + static upb_handlerset handlers = { + &upb_fielddef_startmsg, + &upb_fielddef_endmsg, + &upb_fielddef_value, + }; + upb_register_handlerset(h, &handlers); + upb_set_handler_closure(h, b); +} + /* upb_msgdef *****************************************************************/ @@ -596,21 
+658,24 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(upb_defbuilder *b) { +static void upb_msgdef_startmsg(void *_b) { + upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); upb_atomic_refcount_init(&m->cycle_refcount, 0); upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); - upb_defbuilder_startcontainer(b, UPB_UPCAST(m)); + upb_defbuilder_startcontainer(b); } -static void upb_msgdef_endmsg(upb_defbuilder *b) { - upb_msgdef *m = upb_downcast_msgdef(upb_deflist_stacktop(&m->defs)); +static void upb_msgdef_endmsg(void *_b) { + upb_defbuilder *b = _b; + upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - return UPB_ERROR; + //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); + //return UPB_ERROR; + return; } // Create an ordering over the fields. 
@@ -651,51 +716,57 @@ static void upb_msgdef_endmsg(upb_defbuilder *b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - return UPB_CONTINUE; + //return UPB_CONTINUE; } -static bool upb_msgdef_value(upb_defbuilder *b, upb_fielddef *f, upb_value val) { +static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { + upb_defbuilder *b = _b; switch(f->number) { - case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_defbuilder_setscopename(upb_value_getstr(val)); - break; + case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: { + upb_msgdef *m = upb_defbuilder_top(b); + upb_string_unref(m->base.fqname); + m->base.fqname = upb_string_getref(upb_value_getstr(val)); + upb_defbuilder_setscopename(b, upb_value_getstr(val)); + return UPB_CONTINUE; + } case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: return BEGIN_SUBMSG; default: // TODO: extensions. 
- return UPB_SKIP; + return UPB_CONTINUE; } } -static upb_flow_t upb_msgdef_startsubmsg(upb_defbuilder *b, upb_fielddef *f, +static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f, upb_handlers *h) { + upb_defbuilder *b = _b; switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: - upb_register_FieldDescriptorProto(b, h); + upb_fielddef_register_FieldDescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM: upb_msgdef_register_DescriptorProto(b, h); return UPB_DELEGATE; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM: - upb_register_EnumDescriptorProto(b, h); + upb_enumdef_register_EnumDescriptorProto(b, h); return UPB_DELEGATE; break; default: - return UPB_SKIP; + return UPB_SKIPSUBMSG; } } static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h) { - static upb_handlerset upb_msgdef_DescriptorProto_handlers = { + static upb_handlerset handlers = { &upb_msgdef_startmsg, &upb_msgdef_endmsg, &upb_msgdef_value, &upb_msgdef_startsubmsg, - } - upb_register_handlerset(h, &upb_msgdef_DescriptorProto_handlers); + }; + upb_register_handlerset(h, &handlers); upb_set_handler_closure(h, b); } @@ -884,7 +955,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // indicating whether the new defs can overwrite existing defs in the symtab, // attempts to add the given defs to the symtab. The whole operation either // succeeds or fails. Ownership of "defs" and "exts" is taken. -bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, +bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, bool allow_redef, upb_status *status) { upb_rwlock_wrlock(&s->lock); @@ -892,9 +963,9 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Build a table of the defs we mean to add, for duplicate detection and name // resolution. 
upb_strtable tmptab; - upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent)); - for (uint32_t i = 0; i < defs->len; i++) { - upb_def *def = defs->defs[i]; + upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent)); + for (int i = 0; i < num_defs; i++) { + upb_def *def = defs[i]; upb_symtab_ent e = {{def->fqname, 0}, def}; // Redefinition is never allowed within a single FileDescriptorSet. @@ -909,13 +980,13 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_defs **defs, int num_defs, // Pass ownership from the deflist to the strtable. upb_strtable_insert(&tmptab, &e.e); - defs->defs[i] = NULL; + defs[i] = NULL; } // TODO: process the list of extensions by modifying entries from // tmptab in-place (copying them from the symtab first if necessary). - CHECK(upb_resolverefs(&tmptab, &s->symtab, status)); + if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err; // The defs in tmptab have been vetted, and can be added to the symtab // without causing errors. Now add all tmptab defs to the symtab, @@ -946,6 +1017,7 @@ err: upb_def_unref(e->def); } upb_strtable_free(&tmptab); + for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]); return false; } @@ -1026,20 +1098,18 @@ upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { - upb_defbuilder *b = upb_defbuilder_new(); - upb_defbuilder_register_handlers(b, upb_src_gethandlers(src)); + upb_defbuilder b; + upb_defbuilder_init(&b); + //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); + upb_defbuilder_register_FileDescriptorSet(&b, NULL); if(!upb_src_run(src)) { upb_copyerr(status, upb_src_status(src)); + upb_defbuilder_uninit(&b); return; } - upb_symtab_add_defs(s, b->defs, b->defs_len, false, status); - upb_deflist_uninit(&defs); + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_defbuilder_uninit(&b); return; - -src_err: - upb_copyerr(status, upb_src_status(src)); 
-err: - upb_deflist_uninit(&defs); } @@ -1074,8 +1144,10 @@ err: // complicated to support on big-endian machines. typedef struct { + upb_src src; upb_string *input; upb_strlen_t offset; + upb_dispatcher dispatcher; } upb_baredecoder; static uint64_t upb_baredecoder_readv64(upb_baredecoder *d) @@ -1121,9 +1193,9 @@ bool upb_baredecoder_run(upb_baredecoder *d) { upb_dispatch_startmsg(&d->dispatcher); while(d->offset < upb_string_len(d->input)) { // Detect end-of-submessage. - while(d->offset >= *d->top) { + while(d->offset >= *top) { upb_dispatch_endsubmsg(&d->dispatcher); - d->offset = *(d->top--); + d->offset = *(top--); } uint32_t key = upb_baredecoder_readv64(d); @@ -1134,16 +1206,16 @@ bool upb_baredecoder_run(upb_baredecoder *d) { uint32_t delim_len = upb_baredecoder_readv32(d); // We don't know if it's a string or a submessage; deliver first as // string. - str = upb_string_tryrecycle(str); - upb_string_substr(str, d->input, d->offset, d->delimited_len); + upb_string_recycle(&str); + upb_string_substr(str, d->input, d->offset, delim_len); upb_value v; upb_value_setstr(&v, str); - if(upb_dispatch_value(&d->dispatcher, &f, v) == UPB_TREAT_AS_SUBMSG) { + if(upb_dispatch_value(&d->dispatcher, &f, v) == BEGIN_SUBMSG) { // Should deliver as a submessage instead. 
upb_dispatch_startsubmsg(&d->dispatcher, &f); - *(++d->top) = d->offset + delimited_len; + *(++top) = d->offset + delim_len; } else { - d->offset += delimited_len; + d->offset += delim_len; } } else { upb_value v; @@ -1167,23 +1239,24 @@ bool upb_baredecoder_run(upb_baredecoder *d) { } } upb_dispatch_endmsg(&d->dispatcher); + return true; } -static upb_src_vtable upb_baredecoder_src_vtbl = { - (upb_src_getdef_fptr)&upb_baredecoder_getdef, - (upb_src_getval_fptr)&upb_baredecoder_getval, - (upb_src_getstr_fptr)&upb_baredecoder_getstr, - (upb_src_skipval_fptr)&upb_baredecoder_skipval, - (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, - (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, -}; - static upb_baredecoder *upb_baredecoder_new(upb_string *str) { + //static upb_src_vtable vtbl = { + // (upb_src_getdef_fptr)&upb_baredecoder_getdef, + // (upb_src_getval_fptr)&upb_baredecoder_getval, + // (upb_src_getstr_fptr)&upb_baredecoder_getstr, + // (upb_src_skipval_fptr)&upb_baredecoder_skipval, + // (upb_src_startmsg_fptr)&upb_baredecoder_startmsg, + // (upb_src_endmsg_fptr)&upb_baredecoder_endmsg, + //}; upb_baredecoder *d = malloc(sizeof(*d)); d->input = upb_string_getref(str); d->offset = 0; - upb_src_init(&d->src, &upb_baredecoder_src_vtbl); + upb_dispatcher_init(&d->dispatcher); + //upb_src_init(&d->src, &vtbl); return d; } diff --git a/core/upb_msg.c b/core/upb_msg.c index 75f7a35..a0a5196 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -7,6 +7,8 @@ */ #include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" void _upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { @@ -108,10 +110,13 @@ upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, upb_status *status) { - (void)msg; - (void)md; - (void)str; - (void)status; + upb_stringsrc *ssrc = upb_stringsrc_new(); + upb_stringsrc_reset(ssrc, str); + upb_decoder *d = upb_decoder_new(md); + 
upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); + + upb_decoder_free(d); + upb_stringsrc_free(ssrc); } void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, diff --git a/core/upb_stream.h b/core/upb_stream.h index c96c544..9ae69de 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,13 +39,16 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Skips to the end of the current submessage (or if we are at the top - // level, skips to the end of the entire message). - UPB_SKIP, + // An error occurred; check status for details. + UPB_ERROR, - // Caller should stop sending values; check sink status for details. + // Processing should stop for now, but could be resumed later. // If processing resumes later, it should resume with the next value. - UPB_STOP, + UPB_SUSPEND, + + // Skips to the end of the current submessage (or if we are at the top + // level, skips to the end of the entire message). + UPB_SKIPSUBMSG, // When returned from a startsubmsg handler, indicates that the submessage // should be handled by a different set of handlers, which have been @@ -117,6 +120,9 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); +// TODO: for clients that want to increase efficiency by preventing bytesrcs +// from automatically being converted to strings in the value callback. 
+// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); // An object that transparently handles delegation so that the caller needs @@ -140,21 +146,30 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; +bool upb_src_run(upb_src *src); +upb_status *upb_src_status(upb_src *src); -/* upb_bytesrc ****************************************************************/ - -struct _upb_bytesrc; -typedef struct _upb_bytesrc upb_bytesrc; -// Returns the next string in the stream. false is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -INLINE bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); +/* upb_bytesrc ****************************************************************/ -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. The call fails if fewer than len bytes are -// available in the stream. -INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); +// Reads up to "count" bytes into "buf", returning the total number of bytes +// read. If <0, indicates error (check upb_bytesrc_status for details). +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count); + +// Like upb_bytesrc_read(), but modifies "str" in-place, possibly aliasing +// existing string data (which avoids a copy). +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count); + +// A convenience function for getting all the remaining data in a upb_bytesrc +// as a upb_string. Returns false and sets "status" if the operation fails. 
+INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status); +INLINE bool upb_value_getfullstr(upb_value val, upb_string *str, + upb_status *status) { + return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status); +} // Returns the current error status for the stream. // Note! The "eof" flag works like feof() in C; it cannot report end-of-file @@ -164,14 +179,21 @@ INLINE bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t l INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src); INLINE bool upb_bytesrc_eof(upb_bytesrc *src); + /* upb_bytesink ***************************************************************/ struct _upb_bytesink; typedef struct _upb_bytesink upb_bytesink; -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); +// Writes up to "count" bytes from "buf", returning the total number of bytes +// written. If <0, indicates error (check upb_bytesink_status() for details). +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count); + +// Puts the given string, which may alias the string data (which avoids a +// copy). Returns the number of bytes that were actually, consumed, which may +// be fewer than were in the string, or <0 on error. +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str); // Returns the current error status for the stream. INLINE upb_status *upb_bytesink_status(upb_bytesink *sink); diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index 91464a7..c0cf04f 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -20,23 +20,33 @@ extern "C" { // Typedefs for function pointers to all of the virtual functions. +// upb_src +struct _upb_src { +}; +typedef struct { +} upb_src_vtbl; + // upb_bytesrc. 
-typedef bool (*upb_bytesrc_get_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); +typedef upb_strlen_t (*upb_bytesrc_read_fptr)( + upb_bytesrc *src, void *buf, upb_strlen_t count); +typedef bool (*upb_bytesrc_getstr_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t count); // upb_bytesink. -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); +typedef upb_strlen_t (*upb_bytesink_write_fptr)( + upb_bytesink *bytesink, void *buf, upb_strlen_t count); +typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( + upb_bytesink *bytesink, upb_string *str); // Vtables for the above interfaces. typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; + upb_bytesrc_read_fptr read; + upb_bytesrc_getstr_fptr getstr; } upb_bytesrc_vtable; typedef struct { - upb_bytesink_put_fptr put; + upb_bytesink_write_fptr write; + upb_bytesink_putstr_fptr putstr; } upb_bytesink_vtable; // "Base Class" definitions; components that implement these interfaces should @@ -69,19 +79,56 @@ INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtable *vtbl) { // Implementation of virtual function dispatch. // upb_bytesrc -INLINE bool upb_bytesrc_get( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t minlen) { - return bytesrc->vtbl->get(bytesrc, str, minlen); -} +INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, + upb_strlen_t count) { + return src->vtbl->read(src, buf, count); +} + +INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, + upb_strlen_t count) { + return src->vtbl->getstr(src, str, count); +} + +INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, + upb_status *status) { + // We start with a getstr, because that could possibly alias data instead of + // copying. 
+ if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + // Trade-off between number of read calls and amount of overallocation. + const size_t bufsize = 4096; + while (!upb_bytesrc_eof(src)) { + upb_strlen_t len = upb_string_len(str); + char *buf = upb_string_getrwbuf(str, len + bufsize); + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); + if (read < 0) goto error; + // Resize to proper size. + upb_string_getrwbuf(str, len + read); + } + return true; -INLINE bool upb_bytesrc_append( - upb_bytesrc *bytesrc, upb_string *str, upb_strlen_t len) { - return bytesrc->vtbl->append(bytesrc, str, len); +error: + upb_copyerr(status, upb_bytesrc_status(src)); + return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +// upb_bytesink +INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, + upb_strlen_t count) { + return sink->vtbl->write(sink, buf, count); +} + +INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str) { + return sink->vtbl->putstr(sink, str); +} + +INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { + return &sink->status; +} + // upb_handlers struct _upb_handlers { upb_handlerset *set; @@ -182,17 +229,6 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, fieldnum, val); } -// upb_bytesink -INLINE int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str) { - return sink->vtbl->put(sink, str); -} -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { - return &sink->status; -} - -// upb_bytesink - - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/core/upb_string.c b/core/upb_string.c index 4f5f5c2..b243dfd 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -73,7 +73,7 @@ upb_string *upb_string_tryrecycle(upb_string *str) { char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { // assert(str->ptr == NULL); - uint32_t size = 
upb_string_size(str); + upb_strlen_t size = upb_string_size(str); if (size < len) { size = upb_round_up_pow2(len); str->cached_mem = realloc(str->cached_mem, size); diff --git a/core/upb_string.h b/core/upb_string.h index ee345e3..f82603b 100644 --- a/core/upb_string.h +++ b/core/upb_string.h @@ -119,20 +119,21 @@ INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The returned string is either "str" if it could be -// recycled or a newly created string if "str" has other references. +// data written to it. After the function returns, "str" points to a writable +// string, which is either the original string if it had no other references +// or a newly created string if it did have other references. // -// As a special case, passing NULL will allocate a new string. This is -// convenient for the pattern: +// As a special case, passing a pointer to NULL will allocate a new string. +// This is convenient for the pattern: // // upb_string *str = NULL; // while (x) { // if (y) { -// str = upb_string_tryrecycle(str); +// upb_string_recycle(&str); // upb_src_getstr(str); // } // } -upb_string *upb_string_tryrecycle(upb_string *str); +upb_string *upb_string_recycle(upb_string **str); // The options for setting the contents of a string. These may only be called // when a string is first created or recycled; once other functions have been -- cgit v1.2.3 From e9b9bbf216fdcbc86114b074dba6d0f51e4a438e Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 10 Jan 2011 21:01:07 -0800 Subject: Add INLINE to a few identifiers. 
--- core/upb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'core/upb.h') diff --git a/core/upb.h b/core/upb.h index 64bc88c..764e9ba 100644 --- a/core/upb.h +++ b/core/upb.h @@ -173,11 +173,11 @@ typedef struct { #endif #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ - ctype upb_value_get ## name(upb_value val) { \ + INLINE ctype upb_value_get ## name(upb_value val) { \ assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ return val.val.membername; \ } \ - void upb_value_set ## name(upb_value *val, ctype cval) { \ + INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } @@ -191,7 +191,7 @@ UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); -void upb_value_setraw(upb_value *val, uint64_t cval) { +INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { SET_TYPE(val->type, UPB_VALUETYPE_RAW); val->val.uint64 = cval; } -- cgit v1.2.3 From a38742bbe1cbc037f15edc053f5cf4dd53c5457a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 18 Jan 2011 22:33:05 -0800 Subject: A few minor changes to the streaming protocol. 1. the start and end callbacks can now return a upb_flow_t and set a status message. 2. clarified some semantics around passing an error status back from the callbacks. 
--- core/upb.c | 4 +++ core/upb.h | 27 ++++++++++--------- core/upb_def.c | 70 ++++++++++++++++++++++++++++---------------------- core/upb_stream.h | 29 +++++++++++---------- core/upb_stream_vtbl.h | 5 +++- 5 files changed, 76 insertions(+), 59 deletions(-) (limited to 'core/upb.h') diff --git a/core/upb.c b/core/upb.c index 2f715d0..05e9b7d 100644 --- a/core/upb.c +++ b/core/upb.c @@ -73,3 +73,7 @@ void upb_printerr(upb_status *status) { fprintf(stderr, "code: %d, no msg\n", status->code); } } + +void upb_status_uninit(upb_status *status) { + upb_string_unref(status->str); +} diff --git a/core/upb.h b/core/upb.h index 764e9ba..fb6d9ea 100644 --- a/core/upb.h +++ b/core/upb.h @@ -290,30 +290,27 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { + // The operation completed successfully. UPB_STATUS_OK = 0, - // A read or write from a streaming src/sink could not be completed right now. - UPB_STATUS_TRYAGAIN = 1, + // The bytesrc is at EOF and all data was read successfully. + UPB_STATUS_EOF = 1, - // A value had an incorrect wire type and will be skipped. - UPB_STATUS_BADWIRETYPE = 2, + // A read or write from a streaming src/sink could not be completed right now. + UPB_STATUS_TRYAGAIN = 2, // An unrecoverable error occurred. UPB_STATUS_ERROR = -1, - // A varint went for 10 bytes without terminating. - UPB_ERROR_UNTERMINATED_VARINT = -2, - - // The max nesting level (UPB_MAX_NESTING) was exceeded. - UPB_ERROR_MAX_NESTING_EXCEEDED = -3 + // A recoverable error occurred (for example, data of the wrong type was + // encountered which we can skip over). + // UPB_STATUS_RECOVERABLE_ERROR = -2 }; -// TODO: consider making this a single word: a upb_string* where we use the low -// bits as flags indicating whether there is an error and whether it is -// resumable. 
This would improve efficiency, because the code would not need -// to be loaded after a call to a function returning a status. +// TODO: consider adding error space and code, to let ie. errno be stored +// as a proper code. typedef struct { - enum upb_status_code code; + char code; upb_string *str; } upb_status; @@ -329,6 +326,8 @@ INLINE void upb_status_init(upb_status *status) { status->str = NULL; } +void upb_status_uninit(upb_status *status); + void upb_printerr(upb_status *status); void upb_clearerr(upb_status *status); void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, diff --git a/core/upb_def.c b/core/upb_def.c index 4f12dbe..0176dc9 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -257,6 +257,7 @@ struct _upb_defbuilder { upb_deflist defs; upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH]; int stack_len; + upb_status status; uint32_t number; upb_string *name; @@ -275,12 +276,14 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, static void upb_defbuilder_init(upb_defbuilder *b) { upb_deflist_init(&b->defs); + upb_status_init(&b->status); b->stack_len = 0; b->name = NULL; } static void upb_defbuilder_uninit(upb_defbuilder *b) { upb_string_unref(b->name); + upb_status_uninit(&b->status); upb_deflist_uninit(&b->defs); } @@ -356,7 +359,7 @@ static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b, &upb_defbuilder_FileDescriptorProto_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } // Handlers for google.protobuf.FileDescriptorSet. @@ -392,7 +395,7 @@ static void upb_defbuilder_register_FileDescriptorSet( &upb_defbuilder_FileDescriptorSet_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } @@ -439,10 +442,11 @@ static void upb_enumdef_free(upb_enumdef *e) { } // google.protobuf.EnumValueDescriptorProto. 
-static void upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) { upb_defbuilder *b = _b; b->saw_number = false; b->saw_name = false; + return UPB_CONTINUE; } static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, @@ -463,12 +467,12 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, return UPB_CONTINUE; } -static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; if(!b->saw_number || !b->saw_name) { - //upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number."); - //goto err; - return; + upb_seterr(&b->status, UPB_STATUS_ERROR, + "Enum value missing name or number."); + return UPB_STOP; } upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; upb_iton_ent iton_ent = {{b->number, 0}, b->name}; @@ -478,6 +482,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { // We don't unref "name" because we pass our ref to the iton entry of the // table. strtables can ref their keys, but the inttable doesn't know that // the value is a string. + return UPB_CONTINUE; } static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, @@ -488,22 +493,24 @@ static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b, &upb_enumdef_EnumValueDescriptorProto_value, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } // google.protobuf.EnumDescriptorProto. 
-void upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) { upb_defbuilder *b = _b; upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM); upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(e)); + return UPB_CONTINUE; } -void upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { +static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; assert(upb_defbuilder_last(b)->fqname != NULL); + return UPB_CONTINUE; } static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b, @@ -546,7 +553,7 @@ static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b, &upb_enumdef_EnumDescriptorProto_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } upb_enum_iter upb_enum_begin(upb_enumdef *e) { @@ -576,7 +583,7 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -static void upb_fielddef_startmsg(void *_b) { +static upb_flow_t upb_fielddef_startmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = malloc(sizeof(*f)); f->number = -1; @@ -585,9 +592,10 @@ static void upb_fielddef_startmsg(void *_b) { f->owned = false; f->msgdef = upb_defbuilder_top(b); b->f = f; + return UPB_CONTINUE; } -static void upb_fielddef_endmsg(void *_b) { +static upb_flow_t upb_fielddef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_fielddef *f = b->f; // TODO: verify that all required fields were present. 
@@ -600,6 +608,7 @@ static void upb_fielddef_endmsg(void *_b) { upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, &itof_ent.e); upb_strtable_insert(&m->ntof, &ntof_ent.e); + return UPB_CONTINUE; } static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { @@ -620,7 +629,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_ERROR; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_STOP; if(b->f->def) upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); b->f->owned = true; @@ -638,7 +647,7 @@ static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b, &upb_fielddef_value, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } @@ -658,7 +667,7 @@ static int upb_compare_fields(const void *f1, const void *f2) { } // google.protobuf.DescriptorProto. -static void upb_msgdef_startmsg(void *_b) { +static upb_flow_t upb_msgdef_startmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = malloc(sizeof(*m)); upb_def_init(&m->base, UPB_DEF_MSG); @@ -667,15 +676,16 @@ static void upb_msgdef_startmsg(void *_b) { upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); upb_deflist_push(&b->defs, UPB_UPCAST(m)); upb_defbuilder_startcontainer(b); + return UPB_CONTINUE; } -static void upb_msgdef_endmsg(void *_b) { +static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - //upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name."); - //return UPB_ERROR; - return; + upb_seterr(&b->status, UPB_STATUS_ERROR, + "Encountered message with no name."); + return UPB_STOP; } // Create an ordering over the fields. 
@@ -716,7 +726,7 @@ static void upb_msgdef_endmsg(void *_b) { if (max_align > 0) m->size = upb_align_up(m->size, max_align); upb_defbuilder_endcontainer(b); - //return UPB_CONTINUE; + return UPB_CONTINUE; } static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) { @@ -767,7 +777,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, &upb_msgdef_startsubmsg, }; upb_register_handlerset(h, &handlers); - upb_set_handler_closure(h, b); + upb_set_handler_closure(h, b, &b->status); } static void upb_msgdef_free(upb_msgdef *m) @@ -1100,16 +1110,14 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) { upb_defbuilder b; upb_defbuilder_init(&b); - //upb_defbuilder_register_FileDescriptorSet(&b, upb_src_gethandlers(src)); - upb_defbuilder_register_FileDescriptorSet(&b, NULL); - if(!upb_src_run(src)) { - upb_copyerr(status, upb_src_status(src)); - upb_defbuilder_uninit(&b); - return; - } - upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); + upb_handlers handlers; + upb_handlers_init(&handlers); + upb_defbuilder_register_FileDescriptorSet(&b, &handlers); + upb_src_sethandlers(src, &handlers); + upb_src_run(src, status); + if (upb_ok(status)) + upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); upb_defbuilder_uninit(&b); - return; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 9ae69de..40836e9 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,12 +39,8 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // An error occurred; check status for details. - UPB_ERROR, - - // Processing should stop for now, but could be resumed later. - // If processing resumes later, it should resume with the next value. - UPB_SUSPEND, + // Stop processing for now; check status for details. + UPB_STOP, // Skips to the end of the current submessage (or if we are at the top // level, skips to the end of the entire message). 
@@ -61,8 +57,8 @@ typedef enum { struct _upb_handlers; typedef struct _upb_handlers upb_handlers; -typedef void (*upb_startmsg_handler_t)(void *closure); -typedef void (*upb_endmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); +typedef upb_flow_t (*upb_endmsg_handler_t)(void *closure); typedef upb_flow_t (*upb_value_handler_t)(void *closure, struct _upb_fielddef *f, upb_value val); @@ -76,12 +72,14 @@ typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure, // An empty set of handlers, for convenient copy/paste: // -// static void startmsg(void *closure) { +// static upb_flow_t startmsg(void *closure) { // // Called when the top-level message begins. +// return UPB_CONTINUE; // } // -// static void endmsg(void *closure) { +// static upb_flow_t endmsg(void *closure) { // // Called when the top-level message ends. +// return UPB_CONTINUE; // } // // static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) { @@ -120,10 +118,15 @@ INLINE void upb_handlers_uninit(upb_handlers *h); INLINE void upb_handlers_reset(upb_handlers *h); INLINE bool upb_handlers_isempty(upb_handlers *h); INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set); + // TODO: for clients that want to increase efficiency by preventing bytesrcs // from automatically being converted to strings in the value callback. // INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs); -INLINE void upb_set_handler_closure(upb_handlers *h, void *closure); + +// The closure will be passed to every handler. The status will be used +// only immediately after a handler has returned UPB_STOP. +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status); // An object that transparently handles delegation so that the caller needs // only follow the protocol as if delegation did not exist. 
@@ -146,8 +149,8 @@ INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d, struct _upb_src; typedef struct _upb_src upb_src; -bool upb_src_run(upb_src *src); -upb_status *upb_src_status(upb_src *src); +void upb_src_sethandlers(upb_src *src, upb_handlers *handlers); +void upb_src_run(upb_src *src, upb_status *status); /* upb_bytesrc ****************************************************************/ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index c0cf04f..d017177 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -133,6 +133,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { struct _upb_handlers { upb_handlerset *set; void *closure; + upb_status *status; // We don't own this. }; INLINE void upb_handlers_init(upb_handlers *h) { @@ -155,8 +156,10 @@ INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) { h->set = set; } -INLINE void upb_set_handler_closure(upb_handlers *h, void *closure) { +INLINE void upb_set_handler_closure(upb_handlers *h, void *closure, + upb_status *status) { h->closure = closure; + h->status = status; } // upb_dispatcher -- cgit v1.2.3 From 1dea81b1c244d357a6e46ee22c14b36280bf2100 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 21 Jan 2011 17:29:16 -0800 Subject: Interface refinement: rename some constants. * UPB_STOP -> UPB_BREAK, better represents breaking out of a parsing loop. * UPB_STATUS_OK -> UPB_OK, for all status codes, more concise at no readability cost (perhaps an improvement). 
--- core/upb.c | 5 ++--- core/upb.h | 16 ++++++++-------- core/upb_def.c | 22 ++++++++++------------ core/upb_stream.h | 6 ++++-- 4 files changed, 24 insertions(+), 25 deletions(-) (limited to 'core/upb.h') diff --git a/core/upb.c b/core/upb.c index 05e9b7d..da2a0f0 100644 --- a/core/upb.c +++ b/core/upb.c @@ -60,9 +60,8 @@ void upb_copyerr(upb_status *to, upb_status *from) } void upb_clearerr(upb_status *status) { - status->code = UPB_STATUS_OK; - upb_string_unref(status->str); - status->str = NULL; + status->code = UPB_OK; + upb_string_recycle(&status->str); } void upb_printerr(upb_status *status) { diff --git a/core/upb.h b/core/upb.h index fb6d9ea..d394a08 100644 --- a/core/upb.h +++ b/core/upb.h @@ -291,16 +291,16 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val, // resumed. enum upb_status_code { // The operation completed successfully. - UPB_STATUS_OK = 0, + UPB_OK = 0, // The bytesrc is at EOF and all data was read successfully. - UPB_STATUS_EOF = 1, + UPB_EOF = 1, // A read or write from a streaming src/sink could not be completed right now. - UPB_STATUS_TRYAGAIN = 2, + UPB_TRYAGAIN = 2, // An unrecoverable error occurred. - UPB_STATUS_ERROR = -1, + UPB_ERROR = -1, // A recoverable error occurred (for example, data of the wrong type was // encountered which we can skip over). @@ -308,21 +308,21 @@ enum upb_status_code { }; // TODO: consider adding error space and code, to let ie. errno be stored -// as a proper code. +// as a proper code, or application-specific error codes. 
typedef struct { char code; upb_string *str; } upb_status; -#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL} +#define UPB_STATUS_INIT {UPB_OK, NULL} #define UPB_ERRORMSG_MAXLEN 256 INLINE bool upb_ok(upb_status *status) { - return status->code == UPB_STATUS_OK; + return status->code == UPB_OK; } INLINE void upb_status_init(upb_status *status) { - status->code = UPB_STATUS_OK; + status->code = UPB_OK; status->str = NULL; } diff --git a/core/upb_def.c b/core/upb_def.c index 0176dc9..79b6632 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -470,9 +470,8 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b, static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) { upb_defbuilder *b = _b; if(!b->saw_number || !b->saw_name) { - upb_seterr(&b->status, UPB_STATUS_ERROR, - "Enum value missing name or number."); - return UPB_STOP; + upb_seterr(&b->status, UPB_ERROR, "Enum value missing name or number."); + return UPB_BREAK; } upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number}; upb_iton_ent iton_ent = {{b->number, 0}, b->name}; @@ -629,7 +628,7 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) { break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { upb_string *str = upb_string_new(); - if (!upb_value_getfullstr(val, str, NULL)) return UPB_STOP; + if (!upb_value_getfullstr(val, str, NULL)) return UPB_BREAK; if(b->f->def) upb_def_unref(b->f->def); b->f->def = UPB_UPCAST(upb_unresolveddef_new(str)); b->f->owned = true; @@ -683,9 +682,8 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { upb_defbuilder *b = _b; upb_msgdef *m = upb_defbuilder_top(b); if(!m->base.fqname) { - upb_seterr(&b->status, UPB_STATUS_ERROR, - "Encountered message with no name."); - return UPB_STOP; + upb_seterr(&b->status, UPB_ERROR, "Encountered message with no name."); + return UPB_BREAK; } // Create an ordering over the fields. 
@@ -864,7 +862,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) // where we recurse over the type tree (like for example, right now) and an // absurdly deep tree could cause us to stack overflow on systems with very // limited stacks. - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at " "depth %d in the type graph, which exceeds the maximum type " "depth of %d.", UPB_UPCAST(m)->fqname, depth, UPB_MAX_TYPE_DEPTH); @@ -873,7 +871,7 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status) // Cycle! int cycle_len = depth - 1; if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " + upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved " "in a cycle of length %d, which exceeds the maximum type " "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, UPB_MAX_TYPE_CYCLE_LEN); @@ -931,7 +929,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, upb_symtab_ent *found; if(!(found = upb_resolve(tmptab, base, name)) && !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_STATUS_ERROR, + upb_seterr(status, UPB_ERROR, "could not resolve symbol '" UPB_STRFMT "'" " in context '" UPB_STRFMT "'", UPB_STRARG(name), UPB_STRARG(base)); @@ -941,7 +939,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab, // Check the type of the found def. upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; if(found->def->type != expected) { - upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); + upb_seterr(status, UPB_ERROR, "Unexpected type"); return false; } upb_msgdef_resolve(m, f, found->def); @@ -983,7 +981,7 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, // allow_redef is set. 
if (upb_strtable_lookup(&tmptab, def->fqname) || (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) { - upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT, + upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT, UPB_STRARG(def->fqname)); goto err; } diff --git a/core/upb_stream.h b/core/upb_stream.h index 40836e9..66bfec2 100644 --- a/core/upb_stream.h +++ b/core/upb_stream.h @@ -39,8 +39,10 @@ typedef enum { // Caller should continue sending values to the sink. UPB_CONTINUE, - // Stop processing for now; check status for details. - UPB_STOP, + // Stop processing for now; check status for details. If no status was set, + // a generic error will be returned. If the error is resumable, processing + // will resume by delivering this callback again. + UPB_BREAK, // Skips to the end of the current submessage (or if we are at the top // level, skips to the end of the entire message). -- cgit v1.2.3 From 93381f1411def0dba5677b71cd4df859d99777f3 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 Jan 2011 21:15:44 -0800 Subject: Decoder compiles again! But probably doesn't work. --- Makefile | 6 +- core/upb.c | 2 +- core/upb.h | 2 +- core/upb_def.c | 2 +- core/upb_stream_vtbl.h | 26 +++--- stream/upb_decoder.c | 241 +++++++++++++++++++++++++++++-------------------- 6 files changed, 160 insertions(+), 119 deletions(-) (limited to 'core/upb.h') diff --git a/Makefile b/Makefile index 04779c0..46cb836 100644 --- a/Makefile +++ b/Makefile @@ -62,8 +62,8 @@ SRC=core/upb.c \ core/upb_string.c \ descriptor/descriptor.c \ core/upb_def.c \ + stream/upb_decoder.c \ # core/upb_msg.c \ -# stream/upb_decoder.c \ # stream/upb_stdio.c \ # stream/upb_strstream.c \ # stream/upb_textprinter.c @@ -74,9 +74,9 @@ OTHERSRC=src/upb_encoder.c src/upb_text.c # Override the optimization level for upb_def.o, because it is not in the # critical path but gets very large when -O3 is used. 
core/upb_def.o: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< core/upb_def.lo: core/upb_def.c - $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC + $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC lang_ext/lua/upb.so: lang_ext/lua/upb.lo $(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a diff --git a/core/upb.c b/core/upb.c index da2a0f0..ff2d47e 100644 --- a/core/upb.c +++ b/core/upb.c @@ -18,7 +18,7 @@ (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \ #ctype}, -upb_type_info upb_types[] = { +const upb_type_info upb_types[] = { {0, 0, 0, 0, ""}, // There is no type 0. TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT diff --git a/core/upb.h b/core/upb.h index d394a08..7b228a0 100644 --- a/core/upb.h +++ b/core/upb.h @@ -101,7 +101,7 @@ typedef struct { } upb_type_info; // A static array of info about all of the field types, indexed by type number. -extern upb_type_info upb_types[]; +extern const upb_type_info upb_types[]; // The number of a field, eg. "optional string foo = 3". typedef int32_t upb_field_number_t; diff --git a/core/upb_def.c b/core/upb_def.c index c21843e..2eda89f 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -717,7 +717,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) { size_t max_align = 0; for (int i = 0; i < n; i++) { upb_fielddef *f = sorted_fields[i]; - upb_type_info *type_info = &upb_types[f->type]; + const upb_type_info *type_info = &upb_types[f->type]; // This identifies the set bit. When we implement is_initialized (a // general check about whether all required bits are set) we will probably diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index fd71b2d..ddefba9 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -27,9 +27,9 @@ typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status); // upb_bytesrc. 
typedef upb_strlen_t (*upb_bytesrc_read_fptr)( - upb_bytesrc *src, void *buf, upb_strlen_t count); + upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); typedef bool (*upb_bytesrc_getstr_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t count); + upb_bytesrc *src, upb_string *str, upb_status *status); // upb_bytesink. typedef upb_strlen_t (*upb_bytesink_write_fptr)( @@ -102,35 +102,31 @@ INLINE void upb_src_run(upb_src *src, upb_status *status) { // upb_bytesrc INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count) { - return src->vtbl->read(src, buf, count); + upb_strlen_t count, upb_status *status) { + return src->vtbl->read(src, buf, count, status); } INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_strlen_t count) { - return src->vtbl->getstr(src, str, count); + upb_status *status) { + return src->vtbl->getstr(src, str, status); } INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, upb_status *status) { // We start with a getstr, because that could possibly alias data instead of // copying. - if (!upb_bytesrc_getstr(src, str, UPB_STRLEN_MAX)) goto error; + if (!upb_bytesrc_getstr(src, str, status)) return false; // Trade-off between number of read calls and amount of overallocation. const size_t bufsize = 4096; - while (!upb_bytesrc_eof(src)) { + do { upb_strlen_t len = upb_string_len(str); char *buf = upb_string_getrwbuf(str, len + bufsize); - upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize); - if (read < 0) goto error; + upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize, status); + if (read < 0) return false; // Resize to proper size. 
upb_string_getrwbuf(str, len + read); - } + } while (!status->code != UPB_EOF); return true; - -error: - upb_copyerr(status, upb_bytesrc_status(src)); - return false; } INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 9a17451..b4b32ff 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -14,27 +14,27 @@ /* Pure Decoding **************************************************************/ // The key fast-path varint-decoding routine. Here we can assume we have at -// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of +// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of // possibilities for optimization/experimentation here. -INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, +INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *status) { - *high = 0; + uint32_t low, high = 0; uint32_t b; - uint8_t *ptr = p->ptr; - b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; - b = *(*ptr++); *low |= (b & 0x7f) << 28; - *high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; - b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; + b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; + b = *(*ptr++); low |= (b & 0x7f) << 28; + high = (b & 0x7f) >> 3; if(!(b & 
0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; + b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; upb_seterr(status, UPB_ERROR, "Unterminated varint"); return false; done: + *val = ((uint64_t)high << 32) | low; return true; } @@ -50,7 +50,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } typedef struct { upb_msgdef *msgdef; upb_fielddef *field; - size_t end_offset; // For groups, 0. + ssize_t end_offset; // For groups, 0. } upb_decoder_frame; struct upb_decoder { @@ -76,23 +76,50 @@ struct upb_decoder { upb_strlen_t buf_stream_offset; }; +typedef struct { + // Our current position in the data buffer. + const char *ptr; + + // Number of bytes available at ptr, until either end-of-buf or + // end-of-submessage (whichever is smaller). + size_t len; + + // Msgdef for the current level. + upb_msgdef *msgdef; +} upb_dstate; + +INLINE void upb_dstate_advance(upb_dstate *s, size_t len) { + s->ptr += len; + s->len -= len; +} + +static upb_flow_t upb_pop(upb_decoder *d); + +// Constant used to signal that the submessage is a group and therefore we +// don't know its end offset. This cannot be the offset of a real submessage +// end because it takes at least one byte to begin a submessage. +#define UPB_GROUP_END_OFFSET -1 +#define UPB_MAX_VARINT_ENCODED_SIZE 10 + // Called only from the slow path, this function copies the next "len" bytes // from the stream to "data", adjusting "buf" and "len" appropriately. 
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, - uint8_t **ptr, size_t *len) { + upb_dstate *s) { while (1) { - memcpy(data, *ptr, *len); - bytes_wanted -= *len; - *ptr += *len; + size_t to_copy = UPB_MIN(bytes_wanted, s->len); + memcpy(data, s->ptr, to_copy); + upb_dstate_advance(s, to_copy); + bytes_wanted -= to_copy; if (bytes_wanted == 0) return true; // Did "len" indicate end-of-submessage or end-of-buffer? - size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; + ssize_t buf_offset = + d->buf ? ((const char*)s->ptr - upb_string_getrobuf(d->buf)) : 0; if (d->top->end_offset > 0 && d->top->end_offset == d->buf_stream_offset + buf_offset) { // End-of-submessage. if (bytes_wanted > 0) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") + upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); return false; } if (upb_pop(d) != UPB_CONTINUE) return false; @@ -100,100 +127,121 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, // End-of-buffer. if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; - *ptr = upb_string_getrobuf(d->buf); + s->ptr = upb_string_getrobuf(d->buf); } // Wait for end-of-submessage or end-of-buffer, whichever comes first. - size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); - size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; - size_t submsg_remaining = + ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); + ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; + ssize_t submsg_remaining = d->top->end_offset - d->buf_stream_offset - offset_in_buf; if (d->top->end_offset == UPB_GROUP_END_OFFSET || buf_remaining > submsg_remaining) { - *len = buf_remaining; + s->len = buf_remaining; } else { // Check that non of our subtraction overflowed. 
assert(d->top->end_offset > d->buf_stream_offset); assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); - *len = submsg_remaining; + s->len = submsg_remaining; } } } -// We use this path when we don't have UPB_MAX_ENCODED_SIZE contiguous bytes -// available in our current buffer. We don't inline this because we accept -// that it will be slow and we don't want to pay for two copies of it. -static bool upb_decode_varint_slow(upb_decoder *d) { - uint8_t buf[UPB_MAX_ENCODED_SIZE]; - uint8_t *p = buf, *end = buf + sizeof(buf); - for(int bitpos = 0; p < end && getbyte(d, p) && (last & 0x80); p++, bitpos += 7) - *val |= ((uint64_t)((last = *p) & 0x7F)) << bitpos; - - if(d->status->code == UPB_EOF && (last & 0x80)) { - upb_seterr(status, UPB_ERROR, - "Provided data ended in the middle of a varint.\n"); - } else if(buf == maxend) { - upb_seterr(status, UPB_ERROR, +// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous +// bytes available in our current buffer. We don't inline this because we +// accept that it will be slow and we don't want to pay for two copies of it. +static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s, + upb_value *val) { + char byte = 0x80; + uint64_t val64 = 0; + int bitpos; + for(bitpos = 0; + bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1, s); + bitpos += 7) + val64 |= ((uint64_t)byte & 0x7F) << bitpos; + + if(bitpos == 70) { + upb_seterr(d->status, UPB_ERROR, "Varint was unterminated after 10 bytes.\n"); + return false; + } else if (d->status->code == UPB_EOF && (byte & 0x80)) { + upb_seterr(d->status, UPB_ERROR, + "Provided data ended in the middle of a varint.\n"); + return false; } else { // Success. 
- return; + upb_value_setint64(val, val64); + return true; } } -INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, - const uint8_t **len, upb_tag *tag) { - const uint8_t *ptr = *_ptr, *len = *_end; +typedef struct { + upb_wire_type_t wire_type; + upb_field_number_t field_number; +} upb_tag; + +INLINE bool upb_decode_tag(upb_decoder *d, upb_dstate *s, upb_tag *tag) { + const char *p = s->ptr; uint32_t tag_int; + upb_value val; // Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048). - if (len - ptr < 2) goto slow; // unlikely. - tag_int = *ptr & 0x7f; - if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order - tag_int |= (*ptr & 0x7f) << 7; - if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely. + if (s->len < 2) goto slow; // unlikely. + tag_int = *p & 0x7f; + if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order + tag_int |= (*p & 0x7f) << 7; + if ((*(p++) & 0x80) == 0) goto done; // likely slow: - if (!upb_decode_varint_slow(d, _ptr, _end)) return false; - ptr = *_ptr; // Trick the next line into not overwriting us. + // Decode a full varint starting over from ptr. + if (!upb_decode_varint_slow(d, s, &val)) return false; + tag_int = upb_value_getint64(val); + p = s->ptr; // Trick the next line into not overwriting us. done: - *_ptr = ptr; + upb_dstate_advance(s, p - s->ptr); tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); tag->field_number = tag_int >> 3; return true; } -INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, - uint32_t *low, uint32_t *high) { - if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) - return upb_decode_varint_fast(d); - else - return upb_decode_varint_slow(d); +INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) { + if (s->len >= UPB_MAX_VARINT_ENCODED_SIZE) { + // Common (fast) case. 
+ uint64_t val64; + const char *p = s->ptr; + if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; + upb_dstate_advance(s, p - s->ptr); + upb_value_setint64(val, val64); + return true; + } else { + return upb_decode_varint_slow(d, s, val); + } } INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, - uint8_t **ptr, uint8_t **len, upb_value *val) { - static const char table = {0, 8, 0, 0, 0, 4}; + upb_dstate *s, upb_value *val) { + static const char table[] = {0, 8, 0, 0, 0, 4}; size_t bytes = table[wt]; - if (*len - *ptr >= bytes) { + if (s->len >= bytes) { // Common (fast) case. - memcpy(&val, *ptr, bytes); - *ptr += bytes; + memcpy(&val, s->ptr, bytes); + upb_dstate_advance(s, bytes); } else { - if (!upb_getptr(d, &val, bytes, ptr, len)) return false; + if (!upb_getbuf(d, &val, bytes, s)) return false; } return true; } // "val" initially holds the length of the string, this is replaced by the // contents of the string. -INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { +INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str, + upb_dstate *s) { upb_string_recycle(str); - upb_strlen_t len = upb_valu_getint32(*val); - if (*len - *ptr >= len) { + uint32_t strlen = upb_value_getint32(*val); + if (s->len >= strlen) { // Common (fast) case. - upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); - *ptr += len; + upb_string_substr(*str, d->buf, s->ptr - upb_string_getrobuf(d->buf), strlen); + upb_dstate_advance(s, strlen); } else { - if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) + if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) return false; } return true; @@ -204,21 +252,22 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) extern upb_wire_type_t upb_expected_wire_types[]; // Returns true if wt is the correct on-the-wire type for ft. 
-INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { +INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { // This doesn't currently support packed arrays. - return upb_types[ft].expected_wire_type == wt; + return upb_types[ft].native_wire_type == wt; } -static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, - upb_strlen_t submsg_len, upb_field_type_t type) { +static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, + upb_strlen_t submsg_len, upb_fieldtype_t type) { d->top->field = f; d->top++; if(d->top >= d->limit) { - upb_seterr(status, UPB_ERROR, "Nesting too deep."); + upb_seterr(d->status, UPB_ERROR, "Nesting too deep."); return UPB_ERROR; } - d->top->end_offset = type == UPB_TYPE(GROUP) ? - UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; + d->top->end_offset = (type == UPB_TYPE(GROUP)) ? + UPB_GROUP_END_OFFSET : + d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; d->top->msgdef = upb_downcast_msgdef(f->def); return upb_dispatch_startsubmsg(&d->dispatcher, f); } @@ -229,15 +278,11 @@ static upb_flow_t upb_pop(upb_decoder *d) { } void upb_decoder_run(upb_src *src, upb_status *status) { - // We use stack variables for our frequently used vars so the compiler knows - // they can't be changed by external code (like when we dispatch a callback). - - // Our current position in the data buffer. - uint8_t *ptr = NULL; - // Number of bytes available at ptr, until either end-of-buf or - // end-of-submessage (whichever is smaller). - size_t len = 0; - + upb_decoder *d = (upb_decoder*)src; + // We put our dstate on the stack so the compiler knows they can't be changed + // by external code (like when we dispatch a callback). We must be sure not + // to let its address escape this source file. 
+ upb_dstate state = {NULL, 0, d->top->msgdef}; upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG @@ -250,14 +295,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) { while(1) { // Parse/handle tag. upb_tag tag; - CHECK(upb_decode_tag(d, &ptr, &len, &tag)); + CHECK(upb_decode_tag(d, &state, &tag)); // Decode wire data. Hopefully this branch will predict pretty well // since most types will read a varint here. upb_value val; switch (tag.wire_type) { case UPB_WIRE_TYPE_END_GROUP: - if(d->top->end_offset != UPB_GROUP_END_OFFSET) + if(d->top->end_offset != UPB_GROUP_END_OFFSET) { upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); goto err; } @@ -266,21 +311,21 @@ void upb_decoder_run(upb_src *src, upb_status *status) { case UPB_WIRE_TYPE_VARINT: case UPB_WIRE_TYPE_DELIMITED: // For the delimited case we are parsing the length. - CHECK(upb_decode_varint(d, &ptr, &len, &val)); + CHECK(upb_decode_varint(d, &state, &val)); break; case UPB_WIRE_TYPE_32BIT: case UPB_WIRE_TYPE_64BIT: - CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); + CHECK(upb_decode_fixed(d, tag.wire_type, &state, &val)); break; } // Look up field by tag number. - upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); + upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, tag.field_number); if (!f) { if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) - CHECK(upb_decode_string(d, &val, &str)); - CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); + CHECK(upb_decode_string(d, &val, &str, &state)); + CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val)); } else if (!upb_check_type(tag.wire_type, f->type)) { // TODO: put more details in this error msg. 
upb_seterr(status, UPB_ERROR, "Field had incorrect type."); @@ -298,11 +343,11 @@ void upb_decoder_run(upb_src *src, upb_status *status) { switch (f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); + CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); continue; // We have no value to dispatch. case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - CHECK(upb_decode_string(d, &val, &str)); + CHECK(upb_decode_string(d, &val, &str, &state)); break; case UPB_TYPE(SINT32): upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); @@ -313,7 +358,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { default: break; // Other types need no further processing at this point. } - CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); + CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); } CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); @@ -329,7 +374,7 @@ void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { upb_decoder *d = (upb_decoder*)src; upb_dispatcher_reset(&d->dispatcher, handlers); d->top = d->stack; - d->completed_offset = 0; + d->buf_stream_offset = 0; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it // indefinitely), so we treat it like a group. -- cgit v1.2.3 From 8465e5e65014ac080d62855f8abfd44acdf7beb2 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 2 Feb 2011 10:00:30 -0800 Subject: Gutted upb_msg a bit, re-adding only the essentials. 
--- Makefile | 15 +++++-- core/upb.h | 10 ++++- core/upb_msg.c | 121 ++++++++++++--------------------------------------------- core/upb_msg.h | 109 ++++++--------------------------------------------- 4 files changed, 56 insertions(+), 199 deletions(-) (limited to 'core/upb.h') diff --git a/Makefile b/Makefile index 26e036e..bea6980 100644 --- a/Makefile +++ b/Makefile @@ -56,17 +56,24 @@ clean: deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h) @./gen-deps.sh $(SRC) -# The core library (core/libupb.a) -SRC=core/upb.c \ +# The core library -- the absolute minimum you must compile in to successfully +# bootstrap. +CORE= \ + core/upb.c \ core/upb_table.c \ core/upb_string.c \ - descriptor/descriptor.c \ core/upb_def.c \ + descriptor/descriptor.c + +# Common encoders/decoders and upb_msg -- you're almost certain to want these. +STREAM= \ stream/upb_decoder.c \ stream/upb_stdio.c \ stream/upb_textprinter.c \ stream/upb_strstream.c \ -# core/upb_msg.c \ + core/upb_msg.c \ + +SRC=$(CORE) $(STREAM) $(SRC): perf-cppflags # Parts of core that are yet to be converted. diff --git a/core/upb.h b/core/upb.h index 7b228a0..243c7bc 100644 --- a/core/upb.h +++ b/core/upb.h @@ -136,7 +136,6 @@ typedef int32_t upb_strlen_t; // constant UPB_VALUETYPE_ARRAY to represent an array. 
typedef uint8_t upb_valuetype_t; #define UPB_VALUETYPE_ARRAY 32 - #define UPB_VALUETYPE_BYTESRC 32 #define UPB_VALUETYPE_RAW 33 @@ -189,6 +188,8 @@ UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE)); +UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY); UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC); INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { @@ -196,6 +197,13 @@ INLINE void upb_value_setraw(upb_value *val, uint64_t cval) { val->val.uint64 = cval; } +INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) { + assert(val.type == UPB_TYPE(MESSAGE) || + val.type == UPB_TYPE(STRING) || + val.type == UPB_VALUETYPE_ARRAY); + return val.val.refcount; +} + // A pointer to a .proto value. The owner must have an out-of-band way of // knowing the type, so it knows which union member to use. 
typedef union { diff --git a/core/upb_msg.c b/core/upb_msg.c index 83191d2..e9f863d 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -10,29 +10,43 @@ #include "upb_decoder.h" #include "upb_strstream.h" -void _upb_elem_free(upb_value v, upb_fielddef *f) { +static void upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { case UPB_TYPE(MESSAGE): case UPB_TYPE(GROUP): - _upb_msg_free(v.msg, upb_downcast_msgdef(f->def)); + _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def)); break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - _upb_string_free(v.str); + _upb_string_free(upb_value_getstr(v)); break; default: abort(); } } -void _upb_field_free(upb_value v, upb_fielddef *f) { +static void upb_elem_unref(upb_value v, upb_fielddef *f) { + assert(upb_elem_ismm(f)); + upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); + if (refcount && upb_atomic_unref(refcount)) + upb_elem_free(v, f); +} + +static void upb_field_free(upb_value v, upb_fielddef *f) { if (upb_isarray(f)) { - _upb_array_free(v.arr, f); + _upb_array_free(upb_value_getarr(v), f); } else { - _upb_elem_free(v, f); + upb_elem_free(v, f); } } +static void upb_field_unref(upb_value v, upb_fielddef *f) { + assert(upb_field_ismm(f)); + upb_atomic_refcount_t *refcount = upb_value_getrefcount(v); + if (refcount && upb_atomic_unref(refcount)) + upb_field_free(v, f); +} + upb_msg *upb_msg_new(upb_msgdef *md) { upb_msg *msg = malloc(md->size); // Clear all set bits and cached pointers. 
@@ -48,50 +62,11 @@ void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { upb_fielddef *f = upb_msg_iter_field(i); upb_valueptr p = _upb_msg_getptr(msg, f); upb_valuetype_t type = upb_field_valuetype(f); - if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f); + if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f); } free(msg); } -void upb_msg_recycle(upb_msg **_msg, upb_msgdef *md); - upb_msg *msg = *_msg; - if(msg && upb_atomic_only(&msg->refcount)) { - upb_msg_clear(msg); - } else { - upb_msg_unref(msg); - *_msg = upb_msg_new(); - } -} - -void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) { - upb_valueptr ptr; - if (upb_isarray(f)) { - } -} - -INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f); - assert(upb_field_ismm(f)); - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - upb_value val = upb_value_read(p, type); - if (!upb_msg_has(msg, f)) { - upb_msg_sethas(msg, f); - val = upb_field_tryrecycle(p, val, f, type); - } - return val; -} - -INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) { - upb_valueptr p = _upb_msg_getptr(msg, f); - upb_valuetype_t type = upb_field_valuetype(f); - if (upb_field_ismm(f)) { - _upb_field_unref(upb_value_read(p, type), f); - _upb_value_ref(val); - } - upb_msg_sethas(msg, f); - upb_value_write(p, val, upb_field_valuetype(f)); -} - INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); } @@ -112,61 +87,15 @@ void _upb_array_free(upb_array *arr, upb_fielddef *f) { upb_valuetype_t type = upb_elem_valuetype(f); for (upb_arraylen_t i = 0; i < arr->size; i++) { upb_valueptr p = _upb_array_getptr(arr, f, i); - _upb_elem_unref(upb_value_read(p, type), f); + upb_elem_unref(upb_value_read(p, type), f); } } if (arr->elements._void) free(arr->elements._void); free(arr); } -upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) { - upb_value v; - 
switch(type) { - case UPB_TYPE(MESSAGE): - case UPB_TYPE(GROUP): - v.msg = upb_msg_new(upb_downcast_msgdef(f->def)); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - v.str = upb_string_new(); - case UPB_VALUETYPE_ARRAY: - v.arr = upb_array_new(); - default: - abort(); - } - return v; -} - -static void upb_field_recycle(upb_value val) { - (void)val; -} - -upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f, - upb_valuetype_t type) { - if (val._void == NULL || !upb_atomic_only(val.refcount)) { - if (val._void != NULL) upb_atomic_unref(val.refcount); - val = upb_field_new(f, type); - upb_value_write(p, val, type); - } else { - upb_field_recycle(val); +void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md, + upb_handlers *handlers, bool merge) { + static upb_handlerset handlerset = { } - return val; -} - -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, - upb_status *status) { - upb_stringsrc *ssrc = upb_stringsrc_new(); - upb_stringsrc_reset(ssrc, str); - upb_decoder *d = upb_decoder_new(md); - upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc)); - - upb_decoder_free(d); - upb_stringsrc_free(ssrc); -} - -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str, - upb_status *status) { - (void)msg; - (void)md; - (void)str; - (void)status; } diff --git a/core/upb_msg.h b/core/upb_msg.h index 815a7cb..0569039 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -1,9 +1,15 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details. * - * Data structure for storing a message of protobuf data. + * Data structure for storing a message of protobuf data. Unlike Google's + * protobuf, upb_msg and upb_array are reference counted instead of having + * exclusive ownership of their fields. 
This is a better match for dynamic + * languages where statements like a.b = other_b are normal. + * + * upb's parsers and serializers could also be used to populate and serialize + * other kinds of message objects (even one generated by Google's protobuf). */ #ifndef UPB_MSG_H @@ -17,24 +23,6 @@ extern "C" { #endif -upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f, - upb_valuetype_t type); - -INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); } - -void _upb_field_free(upb_value v, upb_fielddef *f); -void _upb_elem_free(upb_value v, upb_fielddef *f); -INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) { - assert(upb_field_ismm(f)); - if (v.refcount && upb_atomic_unref(v.refcount)) - _upb_field_free(v, f); -} -INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) { - assert(upb_elem_ismm(f)); - if (v.refcount && upb_atomic_unref(v.refcount)) - _upb_elem_free(v, f); -} - /* upb_array ******************************************************************/ typedef uint32_t upb_arraylen_t; @@ -63,47 +51,6 @@ INLINE uint32_t upb_array_len(upb_array *a) { return a->len; } -INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) { - assert(elem < upb_array_len(a)); - return upb_value_read(_upb_array_getptr(a, f, elem), f->type); -} - -// For string or submessages, will release a ref on the previously set value. -// and take a ref on the new value. The array must already be at least "elem" -// long; to append use append_mutable. 
-INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem, - upb_value val) { - assert(elem < upb_array_len(a)); - upb_valueptr p = _upb_array_getptr(a, f, elem); - if (upb_elem_ismm(f)) { - _upb_elem_unref(upb_value_read(p, f->type), f); - _upb_value_ref(val); - } - upb_value_write(p, val, f->type); -} - -INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) { - if (a->len == a->size) { - a->len *= 2; - a->elements._void = realloc(a->elements._void, - a->len * upb_types[f->type].size); - } -} - -// Append an element to an array of string or submsg with the default value, -// returning it. This will try to reuse previously allocated memory. -INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) { - - assert(upb_elem_ismm(f)); - upb_array_resize(a, f); - upb_valueptr p = _upb_array_getptr(a, f, a->len++); - upb_valuetype_t type = upb_elem_valuetype(f); - upb_value val = upb_value_read(p, type); - val = upb_field_tryrecycle(p, val, f, type); - return val; -} - - /* upb_msg ********************************************************************/ struct _upb_msg { @@ -111,19 +58,14 @@ struct _upb_msg { uint8_t data[4]; // We allocate the appropriate amount per message. }; -// INTERNAL-ONLY FUNCTIONS. - void _upb_msg_free(upb_msg *msg, upb_msgdef *md); -// Returns a pointer to the given field. INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { upb_valueptr p; p._void = &msg->data[f->byte_offset]; return p; } -// PUBLIC FUNCTIONS. - // Creates a new msg of the given type. upb_msg *upb_msg_new(upb_msgdef *md); @@ -143,38 +85,9 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } -// Used to obtain an empty message of the given type, attempting to reuse the -// memory pointed to by msg if it has no other referents. -void upb_msg_recycle(upb_msg **_msg, upb_msgdef *md); - -// For a repeated field, appends the given scalar value (ie. 
not a message or -// array) to the field's array; for non-repeated fields, overwrites the -// existing value with this one. -// REQUIRES: !upb_issubmsg(f) -void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val); - -upb_msg *upb_msg_append_emptymsg(upb_msg *msg, upb_fielddef *f); - -// Returns the current value of the given field if set, or the default value if -// not set. The returned value is not mutable! (In practice this only matters -// for submessages and arrays). -INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - if (upb_msg_has(msg, f)) { - return upb_value_read(_upb_msg_getptr(msg, f), f->type); - } else { - return f->default_value; - } -} - -// If the given string, submessage, or array is already set, returns it. -// Otherwise sets it and returns an empty instance, attempting to reuse any -// previously allocated memory. -INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f); - -// Sets the current value of the field. If this is a string, array, or -// submessage field, releases a ref on the value (if any) that was previously -// set. -INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); +// Registers a set of handlers that will populate this msgdef. +void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md, + upb_handlers *handlers); #ifdef __cplusplus } /* extern "C" */ -- cgit v1.2.3