From 3affb319260263efc3cee502896d9f981186c7da Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 5 Feb 2011 18:53:32 -0800 Subject: Tons of work: we're close to passing test_vs_proto2 again. --- Makefile | 7 +- core/upb.h | 97 +----------------------- core/upb_def.c | 2 + core/upb_glue.c | 38 ++++++++++ core/upb_glue.h | 42 +++++++++++ core/upb_msg.c | 192 +++++++++++++++++++++++++++++++++++++++++++----- core/upb_msg.h | 133 +++++++++++++++++++++++++++++++-- core/upb_stream_vtbl.h | 15 ---- core/upb_string.c | 2 +- stream/upb_decoder.c | 42 ++--------- stream/upb_decoder.h | 38 +++++++++- stream/upb_strstream.c | 44 +++++------ stream/upb_strstream.h | 12 ++- tests/test_decoder.c | 9 ++- tests/test_vs_proto2.cc | 79 +++++++++++--------- 15 files changed, 505 insertions(+), 247 deletions(-) create mode 100644 core/upb_glue.c create mode 100644 core/upb_glue.h diff --git a/Makefile b/Makefile index bea6980..a96c3f7 100644 --- a/Makefile +++ b/Makefile @@ -72,6 +72,7 @@ STREAM= \ stream/upb_textprinter.c \ stream/upb_strstream.c \ core/upb_msg.c \ + core/upb_glue.c \ SRC=$(CORE) $(STREAM) @@ -121,9 +122,9 @@ TESTS=tests/test_string \ tests/test_table \ tests/test_def \ tests/test_stream \ - tests/test_decoder \ -# tests/t.test_vs_proto2.googlemessage1 \ -# tests/t.test_vs_proto2.googlemessage2 \ + tests/t.test_vs_proto2.googlemessage1 \ + tests/t.test_vs_proto2.googlemessage2 \ +# tests/test_decoder \ # tests/test.proto.pb tests: $(LIBUPB) $(TESTS) diff --git a/core/upb.h b/core/upb.h index 243c7bc..779f85a 100644 --- a/core/upb.h +++ b/core/upb.h @@ -204,97 +204,6 @@ INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) { return val.val.refcount; } -// A pointer to a .proto value. The owner must have an out-of-band way of -// knowing the type, so it knows which union member to use. -typedef union { - double *_double; - float *_float; - int32_t *int32; - int64_t *int64; - uint8_t *uint8; - uint32_t *uint32; - uint64_t *uint64; - bool *_bool; - upb_string **str; - upb_msg **msg; - upb_array **arr; - void *_void; -} upb_valueptr; - -INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->val._double}; - return ptr; -} - -// Reads or writes a upb_value from an address represented by a upb_value_ptr. -// We need to know the value type to perform this operation, because we need to -// know how much memory to copy (and for big-endian machines, we need to know -// where in the upb_value the data goes). -// -// For little endian-machines where we didn't mind overreading, we could make -// upb_value_read simply use memcpy(). -INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { - upb_value val; - -#define CASE(t, member_name) \ - case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - default: break; - } - return val; - -#undef CASE -} - -INLINE void upb_value_write(upb_valueptr ptr, upb_value val, - upb_fieldtype_t ft) { -#define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - default: break; - } - -#undef CASE -} - // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. enum upb_status_code { @@ -317,10 +226,12 @@ enum upb_status_code { // TODO: consider adding error space and code, to let ie. errno be stored // as a proper code, or application-specific error codes. -typedef struct { +struct _upb_status { char code; upb_string *str; -} upb_status; +}; + +typedef struct _upb_status upb_status; #define UPB_STATUS_INIT {UPB_OK, NULL} #define UPB_ERRORMSG_MAXLEN 256 diff --git a/core/upb_def.c b/core/upb_def.c index 2eda89f..993d9e3 100644 --- a/core/upb_def.c +++ b/core/upb_def.c @@ -1019,6 +1019,7 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs, upb_symtab_ent *tmptab_e; for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e; tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) { + printf("Inserting def for " UPB_STRFMT "\n", UPB_STRARG(tmptab_e->def->fqname)); upb_symtab_ent *symtab_e = upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); if(symtab_e) { @@ -1132,6 +1133,7 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status) if (upb_ok(status)) upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status); upb_defbuilder_uninit(&b); + upb_handlers_uninit(&handlers); } diff --git a/core/upb_glue.c b/core/upb_glue.c new file mode 100644 index 0000000..7540a3a --- /dev/null +++ b/core/upb_glue.c @@ -0,0 +1,38 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_glue.h" +#include "upb_msg.h" +#include "upb_decoder.h" +#include "upb_strstream.h" + +void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, + upb_status *status) { + upb_stringsrc strsrc; + upb_stringsrc_init(&strsrc); + upb_stringsrc_reset(&strsrc, str); + + upb_decoder d; + upb_decoder_init(&d, md); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); + upb_src *src = upb_decoder_src(&d); + + upb_msgpopulator p; + upb_msgpopulator_init(&p); + upb_msgpopulator_reset(&p, msg, md); + + upb_handlers h; + upb_handlers_init(&h); + upb_msgpopulator_register_handlers(&p, &h); + upb_src_sethandlers(src, &h); + + upb_src_run(src, status); + + upb_stringsrc_uninit(&strsrc); + upb_decoder_uninit(&d); + upb_msgpopulator_uninit(&p); + upb_handlers_uninit(&h); +} diff --git a/core/upb_glue.h b/core/upb_glue.h new file mode 100644 index 0000000..61111f5 --- /dev/null +++ b/core/upb_glue.h @@ -0,0 +1,42 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * upb's core components like upb_decoder and upb_msg are carefully designed to + * avoid depending on each other for maximum orthogonality. In other words, + * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is + * just one such structure. You can use upb_decoder without having to link in + * upb_msg. + * + * However, for convenience we provide functions here for doing common + * operations like deserializing protobuf binary format into a upb_msg. The + * compromise is that this file drags in almost all of upb as a dependency, + * which could be undesirable if you're trying to use a trimmed-down build of + * upb. + * + * Copyright (c) 2011 Joshua Haberman. See LICENSE for details. + */ + +#ifndef UPB_GLUE_H +#define UPB_GLUE_H + +#ifdef __cplusplus +extern "C" { +#endif + +// Forward-declares so we don't have to include everything in this .h file. +// Clients should use the regular, typedef'd names (eg. upb_string). +struct _upb_string; +struct _upb_msg; +struct _upb_msgdef; +struct _upb_status; + +// Decodes the given string, which must be in protobuf binary format, to the +// given upb_msg with msgdef "md", storing the status of the operation in "s". +void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg, + struct _upb_msgdef *md, struct _upb_status *s); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/core/upb_msg.c b/core/upb_msg.c index e9f863d..1ad73bc 100644 --- a/core/upb_msg.c +++ b/core/upb_msg.c @@ -10,6 +10,18 @@ #include "upb_decoder.h" #include "upb_strstream.h" +static uint32_t upb_round_up_pow2(uint32_t v) { + // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + static void upb_elem_free(upb_value v, upb_fielddef *f) { switch(f->type) { case UPB_TYPE(MESSAGE): @@ -47,6 +59,58 @@ static void upb_field_unref(upb_value v, upb_fielddef *f) { upb_field_free(v, f); } + +/* upb_array ******************************************************************/ + +upb_array *upb_array_new(void) { + upb_array *arr = malloc(sizeof(*arr)); + upb_atomic_refcount_init(&arr->refcount, 1); + arr->size = 0; + arr->len = 0; + arr->ptr = NULL; + return arr; +} + +void upb_array_recycle(upb_array **_arr, upb_fielddef *f) { + upb_array *arr = *_arr; + if(arr && upb_atomic_read(&arr->refcount) == 1) { + arr->len = 0; + } else { + upb_array_unref(arr, f); + *_arr = upb_array_new(); + } +} + +void _upb_array_free(upb_array *arr, upb_fielddef *f) { + if (upb_elem_ismm(f)) { + // Need to release refs on sub-objects. + upb_valuetype_t type = upb_elem_valuetype(f); + for (upb_arraylen_t i = 0; i < arr->size; i++) { + upb_valueptr p = _upb_array_getptr(arr, f, i); + upb_elem_unref(upb_value_read(p, type), f); + } + } + free(arr->ptr); + free(arr); +} + +void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) { + size_t type_size = upb_types[f->type].size; + upb_arraylen_t old_size = arr->size; + if (old_size < len) { + // Need to resize. + size_t new_size = upb_round_up_pow2(len); + arr->ptr = realloc(arr->ptr, new_size * type_size); + arr->size = new_size; + memset(arr->ptr + (old_size * type_size), 0, + (new_size - old_size) * type_size); + } + arr->len = len; +} + + +/* upb_msg ********************************************************************/ + upb_msg *upb_msg_new(upb_msgdef *md) { upb_msg *msg = malloc(md->size); // Clear all set bits and cached pointers. @@ -67,35 +131,123 @@ void _upb_msg_free(upb_msg *msg, upb_msgdef *md) { free(msg); } +void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) { + upb_msg *msg = *_msg; + if(msg && upb_atomic_read(&msg->refcount) == 1) { + upb_msg_clear(msg, msgdef); + } else { + upb_msg_unref(msg, msgdef); + *_msg = upb_msg_new(msgdef); + } +} + INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); } +static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) { + upb_valueptr p = _upb_msg_getptr(msg, f); + if (upb_isarray(f)) { + // Create/recycle/resize the array if necessary, and find a pointer to + // a newly-appended element. + if (!upb_msg_has(msg, f)) { + upb_array_recycle(p.arr, f); + upb_msg_sethas(msg, f); + } + assert(*p.arr != NULL); + upb_arraylen_t oldlen = upb_array_len(*p.arr); + upb_array_resize(*p.arr, f, oldlen + 1); + p = _upb_array_getptr(*p.arr, f, oldlen); + } + return p; +} -upb_array *upb_array_new(void) { - upb_array *arr = malloc(sizeof(*arr)); - upb_atomic_refcount_init(&arr->refcount, 1); - arr->size = 0; - arr->len = 0; - arr->elements._void = NULL; - return arr; +static void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) { + upb_valueptr p = upb_msg_getappendptr(msg, f); + if (upb_isstring(f)) { + // We do: + // - upb_string_recycle(), upb_string_substr() instead of + // - upb_string_unref(), upb_string_getref() + // because we have a convenient place of caching these string objects for + // reuse, where as the upb_src who is sending us these strings may not. + // This saves the upb_src from allocating new upb_strings all the time to + // give us. + // + // If you were using this to copy one upb_msg to another this would + // allocate string objects whereas a upb_string_getref could have avoided + // those allocations completely; if this is an issue, we could make it an + // option of the upb_msgpopulator which behavior is desired. + upb_string *src = upb_value_getstr(val); + upb_string_recycle(p.str); + upb_string_substr(*p.str, src, 0, upb_string_len(src)); + } else { + upb_value_write(p, val, f->type); + } } -void _upb_array_free(upb_array *arr, upb_fielddef *f) { - if (upb_elem_ismm(f)) { - // Need to release refs on sub-objects. - upb_valuetype_t type = upb_elem_valuetype(f); - for (upb_arraylen_t i = 0; i < arr->size; i++) { - upb_valueptr p = _upb_array_getptr(arr, f, i); - upb_elem_unref(upb_value_read(p, type), f); - } +upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) { + upb_valueptr p = upb_msg_getappendptr(msg, f); + if (upb_isarray(f) || !upb_msg_has(msg, f)) { + upb_msg_recycle(p.msg, msgdef); + upb_msg_clear(*p.msg, msgdef); + upb_msg_sethas(msg, f); } - if (arr->elements._void) free(arr->elements._void); - free(arr); + return *p.msg; } -void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md, - upb_handlers *handlers, bool merge) { - static upb_handlerset handlerset = { + +/* upb_msgpopulator ***********************************************************/ + +void upb_msgpopulator_init(upb_msgpopulator *p) { + upb_status_init(&p->status); +} + +void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md) { + p->top = p->stack; + p->limit = p->stack + sizeof(p->stack); + p->top->msg = m; + p->top->msgdef = md; +} + +void upb_msgpopulator_uninit(upb_msgpopulator *p) { + upb_status_uninit(&p->status); +} + +static upb_flow_t upb_msgpopulator_value(void *_p, upb_fielddef *f, upb_value val) { + upb_msgpopulator *p = _p; + upb_msg_appendval(p->top->msg, f, val); + return UPB_CONTINUE; +} + +static upb_flow_t upb_msgpopulator_startsubmsg(void *_p, upb_fielddef *f, + upb_handlers *delegate_to) { + upb_msgpopulator *p = _p; + (void)delegate_to; + upb_msg *oldmsg = p->top->msg; + if (++p->top == p->limit) { + upb_seterr(&p->status, UPB_ERROR, "Exceeded maximum nesting"); + return UPB_BREAK; } + upb_msgdef *msgdef = upb_downcast_msgdef(f->def); + p->top->msgdef = msgdef; + p->top->msg = upb_msg_appendmsg(oldmsg, f, msgdef); + return UPB_CONTINUE; +} + +static upb_flow_t upb_msgpopulator_endsubmsg(void *_p) { + upb_msgpopulator *p = _p; + --p->top; + return UPB_CONTINUE; +} + +void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h) { + static upb_handlerset handlerset = { + NULL, // startmsg + NULL, // endmsg + &upb_msgpopulator_value, + &upb_msgpopulator_startsubmsg, + &upb_msgpopulator_endsubmsg, + }; + upb_register_handlerset(h, &handlerset); + upb_set_handler_closure(h, p, &p->status); } diff --git a/core/upb_msg.h b/core/upb_msg.h index 0569039..8e04c95 100644 --- a/core/upb_msg.h +++ b/core/upb_msg.h @@ -23,21 +23,119 @@ extern "C" { #endif +// A pointer to a .proto value. The owner must have an out-of-band way of +// knowing the type, so it knows which union member to use. +typedef union { + double *_double; + float *_float; + int32_t *int32; + int64_t *int64; + uint8_t *uint8; + uint32_t *uint32; + uint64_t *uint64; + bool *_bool; + upb_string **str; + upb_msg **msg; + upb_array **arr; + void *_void; +} upb_valueptr; + +INLINE upb_valueptr upb_value_addrof(upb_value *val) { + upb_valueptr ptr = {&val->val._double}; + return ptr; +} + +// Reads or writes a upb_value from an address represented by a upb_value_ptr. +// We need to know the value type to perform this operation, because we need to +// know how much memory to copy (and for big-endian machines, we need to know +// where in the upb_value the data goes). +// +// For little endian-machines where we didn't mind overreading, we could make +// upb_value_read simply use memcpy(). +INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) { + upb_value val; + +#ifdef NDEBUG +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break; +#else +#define CASE(t, member_name) \ + case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = ft; break; +#endif + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: assert(false); + } + return val; + +#undef CASE +} + +INLINE void upb_value_write(upb_valueptr ptr, upb_value val, + upb_fieldtype_t ft) { + assert(val.type == ft); +#define CASE(t, member_name) \ + case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break; + + switch(ft) { + CASE(DOUBLE, _double) + CASE(FLOAT, _float) + CASE(INT32, int32) + CASE(INT64, int64) + CASE(UINT32, uint32) + CASE(UINT64, uint64) + CASE(SINT32, int32) + CASE(SINT64, int64) + CASE(FIXED32, uint32) + CASE(FIXED64, uint64) + CASE(SFIXED32, int32) + CASE(SFIXED64, int64) + CASE(BOOL, _bool) + CASE(ENUM, int32) + CASE(STRING, str) + CASE(BYTES, str) + CASE(MESSAGE, msg) + CASE(GROUP, msg) + default: assert(false); + } + +#undef CASE +} + /* upb_array ******************************************************************/ typedef uint32_t upb_arraylen_t; struct _upb_array { upb_atomic_refcount_t refcount; + // "len" and "size" are measured in elements, not bytes. upb_arraylen_t len; upb_arraylen_t size; - upb_valueptr elements; + char *ptr; }; void _upb_array_free(upb_array *a, upb_fielddef *f); INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f, uint32_t elem) { upb_valueptr p; - p._void = &a->elements.uint8[elem * upb_types[f->type].size]; + p._void = &a->ptr[elem * upb_types[f->type].size]; return p; } @@ -47,10 +145,18 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) { if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f); } +void upb_array_recycle(upb_array **arr, upb_fielddef *f); + INLINE uint32_t upb_array_len(upb_array *a) { return a->len; } +INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f, + upb_arraylen_t i) { + assert(i < upb_array_len(arr)); + return upb_value_read(_upb_array_getptr(arr, f, i), f->type); +} + /* upb_msg ********************************************************************/ struct _upb_msg { @@ -74,20 +180,37 @@ INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md); } +void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef); + // Tests whether the given field is explicitly set, or whether it will return a // default. INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; } +INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { + return upb_value_read(_upb_msg_getptr(msg, f), f->type); +} + // Unsets all field values back to their defaults. INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { memset(msg->data, 0, md->set_flags_bytes); } -// Registers a set of handlers that will populate this msgdef. -void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md, - upb_handlers *handlers); +typedef struct { + upb_msg *msg; + upb_msgdef *msgdef; +} upb_msgpopulator_frame; + +typedef struct { + upb_msgpopulator_frame stack[UPB_MAX_NESTING], *top, *limit; + upb_status status; +} upb_msgpopulator; + +void upb_msgpopulator_init(upb_msgpopulator *p); +void upb_msgpopulator_uninit(upb_msgpopulator *p); +void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md); +void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h); #ifdef __cplusplus } /* extern "C" */ diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h index a6990bc..da3eac8 100644 --- a/core/upb_stream_vtbl.h +++ b/core/upb_stream_vtbl.h @@ -62,14 +62,10 @@ typedef struct { struct _upb_bytesrc { upb_bytesrc_vtbl *vtbl; - upb_status status; - bool eof; }; struct _upb_bytesink { upb_bytesink_vtbl *vtbl; - upb_status status; - bool eof; }; struct _upb_src { @@ -78,14 +74,10 @@ struct _upb_src { INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); } INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { s->vtbl = vtbl; - s->eof = false; - upb_status_init(&s->status); } INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) { @@ -132,9 +124,6 @@ INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str, return true; } -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } - // upb_bytesink INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf, @@ -146,10 +135,6 @@ INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb return sink->vtbl->putstr(sink, str, status); } -INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) { - return &sink->status; -} - INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { va_list args; va_start(args, fmt); diff --git a/core/upb_string.c b/core/upb_string.c index c599728..bc5b772 100644 --- a/core/upb_string.c +++ b/core/upb_string.c @@ -56,7 +56,7 @@ static void upb_string_release(upb_string *str) { } void _upb_string_free(upb_string *str) { - if(str->cached_mem) free(str->cached_mem); + free(str->cached_mem); upb_string_release(str); free(str); } diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c index 3a279a1..78710b9 100644 --- a/stream/upb_decoder.c +++ b/stream/upb_decoder.c @@ -47,36 +47,6 @@ done: INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -// The decoder keeps a stack with one entry per level of recursion. -// upb_decoder_frame is one frame of that stack. -typedef struct { - upb_msgdef *msgdef; - size_t end_offset; // For groups, 0. -} upb_decoder_frame; - -struct upb_decoder { - // Immutable state of the decoder. - upb_src src; - upb_dispatcher dispatcher; - upb_bytesrc *bytesrc; - upb_msgdef *toplevel_msgdef; - upb_decoder_frame stack[UPB_MAX_NESTING]; - - // Mutable state of the decoder. - - // Where we will store any errors that occur. - upb_status *status; - - // Stack entries store the offset where the submsg ends (for groups, 0). - upb_decoder_frame *top, *limit; - - // Current input buffer. - upb_string *buf; - - // The offset within the overall stream represented by the *beginning* of buf. - size_t buf_stream_offset; -}; - typedef struct { // Our current position in the data buffer. const char *ptr; @@ -279,8 +249,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) { upb_string *str = NULL; // TODO: handle UPB_SKIPSUBMSG -#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err -#define CHECK(expr) if (!expr) goto err; +#define CHECK_FLOW(expr) if ((expr) == UPB_BREAK) { assert(!upb_ok(status)); goto err; } +#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; } CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); @@ -300,6 +270,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) { if (!upb_decode_tag(d, &state, &tag)) { if (status->code == UPB_EOF && d->top == d->stack) { // Normal end-of-file. + printf("Normal end-of-file!\n"); upb_clearerr(status); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); upb_string_unref(str); @@ -397,18 +368,16 @@ void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) { d->top->end_offset = 0; } -upb_decoder *upb_decoder_new(upb_msgdef *msgdef) { +void upb_decoder_init(upb_decoder *d, upb_msgdef *msgdef) { static upb_src_vtbl vtbl = { &upb_decoder_sethandlers, &upb_decoder_run, }; - upb_decoder *d = malloc(sizeof(*d)); upb_src_init(&d->src, &vtbl); upb_dispatcher_init(&d->dispatcher); d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; d->buf = NULL; - return d; } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { @@ -421,9 +390,8 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { d->buf = NULL; } -void upb_decoder_free(upb_decoder *d) { +void upb_decoder_uninit(upb_decoder *d) { upb_string_unref(d->buf); - free(d); } upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; } diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h index 6ba4d77..0684ea2 100644 --- a/stream/upb_decoder.h +++ b/stream/upb_decoder.h @@ -27,14 +27,44 @@ extern "C" { /* upb_decoder *****************************************************************/ +// The decoder keeps a stack with one entry per level of recursion. +// upb_decoder_frame is one frame of that stack. +typedef struct { + upb_msgdef *msgdef; + size_t end_offset; // For groups, 0. +} upb_decoder_frame; + +struct _upb_decoder { + // Immutable state of the decoder. + upb_src src; + upb_dispatcher dispatcher; + upb_bytesrc *bytesrc; + upb_msgdef *toplevel_msgdef; + upb_decoder_frame stack[UPB_MAX_NESTING]; + + // Mutable state of the decoder. + + // Where we will store any errors that occur. + upb_status *status; + + // Stack entries store the offset where the submsg ends (for groups, 0). + upb_decoder_frame *top, *limit; + + // Current input buffer. + upb_string *buf; + + // The offset within the overall stream represented by the *beginning* of buf. + size_t buf_stream_offset; +}; + // A upb_decoder decodes the binary protocol buffer format, writing the data it // decodes to a upb_sink. -struct upb_decoder; -typedef struct upb_decoder upb_decoder; +struct _upb_decoder; +typedef struct _upb_decoder upb_decoder; // Allocates and frees a upb_decoder, respectively. -upb_decoder *upb_decoder_new(upb_msgdef *md); -void upb_decoder_free(upb_decoder *d); +void upb_decoder_init(upb_decoder *d, upb_msgdef *md); +void upb_decoder_uninit(upb_decoder *d); // Resets the internal state of an already-allocated decoder. This puts it in a // state where it has not seen any data, and expects the next data to be from diff --git a/stream/upb_strstream.c b/stream/upb_strstream.c index d3fd4e0..8a230ae 100644 --- a/stream/upb_strstream.c +++ b/stream/upb_strstream.c @@ -9,25 +9,6 @@ #include #include "upb_string.h" -struct upb_stringsrc { - upb_bytesrc bytesrc; - upb_string *str; - upb_strlen_t offset; -}; - -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { - if (str != s->str) { - if (s->str) upb_string_unref(s->str); - s->str = upb_string_getref(str); - } - s->bytesrc.eof = false; -} - -void upb_stringsrc_free(upb_stringsrc *s) { - if (s->str) upb_string_unref(s->str); - free(s); -} - static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, upb_strlen_t count, upb_status *status) { upb_stringsrc *src = (upb_stringsrc*)_src; @@ -45,27 +26,40 @@ static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, upb_status *status) { upb_stringsrc *src = (upb_stringsrc*)_src; - if (src->offset == upb_string_len(str)) { + if (src->offset == upb_string_len(src->str)) { upb_seterr(status, UPB_EOF, ""); return false; } else { - upb_string_substr(str, src->str, 0, upb_string_len(src->str)); + upb_strlen_t len = upb_string_len(src->str) - src->offset; + upb_string_substr(str, src->str, src->offset, len); + src->offset += len; + assert(src->offset == upb_string_len(src->str)); return true; } } -upb_stringsrc *upb_stringsrc_new() { +void upb_stringsrc_init(upb_stringsrc *s) { static upb_bytesrc_vtbl bytesrc_vtbl = { upb_stringsrc_read, upb_stringsrc_getstr, }; - - upb_stringsrc *s = malloc(sizeof(*s)); s->str = NULL; upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl); - return s; } +void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { + if (str != s->str) { + upb_string_unref(s->str); + s->str = upb_string_getref(str); + } + s->offset = 0; +} + +void upb_stringsrc_uninit(upb_stringsrc *s) { + upb_string_unref(s->str); +} + + upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { return &s->bytesrc; } diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h index d01d21f..1a8792b 100644 --- a/stream/upb_strstream.h +++ b/stream/upb_strstream.h @@ -18,12 +18,16 @@ extern "C" { /* upb_stringsrc **************************************************************/ -struct upb_stringsrc; -typedef struct upb_stringsrc upb_stringsrc; +struct _upb_stringsrc { + upb_bytesrc bytesrc; + upb_string *str; + upb_strlen_t offset; +}; +typedef struct _upb_stringsrc upb_stringsrc; // Create/free a stringsrc. -upb_stringsrc *upb_stringsrc_new(); -void upb_stringsrc_free(upb_stringsrc *s); +void upb_stringsrc_init(upb_stringsrc *s); +void upb_stringsrc_uninit(upb_stringsrc *s); // Resets the stringsrc to a state where it will vend the given string. The // stringsrc will take a reference on the string, so the caller need not ensure diff --git a/tests/test_decoder.c b/tests/test_decoder.c index f48472d..714871a 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -13,13 +13,14 @@ int main() { upb_stdio_reset(in, stdin); upb_stdio *out = upb_stdio_new(); upb_stdio_reset(out, stdout); - upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); - upb_decoder_reset(d, upb_stdio_bytesrc(in)); + upb_decoder d; + upb_decoder_init(&d, upb_downcast_msgdef(fds)); + upb_decoder_reset(&d, upb_stdio_bytesrc(in)); upb_textprinter *p = upb_textprinter_new(); upb_handlers handlers; upb_handlers_init(&handlers); upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false); - upb_src *src = upb_decoder_src(d); + upb_src *src = upb_decoder_src(&d); upb_src_sethandlers(src, &handlers); upb_status status = UPB_STATUS_INIT; @@ -30,7 +31,7 @@ int main() { upb_status_uninit(&status); upb_stdio_free(in); upb_stdio_free(out); - upb_decoder_free(d); + upb_decoder_uninit(&d); upb_textprinter_free(p); upb_def_unref(fds); upb_symtab_unref(symtab); diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 9446b8f..bf0296c 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -4,9 +4,10 @@ #include #include #include -#include "upb_msg.h" -#include "upb_def.h" #include "upb_decoder.h" +#include "upb_def.h" +#include "upb_glue.h" +#include "upb_msg.h" #include "upb_strstream.h" int num_assertions = 0; @@ -26,7 +27,7 @@ void compare_arrays(const google::protobuf::Reflection *r, upb_msg *upb_msg, upb_fielddef *upb_f) { ASSERT(upb_msg_has(upb_msg, upb_f)); - upb_array *arr = upb_msg_get(upb_msg, upb_f).arr; + upb_array *arr = upb_value_getarr(upb_msg_get(upb_msg, upb_f)); ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f)); for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) { upb_value v = upb_array_get(arr, upb_f, i); @@ -34,37 +35,38 @@ void compare_arrays(const google::protobuf::Reflection *r, default: ASSERT(false); case UPB_TYPE(DOUBLE): - ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == v._double); + ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == upb_value_getdouble(v)); break; case UPB_TYPE(FLOAT): - ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == v._float); + ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == upb_value_getfloat(v)); break; case UPB_TYPE(INT64): case UPB_TYPE(SINT64): case UPB_TYPE(SFIXED64): - ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == v.int64); + ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == upb_value_getint64(v)); break; case UPB_TYPE(UINT64): case UPB_TYPE(FIXED64): - ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == v.uint64); + ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == upb_value_getuint64(v)); break; case UPB_TYPE(SFIXED32): case UPB_TYPE(SINT32): case UPB_TYPE(INT32): case UPB_TYPE(ENUM): - ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == v.int32); + ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == upb_value_getint32(v)); break; case UPB_TYPE(FIXED32): case UPB_TYPE(UINT32): - ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == v.uint32); + ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == upb_value_getuint32(v)); break; case UPB_TYPE(BOOL): - ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == v._bool); + ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == upb_value_getbool(v)); break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); - std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); + upb_string *upbstr = upb_value_getstr(v); + std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr)); ASSERT(str == str2); break; } @@ -72,7 +74,7 @@ void compare_arrays(const google::protobuf::Reflection *r, case UPB_TYPE(MESSAGE): ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL); compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i), - v.msg, upb_downcast_msgdef(upb_f->def)); + upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def)); } } } @@ -87,44 +89,45 @@ void compare_values(const google::protobuf::Reflection *r, default: ASSERT(false); case UPB_TYPE(DOUBLE): - ASSERT(r->GetDouble(proto2_msg, proto2_f) == v._double); + ASSERT(r->GetDouble(proto2_msg, proto2_f) == upb_value_getdouble(v)); break; case UPB_TYPE(FLOAT): - ASSERT(r->GetFloat(proto2_msg, proto2_f) == v._float); + ASSERT(r->GetFloat(proto2_msg, proto2_f) == upb_value_getfloat(v)); break; case UPB_TYPE(INT64): case UPB_TYPE(SINT64): case UPB_TYPE(SFIXED64): - ASSERT(r->GetInt64(proto2_msg, proto2_f) == v.int64); + ASSERT(r->GetInt64(proto2_msg, proto2_f) == upb_value_getint64(v)); break; case UPB_TYPE(UINT64): case UPB_TYPE(FIXED64): - ASSERT(r->GetUInt64(proto2_msg, proto2_f) == v.uint64); + ASSERT(r->GetUInt64(proto2_msg, proto2_f) == upb_value_getuint64(v)); break; case UPB_TYPE(SFIXED32): case UPB_TYPE(SINT32): case UPB_TYPE(INT32): case UPB_TYPE(ENUM): - ASSERT(r->GetInt32(proto2_msg, proto2_f) == v.int32); + ASSERT(r->GetInt32(proto2_msg, proto2_f) == upb_value_getint32(v)); break; case UPB_TYPE(FIXED32): case UPB_TYPE(UINT32): - ASSERT(r->GetUInt32(proto2_msg, proto2_f) == v.uint32); + ASSERT(r->GetUInt32(proto2_msg, proto2_f) == upb_value_getuint32(v)); break; case UPB_TYPE(BOOL): - ASSERT(r->GetBool(proto2_msg, proto2_f) == v._bool); + ASSERT(r->GetBool(proto2_msg, proto2_f) == upb_value_getbool(v)); break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): { std::string str = r->GetString(proto2_msg, proto2_f); - std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str)); + upb_string *upbstr = upb_value_getstr(v); + std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr)); ASSERT(str == str2); break; } case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): compare(r->GetMessage(proto2_msg, proto2_f), - v.msg, upb_downcast_msgdef(upb_f->def)); + upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def)); } } @@ -173,7 +176,7 @@ void parse_and_compare(MESSAGE_CIDENT *proto2_msg, // Parse to both proto2 and upb. ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str))); upb_status status = UPB_STATUS_INIT; - upb_msg_decodestr(upb_msg, upb_md, str, &status); + upb_strtomsg(str, upb_msg, upb_md, &status); ASSERT(upb_ok(&status)); compare(*proto2_msg, upb_msg, upb_md); } @@ -205,24 +208,28 @@ int main(int argc, char *argv[]) upb_symtab_add_descriptorproto(symtab); upb_def *fds_msgdef = upb_symtab_lookup( symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); + assert(fds_msgdef); - upb_stringsrc *ssrc = upb_stringsrc_new(); - upb_stringsrc_reset(ssrc, fds); - upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef)); - upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc)); - upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status); + upb_stringsrc ssrc; + upb_stringsrc_init(&ssrc); + upb_stringsrc_reset(&ssrc, fds); + upb_decoder decoder; + upb_decoder_init(&decoder, upb_downcast_msgdef(fds_msgdef)); + upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&ssrc)); + upb_symtab_addfds(symtab, upb_decoder_src(&decoder), &status); if(!upb_ok(&status)) { fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); upb_printerr(&status); return 1; } upb_string_unref(fds); - upb_decoder_free(decoder); - upb_stringsrc_free(ssrc); + upb_decoder_uninit(&decoder); + upb_stringsrc_uninit(&ssrc); upb_string *proto_name = upb_strdupc(MESSAGE_NAME); - upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name)); - if(!def) { + upb_def *def = upb_symtab_lookup(symtab, proto_name); + upb_msgdef *msgdef; + if(!def || !(msgdef = upb_dyncast_msgdef(def))) { fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", UPB_STRARG(proto_name)); return 1; @@ -238,13 +245,13 @@ int main(int argc, char *argv[]) // Run twice to test proper object reuse. MESSAGE_CIDENT proto2_msg; - upb_msg *upb_msg = upb_msg_new(def); - parse_and_compare(&proto2_msg, upb_msg, def, str); - parse_and_compare(&proto2_msg, upb_msg, def, str); + upb_msg *upb_msg = upb_msg_new(msgdef); + parse_and_compare(&proto2_msg, upb_msg, msgdef, str); + parse_and_compare(&proto2_msg, upb_msg, msgdef, str); printf("All tests passed, %d assertions.\n", num_assertions); - upb_msg_unref(upb_msg, def); - upb_def_unref(UPB_UPCAST(def)); + upb_msg_unref(upb_msg, msgdef); + upb_def_unref(UPB_UPCAST(msgdef)); upb_string_unref(str); upb_symtab_unref(symtab); -- cgit v1.2.3