From be5ddd8a645eaa949a8d500718257fb7cb71cf44 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Jul 2010 19:25:39 -0700 Subject: Tweaks to upb_src/upb_sink interfaces. --- Makefile | 7 +- src/upb_atomic.h | 23 +++---- src/upb_decoder.c | 171 ++++++++++++++++++++++++------------------------- src/upb_decoder.h | 2 +- src/upb_def.c | 28 ++++---- src/upb_def.h | 2 +- src/upb_srcsink.h | 127 ------------------------------------ src/upb_srcsink_vtbl.h | 93 --------------------------- src/upb_stream.h | 121 ++++++++++++++++++++++++++++++++++ src/upb_stream_vtbl.h | 93 +++++++++++++++++++++++++++ src/upb_string.h | 29 +++++++-- tests/test_table.cc | 8 +-- 12 files changed, 358 insertions(+), 346 deletions(-) delete mode 100644 src/upb_srcsink.h delete mode 100644 src/upb_srcsink_vtbl.h create mode 100644 src/upb_stream.h create mode 100644 src/upb_stream_vtbl.h diff --git a/Makefile b/Makefile index dec18ec..1ba7400 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) all: $(ALL) clean: rm -rf $(LIBUPB) $(LIBUPB_PIC) - rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) + rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*) rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* rm -rf tests/tests tests/t.* tests/test_table rm -rf descriptor/descriptor.pb @@ -46,7 +46,7 @@ clean: cd lang_ext/python && python setup.py clean --all # The core library (src/libupb.a) -SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c \ +SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c src/upb_string.c \ descriptor/descriptor.c # Parts of core that are yet to be converted. 
OTHERSRC=src/upb_encoder.c src/upb_text.c @@ -86,11 +86,12 @@ tests/test.proto.pb: tests/test.proto # TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -tests: tests/tests \ +TESTS=tests/tests \ tests/test_table \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ tests/test.proto.pb +$(TESTS): src/libupb.a #VALGRIND=valgrind --leak-check=full --error-exitcode=1 VALGRIND= diff --git a/src/upb_atomic.h b/src/upb_atomic.h index c2cb8ba..01fc8a2 100644 --- a/src/upb_atomic.h +++ b/src/upb_atomic.h @@ -29,7 +29,6 @@ extern "C" { #define INLINE static inline #endif -#define UPB_THREAD_UNSAFE #ifdef UPB_THREAD_UNSAFE /* Non-thread-safe implementations. ******************************************/ @@ -65,15 +64,6 @@ INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { return ret; } -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } - #endif /* Atomic refcount ************************************************************/ @@ -111,10 +101,6 @@ INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { return __sync_fetch_and_add(&a->v, 0); } -INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) { - a->v = val; -} - #elif defined(WIN32) /* Windows defines atomic increment/decrement. */ @@ -145,7 +131,14 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { #ifdef UPB_THREAD_UNSAFE -/* Already defined. 
*/ +typedef struct { +} upb_rwlock_t; + +INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } #elif defined(UPB_USE_PTHREADS) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index dd8ffcd..e3fdc49 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -68,9 +68,6 @@ struct upb_decoder { upb_strlen_t packed_end_offset; - // String we return for string values. We try to recycle it if possible. - upb_string *str; - // We keep a stack of messages we have recursed into. upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; }; @@ -93,18 +90,19 @@ static bool upb_decoder_nextbuf(upb_decoder *d) d->buf_bytesleft); } - // Recycle old buffer, pull new one. + // Recycle old buffer. if(d->buf) { - upb_bytesrc_recycle(d->bytesrc, d->buf); + d->buf = upb_string_tryrecycle(d->buf); d->buf_offset -= upb_string_len(d->buf); d->buf_stream_offset += upb_string_len(d->buf); } - d->buf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE); - // Handle cases arising from error or EOF. - if(d->buf) { + // Pull next buffer. + if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { d->buf_bytesleft += upb_string_len(d->buf); + return true; } else { + // Error or EOF. if(!upb_bytesrc_eof(d->bytesrc)) { // Error from bytesrc. upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); @@ -113,9 +111,11 @@ static bool upb_decoder_nextbuf(upb_decoder *d) // EOF from bytesrc and we don't have any residual bytes left. d->src.eof = true; return false; + } else { + // No more data left from the bytesrc, but we still have residual bytes. 
+ return true; } } - return true; } static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) @@ -369,85 +369,86 @@ again: bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - upb_wire_type_t native_wire_type = upb_types[d->field->type].native_wire_type; - if(native_wire_type == UPB_WIRE_TYPE_DELIMITED) { - // A string, bytes, or a length-delimited submessage. The latter isn't - // technically a string, but can be gotten as one to perform lazy parsing. - d->str = upb_string_tryrecycle(d->str); - const upb_strlen_t total_len = d->delimited_len; - if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { - // The entire string is inside our current buffer, so we can just - // return a substring of the buffer without copying. - upb_string_substr(d->str, d->buf, - upb_string_len(d->buf) - d->buf_bytesleft, - total_len); - upb_decoder_skipbytes(d, total_len); - *val.str = d->str; - } else { - // The string spans buffers, so we must copy from the current buffer, - // the next buffer (if we have one), and finally from the bytesrc. - uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); - upb_strlen_t len = 0; - if(d->buf_offset < 0) { - // Residual bytes we need to copy from tmpbuf. - memcpy(str, d->tmpbuf, -d->buf_offset); - len += -d->buf_offset; - } - if(d->buf) { - upb_strlen_t to_copy = - UPB_MIN(total_len - len, upb_string_len(d->buf) - d->buf_offset); - memcpy(str + len, upb_string_getrobuf(d->buf) + d->buf_offset, to_copy); - } - upb_decoder_skipbytes(d, len); - upb_string_getrwbuf(d->str, len); // Cheap resize. 
- if(len < total_len) { - if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } - d->buf_stream_offset += total_len - len; - } + switch(upb_types[d->field->type].native_wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint32_t low, high; + if(!upb_decoder_readv64(d, &low, &high)) return false; + uint64_t u64 = ((uint64_t)high << 32) | low; + if(d->field->type == UPB_TYPE(SINT64)) + *val.int64 = upb_zzdec_64(u64); + else + *val.uint64 = u64; + break; } + case UPB_WIRE_TYPE_32BIT_VARINT: { + uint32_t u32; + if(!upb_decoder_readv32(d, &u32)) return false; + if(d->field->type == UPB_TYPE(SINT32)) + *val.int32 = upb_zzdec_32(u32); + else + *val.uint32 = u32; + break; + } + case UPB_WIRE_TYPE_64BIT: + if(!upb_decoder_readf64(d, val.uint64)) return false; + break; + case UPB_WIRE_TYPE_32BIT: + if(!upb_decoder_readf32(d, val.uint32)) return false; + break; + default: + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Attempted to call getval on a group."); + return false; + } + // For a packed field where we have not reached the end, we leave the field + // in the decoder so we will return it again without parsing a key. + if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || + upb_decoder_offset(d) >= d->packed_end_offset) { d->field = NULL; + } + return true; +} + +bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { + // A string, bytes, or a length-delimited submessage. The latter isn't + // technically a string, but can be gotten as one to perform lazy parsing. + const int32_t total_len = d->delimited_len; + if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { + // The entire string is inside our current buffer, so we can just + // return a substring of the buffer without copying. 
+ upb_string_substr(str, d->buf, + upb_string_len(d->buf) - d->buf_bytesleft, + total_len); + upb_decoder_skipbytes(d, total_len); } else { - switch(native_wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint32_t low, high; - if(!upb_decoder_readv64(d, &low, &high)) return false; - uint64_t u64 = ((uint64_t)high << 32) | low; - if(d->field->type == UPB_TYPE(SINT64)) - *val.int64 = upb_zzdec_64(u64); - else - *val.uint64 = u64; - break; - } - case UPB_WIRE_TYPE_32BIT_VARINT: { - uint32_t u32; - if(!upb_decoder_readv32(d, &u32)) return false; - if(d->field->type == UPB_TYPE(SINT32)) - *val.int32 = upb_zzdec_32(u32); - else - *val.uint32 = u32; - break; - } - case UPB_WIRE_TYPE_64BIT: - if(!upb_decoder_readf64(d, val.uint64)) return false; - break; - case UPB_WIRE_TYPE_32BIT: - if(!upb_decoder_readf32(d, val.uint32)) return false; - break; - default: - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Attempted to call getval on a group."); - return false; + // The string spans buffers, so we must copy from the residual buffer + // (if any bytes are there), then the buffer, and finally from the bytesrc. + uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( + str, UPB_MIN(total_len, d->buf_bytesleft)); + int32_t len = 0; + if(d->buf_offset < 0) { + // Residual bytes we need to copy from tmpbuf. + memcpy(ptr, d->tmpbuf, -d->buf_offset); + len += -d->buf_offset; } - // For a packed field where we have not reached the end, we leave the field - // in the decoder so we will return it again without parsing a key. - if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_decoder_offset(d) >= d->packed_end_offset) { - d->field = NULL; + if(d->buf) { + // Bytes from the buffer. + memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, + upb_string_len(str) - len); + } + upb_decoder_skipbytes(d, upb_string_len(str)); + if(len < total_len) { + // Bytes from the bytesrc. 
+ if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return false; + } + // Have to advance this since the buffering layer of the decoder will + // never see these bytes. + d->buf_stream_offset += total_len - len; } } + d->field = NULL; return true; } @@ -549,21 +550,19 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; d->buf = NULL; - d->str = upb_string_new(); upb_src_init(&d->src, &upb_decoder_src_vtbl); return d; } void upb_decoder_free(upb_decoder *d) { - upb_string_unref(d->str); - if(d->buf) upb_string_unref(d->buf); + upb_string_unref(d->buf); free(d); } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { - if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); + upb_string_unref(d->buf); d->top = d->stack; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it diff --git a/src/upb_decoder.h b/src/upb_decoder.h index d40d9fc..dde61fc 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -19,7 +19,7 @@ #include #include #include "upb_def.h" -#include "upb_srcsink.h" +#include "upb_stream.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_def.c b/src/upb_def.c index bb1f07a..bfab738 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -215,10 +215,11 @@ typedef struct _upb_unresolveddef { upb_string *name; } upb_unresolveddef; +// Is passed a ref on the string. 
static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = upb_string_getref(str); + def->name = str; return def; } @@ -258,7 +259,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) CHECKSRC(upb_src_getint32(src, &number)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - CHECKSRC(upb_src_getstr(src, &name)); + name = upb_string_tryrecycle(name); + CHECKSRC(upb_src_getstr(src, name)); break; default: CHECKSRC(upb_src_skipval(src)); @@ -274,11 +276,15 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); + // We don't unref "name" because we pass our ref to the iton entry of the + // table. strtables can ref their keys, but the inttable doesn't know that + // the value is a string. 
return true; src_err: upb_copyerr(status, upb_src_status(src)); err: + upb_string_unref(name); return false; } @@ -368,12 +374,12 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->number = tmp; break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - CHECKSRC(upb_src_getstr(src, &f->name)); - f->name = upb_string_getref(f->name); + f->name = upb_string_tryrecycle(f->name); + CHECKSRC(upb_src_getstr(src, f->name)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str; - CHECKSRC(upb_src_getstr(src, &str)); + upb_string *str = upb_string_new(); + CHECKSRC(upb_src_getstr(src, str)); if(f->def) upb_def_unref(f->def); f->def = UPB_UPCAST(upb_unresolveddef_new(str)); f->owned = true; @@ -415,9 +421,8 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(m->base.fqname); - CHECKSRC(upb_src_getstr(src, &m->base.fqname)); - m->base.fqname = upb_string_getref(m->base.fqname); + m->base.fqname = upb_string_tryrecycle(m->base.fqname); + CHECKSRC(upb_src_getstr(src, m->base.fqname)); break; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: CHECKSRC(upb_src_startmsg(src)); @@ -487,9 +492,8 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(package); - CHECKSRC(upb_src_getstr(src, &package)); - package = upb_string_getref(package); + package = upb_string_tryrecycle(package); + CHECKSRC(upb_src_getstr(src, package)); break; case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: CHECKSRC(upb_src_startmsg(src)); diff --git a/src/upb_def.h b/src/upb_def.h index b73b0f9..c18b424 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -27,7 +27,7 @@ #define UPB_DEF_H_ #include 
"upb_atomic.h" -#include "upb_srcsink.h" +#include "upb_stream.h" #include "upb_table.h" #ifdef __cplusplus diff --git a/src/upb_srcsink.h b/src/upb_srcsink.h deleted file mode 100644 index dc73613..0000000 --- a/src/upb_srcsink.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file defines four general-purpose interfaces for pulling/pushing either - * protobuf data or bytes: - * - * - upb_src: pull interface for protobuf data. - * - upb_sink: push interface for protobuf data. - * - upb_bytesrc: pull interface for bytes. - * - upb_bytesink: push interface for bytes. - * - * These interfaces are used as general-purpose glue in upb. For example, the - * decoder interface works by implementing a upb_src and calling a upb_bytesrc. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - */ - -#ifndef UPB_SRCSINK_H -#define UPB_SRCSINK_H - -#include "upb_srcsink_vtbl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Forward-declare. We can't include upb_def.h; it would be circular. -struct _upb_fielddef; - -// Note! The "eof" flags work like feof() in C; they cannot report end-of-file -// until a read has failed due to eof. They cannot preemptively tell you that -// the next call will fail due to eof. Since these are the semantics that C -// and UNIX provide, we're stuck with them if we want to support eg. stdio. - -/* upb_src ********************************************************************/ - -// TODO: decide how to handle unknown fields. - -// Retrieves the fielddef for the next field in the stream. Returns NULL on -// error or end-of-stream. -struct _upb_fielddef *upb_src_getdef(upb_src *src); - -// Retrieves and stores the next value in "val". For string types the caller -// does not own a ref to the returned type; you must ref it yourself if you -// want one. Returns false on error. 
-bool upb_src_getval(upb_src *src, upb_valueptr val); - -// Like upb_src_getval() but skips the value. -bool upb_src_skipval(upb_src *src); - -// Descends into a submessage. May only be called after a def has been -// returned that indicates a submessage. -bool upb_src_startmsg(upb_src *src); - -// Stops reading a submessage. May be called before the stream is EOF, in -// which case the rest of the submessage is skipped. -bool upb_src_endmsg(upb_src *src); - -// Returns the current error/eof status for the stream. -INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } -INLINE bool upb_src_eof(upb_src *src) { return src->eof; } - -// The following functions are equivalent to upb_src_getval(), but take -// pointers to specific types. In debug mode this may check that the type -// is compatible with the type being read. This check will *not* be performed -// in non-debug mode, and if you get the type wrong the behavior is undefined. -bool upb_src_getbool(upb_src *src, bool *val); -bool upb_src_getint32(upb_src *src, int32_t *val); -bool upb_src_getint64(upb_src *src, int64_t *val); -bool upb_src_getuint32(upb_src *src, uint32_t *val); -bool upb_src_getuint64(upb_src *src, uint64_t *val); -bool upb_src_getfloat(upb_src *src, float *val); -bool upb_src_getdouble(upb_src *src, double *val); -bool upb_src_getstr(upb_src *src, upb_string **val); - -/* upb_sink *******************************************************************/ - -// Puts the given fielddef into the stream. -bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); - -// Puts the given value into the stream. -bool upb_sink_putval(upb_sink *sink, upb_value val); - -// Starts a submessage. (needed? the def tells us we're starting a submsg.) -bool upb_sink_startmsg(upb_sink *sink); - -// Ends a submessage. -bool upb_sink_endmsg(upb_sink *sink); - -// Returns the current error status for the stream. 
-upb_status *upb_sink_status(upb_sink *sink); - -/* upb_bytesrc ****************************************************************/ - -// Returns the next string in the stream. NULL is returned on error or eof. -// The string must be at least "minlen" bytes long unless the stream is eof. -// -// A ref is passed to the caller, though the caller is encouraged to pass the -// ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce -// memory allocation/deallocation. -upb_string *upb_bytesrc_get(upb_bytesrc *src, upb_strlen_t minlen); -void upb_bytesrc_recycle(upb_bytesrc *src, upb_string *str); - -// Appends the next "len" bytes in the stream in-place to "str". This should -// be used when the caller needs to build a contiguous string of the existing -// data in "str" with more data. -bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); - -// Returns the current error status for the stream. -INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } -INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } - -/* upb_bytesink ***************************************************************/ - -// Puts the given string. Returns the number of bytes that were actually, -// consumed, which may be fewer than were in the string, or <0 on error. -int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); - -// Returns the current error status for the stream. -upb_status *upb_bytesink_status(upb_bytesink *sink); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_srcsink_vtbl.h b/src/upb_srcsink_vtbl.h deleted file mode 100644 index 0ec45d2..0000000 --- a/src/upb_srcsink_vtbl.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * vtable declarations for types that are implementing any of the src or sink - * interfaces. Only components that are implementing these interfaces need - * to worry about this file. 
- * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_SRCSINK_VTBL_H_ -#define UPB_SRCSINK_VTBL_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct upb_src; -typedef struct upb_src upb_src; -struct upb_sink; -typedef struct upb_sink upb_sink; -struct upb_bytesrc; -typedef struct upb_bytesrc upb_bytesrc; -struct upb_bytesink; -typedef struct upb_bytesink upb_bytesink; - -// Typedefs for function pointers to all of the virtual functions. -typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); -typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); -typedef bool (*upb_src_skipval_fptr)(upb_src *src); -typedef bool (*upb_src_startmsg_fptr)(upb_src *src); -typedef bool (*upb_src_endmsg_fptr)(upb_src *src); - -typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); -typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); -typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); -typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); - -typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); -typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); -typedef bool (*upb_bytesrc_append_fptr)( - upb_bytesrc *src, upb_string *str, upb_strlen_t len); - -typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); - -// Vtables for the above interfaces. -typedef struct { - upb_src_getdef_fptr getdef; - upb_src_getval_fptr getval; - upb_src_skipval_fptr skipval; - upb_src_startmsg_fptr startmsg; - upb_src_endmsg_fptr endmsg; -} upb_src_vtable; - -typedef struct { - upb_bytesrc_get_fptr get; - upb_bytesrc_append_fptr append; - upb_bytesrc_recycle_fptr recycle; -} upb_bytesrc_vtable; - -// "Base Class" definitions; components that implement these interfaces should -// contain one of these structures. 
- -struct upb_src { - upb_src_vtable *vtbl; - upb_status status; - bool eof; -#ifndef NDEBUG - int state; // For debug-mode checking of API usage. -#endif -}; - -struct upb_bytesrc { - upb_bytesrc_vtable *vtbl; - upb_status status; - bool eof; -}; - -INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { - s->vtbl = vtbl; - s->eof = false; -#ifndef DEBUG - // TODO: initialize debug-mode checking. -#endif -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_stream.h b/src/upb_stream.h new file mode 100644 index 0000000..e7b4074 --- /dev/null +++ b/src/upb_stream.h @@ -0,0 +1,121 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * This file defines four general-purpose streaming interfaces for protobuf + * data or bytes: + * + * - upb_src: pull interface for protobuf data. + * - upb_sink: push interface for protobuf data. + * - upb_bytesrc: pull interface for bytes. + * - upb_bytesink: push interface for bytes. + * + * These interfaces are used as general-purpose glue in upb. For example, the + * decoder interface works by implementing a upb_src and calling a upb_bytesrc. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + */ + +#ifndef UPB_SRCSINK_H +#define UPB_SRCSINK_H + +#include "upb_stream_vtbl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Forward-declare. We can't include upb_def.h; it would be circular. +struct _upb_fielddef; + +// Note! The "eof" flags work like feof() in C; they cannot report end-of-file +// until a read has failed due to eof. They cannot preemptively tell you that +// the next call will fail due to eof. Since these are the semantics that C +// and UNIX provide, we're stuck with them if we want to support eg. stdio. + +/* upb_src ********************************************************************/ + +// TODO: decide how to handle unknown fields. + +// Retrieves the fielddef for the next field in the stream. Returns NULL on +// error or end-of-stream. 
+struct _upb_fielddef *upb_src_getdef(upb_src *src); + +// Retrieves and stores the next value in "val". For string types "val" must +// be a newly-recycled string. Returns false on error. +bool upb_src_getval(upb_src *src, upb_valueptr val); +bool upb_src_getstr(upb_src *src, upb_string *val); + +// Like upb_src_getval() but skips the value. +bool upb_src_skipval(upb_src *src); + +// Descends into a submessage. May only be called after a def has been +// returned that indicates a submessage. +bool upb_src_startmsg(upb_src *src); + +// Stops reading a submessage. May be called before the stream is EOF, in +// which case the rest of the submessage is skipped. +bool upb_src_endmsg(upb_src *src); + +// Returns the current error/eof status for the stream. +INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; } +INLINE bool upb_src_eof(upb_src *src) { return src->eof; } + +// The following functions are equivalent to upb_src_getval(), but take +// pointers to specific types. In debug mode this may check that the type +// is compatible with the type being read. This check will *not* be performed +// in non-debug mode, and if you get the type wrong the behavior is undefined. +bool upb_src_getbool(upb_src *src, bool *val); +bool upb_src_getint32(upb_src *src, int32_t *val); +bool upb_src_getint64(upb_src *src, int64_t *val); +bool upb_src_getuint32(upb_src *src, uint32_t *val); +bool upb_src_getuint64(upb_src *src, uint64_t *val); +bool upb_src_getfloat(upb_src *src, float *val); +bool upb_src_getdouble(upb_src *src, double *val); + +/* upb_sink *******************************************************************/ + +// Puts the given fielddef into the stream. +bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def); + +// Puts the given value into the stream. +bool upb_sink_putval(upb_sink *sink, upb_value val); + +// Starts a submessage. (needed? the def tells us we're starting a submsg.) 
+bool upb_sink_startmsg(upb_sink *sink); + +// Ends a submessage. +bool upb_sink_endmsg(upb_sink *sink); + +// Returns the current error status for the stream. +upb_status *upb_sink_status(upb_sink *sink); + +/* upb_bytesrc ****************************************************************/ + +// Returns the next string in the stream. false is returned on error or eof. +// The string must be at least "minlen" bytes long unless the stream is eof. +bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); + +// Appends the next "len" bytes in the stream in-place to "str". This should +// be used when the caller needs to build a contiguous string of the existing +// data in "str" with more data. +bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +// Returns the current error status for the stream. +INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; } +INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; } + +/* upb_bytesink ***************************************************************/ + +// Puts the given string. Returns the number of bytes that were actually +// consumed, which may be fewer than were in the string, or <0 on error. +int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str); + +// Returns the current error status for the stream. +upb_status *upb_bytesink_status(upb_bytesink *sink); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_stream_vtbl.h b/src/upb_stream_vtbl.h new file mode 100644 index 0000000..0ec45d2 --- /dev/null +++ b/src/upb_stream_vtbl.h @@ -0,0 +1,93 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * vtable declarations for types that are implementing any of the src or sink + * interfaces. Only components that are implementing these interfaces need + * to worry about this file. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. 
+ */ + +#ifndef UPB_SRCSINK_VTBL_H_ +#define UPB_SRCSINK_VTBL_H_ + +#include "upb.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct upb_src; +typedef struct upb_src upb_src; +struct upb_sink; +typedef struct upb_sink upb_sink; +struct upb_bytesrc; +typedef struct upb_bytesrc upb_bytesrc; +struct upb_bytesink; +typedef struct upb_bytesink upb_bytesink; + +// Typedefs for function pointers to all of the virtual functions. +typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src); +typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val); +typedef bool (*upb_src_skipval_fptr)(upb_src *src); +typedef bool (*upb_src_startmsg_fptr)(upb_src *src); +typedef bool (*upb_src_endmsg_fptr)(upb_src *src); + +typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def); +typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val); +typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink); +typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink); + +typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src); +typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str); +typedef bool (*upb_bytesrc_append_fptr)( + upb_bytesrc *src, upb_string *str, upb_strlen_t len); + +typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str); + +// Vtables for the above interfaces. +typedef struct { + upb_src_getdef_fptr getdef; + upb_src_getval_fptr getval; + upb_src_skipval_fptr skipval; + upb_src_startmsg_fptr startmsg; + upb_src_endmsg_fptr endmsg; +} upb_src_vtable; + +typedef struct { + upb_bytesrc_get_fptr get; + upb_bytesrc_append_fptr append; + upb_bytesrc_recycle_fptr recycle; +} upb_bytesrc_vtable; + +// "Base Class" definitions; components that implement these interfaces should +// contain one of these structures. + +struct upb_src { + upb_src_vtable *vtbl; + upb_status status; + bool eof; +#ifndef NDEBUG + int state; // For debug-mode checking of API usage. 
+#endif +}; + +struct upb_bytesrc { + upb_bytesrc_vtable *vtbl; + upb_status status; + bool eof; +}; + +INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) { + s->vtbl = vtbl; + s->eof = false; +#ifndef DEBUG + // TODO: initialize debug-mode checking. +#endif +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/src/upb_string.h b/src/upb_string.h index af1f8ce..770dba7 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -38,7 +38,7 @@ extern "C" { // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { char *ptr; - uint32_t len; + int32_t len; uint32_t size; upb_atomic_refcount_t refcount; union { @@ -53,12 +53,22 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); -// Releases a ref on the given string, which may free the memory. -void upb_string_unref(upb_string *str); +void _upb_string_free(upb_string *str); + +// Releases a ref on the given string, which may free the memory. "str" +// can be NULL, in which case this is a no-op. +INLINE void upb_string_unref(upb_string *str) { + if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); +} // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. -upb_string *upb_string_getref(upb_string *str); +INLINE upb_string *upb_string_getref(upb_string *str) { + // If/when we support stack-allocated strings, this will have to allocate + // a new string if the given string is on the stack. + upb_atomic_ref(&str->refcount); + return str; +} // Returns the length of the string. INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } @@ -75,6 +85,17 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different // data written to it. 
The returned string is either "str" if it could be // recycled or a newly created string if "str" has other references. +// +// As a special case, passing NULL will allocate a new string. This is +// convenient for the pattern: +// +// upb_string *str = NULL; +// while (x) { +// if (y) { +// str = upb_string_tryrecycle(str); +// upb_src_getstr(src, str); +// } +// } upb_string *upb_string_tryrecycle(upb_string *str); // The three options for setting the contents of a string. These may only be diff --git a/tests/test_table.cc b/tests/test_table.cc index 47d806c..37e14a8 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -1,7 +1,7 @@ #undef NDEBUG /* ensure tests always assert. */ #include "upb_table.h" -#include "upb_data.h" +#include "upb_string.h" #include "test_util.h" #include #include @@ -45,7 +45,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) all.insert(key); strtable_entry e; e.value = key[0]; - upb_strptr str = upb_strduplen(key.c_str(), key.size()); + upb_string *str = upb_strduplen(key.c_str(), key.size()); e.e.key = str; upb_strtable_insert(&table, &e.e); upb_string_unref(str); // The table still owns a ref. @@ -55,7 +55,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) /* Test correctness. */ for(uint32_t i = 0; i < keys.size(); i++) { const string& key = keys[i]; - upb_strptr str = upb_strduplen(key.c_str(), key.size()); + upb_string *str = upb_strduplen(key.c_str(), key.size()); strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, str); if(m.find(key) != m.end()) { /* Assume map implementation is correct. 
*/ assert(e); @@ -71,7 +71,7 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) strtable_entry *e; for(e = (strtable_entry*)upb_strtable_begin(&table); e; e = (strtable_entry*)upb_strtable_next(&table, &e->e)) { - string tmp(upb_string_getrobuf(e->e.key), upb_strlen(e->e.key)); + string tmp(upb_string_getrobuf(e->e.key), upb_string_len(e->e.key)); std::set::iterator i = all.find(tmp); assert(i != all.end()); all.erase(i); -- cgit v1.2.3