diff options
author | Joshua Haberman <joshua@reverberate.org> | 2010-07-09 19:25:39 -0700 |
---|---|---|
committer | Joshua Haberman <joshua@reverberate.org> | 2010-07-09 19:25:39 -0700 |
commit | be5ddd8a645eaa949a8d500718257fb7cb71cf44 (patch) | |
tree | 5858669f1f0d2324dc779f60c01b01f015bb2d74 /src | |
parent | 209dce5eb08709bfb5b21e19289b3814619ca6cc (diff) |
Tweaks to upb_src/upb_sink interfaces.
Diffstat (limited to 'src')
-rw-r--r-- | src/upb_atomic.h | 23 | ||||
-rw-r--r-- | src/upb_decoder.c | 171 | ||||
-rw-r--r-- | src/upb_decoder.h | 2 | ||||
-rw-r--r-- | src/upb_def.c | 28 | ||||
-rw-r--r-- | src/upb_def.h | 2 | ||||
-rw-r--r-- | src/upb_stream.h (renamed from src/upb_srcsink.h) | 22 | ||||
-rw-r--r-- | src/upb_stream_vtbl.h (renamed from src/upb_srcsink_vtbl.h) | 0 | ||||
-rw-r--r-- | src/upb_string.h | 29 |
8 files changed, 144 insertions, 133 deletions
diff --git a/src/upb_atomic.h b/src/upb_atomic.h index c2cb8ba..01fc8a2 100644 --- a/src/upb_atomic.h +++ b/src/upb_atomic.h @@ -29,7 +29,6 @@ extern "C" { #define INLINE static inline #endif -#define UPB_THREAD_UNSAFE #ifdef UPB_THREAD_UNSAFE /* Non-thread-safe implementations. ******************************************/ @@ -65,15 +64,6 @@ INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { return ret; } -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } - #endif /* Atomic refcount ************************************************************/ @@ -111,10 +101,6 @@ INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { return __sync_fetch_and_add(&a->v, 0); } -INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) { - a->v = val; -} - #elif defined(WIN32) /* Windows defines atomic increment/decrement. */ @@ -145,7 +131,14 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { #ifdef UPB_THREAD_UNSAFE -/* Already defined. */ +typedef struct { +} upb_rwlock_t; + +INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } +INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } #elif defined(UPB_USE_PTHREADS) diff --git a/src/upb_decoder.c b/src/upb_decoder.c index dd8ffcd..e3fdc49 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -68,9 +68,6 @@ struct upb_decoder { upb_strlen_t packed_end_offset; - // String we return for string values. We try to recycle it if possible. - upb_string *str; - // We keep a stack of messages we have recursed into. upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING]; }; @@ -93,18 +90,19 @@ static bool upb_decoder_nextbuf(upb_decoder *d) d->buf_bytesleft); } - // Recycle old buffer, pull new one. + // Recycle old buffer. if(d->buf) { - upb_bytesrc_recycle(d->bytesrc, d->buf); + d->buf = upb_string_tryrecycle(d->buf); d->buf_offset -= upb_string_len(d->buf); d->buf_stream_offset += upb_string_len(d->buf); } - d->buf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE); - // Handle cases arising from error or EOF. - if(d->buf) { + // Pull next buffer. + if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) { d->buf_bytesleft += upb_string_len(d->buf); + return true; } else { + // Error or EOF. if(!upb_bytesrc_eof(d->bytesrc)) { // Error from bytesrc. upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); @@ -113,9 +111,11 @@ static bool upb_decoder_nextbuf(upb_decoder *d) // EOF from bytesrc and we don't have any residual bytes left. d->src.eof = true; return false; + } else { + // No more data left from the bytesrc, but we still have residual bytes. + return true; } } - return true; } static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes) @@ -369,85 +369,86 @@ again: bool upb_decoder_getval(upb_decoder *d, upb_valueptr val) { - upb_wire_type_t native_wire_type = upb_types[d->field->type].native_wire_type; - if(native_wire_type == UPB_WIRE_TYPE_DELIMITED) { - // A string, bytes, or a length-delimited submessage. The latter isn't - // technically a string, but can be gotten as one to perform lazy parsing. - d->str = upb_string_tryrecycle(d->str); - const upb_strlen_t total_len = d->delimited_len; - if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { - // The entire string is inside our current buffer, so we can just - // return a substring of the buffer without copying. - upb_string_substr(d->str, d->buf, - upb_string_len(d->buf) - d->buf_bytesleft, - total_len); - upb_decoder_skipbytes(d, total_len); - *val.str = d->str; - } else { - // The string spans buffers, so we must copy from the current buffer, - // the next buffer (if we have one), and finally from the bytesrc. - uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len); - upb_strlen_t len = 0; - if(d->buf_offset < 0) { - // Residual bytes we need to copy from tmpbuf. - memcpy(str, d->tmpbuf, -d->buf_offset); - len += -d->buf_offset; - } - if(d->buf) { - upb_strlen_t to_copy = - UPB_MIN(total_len - len, upb_string_len(d->buf) - d->buf_offset); - memcpy(str + len, upb_string_getrobuf(d->buf) + d->buf_offset, to_copy); - } - upb_decoder_skipbytes(d, len); - upb_string_getrwbuf(d->str, len); // Cheap resize. - if(len < total_len) { - if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) { - upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); - return false; - } - d->buf_stream_offset += total_len - len; - } + switch(upb_types[d->field->type].native_wire_type) { + case UPB_WIRE_TYPE_VARINT: { + uint32_t low, high; + if(!upb_decoder_readv64(d, &low, &high)) return false; + uint64_t u64 = ((uint64_t)high << 32) | low; + if(d->field->type == UPB_TYPE(SINT64)) + *val.int64 = upb_zzdec_64(u64); + else + *val.uint64 = u64; + break; } + case UPB_WIRE_TYPE_32BIT_VARINT: { + uint32_t u32; + if(!upb_decoder_readv32(d, &u32)) return false; + if(d->field->type == UPB_TYPE(SINT32)) + *val.int32 = upb_zzdec_32(u32); + else + *val.uint32 = u32; + break; + } + case UPB_WIRE_TYPE_64BIT: + if(!upb_decoder_readf64(d, val.uint64)) return false; + break; + case UPB_WIRE_TYPE_32BIT: + if(!upb_decoder_readf32(d, val.uint32)) return false; + break; + default: + upb_seterr(&d->src.status, UPB_STATUS_ERROR, + "Attempted to call getval on a group."); + return false; + } + // For a packed field where we have not reached the end, we leave the field + // in the decoder so we will return it again without parsing a key. + if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || + upb_decoder_offset(d) >= d->packed_end_offset) { d->field = NULL; + } + return true; +} + +bool upb_decoder_getstr(upb_decoder *d, upb_string *str) { + // A string, bytes, or a length-delimited submessage. The latter isn't + // technically a string, but can be gotten as one to perform lazy parsing. + const int32_t total_len = d->delimited_len; + if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) { + // The entire string is inside our current buffer, so we can just + // return a substring of the buffer without copying. + upb_string_substr(str, d->buf, + upb_string_len(d->buf) - d->buf_bytesleft, + total_len); + upb_decoder_skipbytes(d, total_len); } else { - switch(native_wire_type) { - case UPB_WIRE_TYPE_VARINT: { - uint32_t low, high; - if(!upb_decoder_readv64(d, &low, &high)) return false; - uint64_t u64 = ((uint64_t)high << 32) | low; - if(d->field->type == UPB_TYPE(SINT64)) - *val.int64 = upb_zzdec_64(u64); - else - *val.uint64 = u64; - break; - } - case UPB_WIRE_TYPE_32BIT_VARINT: { - uint32_t u32; - if(!upb_decoder_readv32(d, &u32)) return false; - if(d->field->type == UPB_TYPE(SINT32)) - *val.int32 = upb_zzdec_32(u32); - else - *val.uint32 = u32; - break; - } - case UPB_WIRE_TYPE_64BIT: - if(!upb_decoder_readf64(d, val.uint64)) return false; - break; - case UPB_WIRE_TYPE_32BIT: - if(!upb_decoder_readf32(d, val.uint32)) return false; - break; - default: - upb_seterr(&d->src.status, UPB_STATUS_ERROR, - "Attempted to call getval on a group."); - return false; + // The string spans buffers, so we must copy from the residual buffer + // (if any bytes are there), then the buffer, and finally from the bytesrc. + uint8_t *ptr = (uint8_t*)upb_string_getrwbuf( + str, UPB_MIN(total_len, d->buf_bytesleft)); + int32_t len = 0; + if(d->buf_offset < 0) { + // Residual bytes we need to copy from tmpbuf. + memcpy(ptr, d->tmpbuf, -d->buf_offset); + len += -d->buf_offset; } - // For a packed field where we have not reached the end, we leave the field - // in the decoder so we will return it again without parsing a key. - if(d->wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_decoder_offset(d) >= d->packed_end_offset) { - d->field = NULL; + if(d->buf) { + // Bytes from the buffer. + memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset, + upb_string_len(str) - len); + } + upb_decoder_skipbytes(d, upb_string_len(str)); + if(len < total_len) { + // Bytes from the bytesrc. + if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) { + upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc)); + return false; + } + // Have to advance this since the buffering layer of the decoder will + // never see these bytes. + d->buf_stream_offset += total_len - len; } } + d->field = NULL; return true; } @@ -549,21 +550,19 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) d->toplevel_msgdef = msgdef; d->limit = &d->stack[UPB_MAX_NESTING]; d->buf = NULL; - d->str = upb_string_new(); upb_src_init(&d->src, &upb_decoder_src_vtbl); return d; } void upb_decoder_free(upb_decoder *d) { - upb_string_unref(d->str); - if(d->buf) upb_string_unref(d->buf); + upb_string_unref(d->buf); free(d); } void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) { - if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf); + upb_string_unref(d->buf); d->top = d->stack; d->top->msgdef = d->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it diff --git a/src/upb_decoder.h b/src/upb_decoder.h index d40d9fc..dde61fc 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -19,7 +19,7 @@ #include <stdbool.h> #include <stdint.h> #include "upb_def.h" -#include "upb_srcsink.h" +#include "upb_stream.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_def.c b/src/upb_def.c index bb1f07a..bfab738 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -215,10 +215,11 @@ typedef struct _upb_unresolveddef { upb_string *name; } upb_unresolveddef; +// Is passed a ref on the string. static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = upb_string_getref(str); + def->name = str; return def; } @@ -258,7 +259,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) CHECKSRC(upb_src_getint32(src, &number)); break; case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM: - CHECKSRC(upb_src_getstr(src, &name)); + name = upb_string_tryrecycle(name); + CHECKSRC(upb_src_getstr(src, name)); break; default: CHECKSRC(upb_src_skipval(src)); @@ -274,11 +276,15 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status) iton_ent iton_ent = {{number, 0}, name}; upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, &iton_ent.e); + // We don't unref "name" because we pass our ref to the iton entry of the + // table. strtables can ref their keys, but the inttable doesn't know that + // the value is a string. return true; src_err: upb_copyerr(status, upb_src_status(src)); err: + upb_string_unref(name); return false; } @@ -368,12 +374,12 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status) f->number = tmp; break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM: - CHECKSRC(upb_src_getstr(src, &f->name)); - f->name = upb_string_getref(f->name); + f->name = upb_string_tryrecycle(f->name); + CHECKSRC(upb_src_getstr(src, f->name)); break; case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: { - upb_string *str; - CHECKSRC(upb_src_getstr(src, &str)); + upb_string *str = upb_string_new(); + CHECKSRC(upb_src_getstr(src, str)); if(f->def) upb_def_unref(f->def); f->def = UPB_UPCAST(upb_unresolveddef_new(str)); f->owned = true; @@ -415,9 +421,8 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status) while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(m->base.fqname); - CHECKSRC(upb_src_getstr(src, &m->base.fqname)); - m->base.fqname = upb_string_getref(m->base.fqname); + m->base.fqname = upb_string_tryrecycle(m->base.fqname); + CHECKSRC(upb_src_getstr(src, m->base.fqname)); break; case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM: CHECKSRC(upb_src_startmsg(src)); @@ -487,9 +492,8 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status) while((f = upb_src_getdef(src)) != NULL) { switch(f->number) { case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM: - upb_string_unref(package); - CHECKSRC(upb_src_getstr(src, &package)); - package = upb_string_getref(package); + package = upb_string_tryrecycle(package); + CHECKSRC(upb_src_getstr(src, package)); break; case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM: CHECKSRC(upb_src_startmsg(src)); diff --git a/src/upb_def.h b/src/upb_def.h index b73b0f9..c18b424 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -27,7 +27,7 @@ #define UPB_DEF_H_ #include "upb_atomic.h" -#include "upb_srcsink.h" +#include "upb_stream.h" #include "upb_table.h" #ifdef __cplusplus diff --git a/src/upb_srcsink.h b/src/upb_stream.h index dc73613..e7b4074 100644 --- a/src/upb_srcsink.h +++ b/src/upb_stream.h @@ -1,8 +1,8 @@ /* * upb - a minimalist implementation of protocol buffers. * - * This file defines four general-purpose interfaces for pulling/pushing either - * protobuf data or bytes: + * This file defines four general-purpose streaming interfaces for protobuf + * data or bytes: * * - upb_src: pull interface for protobuf data. * - upb_sink: push interface for protobuf data. @@ -19,7 +19,7 @@ #ifndef UPB_SRCSINK_H #define UPB_SRCSINK_H -#include "upb_srcsink_vtbl.h" +#include "upb_stream_vtbl.h" #ifdef __cplusplus extern "C" { @@ -41,10 +41,10 @@ struct _upb_fielddef; // error or end-of-stream. struct _upb_fielddef *upb_src_getdef(upb_src *src); -// Retrieves and stores the next value in "val". For string types the caller -// does not own a ref to the returned type; you must ref it yourself if you -// want one. Returns false on error. +// Retrieves and stores the next value in "val". For string types "val" must +// be a newly-recycled string. Returns false on error. bool upb_src_getval(upb_src *src, upb_valueptr val); +bool upb_src_getstr(upb_src *src, upb_string *val); // Like upb_src_getval() but skips the value. bool upb_src_skipval(upb_src *src); @@ -72,7 +72,6 @@ bool upb_src_getuint32(upb_src *src, uint32_t *val); bool upb_src_getuint64(upb_src *src, uint64_t *val); bool upb_src_getfloat(upb_src *src, float *val); bool upb_src_getdouble(upb_src *src, double *val); -bool upb_src_getstr(upb_src *src, upb_string **val); /* upb_sink *******************************************************************/ @@ -93,14 +92,9 @@ upb_status *upb_sink_status(upb_sink *sink); /* upb_bytesrc ****************************************************************/ -// Returns the next string in the stream. NULL is returned on error or eof. +// Returns the next string in the stream. false is returned on error or eof. // The string must be at least "minlen" bytes long unless the stream is eof. -// -// A ref is passed to the caller, though the caller is encouraged to pass the -// ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce -// memory allocation/deallocation. -upb_string *upb_bytesrc_get(upb_bytesrc *src, upb_strlen_t minlen); -void upb_bytesrc_recycle(upb_bytesrc *src, upb_string *str); +bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen); // Appends the next "len" bytes in the stream in-place to "str". This should // be used when the caller needs to build a contiguous string of the existing diff --git a/src/upb_srcsink_vtbl.h b/src/upb_stream_vtbl.h index 0ec45d2..0ec45d2 100644 --- a/src/upb_srcsink_vtbl.h +++ b/src/upb_stream_vtbl.h diff --git a/src/upb_string.h b/src/upb_string.h index af1f8ce..770dba7 100644 --- a/src/upb_string.h +++ b/src/upb_string.h @@ -38,7 +38,7 @@ extern "C" { // the associated functions. Also, strings may *only* be allocated on the heap. struct _upb_string { char *ptr; - uint32_t len; + int32_t len; uint32_t size; upb_atomic_refcount_t refcount; union { @@ -53,12 +53,22 @@ struct _upb_string { // longer needed, it should be unref'd, never freed directly. upb_string *upb_string_new(); -// Releases a ref on the given string, which may free the memory. -void upb_string_unref(upb_string *str); +void _upb_string_free(upb_string *str); + +// Releases a ref on the given string, which may free the memory. "str" +// can be NULL, in which case this is a no-op. +INLINE void upb_string_unref(upb_string *str) { + if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str); +} // Returns a string with the same contents as "str". The caller owns a ref on // the returned string, which may or may not be the same object as "str. -upb_string *upb_string_getref(upb_string *str); +INLINE upb_string *upb_string_getref(upb_string *str) { + // If/when we support stack-allocated strings, this will have to allocate + // a new string if the given string is on the stack. + upb_atomic_ref(&str->refcount); + return str; +} // Returns the length of the string. INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } @@ -75,6 +85,17 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; } // Attempts to recycle the string "str" so it may be reused and have different // data written to it. The returned string is either "str" if it could be // recycled or a newly created string if "str" has other references. +// +// As a special case, passing NULL will allocate a new string. This is +// convenient for the pattern: +// +// upb_string *str = NULL; +// while (x) { +// if (y) { +// str = upb_string_tryrecycle(str); +// upb_src_getstr(str); +// } +// } upb_string *upb_string_tryrecycle(upb_string *str); // The three options for setting the contents of a string. These may only be |