summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Joshua Haberman <joshua@reverberate.org> 2010-07-09 19:25:39 -0700
committer: Joshua Haberman <joshua@reverberate.org> 2010-07-09 19:25:39 -0700
commit: be5ddd8a645eaa949a8d500718257fb7cb71cf44 (patch)
tree: 5858669f1f0d2324dc779f60c01b01f015bb2d74 /src
parent: 209dce5eb08709bfb5b21e19289b3814619ca6cc (diff)
Tweaks to upb_src/upb_sink interfaces.
Diffstat (limited to 'src')
-rw-r--r-- src/upb_atomic.h 23
-rw-r--r-- src/upb_decoder.c 171
-rw-r--r-- src/upb_decoder.h 2
-rw-r--r-- src/upb_def.c 28
-rw-r--r-- src/upb_def.h 2
-rw-r--r-- src/upb_stream.h (renamed from src/upb_srcsink.h) 22
-rw-r--r-- src/upb_stream_vtbl.h (renamed from src/upb_srcsink_vtbl.h) 0
-rw-r--r-- src/upb_string.h 29
8 files changed, 144 insertions, 133 deletions
diff --git a/src/upb_atomic.h b/src/upb_atomic.h
index c2cb8ba..01fc8a2 100644
--- a/src/upb_atomic.h
+++ b/src/upb_atomic.h
@@ -29,7 +29,6 @@ extern "C" {
#define INLINE static inline
#endif
-#define UPB_THREAD_UNSAFE
#ifdef UPB_THREAD_UNSAFE
/* Non-thread-safe implementations. ******************************************/
@@ -65,15 +64,6 @@ INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) {
return ret;
}
-typedef struct {
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
-
#endif
/* Atomic refcount ************************************************************/
@@ -111,10 +101,6 @@ INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) {
return __sync_fetch_and_add(&a->v, 0);
}
-INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) {
- a->v = val;
-}
-
#elif defined(WIN32)
/* Windows defines atomic increment/decrement. */
@@ -145,7 +131,14 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
#ifdef UPB_THREAD_UNSAFE
-/* Already defined. */
+typedef struct {
+} upb_rwlock_t;
+
+INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
#elif defined(UPB_USE_PTHREADS)
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index dd8ffcd..e3fdc49 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -68,9 +68,6 @@ struct upb_decoder {
upb_strlen_t packed_end_offset;
- // String we return for string values. We try to recycle it if possible.
- upb_string *str;
-
// We keep a stack of messages we have recursed into.
upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING];
};
@@ -93,18 +90,19 @@ static bool upb_decoder_nextbuf(upb_decoder *d)
d->buf_bytesleft);
}
- // Recycle old buffer, pull new one.
+ // Recycle old buffer.
if(d->buf) {
- upb_bytesrc_recycle(d->bytesrc, d->buf);
+ d->buf = upb_string_tryrecycle(d->buf);
d->buf_offset -= upb_string_len(d->buf);
d->buf_stream_offset += upb_string_len(d->buf);
}
- d->buf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE);
- // Handle cases arising from error or EOF.
- if(d->buf) {
+ // Pull next buffer.
+ if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) {
d->buf_bytesleft += upb_string_len(d->buf);
+ return true;
} else {
+ // Error or EOF.
if(!upb_bytesrc_eof(d->bytesrc)) {
// Error from bytesrc.
upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
@@ -113,9 +111,11 @@ static bool upb_decoder_nextbuf(upb_decoder *d)
// EOF from bytesrc and we don't have any residual bytes left.
d->src.eof = true;
return false;
+ } else {
+ // No more data left from the bytesrc, but we still have residual bytes.
+ return true;
}
}
- return true;
}
static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes)
@@ -369,85 +369,86 @@ again:
bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
{
- upb_wire_type_t native_wire_type = upb_types[d->field->type].native_wire_type;
- if(native_wire_type == UPB_WIRE_TYPE_DELIMITED) {
- // A string, bytes, or a length-delimited submessage. The latter isn't
- // technically a string, but can be gotten as one to perform lazy parsing.
- d->str = upb_string_tryrecycle(d->str);
- const upb_strlen_t total_len = d->delimited_len;
- if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) {
- // The entire string is inside our current buffer, so we can just
- // return a substring of the buffer without copying.
- upb_string_substr(d->str, d->buf,
- upb_string_len(d->buf) - d->buf_bytesleft,
- total_len);
- upb_decoder_skipbytes(d, total_len);
- *val.str = d->str;
- } else {
- // The string spans buffers, so we must copy from the current buffer,
- // the next buffer (if we have one), and finally from the bytesrc.
- uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len);
- upb_strlen_t len = 0;
- if(d->buf_offset < 0) {
- // Residual bytes we need to copy from tmpbuf.
- memcpy(str, d->tmpbuf, -d->buf_offset);
- len += -d->buf_offset;
- }
- if(d->buf) {
- upb_strlen_t to_copy =
- UPB_MIN(total_len - len, upb_string_len(d->buf) - d->buf_offset);
- memcpy(str + len, upb_string_getrobuf(d->buf) + d->buf_offset, to_copy);
- }
- upb_decoder_skipbytes(d, len);
- upb_string_getrwbuf(d->str, len); // Cheap resize.
- if(len < total_len) {
- if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) {
- upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
- return false;
- }
- d->buf_stream_offset += total_len - len;
- }
+ switch(upb_types[d->field->type].native_wire_type) {
+ case UPB_WIRE_TYPE_VARINT: {
+ uint32_t low, high;
+ if(!upb_decoder_readv64(d, &low, &high)) return false;
+ uint64_t u64 = ((uint64_t)high << 32) | low;
+ if(d->field->type == UPB_TYPE(SINT64))
+ *val.int64 = upb_zzdec_64(u64);
+ else
+ *val.uint64 = u64;
+ break;
}
+ case UPB_WIRE_TYPE_32BIT_VARINT: {
+ uint32_t u32;
+ if(!upb_decoder_readv32(d, &u32)) return false;
+ if(d->field->type == UPB_TYPE(SINT32))
+ *val.int32 = upb_zzdec_32(u32);
+ else
+ *val.uint32 = u32;
+ break;
+ }
+ case UPB_WIRE_TYPE_64BIT:
+ if(!upb_decoder_readf64(d, val.uint64)) return false;
+ break;
+ case UPB_WIRE_TYPE_32BIT:
+ if(!upb_decoder_readf32(d, val.uint32)) return false;
+ break;
+ default:
+ upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+ "Attempted to call getval on a group.");
+ return false;
+ }
+ // For a packed field where we have not reached the end, we leave the field
+ // in the decoder so we will return it again without parsing a key.
+ if(d->wire_type != UPB_WIRE_TYPE_DELIMITED ||
+ upb_decoder_offset(d) >= d->packed_end_offset) {
d->field = NULL;
+ }
+ return true;
+}
+
+bool upb_decoder_getstr(upb_decoder *d, upb_string *str) {
+ // A string, bytes, or a length-delimited submessage. The latter isn't
+ // technically a string, but can be gotten as one to perform lazy parsing.
+ const int32_t total_len = d->delimited_len;
+ if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) {
+ // The entire string is inside our current buffer, so we can just
+ // return a substring of the buffer without copying.
+ upb_string_substr(str, d->buf,
+ upb_string_len(d->buf) - d->buf_bytesleft,
+ total_len);
+ upb_decoder_skipbytes(d, total_len);
} else {
- switch(native_wire_type) {
- case UPB_WIRE_TYPE_VARINT: {
- uint32_t low, high;
- if(!upb_decoder_readv64(d, &low, &high)) return false;
- uint64_t u64 = ((uint64_t)high << 32) | low;
- if(d->field->type == UPB_TYPE(SINT64))
- *val.int64 = upb_zzdec_64(u64);
- else
- *val.uint64 = u64;
- break;
- }
- case UPB_WIRE_TYPE_32BIT_VARINT: {
- uint32_t u32;
- if(!upb_decoder_readv32(d, &u32)) return false;
- if(d->field->type == UPB_TYPE(SINT32))
- *val.int32 = upb_zzdec_32(u32);
- else
- *val.uint32 = u32;
- break;
- }
- case UPB_WIRE_TYPE_64BIT:
- if(!upb_decoder_readf64(d, val.uint64)) return false;
- break;
- case UPB_WIRE_TYPE_32BIT:
- if(!upb_decoder_readf32(d, val.uint32)) return false;
- break;
- default:
- upb_seterr(&d->src.status, UPB_STATUS_ERROR,
- "Attempted to call getval on a group.");
- return false;
+ // The string spans buffers, so we must copy from the residual buffer
+ // (if any bytes are there), then the buffer, and finally from the bytesrc.
+ uint8_t *ptr = (uint8_t*)upb_string_getrwbuf(
+ str, UPB_MIN(total_len, d->buf_bytesleft));
+ int32_t len = 0;
+ if(d->buf_offset < 0) {
+ // Residual bytes we need to copy from tmpbuf.
+ memcpy(ptr, d->tmpbuf, -d->buf_offset);
+ len += -d->buf_offset;
}
- // For a packed field where we have not reached the end, we leave the field
- // in the decoder so we will return it again without parsing a key.
- if(d->wire_type != UPB_WIRE_TYPE_DELIMITED ||
- upb_decoder_offset(d) >= d->packed_end_offset) {
- d->field = NULL;
+ if(d->buf) {
+ // Bytes from the buffer.
+ memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset,
+ upb_string_len(str) - len);
+ }
+ upb_decoder_skipbytes(d, upb_string_len(str));
+ if(len < total_len) {
+ // Bytes from the bytesrc.
+ if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) {
+ upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
+ return false;
+ }
+ // Have to advance this since the buffering layer of the decoder will
+ // never see these bytes.
+ d->buf_stream_offset += total_len - len;
}
}
+ d->field = NULL;
return true;
}
@@ -549,21 +550,19 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
d->toplevel_msgdef = msgdef;
d->limit = &d->stack[UPB_MAX_NESTING];
d->buf = NULL;
- d->str = upb_string_new();
upb_src_init(&d->src, &upb_decoder_src_vtbl);
return d;
}
void upb_decoder_free(upb_decoder *d)
{
- upb_string_unref(d->str);
- if(d->buf) upb_string_unref(d->buf);
+ upb_string_unref(d->buf);
free(d);
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
{
- if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf);
+ upb_string_unref(d->buf);
d->top = d->stack;
d->top->msgdef = d->toplevel_msgdef;
// The top-level message is not delimited (we can keep receiving data for it
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
index d40d9fc..dde61fc 100644
--- a/src/upb_decoder.h
+++ b/src/upb_decoder.h
@@ -19,7 +19,7 @@
#include <stdbool.h>
#include <stdint.h>
#include "upb_def.h"
-#include "upb_srcsink.h"
+#include "upb_stream.h"
#ifdef __cplusplus
extern "C" {
diff --git a/src/upb_def.c b/src/upb_def.c
index bb1f07a..bfab738 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -215,10 +215,11 @@ typedef struct _upb_unresolveddef {
upb_string *name;
} upb_unresolveddef;
+// Is passed a ref on the string.
static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) {
upb_unresolveddef *def = malloc(sizeof(*def));
upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
- def->name = upb_string_getref(str);
+ def->name = str;
return def;
}
@@ -258,7 +259,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status)
CHECKSRC(upb_src_getint32(src, &number));
break;
case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM:
- CHECKSRC(upb_src_getstr(src, &name));
+ name = upb_string_tryrecycle(name);
+ CHECKSRC(upb_src_getstr(src, name));
break;
default:
CHECKSRC(upb_src_skipval(src));
@@ -274,11 +276,15 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status)
iton_ent iton_ent = {{number, 0}, name};
upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
upb_inttable_insert(&e->iton, &iton_ent.e);
+ // We don't unref "name" because we pass our ref to the iton entry of the
+ // table. strtables can ref their keys, but the inttable doesn't know that
+ // the value is a string.
return true;
src_err:
upb_copyerr(status, upb_src_status(src));
err:
+ upb_string_unref(name);
return false;
}
@@ -368,12 +374,12 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status)
f->number = tmp;
break;
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM:
- CHECKSRC(upb_src_getstr(src, &f->name));
- f->name = upb_string_getref(f->name);
+ f->name = upb_string_tryrecycle(f->name);
+ CHECKSRC(upb_src_getstr(src, f->name));
break;
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: {
- upb_string *str;
- CHECKSRC(upb_src_getstr(src, &str));
+ upb_string *str = upb_string_new();
+ CHECKSRC(upb_src_getstr(src, str));
if(f->def) upb_def_unref(f->def);
f->def = UPB_UPCAST(upb_unresolveddef_new(str));
f->owned = true;
@@ -415,9 +421,8 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status)
while((f = upb_src_getdef(src)) != NULL) {
switch(f->number) {
case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM:
- upb_string_unref(m->base.fqname);
- CHECKSRC(upb_src_getstr(src, &m->base.fqname));
- m->base.fqname = upb_string_getref(m->base.fqname);
+ m->base.fqname = upb_string_tryrecycle(m->base.fqname);
+ CHECKSRC(upb_src_getstr(src, m->base.fqname));
break;
case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM:
CHECKSRC(upb_src_startmsg(src));
@@ -487,9 +492,8 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status)
while((f = upb_src_getdef(src)) != NULL) {
switch(f->number) {
case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM:
- upb_string_unref(package);
- CHECKSRC(upb_src_getstr(src, &package));
- package = upb_string_getref(package);
+ package = upb_string_tryrecycle(package);
+ CHECKSRC(upb_src_getstr(src, package));
break;
case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM:
CHECKSRC(upb_src_startmsg(src));
diff --git a/src/upb_def.h b/src/upb_def.h
index b73b0f9..c18b424 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -27,7 +27,7 @@
#define UPB_DEF_H_
#include "upb_atomic.h"
-#include "upb_srcsink.h"
+#include "upb_stream.h"
#include "upb_table.h"
#ifdef __cplusplus
diff --git a/src/upb_srcsink.h b/src/upb_stream.h
index dc73613..e7b4074 100644
--- a/src/upb_srcsink.h
+++ b/src/upb_stream.h
@@ -1,8 +1,8 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * This file defines four general-purpose interfaces for pulling/pushing either
- * protobuf data or bytes:
+ * This file defines four general-purpose streaming interfaces for protobuf
+ * data or bytes:
*
* - upb_src: pull interface for protobuf data.
* - upb_sink: push interface for protobuf data.
@@ -19,7 +19,7 @@
#ifndef UPB_SRCSINK_H
#define UPB_SRCSINK_H
-#include "upb_srcsink_vtbl.h"
+#include "upb_stream_vtbl.h"
#ifdef __cplusplus
extern "C" {
@@ -41,10 +41,10 @@ struct _upb_fielddef;
// error or end-of-stream.
struct _upb_fielddef *upb_src_getdef(upb_src *src);
-// Retrieves and stores the next value in "val". For string types the caller
-// does not own a ref to the returned type; you must ref it yourself if you
-// want one. Returns false on error.
+// Retrieves and stores the next value in "val". For string types "val" must
+// be a newly-recycled string. Returns false on error.
bool upb_src_getval(upb_src *src, upb_valueptr val);
+bool upb_src_getstr(upb_src *src, upb_string *val);
// Like upb_src_getval() but skips the value.
bool upb_src_skipval(upb_src *src);
@@ -72,7 +72,6 @@ bool upb_src_getuint32(upb_src *src, uint32_t *val);
bool upb_src_getuint64(upb_src *src, uint64_t *val);
bool upb_src_getfloat(upb_src *src, float *val);
bool upb_src_getdouble(upb_src *src, double *val);
-bool upb_src_getstr(upb_src *src, upb_string **val);
/* upb_sink *******************************************************************/
@@ -93,14 +92,9 @@ upb_status *upb_sink_status(upb_sink *sink);
/* upb_bytesrc ****************************************************************/
-// Returns the next string in the stream. NULL is returned on error or eof.
+// Returns the next string in the stream. false is returned on error or eof.
// The string must be at least "minlen" bytes long unless the stream is eof.
-//
-// A ref is passed to the caller, though the caller is encouraged to pass the
-// ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce
-// memory allocation/deallocation.
-upb_string *upb_bytesrc_get(upb_bytesrc *src, upb_strlen_t minlen);
-void upb_bytesrc_recycle(upb_bytesrc *src, upb_string *str);
+bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen);
// Appends the next "len" bytes in the stream in-place to "str". This should
// be used when the caller needs to build a contiguous string of the existing
diff --git a/src/upb_srcsink_vtbl.h b/src/upb_stream_vtbl.h
index 0ec45d2..0ec45d2 100644
--- a/src/upb_srcsink_vtbl.h
+++ b/src/upb_stream_vtbl.h
diff --git a/src/upb_string.h b/src/upb_string.h
index af1f8ce..770dba7 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -38,7 +38,7 @@ extern "C" {
// the associated functions. Also, strings may *only* be allocated on the heap.
struct _upb_string {
char *ptr;
- uint32_t len;
+ int32_t len;
uint32_t size;
upb_atomic_refcount_t refcount;
union {
@@ -53,12 +53,22 @@ struct _upb_string {
// longer needed, it should be unref'd, never freed directly.
upb_string *upb_string_new();
-// Releases a ref on the given string, which may free the memory.
-void upb_string_unref(upb_string *str);
+void _upb_string_free(upb_string *str);
+
+// Releases a ref on the given string, which may free the memory. "str"
+// can be NULL, in which case this is a no-op.
+INLINE void upb_string_unref(upb_string *str) {
+ if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str);
+}
// Returns a string with the same contents as "str". The caller owns a ref on
// the returned string, which may or may not be the same object as "str.
-upb_string *upb_string_getref(upb_string *str);
+INLINE upb_string *upb_string_getref(upb_string *str) {
+ // If/when we support stack-allocated strings, this will have to allocate
+ // a new string if the given string is on the stack.
+ upb_atomic_ref(&str->refcount);
+ return str;
+}
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
@@ -75,6 +85,17 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; }
// Attempts to recycle the string "str" so it may be reused and have different
// data written to it. The returned string is either "str" if it could be
// recycled or a newly created string if "str" has other references.
+//
+// As a special case, passing NULL will allocate a new string. This is
+// convenient for the pattern:
+//
+// upb_string *str = NULL;
+// while (x) {
+// if (y) {
+// str = upb_string_tryrecycle(str);
+// upb_src_getstr(str);
+// }
+// }
upb_string *upb_string_tryrecycle(upb_string *str);
// The three options for setting the contents of a string. These may only be
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback