summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoshua Haberman <jhaberman@gmail.com>2011-07-14 23:15:00 -0700
committerJoshua Haberman <jhaberman@gmail.com>2011-07-14 23:15:00 -0700
commit6a1f3a66939308668ab8dce0d195afec16e02af9 (patch)
tree8d1236c0d7269caa1ece95bfe584afe9b550c006 /src
parent559e23c796f973a65d05c76e211835b126ee8ac8 (diff)
Major refactoring: upb_string is gone in favor of upb_strref.
Diffstat (limited to 'src')
-rw-r--r--src/upb.c80
-rw-r--r--src/upb.h110
-rw-r--r--src/upb_bytestream.h213
-rw-r--r--src/upb_decoder.c276
-rw-r--r--src/upb_decoder.h75
-rw-r--r--src/upb_decoder_x86.dasc28
-rw-r--r--src/upb_def.c205
-rw-r--r--src/upb_def.h49
-rw-r--r--src/upb_descriptor.c188
-rw-r--r--src/upb_descriptor.h6
-rw-r--r--src/upb_glue.c49
-rw-r--r--src/upb_glue.h20
-rw-r--r--src/upb_handlers.c25
-rw-r--r--src/upb_handlers.h9
-rw-r--r--src/upb_msg.c53
-rw-r--r--src/upb_msg.h6
-rw-r--r--src/upb_stdio.c168
-rw-r--r--src/upb_stdio.h54
-rw-r--r--src/upb_string.c164
-rw-r--r--src/upb_string.h394
-rw-r--r--src/upb_strstream.c105
-rw-r--r--src/upb_strstream.h24
-rw-r--r--src/upb_table.c127
-rw-r--r--src/upb_table.h69
-rw-r--r--src/upb_textprinter.c44
-rw-r--r--src/upb_varint.h17
26 files changed, 1121 insertions, 1437 deletions
diff --git a/src/upb.c b/src/upb.c
index 82c7fc2..0f3ea18 100644
--- a/src/upb.c
+++ b/src/upb.c
@@ -5,19 +5,21 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*/
+#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
+#include <stdlib.h>
#include <string.h>
#include "descriptor_const.h"
#include "upb.h"
-#include "upb_string.h"
+#include "upb_bytestream.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
#define TYPE_INFO(wire_type, ctype, inmemory_type) \
{alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
const upb_type_info upb_types[] = {
- {0, 0, 0, 0, ""}, // There is no type 0.
+ TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake)
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
@@ -42,39 +44,79 @@ const upb_type_info upb_types[] = {
#ifdef NDEBUG
upb_value UPB_NO_VALUE = {{0}};
#else
-upb_value UPB_NO_VALUE = {{0}, UPB_VALUETYPE_RAW};
+upb_value UPB_NO_VALUE = {{0}, -1};
#endif
-void upb_seterr(upb_status *status, enum upb_status_code code,
- const char *msg, ...) {
- status->code = code;
- upb_string_recycle(&status->str);
+void upb_status_init(upb_status *status) {
+ status->buf = NULL;
+ upb_status_clear(status);
+}
+
+void upb_status_uninit(upb_status *status) {
+ free(status->buf);
+}
+
+void upb_status_setf(upb_status *s, enum upb_status_code code,
+ const char *msg, ...) {
+ s->code = code;
va_list args;
va_start(args, msg);
- upb_string_vprintf(status->str, msg, args);
+ upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args);
va_end(args);
+ s->str = s->buf;
}
-void upb_copyerr(upb_status *to, upb_status *from)
-{
+void upb_status_copy(upb_status *to, upb_status *from) {
to->code = from->code;
- if(from->str) to->str = upb_string_getref(from->str);
+ if (from->str) {
+ if (to->bufsize < from->bufsize) {
+ to->bufsize = from->bufsize;
+ to->buf = realloc(to->buf, to->bufsize);
+ to->str = to->buf;
+ }
+ memcpy(to->str, from->str, from->bufsize);
+ } else {
+ to->str = NULL;
+ }
}
-void upb_clearerr(upb_status *status) {
+void upb_status_clear(upb_status *status) {
status->code = UPB_OK;
- if (status->str) upb_string_recycle(&status->str);
+ status->str = NULL;
}
-void upb_printerr(upb_status *status) {
+void upb_status_print(upb_status *status, FILE *f) {
if(status->str) {
- fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n",
- status->code, UPB_STRARG(status->str));
+ fprintf(f, "code: %d, msg: %s\n", status->code, status->str);
} else {
- fprintf(stderr, "code: %d, no msg\n", status->code);
+ fprintf(f, "code: %d, no msg\n", status->code);
}
}
-void upb_status_uninit(upb_status *status) {
- upb_string_unref(status->str);
+void upb_status_fromerrno(upb_status *status) {
+ upb_status_setf(status, UPB_ERROR, "%s", strerror(errno));
+}
+
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
+ const char *fmt, va_list args) {
+ // Try once without reallocating. We have to va_copy because we might have
+ // to call vsnprintf again.
+ uint32_t len = *size - ofs;
+ va_list args_copy;
+ va_copy(args_copy, args);
+ uint32_t true_len = vsnprintf(*buf + ofs, len, fmt, args_copy);
+ va_end(args_copy);
+
+ // Resize to be the correct size.
+ if (true_len >= len) {
+ // Need to print again, because some characters were truncated. vsnprintf
+ // will not write the entire string unless you give it space to store the
+ // NULL terminator also.
+ while (*size < (ofs + true_len + 1)) *size = UPB_MAX(*size * 2, 2);
+ char *newbuf = realloc(*buf, *size);
+ if (!newbuf) return -1;
+ vsnprintf(newbuf + ofs, true_len + 1, fmt, args);
+ *buf = newbuf;
+ }
+ return true_len;
}
diff --git a/src/upb.h b/src/upb.h
index 59429f4..b15340e 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -30,9 +30,7 @@ extern "C" {
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
-INLINE void nop_printf(const char *fmt, ...) {
- (void)fmt;
-}
+INLINE void nop_printf(const char *fmt, ...) { (void)fmt; }
#ifdef NDEBUG
#define DEBUGPRINTF nop_printf
@@ -45,7 +43,6 @@ INLINE size_t upb_align_up(size_t val, size_t align) {
return val % align == 0 ? val : val + align - (val % align);
}
-
// The maximum that any submessages can be nested. Matches proto2's limit.
// At the moment this specifies the size of several statically-sized arrays
// and therefore setting it high will cause more memory to be used. Will
@@ -122,31 +119,16 @@ typedef struct {
extern const upb_type_info upb_types[];
-/* Polymorphic values of .proto types *****************************************/
+/* upb_value ******************************************************************/
-struct _upb_string;
-typedef struct _upb_string upb_string;
-struct _upb_array;
-typedef struct _upb_array upb_array;
-struct _upb_msg;
-typedef struct _upb_msg upb_msg;
-struct _upb_bytesrc;
-typedef struct _upb_bytesrc upb_bytesrc;
+struct _upb_strref;
struct _upb_fielddef;
-typedef struct _upb_fielddef upb_fielddef;
-typedef int32_t upb_strlen_t;
-#define UPB_STRLEN_MAX INT32_MAX
-
-// The type of a upb_value. This is like a upb_fieldtype_t, but adds the
-// constant UPB_VALUETYPE_ARRAY to represent an array.
-typedef uint8_t upb_valuetype_t;
-#define UPB_TYPE_ENDGROUP 19 // Need to increase if more real types are added!
-#define UPB_VALUETYPE_ARRAY 32
-#define UPB_VALUETYPE_BYTESRC 32
-#define UPB_VALUETYPE_RAW 33
-#define UPB_VALUETYPE_FIELDDEF 34
-#define UPB_VALUETYPE_PTR 35
+// Special constants for the upb_value.type field. These must not conflict
+// with any members of FieldDescriptorProto.Type.
+#define UPB_TYPE_ENDGROUP 0
+#define UPB_VALUETYPE_FIELDDEF 32
+#define UPB_VALUETYPE_PTR 33
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
@@ -159,19 +141,15 @@ typedef struct {
int64_t int64;
uint32_t uint32;
bool _bool;
- upb_string *str;
- upb_bytesrc *bytesrc;
- upb_msg *msg;
- upb_array *arr;
- upb_atomic_t *refcount;
- upb_fielddef *fielddef;
+ struct _upb_strref *strref;
+ struct _upb_fielddef *fielddef;
void *_void;
} val;
+#ifndef NDEBUG
// In debug mode we carry the value type around also so we can check accesses
// to be sure the right member is being read.
-#ifndef NDEBUG
- upb_valuetype_t type;
+ char type;
#endif
} upb_value;
@@ -183,7 +161,7 @@ typedef struct {
#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \
INLINE ctype upb_value_get ## name(upb_value val) { \
- assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \
+ assert(val.type == proto_type); \
return val.val.membername; \
} \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
@@ -197,18 +175,14 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
-UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); // Marked for destruction.
-UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
+UPB_VALUE_ACCESSORS(strref, strref, struct _upb_strref*, UPB_TYPE(STRING));
+UPB_VALUE_ACCESSORS(fielddef, fielddef, struct _upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
extern upb_value UPB_NO_VALUE;
-INLINE upb_atomic_t *upb_value_getrefcount(upb_value val) {
- assert(val.type == UPB_TYPE(MESSAGE) ||
- val.type == UPB_TYPE(STRING) ||
- val.type == UPB_VALUETYPE_ARRAY);
- return val.val.refcount;
-}
+
+/* upb_status *****************************************************************/
// Status codes used as a return value. Codes >0 are not fatal and can be
// resumed.
@@ -224,42 +198,38 @@ enum upb_status_code {
// An unrecoverable error occurred.
UPB_ERROR = -1,
-
- // A recoverable error occurred (for example, data of the wrong type was
- // encountered which we can skip over).
- // UPB_STATUS_RECOVERABLE_ERROR = -2
};
// TODO: consider adding error space and code, to let ie. errno be stored
// as a proper code, or application-specific error codes.
-struct _upb_status {
+typedef struct {
char code;
- upb_string *str;
-};
-
-typedef struct _upb_status upb_status;
-
-#define UPB_STATUS_INIT {UPB_OK, NULL}
-#define UPB_ERRORMSG_MAXLEN 256
+ char *str; // NULL when no message is present. NULL-terminated.
+ char *buf; // Owned by the status.
+ size_t bufsize;
+} upb_status;
-INLINE bool upb_ok(upb_status *status) {
- return status->code == UPB_OK;
-}
-
-INLINE void upb_status_init(upb_status *status) {
- status->code = UPB_OK;
- status->str = NULL;
-}
+#define UPB_STATUS_INIT {UPB_OK, NULL, NULL, 0}
+void upb_status_init(upb_status *status);
void upb_status_uninit(upb_status *status);
-// Caller owns a ref on the returned string.
-upb_string *upb_status_tostring(upb_status *status);
-void upb_printerr(upb_status *status);
-void upb_clearerr(upb_status *status);
-void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg,
- ...);
-void upb_copyerr(upb_status *to, upb_status *from);
+INLINE bool upb_ok(upb_status *status) { return status->code == UPB_OK; }
+INLINE bool upb_iseof(upb_status *status) { return status->code == UPB_EOF; }
+
+void upb_status_fromerrno(upb_status *status);
+void upb_status_print(upb_status *status, FILE *f);
+void upb_status_clear(upb_status *status);
+void upb_status_setf(upb_status *status, enum upb_status_code code,
+ const char *fmt, ...);
+void upb_status_copy(upb_status *to, upb_status *from);
+
+// Like vasprintf, but uses *buf (which can be NULL) as a starting point and
+// reallocates it only if the new value will not fit. "size" is updated to
+// reflect the allocated size of the buffer. Returns the length of the
+// formatted output, or -1 on memory alloc failure.
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
+ const char *fmt, va_list args);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/upb_bytestream.h b/src/upb_bytestream.h
index e4b51fd..836abb0 100644
--- a/src/upb_bytestream.h
+++ b/src/upb_bytestream.h
@@ -1,120 +1,195 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
- * Copyright (c) 2010-2011 Google Inc. See LICENSE for details.
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
- * Defines the interfaces upb_bytesrc and upb_bytesink, which are abstractions
- * of read()/write() with useful buffering/sharing semantics.
+ * This file contains upb_bytesrc and upb_bytesink, which are abstractions of
+ * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing
+ * semantics. They are virtual base classes so concrete implementations
+ * can get the data from a fd, a string, a cord, etc.
+ *
+ * Byte streams are NOT thread-safe! (Like f{read,write}_unlocked())
*/
#ifndef UPB_BYTESTREAM_H
#define UPB_BYTESTREAM_H
#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
-/* upb_bytesrc ****************************************************************/
-// upb_bytesrc is a pull interface for streams of bytes, basically an
-// abstraction of read()/fread(), but it avoids copies where possible.
+/* upb_bytesrc ****************************************************************/
-typedef upb_strlen_t (*upb_bytesrc_read_fptr)(
- upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status);
-typedef bool (*upb_bytesrc_getstr_fptr)(
- upb_bytesrc *src, upb_string *str, upb_status *status);
+// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
+// they become available, and to preserve some trailing amount of data.
+typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
+typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
+typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
+typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t);
+typedef void upb_bytesrc_ref_func(void*);
+typedef struct _upb_bytesrc_vtbl {
+ upb_bytesrc_fetch_func *fetch;
+ upb_bytesrc_read_func *read;
+ upb_bytesrc_getptr_func *getptr;
+ upb_bytesrc_refregion_func *refregion;
+ upb_bytesrc_refregion_func *unrefregion;
+ upb_bytesrc_ref_func *ref;
+ upb_bytesrc_ref_func *unref;
+} upb_bytesrc_vtbl;
typedef struct {
- upb_bytesrc_read_fptr read;
- upb_bytesrc_getstr_fptr getstr;
-} upb_bytesrc_vtbl;
+ upb_bytesrc_vtbl *vtbl;
+} upb_bytesrc;
-struct _upb_bytesrc {
- upb_bytesrc_vtbl *vtbl;
-};
+INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
+ src->vtbl = vtbl;
+}
-INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) {
- s->vtbl = vtbl;
+// Fetches at least minlen bytes starting at ofs, returning the actual number
+// of bytes fetched (or 0 on error: see "s" for details). Gives caller a ref
+// on the fetched region. It is safe to re-fetch existing regions but only if
+// they are ref'd. "ofs" may not be greater than the end of the region that
+// was previously fetched.
+INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
+ return src->vtbl->fetch(src, ofs, s);
}
-// Reads up to "count" bytes into "buf", returning the total number of bytes
-// read. If 0, indicates error and puts details in "status".
-INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf,
- upb_strlen_t count, upb_status *status) {
- return src->vtbl->read(src, buf, count, status);
+// Copies "len" bytes of data from offset src_ofs to "dst", which must be at
+// least "len" bytes long. The caller must own a ref on the given region.
+INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
+ char *dst) {
+ src->vtbl->read(src, src_ofs, len, dst);
}
-// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure
-// that "str" is created or just recycled. Returns "false" if no data was
-// returned, either due to error or EOF (check status for details).
+// Returns a pointer to the bytesrc's internal buffer, returning how much data
+// was actually returned (which may be less than "len" if the given region is
+// not contiguous). The caller must own refs on the entire region from [ofs,
+// ofs+len]. The returned buffer is valid for as long as the region remains
+// ref'd.
//
-// In comparison to upb_bytesrc_read(), this call can possibly alias existing
-// string data (which avoids a copy). On the other hand, if the data was *not*
-// already in an existing string, this copies it into a upb_string, and if the
-// data needs to be put in a specific range of memory (because eg. you need to
-// put it into a different kind of string object) then upb_bytesrc_get() could
-// save you a copy.
-INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str,
- upb_status *status) {
- return src->vtbl->getstr(src, str, status);
+// TODO: is "len" really required here?
+INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
+ size_t *len) {
+ return src->vtbl->getptr(src, ofs, len);
+}
+
+// Gives the caller a ref on the given region. The caller must know that the
+// given region is already ref'd.
+INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
+ src->vtbl->refregion(src, ofs, len);
+}
+
+// Releases a ref on the given region, which the caller must have previously
+// ref'd.
+INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
+ src->vtbl->unrefregion(src, ofs, len);
+}
+
+// Attempts to ref the bytesrc itself, returning false if this bytesrc is
+// not ref-able.
+INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) {
+ if (src->vtbl->ref) {
+ src->vtbl->ref(src);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+// Unref's the bytesrc itself. May only be called when upb_bytesrc_tryref()
+// has previously returned true.
+INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
+ assert(src->vtbl->unref);
+ src->vtbl->unref(src);
+}
+
+/* upb_strref *****************************************************************/
+
+// The structure we pass for a string.
+typedef struct _upb_strref {
+ // Pointer to the string data. NULL if the string spans multiple input
+ // buffers (in which case upb_bytesrc_getptr() must be called to obtain
+ // the actual pointers).
+ const char *ptr;
+
+ // Bytesrc from which this string data comes. This is only guaranteed to be
+ // alive from inside the callback; however if the handler knows more about
+ // its type and how to prolong its life, it may do so.
+ upb_bytesrc *bytesrc;
+
+ // Offset in the bytesrc that represents the beginning of this string.
+ uint32_t stream_offset;
+
+ // Length of the string.
+ uint32_t len;
+
+ // Possibly add optional members here like start_line, start_column, etc.
+} upb_strref;
+
+// Copies the contents of the strref into a newly-allocated, NULL-terminated
+// string.
+INLINE char *upb_strref_dup(struct _upb_strref *r) {
+ char *ret = (char*)malloc(r->len + 1);
+ upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret);
+ ret[r->len] = '\0';
+ return ret;
}
/* upb_bytesink ***************************************************************/
-struct _upb_bytesink;
-typedef struct _upb_bytesink upb_bytesink;
-typedef upb_strlen_t (*upb_bytesink_putstr_fptr)(
- upb_bytesink *bytesink, upb_string *str, upb_status *status);
-typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)(
- upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args);
+typedef bool upb_bytesink_write_func(void*, const char*, size_t, upb_status*);
+typedef int32_t upb_bytesink_vprintf_func(
+ void*, upb_status*, const char *fmt, va_list args);
typedef struct {
- upb_bytesink_putstr_fptr putstr;
- upb_bytesink_vprintf_fptr vprintf;
+ upb_bytesink_write_func *write;
+ upb_bytesink_vprintf_func *vprintf;
} upb_bytesink_vtbl;
-struct _upb_bytesink {
+typedef struct {
upb_bytesink_vtbl *vtbl;
-};
+} upb_bytesink;
-INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) {
- s->vtbl = vtbl;
+INLINE void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) {
+ sink->vtbl = vtbl;
}
+INLINE bool upb_bytesink_write(upb_bytesink *sink, const char *buf, size_t len,
+ upb_status *s) {
+ return sink->vtbl->write(sink, buf, len, s);
+}
-// TODO: Figure out how buffering should be handled. Should the caller buffer
-// data and only call these functions when a buffer is full? Seems most
-// efficient, but then buffering has to be configured in the caller, which
-// could be anything, which makes it hard to have a standard interface for
-// controlling buffering.
-//
-// The downside of having the bytesink buffer is efficiency: the caller is
-// making more (virtual) function calls, and the caller can't arrange to have
-// a big contiguous buffer. The bytesink can do this, but will have to copy
-// to make the data contiguous.
-
-// Returns the number of bytes written.
-INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
- const char *fmt, ...) {
+INLINE bool upb_bytesink_writestr(upb_bytesink *sink, const char *str,
+ upb_status *s) {
+ return upb_bytesink_write(sink, str, strlen(str), s);
+}
+
+// Returns the number of bytes written or -1 on error.
+INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
+ const char *fmt, ...) {
va_list args;
va_start(args, fmt);
- upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
+ uint32_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
va_end(args);
return ret;
}
-// Puts the given string, returning true if the operation was successful, otherwise
-// check "status" for details. Ownership of the string is *not* passed; if
-// the callee wants a reference he must call upb_string_getref() on it.
-INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str,
- upb_status *status) {
- return sink->vtbl->putstr(sink, str, status);
-}
+// OPT: add getappendbuf()
+// OPT: add writefrombytesrc()
+// TODO: add flush()
+
+
+/* upb_cbuf *******************************************************************/
+
+// A circular buffer implementation for bytesrcs that do internal buffering.
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
index a44b561..fed48af 100644
--- a/src/upb_decoder.c
+++ b/src/upb_decoder.c
@@ -8,6 +8,7 @@
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
+#include "bswap.h"
#include "upb_bytestream.h"
#include "upb_decoder.h"
#include "upb_varint.h"
@@ -38,83 +39,97 @@ static void upb_decoder_exit2(void *_d) {
upb_decoder *d = _d;
upb_decoder_exit(d);
}
+static void upb_decoder_abort(upb_decoder *d, const char *msg) {
+ upb_status_setf(d->status, UPB_ERROR, msg);
+ upb_decoder_exit(d);
+}
/* Decoding/Buffering of wire types *******************************************/
-#define UPB_MAX_VARINT_ENCODED_SIZE 10
-
-static void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; }
static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
+ assert((size_t)(d->end - d->ptr) >= len);
+ d->ptr += len;
+}
size_t upb_decoder_offset(upb_decoder *d) {
- size_t offset = d->buf_stream_offset;
- if (d->buf) offset += (d->ptr - d->buf);
+ size_t offset = d->bufstart_ofs;
+ if (d->ptr) offset += (d->ptr - d->buf);
return offset;
}
static void upb_decoder_setmsgend(upb_decoder *d) {
- uint32_t end = d->dispatcher.top->end_offset;
- d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end;
+ upb_dispatcher_frame *f = d->dispatcher.top;
+ size_t delimlen = f->end_ofs - d->bufstart_ofs;
+ size_t buflen = d->end - d->buf;
+ if (f->end_ofs != UINT64_MAX && delimlen <= buflen) {
+ d->delim_end = (uintptr_t)(d->buf + delimlen);
+ } else {
+ // Buffers must not run up against the end of memory.
+ assert((uintptr_t)d->end < UINTPTR_MAX);
+ d->delim_end = UINTPTR_MAX;
+ }
}
// Pulls the next buffer from the bytesrc. Should be called only when the
// current buffer is completely empty.
-static void upb_pullbuf(upb_decoder *d, bool need) {
+static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
- int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
- upb_string_recycle(&d->bufstr);
- if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
- d->buf = NULL;
- d->end = NULL;
- if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF.");
- upb_decoder_exit(d);
- }
- if (last_buf_len != -1) {
- d->buf_stream_offset += last_buf_len;
- for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
- if (f->end_offset != UPB_NONDELIMITED)
- f->end_offset -= last_buf_len;
+ if (d->bufend_ofs == d->refend_ofs) {
+ d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
+ if (!upb_ok(d->status)) {
+ d->ptr = NULL;
+ d->end = NULL;
+ if (upb_iseof(d->status)) return false;
+ upb_decoder_exit(d);
+ }
}
- d->buf = upb_string_getrobuf(d->bufstr);
- d->ptr = upb_string_getrobuf(d->bufstr);
- d->end = d->buf + upb_string_len(d->bufstr);
+ d->bufstart_ofs = d->bufend_ofs;
+ size_t len;
+ d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
+ assert(len > 0);
+ d->bufend_ofs = d->bufstart_ofs + len;
+ d->ptr = d->buf;
+ d->end = d->buf + len;
+#ifdef UPB_USE_JIT_X64
d->jit_end = d->end - 20;
- upb_string_recycle(&d->tmp);
- upb_string_substr(d->tmp, d->bufstr, 0, 0);
+#endif
upb_decoder_setmsgend(d);
+ return true;
}
-// Called only from the slow path, this function copies the next "len" bytes
-// from the stream to "data", adjusting the decoder state appropriately.
-NOINLINE void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) {
- while (1) {
- size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d));
- memcpy(data, d->ptr, to_copy);
- upb_decoder_advance(d, to_copy);
- bytes -= to_copy;
- if (bytes == 0) return;
- upb_pullbuf(d, need);
+static void upb_pullbuf(upb_decoder *d) {
+ if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
+}
+
+void upb_decoder_commit(upb_decoder *d) {
+ d->completed_ptr = d->ptr;
+ if (d->refstart_ofs < d->bufstart_ofs) {
+ // Drop our ref on the previous buf's region.
+ upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
+ upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
+ d->refstart_ofs = d->bufstart_ofs;
}
}
-NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) {
+NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
+ const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
- upb_getbuf(d, &byte, 1, need);
- u64 |= ((uint64_t)byte & 0x7F) << bitpos;
- }
-
- if(bitpos == 70 && (byte & 0x80)) {
- upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
- upb_decoder_exit(d);
+ if (upb_decoder_bufleft(d) == 0) {
+ upb_pullbuf(d);
+ ptr = d->ptr;
+ }
+ u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
}
+ if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
return u64;
}
// For tags and delimited lengths, which must be <=32bit and are usually small.
-FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
+FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
const char *p = d->ptr;
uint32_t ret;
uint64_t u64;
@@ -125,11 +140,8 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
ret |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
- u64 = upb_decode_varint_slow(d, need);
- if (u64 > 0xffffffff) {
- upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n");
- upb_decoder_exit(d);
- }
+ u64 = upb_decode_varint_slow(d);
+ if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
@@ -137,57 +149,90 @@ done:
return ret;
}
+FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
+ if (upb_decoder_bufleft(d) == 0) {
+ // Check for our two normal end-of-message conditions.
+ if (d->bufend_ofs == d->end_ofs) return false;
+ if (!upb_trypullbuf(d)) return false;
+ }
+ *val = upb_decode_varint32(d);
+ return true;
+}
+
FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
- if (upb_decoder_bufleft(d) >= 16) {
- // Common (fast) case.
+ if (upb_decoder_bufleft(d) >= 10) {
+ // Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
- if (r.p == NULL) {
- upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
- upb_decoder_exit(d);
- }
+ if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
upb_decoder_advance(d, r.p - d->ptr);
return r.val;
- } else {
- return upb_decode_varint_slow(d, true);
+ } else if (upb_decoder_bufleft(d) > 0) {
+ // Intermediate case -- worth it?
+ char tmpbuf[10];
+ memset(tmpbuf, 0x80, 10);
+ memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
+ upb_decoderet r = upb_vdecode_fast(tmpbuf);
+ if (r.p != NULL) {
+ upb_decoder_advance(d, r.p - tmpbuf);
+ return r.val;
+ }
}
+ // Slow case -- varint spans buffer seam.
+ return upb_decode_varint_slow(d);
}
-FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) {
+FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
if (upb_decoder_bufleft(d) >= bytes) {
- // Common (fast) case.
- memcpy(val, d->ptr, bytes);
+ // Fast case.
+ memcpy(buf, d->ptr, bytes);
upb_decoder_advance(d, bytes);
} else {
- upb_getbuf(d, val, bytes, true);
+ // Slow case.
+ size_t read = 0;
+ while (read < bytes) {
+ size_t avail = upb_decoder_bufleft(d);
+ memcpy(buf + read, d->ptr, avail);
+ upb_decoder_advance(d, avail);
+ read += avail;
+ }
}
}
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32;
- upb_decode_fixed(d, &u32, sizeof(uint32_t));
- return u32;
+ upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
+ return le32toh(u32);
}
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64;
- upb_decode_fixed(d, &u64, sizeof(uint64_t));
- return u64;
+ upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
+ return le64toh(u64);
}
-INLINE upb_string *upb_decode_string(upb_decoder *d) {
- upb_string_recycle(&d->tmp);
- uint32_t strlen = upb_decode_varint32(d, true);
+INLINE upb_strref *upb_decode_string(upb_decoder *d) {
+ uint32_t strlen = upb_decode_varint32(d);
+ d->strref.stream_offset = upb_decoder_offset(d);
+ d->strref.len = strlen;
+ if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
if (upb_decoder_bufleft(d) >= strlen) {
- // Common (fast) case.
- upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen);
+ // Fast case.
+ d->strref.ptr = d->ptr;
upb_decoder_advance(d, strlen);
} else {
- upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true);
+ // Slow case.
+ while (1) {
+ size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen);
+ upb_decoder_advance(d, consume);
+ strlen -= consume;
+ if (strlen == 0) break;
+ upb_pullbuf(d);
+ }
}
- return d->tmp;
+ return &d->strref;
}
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
- upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end;
+ upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
@@ -224,7 +269,7 @@ T(DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
-T(STRING, string, str, upb_string*)
+T(STRING, string, strref, upb_strref*)
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, UPB_NONDELIMITED);
@@ -235,28 +280,24 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
+ upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf));
}
/* The main decoding loop *****************************************************/
-// Called when a user callback returns something other than UPB_CONTINUE.
-// This should unwind one or more stack frames, skipping the corresponding
-// data in the input.
+static void upb_decoder_checkdelim(upb_decoder *d) {
+ while ((uintptr_t)d->ptr >= d->delim_end) {
+ if ((uintptr_t)d->ptr > d->delim_end)
+ upb_decoder_abort(d, "Bad submessage end");
-static void upb_delimend(upb_decoder *d) {
- if (d->ptr > d->submsg_end) {
- upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
- upb_decoder_exit(d);
- }
-
- if (d->dispatcher.top->is_sequence) {
- upb_dispatch_endseq(&d->dispatcher);
- } else {
- upb_dispatch_endsubmsg(&d->dispatcher);
+ if (d->dispatcher.top->is_sequence) {
+ upb_dispatch_endseq(&d->dispatcher);
+ } else {
+ upb_dispatch_endsubmsg(&d->dispatcher);
+ }
+ upb_decoder_setmsgend(d);
}
- upb_decoder_setmsgend(d);
}
static void upb_decoder_enterjit(upb_decoder *d) {
@@ -273,7 +314,8 @@ static void upb_decoder_enterjit(upb_decoder *d) {
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
- uint32_t tag = upb_decode_varint32(d, false);
+ uint32_t tag;
+ if (!upb_trydecode_varint32(d, &tag)) return NULL;
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
// There are no explicit "startseq" or "endseq" markers in protobuf
@@ -287,8 +329,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
// TODO: support packed.
assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
(tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
- uint32_t end = d->dispatcher.top->end_offset;
- upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end;
+ uint32_t end = d->dispatcher.top->end_ofs;
+ upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
if (f) return f;
@@ -299,11 +341,13 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
- upb_decoder_advance(d, upb_decode_varint32(d, true));
- break;
+ upb_decoder_advance(d, upb_decode_varint32(d)); break;
+ default:
+ upb_decoder_abort(d, "Invavlid wire type");
}
// TODO: deliver to unknown field callback.
- while (d->ptr >= d->submsg_end) upb_delimend(d);
+ upb_decoder_commit(d);
+ upb_decoder_checkdelim(d);
}
}
@@ -311,11 +355,11 @@ void upb_decoder_onexit(upb_decoder *d) {
if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
- upb_clearerr(d->status);
+ upb_status_clear(d->status);
upb_dispatch_endmsg(&d->dispatcher, d->status);
} else {
if (d->status->code == UPB_EOF)
- upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage.");
+ upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
}
}
@@ -325,26 +369,32 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
return;
}
d->status = status;
- upb_pullbuf(d, true);
upb_dispatch_startmsg(&d->dispatcher);
while(1) { // Main loop: executed once per tag/field pair.
- while (d->ptr >= d->submsg_end) upb_delimend(d);
+ upb_decoder_checkdelim(d);
upb_decoder_enterjit(d);
// if (!d->dispatcher.top->is_packed)
upb_fhandlers *f = upb_decode_tag(d);
+ if (!f) upb_decoder_exit2(d);
f->decode(d, f);
+ upb_decoder_commit(d);
}
}
static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
upb_dispatcher_frame *bottom) {
(void)top;
+ (void)bottom;
+ (void)_d;
+#if 0
upb_decoder *d = _d;
+ // TODO
if (bottom->end_offset == UPB_NONDELIMITED) {
// TODO: support skipping groups.
abort();
}
- d->ptr = d->buf + bottom->end_offset;
+ d->ptr = d->buf.ptr + bottom->end_offset;
+#endif
}
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
@@ -354,10 +404,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
d->jit_code = NULL;
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif
- d->bufstr = NULL;
- d->tmp = NULL;
- upb_string_recycle(&d->tmp);
-
// Set function pointers for each field's decode function.
for (int i = 0; i < handlers->msgs_len; i++) {
upb_mhandlers *m = handlers->msgs[i];
@@ -396,19 +442,27 @@ void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
upb_handlers_unref(h);
}
-void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
- upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED;
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
+ uint64_t end_ofs, void *closure) {
+ upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
+ f->end_ofs = end_ofs;
+ d->end_ofs = end_ofs;
+ d->refstart_ofs = start_ofs;
+ d->refend_ofs = start_ofs;
+ d->bufstart_ofs = start_ofs;
+ d->bufend_ofs = start_ofs;
d->bytesrc = bytesrc;
d->buf = NULL;
d->ptr = NULL;
d->end = NULL; // Force a buffer pull.
- d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered.
- d->buf_stream_offset = 0;
+#ifdef UPB_USE_JIT_X64
+ d->jit_end = NULL;
+#endif
+ d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered.
+ d->strref.bytesrc = bytesrc;
}
void upb_decoder_uninit(upb_decoder *d) {
- upb_string_unref(d->bufstr);
- upb_string_unref(d->tmp);
#ifdef UPB_USE_JIT_X64
if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
#endif
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
index e9bc0b4..7a813bf 100644
--- a/src/upb_decoder.h
+++ b/src/upb_decoder.h
@@ -30,44 +30,33 @@ extern "C" {
struct dasm_State;
-struct _upb_decoder {
- // Bytesrc from which we pull serialized data.
- upb_bytesrc *bytesrc;
+typedef struct _upb_decoder {
+ upb_bytesrc *bytesrc; // Source of our serialized data.
+ upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
+ upb_status *status; // Where we will store any errors that occur.
+ upb_strref strref; // For passing string data to callbacks.
- // String to hold our input buffer; is only active if d->buf != NULL.
- upb_string *bufstr;
+ // Offsets for the region we currently have ref'd.
+ uint64_t refstart_ofs, refend_ofs;
- // Temporary string for passing string data to callbacks.
- upb_string *tmp;
+ // Current buffer and its stream offset.
+ const char *buf, *ptr, *end;
+ uint64_t bufstart_ofs, bufend_ofs;
- // The offset within the overall stream represented by the *beginning* of buf.
- size_t buf_stream_offset;
+ // Stream offset for the end of the top-level message, if any.
+ uint64_t end_ofs;
- // Pointer to the beginning of our current data buffer, or NULL if none.
- const char *buf;
+ // Buf offset as of which we've delivered callbacks; needed for rollback on
+ // UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
+ const char *completed_ptr;
- // End of this buffer, relative to *ptr.
- const char *end;
- const char *jit_end;
+ // End of the delimited region, relative to ptr, or UINTPTR_MAX if not in
+ // this buf.
+ uintptr_t delim_end;
- // Members which may also be written by the JIT:
-
- // Our current position in the data buffer.
- const char *ptr;
-
- // End of this submessage, relative to *ptr.
- const char *submsg_end;
-
- // MIN(end, submsg_end)
- const char *effective_end;
-
- upb_fhandlers *f;
-
- // Where we will store any errors that occur.
- upb_status *status;
-
- // Dispatcher to which we push parsed data.
- upb_dispatcher dispatcher;
+#ifdef UPB_USE_JIT_X64
+ // For JIT, which doesn't do bounds checks in the middle of parsing a field.
+ const char *jit_end, *effective_end; // == MIN(jit_end, delim_end)
// JIT-generated machine code (else NULL).
char *jit_code;
@@ -75,21 +64,10 @@ struct _upb_decoder {
char *debug_info;
struct dasm_State *dynasm;
- sigjmp_buf exitjmp;
-};
-
-// For use in the upb_dispatcher's stack.
-typedef struct {
- // Relative to the beginning of this buffer.
- // For groups and the top-level: UINT32_MAX.
- uint32_t end_offset;
- bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPATEDEND
-} upb_decoder_srcdata;
+#endif
-// A upb_decoder decodes the binary protocol buffer format, writing the data it
-// decodes to a upb_sink.
-struct _upb_decoder;
-typedef struct _upb_decoder upb_decoder;
+ sigjmp_buf exitjmp;
+} upb_decoder;
// Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref
@@ -107,7 +85,10 @@ void upb_decoder_uninit(upb_decoder *d);
// state where it has not seen any data, and expects the next data to be from
// the beginning of a new protobuf. Parsers must be reset before they can be
// used. A decoder can be reset multiple times.
-void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure);
+//
+// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message.
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs,
+ uint64_t end_ofs, void *closure);
void upb_decoder_decode(upb_decoder *d, upb_status *status);
diff --git a/src/upb_decoder_x86.dasc b/src/upb_decoder_x86.dasc
index fec0ffe..800b099 100644
--- a/src/upb_decoder_x86.dasc
+++ b/src/upb_decoder_x86.dasc
@@ -120,7 +120,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.define PTR, rbx
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
-|.type STRING, upb_string, r14
+|.type STRREF, upb_strref, r14
|.type DECODER, upb_decoder, r15
|
|.macro callp, addr
@@ -199,7 +199,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| jae ->exit_jit // Frame stack overflow.
| mov qword FRAME:rax->f, f
| mov qword FRAME:rax->closure, closure_
-| mov dword FRAME:rax->end_offset, end_offset_
+| mov dword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, is_sequence_
| mov CLOSURE, rdx
| mov DECODER->dispatcher.top, rax
@@ -217,17 +217,17 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| mov rsi, DECODER->jit_end
|| if (m->is_group) {
| mov64 rax, 0xffffffffffffffff
-| mov qword DECODER->submsg_end, rax
+| mov qword DECODER->delim_end, rax
| mov DECODER->effective_end, rsi
|| } else {
| // Could store a correctly-biased version in the frame, at the cost of
| // a larger stack.
-| mov eax, dword FRAME->end_offset
+| mov eax, dword FRAME->end_ofs
| add rax, qword DECODER->buf
-| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset
+| mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs
| cmp rax, rsi
| jb >8
-| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end)
+| mov rax, rsi // effective_end = min(d->delim_end, d->jit_end)
|8:
| mov DECODER->effective_end, rax
|| }
@@ -293,7 +293,7 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
} else {
| mov rdx, CLOSURE
}
- | mov esi, FRAME->end_offset
+ | mov esi, FRAME->end_ofs
| pushframe f, rdx, esi, true
}
@@ -357,10 +357,14 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
// buf, which sidesteps any security problems. The C path has more
// robust checks.
| decode_varint tag_size
- | mov STRING->len, ARG3_32
- | mov STRING->ptr, PTR
+ | mov STRREF->len, ARG3_32
+ | mov STRREF->ptr, PTR
+ | mov rax, PTR
+ | sub rax, DECODER->buf
+ | add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs
+ | mov STRREF->stream_offset, eax
| add PTR, ARG3_64
- | mov ARG3_64, STRING
+ | mov ARG3_64, STRREF
| cmp PTR, DECODER->effective_end
| ja ->exit_jit // Can't deliver, whole string not in buf.
break;
@@ -514,7 +518,7 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
// This case doesn't exist for groups, because there eob really means
// eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
- | cmp PTR, DECODER->submsg_end
+ | cmp PTR, DECODER->delim_end
| jb ->exit_jit // We are at eob, but not end-of-submsg.
}
@@ -550,7 +554,7 @@ static void upb_decoder_jit(upb_decoder *d) {
| push rbx
| mov DECODER, ARG1_64
| mov FRAME, DECODER:ARG1_64->dispatcher.top
- | mov STRING, DECODER:ARG1_64->tmp
+ | lea STRREF, DECODER:ARG1_64->strref
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr
diff --git a/src/upb_def.c b/src/upb_def.c
index 45e7f73..4cd80b1 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -7,18 +7,11 @@
#include <stdlib.h>
#include <stddef.h>
+#include <string.h>
#include "upb_def.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
-/* Search for a character in a string, in reverse. */
-static int my_memrchr(char *data, char c, size_t len)
-{
- int off = len-1;
- while(off > 0 && data[off] != c) --off;
- return off;
-}
-
void upb_deflist_init(upb_deflist *l) {
l->size = 8;
l->defs = malloc(l->size * sizeof(void*));
@@ -105,7 +98,8 @@ static void upb_def_init(upb_def *def, upb_deftype_t type) {
}
static void upb_def_uninit(upb_def *def) {
- upb_string_unref(def->fqname);
+ //fprintf(stderr, "Freeing def: %p\n", def);
+ free(def->fqname);
}
@@ -120,19 +114,19 @@ typedef struct _upb_unresolveddef {
// The target type name. This may or may not be fully qualified. It is
// tempting to want to use base.fqname for this, but that will be qualified
// which is inappropriate for a name we still have to resolve.
- upb_string *name;
+ char *name;
} upb_unresolveddef;
// Is passed a ref on the string.
-static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) {
+static upb_unresolveddef *upb_unresolveddef_new(const char *str) {
upb_unresolveddef *def = malloc(sizeof(*def));
upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
- def->name = upb_string_getref(str);
+ def->name = strdup(str);
return def;
}
static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
- upb_string_unref(def->name);
+ free(def->name);
upb_def_uninit(&def->base);
free(def);
}
@@ -152,7 +146,7 @@ static void upb_enumdef_free(upb_enumdef *e) {
upb_enum_iter i;
for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
// Frees the ref taken when the string was parsed.
- upb_string_unref(upb_enum_iter_name(i));
+ free(upb_enum_iter_name(i));
}
upb_strtable_free(&e->ntoi);
upb_inttable_free(&e->iton);
@@ -170,12 +164,11 @@ upb_enumdef *upb_enumdef_dup(upb_enumdef *e) {
return new_e;
}
-bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) {
- if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false;
- upb_ntoi_ent ntoi_ent = {{name, 0}, num};
- upb_iton_ent iton_ent = {0, name};
- upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
- upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name
+bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) {
+ if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL))
+ return false;
+ upb_strtable_insert(&e->ntoi, name, &num);
+ upb_inttable_insert(&e->iton, num, strdup(name));
return true;
}
@@ -193,19 +186,22 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
return upb_inttable_next(&e->iton, iter);
}
-upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
- upb_iton_ent *e =
- (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
- return e ? e->string : NULL;
+const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
+ upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
+ return e ? e->str : NULL;
}
-bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) {
- upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name);
+bool upb_enumdef_ntoil(upb_enumdef *def, char *name, size_t len, int32_t *num) {
+ upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len);
if (!e) return false;
if (num) *num = e->value;
return true;
}
+bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num) {
+ return upb_enumdef_ntoil(e, name, strlen(name), num);
+}
+
/* upb_fielddef ***************************************************************/
@@ -228,9 +224,9 @@ upb_fielddef *upb_fielddef_new() {
static void upb_fielddef_free(upb_fielddef *f) {
if (upb_isstring(f)) {
- upb_string_unref(upb_value_getstr(f->defaultval));
+ free(upb_value_getptr(f->defaultval));
}
- upb_string_unref(f->name);
+ free(f->name);
free(f);
}
@@ -270,18 +266,18 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
f->def = def;
if (f->type == UPB_TYPE(ENUM)) {
// Resolve the enum's default from a string to an integer.
- upb_string *str = upb_value_getstr(f->defaultval);
+ char *str = upb_value_getptr(f->defaultval);
assert(str); // Should point to either a real default or the empty string.
upb_enumdef *e = upb_downcast_enumdef(f->def);
int32_t val = 0;
- if (str == upb_emptystring()) {
+ if (str[0] == '\0') {
upb_value_setint32(&f->defaultval, e->defaultval);
} else {
bool success = upb_enumdef_ntoi(e, str, &val);
- upb_string_unref(str);
+ free(str);
if (!success) {
- upb_seterr(s, UPB_ERROR, "Default enum value (" UPB_STRFMT ") is not a "
- "member of the enum", UPB_STRARG(str));
+ upb_status_setf(s, UPB_ERROR, "Default enum value (%s) is not a "
+ "member of the enum", str);
return false;
}
upb_value_setint32(&f->defaultval, val);
@@ -295,9 +291,9 @@ void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
f->number = number;
}
-void upb_fielddef_setname(upb_fielddef *f, upb_string *name) {
+void upb_fielddef_setname(upb_fielddef *f, const char *name) {
assert(f->msgdef == NULL);
- f->name = upb_string_getref(name);
+ f->name = strdup(name);
}
void upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
@@ -326,7 +322,7 @@ void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl)
f->accessor = vtbl;
}
-void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) {
+void upb_fielddef_settypename(upb_fielddef *f, const char *name) {
upb_def_unref(f->def);
f->def = UPB_UPCAST(upb_unresolveddef_new(name));
}
@@ -424,9 +420,8 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
assert(f->msgdef == NULL);
f->msgdef = m;
upb_itof_ent itof_ent = {0, f};
- upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
- upb_strtable_insert(&m->ntof, &ntof_ent.e);
+ upb_strtable_insert(&m->ntof, f->name, &f);
return true;
}
@@ -493,7 +488,6 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
/* upb_symtabtxn **************************************************************/
typedef struct {
- upb_strtable_entry e;
upb_def *def;
} upb_symtab_ent;
@@ -503,16 +497,19 @@ void upb_symtabtxn_init(upb_symtabtxn *t) {
void upb_symtabtxn_uninit(upb_symtabtxn *txn) {
upb_strtable *t = &txn->deftab;
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e))
- upb_def_unref(e->def);
+ upb_strtable_iter i;
+ for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ const upb_symtab_ent *e = upb_strtable_iter_value(&i);
+ free(e->def);
+ }
upb_strtable_free(t);
}
bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) {
// TODO: check if already present.
- upb_symtab_ent e = {{def->fqname, 0}, def};
- upb_strtable_insert(&t->deftab, &e.e);
+ upb_symtab_ent e = {def};
+ //fprintf(stderr, "txn Inserting: %p, ent: %p\n", e.def, &e);
+ upb_strtable_insert(&t->deftab, def->fqname, &e);
return true;
}
@@ -531,59 +528,28 @@ err:
// Given a symbol and the base symbol inside which it is defined, find the
// symbol's definition in t.
static upb_symtab_ent *upb_resolve(upb_strtable *t,
- upb_string *base, upb_string *sym) {
- if(upb_string_len(sym) == 0) return NULL;
- if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) {
+ const char *base, const char *sym) {
+ if(strlen(sym) == 0) return NULL;
+ if(sym[0] == UPB_SYMBOL_SEPARATOR) {
// Symbols starting with '.' are absolute, so we do a single lookup.
// Slice to omit the leading '.'
- upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1);
- upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
- upb_string_unref(sym_str);
- return e;
+ return upb_strtable_lookup(t, sym + 1);
} else {
// Remove components from base until we find an entry or run out.
// TODO: This branch is totally broken, but currently not used.
- upb_string *sym_str = upb_string_new();
- int baselen = upb_string_len(base);
- upb_symtab_ent *ret = NULL;
- while(1) {
- // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym
- upb_strlen_t len = baselen + upb_string_len(sym) + 1;
- char *buf = upb_string_getrwbuf(sym_str, len);
- memcpy(buf, upb_string_getrobuf(base), baselen);
- buf[baselen] = UPB_SYMBOL_SEPARATOR;
- memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym));
-
- upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
- if (e) {
- ret = e;
- break;
- } else if(baselen == 0) {
- // No more scopes to try.
- ret = NULL;
- break;
- }
- baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen);
- }
- upb_string_unref(sym_str);
- return ret;
+ (void)base;
+ assert(false);
+ return NULL;
}
}
-upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) {
- return upb_strtable_begin(&t->deftab);
+void upb_symtabtxn_begin(upb_symtabtxn_iter *i, upb_symtabtxn *t) {
+ upb_strtable_begin(i, &t->deftab);
}
-
-upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) {
- return upb_strtable_next(&t->deftab, i);
-}
-
-bool upb_symtabtxn_done(upb_symtabtxn_iter i) {
- return i == NULL;
-}
-
-upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
- upb_symtab_ent *e = iter;
+void upb_symtabtxn_next(upb_symtabtxn_iter *i) { upb_strtable_next(i); }
+bool upb_symtabtxn_done(upb_symtabtxn_iter *i) { return upb_strtable_done(i); }
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *i) {
+ const upb_symtab_ent *e = upb_strtable_iter_value(i);
return e->def;
}
@@ -591,8 +557,10 @@ upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
/* upb_symtab public interface ************************************************/
static void _upb_symtab_free(upb_strtable *t) {
- upb_symtab_ent *e;
- for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) {
+ upb_strtable_iter i;
+ upb_strtable_begin(&i, t);
+ for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ const upb_symtab_ent *e = upb_strtable_iter_value(&i);
assert(upb_atomic_read(&e->def->refcount) == 0);
upb_def_free(e->def);
}
@@ -632,9 +600,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
// We may only use part of this, depending on how many symbols are of the
// correct type.
upb_def **defs = malloc(sizeof(*defs) * total);
- upb_symtab_ent *e = upb_strtable_begin(&s->symtab);
+ upb_strtable_iter iter;
+ upb_strtable_begin(&iter, &s->symtab);
int i = 0;
- for(; e; e = upb_strtable_next(&s->symtab, &e->e)) {
+ for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+ const upb_symtab_ent *e = upb_strtable_iter_value(&iter);
upb_def *def = e->def;
assert(def);
if(type == UPB_DEF_ANY || def->type == type)
@@ -646,7 +616,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
return defs;
}
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) {
+upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym) {
upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
upb_def *ret = NULL;
@@ -658,9 +628,9 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) {
return ret;
}
-upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) {
+upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym) {
upb_rwlock_rdlock(&s->lock);
- upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol);
+ upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym);
upb_def *ret = NULL;
if(e) {
ret = e->def;
@@ -692,8 +662,9 @@ bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL);
if (needcopy && !replacing) {
- upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)};
- upb_strtable_insert(&txn->deftab, &e.e);
+ upb_symtab_ent e = {upb_def_dup(def)};
+ //fprintf(stderr, "Replacing def: %p\n", e.def);
+ upb_strtable_insert(&txn->deftab, def->fqname, &e);
replacing = true;
}
return replacing;
@@ -706,25 +677,29 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
// themselves be replaced with versions that will point to the new defs.
// Do a DFS -- any path that finds a new def must replace all ancestors.
upb_strtable *symtab = &s->symtab;
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) {
+ upb_strtable_iter i;
+ upb_strtable_begin(&i, symtab);
+ for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
+ const upb_symtab_ent *e = upb_strtable_iter_value(&i);
upb_symtab_dfs(e->def, open_defs, 0, txn);
}
// Resolve all refs.
upb_strtable *txntab = &txn->deftab;
- for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) {
+ upb_strtable_begin(&i, txntab);
+ for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ const upb_symtab_ent *e = upb_strtable_iter_value(&i);
upb_msgdef *m = upb_dyncast_msgdef(e->def);
if(!m) continue;
// Type names are resolved relative to the message in which they appear.
- upb_string *base = m->base.fqname;
+ const char *base = m->base.fqname;
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
+ upb_msg_iter j;
+ for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) {
+ upb_fielddef *f = upb_msg_iter_field(j);
if(!upb_hasdef(f)) continue; // No resolving necessary.
- upb_string *name = upb_downcast_unresolveddef(f->def)->name;
+ const char *name = upb_downcast_unresolveddef(f->def)->name;
// Resolve from either the txntab (pending adds) or symtab (existing
// defs). If both exist, prefer the pending add, because it will be
@@ -732,17 +707,18 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
upb_symtab_ent *found;
if(!(found = upb_resolve(txntab, base, name)) &&
!(found = upb_resolve(symtab, base, name))) {
- upb_seterr(status, UPB_ERROR,
- "could not resolve symbol '" UPB_STRFMT "'"
- " in context '" UPB_STRFMT "'",
- UPB_STRARG(name), UPB_STRARG(base));
+ upb_status_setf(status, UPB_ERROR, "could not resolve symbol '%s' "
+ "in context '%s'", name, base);
return false;
}
// Check the type of the found def.
upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
+ //fprintf(stderr, "found: %p\n", found);
+ //fprintf(stderr, "found->def: %p\n", found->def);
+ //fprintf(stderr, "found->def->type: %d\n", found->def->type);
if(found->def->type != expected) {
- upb_seterr(status, UPB_ERROR, "Unexpected type");
+ upb_status_setf(status, UPB_ERROR, "Unexpected type");
return false;
}
if (!upb_fielddef_resolve(f, found->def, status)) return false;
@@ -751,9 +727,9 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
// The defs in the transaction have been vetted, and can be moved to the
// symtab without causing errors.
- upb_symtab_ent *tmptab_e;
- for(tmptab_e = upb_strtable_begin(txntab); tmptab_e;
- tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) {
+ upb_strtable_begin(&i, txntab);
+ for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i);
upb_def_movetosymtab(tmptab_e->def, s);
upb_symtab_ent *symtab_e =
upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
@@ -761,7 +737,8 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
upb_deflist_push(&s->olddefs, symtab_e->def);
symtab_e->def = tmptab_e->def;
} else {
- upb_strtable_insert(&s->symtab, &tmptab_e->e);
+ //fprintf(stderr, "Inserting def: %p\n", tmptab_e->def);
+ upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e);
}
}
diff --git a/src/upb_def.h b/src/upb_def.h
index ca969cb..34f5009 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -32,7 +32,7 @@ typedef struct _upb_symtab upb_symtab;
// All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
- UPB_DEF_MSG = 0,
+ UPB_DEF_MSG = 1,
UPB_DEF_ENUM,
UPB_DEF_SERVICE, // Not yet implemented.
@@ -44,7 +44,7 @@ typedef enum {
/* upb_def: base class for defs **********************************************/
typedef struct {
- upb_string *fqname; // Fully qualified.
+ char *fqname; // Fully qualified.
upb_symtab *symtab; // Def is mutable iff symtab == NULL.
upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
upb_deftype_t type;
@@ -66,7 +66,7 @@ upb_def *upb_def_dup(upb_def *def);
// A upb_fielddef describes a single field in a message. It isn't a full def
// in the sense that it derives from upb_def. It cannot stand on its own; it
// must be part of a upb_msgdef. It is also reference-counted.
-struct _upb_fielddef {
+typedef struct _upb_fielddef {
struct _upb_msgdef *msgdef;
upb_def *def; // if upb_hasdef(f)
upb_atomic_t refcount;
@@ -78,11 +78,11 @@ struct _upb_fielddef {
int16_t hasbit;
uint16_t offset;
int32_t number;
- upb_string *name;
+ char *name;
upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
upb_value fval;
struct _upb_accessor_vtbl *accessor;
-};
+} upb_fielddef;
upb_fielddef *upb_fielddef_new();
void upb_fielddef_ref(upb_fielddef *f);
@@ -93,7 +93,7 @@ upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
-INLINE upb_string *upb_fielddef_name(upb_fielddef *f) { return f->name; }
+INLINE char *upb_fielddef_name(upb_fielddef *f) { return f->name; }
INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
@@ -114,7 +114,7 @@ upb_def *upb_fielddef_subdef(upb_fielddef *f);
// added to a msgdef. For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future.
void upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
-void upb_fielddef_setname(upb_fielddef *f, upb_string *name);
+void upb_fielddef_setname(upb_fielddef *f, const char *name);
// These writers may be called at any time prior to being put in a symtab.
void upb_fielddef_settype(upb_fielddef *f, uint8_t type);
@@ -124,7 +124,7 @@ void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
// The name of the message or enum this field is referring to. Must be found
// at name resolution time (when the symtabtxn is committed to the symtab).
-void upb_fielddef_settypename(upb_fielddef *f, upb_string *name);
+void upb_fielddef_settypename(upb_fielddef *f, const char *name);
// A variety of tests about the type of a field.
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
@@ -227,7 +227,7 @@ INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
return e ? e->f : NULL;
}
-INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) {
+INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, char *name) {
upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
return e ? e->f : NULL;
}
@@ -272,7 +272,7 @@ typedef struct {
typedef struct {
bool junk;
- upb_string *string;
+ char *str;
} upb_iton_ent;
upb_enumdef *upb_enumdef_new();
@@ -288,12 +288,13 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
// Adds a value to the enumdef. Requires that no existing val has this
// name or number (returns false and does not add if there is). May only
// be called before the enumdef is in a symtab.
-bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num);
+bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
// Lookups from name to integer and vice-versa.
-bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, int32_t *num);
-// Caller does not own a ref on the returned string.
-upb_string *upb_enumdef_iton(upb_enumdef *e, int32_t num);
+bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num);
+bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num);
+// Caller does not own the returned string.
+const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
// Iteration over name/value pairs. The order is undefined.
// Adding an enum val invalidates any iterators.
@@ -308,9 +309,9 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
// Iterator accessors.
-INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
+INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
- return e->string;
+ return e->str;
}
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
return upb_inttable_iter_key(iter);
@@ -340,7 +341,7 @@ bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def);
// Gets the def (if any) that is associated with this name in the symtab.
// Caller does *not* inherit a ref on the def.
-upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name);
+upb_def *upb_symtabtxn_get(upb_symtabtxn *t, char *name);
// Iterate over the defs that are part of the transaction.
// The order is undefined.
@@ -350,12 +351,12 @@ upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name);
// i = upb_symtabtxn_next(t, i)) {
// upb_def *def = upb_symtabtxn_iter_def(i);
// }
-typedef void* upb_symtabtxn_iter;
+typedef upb_strtable_iter upb_symtabtxn_iter;
-upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t);
-upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i);
-bool upb_symtabtxn_done(upb_symtabtxn_iter i);
-upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter);
+void upb_symtabtxn_begin(upb_symtabtxn_iter* i, upb_symtabtxn *t);
+void upb_symtabtxn_next(upb_symtabtxn_iter *i);
+bool upb_symtabtxn_done(upb_symtabtxn_iter *i);
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *iter);
/* upb_symtab *****************************************************************/
@@ -397,12 +398,12 @@ void upb_symtab_unref(upb_symtab *s);
// If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL.
// TODO: make return const
-upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
+upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym);
// Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL.
// TODO: make return const
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
+upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref
diff --git a/src/upb_descriptor.c b/src/upb_descriptor.c
index 127d19c..f70f1ba 100644
--- a/src/upb_descriptor.c
+++ b/src/upb_descriptor.c
@@ -9,19 +9,22 @@
#include <stdlib.h>
#include <errno.h>
-#include "upb_string.h"
#include "upb_def.h"
-/* Joins strings together, for example:
- * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
- * join("", "Baz") -> "Baz"
- * Caller owns a ref on the returned string. */
-static upb_string *upb_join(upb_string *base, upb_string *name) {
- if (!base || upb_string_len(base) == 0) {
- return upb_string_getref(name);
+// Returns a newly allocated string that joins the input strings with a '.',
+// for example:
+// join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
+// join("", "Baz") -> "Baz"
+// A NULL or empty "base" yields a plain copy of "name". The caller owns the
+// returned string and must free() it. Returns NULL if allocation fails.
+static char *upb_join(char *base, char *name) {
+ if (!base || strlen(base) == 0) {
+ return strdup(name);
} else {
- return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT,
- UPB_STRARG(base), UPB_STRARG(name));
+ size_t baselen = strlen(base);
+ size_t namelen = strlen(name);
+ // Room for base, the '.' separator, name and the terminating NUL.
+ char *ret = malloc(baselen + namelen + 2);
+ if (!ret) return NULL;
+ memcpy(ret, base, baselen);
+ ret[baselen] = '.';
+ // Copies name together with its terminating NUL.
+ memcpy(ret + baselen + 1, name, namelen + 1);
+ return ret;
}
}
@@ -36,12 +39,12 @@ static upb_def *upb_deflist_last(upb_deflist *l) {
}
// Qualify the defname for all defs starting with offset "start" with "str".
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
+static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
for(uint32_t i = start; i < l->len; i++) {
upb_def *def = l->defs[i];
- upb_string *name = def->fqname;
+ char *name = def->fqname;
def->fqname = upb_join(str, name);
- upb_string_unref(name);
+ free(name);
}
}
@@ -59,13 +62,13 @@ void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) {
}
void upb_descreader_uninit(upb_descreader *r) {
- upb_string_unref(r->name);
+ free(r->name);
upb_status_uninit(&r->status);
upb_deflist_uninit(&r->defs);
- upb_string_unref(r->default_string);
+ free(r->default_string);
while (r->stack_len > 0) {
upb_descreader_frame *f = &r->stack[--r->stack_len];
- upb_string_unref(f->name);
+ free(f->name);
}
}
@@ -91,13 +94,14 @@ void upb_descreader_startcontainer(upb_descreader *r) {
void upb_descreader_endcontainer(upb_descreader *r) {
upb_descreader_frame *f = &r->stack[--r->stack_len];
upb_deflist_qualify(&r->defs, f->name, f->start);
- upb_string_unref(f->name);
+ free(f->name);
+ f->name = NULL;
}
-void upb_descreader_setscopename(upb_descreader *r, upb_string *str) {
+void upb_descreader_setscopename(upb_descreader *r, char *str) {
upb_descreader_frame *f = &r->stack[r->stack_len-1];
- upb_string_unref(f->name);
- f->name = upb_string_getref(str);
+ free(f->name);
+ f->name = str;
}
// Handlers for google.protobuf.FileDescriptorProto.
@@ -119,7 +123,7 @@ static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
upb_value val) {
(void)fval;
upb_descreader *r = _r;
- upb_descreader_setscopename(r, upb_value_getstr(val));
+ upb_descreader_setscopename(r, upb_strref_dup(upb_value_getstrref(val)));
return UPB_CONTINUE;
}
@@ -190,8 +194,8 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
upb_value val) {
(void)fval;
upb_descreader *r = _r;
- upb_string_unref(r->name);
- r->name = upb_string_getref(upb_value_getstr(val));
+ free(r->name);
+ r->name = upb_strref_dup(upb_value_getstrref(val));
r->saw_name = true;
return UPB_CONTINUE;
}
@@ -210,7 +214,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
upb_status *status) {
upb_descreader *r = _r;
if(!r->saw_number || !r->saw_name) {
- upb_seterr(status, UPB_ERROR, "Enum value missing name or number.");
+ upb_status_setf(status, UPB_ERROR, "Enum value missing name or number.");
return;
}
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
@@ -220,7 +224,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
upb_enumdef_setdefault(e, r->number);
}
upb_enumdef_addval(e, r->name, r->number);
- upb_string_unref(r->name);
+ free(r->name);
r->name = NULL;
}
@@ -254,11 +258,11 @@ static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status)
upb_descreader *r = _r;
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) {
- upb_seterr(status, UPB_ERROR, "Enum had no name.");
+ upb_status_setf(status, UPB_ERROR, "Enum had no name.");
return;
}
if (upb_inttable_count(&e->iton) == 0) {
- upb_seterr(status, UPB_ERROR, "Enum had no values.");
+ upb_status_setf(status, UPB_ERROR, "Enum had no values.");
return;
}
}
@@ -269,8 +273,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
(void)fval;
upb_descreader *r = _r;
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
- upb_string_unref(e->base.fqname);
- e->base.fqname = upb_string_getref(upb_value_getstr(val));
+ free(e->base.fqname);
+ e->base.fqname = upb_strref_dup(upb_value_getstrref(val));
return UPB_CONTINUE;
}
@@ -298,99 +302,73 @@ static upb_flow_t upb_fielddef_startmsg(void *_r) {
return UPB_CONTINUE;
}
-// Converts the default value in string "dstr" into "d". Passes a ref on dstr.
+// Converts the default value in string "str" into "d". Takes ownership of
+// "str" (which may be NULL if no default was given): it is either stored in
+// "d" (string, bytes and enum types) or freed before returning.
// Returns true on success.
-static bool upb_fielddef_parsedefault(upb_string *dstr, upb_value *d, int type) {
+static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) {
bool success = true;
if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
// We'll keep the ref we had on it. We include enums in this case because
// we need the enumdef to resolve the name, but we may not have it yet.
// We'll resolve it later.
- if (dstr) {
- upb_value_setstr(d, dstr);
- } else {
- upb_value_setstr(d, upb_emptystring());
- }
+ if (!str) str = strdup("");
+ upb_value_setptr(d, str);
} else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
// We don't expect to get a default value.
- upb_string_unref(dstr);
- if (dstr != NULL) success = false;
+ // Test the pointer before freeing it; its value is indeterminate afterwards.
+ if (str != NULL) success = false;
+ free(str);
+ } else if (type == UPB_TYPE(BOOL)) {
+ if (!str || strcmp(str, "false") == 0)
+ upb_value_setbool(d, false);
+ else if (strcmp(str, "true") == 0)
+ upb_value_setbool(d, true);
+ else
+ success = false;
+ free(str);
} else {
// The strto* functions need the string to be NULL-terminated.
- char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
+ if (!str) str = strdup("0");
char *end;
- upb_string_unref(dstr);
+ // The strto* functions only set errno on failure, so clear it first;
+ // otherwise a stale ERANGE from earlier code would reject a valid default.
+ errno = 0;
switch (type) {
case UPB_TYPE(INT32):
case UPB_TYPE(SINT32):
- case UPB_TYPE(SFIXED32):
- if (strz) {
- long val = strtol(strz, &end, 0);
- if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
- success = false;
- else
- upb_value_setint32(d, val);
- } else {
- upb_value_setint32(d, 0);
- }
+ case UPB_TYPE(SFIXED32): {
+ long val = strtol(str, &end, 0);
+ if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
+ success = false;
+ else
+ upb_value_setint32(d, val);
break;
+ }
case UPB_TYPE(INT64):
case UPB_TYPE(SINT64):
case UPB_TYPE(SFIXED64):
- if (strz) {
- upb_value_setint64(d, strtoll(strz, &end, 0));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setint64(d, 0);
- }
+ upb_value_setint64(d, strtoll(str, &end, 0));
+ if (errno == ERANGE || *end) success = false;
break;
case UPB_TYPE(UINT32):
- case UPB_TYPE(FIXED32):
- if (strz) {
- unsigned long val = strtoul(strz, &end, 0);
- if (val > UINT32_MAX || errno == ERANGE || *end)
- success = false;
- else
- upb_value_setuint32(d, val);
- } else {
- upb_value_setuint32(d, 0);
- }
+ case UPB_TYPE(FIXED32): {
+ unsigned long val = strtoul(str, &end, 0);
+ if (val > UINT32_MAX || errno == ERANGE || *end)
+ success = false;
+ else
+ upb_value_setuint32(d, val);
break;
+ }
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64):
- if (strz) {
- upb_value_setuint64(d, strtoull(strz, &end, 0));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setuint64(d, 0);
- }
+ upb_value_setuint64(d, strtoull(str, &end, 0));
+ if (errno == ERANGE || *end) success = false;
break;
case UPB_TYPE(DOUBLE):
- if (strz) {
- upb_value_setdouble(d, strtod(strz, &end));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setdouble(d, 0.0);
- }
+ upb_value_setdouble(d, strtod(str, &end));
+ if (errno == ERANGE || *end) success = false;
break;
case UPB_TYPE(FLOAT):
- if (strz) {
- upb_value_setfloat(d, strtof(strz, &end));
- if (errno == ERANGE || *end) success = false;
- } else {
- upb_value_setfloat(d, 0.0);
- }
- break;
- case UPB_TYPE(BOOL):
- if (!strz || strcmp(strz, "false") == 0)
- upb_value_setbool(d, false);
- else if (strcmp(strz, "true") == 0)
- upb_value_setbool(d, true);
- else
- success = false;
+ upb_value_setfloat(d, strtof(str, &end));
+ if (errno == ERANGE || *end) success = false;
break;
}
- free(strz);
+ free(str);
}
return success;
}
@@ -405,13 +383,13 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) {
// Field was successfully read, add it as a field of the msgdef.
upb_msgdef *m = upb_descreader_top(r);
upb_msgdef_addfield(m, f);
- upb_string *dstr = r->default_string;
+ char *dstr = r->default_string;
r->default_string = NULL;
upb_value val;
if (!upb_fielddef_parsedefault(dstr, &val, f->type)) {
// We don't worry too much about giving a great error message since the
// compiler should have ensured this was correct.
- upb_seterr(status, UPB_ERROR, "Error converting default value.");
+ upb_status_setf(status, UPB_ERROR, "Error converting default value.");
return;
}
upb_fielddef_setdefault(f, val);
@@ -441,7 +419,9 @@ static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val)
static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
(void)fval;
upb_descreader *r = _r;
- upb_fielddef_setname(r->f, upb_value_getstr(val));
+ char *name = upb_strref_dup(upb_value_getstrref(val));
+ upb_fielddef_setname(r->f, name);
+ free(name);
return UPB_CONTINUE;
}
@@ -449,7 +429,9 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
upb_value val) {
(void)fval;
upb_descreader *r = _r;
- upb_fielddef_settypename(r->f, upb_value_getstr(val));
+ char *name = upb_strref_dup(upb_value_getstrref(val));
+ upb_fielddef_settypename(r->f, name);
+ free(name);
return UPB_CONTINUE;
}
@@ -459,8 +441,8 @@ static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
upb_descreader *r = _r;
// Have to convert from string to the correct type, but we might not know the
// type yet.
- upb_string_unref(r->default_string);
- r->default_string = upb_string_getref(upb_value_getstr(val));
+ free(r->default_string);
+ r->default_string = upb_strref_dup(upb_value_getstrref(val));
return UPB_CONTINUE;
}
@@ -501,7 +483,7 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
upb_descreader *r = _r;
upb_msgdef *m = upb_descreader_top(r);
if(!m->base.fqname) {
- upb_seterr(status, UPB_ERROR, "Encountered message with no name.");
+ upb_status_setf(status, UPB_ERROR, "Encountered message with no name.");
return;
}
@@ -514,9 +496,9 @@ static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
upb_descreader *r = _r;
assert(val.type == UPB_TYPE(STRING));
upb_msgdef *m = upb_descreader_top(r);
- upb_string_unref(m->base.fqname);
- m->base.fqname = upb_string_getref(upb_value_getstr(val));
- upb_descreader_setscopename(r, upb_value_getstr(val));
+ free(m->base.fqname);
+ m->base.fqname = upb_strref_dup(upb_value_getstrref(val));
+ upb_descreader_setscopename(r, strdup(m->base.fqname));
return UPB_CONTINUE;
}
diff --git a/src/upb_descriptor.h b/src/upb_descriptor.h
index f74de3b..ee05e2f 100644
--- a/src/upb_descriptor.h
+++ b/src/upb_descriptor.h
@@ -28,7 +28,7 @@ extern "C" {
// definitions that are contained inside. "name" tracks the name of the
// message or package (a bare name -- not qualified by any enclosing scopes).
typedef struct {
- upb_string *name;
+ char *name;
// Index of the first def that is under this scope. For msgdefs, the
// msgdef itself is at start-1.
int start;
@@ -42,11 +42,11 @@ typedef struct {
upb_status status;
uint32_t number;
- upb_string *name;
+ char *name;
bool saw_number;
bool saw_name;
- upb_string *default_string;
+ char *default_string;
upb_fielddef *f;
} upb_descreader;
diff --git a/src/upb_glue.c b/src/upb_glue.c
index f288855..1f5bd3f 100644
--- a/src/upb_glue.c
+++ b/src/upb_glue.c
@@ -12,15 +12,15 @@
#include "upb_strstream.h"
#include "upb_textprinter.h"
-void upb_strtomsg(upb_string *str, void *msg, upb_msgdef *md,
+void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
- upb_stringsrc_reset(&strsrc, str);
+ upb_stringsrc_reset(&strsrc, str, len);
upb_decoder d;
upb_decoder_initformsgdef(&d, md);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg);
+ upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
upb_decoder_decode(&d, status);
upb_stringsrc_uninit(&strsrc);
@@ -53,10 +53,11 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
#endif
// TODO: read->load.
-void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status) {
+void upb_read_descriptor(upb_symtab *symtab, const char *str, size_t len,
+ upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
- upb_stringsrc_reset(&strsrc, str);
+ upb_stringsrc_reset(&strsrc, str, len);
upb_handlers *h = upb_handlers_new();
upb_descreader_reghandlers(h);
@@ -68,16 +69,16 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status
upb_symtabtxn txn;
upb_symtabtxn_init(&txn);
upb_descreader_init(&r, &txn);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), &r);
+ upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r);
upb_decoder_decode(&d, status);
// Set default accessors and layouts on all messages.
// for msgdef in symtabtxn:
upb_symtabtxn_iter i;
- for(i = upb_symtabtxn_begin(&txn); !upb_symtabtxn_done(i);
- i = upb_symtabtxn_next(&txn, i)) {
- upb_def *def = upb_symtabtxn_iter_def(i);
+ upb_symtabtxn_begin(&i, &txn);
+ for(; !upb_symtabtxn_done(&i); upb_symtabtxn_next(&i)) {
+ upb_def *def = upb_symtabtxn_iter_def(&i);
upb_msgdef *md = upb_dyncast_msgdef(def);
if (!md) return;
// For field in msgdef:
@@ -96,3 +97,33 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}
+
+// Reads the whole file "filename" into a newly malloc()ed buffer, which the
+// caller must free(). If "len" is non-NULL, the file size in bytes is stored
+// in *len. The buffer is not NUL-terminated. Returns NULL if the file cannot
+// be opened or sized, if allocation fails, or if it cannot be read completely.
+char *upb_readfile(const char *filename, size_t *len) {
+ // Declared up front so that no "goto error" jumps over an initialization.
+ char *buf = NULL;
+ long size;
+ FILE *f = fopen(filename, "rb");
+ if(!f) return NULL;
+ if(fseek(f, 0, SEEK_END) != 0) goto error;
+ size = ftell(f);
+ if(size < 0) goto error;
+ if(fseek(f, 0, SEEK_SET) != 0) goto error;
+ // Allocate at least one byte so that an empty file still yields a non-NULL
+ // buffer (malloc(0) may return NULL) and is not mistaken for an error.
+ buf = malloc(size > 0 ? (size_t)size : 1);
+ if(!buf) goto error;
+ // fread() with a zero size returns 0, so only read when there is data.
+ if(size > 0 && fread(buf, size, 1, f) != 1) goto error;
+ fclose(f);
+ if (len) *len = size;
+ return buf;
+
+error:
+ free(buf); // No-op while buf is still NULL.
+ fclose(f);
+ return NULL;
+}
+
+// Reads the file "fname" with upb_readfile() and hands its contents to
+// upb_read_descriptor() for "symtab". If the file cannot be read, an error
+// naming the file is set on "status" and nothing else happens.
+void upb_read_descriptorfile(upb_symtab *symtab, const char *fname,
+ upb_status *status) {
+ size_t nbytes;
+ char *contents = upb_readfile(fname, &nbytes);
+ if (contents) {
+ upb_read_descriptor(symtab, contents, nbytes, status);
+ free(contents); // upb_readfile() hands us ownership of the buffer.
+ } else {
+ upb_status_setf(status, UPB_ERROR, "Couldn't read file: %s", fname);
+ }
+}
diff --git a/src/upb_glue.h b/src/upb_glue.h
index 27611cd..0448c2f 100644
--- a/src/upb_glue.h
+++ b/src/upb_glue.h
@@ -27,6 +27,7 @@
#define UPB_GLUE_H
#include <stdbool.h>
+#include "upb.h"
#ifdef __cplusplus
extern "C" {
@@ -36,20 +37,23 @@ extern "C" {
// Clients should use the regular, typedef'd names (eg. upb_string).
struct _upb_msg;
struct _upb_msgdef;
-struct _upb_status;
-struct _upb_string;
struct _upb_symtab;
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
-void upb_strtomsg(struct _upb_string *str, void *msg,
- struct _upb_msgdef *md, struct _upb_status *s);
+void upb_strtomsg(const char *str, size_t len, void *msg,
+ struct _upb_msgdef *md, upb_status *s);
-void upb_msgtotext(struct _upb_string *str, void *msg,
- struct _upb_msgdef *md, bool single_line);
+//void upb_msgtotext(struct _upb_string *str, void *msg,
+// struct _upb_msgdef *md, bool single_line);
-void upb_read_descriptor(struct _upb_symtab *symtab, struct _upb_string *str,
- struct _upb_status *status);
+void upb_read_descriptor(struct _upb_symtab *symtab, const char *str, size_t len,
+ upb_status *status);
+
+void upb_read_descriptorfile(struct _upb_symtab *symtab, const char *fname,
+ upb_status *status);
+
+char *upb_readfile(const char *filename, size_t *len);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/upb_handlers.c b/src/upb_handlers.c
index c29281a..f513dfd 100644
--- a/src/upb_handlers.c
+++ b/src/upb_handlers.c
@@ -96,7 +96,6 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
}
typedef struct {
- upb_strtable_entry e;
upb_mhandlers *mh;
} upb_mtab_ent;
@@ -105,8 +104,8 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m,
upb_onfieldreg *fieldreg_cb,
void *closure, upb_strtable *mtab) {
upb_mhandlers *mh = upb_handlers_newmhandlers(h);
- upb_mtab_ent e = {{m->base.fqname, 0}, mh};
- upb_strtable_insert(mtab, &e.e);
+ upb_mtab_ent e = {mh};
+ upb_strtable_insert(mtab, m->base.fqname, &e);
if (msgreg_cb) msgreg_cb(closure, mh, m);
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
@@ -153,7 +152,7 @@ static upb_fhandlers toplevel_f = {
#ifdef NDEBUG
{{0}},
#else
- {{0}, UPB_VALUETYPE_RAW},
+ {{0}, -1},
#endif
NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};
@@ -198,23 +197,23 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
assert(d->top == d->stack);
if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
// TODO: should we avoid this copy by passing client's status obj to cbs?
- upb_copyerr(status, &d->status);
+ upb_status_copy(status, &d->status);
}
void indent(upb_dispatcher *d) {
- for (int i = 0; i < (d->top - d->stack); i++) printf(" ");
+ for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " ");
}
void indentm1(upb_dispatcher *d) {
- for (int i = 0; i < (d->top - d->stack - 1); i++) printf(" ");
+ for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " ");
}
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_fhandlers *f) {
//indent(d);
- //printf("START SEQ: %d\n", f->number);
+ //fprintf(stderr, "START SEQ: %d\n", f->number);
if((d->top+1) >= d->limit) {
- upb_seterr(&d->status, UPB_ERROR, "Nesting too deep.");
+ upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy.
}
@@ -235,7 +234,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
//indentm1(d);
- //printf("END SEQ\n");
+ //fprintf(stderr, "END SEQ\n");
assert(d->top > d->stack);
assert(d->top->is_sequence);
upb_fhandlers *f = d->top->f;
@@ -255,9 +254,9 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f) {
//indent(d);
- //printf("START SUBMSG: %d\n", f->number);
+ //fprintf(stderr, "START SUBMSG: %d\n", f->number);
if((d->top+1) >= d->limit) {
- upb_seterr(&d->status, UPB_ERROR, "Nesting too deep.");
+ upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy.
}
@@ -281,7 +280,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
//indentm1(d);
- //printf("END SUBMSG\n");
+ //fprintf(stderr, "END SUBMSG\n");
assert(d->top > d->stack);
assert(!d->top->is_sequence);
upb_fhandlers *f = d->top->f;
diff --git a/src/upb_handlers.h b/src/upb_handlers.h
index caf0645..1ccc59f 100644
--- a/src/upb_handlers.h
+++ b/src/upb_handlers.h
@@ -17,6 +17,7 @@
#include <limits.h>
#include "upb.h"
#include "upb_def.h"
+#include "upb_bytestream.h"
#ifdef __cplusplus
extern "C" {
@@ -303,14 +304,12 @@ typedef struct {
// Members to use as the data source requires.
void *srcclosure;
+ uint64_t end_ofs;
uint16_t msgindex;
uint16_t fieldindex;
- uint32_t end_offset;
- // Does this frame represent a sequence or a submsg (f might be both).
- // We only need a single bit here, but this will make each individual
- // frame grow from 32 to 40 bytes on LP64, which is a bit excessive.
- bool is_sequence;
+ bool is_sequence; // frame represents seq or submsg? (f might be both).
+ bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX (strings aren't pushed)
} upb_dispatcher_frame;
// Called when some of the input needs to be skipped. All frames from
diff --git a/src/upb_msg.c b/src/upb_msg.c
index b88df32..83fa6ff 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -7,6 +7,7 @@
* Data structure for storing a message of protobuf data.
*/
+#include "upb.h"
#include "upb_msg.h"
void upb_msg_clear(void *msg, upb_msgdef *md) {
@@ -132,23 +133,23 @@ UPB_ACCESSORS(bool, bool)
UPB_ACCESSORS(ptr, void*)
#undef UPB_ACCESSORS
-static void _upb_stdmsg_setstr(void *_dst, upb_value _src) {
- // We do:
- // - upb_string_recycle(), upb_string_substr() instead of
- // - upb_string_unref(), upb_string_getref()
- // because we can conveniently cache these upb_string objects in the
- // upb_msg, whereas the upb_src who is sending us these strings may not
- // have a good way of caching them. This saves the upb_src from allocating
- // new upb_strings all the time to give us.
- //
- // If you were using this to copy one upb_msg to another this would
- // allocate string objects whereas a upb_string_getref could have avoided
- // those allocations completely; if this is an issue, we could make it an
- // option of the upb_msgsink which behavior is desired.
- upb_string **dst = _dst;
- upb_string *src = upb_value_getstr(_src);
- upb_string_recycle(dst);
- upb_string_substr(*dst, src, 0, upb_string_len(src));
+// Stores the string referenced by "src" into the upb_stdarray that "_dst"
+// points at (a upb_stdarray* slot). The array is created on first use, grown
+// when its capacity is smaller than the string, and then filled by reading
+// the bytes from the strref's bytesrc.
+// NOTE(review): malloc()/realloc() results are not checked here.
+static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
+ upb_stdarray **slot = _dst;
+ if (*slot == NULL) {
+ // First assignment to this field: start with an empty array.
+ upb_stdarray *fresh = malloc(sizeof(*fresh));
+ fresh->size = 0;
+ fresh->ptr = NULL;
+ *slot = fresh;
+ }
+ upb_stdarray *str = *slot;
+ upb_strref *ref = upb_value_getstrref(src);
+ if (str->size < ref->len) {
+ // Grow to exactly the incoming length; existing capacity is reused.
+ str->size = ref->len;
+ str->ptr = realloc(str->ptr, str->size);
+ }
+ str->len = ref->len;
+ upb_bytesrc_read(ref->bytesrc, ref->stream_offset, ref->len, str->ptr);
}
upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
@@ -166,15 +167,11 @@ upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
}
upb_value upb_stdmsg_getstr(void *m, upb_value fval) {
- upb_value val = upb_stdmsg_getptr(m, fval);
- upb_value_setstr(&val, upb_value_getptr(val));
- return val;
+ return upb_stdmsg_getptr(m, fval);
}
upb_value upb_stdmsg_seqgetstr(void *i) {
- upb_value val = upb_stdmsg_seqgetptr(i);
- upb_value_setstr(&val, upb_value_getptr(val));
- return val;
+ return upb_stdmsg_seqgetptr(i);
}
void *upb_stdmsg_new(upb_msgdef *md) {
@@ -188,11 +185,13 @@ void upb_stdseq_free(void *s, upb_fielddef *f) {
upb_stdarray *a = s;
if (upb_issubmsg(f) || upb_isstring(f)) {
void **p = (void**)a->ptr;
- for (int i = 0; i < a->size; i++) {
+ for (uint32_t i = 0; i < a->size; i++) {
if (upb_issubmsg(f)) {
upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
} else {
- upb_string_unref(p[i]);
+ upb_stdarray *str = p[i];
+ free(str->ptr);
+ free(str);
}
}
}
@@ -213,7 +212,9 @@ void upb_stdmsg_free(void *m, upb_msgdef *md) {
} else if (upb_issubmsg(f)) {
upb_stdmsg_free(subp, upb_downcast_msgdef(f->def));
} else {
- upb_string_unref(subp);
+ upb_stdarray *str = subp;
+ free(str->ptr);
+ free(str);
}
}
free(m);
diff --git a/src/upb_msg.h b/src/upb_msg.h
index b93037b..af328e3 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -148,7 +148,7 @@ typedef struct {
void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
void upb_msgvisitor_uninit(upb_msgvisitor *v);
-void upb_msgvisitor_reset(upb_msgvisitor *v, upb_msg *m);
+void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
@@ -183,8 +183,8 @@ upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
// if necessary.
typedef struct {
char *ptr;
- int32_t len; // Number of elements present.
- int32_t size; // Number of elements allocated.
+ uint32_t len; // Number of elements present.
+ uint32_t size; // Number of elements allocated.
} upb_stdarray;
upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
diff --git a/src/upb_stdio.c b/src/upb_stdio.c
index c84d52a..20a3c15 100644
--- a/src/upb_stdio.c
+++ b/src/upb_stdio.c
@@ -9,96 +9,158 @@
#include <stddef.h>
#include <stdlib.h>
-#include "upb_string.h"
+#include <string.h>
// We can make this configurable if necessary.
-#define BLOCK_SIZE 4096
+#define BUF_SIZE 32768
-struct upb_stdio {
- upb_bytesrc bytesrc;
- upb_bytesink bytesink;
- FILE *file;
-};
-void upb_stdio_reset(upb_stdio *stdio, FILE* file) {
- stdio->file = file;
+/* upb_bytesrc methods ********************************************************/
+
+// bsearch() comparator: "_key" points to a uint64_t stream offset and "_elem"
+// to a upb_stdio_buf. The two are ordered by the BUF_SIZE-sized block their
+// offsets fall in; returns a negative, zero or positive value accordingly.
+int upb_stdio_cmpbuf(const void *_key, const void *_elem) {
+ const uint64_t *ofs = _key;
+ const upb_stdio_buf *buf = _elem;
+ // Compare instead of subtracting: the unsigned 64-bit difference does not
+ // fit in an int and can come out with the wrong sign once truncated.
+ uint64_t key_block = *ofs / BUF_SIZE;
+ uint64_t buf_block = buf->ofs / BUF_SIZE;
+ return (key_block > buf_block) - (key_block < buf_block);
}
+// Binary-searches s->bufs (s->nbuf entries) with upb_stdio_cmpbuf for the
+// buffer matching stream offset "ofs". The result is whatever bsearch()
+// yields, i.e. NULL when no entry matches.
+static upb_stdio_buf *upb_stdio_findbuf(upb_stdio *s, uint64_t ofs) {
+ // TODO: it is probably faster to linear search short lists, and to
+ // special-case the last one or two bufs.
+ upb_stdio_buf *found =
+ bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf);
+ return found;
+}
-/* upb_bytesrc methods ********************************************************/
+//static upb_strlen_t upb_stdio_read(void *src, uint32_t ofs, upb_buf *b,
+// upb_status *status) {
+// upb_stdio *stdio = (upb_stdio*)src;
+// size_t read = fread(buf, 1, BLOCK_SIZE, stdio->file);
+// if(read < (size_t)BLOCK_SIZE) {
+// // Error or EOF.
+// if(feof(stdio->file)) {
+// upb_seterr(status, UPB_EOF, "");
+// } else if(ferror(stdio->file)) {
+// upb_status_fromerrno(s);
+// return 0;
+// }
+// }
+// b->len = read;
+// stdio->next_ofs += read;
+// return stdio->next_ofs;
+//}
+
+// Placeholder implementation: ignores all of its arguments and always
+// returns 0. It is installed as the first entry of the bytesrc vtable in
+// upb_stdio_init().
+size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
+ (void)src;
+ (void)ofs;
+ (void)s;
+
+ return 0;
+}
-static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf,
- upb_strlen_t count, upb_status *status) {
- upb_stdio *stdio = (upb_stdio*)src;
- assert(count > 0);
- size_t read = fread(buf, 1, count, stdio->file);
- if(read < (size_t)count) {
- // Error or EOF.
- if(feof(stdio->file)) {
- upb_seterr(status, UPB_EOF, "");
- return read;
- } else if(ferror(stdio->file)) {
- upb_seterr(status, UPB_ERROR, "Error reading from stdio stream.");
- return -1;
- }
+// Copies "len" bytes starting at stream offset "src_ofs" into "dst", taking
+// them from the buffer that contains "src_ofs" and then from the buffers that
+// follow it in the array.
+// NOTE(review): the upb_stdio_findbuf() result is not NULL-checked, so this
+// assumes the whole region is already buffered -- confirm against callers.
+void upb_stdio_read(void *src, uint64_t src_ofs, size_t len, char *dst) {
+ upb_stdio_buf *buf = upb_stdio_findbuf(src, src_ofs);
+ src_ofs -= buf->ofs; // Now relative to the start of this buffer.
+ // Only copy what was asked for: a short read must not run to the end of
+ // the buffer (that would overrun dst and underflow len).
+ size_t first = UPB_MIN(len, BUF_SIZE - src_ofs);
+ memcpy(dst, &buf->data[src_ofs], first);
+ len -= first;
+ dst += first;
+ while (len > 0) {
+ ++buf;
+ size_t bytes = UPB_MIN(len, BUF_SIZE);
+ memcpy(dst, buf->data, bytes);
+ len -= bytes;
+ dst += bytes;
}
- return read;
}
-static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str,
- upb_status *status) {
- upb_strlen_t read = upb_stdio_read(
- src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status);
- if (read <= 0) return false;
- upb_string_getrwbuf(str, read);
- return true;
+// Returns a pointer to the buffered byte at stream offset "ofs" and stores in
+// *len the distance from there to the end of that BUF_SIZE-sized buffer.
+// NOTE(review): the upb_stdio_findbuf() result is used without a NULL check.
+const char *upb_stdio_getptr(void *src, uint64_t ofs, size_t *len) {
+ upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
+ uint64_t rel = ofs - buf->ofs; // Offset within this buffer.
+ *len = BUF_SIZE - rel;
+ return &buf->data[rel];
+}
+
+// Increments the refcount of every buffer touched by the "len" bytes that
+// start at stream offset "ofs": the buffer containing "ofs" plus as many
+// following buffers as the region extends into.
+// NOTE(review): like upb_stdio_read(), this assumes the region is already
+// buffered; the upb_stdio_findbuf() result is not NULL-checked.
+void upb_stdio_refregion(void *src, uint64_t ofs, size_t len) {
+ upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
+ ofs -= buf->ofs; // Now relative to the start of this buffer.
+ ++buf->refcount;
+ // Bytes of the region that live in the first buffer.
+ uint64_t in_first = BUF_SIZE - ofs;
+ if (len <= in_first) return;
+ len -= in_first;
+ while (len > 0) {
+ ++buf;
+ ++buf->refcount;
+ // Consume this buffer's share so that the loop terminates.
+ len -= UPB_MIN(len, BUF_SIZE);
+ }
+}
+
+// Placeholder implementation: ignores all of its arguments and does nothing,
+// so refcounts raised by upb_stdio_refregion() are never lowered here.
+void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) {
+ (void)src;
+ (void)ofs;
+ (void)len;
}
/* upb_bytesink methods *******************************************************/
+#if 0
upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
- upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
+ upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
if(written < len) {
- upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
+ upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
}
return written;
}
+#endif
-upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
- const char *fmt, va_list args) {
- upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
- upb_strlen_t written = vfprintf(stdio->file, fmt, args);
+// Formats "fmt"/"args" with vfprintf() onto the FILE of the upb_stdio that
+// embeds "sink" (recovered via offsetof on its "sink" member). Returns the
+// vfprintf() result on success; on failure sets an error on "status".
+// NOTE(review): the return type is unsigned, so the -1 returned on failure
+// reaches callers as UINT32_MAX -- callers must compare against that.
+uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
+ const char *fmt, va_list args) {
+ upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
+ int written = vfprintf(stdio->file, fmt, args);
if (written < 0) {
- upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
+ upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
}
return written;
}
-upb_stdio *upb_stdio_new() {
+// Initializes the caller-provided "stdio": installs the bytesrc vtable on
+// stdio->src and marks the object as having no stream attached. Attach a
+// stream afterwards with upb_stdio_reset() or upb_stdio_open().
+// The bytesink side is currently disabled (see the commented-out code), so
+// stdio->sink is not initialized here.
+void upb_stdio_init(upb_stdio *stdio) {
static upb_bytesrc_vtbl bytesrc_vtbl = {
+ upb_stdio_fetch,
upb_stdio_read,
- upb_stdio_getstr,
+ upb_stdio_getptr,
+ upb_stdio_refregion,
+ upb_stdio_unrefregion,
+ NULL,
+ NULL
};
+ upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
+ // Start with no stream attached so that upb_stdio_uninit() does not read
+ // uninitialized fields if no stream is ever attached (for example when
+ // upb_stdio_open() fails).
+ stdio->file = NULL;
+ stdio->should_close = false;
- static upb_bytesink_vtbl bytesink_vtbl = {
- upb_stdio_putstr,
- upb_stdio_vprintf
- };
+ //static upb_bytesink_vtbl bytesink_vtbl = {
+ // upb_stdio_putstr,
+ // upb_stdio_vprintf
+ //};
+ //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
+}
- upb_stdio *stdio = malloc(sizeof(*stdio));
- upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl);
- upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
- return stdio;
+void upb_stdio_reset(upb_stdio* stdio, FILE *file) {
+  // The handle is borrowed: clearing should_close keeps upb_stdio_uninit()
+  // from calling fclose() on it.
+  stdio->should_close = false;
+  stdio->file = file;
+}
+
+// Opens "filename" with fopen() mode "mode" and attaches the resulting handle
+// to "stdio", which then owns it (upb_stdio_uninit() will fclose() it).  If
+// fopen() fails, the errno-derived error is stored in "s" and "stdio" is left
+// untouched.
+void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
+                    upb_status *s) {
+  FILE *f = fopen(filename, mode);
+  if (!f) {
+    upb_status_fromerrno(s);
+    return;
+  }
+  // Disable buffering; we do our own.  This has to act on the new handle "f":
+  // stdio->file is not assigned until upb_stdio_reset() below, so it still
+  // holds a stale or uninitialized pointer at this point.
+  setvbuf(f, NULL, _IONBF, 0);
+  upb_stdio_reset(stdio, f);
+  // upb_stdio_reset() clears should_close, so it must be set afterwards.
+  stdio->should_close = true;
}
-void upb_stdio_free(upb_stdio *stdio) {
- free(stdio);
+// Closes the file only when should_close is set (upb_stdio_open() sets it;
+// upb_stdio_reset() clears it), then drops the handle.  The fclose() result
+// is not checked.
+void upb_stdio_uninit(upb_stdio *stdio) {
+ // Can't report status; caller should flush() to ensure data is written.
+ if (stdio->should_close) fclose(stdio->file);
+ stdio->file = NULL;
}
-upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; }
-upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; }
+// Accessors returning pointers to the bytesrc / bytesink members embedded in
+// the upb_stdio object itself.
+upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
+upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
diff --git a/src/upb_stdio.h b/src/upb_stdio.h
index a164821..858830c 100644
--- a/src/upb_stdio.h
+++ b/src/upb_stdio.h
@@ -5,7 +5,12 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*
* This file provides upb_bytesrc and upb_bytesink implementations for
- * ANSI C stdio.
+ * ANSI C stdio, which is less efficient than posixfd, but more portable.
+ *
+ * Specifically, stdio functions acquire locks on every operation (unless you
+ * use the f{read,write,...}_unlocked variants, which are not standard) and
+ * perform redundant buffering (unless you disable it with setvbuf(), but we
+ * can only do this on newly-opened filehandles).
*/
#include <stdio.h>
@@ -18,21 +23,44 @@
extern "C" {
#endif
-struct upb_stdio;
-typedef struct upb_stdio upb_stdio;
+// One block of buffered file data.
+typedef struct {
+ // Stream offset that data[0] corresponds to; upb_stdio_getptr() subtracts it
+ // from an absolute offset to index into data.
+ uint64_t ofs;
+ // Incremented by upb_stdio_refregion() for each region overlapping this
+ // buffer.
+ uint32_t refcount;
+ // Flexible array member holding the bytes; upb_stdio.c treats it as BUF_SIZE
+ // bytes long.
+ char data[];
+} upb_stdio_buf;
+
+// We use a single object for both bytesrc and bytesink for simplicity.
+// The object is still not thread-safe, and may only be used by one reader
+// and one writer at a time.
+typedef struct {
+ // Embedded stream objects, handed out by upb_stdio_bytesrc() and
+ // upb_stdio_bytesink().
+ upb_bytesrc src;
+ upb_bytesink sink;
+ // The stdio handle that is read from / written to.
+ FILE *file;
+ // True when upb_stdio_uninit() should fclose() the handle.
+ bool should_close;
+ // Buffer bookkeeping.
+ // NOTE(review): presumably an array of nbuf buffers with capacity szbuf;
+ // the code that maintains these is not visible here -- confirm.
+ upb_stdio_buf **bufs;
+ uint32_t nbuf, szbuf;
+} upb_stdio;
+
+void upb_stdio_init(upb_stdio *stdio);
+// Caller should call upb_stdio_flush prior to calling this to ensure that
+// all data is flushed, otherwise data can be silently dropped if an error
+// occurs flushing the remaining buffers.
+void upb_stdio_uninit(upb_stdio *stdio);
+
+// Resets the object to read/write to the given "file." The caller is
+// responsible for closing the file, which must outlive this object.
+void upb_stdio_reset(upb_stdio *stdio, FILE *file);
-// Creation/deletion.
-upb_stdio *upb_stdio_new();
-void upb_stdio_free(upb_stdio *stdio);
+// As an alternative to upb_stdio_reset(), initializes the object by opening a
+// file, and will handle closing it. This may result in more efficient I/O
+// than the previous since we can call setvbuf() to disable buffering.
+void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
+ upb_status *s);
-// Reset/initialize the object for use. The src or sink will call
-// fread()/fwrite()/etc. on the given FILE*.
-void upb_stdio_reset(upb_stdio *stdio, FILE* file);
+// upb_stdio_uninit() must be called to clean up after the object, including
+// closing the file if it was opened with upb_stdio_open().  Closing can fail,
+// but upb_stdio_uninit() takes no status argument to report that.
-// Gets a bytesrc or bytesink for the given stdio. The returned pointer is
-// invalidated by upb_stdio_reset above. It is perfectly valid to get both
-// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading
-// and writing.
upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
diff --git a/src/upb_string.c b/src/upb_string.c
deleted file mode 100644
index 122eec4..0000000
--- a/src/upb_string.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Google Inc. See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
-
-#include "upb_string.h"
-
-#include <stdlib.h>
-#ifdef __GLIBC__
-#include <malloc.h>
-#elif defined(__APPLE__)
-#include <malloc/malloc.h>
-#endif
-
-static uint32_t upb_round_up_pow2(uint32_t v) {
- // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
-}
-
-upb_string *upb_string_new() {
- upb_string *str = malloc(sizeof(*str));
- str->ptr = NULL;
- str->cached_mem = NULL;
- str->len = 0;
-#ifndef UPB_HAVE_MSIZE
- str->size = 0;
-#endif
- str->src = NULL;
- upb_atomic_init(&str->refcount, 1);
- return str;
-}
-
-uint32_t upb_string_size(upb_string *str) {
-#ifdef __GLIBC__
- return malloc_usable_size(str->cached_mem);
-#elif defined(__APPLE__)
- return malloc_size(str->cached_mem);
-#else
- return str->size;
-#endif
-}
-
-void _upb_string_free(upb_string *str) {
- free(str->cached_mem);
- _upb_string_release(str);
- free(str);
-}
-
-char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
- // assert(str->ptr == NULL);
- upb_strlen_t size = upb_string_size(str);
- if (size < len) {
- size = upb_round_up_pow2(len);
- str->cached_mem = realloc(str->cached_mem, size);
-#ifndef UPB_HAVE_MSIZE
- str->size = size;
-#endif
- }
- str->len = len;
- str->ptr = str->cached_mem;
- return str->cached_mem;
-}
-
-void upb_string_substr(upb_string *str, upb_string *target_str,
- upb_strlen_t start, upb_strlen_t len) {
- assert(str->ptr == NULL);
- assert(start + len <= upb_string_len(target_str));
- if (target_str->src) {
- start += (target_str->ptr - target_str->src->ptr);
- target_str = target_str->src;
- }
- str->src = upb_string_getref(target_str);
- str->ptr = upb_string_getrobuf(target_str) + start;
- str->len = len;
-}
-
-size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format,
- va_list args) {
- // Try once without reallocating. We have to va_copy because we might have
- // to call vsnprintf again.
- uint32_t size = UPB_MAX(upb_string_size(str) - offset, 16);
- char *buf = upb_string_getrwbuf(str, offset + size) + offset;
- va_list args_copy;
- va_copy(args_copy, args);
- uint32_t true_size = vsnprintf(buf, size, format, args_copy);
- va_end(args_copy);
-
- // Resize to be the correct size.
- if (true_size >= size) {
- // Need to print again, because some characters were truncated. vsnprintf
- // has weird behavior (and contrary IMO to what the standard says): it will
- // not write the entire string unless you give it space to store the NULL
- // terminator also. So we can't give it space for the string itself and
- // let NULL get truncated (after all, we don't care about it): we *must*
- // give it space for NULL.
- buf = upb_string_getrwbuf(str, offset + true_size + 1) + offset;
- vsnprintf(buf, true_size + 1, format, args);
- }
- str->len = offset + true_size;
- return true_size;
-}
-
-upb_string *upb_string_asprintf(const char *format, ...) {
- upb_string *str = upb_string_new();
- va_list args;
- va_start(args, format);
- upb_string_vprintf(str, format, args);
- va_end(args);
- return str;
-}
-
-upb_string *upb_strdup(upb_string *s) {
- upb_string *str = upb_string_new();
- upb_strcpy(str, s);
- return str;
-}
-
-void upb_strcat(upb_string *s, upb_string *append) {
- uint32_t old_size = upb_string_len(s);
- uint32_t append_size = upb_string_len(append);
- uint32_t new_size = old_size + append_size;
- char *buf = upb_string_getrwbuf(s, new_size);
- memcpy(buf + old_size, upb_string_getrobuf(append), append_size);
-}
-
-upb_string *upb_strreadfile(const char *filename) {
- FILE *f = fopen(filename, "rb");
- if(!f) return NULL;
- if(fseek(f, 0, SEEK_END) != 0) goto error;
- long size = ftell(f);
- if(size < 0) goto error;
- if(fseek(f, 0, SEEK_SET) != 0) goto error;
- upb_string *s = upb_string_new();
- char *buf = upb_string_getrwbuf(s, size);
- if(fread(buf, size, 1, f) != 1) goto error;
- fclose(f);
- return s;
-
-error:
- fclose(f);
- return NULL;
-}
-
-upb_string *upb_emptystring() {
- static upb_string empty = UPB_STATIC_STRING("");
- return &empty;
-}
-
-char *upb_string_newcstr(upb_string *str) {
- upb_strlen_t len = upb_string_len(str);
- char *ret = malloc(len+1);
- memcpy(ret, upb_string_getrobuf(str), len);
- ret[len] = '\0';
- return ret;
-}
diff --git a/src/upb_string.h b/src/upb_string.h
deleted file mode 100644
index 1f92850..0000000
--- a/src/upb_string.h
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Google Inc. See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * This file defines a simple string type which is length-delimited instead
- * of NULL-terminated, and which has useful sharing semantics.
- *
- * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free()
- * wheverever possible, while keeping both CPU and memory overhead low.
- * Throughout upb there are situations where one wants to reference all or part
- * of another string without copying. upb_string provides APIs for doing this,
- * and allows the referenced string to be kept alive for as long as anyone is
- * referencing it.
- *
- * Characteristics of upb_string:
- * - strings are reference-counted.
- * - strings are immutable (can be mutated only when first created or recycled).
- * - if a string has no other referents, it can be "recycled" into a new string
- * without having to reallocate the upb_string.
- * - strings can be substrings of other strings (owning a ref on the source
- * string).
- *
- * Reference-counted strings have recently fallen out of favor because of the
- * performance impacts of doing thread-safe reference counting with atomic
- * operations. We side-step this issue by not performing atomic operations
- * unless the string has been marked thread-safe. Time will tell whether this
- * scheme is easy and convenient enough to be practical.
- *
- * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched
- * idiom that we go with it instead of making our pointers uint8_t*.
- *
- * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE
- * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if
- * you are logically passing a reference to a upb_string to another thread
- * (which implies that the other thread must eventually call unref of recycle),
- * you have two options:
- *
- * - create a copy of the string that will be used in the other thread only.
- * - call upb_string_get_synchronized_ref(), which will make getref, unref, and
- * recycle thread-safe for this upb_string.
- */
-
-#ifndef UPB_STRING_H
-#define UPB_STRING_H
-
-#include <assert.h>
-#include <string.h>
-#include <stdarg.h>
-#include "upb_atomic.h"
-#include "upb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// All members of this struct are private, and may only be read/written through
-// the associated functions.
-struct _upb_string {
- // The string's refcount.
- upb_atomic_t refcount;
-
- // The pointer to our currently active data. This may be memory we own
- // or a pointer into memory we don't own.
- const char *ptr;
-
- // If non-NULL, this is a block of memory we own. We keep this cached even
- // if "ptr" is currently aliasing memory we don't own.
- char *cached_mem;
-
- // The effective length of the string (the bytes at ptr).
- int32_t len;
-#ifndef UPB_HAVE_MSIZE
- // How many bytes are allocated in cached_mem.
- //
- // Many platforms have a function that can tell you the size of a block
- // that was previously malloc'd. In this case we can avoid storing the
- // size explicitly.
- uint32_t size;
-#endif
-
- // Used if this is a slice of another string, NULL otherwise. We own a ref
- // on src.
- struct _upb_string *src;
-};
-
-// Internal-only initializer for upb_string instances.
-#ifdef UPB_HAVE_MSIZE
-#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, NULL}
-#else
-#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, 0, NULL}
-#endif
-
-// Special pseudo-refcounts for static/stack-allocated strings, respectively.
-#define _UPB_STRING_REFCOUNT_STATIC -1
-#define _UPB_STRING_REFCOUNT_STACK -2
-
-// Returns a newly-created, empty, non-finalized string. When the string is no
-// longer needed, it should be unref'd, never freed directly.
-upb_string *upb_string_new();
-
-// Internal-only; clients should call upb_string_unref().
-void _upb_string_free(upb_string *str);
-
-// Releases a ref on the given string, which may free the memory. "str"
-// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE
-// UNLESS THE STRING IS SYNCHRONIZED.
-INLINE void upb_string_unref(upb_string *str) {
- if (str) {
- }
- if (str && upb_atomic_read(&str->refcount) > 0 &&
- upb_atomic_unref(&str->refcount)) {
- _upb_string_free(str);
- }
-}
-
-static void _upb_string_release(upb_string *str) {
- if(str->src) {
- upb_string_unref(str->src);
- str->src = NULL;
- }
-}
-
-upb_string *upb_strdup(upb_string *s); // Forward-declare.
-
-// Returns a string with the same contents as "str". The caller owns a ref on
-// the returned string, which may or may not be the same object as "str.
-// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED!
-INLINE upb_string *upb_string_getref(upb_string *str) {
- int refcount = upb_atomic_read(&str->refcount);
- if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str);
- // We don't ref the special <0 refcount for static strings.
- if (refcount > 0) {
- upb_atomic_ref(&str->refcount);
- }
- return str;
-}
-
-// Returns the length of the string.
-INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
-INLINE bool upb_string_isempty(upb_string *str) {
- return !str || upb_string_len(str) == 0;
-}
-
-// Use to read the bytes of the string. The caller *must* call
-// upb_string_endread() after the data has been read. The window between
-// upb_string_getrobuf() and upb_string_endread() should be kept as short as
-// possible, because any pending upb_string_detach() may be blocked until
-// upb_string_endread is called(). No other functions may be called on the
-// string during this window except upb_string_len().
-INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
-INLINE void upb_string_endread(upb_string *str) { (void)str; }
-
-// Convenience method for getting the end of the string. Calls
-// upb_string_getrobuf() so inherits the caveats of calling that function.
-INLINE const char *upb_string_getbufend(upb_string *str) {
- return upb_string_getrobuf(str) + upb_string_len(str);
-}
-
-// Attempts to recycle the string "str" so it may be reused and have different
-// data written to it. The caller MUST own a reference on the given string
-// prior to making this call (ie. the caller must have either created the
-// string or obtained a reference with upb_string_getref()).
-//
-// After the function returns, "str" points to a writable string, which is
-// either the original string if it had no other references or a newly created
-// string if it did have other references.
-//
-// As a special case, passing a pointer to NULL will allocate a new string.
-// This is convenient for the pattern:
-//
-// upb_string *str = NULL;
-// while (x) {
-// if (y) {
-// upb_string_recycle(&str);
-// upb_src_getstr(str);
-// }
-// }
-INLINE void upb_string_recycle(upb_string **_str) {
- upb_string *str = *_str;
- int r;
- if(str && ((r = upb_atomic_read(&str->refcount)) == 1 ||
- (r == _UPB_STRING_REFCOUNT_STACK))) {
- str->ptr = NULL;
- str->len = 0;
- _upb_string_release(str);
- } else {
- //if (!str) {
- // printf("!str\n");
- //}
- //else if (upb_atomic_read(&str->refcount) != 1) { printf("refcount: %d\n", upb_atomic_read(&str->refcount)); }
- //else { printf("Some other reason.\n"); }
- upb_string_unref(str);
- *_str = upb_string_new();
- }
-}
-
-
-// The options for setting the contents of a string. These may only be called
-// when a string is first created or recycled; once other functions have been
-// called on the string, these functions are not allowed until the string is
-// recycled.
-
-// Gets a pointer suitable for writing to the string, which is guaranteed to
-// have at least "len" bytes of data available. The size of the string will
-// become "len".
-char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
-
-// Replaces the contents of str with the contents of the given printf.
-size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format,
- va_list args);
-INLINE size_t upb_string_vprintf(upb_string *str, const char *format,
- va_list args) {
- return upb_string_vprintf_at(str, 0, format, args);
-}
-INLINE size_t upb_string_printf(upb_string *str, const char *format, ...) {
- va_list args;
- va_start(args, format);
- size_t written = upb_string_vprintf(str, format, args);
- va_end(args);
- return written;
-}
-
-// Sets the contents of "str" to be the given substring of "target_str", to
-// which the caller must own a ref.
-void upb_string_substr(upb_string *str, upb_string *target_str,
- upb_strlen_t start, upb_strlen_t len);
-
-// Sketch of an API for allowing upb_strings to reference external, unowned
-// data. Waiting for a clear use case before actually implementing it.
-//
-// Makes the string "str" a reference to the given string data. The caller
-// guarantees that the given string data will not change or be deleted until a
-// matching call to upb_string_detach(), which may block until any concurrent
-// readers have finished reading. upb_string_detach() preserves the contents
-// of the string by copying the referenced data if there are any other
-// referents.
-// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
-// void upb_string_detach(upb_string *str);
-
-// Allows using upb_strings in printf, ie:
-// upb_strptr str = UPB_STRLIT("Hello, World!\n");
-// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */
-#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
-#define UPB_STRFMT "%.*s"
-
-// Macros for constructing upb_string objects statically or on the stack. These
-// can be used like:
-//
-// upb_string static_str = UPB_STATIC_STRING("Foo");
-//
-// int main() {
-// upb_string stack_str = UPB_STACK_STRING("Foo");
-// // Now:
-// // upb_streql(&static_str, &stack_str) == true
-// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true
-// }
-//
-// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays,
-// but you must not change the underlying data once you've passed the string on:
-//
-// void foo() {
-// char data[] = "ABC123";
-// upb_string stack_str = UPB_STACK_STR(data);
-// bar(&stack_str);
-// data[0] = "B"; // NOT ALLOWED!!
-// }
-//
-// TODO: should the stack business just be like attach/detach? The latter seems
-// more flexible, though it does require a stack allocation. Maybe put this off
-// until there is a clear use case.
-#define UPB_STATIC_STRING(str) \
- _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC)
-#define UPB_STATIC_STRING_ARRAY(str) \
- _UPB_STRING_INIT(str, sizeof(str), _UPB_STRING_REFCOUNT_STATIC)
-#define UPB_STATIC_STRING_LEN(str, len) \
- _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC)
-#define UPB_STACK_STRING(str) \
- _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK)
-#define UPB_STACK_STRING_LEN(str, len) \
- _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK)
-
-// A convenient way of specifying upb_strings as literals, like:
-//
-// upb_streql(UPB_STRLIT("expected"), other_str);
-//
-// However, this requires either C99 compound initializers or C++.
-// Must ONLY be called with a string literal as its argument!
-//#ifdef __cplusplus
-//namespace upb {
-//class String : public upb_string {
-// // This constructor must ONLY be called with a string literal.
-// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {}
-//};
-//}
-//#define UPB_STRLIT(str) upb::String(str)
-//#endif
-#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
-
-// Returns a singleton empty string.
-upb_string *upb_emptystring();
-
-
-/* upb_string library functions ***********************************************/
-
-// Named like their <string.h> counterparts, these are all safe against buffer
-// overflow. For the most part these only use the public upb_string interface.
-
-// More efficient than upb_strcmp if all you need is to test equality.
-INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
- upb_strlen_t len = upb_string_len(s1);
- if(len != upb_string_len(s2)) {
- return false;
- } else {
- bool ret =
- memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
- upb_string_endread(s1);
- upb_string_endread(s2);
- return ret;
- }
-}
-
-// Like strcmp().
-int upb_strcmp(upb_string *s1, upb_string *s2);
-
-// Compare a upb_string with memory or a NULL-terminated C string.
-INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) {
- return len == upb_string_len(str) &&
- memcmp(upb_string_getrobuf(str), buf, len) == 0;
-}
-
-INLINE bool upb_streqlc(upb_string *str, const void *buf) {
- // Could be made one-pass.
- return upb_streqllen(str, buf, strlen((const char*)buf));
-}
-
-// Like upb_strcpy, but copies from a buffer and length.
-INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
- memcpy(upb_string_getrwbuf(dest, len), src, len);
-}
-
-// Replaces the contents of "dest" with the contents of "src".
-INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
- upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
- upb_string_endread(src);
-}
-
-// Like upb_strcpy, but copies from a NULL-terminated string.
-INLINE void upb_strcpyc(upb_string *dest, const void *src) {
- // This does two passes over src, but that is necessary unless we want to
- // repeatedly re-allocate dst, which seems worse.
- upb_strcpylen(dest, src, strlen((const char*)src));
-}
-
-// Returns a new string whose contents are a copy of s.
-upb_string *upb_strdup(upb_string *s);
-
-// Like upb_strdup(), but duplicates a given buffer and length.
-INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
- upb_string *s = upb_string_new();
- upb_strcpylen(s, src, len);
- return s;
-}
-
-// Like upb_strdup(), but duplicates a C NULL-terminated string.
-INLINE upb_string *upb_strdupc(const char *src) {
- return upb_strduplen(src, strlen(src));
-}
-
-// Returns a newly-allocated NULL-terminated copy of str.
-char *upb_string_newcstr(upb_string *str);
-
-// Appends 'append' to 's' in-place, resizing s if necessary.
-void upb_strcat(upb_string *s, upb_string *append);
-
-// Returns a new string that is a substring of the given string.
-INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) {
- upb_string *str = upb_string_new();
- upb_string_substr(str, s, offset, len);
- return str;
-}
-
-// Reads an entire file into a newly-allocated string.
-upb_string *upb_strreadfile(const char *filename);
-
-// Returns a new string with the contents of the given printf.
-upb_string *upb_string_asprintf(const char *format, ...);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/src/upb_strstream.c b/src/upb_strstream.c
index 284a4d7..9e17d75 100644
--- a/src/upb_strstream.c
+++ b/src/upb_strstream.c
@@ -8,61 +8,45 @@
#include "upb_strstream.h"
#include <stdlib.h>
-#include "upb_string.h"
/* upb_stringsrc **************************************************************/
-static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf,
- upb_strlen_t count, upb_status *status) {
- upb_stringsrc *src = (upb_stringsrc*)_src;
- if (src->offset == upb_string_len(src->str)) {
- status->code = UPB_EOF;
- return -1;
- } else {
- upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset);
- memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read);
- src->offset += to_read;
- return to_read;
- }
+// Returns the number of bytes available from offset "ofs" to the end of the
+// string.  When nothing remains, sets s->code to UPB_EOF and returns 0.
+size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
+  upb_stringsrc *src = _src;
+  // Treat any offset at or beyond the end as EOF.  Without this, an offset
+  // past the end would make the unsigned subtraction wrap around and report a
+  // huge byte count.
+  if (ofs >= src->len) {
+    s->code = UPB_EOF;
+    return 0;
+  }
+  return src->len - ofs;
}
-static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str,
- upb_status *status) {
- upb_stringsrc *src = (upb_stringsrc*)_src;
- if (src->offset == upb_string_len(src->str)) {
- status->code = UPB_EOF;
- return false;
- } else {
- upb_strlen_t len = upb_string_len(src->str) - src->offset;
- upb_string_substr(str, src->str, src->offset, len);
- src->offset += len;
- assert(src->offset == upb_string_len(src->str));
- return true;
- }
+void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) {
+  // Plain copy out of the backing string; no bounds check is made against the
+  // string's length here.
+  const upb_stringsrc *strsrc = _src;
+  const char *from = strsrc->str + src_ofs;
+  memcpy(dst, from, len);
+}
+
+const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) {
+  upb_stringsrc *strsrc = _src;
+  const char *at = strsrc->str + ofs;
+  // The whole remainder of the string is reported as readable from "at".
+  *len = strsrc->len - ofs;
+  return at;
}
// Installs the stringsrc vtable and puts the source in an empty state; call
// upb_stringsrc_reset() to give it a string to vend.
void upb_stringsrc_init(upb_stringsrc *s) {
static upb_bytesrc_vtbl vtbl = {
- upb_stringsrc_read,
- upb_stringsrc_getstr,
+ &upb_stringsrc_fetch,
+ &upb_stringsrc_read,
+ &upb_stringsrc_getptr,
+ NULL, NULL, NULL, NULL
};
upb_bytesrc_init(&s->bytesrc, &vtbl);
s->str = NULL;
+ // Keep len consistent with the NULL string so that a fetch before the first
+ // reset sees an empty source instead of reading an indeterminate length.
+ s->len = 0;
}
-void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) {
- if (str != s->str) {
- upb_string_unref(s->str);
- s->str = upb_string_getref(str);
- }
- s->offset = 0;
-}
-
-void upb_stringsrc_uninit(upb_stringsrc *s) {
- upb_string_unref(s->str);
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
+  // Only the pointer and the length are recorded; the bytes are not copied.
+  s->len = len;
+  s->str = str;
}
+// No-op: nothing is freed, since the stringsrc only stores the caller's
+// pointer and length.
+void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
return &s->bytesrc;
@@ -72,44 +56,49 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
/* upb_stringsink *************************************************************/
// Releases the buffer currently held by the sink.
void upb_stringsink_uninit(upb_stringsink *s) {
- upb_string_unref(s->str);
+ free(s->str);
}
// Resets the stringsink so that it appends to the buffer "str", whose capacity
// is "size" bytes.  The sink frees the buffer it previously held and takes
// ownership of "str", so the caller must not free it afterwards.  A stringsink
// can be reset multiple times.
-void upb_stringsink_reset(upb_stringsink *s, upb_string *str) {
- if (str != s->str) {
- upb_string_unref(s->str);
- s->str = upb_string_getref(str);
- }
- // Resize to 0.
- upb_string_getrwbuf(s->str, 0);
+// Makes the sink append to "str" (capacity "size" bytes), starting from an
+// empty length.  The previously held buffer is freed.
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
+  // Resetting to the buffer the sink already holds must not free it, or
+  // s->str would be left pointing at released memory.
+  if (str != s->str) free(s->str);
+  s->str = str;
+  s->len = 0;
+  s->size = size;
}
upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) {
return &s->bytesink;
}
-static upb_strlen_t upb_stringsink_vprintf(upb_bytesink *_sink, upb_status *s,
- const char *fmt, va_list args) {
- (void)s; // No errors can occur.
- upb_stringsink *sink = (upb_stringsink*)_sink;
- return upb_string_vprintf_at(sink->str, upb_string_len(sink->str), fmt, args);
+static int32_t upb_stringsink_vprintf(void *_s, upb_status *status,
+                                      const char *fmt, va_list args) {
+  upb_stringsink *sink = _s;
+  (void)status;  // TODO: report realloc() errors.
+  int written = upb_vrprintf(&sink->str, &sink->size, sink->len, fmt, args);
+  // A negative result is passed through and leaves the length untouched.
+  if (written < 0) return written;
+  sink->len += written;
+  return written;
}
-static upb_strlen_t upb_stringsink_putstr(upb_bytesink *_sink, upb_string *str,
- upb_status *s) {
- (void)s; // No errors can occur.
- upb_stringsink *sink = (upb_stringsink*)_sink;
- upb_strcat(sink->str, str);
- return upb_string_len(str);
+// Appends "len" bytes from "buf" to the sink's buffer, growing the buffer by
+// doubling when it is too small.  Returns true on success; if the buffer
+// cannot be grown, sets an error on "status" and returns false with the
+// existing contents intact.
+bool upb_stringsink_write(void *_s, const char *buf, size_t len,
+                          upb_status *status) {
+  upb_stringsink *s = _s;
+  size_t needed = s->len + len;
+  if (needed > s->size) {
+    // Start from a non-zero capacity: doubling a size of 0 never grows, which
+    // would spin forever for a sink that was reset with no buffer.
+    size_t newsize = s->size ? s->size : 16;
+    while (newsize < needed) {
+      // Fall back to the exact size if doubling would overflow size_t.
+      if (newsize > (size_t)-1 / 2) {
+        newsize = needed;
+        break;
+      }
+      newsize *= 2;
+    }
+    // Assign only on success so a failed realloc() does not lose the buffer.
+    char *newstr = realloc(s->str, newsize);
+    if (!newstr) {
+      upb_status_setf(status, UPB_ERROR, "Out of memory growing string sink.");
+      return false;
+    }
+    s->str = newstr;
+    s->size = newsize;
+  }
+  memcpy(s->str + s->len, buf, len);
+  s->len += len;
+  return true;
}
void upb_stringsink_init(upb_stringsink *s) {
static upb_bytesink_vtbl vtbl = {
- upb_stringsink_putstr,
+ upb_stringsink_write,
upb_stringsink_vprintf
};
upb_bytesink_init(&s->bytesink, &vtbl);
diff --git a/src/upb_strstream.h b/src/upb_strstream.h
index e092b55..e57406e 100644
--- a/src/upb_strstream.h
+++ b/src/upb_strstream.h
@@ -21,8 +21,8 @@ extern "C" {
struct _upb_stringsrc {
upb_bytesrc bytesrc;
- upb_string *str;
- upb_strlen_t offset;
+ // The bytes being vended and their count, as passed to
+ // upb_stringsrc_reset().
+ const char *str;
+ size_t len;
};
typedef struct _upb_stringsrc upb_stringsrc;
@@ -33,9 +33,9 @@ void upb_stringsrc_uninit(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string.  The
// stringsrc only stores the pointer and length, so the caller must ensure the
// string outlives the stringsrc.  A stringsrc can be reset multiple times.
-void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
-// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above.
+// Returns the upb_bytesrc* for this stringsrc.
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
@@ -43,7 +43,8 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
struct _upb_stringsink {
upb_bytesink bytesink;
- upb_string *str;
+ // Buffer being appended to, the number of bytes written so far (len), and
+ // the buffer's capacity (size).
+ char *str;
+ size_t len, size;
};
typedef struct _upb_stringsink upb_stringsink;
@@ -51,11 +52,14 @@ typedef struct _upb_stringsink upb_stringsink;
void upb_stringsink_init(upb_stringsink *s);
void upb_stringsink_uninit(upb_stringsink *s);
-// Resets the stringsink to a state where it will append to the given string.
-// The string must be newly created or recycled. The stringsink will take a
-// reference on the string, so the caller need not ensure that it outlives the
-// stringsink. A stringsink can be reset multiple times.
-void upb_stringsink_reset(upb_stringsink *s, upb_string *str);
+// Resets the sink's string to "str", which the sink takes ownership of.
+// "str" may be NULL, which will make the sink allocate a new string.
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size);
+
+// Releases ownership of the returned string (which is "len" bytes long) and
+// resets the internal string to be empty again (as if reset were called with
+// NULL).
+const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
// Returns the upb_bytesink* for this stringsink. Invalidated by reset above.
upb_bytesink *upb_stringsink_bytesink();
diff --git a/src/upb_table.c b/src/upb_table.c
index a754097..fc9e9de 100644
--- a/src/upb_table.c
+++ b/src/upb_table.c
@@ -97,7 +97,7 @@ static uint32_t empty_intbucket(upb_inttable *table)
// The insert routines have a lot more code duplication between int/string
// variants than I would like, but there's just a bit too much that varies to
// parameterize them.
-static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
+static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
assert(upb_inttable_lookup(t, key) == NULL);
upb_inttable_value *table_val;
if (_upb_inttable_isarrkey(t, key)) {
@@ -160,7 +160,7 @@ static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
}
}
-void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) {
+void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) {
//printf("RESIZE!\n");
// Need to resize. Allocate new table with double the size of however many
@@ -181,7 +181,7 @@ void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) {
void upb_inttable_compact(upb_inttable *t) {
// Find the largest array part we can that satisfies the MIN_DENSITY
// definition. For now we just count down powers of two.
- upb_inttable_key_t largest_key = 0;
+ uint32_t largest_key = 0;
for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
i = upb_inttable_next(t, i)) {
largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
@@ -260,6 +260,8 @@ upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter) {
/* upb_strtable ***************************************************************/
static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
+ //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t));
+ assert(i <= (int32_t)upb_table_size(&t->t));
return UPB_INDEX(t->t.entries, i, t->t.entry_size);
}
@@ -267,121 +269,134 @@ static uint32_t upb_strtable_size(upb_strtable *t) {
return upb_table_size(&t->t);
}
-void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) {
+void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
+ t->t.value_size = valuesize;
+ size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8);
upb_table_init(&t->t, size, entsize);
for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
upb_strtable_entry *e = strent(t, i);
- e->key = NULL;
- e->next = UPB_END_OF_CHAIN;
+ e->hdr.key = NULL;
+ e->hdr.next = UPB_END_OF_CHAIN;
}
}
void upb_strtable_free(upb_strtable *t) {
- // Free refs from the strtable.
- upb_strtable_entry *e = upb_strtable_begin(t);
- for(; e; e = upb_strtable_next(t, e)) {
- upb_string_unref(e->key);
- }
+ // Free keys from the strtable.
+ upb_strtable_iter i;
+ for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i))
+ free((char*)upb_strtable_iter_key(&i));
upb_table_free(&t->t);
}
-static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
-{
- uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
+static uint32_t strtable_bucket(upb_strtable *t, const char *key) {
+ uint32_t hash = MurmurHash2(key, strlen(key), 0);
return (hash & t->t.mask);
}
-void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
-{
+void *upb_strtable_lookup(upb_strtable *t, const char *key) {
uint32_t bucket = strtable_bucket(t, key);
upb_strtable_entry *e;
do {
e = strent(t, bucket);
- if(e->key && upb_streql(e->key, key)) return e;
- } while((bucket = e->next) != UPB_END_OF_CHAIN);
+ if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val;
+ } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN);
return NULL;
}
-static uint32_t empty_strbucket(upb_strtable *table)
-{
+void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len) {
+ // TODO: improve.
+ char key2[len+1];
+ memcpy(key2, key, len);
+ key2[len] = '\0';
+ return upb_strtable_lookup(t, key2);
+}
+
+static uint32_t empty_strbucket(upb_strtable *table) {
// TODO: does it matter that this is biased towards the front of the table?
for(uint32_t i = 0; i < upb_strtable_size(table); i++) {
upb_strtable_entry *e = strent(table, i);
- if(!e->key) return i;
+ if(!e->hdr.key) return i;
}
assert(false);
return 0;
}
-static void strinsert(upb_strtable *t, upb_strtable_entry *e)
-{
- assert(upb_strtable_lookup(t, e->key) == NULL);
- e->key = upb_string_getref(e->key);
+static void strinsert(upb_strtable *t, const char *key, const void *val) {
+ assert(upb_strtable_lookup(t, key) == NULL);
t->t.count++;
- uint32_t bucket = strtable_bucket(t, e->key);
+ uint32_t bucket = strtable_bucket(t, key);
upb_strtable_entry *table_e = strent(t, bucket);
- if(table_e->key) { /* Collision. */
- if(bucket == strtable_bucket(t, table_e->key)) {
+ if(table_e->hdr.key) { /* Collision. */
+ if(bucket == strtable_bucket(t, table_e->hdr.key)) {
/* Existing element is in its main position. Find an empty slot to
* place our new element and append it to this key's chain. */
uint32_t empty_bucket = empty_strbucket(t);
- while (table_e->next != UPB_END_OF_CHAIN)
- table_e = strent(t, table_e->next);
- table_e->next = empty_bucket;
+ while (table_e->hdr.next != UPB_END_OF_CHAIN)
+ table_e = strent(t, table_e->hdr.next);
+ table_e->hdr.next = empty_bucket;
table_e = strent(t, empty_bucket);
} else {
/* Existing element is not in its main position. Move it to an empty
* slot and put our element in its main position. */
uint32_t empty_bucket = empty_strbucket(t);
- uint32_t evictee_bucket = strtable_bucket(t, table_e->key);
+ uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
while(1) {
- assert(evictee_e->key);
- assert(evictee_e->next != UPB_END_OF_CHAIN);
- if(evictee_e->next == bucket) {
- evictee_e->next = empty_bucket;
+ assert(evictee_e->hdr.key);
+ assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
+ if(evictee_e->hdr.next == bucket) {
+ evictee_e->hdr.next = empty_bucket;
break;
}
- evictee_e = strent(t, evictee_e->next);
+ evictee_e = strent(t, evictee_e->hdr.next);
}
/* table_e remains set to our mainpos. */
}
}
- memcpy(table_e, e, t->t.entry_size);
- table_e->next = UPB_END_OF_CHAIN;
- //printf("Looking up, string=" UPB_STRFMT "...\n", UPB_STRARG(e->key));
- assert(upb_strtable_lookup(t, e->key) == table_e);
+ //fprintf(stderr, "val: %p\n", val);
+ //fprintf(stderr, "val size: %d\n", t->t.value_size);
+ memcpy(&table_e->val, val, t->t.value_size);
+ table_e->hdr.key = strdup(key);
+ table_e->hdr.next = UPB_END_OF_CHAIN;
+ //fprintf(stderr, "Looking up, string=%s...\n", key);
+ assert(upb_strtable_lookup(t, key) == &table_e->val);
//printf("Yay!\n");
}
-void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e)
-{
+void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
// Need to resize. New table of double the size, add old elements to it.
//printf("RESIZE!!\n");
upb_strtable new_table;
- upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size);
- upb_strtable_entry *old_e;
- for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e))
- strinsert(&new_table, old_e);
+ upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size);
+ upb_strtable_iter i;
+ upb_strtable_begin(&i, t);
+ for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ strinsert(&new_table,
+ upb_strtable_iter_key(&i),
+ upb_strtable_iter_value(&i));
+ }
upb_strtable_free(t);
*t = new_table;
}
- strinsert(t, e);
+ strinsert(t, key, val);
}
-void *upb_strtable_begin(upb_strtable *t) {
- return upb_strtable_next(t, strent(t, -1));
+void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t) {
+ i->e = strent(t, -1);
+ i->t = t;
+ upb_strtable_next(i);
}
-void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) {
- upb_strtable_entry *end = strent(t, upb_strtable_size(t));
+void upb_strtable_next(upb_strtable_iter *i) {
+ upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t));
+ upb_strtable_entry *cur = i->e;
do {
- cur = (void*)((char*)cur + t->t.entry_size);
- if(cur == end) return NULL;
- } while(cur->key == NULL);
- return cur;
+ cur = (void*)((char*)cur + i->t->t.entry_size);
+ if(cur == end) { i->e = NULL; return; }
+ } while(cur->hdr.key == NULL);
+ i->e = cur;
}
#ifdef UPB_UNALIGNED_READS_OK
diff --git a/src/upb_table.h b/src/upb_table.h
index 631709c..376465b 100644
--- a/src/upb_table.h
+++ b/src/upb_table.h
@@ -18,14 +18,11 @@
#include <assert.h>
#include "upb.h"
-#include "upb_string.h"
#ifdef __cplusplus
extern "C" {
#endif
-typedef uint32_t upb_inttable_key_t;
-
#define UPB_END_OF_CHAIN (uint32_t)-1
typedef struct {
@@ -34,7 +31,7 @@ typedef struct {
} upb_inttable_value;
typedef struct {
- upb_inttable_key_t key;
+ uint32_t key;
uint32_t next; // Internal chaining.
} upb_inttable_header;
@@ -48,8 +45,13 @@ typedef struct {
// performance by letting us compare hashes before comparing lengths or the
// strings themselves.
typedef struct {
- upb_string *key; // We own a ref.
- uint32_t next; // Internal chaining.
+ char *key; // We own; NUL-terminated. TODO: store explicit len?
+ uint32_t next; // Internal chaining.
+} upb_strtable_header;
+
+typedef struct {
+ upb_strtable_header hdr;
+ uint32_t val; // Val is at least 32 bits.
} upb_strtable_entry;
typedef struct {
@@ -81,7 +83,7 @@ typedef struct {
// when looked up!
void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
void upb_inttable_free(upb_inttable *table);
-void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); // TODO: update
+void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
void upb_strtable_free(upb_strtable *table);
// Number of values in the hash table.
@@ -97,11 +99,13 @@ INLINE uint32_t upb_strtable_count(upb_strtable *t) {
// not already exist in the hash table. The data will be copied from val into
// the hashtable (the amount of data copied comes from value_size when the
// table was constructed). Therefore the data at val may be freed once the
-// call returns. For string tables, the table takes a ref on str.
+// call returns. For string tables, the table makes its own copy of the key.
//
// WARNING: the lowest bit of val is reserved and will be overwritten!
-void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val);
-void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *ent); // TODO: update
+void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
+// TODO: may want to allow for more complex keys with custom hash/comparison
+// functions.
+void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
void upb_inttable_compact(upb_inttable *t);
INLINE void upb_strtable_clear(upb_strtable *t) {
// TODO: improve.
@@ -110,14 +114,14 @@ INLINE void upb_strtable_clear(upb_strtable *t) {
upb_strtable_init(t, 8, entry_size);
}
-INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) {
+INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, uint32_t k) {
uint32_t bucket = k & t->t.mask; // Identity hash for ints.
assert(bucket != UPB_END_OF_CHAIN);
return bucket;
}
// Returns true if this key belongs in the array part of the table.
-INLINE bool _upb_inttable_isarrkey(upb_inttable *t, upb_inttable_key_t k) {
+INLINE bool _upb_inttable_isarrkey(upb_inttable *t, uint32_t k) {
return (k < t->array_size);
}
@@ -162,21 +166,44 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size);
}
-void *upb_strtable_lookup(upb_strtable *t, upb_string *key);
+void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len);
+void *upb_strtable_lookup(upb_strtable *t, const char *key);
+
+
+/* upb_strtable_iter **********************************************************/
+
+// Strtable iteration. Order is undefined. Insertions invalidate iterators.
+// upb_strtable_iter i;
+// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+// const char *key = upb_strtable_iter_key(&i);
+// const myval *val = upb_strtable_iter_value(&i);
+// // ...
+// }
+typedef struct {
+ upb_strtable *t;
+ upb_strtable_entry *e;
+} upb_strtable_iter;
+
+void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t);
+void upb_strtable_next(upb_strtable_iter *i);
+INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; }
+INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
+ return i->e->hdr.key;
+}
+INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
+ return &i->e->val;
+}
+
-// Provides iteration over the table. The order in which the entries are
-// returned is undefined. Insertions invalidate iterators.
-void *upb_strtable_begin(upb_strtable *t);
-void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur);
+/* upb_inttable_iter **********************************************************/
-// Inttable iteration (should update strtable iteration to use this scheme too).
-// The order is undefined.
+// Inttable iteration. Order is undefined. Insertions invalidate iterators.
// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
// i = upb_inttable_next(t, i)) {
// // ...
// }
typedef struct {
- upb_inttable_key_t key;
+ uint32_t key;
upb_inttable_value *value;
bool array_part;
} upb_inttable_iter;
@@ -184,7 +211,7 @@ typedef struct {
upb_inttable_iter upb_inttable_begin(upb_inttable *t);
upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter);
INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; }
-INLINE upb_inttable_key_t upb_inttable_iter_key(upb_inttable_iter iter) {
+INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
return iter.key;
}
INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {
diff --git a/src/upb_textprinter.c b/src/upb_textprinter.c
index ec76d56..14cce9b 100644
--- a/src/upb_textprinter.c
+++ b/src/upb_textprinter.c
@@ -21,12 +21,15 @@ struct _upb_textprinter {
#define CHECK(x) if ((x) < 0) goto err;
-static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
+static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref,
bool preserve_utf8) {
// Based on CEscapeInternal() from Google's protobuf release.
+ // TODO; we could read directly from a bytesrc's buffer instead.
// TODO; we could write directly into a bytesink's buffer instead.
char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
- const char *src = upb_string_getrobuf(str), *end = src + upb_string_len(str);
+ char buf[strref->len], *src = buf;
+ char *end = src + strref->len;
+ upb_strref_read(strref, src);
// I think hex is prettier and more useful, but proto2 uses octal; should
// investigate whether it can parse hex also.
@@ -35,8 +38,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
for (; src < end; src++) {
if (dstend - dst < 4) {
- upb_string str = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf);
- CHECK(upb_bytesink_putstr(p->bytesink, &str, &p->status));
+ CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
dst = dstbuf;
}
@@ -64,8 +66,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
last_hex_escape = is_hex_escape;
}
// Flush remaining data.
- upb_string outstr = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf);
- CHECK(upb_bytesink_putstr(p->bytesink, &outstr, &p->status));
+ CHECK(upb_bytesink_write(p->bytesink, dst, dst - dstbuf, &p->status));
return 0;
err:
return -1;
@@ -74,7 +75,7 @@ err:
static int upb_textprinter_indent(upb_textprinter *p) {
if(!p->single_line)
for(int i = 0; i < p->indent_depth; i++)
- CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status));
+ CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
return 0;
err:
return -1;
@@ -82,9 +83,9 @@ err:
static int upb_textprinter_endfield(upb_textprinter *p) {
if(p->single_line) {
- CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status));
+ CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
} else {
- CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status));
+ CHECK(upb_bytesink_writestr(p->bytesink, "\n", &p->status));
}
return 0;
err:
@@ -96,7 +97,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
upb_textprinter *p = _p;
upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p);
- CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name)));
+ CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name));
#define CASE(fmtstr, member) \
CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
switch(f->type) {
@@ -118,12 +119,11 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
CASE("%" PRIu32, uint32);
case UPB_TYPE(ENUM): {
upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
- upb_string *enum_label =
- upb_enumdef_iton(enum_def, upb_value_getint32(val));
- if (enum_label) {
+ const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
+ if (label) {
// We found a corresponding string for this enum. Otherwise we fall
// through to the int32 code path.
- CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status));
+ CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status));
break;
}
}
@@ -134,12 +134,13 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
case UPB_TYPE(BOOL):
CASE("%hhu", bool);
case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES):
- CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
- CHECK(upb_textprinter_putescaped(p, upb_value_getstr(val),
+ case UPB_TYPE(BYTES): {
+ CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
+ CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
f->type == UPB_TYPE(STRING)));
- CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
+ CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
break;
+ }
}
upb_textprinter_endfield(p);
return UPB_CONTINUE;
@@ -151,11 +152,10 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
upb_textprinter *p = _p;
upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p);
- bool ret = upb_bytesink_printf(p->bytesink, &p->status,
- UPB_STRFMT " {", UPB_STRARG(f->name));
+ bool ret = upb_bytesink_printf(p->bytesink, &p->status, "%s {", f->name);
if (!ret) return UPB_SBREAK;
if (!p->single_line)
- upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status);
+ upb_bytesink_writestr(p->bytesink, "\n", &p->status);
p->indent_depth++;
return UPB_CONTINUE_WITH(_p);
}
@@ -165,7 +165,7 @@ static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
upb_textprinter *p = _p;
p->indent_depth--;
upb_textprinter_indent(p);
- upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status);
+ upb_bytesink_writestr(p->bytesink, "}", &p->status);
upb_textprinter_endfield(p);
return UPB_CONTINUE;
}
diff --git a/src/upb_varint.h b/src/upb_varint.h
index fb44cd9..87fca2b 100644
--- a/src/upb_varint.h
+++ b/src/upb_varint.h
@@ -83,16 +83,13 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
// Template for a function that checks the first two bytes with branching
// and dispatches 2-10 bytes with a separate function.
-#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
-INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \
- uint64_t b = 0; \
- upb_decoderet r = {p, 0}; \
- memcpy(&b, r.p, 2); \
- if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \
- r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \
- r.p = p + 2; \
- if ((b & 0x8000) == 0) return r; \
- return decode_max8_function(r); \
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \
+ uint8_t *p = (uint8_t*)_p; \
+ if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \
+ upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)}; \
+ if ((*(p + 1) & 0x80) == 0) return r; \
+ return decode_max8_function(r); \
}
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback