From 6a1f3a66939308668ab8dce0d195afec16e02af9 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 14 Jul 2011 23:15:00 -0700 Subject: Major refactoring: upb_string is gone in favor of upb_strref. --- src/upb.c | 80 +++++++--- src/upb.h | 110 +++++-------- src/upb_bytestream.h | 213 ++++++++++++++++--------- src/upb_decoder.c | 276 ++++++++++++++++++++------------- src/upb_decoder.h | 75 ++++----- src/upb_decoder_x86.dasc | 28 ++-- src/upb_def.c | 205 +++++++++++------------- src/upb_def.h | 49 +++--- src/upb_descriptor.c | 188 ++++++++++------------ src/upb_descriptor.h | 6 +- src/upb_glue.c | 49 ++++-- src/upb_glue.h | 20 ++- src/upb_handlers.c | 25 ++- src/upb_handlers.h | 9 +- src/upb_msg.c | 53 +++---- src/upb_msg.h | 6 +- src/upb_stdio.c | 168 +++++++++++++------- src/upb_stdio.h | 54 +++++-- src/upb_string.c | 164 -------------------- src/upb_string.h | 394 ----------------------------------------------- src/upb_strstream.c | 105 ++++++------- src/upb_strstream.h | 24 +-- src/upb_table.c | 127 ++++++++------- src/upb_table.h | 69 ++++++--- src/upb_textprinter.c | 44 +++--- src/upb_varint.h | 17 +- 26 files changed, 1121 insertions(+), 1437 deletions(-) delete mode 100644 src/upb_string.c delete mode 100644 src/upb_string.h (limited to 'src') diff --git a/src/upb.c b/src/upb.c index 82c7fc2..0f3ea18 100644 --- a/src/upb.c +++ b/src/upb.c @@ -5,19 +5,21 @@ * Author: Josh Haberman */ +#include #include #include +#include #include #include "descriptor_const.h" #include "upb.h" -#include "upb_string.h" +#include "upb_bytestream.h" #define alignof(t) offsetof(struct { char c; t x; }, x) #define TYPE_INFO(wire_type, ctype, inmemory_type) \ {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype}, const upb_type_info upb_types[] = { - {0, 0, 0, 0, ""}, // There is no type 0. 
+ TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake) TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64 @@ -42,39 +44,79 @@ const upb_type_info upb_types[] = { #ifdef NDEBUG upb_value UPB_NO_VALUE = {{0}}; #else -upb_value UPB_NO_VALUE = {{0}, UPB_VALUETYPE_RAW}; +upb_value UPB_NO_VALUE = {{0}, -1}; #endif -void upb_seterr(upb_status *status, enum upb_status_code code, - const char *msg, ...) { - status->code = code; - upb_string_recycle(&status->str); +void upb_status_init(upb_status *status) { + status->buf = NULL; + upb_status_clear(status); +} + +void upb_status_uninit(upb_status *status) { + free(status->buf); +} + +void upb_status_setf(upb_status *s, enum upb_status_code code, + const char *msg, ...) { + s->code = code; va_list args; va_start(args, msg); - upb_string_vprintf(status->str, msg, args); + upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args); va_end(args); + s->str = s->buf; } -void upb_copyerr(upb_status *to, upb_status *from) -{ +void upb_status_copy(upb_status *to, upb_status *from) { to->code = from->code; - if(from->str) to->str = upb_string_getref(from->str); + if (from->str) { + if (to->bufsize < from->bufsize) { + to->bufsize = from->bufsize; + to->buf = realloc(to->buf, to->bufsize); + to->str = to->buf; + } + memcpy(to->str, from->str, from->bufsize); + } else { + to->str = NULL; + } } -void upb_clearerr(upb_status *status) { +void upb_status_clear(upb_status *status) { status->code = UPB_OK; - if (status->str) upb_string_recycle(&status->str); + status->str = NULL; } -void upb_printerr(upb_status *status) { +void upb_status_print(upb_status *status, FILE *f) { if(status->str) { - fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", - status->code, UPB_STRARG(status->str)); + fprintf(f, "code: %d, msg: %s\n", status->code, status->str); } else { - fprintf(stderr, "code: %d, no msg\n", status->code); + 
fprintf(f, "code: %d, no msg\n", status->code); } } -void upb_status_uninit(upb_status *status) { - upb_string_unref(status->str); +void upb_status_fromerrno(upb_status *status) { + upb_status_setf(status, UPB_ERROR, "%s", strerror(errno)); +} + +int upb_vrprintf(char **buf, size_t *size, size_t ofs, + const char *fmt, va_list args) { + // Try once without reallocating. We have to va_copy because we might have + // to call vsnprintf again. + uint32_t len = *size - ofs; + va_list args_copy; + va_copy(args_copy, args); + uint32_t true_len = vsnprintf(*buf + ofs, len, fmt, args_copy); + va_end(args_copy); + + // Resize to be the correct size. + if (true_len >= len) { + // Need to print again, because some characters were truncated. vsnprintf + // will not write the entire string unless you give it space to store the + // NULL terminator also. + while (*size < (ofs + true_len + 1)) *size = UPB_MAX(*size * 2, 2); + char *newbuf = realloc(*buf, *size); + if (!newbuf) return -1; + vsnprintf(newbuf + ofs, true_len + 1, fmt, args); + *buf = newbuf; + } + return true_len; } diff --git a/src/upb.h b/src/upb.h index 59429f4..b15340e 100644 --- a/src/upb.h +++ b/src/upb.h @@ -30,9 +30,7 @@ extern "C" { #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) #define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) -INLINE void nop_printf(const char *fmt, ...) { - (void)fmt; -} +INLINE void nop_printf(const char *fmt, ...) { (void)fmt; } #ifdef NDEBUG #define DEBUGPRINTF nop_printf @@ -45,7 +43,6 @@ INLINE size_t upb_align_up(size_t val, size_t align) { return val % align == 0 ? val : val + align - (val % align); } - // The maximum that any submessages can be nested. Matches proto2's limit. // At the moment this specifies the size of several statically-sized arrays // and therefore setting it high will cause more memory to be used. 
Will @@ -122,31 +119,16 @@ typedef struct { extern const upb_type_info upb_types[]; -/* Polymorphic values of .proto types *****************************************/ +/* upb_value ******************************************************************/ -struct _upb_string; -typedef struct _upb_string upb_string; -struct _upb_array; -typedef struct _upb_array upb_array; -struct _upb_msg; -typedef struct _upb_msg upb_msg; -struct _upb_bytesrc; -typedef struct _upb_bytesrc upb_bytesrc; +struct _upb_strref; struct _upb_fielddef; -typedef struct _upb_fielddef upb_fielddef; -typedef int32_t upb_strlen_t; -#define UPB_STRLEN_MAX INT32_MAX - -// The type of a upb_value. This is like a upb_fieldtype_t, but adds the -// constant UPB_VALUETYPE_ARRAY to represent an array. -typedef uint8_t upb_valuetype_t; -#define UPB_TYPE_ENDGROUP 19 // Need to increase if more real types are added! -#define UPB_VALUETYPE_ARRAY 32 -#define UPB_VALUETYPE_BYTESRC 32 -#define UPB_VALUETYPE_RAW 33 -#define UPB_VALUETYPE_FIELDDEF 34 -#define UPB_VALUETYPE_PTR 35 +// Special constants for the upb_value.type field. These must not conflict +// with any members of FieldDescriptorProto.Type. +#define UPB_TYPE_ENDGROUP 0 +#define UPB_VALUETYPE_FIELDDEF 32 +#define UPB_VALUETYPE_PTR 33 // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. @@ -159,19 +141,15 @@ typedef struct { int64_t int64; uint32_t uint32; bool _bool; - upb_string *str; - upb_bytesrc *bytesrc; - upb_msg *msg; - upb_array *arr; - upb_atomic_t *refcount; - upb_fielddef *fielddef; + struct _upb_strref *strref; + struct _upb_fielddef *fielddef; void *_void; } val; +#ifndef NDEBUG // In debug mode we carry the value type around also so we can check accesses // to be sure the right member is being read. 
-#ifndef NDEBUG - upb_valuetype_t type; + char type; #endif } upb_value; @@ -183,7 +161,7 @@ typedef struct { #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ INLINE ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ + assert(val.type == proto_type); \ return val.val.membername; \ } \ INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ @@ -197,18 +175,14 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); -UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); // Marked for destruction. -UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF); +UPB_VALUE_ACCESSORS(strref, strref, struct _upb_strref*, UPB_TYPE(STRING)); +UPB_VALUE_ACCESSORS(fielddef, fielddef, struct _upb_fielddef*, UPB_VALUETYPE_FIELDDEF); UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR); extern upb_value UPB_NO_VALUE; -INLINE upb_atomic_t *upb_value_getrefcount(upb_value val) { - assert(val.type == UPB_TYPE(MESSAGE) || - val.type == UPB_TYPE(STRING) || - val.type == UPB_VALUETYPE_ARRAY); - return val.val.refcount; -} + +/* upb_status *****************************************************************/ // Status codes used as a return value. Codes >0 are not fatal and can be // resumed. @@ -224,42 +198,38 @@ enum upb_status_code { // An unrecoverable error occurred. UPB_ERROR = -1, - - // A recoverable error occurred (for example, data of the wrong type was - // encountered which we can skip over). - // UPB_STATUS_RECOVERABLE_ERROR = -2 }; // TODO: consider adding error space and code, to let ie. errno be stored // as a proper code, or application-specific error codes. 
-struct _upb_status { +typedef struct { char code; - upb_string *str; -}; - -typedef struct _upb_status upb_status; - -#define UPB_STATUS_INIT {UPB_OK, NULL} -#define UPB_ERRORMSG_MAXLEN 256 + char *str; // NULL when no message is present. NUL-terminated. + char *buf; // Owned by the status. + size_t bufsize; +} upb_status; -INLINE bool upb_ok(upb_status *status) { - return status->code == UPB_OK; -} - -INLINE void upb_status_init(upb_status *status) { - status->code = UPB_OK; - status->str = NULL; -} +#define UPB_STATUS_INIT {UPB_OK, NULL, NULL, 0} +void upb_status_init(upb_status *status); void upb_status_uninit(upb_status *status); -// Caller owns a ref on the returned string. -upb_string *upb_status_tostring(upb_status *status); -void upb_printerr(upb_status *status); -void upb_clearerr(upb_status *status); -void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, - ...); -void upb_copyerr(upb_status *to, upb_status *from); +INLINE bool upb_ok(upb_status *status) { return status->code == UPB_OK; } +INLINE bool upb_iseof(upb_status *status) { return status->code == UPB_EOF; } + +void upb_status_fromerrno(upb_status *status); +void upb_status_print(upb_status *status, FILE *f); +void upb_status_clear(upb_status *status); +void upb_status_setf(upb_status *status, enum upb_status_code code, + const char *fmt, ...); +void upb_status_copy(upb_status *to, upb_status *from); + +// Like vasprintf, but uses *buf (which can be NULL) as a starting point and +// reallocates it only if the new value will not fit. "size" is updated to +// reflect the allocated size of the buffer. Returns -1 on memory alloc +// failure, otherwise the formatted length (excluding the terminator). 
+int upb_vrprintf(char **buf, size_t *size, size_t ofs, + const char *fmt, va_list args); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_bytestream.h b/src/upb_bytestream.h index e4b51fd..836abb0 100644 --- a/src/upb_bytestream.h +++ b/src/upb_bytestream.h @@ -1,120 +1,195 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2011 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * Defines the interfaces upb_bytesrc and upb_bytesink, which are abstractions - * of read()/write() with useful buffering/sharing semantics. + * This file contains upb_bytesrc and upb_bytesink, which are abstractions of + * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing + * semantics. They are virtual base classes so concrete implementations + * can get the data from a fd, a string, a cord, etc. + * + * Byte streams are NOT thread-safe! (Like f{read,write}_unlocked()) */ #ifndef UPB_BYTESTREAM_H #define UPB_BYTESTREAM_H #include +#include +#include #include "upb.h" #ifdef __cplusplus extern "C" { #endif -/* upb_bytesrc ****************************************************************/ -// upb_bytesrc is a pull interface for streams of bytes, basically an -// abstraction of read()/fread(), but it avoids copies where possible. +/* upb_bytesrc ****************************************************************/ -typedef upb_strlen_t (*upb_bytesrc_read_fptr)( - upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -typedef bool (*upb_bytesrc_getstr_fptr)( - upb_bytesrc *src, upb_string *str, upb_status *status); +// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as +// they become available, and to preserve some trailing amount of data. 
+typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*); +typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*); +typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*); +typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t); +typedef void upb_bytesrc_ref_func(void*); +typedef struct _upb_bytesrc_vtbl { + upb_bytesrc_fetch_func *fetch; + upb_bytesrc_read_func *read; + upb_bytesrc_getptr_func *getptr; + upb_bytesrc_refregion_func *refregion; + upb_bytesrc_refregion_func *unrefregion; + upb_bytesrc_ref_func *ref; + upb_bytesrc_ref_func *unref; +} upb_bytesrc_vtbl; typedef struct { - upb_bytesrc_read_fptr read; - upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtbl; + upb_bytesrc_vtbl *vtbl; +} upb_bytesrc; -struct _upb_bytesrc { - upb_bytesrc_vtbl *vtbl; -}; +INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) { + src->vtbl = vtbl; +} -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { - s->vtbl = vtbl; +// Fetches at least minlen bytes starting at ofs, returning the actual number +// of bytes fetched (or 0 on error: see "s" for details). Gives caller a ref +// on the fetched region. It is safe to re-fetch existing regions but only if +// they are ref'd. "ofs" may not be greater than the end of the region that was +// previously fetched. +INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) { + return src->vtbl->fetch(src, ofs, s); } -// Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If 0, indicates error and puts details in "status". -INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count, upb_status *status) { - return src->vtbl->read(src, buf, count, status); +// Copies "len" bytes of data from offset src_ofs to "dst", which must be at +// least "len" bytes long. The caller must own a ref on the given region. 
+INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len, + char *dst) { + src->vtbl->read(src, src_ofs, len, dst); } -// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure -// that "str" is created or just recycled. Returns "false" if no data was -// returned, either due to error or EOF (check status for details). +// Returns a pointer to the bytesrc's internal buffer, returning how much data +// was actually returned (which may be less than "len" if the given region is +// not contiguous). The caller must own refs on the entire region from [ofs, +// ofs+len]. The returned buffer is valid for as long as the region remains +// ref'd. // -// In comparison to upb_bytesrc_read(), this call can possibly alias existing -// string data (which avoids a copy). On the other hand, if the data was *not* -// already in an existing string, this copies it into a upb_string, and if the -// data needs to be put in a specific range of memory (because eg. you need to -// put it into a different kind of string object) then upb_bytesrc_get() could -// save you a copy. -INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_status *status) { - return src->vtbl->getstr(src, str, status); +// TODO: is "len" really required here? +INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs, + size_t *len) { + return src->vtbl->getptr(src, ofs, len); +} + +// Gives the caller a ref on the given region. The caller must know that the +// given region is already ref'd. +INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) { + src->vtbl->refregion(src, ofs, len); +} + +// Releases a ref on the given region, which the caller must have previously +// ref'd. +INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) { + src->vtbl->unrefregion(src, ofs, len); +} + +// Attempts to ref the bytesrc itself, returning false if this bytesrc is +// not ref-able. 
+INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) { + if (src->vtbl->ref) { + src->vtbl->ref(src); + return true; + } else { + return false; + } +} + +// Unref's the bytesrc itself. May only be called when upb_bytesrc_tryref() +// has previously returned true. +INLINE void upb_bytesrc_unref(upb_bytesrc *src) { + assert(src->vtbl->unref); + src->vtbl->unref(src); +} + +/* upb_strref *****************************************************************/ + +// The structure we pass for a string. +typedef struct _upb_strref { + // Pointer to the string data. NULL if the string spans multiple input + // buffers (in which case upb_bytesrc_getptr() must be called to obtain + // the actual pointers). + const char *ptr; + + // Bytesrc from which this string data comes. This is only guaranteed to be + // alive from inside the callback; however if the handler knows more about + // its type and how to prolong its life, it may do so. + upb_bytesrc *bytesrc; + + // Offset in the bytesrc that represents the beginning of this string. + uint32_t stream_offset; + + // Length of the string. + uint32_t len; + + // Possibly add optional members here like start_line, start_column, etc. +} upb_strref; + +// Copies the contents of the strref into a newly-allocated, NULL-terminated +// string. 
+INLINE char *upb_strref_dup(struct _upb_strref *r) { + char *ret = (char*)malloc(r->len + 1); + upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret); + ret[r->len] = '\0'; + return ret; } /* upb_bytesink ***************************************************************/ -struct _upb_bytesink; -typedef struct _upb_bytesink upb_bytesink; -typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( - upb_bytesink *bytesink, upb_string *str, upb_status *status); -typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); +typedef bool upb_bytesink_write_func(void*, const char*, size_t, upb_status*); +typedef int32_t upb_bytesink_vprintf_func( + void*, upb_status*, const char *fmt, va_list args); typedef struct { - upb_bytesink_putstr_fptr putstr; - upb_bytesink_vprintf_fptr vprintf; + upb_bytesink_write_func *write; + upb_bytesink_vprintf_func *vprintf; } upb_bytesink_vtbl; -struct _upb_bytesink { +typedef struct { upb_bytesink_vtbl *vtbl; -}; +} upb_bytesink; -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { - s->vtbl = vtbl; +INLINE void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) { + sink->vtbl = vtbl; } +INLINE bool upb_bytesink_write(upb_bytesink *sink, const char *buf, size_t len, + upb_status *s) { + return sink->vtbl->write(sink, buf, len, s); +} -// TODO: Figure out how buffering should be handled. Should the caller buffer -// data and only call these functions when a buffer is full? Seems most -// efficient, but then buffering has to be configured in the caller, which -// could be anything, which makes it hard to have a standard interface for -// controlling buffering. -// -// The downside of having the bytesink buffer is efficiency: the caller is -// making more (virtual) function calls, and the caller can't arrange to have -// a big contiguous buffer. The bytesink can do this, but will have to copy -// to make the data contiguous. 
- -// Returns the number of bytes written. -INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, - const char *fmt, ...) { +INLINE bool upb_bytesink_writestr(upb_bytesink *sink, const char *str, + upb_status *s) { + return upb_bytesink_write(sink, str, strlen(str), s); +} + +// Returns the number of bytes written or -1 on error. +INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, + const char *fmt, ...) { va_list args; va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); + uint32_t ret = sink->vtbl->vprintf(sink, status, fmt, args); va_end(args); return ret; } -// Puts the given string, returning true if the operation was successful, otherwise -// check "status" for details. Ownership of the string is *not* passed; if -// the callee wants a reference he must call upb_string_getref() on it. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, - upb_status *status) { - return sink->vtbl->putstr(sink, str, status); -} +// OPT: add getappendbuf() +// OPT: add writefrombytesrc() +// TODO: add flush() + + +/* upb_cbuf *******************************************************************/ + +// A circular buffer implementation for bytesrcs that do internal buffering. 
#ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_decoder.c b/src/upb_decoder.c index a44b561..fed48af 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -8,6 +8,7 @@ #include #include #include +#include "bswap.h" #include "upb_bytestream.h" #include "upb_decoder.h" #include "upb_varint.h" @@ -38,83 +39,97 @@ static void upb_decoder_exit2(void *_d) { upb_decoder *d = _d; upb_decoder_exit(d); } +static void upb_decoder_abort(upb_decoder *d, const char *msg) { + upb_status_setf(d->status, UPB_ERROR, msg); + upb_decoder_exit(d); +} /* Decoding/Buffering of wire types *******************************************/ -#define UPB_MAX_VARINT_ENCODED_SIZE 10 - -static void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; } static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; } +static void upb_decoder_advance(upb_decoder *d, size_t len) { + assert((size_t)(d->end - d->ptr) >= len); + d->ptr += len; +} size_t upb_decoder_offset(upb_decoder *d) { - size_t offset = d->buf_stream_offset; - if (d->buf) offset += (d->ptr - d->buf); + size_t offset = d->bufstart_ofs; + if (d->ptr) offset += (d->ptr - d->buf); return offset; } static void upb_decoder_setmsgend(upb_decoder *d) { - uint32_t end = d->dispatcher.top->end_offset; - d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end; + upb_dispatcher_frame *f = d->dispatcher.top; + size_t delimlen = f->end_ofs - d->bufstart_ofs; + size_t buflen = d->end - d->buf; + if (f->end_ofs != UINT64_MAX && delimlen <= buflen) { + d->delim_end = (uintptr_t)(d->buf + delimlen); + } else { + // Buffers must not run up against the end of memory. + assert((uintptr_t)d->end < UINTPTR_MAX); + d->delim_end = UINTPTR_MAX; + } } // Pulls the next buffer from the bytesrc. Should be called only when the // current buffer is completely empty. 
-static void upb_pullbuf(upb_decoder *d, bool need) { +static bool upb_trypullbuf(upb_decoder *d) { assert(upb_decoder_bufleft(d) == 0); - int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1; - upb_string_recycle(&d->bufstr); - if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) { - d->buf = NULL; - d->end = NULL; - if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF."); - upb_decoder_exit(d); - } - if (last_buf_len != -1) { - d->buf_stream_offset += last_buf_len; - for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f) - if (f->end_offset != UPB_NONDELIMITED) - f->end_offset -= last_buf_len; + if (d->bufend_ofs == d->refend_ofs) { + d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status); + if (!upb_ok(d->status)) { + d->ptr = NULL; + d->end = NULL; + if (upb_iseof(d->status)) return false; + upb_decoder_exit(d); + } } - d->buf = upb_string_getrobuf(d->bufstr); - d->ptr = upb_string_getrobuf(d->bufstr); - d->end = d->buf + upb_string_len(d->bufstr); + d->bufstart_ofs = d->bufend_ofs; + size_t len; + d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len); + assert(len > 0); + d->bufend_ofs = d->bufstart_ofs + len; + d->ptr = d->buf; + d->end = d->buf + len; +#ifdef UPB_USE_JIT_X64 d->jit_end = d->end - 20; - upb_string_recycle(&d->tmp); - upb_string_substr(d->tmp, d->bufstr, 0, 0); +#endif upb_decoder_setmsgend(d); + return true; } -// Called only from the slow path, this function copies the next "len" bytes -// from the stream to "data", adjusting the decoder state appropriately. 
-NOINLINE void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) { - while (1) { - size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d)); - memcpy(data, d->ptr, to_copy); - upb_decoder_advance(d, to_copy); - bytes -= to_copy; - if (bytes == 0) return; - upb_pullbuf(d, need); +static void upb_pullbuf(upb_decoder *d) { + if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF"); +} + +void upb_decoder_commit(upb_decoder *d) { + d->completed_ptr = d->ptr; + if (d->refstart_ofs < d->bufstart_ofs) { + // Drop our ref on the previous buf's region. + upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs); + upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs); + d->refstart_ofs = d->bufstart_ofs; } } -NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) { +NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) { uint8_t byte = 0x80; uint64_t u64 = 0; int bitpos; + const char *ptr = d->ptr; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - upb_getbuf(d, &byte, 1, need); - u64 |= ((uint64_t)byte & 0x7F) << bitpos; - } - - if(bitpos == 70 && (byte & 0x80)) { - upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); - upb_decoder_exit(d); + if (upb_decoder_bufleft(d) == 0) { + upb_pullbuf(d); + ptr = d->ptr; + } + u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos; } + if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint"); return u64; } // For tags and delimited lengths, which must be <=32bit and are usually small. 
-FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) { +FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) { const char *p = d->ptr; uint32_t ret; uint64_t u64; @@ -125,11 +140,8 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) { ret |= (*p & 0x7f) << 7; if ((*(p++) & 0x80) == 0) goto done; // likely slow: - u64 = upb_decode_varint_slow(d, need); - if (u64 > 0xffffffff) { - upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n"); - upb_decoder_exit(d); - } + u64 = upb_decode_varint_slow(d); + if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint"); ret = (uint32_t)u64; p = d->ptr; // Turn the next line into a nop. done: @@ -137,57 +149,90 @@ done: return ret; } +FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { + if (upb_decoder_bufleft(d) == 0) { + // Check for our two normal end-of-message conditions. + if (d->bufend_ofs == d->end_ofs) return false; + if (!upb_trypullbuf(d)) return false; + } + *val = upb_decode_varint32(d); + return true; +} + FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) { - if (upb_decoder_bufleft(d) >= 16) { - // Common (fast) case. + if (upb_decoder_bufleft(d) >= 10) { + // Fast case. upb_decoderet r = upb_vdecode_fast(d->ptr); - if (r.p == NULL) { - upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); - upb_decoder_exit(d); - } + if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint"); upb_decoder_advance(d, r.p - d->ptr); return r.val; - } else { - return upb_decode_varint_slow(d, true); + } else if (upb_decoder_bufleft(d) > 0) { + // Intermediate case -- worth it? + char tmpbuf[10]; + memset(tmpbuf, 0x80, 10); + memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d)); + upb_decoderet r = upb_vdecode_fast(tmpbuf); + if (r.p != NULL) { + upb_decoder_advance(d, r.p - tmpbuf); + return r.val; + } } + // Slow case -- varint spans buffer seam. 
+ return upb_decode_varint_slow(d); } -FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) { +FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) { if (upb_decoder_bufleft(d) >= bytes) { - // Common (fast) case. - memcpy(val, d->ptr, bytes); + // Fast case. + memcpy(buf, d->ptr, bytes); upb_decoder_advance(d, bytes); } else { - upb_getbuf(d, val, bytes, true); + // Slow case. + size_t read = 0; + while (read < bytes) { + size_t avail = upb_decoder_bufleft(d); + memcpy(buf + read, d->ptr, avail); + upb_decoder_advance(d, avail); + read += avail; + } } } FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) { uint32_t u32; - upb_decode_fixed(d, &u32, sizeof(uint32_t)); - return u32; + upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t)); + return le32toh(u32); } FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { uint64_t u64; - upb_decode_fixed(d, &u64, sizeof(uint64_t)); - return u64; + upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t)); + return le64toh(u64); } -INLINE upb_string *upb_decode_string(upb_decoder *d) { - upb_string_recycle(&d->tmp); - uint32_t strlen = upb_decode_varint32(d, true); +INLINE upb_strref *upb_decode_string(upb_decoder *d) { + uint32_t strlen = upb_decode_varint32(d); + d->strref.stream_offset = upb_decoder_offset(d); + d->strref.len = strlen; + if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d); if (upb_decoder_bufleft(d) >= strlen) { - // Common (fast) case. - upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen); + // Fast case. + d->strref.ptr = d->ptr; upb_decoder_advance(d, strlen); } else { - upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true); + // Slow case. 
+ while (1) { + size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen); + upb_decoder_advance(d, consume); + strlen -= consume; + if (strlen == 0) break; + upb_pullbuf(d); + } } - return d->tmp; + return &d->strref; } INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) { - upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end; + upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end; upb_decoder_setmsgend(d); } @@ -224,7 +269,7 @@ T(DOUBLE, fixed64, double, upb_asdouble) T(FLOAT, fixed32, float, upb_asfloat) T(SINT32, varint, int32, upb_zzdec_32) T(SINT64, varint, int64, upb_zzdec_64) -T(STRING, string, str, upb_string*) +T(STRING, string, strref, upb_strref*) static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { upb_push(d, f, UPB_NONDELIMITED); @@ -235,28 +280,24 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) { upb_decoder_setmsgend(d); } static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { - upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf)); + upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf)); } /* The main decoding loop *****************************************************/ -// Called when a user callback returns something other than UPB_CONTINUE. -// This should unwind one or more stack frames, skipping the corresponding -// data in the input. 
+static void upb_decoder_checkdelim(upb_decoder *d) { + while ((uintptr_t)d->ptr >= d->delim_end) { + if ((uintptr_t)d->ptr > d->delim_end) + upb_decoder_abort(d, "Bad submessage end"); -static void upb_delimend(upb_decoder *d) { - if (d->ptr > d->submsg_end) { - upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); - upb_decoder_exit(d); - } - - if (d->dispatcher.top->is_sequence) { - upb_dispatch_endseq(&d->dispatcher); - } else { - upb_dispatch_endsubmsg(&d->dispatcher); + if (d->dispatcher.top->is_sequence) { + upb_dispatch_endseq(&d->dispatcher); + } else { + upb_dispatch_endsubmsg(&d->dispatcher); + } + upb_decoder_setmsgend(d); } - upb_decoder_setmsgend(d); } static void upb_decoder_enterjit(upb_decoder *d) { @@ -273,7 +314,8 @@ static void upb_decoder_enterjit(upb_decoder *d) { INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { while (1) { - uint32_t tag = upb_decode_varint32(d, false); + uint32_t tag; + if (!upb_trydecode_varint32(d, &tag)) return NULL; upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag); // There are no explicit "startseq" or "endseq" markers in protobuf @@ -287,8 +329,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { // TODO: support packed. 
assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) || (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED); - uint32_t end = d->dispatcher.top->end_offset; - upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end; + uint32_t end = d->dispatcher.top->end_ofs; + upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end; upb_decoder_setmsgend(d); } if (f) return f; @@ -299,11 +341,13 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break; case UPB_WIRE_TYPE_DELIMITED: - upb_decoder_advance(d, upb_decode_varint32(d, true)); - break; + upb_decoder_advance(d, upb_decode_varint32(d)); break; + default: + upb_decoder_abort(d, "Invavlid wire type"); } // TODO: deliver to unknown field callback. - while (d->ptr >= d->submsg_end) upb_delimend(d); + upb_decoder_commit(d); + upb_decoder_checkdelim(d); } } @@ -311,11 +355,11 @@ void upb_decoder_onexit(upb_decoder *d) { if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher); if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { // Normal end-of-file. - upb_clearerr(d->status); + upb_status_clear(d->status); upb_dispatch_endmsg(&d->dispatcher, d->status); } else { if (d->status->code == UPB_EOF) - upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage."); + upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage."); } } @@ -325,26 +369,32 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) { return; } d->status = status; - upb_pullbuf(d, true); upb_dispatch_startmsg(&d->dispatcher); while(1) { // Main loop: executed once per tag/field pair. 
- while (d->ptr >= d->submsg_end) upb_delimend(d); + upb_decoder_checkdelim(d); upb_decoder_enterjit(d); // if (!d->dispatcher.top->is_packed) upb_fhandlers *f = upb_decode_tag(d); + if (!f) upb_decoder_exit2(d); f->decode(d, f); + upb_decoder_commit(d); } } static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top, upb_dispatcher_frame *bottom) { (void)top; + (void)bottom; + (void)_d; +#if 0 upb_decoder *d = _d; + // TODO if (bottom->end_offset == UPB_NONDELIMITED) { // TODO: support skipping groups. abort(); } - d->ptr = d->buf + bottom->end_offset; + d->ptr = d->buf.ptr + bottom->end_offset; +#endif } void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) { @@ -354,10 +404,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) { d->jit_code = NULL; if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d); #endif - d->bufstr = NULL; - d->tmp = NULL; - upb_string_recycle(&d->tmp); - // Set function pointers for each field's decode function. for (int i = 0; i < handlers->msgs_len; i++) { upb_mhandlers *m = handlers->msgs[i]; @@ -396,19 +442,27 @@ void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) { upb_handlers_unref(h); } -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { - upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED; +void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs, + uint64_t end_ofs, void *closure) { + upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure); + f->end_ofs = end_ofs; + d->end_ofs = end_ofs; + d->refstart_ofs = start_ofs; + d->refend_ofs = start_ofs; + d->bufstart_ofs = start_ofs; + d->bufend_ofs = start_ofs; d->bytesrc = bytesrc; d->buf = NULL; d->ptr = NULL; d->end = NULL; // Force a buffer pull. - d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered. 
- d->buf_stream_offset = 0; +#ifdef UPB_USE_JIT_X64 + d->jit_end = NULL; +#endif + d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered. + d->strref.bytesrc = bytesrc; } void upb_decoder_uninit(upb_decoder *d) { - upb_string_unref(d->bufstr); - upb_string_unref(d->tmp); #ifdef UPB_USE_JIT_X64 if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d); #endif diff --git a/src/upb_decoder.h b/src/upb_decoder.h index e9bc0b4..7a813bf 100644 --- a/src/upb_decoder.h +++ b/src/upb_decoder.h @@ -30,44 +30,33 @@ extern "C" { struct dasm_State; -struct _upb_decoder { - // Bytesrc from which we pull serialized data. - upb_bytesrc *bytesrc; +typedef struct _upb_decoder { + upb_bytesrc *bytesrc; // Source of our serialized data. + upb_dispatcher dispatcher; // Dispatcher to which we push parsed data. + upb_status *status; // Where we will store any errors that occur. + upb_strref strref; // For passing string data to callbacks. - // String to hold our input buffer; is only active if d->buf != NULL. - upb_string *bufstr; + // Offsets for the region we currently have ref'd. + uint64_t refstart_ofs, refend_ofs; - // Temporary string for passing string data to callbacks. - upb_string *tmp; + // Current buffer and its stream offset. + const char *buf, *ptr, *end; + uint64_t bufstart_ofs, bufend_ofs; - // The offset within the overall stream represented by the *beginning* of buf. - size_t buf_stream_offset; + // Stream offset for the end of the top-level message, if any. + uint64_t end_ofs; - // Pointer to the beginning of our current data buffer, or NULL if none. - const char *buf; + // Buf offset as of which we've delivered calbacks; needed for rollback on + // UPB_TRYAGAIN (or in the future, UPB_SUSPEND). + const char *completed_ptr; - // End of this buffer, relative to *ptr. - const char *end; - const char *jit_end; + // End of the delimited region, relative to ptr, or UINTPTR_MAX if not in + // this buf. 
+ uintptr_t delim_end; - // Members which may also be written by the JIT: - - // Our current position in the data buffer. - const char *ptr; - - // End of this submessage, relative to *ptr. - const char *submsg_end; - - // MIN(end, submsg_end) - const char *effective_end; - - upb_fhandlers *f; - - // Where we will store any errors that occur. - upb_status *status; - - // Dispatcher to which we push parsed data. - upb_dispatcher dispatcher; +#ifdef UPB_USE_JIT_X64 + // For JIT, which doesn't do bounds checks in the middle of parsing a field. + const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end) // JIT-generated machine code (else NULL). char *jit_code; @@ -75,21 +64,10 @@ struct _upb_decoder { char *debug_info; struct dasm_State *dynasm; - sigjmp_buf exitjmp; -}; - -// For use in the upb_dispatcher's stack. -typedef struct { - // Relative to the beginning of this buffer. - // For groups and the top-level: UINT32_MAX. - uint32_t end_offset; - bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPATEDEND -} upb_decoder_srcdata; +#endif -// A upb_decoder decodes the binary protocol buffer format, writing the data it -// decodes to a upb_sink. -struct _upb_decoder; -typedef struct _upb_decoder upb_decoder; + sigjmp_buf exitjmp; +} upb_decoder; // Initializes/uninitializes a decoder for calling into the given handlers // or to write into the given msgdef, given its accessors). Takes a ref @@ -107,7 +85,10 @@ void upb_decoder_uninit(upb_decoder *d); // state where it has not seen any data, and expects the next data to be from // the beginning of a new protobuf. Parsers must be reset before they can be // used. A decoder can be reset multiple times. -void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure); +// +// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message. 
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs, + uint64_t end_ofs, void *closure); void upb_decoder_decode(upb_decoder *d, upb_status *status); diff --git a/src/upb_decoder_x86.dasc b/src/upb_decoder_x86.dasc index fec0ffe..800b099 100644 --- a/src/upb_decoder_x86.dasc +++ b/src/upb_decoder_x86.dasc @@ -120,7 +120,7 @@ void upb_reg_jit_gdb(upb_decoder *d) { |.define PTR, rbx |.define CLOSURE, r12 |.type FRAME, upb_dispatcher_frame, r13 -|.type STRING, upb_string, r14 +|.type STRREF, upb_strref, r14 |.type DECODER, upb_decoder, r15 | |.macro callp, addr @@ -199,7 +199,7 @@ void upb_reg_jit_gdb(upb_decoder *d) { | jae ->exit_jit // Frame stack overflow. | mov qword FRAME:rax->f, f | mov qword FRAME:rax->closure, closure_ -| mov dword FRAME:rax->end_offset, end_offset_ +| mov dword FRAME:rax->end_ofs, end_offset_ | mov byte FRAME:rax->is_sequence, is_sequence_ | mov CLOSURE, rdx | mov DECODER->dispatcher.top, rax @@ -217,17 +217,17 @@ void upb_reg_jit_gdb(upb_decoder *d) { | mov rsi, DECODER->jit_end || if (m->is_group) { | mov64 rax, 0xffffffffffffffff -| mov qword DECODER->submsg_end, rax +| mov qword DECODER->delim_end, rax | mov DECODER->effective_end, rsi || } else { | // Could store a correctly-biased version in the frame, at the cost of | // a larger stack. 
-| mov eax, dword FRAME->end_offset +| mov eax, dword FRAME->end_ofs | add rax, qword DECODER->buf -| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset +| mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs | cmp rax, rsi | jb >8 -| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end) +| mov rax, rsi // effective_end = min(d->delim_end, d->jit_end) |8: | mov DECODER->effective_end, rax || } @@ -293,7 +293,7 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta } else { | mov rdx, CLOSURE } - | mov esi, FRAME->end_offset + | mov esi, FRAME->end_ofs | pushframe f, rdx, esi, true } @@ -357,10 +357,14 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta // buf, which sidesteps any security problems. The C path has more // robust checks. | decode_varint tag_size - | mov STRING->len, ARG3_32 - | mov STRING->ptr, PTR + | mov STRREF->len, ARG3_32 + | mov STRREF->ptr, PTR + | mov rax, PTR + | sub rax, DECODER->buf + | add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs + | mov STRREF->stream_offset, eax | add PTR, ARG3_64 - | mov ARG3_64, STRING + | mov ARG3_64, STRREF | cmp PTR, DECODER->effective_end | ja ->exit_jit // Can't deliver, whole string not in buf. break; @@ -514,7 +518,7 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) { // This case doesn't exist for groups, because there eob really means // eob, so that case just exits the jit directly. |=>m->jit_endofbuf_pclabel: - | cmp PTR, DECODER->submsg_end + | cmp PTR, DECODER->delim_end | jb ->exit_jit // We are at eob, but not end-of-submsg. 
} @@ -550,7 +554,7 @@ static void upb_decoder_jit(upb_decoder *d) { | push rbx | mov DECODER, ARG1_64 | mov FRAME, DECODER:ARG1_64->dispatcher.top - | mov STRING, DECODER:ARG1_64->tmp + | lea STRREF, DECODER:ARG1_64->strref | mov CLOSURE, FRAME->closure | mov PTR, DECODER->ptr diff --git a/src/upb_def.c b/src/upb_def.c index 45e7f73..4cd80b1 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -7,18 +7,11 @@ #include #include +#include #include "upb_def.h" #define alignof(t) offsetof(struct { char c; t x; }, x) -/* Search for a character in a string, in reverse. */ -static int my_memrchr(char *data, char c, size_t len) -{ - int off = len-1; - while(off > 0 && data[off] != c) --off; - return off; -} - void upb_deflist_init(upb_deflist *l) { l->size = 8; l->defs = malloc(l->size * sizeof(void*)); @@ -105,7 +98,8 @@ static void upb_def_init(upb_def *def, upb_deftype_t type) { } static void upb_def_uninit(upb_def *def) { - upb_string_unref(def->fqname); + //fprintf(stderr, "Freeing def: %p\n", def); + free(def->fqname); } @@ -120,19 +114,19 @@ typedef struct _upb_unresolveddef { // The target type name. This may or may not be fully qualified. It is // tempting to want to use base.fqname for this, but that will be qualified // which is inappropriate for a name we still have to resolve. - upb_string *name; + char *name; } upb_unresolveddef; // Is passed a ref on the string. 
-static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { +static upb_unresolveddef *upb_unresolveddef_new(const char *str) { upb_unresolveddef *def = malloc(sizeof(*def)); upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->name = upb_string_getref(str); + def->name = strdup(str); return def; } static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { - upb_string_unref(def->name); + free(def->name); upb_def_uninit(&def->base); free(def); } @@ -152,7 +146,7 @@ static void upb_enumdef_free(upb_enumdef *e) { upb_enum_iter i; for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { // Frees the ref taken when the string was parsed. - upb_string_unref(upb_enum_iter_name(i)); + free(upb_enum_iter_name(i)); } upb_strtable_free(&e->ntoi); upb_inttable_free(&e->iton); @@ -170,12 +164,11 @@ upb_enumdef *upb_enumdef_dup(upb_enumdef *e) { return new_e; } -bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) { - if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false; - upb_ntoi_ent ntoi_ent = {{name, 0}, num}; - upb_iton_ent iton_ent = {0, name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name +bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) { + if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) + return false; + upb_strtable_insert(&e->ntoi, name, &num); + upb_inttable_insert(&e->iton, num, strdup(name)); return true; } @@ -193,19 +186,22 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) { return upb_inttable_next(&e->iton, iter); } -upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) { - upb_iton_ent *e = - (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); - return e ? e->string : NULL; +const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) { + upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); + return e ? 
e->str : NULL; } -bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) { - upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name); +bool upb_enumdef_ntoil(upb_enumdef *def, char *name, size_t len, int32_t *num) { + upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len); if (!e) return false; if (num) *num = e->value; return true; } +bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num) { + return upb_enumdef_ntoil(e, name, strlen(name), num); +} + /* upb_fielddef ***************************************************************/ @@ -228,9 +224,9 @@ upb_fielddef *upb_fielddef_new() { static void upb_fielddef_free(upb_fielddef *f) { if (upb_isstring(f)) { - upb_string_unref(upb_value_getstr(f->defaultval)); + free(upb_value_getptr(f->defaultval)); } - upb_string_unref(f->name); + free(f->name); free(f); } @@ -270,18 +266,18 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { f->def = def; if (f->type == UPB_TYPE(ENUM)) { // Resolve the enum's default from a string to an integer. - upb_string *str = upb_value_getstr(f->defaultval); + char *str = upb_value_getptr(f->defaultval); assert(str); // Should point to either a real default or the empty string. 
upb_enumdef *e = upb_downcast_enumdef(f->def); int32_t val = 0; - if (str == upb_emptystring()) { + if (str[0] == '\0') { upb_value_setint32(&f->defaultval, e->defaultval); } else { bool success = upb_enumdef_ntoi(e, str, &val); - upb_string_unref(str); + free(str); if (!success) { - upb_seterr(s, UPB_ERROR, "Default enum value (" UPB_STRFMT ") is not a " - "member of the enum", UPB_STRARG(str)); + upb_status_setf(s, UPB_ERROR, "Default enum value (%s) is not a " + "member of the enum", str); return false; } upb_value_setint32(&f->defaultval, val); @@ -295,9 +291,9 @@ void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { f->number = number; } -void upb_fielddef_setname(upb_fielddef *f, upb_string *name) { +void upb_fielddef_setname(upb_fielddef *f, const char *name) { assert(f->msgdef == NULL); - f->name = upb_string_getref(name); + f->name = strdup(name); } void upb_fielddef_settype(upb_fielddef *f, uint8_t type) { @@ -326,7 +322,7 @@ void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) f->accessor = vtbl; } -void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) { +void upb_fielddef_settypename(upb_fielddef *f, const char *name) { upb_def_unref(f->def); f->def = UPB_UPCAST(upb_unresolveddef_new(name)); } @@ -424,9 +420,8 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) { assert(f->msgdef == NULL); f->msgdef = m; upb_itof_ent itof_ent = {0, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; upb_inttable_insert(&m->itof, f->number, &itof_ent); - upb_strtable_insert(&m->ntof, &ntof_ent.e); + upb_strtable_insert(&m->ntof, f->name, &f); return true; } @@ -493,7 +488,6 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) { /* upb_symtabtxn **************************************************************/ typedef struct { - upb_strtable_entry e; upb_def *def; } upb_symtab_ent; @@ -503,16 +497,19 @@ void upb_symtabtxn_init(upb_symtabtxn *t) { void upb_symtabtxn_uninit(upb_symtabtxn *txn) { upb_strtable *t = 
&txn->deftab; - upb_symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); + upb_strtable_iter i; + for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) { + const upb_symtab_ent *e = upb_strtable_iter_value(&i); + free(e->def); + } upb_strtable_free(t); } bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) { // TODO: check if already present. - upb_symtab_ent e = {{def->fqname, 0}, def}; - upb_strtable_insert(&t->deftab, &e.e); + upb_symtab_ent e = {def}; + //fprintf(stderr, "txn Inserting: %p, ent: %p\n", e.def, &e); + upb_strtable_insert(&t->deftab, def->fqname, &e); return true; } @@ -531,59 +528,28 @@ err: // Given a symbol and the base symbol inside which it is defined, find the // symbol's definition in t. static upb_symtab_ent *upb_resolve(upb_strtable *t, - upb_string *base, upb_string *sym) { - if(upb_string_len(sym) == 0) return NULL; - if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { + const char *base, const char *sym) { + if(strlen(sym) == 0) return NULL; + if(sym[0] == UPB_SYMBOL_SEPARATOR) { // Symbols starting with '.' are absolute, so we do a single lookup. // Slice to omit the leading '.' - upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1); - upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); - upb_string_unref(sym_str); - return e; + return upb_strtable_lookup(t, sym + 1); } else { // Remove components from base until we find an entry or run out. // TODO: This branch is totally broken, but currently not used. 
- upb_string *sym_str = upb_string_new(); - int baselen = upb_string_len(base); - upb_symtab_ent *ret = NULL; - while(1) { - // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym - upb_strlen_t len = baselen + upb_string_len(sym) + 1; - char *buf = upb_string_getrwbuf(sym_str, len); - memcpy(buf, upb_string_getrobuf(base), baselen); - buf[baselen] = UPB_SYMBOL_SEPARATOR; - memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym)); - - upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); - if (e) { - ret = e; - break; - } else if(baselen == 0) { - // No more scopes to try. - ret = NULL; - break; - } - baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); - } - upb_string_unref(sym_str); - return ret; + (void)base; + assert(false); + return NULL; } } -upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) { - return upb_strtable_begin(&t->deftab); +void upb_symtabtxn_begin(upb_symtabtxn_iter *i, upb_symtabtxn *t) { + upb_strtable_begin(i, &t->deftab); } - -upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) { - return upb_strtable_next(&t->deftab, i); -} - -bool upb_symtabtxn_done(upb_symtabtxn_iter i) { - return i == NULL; -} - -upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) { - upb_symtab_ent *e = iter; +void upb_symtabtxn_next(upb_symtabtxn_iter *i) { upb_strtable_next(i); } +bool upb_symtabtxn_done(upb_symtabtxn_iter *i) { return upb_strtable_done(i); } +upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *i) { + const upb_symtab_ent *e = upb_strtable_iter_value(i); return e->def; } @@ -591,8 +557,10 @@ upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) { /* upb_symtab public interface ************************************************/ static void _upb_symtab_free(upb_strtable *t) { - upb_symtab_ent *e; - for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) { + upb_strtable_iter i; + upb_strtable_begin(&i, t); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + const 
upb_symtab_ent *e = upb_strtable_iter_value(&i); assert(upb_atomic_read(&e->def->refcount) == 0); upb_def_free(e->def); } @@ -632,9 +600,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) { // We may only use part of this, depending on how many symbols are of the // correct type. upb_def **defs = malloc(sizeof(*defs) * total); - upb_symtab_ent *e = upb_strtable_begin(&s->symtab); + upb_strtable_iter iter; + upb_strtable_begin(&iter, &s->symtab); int i = 0; - for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { + for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { + const upb_symtab_ent *e = upb_strtable_iter_value(&iter); upb_def *def = e->def; assert(def); if(type == UPB_DEF_ANY || def->type == type) @@ -646,7 +616,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) { return defs; } -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) { +upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym) { upb_rwlock_rdlock(&s->lock); upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); upb_def *ret = NULL; @@ -658,9 +628,9 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) { return ret; } -upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { +upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym) { upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); + upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym); upb_def *ret = NULL; if(e) { ret = e->def; @@ -692,8 +662,9 @@ bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n, bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL); if (needcopy && !replacing) { - upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)}; - upb_strtable_insert(&txn->deftab, &e.e); + upb_symtab_ent e = {upb_def_dup(def)}; + //fprintf(stderr, "Replacing def: %p\n", e.def); + upb_strtable_insert(&txn->deftab, def->fqname, &e); replacing = 
true; } return replacing; @@ -706,25 +677,29 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) { // themselves be replaced with versions that will point to the new defs. // Do a DFS -- any path that finds a new def must replace all ancestors. upb_strtable *symtab = &s->symtab; - upb_symtab_ent *e; - for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) { + upb_strtable_iter i; + upb_strtable_begin(&i, symtab); + for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { upb_def *open_defs[UPB_MAX_TYPE_DEPTH]; + const upb_symtab_ent *e = upb_strtable_iter_value(&i); upb_symtab_dfs(e->def, open_defs, 0, txn); } // Resolve all refs. upb_strtable *txntab = &txn->deftab; - for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) { + upb_strtable_begin(&i, txntab); + for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { + const upb_symtab_ent *e = upb_strtable_iter_value(&i); upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; // Type names are resolved relative to the message in which they appear. - upb_string *base = m->base.fqname; + const char *base = m->base.fqname; - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); + upb_msg_iter j; + for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) { + upb_fielddef *f = upb_msg_iter_field(j); if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_string *name = upb_downcast_unresolveddef(f->def)->name; + const char *name = upb_downcast_unresolveddef(f->def)->name; // Resolve from either the txntab (pending adds) or symtab (existing // defs). 
If both exist, prefer the pending add, because it will be @@ -732,17 +707,18 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) { upb_symtab_ent *found; if(!(found = upb_resolve(txntab, base, name)) && !(found = upb_resolve(symtab, base, name))) { - upb_seterr(status, UPB_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); + upb_status_setf(status, UPB_ERROR, "could not resolve symbol '%s' " + "in context '%s'", name, base); return false; } // Check the type of the found def. upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; + //fprintf(stderr, "found: %p\n", found); + //fprintf(stderr, "found->def: %p\n", found->def); + //fprintf(stderr, "found->def->type: %d\n", found->def->type); if(found->def->type != expected) { - upb_seterr(status, UPB_ERROR, "Unexpected type"); + upb_status_setf(status, UPB_ERROR, "Unexpected type"); return false; } if (!upb_fielddef_resolve(f, found->def, status)) return false; @@ -751,9 +727,9 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) { // The defs in the transaction have been vetted, and can be moved to the // symtab without causing errors. 
- upb_symtab_ent *tmptab_e; - for(tmptab_e = upb_strtable_begin(txntab); tmptab_e; - tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) { + upb_strtable_begin(&i, txntab); + for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { + const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i); upb_def_movetosymtab(tmptab_e->def, s); upb_symtab_ent *symtab_e = upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); @@ -761,7 +737,8 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) { upb_deflist_push(&s->olddefs, symtab_e->def); symtab_e->def = tmptab_e->def; } else { - upb_strtable_insert(&s->symtab, &tmptab_e->e); + //fprintf(stderr, "Inserting def: %p\n", tmptab_e->def); + upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e); } } diff --git a/src/upb_def.h b/src/upb_def.h index ca969cb..34f5009 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -32,7 +32,7 @@ typedef struct _upb_symtab upb_symtab; // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. typedef enum { - UPB_DEF_MSG = 0, + UPB_DEF_MSG = 1, UPB_DEF_ENUM, UPB_DEF_SERVICE, // Not yet implemented. @@ -44,7 +44,7 @@ typedef enum { /* upb_def: base class for defs **********************************************/ typedef struct { - upb_string *fqname; // Fully qualified. + char *fqname; // Fully qualified. upb_symtab *symtab; // Def is mutable iff symtab == NULL. upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0). upb_deftype_t type; @@ -66,7 +66,7 @@ upb_def *upb_def_dup(upb_def *def); // A upb_fielddef describes a single field in a message. It isn't a full def // in the sense that it derives from upb_def. It cannot stand on its own; it // must be part of a upb_msgdef. It is also reference-counted. 
-struct _upb_fielddef { +typedef struct _upb_fielddef { struct _upb_msgdef *msgdef; upb_def *def; // if upb_hasdef(f) upb_atomic_t refcount; @@ -78,11 +78,11 @@ struct _upb_fielddef { int16_t hasbit; uint16_t offset; int32_t number; - upb_string *name; + char *name; upb_value defaultval; // Only meaningful for non-repeated scalars and strings. upb_value fval; struct _upb_accessor_vtbl *accessor; -}; +} upb_fielddef; upb_fielddef *upb_fielddef_new(); void upb_fielddef_ref(upb_fielddef *f); @@ -93,7 +93,7 @@ upb_fielddef *upb_fielddef_dup(upb_fielddef *f); INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; } INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; } INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; } -INLINE upb_string *upb_fielddef_name(upb_fielddef *f) { return f->name; } +INLINE char *upb_fielddef_name(upb_fielddef *f) { return f->name; } INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; } INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; } INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; } @@ -114,7 +114,7 @@ upb_def *upb_fielddef_subdef(upb_fielddef *f); // added to a msgdef. For the moment we do not allow these to be set once // the fielddef is added to a msgdef -- this could be relaxed in the future. void upb_fielddef_setnumber(upb_fielddef *f, int32_t number); -void upb_fielddef_setname(upb_fielddef *f, upb_string *name); +void upb_fielddef_setname(upb_fielddef *f, const char *name); // These writers may be called at any time prior to being put in a symtab. void upb_fielddef_settype(upb_fielddef *f, uint8_t type); @@ -124,7 +124,7 @@ void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); // The name of the message or enum this field is referring to. 
Must be found // at name resolution time (when the symtabtxn is committed to the symtab). -void upb_fielddef_settypename(upb_fielddef *f, upb_string *name); +void upb_fielddef_settypename(upb_fielddef *f, const char *name); // A variety of tests about the type of a field. INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { @@ -227,7 +227,7 @@ INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) { return e ? e->f : NULL; } -INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { +INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, char *name) { upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); return e ? e->f : NULL; } @@ -272,7 +272,7 @@ typedef struct { typedef struct { bool junk; - upb_string *string; + char *str; } upb_iton_ent; upb_enumdef *upb_enumdef_new(); @@ -288,12 +288,13 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); // Adds a value to the enumdef. Requires that no existing val has this // name or number (returns false and does not add if there is). May only // be called before the enumdef is in a symtab. -bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num); +bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num); // Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, int32_t *num); -// Caller does not own a ref on the returned string. -upb_string *upb_enumdef_iton(upb_enumdef *e, int32_t num); +bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num); +bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num); +// Caller does not own the returned string. +const char *upb_enumdef_iton(upb_enumdef *e, int32_t num); // Iteration over name/value pairs. The order is undefined. // Adding an enum val invalidates any iterators. 
@@ -308,9 +309,9 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter); INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); } // Iterator accessors. -INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { +INLINE char *upb_enum_iter_name(upb_enum_iter iter) { upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter); - return e->string; + return e->str; } INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { return upb_inttable_iter_key(iter); @@ -340,7 +341,7 @@ bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def); // Gets the def (if any) that is associated with this name in the symtab. // Caller does *not* inherit a ref on the def. -upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name); +upb_def *upb_symtabtxn_get(upb_symtabtxn *t, char *name); // Iterate over the defs that are part of the transaction. // The order is undefined. @@ -350,12 +351,12 @@ upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name); // i = upb_symtabtxn_next(t, i)) { // upb_def *def = upb_symtabtxn_iter_def(i); // } -typedef void* upb_symtabtxn_iter; +typedef upb_strtable_iter upb_symtabtxn_iter; -upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t); -upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i); -bool upb_symtabtxn_done(upb_symtabtxn_iter i); -upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter); +void upb_symtabtxn_begin(upb_symtabtxn_iter* i, upb_symtabtxn *t); +void upb_symtabtxn_next(upb_symtabtxn_iter *i); +bool upb_symtabtxn_done(upb_symtabtxn_iter *i); +upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *iter); /* upb_symtab *****************************************************************/ @@ -397,12 +398,12 @@ void upb_symtab_unref(upb_symtab *s); // If a def is found, the caller owns one ref on the returned def. Otherwise // returns NULL. 
// TODO: make return const -upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); +upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym); // Find an entry in the symbol table with this exact name. If a def is found, // the caller owns one ref on the returned def. Otherwise returns NULL. // TODO: make return const -upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); +upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref diff --git a/src/upb_descriptor.c b/src/upb_descriptor.c index 127d19c..f70f1ba 100644 --- a/src/upb_descriptor.c +++ b/src/upb_descriptor.c @@ -9,19 +9,22 @@ #include #include -#include "upb_string.h" #include "upb_def.h" -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns a ref on the returned string. */ -static upb_string *upb_join(upb_string *base, upb_string *name) { - if (!base || upb_string_len(base) == 0) { - return upb_string_getref(name); +// Returns a newly allocated string that joins input strings together, for example: +// join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" +// join("", "Baz") -> "Baz" +// Caller owns a ref on the returned string. */ +static char *upb_join(char *base, char *name) { + if (!base || strlen(base) == 0) { + return strdup(name); } else { - return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, - UPB_STRARG(base), UPB_STRARG(name)); + char *ret = malloc(strlen(base) + strlen(name) + 2); + ret[0] = '\0'; + strcat(ret, base); + strcat(ret, "."); + strcat(ret, name); + return ret; } } @@ -36,12 +39,12 @@ static upb_def *upb_deflist_last(upb_deflist *l) { } // Qualify the defname for all defs starting with offset "start" with "str". 
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { +static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { upb_def *def = l->defs[i]; - upb_string *name = def->fqname; + char *name = def->fqname; def->fqname = upb_join(str, name); - upb_string_unref(name); + free(name); } } @@ -59,13 +62,13 @@ void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) { } void upb_descreader_uninit(upb_descreader *r) { - upb_string_unref(r->name); + free(r->name); upb_status_uninit(&r->status); upb_deflist_uninit(&r->defs); - upb_string_unref(r->default_string); + free(r->default_string); while (r->stack_len > 0) { upb_descreader_frame *f = &r->stack[--r->stack_len]; - upb_string_unref(f->name); + free(f->name); } } @@ -91,13 +94,14 @@ void upb_descreader_startcontainer(upb_descreader *r) { void upb_descreader_endcontainer(upb_descreader *r) { upb_descreader_frame *f = &r->stack[--r->stack_len]; upb_deflist_qualify(&r->defs, f->name, f->start); - upb_string_unref(f->name); + free(f->name); + f->name = NULL; } -void upb_descreader_setscopename(upb_descreader *r, upb_string *str) { +void upb_descreader_setscopename(upb_descreader *r, char *str) { upb_descreader_frame *f = &r->stack[r->stack_len-1]; - upb_string_unref(f->name); - f->name = upb_string_getref(str); + free(f->name); + f->name = str; } // Handlers for google.protobuf.FileDescriptorProto. 
@@ -119,7 +123,7 @@ static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r, upb_value val) { (void)fval; upb_descreader *r = _r; - upb_descreader_setscopename(r, upb_value_getstr(val)); + upb_descreader_setscopename(r, upb_strref_dup(upb_value_getstrref(val))); return UPB_CONTINUE; } @@ -190,8 +194,8 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r, upb_value val) { (void)fval; upb_descreader *r = _r; - upb_string_unref(r->name); - r->name = upb_string_getref(upb_value_getstr(val)); + free(r->name); + r->name = upb_strref_dup(upb_value_getstrref(val)); r->saw_name = true; return UPB_CONTINUE; } @@ -210,7 +214,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; if(!r->saw_number || !r->saw_name) { - upb_seterr(status, UPB_ERROR, "Enum value missing name or number."); + upb_status_setf(status, UPB_ERROR, "Enum value missing name or number."); return; } upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); @@ -220,7 +224,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r, upb_enumdef_setdefault(e, r->number); } upb_enumdef_addval(e, r->name, r->number); - upb_string_unref(r->name); + free(r->name); r->name = NULL; } @@ -254,11 +258,11 @@ static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) upb_descreader *r = _r; upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) { - upb_seterr(status, UPB_ERROR, "Enum had no name."); + upb_status_setf(status, UPB_ERROR, "Enum had no name."); return; } if (upb_inttable_count(&e->iton) == 0) { - upb_seterr(status, UPB_ERROR, "Enum had no values."); + upb_status_setf(status, UPB_ERROR, "Enum had no values."); return; } } @@ -269,8 +273,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r, (void)fval; upb_descreader *r = _r; upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); - 
upb_string_unref(e->base.fqname); - e->base.fqname = upb_string_getref(upb_value_getstr(val)); + free(e->base.fqname); + e->base.fqname = upb_strref_dup(upb_value_getstrref(val)); return UPB_CONTINUE; } @@ -298,99 +302,73 @@ static upb_flow_t upb_fielddef_startmsg(void *_r) { return UPB_CONTINUE; } -// Converts the default value in string "dstr" into "d". Passes a ref on dstr. +// Converts the default value in string "str" into "d". Passes a ref on str. // Returns true on success. -static bool upb_fielddef_parsedefault(upb_string *dstr, upb_value *d, int type) { +static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) { bool success = true; if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) { // We'll keep the ref we had on it. We include enums in this case because // we need the enumdef to resolve the name, but we may not have it yet. // We'll resolve it later. - if (dstr) { - upb_value_setstr(d, dstr); - } else { - upb_value_setstr(d, upb_emptystring()); - } + if (!str) str = strdup(""); + upb_value_setptr(d, str); } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) { // We don't expect to get a default value. - upb_string_unref(dstr); - if (dstr != NULL) success = false; + free(str); + if (str != NULL) success = false; + } else if (type == UPB_TYPE(BOOL)) { + if (!str || strcmp(str, "false") == 0) + upb_value_setbool(d, false); + else if (strcmp(str, "true") == 0) + upb_value_setbool(d, true); + else + success = false; + free(str); } else { // The strto* functions need the string to be NULL-terminated. - char *strz = upb_string_isempty(dstr) ? 
NULL : upb_string_newcstr(dstr); + if (!str) str = strdup("0"); char *end; - upb_string_unref(dstr); switch (type) { case UPB_TYPE(INT32): case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): - if (strz) { - long val = strtol(strz, &end, 0); - if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) - success = false; - else - upb_value_setint32(d, val); - } else { - upb_value_setint32(d, 0); - } + case UPB_TYPE(SFIXED32): { + long val = strtol(str, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) + success = false; + else + upb_value_setint32(d, val); break; + } case UPB_TYPE(INT64): case UPB_TYPE(SINT64): case UPB_TYPE(SFIXED64): - if (strz) { - upb_value_setint64(d, strtoll(strz, &end, 0)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setint64(d, 0); - } + upb_value_setint64(d, strtoll(str, &end, 0)); + if (errno == ERANGE || *end) success = false; break; case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - if (strz) { - unsigned long val = strtoul(strz, &end, 0); - if (val > UINT32_MAX || errno == ERANGE || *end) - success = false; - else - upb_value_setuint32(d, val); - } else { - upb_value_setuint32(d, 0); - } + case UPB_TYPE(FIXED32): { + unsigned long val = strtoul(str, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) + success = false; + else + upb_value_setuint32(d, val); break; + } case UPB_TYPE(UINT64): case UPB_TYPE(FIXED64): - if (strz) { - upb_value_setuint64(d, strtoull(strz, &end, 0)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setuint64(d, 0); - } + upb_value_setuint64(d, strtoull(str, &end, 0)); + if (errno == ERANGE || *end) success = false; break; case UPB_TYPE(DOUBLE): - if (strz) { - upb_value_setdouble(d, strtod(strz, &end)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setdouble(d, 0.0); - } + upb_value_setdouble(d, strtod(str, &end)); + if (errno == ERANGE || *end) success = false; break; case UPB_TYPE(FLOAT): - if 
(strz) { - upb_value_setfloat(d, strtof(strz, &end)); - if (errno == ERANGE || *end) success = false; - } else { - upb_value_setfloat(d, 0.0); - } - break; - case UPB_TYPE(BOOL): - if (!strz || strcmp(strz, "false") == 0) - upb_value_setbool(d, false); - else if (strcmp(strz, "true") == 0) - upb_value_setbool(d, true); - else - success = false; + upb_value_setfloat(d, strtof(str, &end)); + if (errno == ERANGE || *end) success = false; break; } - free(strz); + free(str); } return success; } @@ -405,13 +383,13 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) { // Field was successfully read, add it as a field of the msgdef. upb_msgdef *m = upb_descreader_top(r); upb_msgdef_addfield(m, f); - upb_string *dstr = r->default_string; + char *dstr = r->default_string; r->default_string = NULL; upb_value val; if (!upb_fielddef_parsedefault(dstr, &val, f->type)) { // We don't worry too much about giving a great error message since the // compiler should have ensured this was correct. 
- upb_seterr(status, UPB_ERROR, "Error converting default value."); + upb_status_setf(status, UPB_ERROR, "Error converting default value."); return; } upb_fielddef_setdefault(f, val); @@ -441,7 +419,9 @@ static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) { (void)fval; upb_descreader *r = _r; - upb_fielddef_setname(r->f, upb_value_getstr(val)); + char *name = upb_strref_dup(upb_value_getstrref(val)); + upb_fielddef_setname(r->f, name); + free(name); return UPB_CONTINUE; } @@ -449,7 +429,9 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval, upb_value val) { (void)fval; upb_descreader *r = _r; - upb_fielddef_settypename(r->f, upb_value_getstr(val)); + char *name = upb_strref_dup(upb_value_getstrref(val)); + upb_fielddef_settypename(r->f, name); + free(name); return UPB_CONTINUE; } @@ -459,8 +441,8 @@ static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval, upb_descreader *r = _r; // Have to convert from string to the correct type, but we might not know the // type yet. 
- upb_string_unref(r->default_string); - r->default_string = upb_string_getref(upb_value_getstr(val)); + free(r->default_string); + r->default_string = upb_strref_dup(upb_value_getstrref(val)); return UPB_CONTINUE; } @@ -501,7 +483,7 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_msgdef *m = upb_descreader_top(r); if(!m->base.fqname) { - upb_seterr(status, UPB_ERROR, "Encountered message with no name."); + upb_status_setf(status, UPB_ERROR, "Encountered message with no name."); return; } @@ -514,9 +496,9 @@ static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) { upb_descreader *r = _r; assert(val.type == UPB_TYPE(STRING)); upb_msgdef *m = upb_descreader_top(r); - upb_string_unref(m->base.fqname); - m->base.fqname = upb_string_getref(upb_value_getstr(val)); - upb_descreader_setscopename(r, upb_value_getstr(val)); + free(m->base.fqname); + m->base.fqname = upb_strref_dup(upb_value_getstrref(val)); + upb_descreader_setscopename(r, strdup(m->base.fqname)); return UPB_CONTINUE; } diff --git a/src/upb_descriptor.h b/src/upb_descriptor.h index f74de3b..ee05e2f 100644 --- a/src/upb_descriptor.h +++ b/src/upb_descriptor.h @@ -28,7 +28,7 @@ extern "C" { // definitions that are contained inside. "name" tracks the name of the // message or package (a bare name -- not qualified by any enclosing scopes). typedef struct { - upb_string *name; + char *name; // Index of the first def that is under this scope. For msgdefs, the // msgdef itself is at start-1. 
int start; @@ -42,11 +42,11 @@ typedef struct { upb_status status; uint32_t number; - upb_string *name; + char *name; bool saw_number; bool saw_name; - upb_string *default_string; + char *default_string; upb_fielddef *f; } upb_descreader; diff --git a/src/upb_glue.c b/src/upb_glue.c index f288855..1f5bd3f 100644 --- a/src/upb_glue.c +++ b/src/upb_glue.c @@ -12,15 +12,15 @@ #include "upb_strstream.h" #include "upb_textprinter.h" -void upb_strtomsg(upb_string *str, void *msg, upb_msgdef *md, +void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md, upb_status *status) { upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str); + upb_stringsrc_reset(&strsrc, str, len); upb_decoder d; upb_decoder_initformsgdef(&d, md); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg); upb_decoder_decode(&d, status); upb_stringsrc_uninit(&strsrc); @@ -53,10 +53,11 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, #endif // TODO: read->load. -void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status) { +void upb_read_descriptor(upb_symtab *symtab, const char *str, size_t len, + upb_status *status) { upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str); + upb_stringsrc_reset(&strsrc, str, len); upb_handlers *h = upb_handlers_new(); upb_descreader_reghandlers(h); @@ -68,16 +69,16 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status upb_symtabtxn txn; upb_symtabtxn_init(&txn); upb_descreader_init(&r, &txn); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), &r); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r); upb_decoder_decode(&d, status); // Set default accessors and layouts on all messages. 
// for msgdef in symtabtxn: upb_symtabtxn_iter i; - for(i = upb_symtabtxn_begin(&txn); !upb_symtabtxn_done(i); - i = upb_symtabtxn_next(&txn, i)) { - upb_def *def = upb_symtabtxn_iter_def(i); + upb_symtabtxn_begin(&i, &txn); + for(; !upb_symtabtxn_done(&i); upb_symtabtxn_next(&i)) { + upb_def *def = upb_symtabtxn_iter_def(&i); upb_msgdef *md = upb_dyncast_msgdef(def); if (!md) return; // For field in msgdef: @@ -96,3 +97,33 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status upb_stringsrc_uninit(&strsrc); upb_decoder_uninit(&d); } + +char *upb_readfile(const char *filename, size_t *len) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + if(fseek(f, 0, SEEK_END) != 0) goto error; + long size = ftell(f); + if(size < 0) goto error; + if(fseek(f, 0, SEEK_SET) != 0) goto error; + char *buf = malloc(size); + if(fread(buf, size, 1, f) != 1) goto error; + fclose(f); + if (len) *len = size; + return buf; + +error: + fclose(f); + return NULL; +} + +void upb_read_descriptorfile(upb_symtab *symtab, const char *fname, + upb_status *status) { + size_t len; + char *data = upb_readfile(fname, &len); + if (!data) { + upb_status_setf(status, UPB_ERROR, "Couldn't read file: %s", fname); + return; + } + upb_read_descriptor(symtab, data, len, status); + free(data); +} diff --git a/src/upb_glue.h b/src/upb_glue.h index 27611cd..0448c2f 100644 --- a/src/upb_glue.h +++ b/src/upb_glue.h @@ -27,6 +27,7 @@ #define UPB_GLUE_H #include +#include "upb.h" #ifdef __cplusplus extern "C" { @@ -36,20 +37,23 @@ extern "C" { // Clients should use the regular, typedef'd names (eg. upb_string). struct _upb_msg; struct _upb_msgdef; -struct _upb_status; -struct _upb_string; struct _upb_symtab; // Decodes the given string, which must be in protobuf binary format, to the // given upb_msg with msgdef "md", storing the status of the operation in "s". 
-void upb_strtomsg(struct _upb_string *str, void *msg, - struct _upb_msgdef *md, struct _upb_status *s); +void upb_strtomsg(const char *str, size_t len, void *msg, + struct _upb_msgdef *md, upb_status *s); -void upb_msgtotext(struct _upb_string *str, void *msg, - struct _upb_msgdef *md, bool single_line); +//void upb_msgtotext(struct _upb_string *str, void *msg, +// struct _upb_msgdef *md, bool single_line); -void upb_read_descriptor(struct _upb_symtab *symtab, struct _upb_string *str, - struct _upb_status *status); +void upb_read_descriptor(struct _upb_symtab *symtab, const char *str, size_t len, + upb_status *status); + +void upb_read_descriptorfile(struct _upb_symtab *symtab, const char *fname, + upb_status *status); + +char *upb_readfile(const char *filename, size_t *len); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_handlers.c b/src/upb_handlers.c index c29281a..f513dfd 100644 --- a/src/upb_handlers.c +++ b/src/upb_handlers.c @@ -96,7 +96,6 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) { } typedef struct { - upb_strtable_entry e; upb_mhandlers *mh; } upb_mtab_ent; @@ -105,8 +104,8 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m, upb_onfieldreg *fieldreg_cb, void *closure, upb_strtable *mtab) { upb_mhandlers *mh = upb_handlers_newmhandlers(h); - upb_mtab_ent e = {{m->base.fqname, 0}, mh}; - upb_strtable_insert(mtab, &e.e); + upb_mtab_ent e = {mh}; + upb_strtable_insert(mtab, m->base.fqname, &e); if (msgreg_cb) msgreg_cb(closure, mh, m); upb_msg_iter i; for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { @@ -153,7 +152,7 @@ static upb_fhandlers toplevel_f = { #ifdef NDEBUG {{0}}, #else - {{0}, UPB_VALUETYPE_RAW}, + {{0}, -1}, #endif NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL}; @@ -198,23 +197,23 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) { assert(d->top == d->stack); if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status); // TODO: should we avoid this 
copy by passing client's status obj to cbs? - upb_copyerr(status, &d->status); + upb_status_copy(status, &d->status); } void indent(upb_dispatcher *d) { - for (int i = 0; i < (d->top - d->stack); i++) printf(" "); + for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " "); } void indentm1(upb_dispatcher *d) { - for (int i = 0; i < (d->top - d->stack - 1); i++) printf(" "); + for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " "); } upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f) { //indent(d); - //printf("START SEQ: %d\n", f->number); + //fprintf(stderr, "START SEQ: %d\n", f->number); if((d->top+1) >= d->limit) { - upb_seterr(&d->status, UPB_ERROR, "Nesting too deep."); + upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep."); _upb_dispatcher_unwind(d, UPB_BREAK); return d->top; // Dummy. } @@ -235,7 +234,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) { //indentm1(d); - //printf("END SEQ\n"); + //fprintf(stderr, "END SEQ\n"); assert(d->top > d->stack); assert(d->top->is_sequence); upb_fhandlers *f = d->top->f; @@ -255,9 +254,9 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) { upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f) { //indent(d); - //printf("START SUBMSG: %d\n", f->number); + //fprintf(stderr, "START SUBMSG: %d\n", f->number); if((d->top+1) >= d->limit) { - upb_seterr(&d->status, UPB_ERROR, "Nesting too deep."); + upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep."); _upb_dispatcher_unwind(d, UPB_BREAK); return d->top; // Dummy. 
} @@ -281,7 +280,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) { //indentm1(d); - //printf("END SUBMSG\n"); + //fprintf(stderr, "END SUBMSG\n"); assert(d->top > d->stack); assert(!d->top->is_sequence); upb_fhandlers *f = d->top->f; diff --git a/src/upb_handlers.h b/src/upb_handlers.h index caf0645..1ccc59f 100644 --- a/src/upb_handlers.h +++ b/src/upb_handlers.h @@ -17,6 +17,7 @@ #include #include "upb.h" #include "upb_def.h" +#include "upb_bytestream.h" #ifdef __cplusplus extern "C" { @@ -303,14 +304,12 @@ typedef struct { // Members to use as the data source requires. void *srcclosure; + uint64_t end_ofs; uint16_t msgindex; uint16_t fieldindex; - uint32_t end_offset; - // Does this frame represent a sequence or a submsg (f might be both). - // We only need a single bit here, but this will make each individual - // frame grow from 32 to 40 bytes on LP64, which is a bit excessive. - bool is_sequence; + bool is_sequence; // frame represents seq or submsg? (f might be both). + bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX (strings aren't pushed) } upb_dispatcher_frame; // Called when some of the input needs to be skipped. All frames from diff --git a/src/upb_msg.c b/src/upb_msg.c index b88df32..83fa6ff 100644 --- a/src/upb_msg.c +++ b/src/upb_msg.c @@ -7,6 +7,7 @@ * Data structure for storing a message of protobuf data. */ +#include "upb.h" #include "upb_msg.h" void upb_msg_clear(void *msg, upb_msgdef *md) { @@ -132,23 +133,23 @@ UPB_ACCESSORS(bool, bool) UPB_ACCESSORS(ptr, void*) #undef UPB_ACCESSORS -static void _upb_stdmsg_setstr(void *_dst, upb_value _src) { - // We do: - // - upb_string_recycle(), upb_string_substr() instead of - // - upb_string_unref(), upb_string_getref() - // because we can conveniently cache these upb_string objects in the - // upb_msg, whereas the upb_src who is sending us these strings may not - // have a good way of caching them. 
This saves the upb_src from allocating - // new upb_strings all the time to give us. - // - // If you were using this to copy one upb_msg to another this would - // allocate string objects whereas a upb_string_getref could have avoided - // those allocations completely; if this is an issue, we could make it an - // option of the upb_msgsink which behavior is desired. - upb_string **dst = _dst; - upb_string *src = upb_value_getstr(_src); - upb_string_recycle(dst); - upb_string_substr(*dst, src, 0, upb_string_len(src)); +static void _upb_stdmsg_setstr(void *_dst, upb_value src) { + upb_stdarray **dstp = _dst; + upb_stdarray *dst = *dstp; + if (!dst) { + dst = malloc(sizeof(*dst)); + dst->size = 0; + dst->ptr = NULL; + *dstp = dst; + } + dst->len = 0; + upb_strref *ref = upb_value_getstrref(src); + if (ref->len > dst->size) { + dst->size = ref->len; + dst->ptr = realloc(dst->ptr, dst->size); + } + dst->len = ref->len; + upb_bytesrc_read(ref->bytesrc, ref->stream_offset, ref->len, dst->ptr); } upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) { @@ -166,15 +167,11 @@ upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) { } upb_value upb_stdmsg_getstr(void *m, upb_value fval) { - upb_value val = upb_stdmsg_getptr(m, fval); - upb_value_setstr(&val, upb_value_getptr(val)); - return val; + return upb_stdmsg_getptr(m, fval); } upb_value upb_stdmsg_seqgetstr(void *i) { - upb_value val = upb_stdmsg_seqgetptr(i); - upb_value_setstr(&val, upb_value_getptr(val)); - return val; + return upb_stdmsg_seqgetptr(i); } void *upb_stdmsg_new(upb_msgdef *md) { @@ -188,11 +185,13 @@ void upb_stdseq_free(void *s, upb_fielddef *f) { upb_stdarray *a = s; if (upb_issubmsg(f) || upb_isstring(f)) { void **p = (void**)a->ptr; - for (int i = 0; i < a->size; i++) { + for (uint32_t i = 0; i < a->size; i++) { if (upb_issubmsg(f)) { upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def)); } else { - upb_string_unref(p[i]); + upb_stdarray *str = p[i]; + free(str->ptr); + 
free(str); } } } @@ -213,7 +212,9 @@ void upb_stdmsg_free(void *m, upb_msgdef *md) { } else if (upb_issubmsg(f)) { upb_stdmsg_free(subp, upb_downcast_msgdef(f->def)); } else { - upb_string_unref(subp); + upb_stdarray *str = subp; + free(str->ptr); + free(str); } } free(m); diff --git a/src/upb_msg.h b/src/upb_msg.h index b93037b..af328e3 100644 --- a/src/upb_msg.h +++ b/src/upb_msg.h @@ -148,7 +148,7 @@ typedef struct { void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h); void upb_msgvisitor_uninit(upb_msgvisitor *v); -void upb_msgvisitor_reset(upb_msgvisitor *v, upb_msg *m); +void upb_msgvisitor_reset(upb_msgvisitor *v, void *m); void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status); @@ -183,8 +183,8 @@ upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val); // if necessary. typedef struct { char *ptr; - int32_t len; // Number of elements present. - int32_t size; // Number of elements allocated. + uint32_t len; // Number of elements present. + uint32_t size; // Number of elements allocated. } upb_stdarray; upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val); diff --git a/src/upb_stdio.c b/src/upb_stdio.c index c84d52a..20a3c15 100644 --- a/src/upb_stdio.c +++ b/src/upb_stdio.c @@ -9,96 +9,158 @@ #include #include -#include "upb_string.h" +#include // We can make this configurable if necessary. 
-#define BLOCK_SIZE 4096 +#define BUF_SIZE 32768 -struct upb_stdio { - upb_bytesrc bytesrc; - upb_bytesink bytesink; - FILE *file; -}; -void upb_stdio_reset(upb_stdio *stdio, FILE* file) { - stdio->file = file; +/* upb_bytesrc methods ********************************************************/ + +int upb_stdio_cmpbuf(const void *_key, const void *_elem) { + const uint64_t *ofs = _key; + const upb_stdio_buf *buf = _elem; + return (*ofs / BUF_SIZE) - (buf->ofs / BUF_SIZE); } +static upb_stdio_buf *upb_stdio_findbuf(upb_stdio *s, uint64_t ofs) { + // TODO: it is probably faster to linear search short lists, and to + // special-case the last one or two bufs. + return bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf); +} -/* upb_bytesrc methods ********************************************************/ +//static upb_strlen_t upb_stdio_read(void *src, uint32_t ofs, upb_buf *b, +// upb_status *status) { +// upb_stdio *stdio = (upb_stdio*)src; +// size_t read = fread(buf, 1, BLOCK_SIZE, stdio->file); +// if(read < (size_t)BLOCK_SIZE) { +// // Error or EOF. +// if(feof(stdio->file)) { +// upb_seterr(status, UPB_EOF, ""); +// } else if(ferror(stdio->file)) { +// upb_status_fromerrno(s); +// return 0; +// } +// } +// b->len = read; +// stdio->next_ofs += read; +// return stdio->next_ofs; +//} + +size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) { + (void)src; + (void)ofs; + (void)s; + + return 0; +} -static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, - upb_strlen_t count, upb_status *status) { - upb_stdio *stdio = (upb_stdio*)src; - assert(count > 0); - size_t read = fread(buf, 1, count, stdio->file); - if(read < (size_t)count) { - // Error or EOF. 
- if(feof(stdio->file)) { - upb_seterr(status, UPB_EOF, ""); - return read; - } else if(ferror(stdio->file)) { - upb_seterr(status, UPB_ERROR, "Error reading from stdio stream."); - return -1; - } +void upb_stdio_read(void *src, uint64_t src_ofs, size_t len, char *dst) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, src_ofs); + src_ofs -= buf->ofs; + memcpy(dst, &buf->data[src_ofs], BUF_SIZE - src_ofs); + len -= (BUF_SIZE - src_ofs); + dst += (BUF_SIZE - src_ofs); + while (len > 0) { + ++buf; + size_t bytes = UPB_MIN(len, BUF_SIZE); + memcpy(dst, buf->data, bytes); + len -= bytes; + dst += bytes; } - return read; } -static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, - upb_status *status) { - upb_strlen_t read = upb_stdio_read( - src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status); - if (read <= 0) return false; - upb_string_getrwbuf(str, read); - return true; +const char *upb_stdio_getptr(void *src, uint64_t ofs, size_t *len) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); + ofs -= buf->ofs; + *len = BUF_SIZE - ofs; + return &buf->data[ofs]; +} + +void upb_stdio_refregion(void *src, uint64_t ofs, size_t len) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); + len -= (BUF_SIZE - ofs); + ++buf->refcount; + while (len > 0) { + ++buf; + ++buf->refcount; + } +} + +void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) { + (void)src; + (void)ofs; + (void)len; } /* upb_bytesink methods *******************************************************/ +#if 0 upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { - upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); upb_strlen_t len = upb_string_len(str); upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); if(written < len) { - upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + upb_status_setf(status, UPB_ERROR, 
"Error writing to stdio stream."); return -1; } return written; } +#endif -upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, - const char *fmt, va_list args) { - upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); - upb_strlen_t written = vfprintf(stdio->file, fmt, args); +uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); + int written = vfprintf(stdio->file, fmt, args); if (written < 0) { - upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); + upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream."); return -1; } return written; } -upb_stdio *upb_stdio_new() { +void upb_stdio_init(upb_stdio *stdio) { static upb_bytesrc_vtbl bytesrc_vtbl = { + upb_stdio_fetch, upb_stdio_read, - upb_stdio_getstr, + upb_stdio_getptr, + upb_stdio_refregion, + upb_stdio_unrefregion, + NULL, + NULL }; + upb_bytesrc_init(&stdio->src, &bytesrc_vtbl); - static upb_bytesink_vtbl bytesink_vtbl = { - upb_stdio_putstr, - upb_stdio_vprintf - }; + //static upb_bytesink_vtbl bytesink_vtbl = { + // upb_stdio_putstr, + // upb_stdio_vprintf + //}; + //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); +} - upb_stdio *stdio = malloc(sizeof(*stdio)); - upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); - upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); - return stdio; +void upb_stdio_reset(upb_stdio* stdio, FILE *file) { + stdio->file = file; + stdio->should_close = false; +} + +void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, + upb_status *s) { + FILE *f = fopen(filename, mode); + if (!f) { + upb_status_fromerrno(s); + return; + } + setvbuf(stdio->file, NULL, _IONBF, 0); // Disable buffering; we do our own. 
+ upb_stdio_reset(stdio, f); + stdio->should_close = true; } -void upb_stdio_free(upb_stdio *stdio) { - free(stdio); +void upb_stdio_uninit(upb_stdio *stdio) { + // Can't report status; caller should flush() to ensure data is written. + if (stdio->should_close) fclose(stdio->file); + stdio->file = NULL; } -upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; } -upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; } +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; } +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } diff --git a/src/upb_stdio.h b/src/upb_stdio.h index a164821..858830c 100644 --- a/src/upb_stdio.h +++ b/src/upb_stdio.h @@ -5,7 +5,12 @@ * Author: Josh Haberman * * This file provides upb_bytesrc and upb_bytesink implementations for - * ANSI C stdio. + * ANSI C stdio, which is less efficient than posixfd, but more portable. + * + * Specifically, stdio functions acquire locks on every operation (unless you + * use the f{read,write,...}_unlocked variants, which are not standard) and + * performs redundant buffering (unless you disable it with setvbuf(), but we + * can only do this on newly-opened filehandles). */ #include @@ -18,21 +23,44 @@ extern "C" { #endif -struct upb_stdio; -typedef struct upb_stdio upb_stdio; +typedef struct { + uint64_t ofs; + uint32_t refcount; + char data[]; +} upb_stdio_buf; + +// We use a single object for both bytesrc and bytesink for simplicity. +// The object is still not thread-safe, and may only be used by one reader +// and one writer at a time. 
+typedef struct { + upb_bytesrc src; + upb_bytesink sink; + FILE *file; + bool should_close; + upb_stdio_buf **bufs; + uint32_t nbuf, szbuf; +} upb_stdio; + +void upb_stdio_init(upb_stdio *stdio); +// Caller should call upb_stdio_flush prior to calling this to ensure that +// all data is flushed, otherwise data can be silently dropped if an error +// occurs flushing the remaining buffers. +void upb_stdio_uninit(upb_stdio *stdio); + +// Resets the object to read/write to the given "file." The caller is +// responsible for closing the file, which must outlive this object. +void upb_stdio_reset(upb_stdio *stdio, FILE *file); -// Creation/deletion. -upb_stdio *upb_stdio_new(); -void upb_stdio_free(upb_stdio *stdio); +// As an alternative to upb_stdio_reset(), initializes the object by opening a +// file, and will handle closing it. This may result in more efficient I/O +// than the previous since we can call setvbuf() to disable buffering. +void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, + upb_status *s); -// Reset/initialize the object for use. The src or sink will call -// fread()/fwrite()/etc. on the given FILE*. -void upb_stdio_reset(upb_stdio *stdio, FILE* file); +// Must be called to cleanup after the object, including closing the file if +// it was opened with upb_stdio_open() (which can fail, hence the status). +// -// Gets a bytesrc or bytesink for the given stdio. The returned pointer is -// invalidated by upb_stdio_reset above. It is perfectly valid to get both -// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading -// and writing. upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); diff --git a/src/upb_string.c b/src/upb_string.c deleted file mode 100644 index 122eec4..0000000 --- a/src/upb_string.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Google Inc. 
See LICENSE for details. - * Author: Josh Haberman - */ - -#include "upb_string.h" - -#include -#ifdef __GLIBC__ -#include -#elif defined(__APPLE__) -#include -#endif - -static uint32_t upb_round_up_pow2(uint32_t v) { - // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -upb_string *upb_string_new() { - upb_string *str = malloc(sizeof(*str)); - str->ptr = NULL; - str->cached_mem = NULL; - str->len = 0; -#ifndef UPB_HAVE_MSIZE - str->size = 0; -#endif - str->src = NULL; - upb_atomic_init(&str->refcount, 1); - return str; -} - -uint32_t upb_string_size(upb_string *str) { -#ifdef __GLIBC__ - return malloc_usable_size(str->cached_mem); -#elif defined(__APPLE__) - return malloc_size(str->cached_mem); -#else - return str->size; -#endif -} - -void _upb_string_free(upb_string *str) { - free(str->cached_mem); - _upb_string_release(str); - free(str); -} - -char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) { - // assert(str->ptr == NULL); - upb_strlen_t size = upb_string_size(str); - if (size < len) { - size = upb_round_up_pow2(len); - str->cached_mem = realloc(str->cached_mem, size); -#ifndef UPB_HAVE_MSIZE - str->size = size; -#endif - } - str->len = len; - str->ptr = str->cached_mem; - return str->cached_mem; -} - -void upb_string_substr(upb_string *str, upb_string *target_str, - upb_strlen_t start, upb_strlen_t len) { - assert(str->ptr == NULL); - assert(start + len <= upb_string_len(target_str)); - if (target_str->src) { - start += (target_str->ptr - target_str->src->ptr); - target_str = target_str->src; - } - str->src = upb_string_getref(target_str); - str->ptr = upb_string_getrobuf(target_str) + start; - str->len = len; -} - -size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format, - va_list args) { - // Try once without reallocating. We have to va_copy because we might have - // to call vsnprintf again. 
- uint32_t size = UPB_MAX(upb_string_size(str) - offset, 16); - char *buf = upb_string_getrwbuf(str, offset + size) + offset; - va_list args_copy; - va_copy(args_copy, args); - uint32_t true_size = vsnprintf(buf, size, format, args_copy); - va_end(args_copy); - - // Resize to be the correct size. - if (true_size >= size) { - // Need to print again, because some characters were truncated. vsnprintf - // has weird behavior (and contrary IMO to what the standard says): it will - // not write the entire string unless you give it space to store the NULL - // terminator also. So we can't give it space for the string itself and - // let NULL get truncated (after all, we don't care about it): we *must* - // give it space for NULL. - buf = upb_string_getrwbuf(str, offset + true_size + 1) + offset; - vsnprintf(buf, true_size + 1, format, args); - } - str->len = offset + true_size; - return true_size; -} - -upb_string *upb_string_asprintf(const char *format, ...) { - upb_string *str = upb_string_new(); - va_list args; - va_start(args, format); - upb_string_vprintf(str, format, args); - va_end(args); - return str; -} - -upb_string *upb_strdup(upb_string *s) { - upb_string *str = upb_string_new(); - upb_strcpy(str, s); - return str; -} - -void upb_strcat(upb_string *s, upb_string *append) { - uint32_t old_size = upb_string_len(s); - uint32_t append_size = upb_string_len(append); - uint32_t new_size = old_size + append_size; - char *buf = upb_string_getrwbuf(s, new_size); - memcpy(buf + old_size, upb_string_getrobuf(append), append_size); -} - -upb_string *upb_strreadfile(const char *filename) { - FILE *f = fopen(filename, "rb"); - if(!f) return NULL; - if(fseek(f, 0, SEEK_END) != 0) goto error; - long size = ftell(f); - if(size < 0) goto error; - if(fseek(f, 0, SEEK_SET) != 0) goto error; - upb_string *s = upb_string_new(); - char *buf = upb_string_getrwbuf(s, size); - if(fread(buf, size, 1, f) != 1) goto error; - fclose(f); - return s; - -error: - fclose(f); - return NULL; -} 
- -upb_string *upb_emptystring() { - static upb_string empty = UPB_STATIC_STRING(""); - return &empty; -} - -char *upb_string_newcstr(upb_string *str) { - upb_strlen_t len = upb_string_len(str); - char *ret = malloc(len+1); - memcpy(ret, upb_string_getrobuf(str), len); - ret[len] = '\0'; - return ret; -} diff --git a/src/upb_string.h b/src/upb_string.h deleted file mode 100644 index 1f92850..0000000 --- a/src/upb_string.h +++ /dev/null @@ -1,394 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * This file defines a simple string type which is length-delimited instead - * of NULL-terminated, and which has useful sharing semantics. - * - * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free() - * wheverever possible, while keeping both CPU and memory overhead low. - * Throughout upb there are situations where one wants to reference all or part - * of another string without copying. upb_string provides APIs for doing this, - * and allows the referenced string to be kept alive for as long as anyone is - * referencing it. - * - * Characteristics of upb_string: - * - strings are reference-counted. - * - strings are immutable (can be mutated only when first created or recycled). - * - if a string has no other referents, it can be "recycled" into a new string - * without having to reallocate the upb_string. - * - strings can be substrings of other strings (owning a ref on the source - * string). - * - * Reference-counted strings have recently fallen out of favor because of the - * performance impacts of doing thread-safe reference counting with atomic - * operations. We side-step this issue by not performing atomic operations - * unless the string has been marked thread-safe. Time will tell whether this - * scheme is easy and convenient enough to be practical. 
- * - * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched - * idiom that we go with it instead of making our pointers uint8_t*. - * - * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE - * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if - * you are logically passing a reference to a upb_string to another thread - * (which implies that the other thread must eventually call unref of recycle), - * you have two options: - * - * - create a copy of the string that will be used in the other thread only. - * - call upb_string_get_synchronized_ref(), which will make getref, unref, and - * recycle thread-safe for this upb_string. - */ - -#ifndef UPB_STRING_H -#define UPB_STRING_H - -#include -#include -#include -#include "upb_atomic.h" -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// All members of this struct are private, and may only be read/written through -// the associated functions. -struct _upb_string { - // The string's refcount. - upb_atomic_t refcount; - - // The pointer to our currently active data. This may be memory we own - // or a pointer into memory we don't own. - const char *ptr; - - // If non-NULL, this is a block of memory we own. We keep this cached even - // if "ptr" is currently aliasing memory we don't own. - char *cached_mem; - - // The effective length of the string (the bytes at ptr). - int32_t len; -#ifndef UPB_HAVE_MSIZE - // How many bytes are allocated in cached_mem. - // - // Many platforms have a function that can tell you the size of a block - // that was previously malloc'd. In this case we can avoid storing the - // size explicitly. - uint32_t size; -#endif - - // Used if this is a slice of another string, NULL otherwise. We own a ref - // on src. - struct _upb_string *src; -}; - -// Internal-only initializer for upb_string instances. 
-#ifdef UPB_HAVE_MSIZE -#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, NULL} -#else -#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, 0, NULL} -#endif - -// Special pseudo-refcounts for static/stack-allocated strings, respectively. -#define _UPB_STRING_REFCOUNT_STATIC -1 -#define _UPB_STRING_REFCOUNT_STACK -2 - -// Returns a newly-created, empty, non-finalized string. When the string is no -// longer needed, it should be unref'd, never freed directly. -upb_string *upb_string_new(); - -// Internal-only; clients should call upb_string_unref(). -void _upb_string_free(upb_string *str); - -// Releases a ref on the given string, which may free the memory. "str" -// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE -// UNLESS THE STRING IS SYNCHRONIZED. -INLINE void upb_string_unref(upb_string *str) { - if (str) { - } - if (str && upb_atomic_read(&str->refcount) > 0 && - upb_atomic_unref(&str->refcount)) { - _upb_string_free(str); - } -} - -static void _upb_string_release(upb_string *str) { - if(str->src) { - upb_string_unref(str->src); - str->src = NULL; - } -} - -upb_string *upb_strdup(upb_string *s); // Forward-declare. - -// Returns a string with the same contents as "str". The caller owns a ref on -// the returned string, which may or may not be the same object as "str. -// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED! -INLINE upb_string *upb_string_getref(upb_string *str) { - int refcount = upb_atomic_read(&str->refcount); - if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str); - // We don't ref the special <0 refcount for static strings. - if (refcount > 0) { - upb_atomic_ref(&str->refcount); - } - return str; -} - -// Returns the length of the string. 
-INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } -INLINE bool upb_string_isempty(upb_string *str) { - return !str || upb_string_len(str) == 0; -} - -// Use to read the bytes of the string. The caller *must* call -// upb_string_endread() after the data has been read. The window between -// upb_string_getrobuf() and upb_string_endread() should be kept as short as -// possible, because any pending upb_string_detach() may be blocked until -// upb_string_endread is called(). No other functions may be called on the -// string during this window except upb_string_len(). -INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } -INLINE void upb_string_endread(upb_string *str) { (void)str; } - -// Convenience method for getting the end of the string. Calls -// upb_string_getrobuf() so inherits the caveats of calling that function. -INLINE const char *upb_string_getbufend(upb_string *str) { - return upb_string_getrobuf(str) + upb_string_len(str); -} - -// Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The caller MUST own a reference on the given string -// prior to making this call (ie. the caller must have either created the -// string or obtained a reference with upb_string_getref()). -// -// After the function returns, "str" points to a writable string, which is -// either the original string if it had no other references or a newly created -// string if it did have other references. -// -// As a special case, passing a pointer to NULL will allocate a new string. 
-// This is convenient for the pattern: -// -// upb_string *str = NULL; -// while (x) { -// if (y) { -// upb_string_recycle(&str); -// upb_src_getstr(str); -// } -// } -INLINE void upb_string_recycle(upb_string **_str) { - upb_string *str = *_str; - int r; - if(str && ((r = upb_atomic_read(&str->refcount)) == 1 || - (r == _UPB_STRING_REFCOUNT_STACK))) { - str->ptr = NULL; - str->len = 0; - _upb_string_release(str); - } else { - //if (!str) { - // printf("!str\n"); - //} - //else if (upb_atomic_read(&str->refcount) != 1) { printf("refcount: %d\n", upb_atomic_read(&str->refcount)); } - //else { printf("Some other reason.\n"); } - upb_string_unref(str); - *_str = upb_string_new(); - } -} - - -// The options for setting the contents of a string. These may only be called -// when a string is first created or recycled; once other functions have been -// called on the string, these functions are not allowed until the string is -// recycled. - -// Gets a pointer suitable for writing to the string, which is guaranteed to -// have at least "len" bytes of data available. The size of the string will -// become "len". -char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); - -// Replaces the contents of str with the contents of the given printf. -size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format, - va_list args); -INLINE size_t upb_string_vprintf(upb_string *str, const char *format, - va_list args) { - return upb_string_vprintf_at(str, 0, format, args); -} -INLINE size_t upb_string_printf(upb_string *str, const char *format, ...) { - va_list args; - va_start(args, format); - size_t written = upb_string_vprintf(str, format, args); - va_end(args); - return written; -} - -// Sets the contents of "str" to be the given substring of "target_str", to -// which the caller must own a ref. 
-void upb_string_substr(upb_string *str, upb_string *target_str, - upb_strlen_t start, upb_strlen_t len); - -// Sketch of an API for allowing upb_strings to reference external, unowned -// data. Waiting for a clear use case before actually implementing it. -// -// Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until a -// matching call to upb_string_detach(), which may block until any concurrent -// readers have finished reading. upb_string_detach() preserves the contents -// of the string by copying the referenced data if there are any other -// referents. -// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -// void upb_string_detach(upb_string *str); - -// Allows using upb_strings in printf, ie: -// upb_strptr str = UPB_STRLIT("Hello, World!\n"); -// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str) -#define UPB_STRFMT "%.*s" - -// Macros for constructing upb_string objects statically or on the stack. These -// can be used like: -// -// upb_string static_str = UPB_STATIC_STRING("Foo"); -// -// int main() { -// upb_string stack_str = UPB_STACK_STRING("Foo"); -// // Now: -// // upb_streql(&static_str, &stack_str) == true -// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true -// } -// -// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays, -// but you must not change the underlying data once you've passed the string on: -// -// void foo() { -// char data[] = "ABC123"; -// upb_string stack_str = UPB_STACK_STR(data); -// bar(&stack_str); -// data[0] = "B"; // NOT ALLOWED!! -// } -// -// TODO: should the stack business just be like attach/detach? The latter seems -// more flexible, though it does require a stack allocation. Maybe put this off -// until there is a clear use case. 
-#define UPB_STATIC_STRING(str) \ - _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STATIC_STRING_ARRAY(str) \ - _UPB_STRING_INIT(str, sizeof(str), _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STATIC_STRING_LEN(str, len) \ - _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC) -#define UPB_STACK_STRING(str) \ - _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK) -#define UPB_STACK_STRING_LEN(str, len) \ - _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK) - -// A convenient way of specifying upb_strings as literals, like: -// -// upb_streql(UPB_STRLIT("expected"), other_str); -// -// However, this requires either C99 compound initializers or C++. -// Must ONLY be called with a string literal as its argument! -//#ifdef __cplusplus -//namespace upb { -//class String : public upb_string { -// // This constructor must ONLY be called with a string literal. -// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {} -//}; -//} -//#define UPB_STRLIT(str) upb::String(str) -//#endif -#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str) - -// Returns a singleton empty string. -upb_string *upb_emptystring(); - - -/* upb_string library functions ***********************************************/ - -// Named like their counterparts, these are all safe against buffer -// overflow. For the most part these only use the public upb_string interface. - -// More efficient than upb_strcmp if all you need is to test equality. -INLINE bool upb_streql(upb_string *s1, upb_string *s2) { - upb_strlen_t len = upb_string_len(s1); - if(len != upb_string_len(s2)) { - return false; - } else { - bool ret = - memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; - upb_string_endread(s1); - upb_string_endread(s2); - return ret; - } -} - -// Like strcmp(). -int upb_strcmp(upb_string *s1, upb_string *s2); - -// Compare a upb_string with memory or a NULL-terminated C string. 
-INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) { - return len == upb_string_len(str) && - memcmp(upb_string_getrobuf(str), buf, len) == 0; -} - -INLINE bool upb_streqlc(upb_string *str, const void *buf) { - // Could be made one-pass. - return upb_streqllen(str, buf, strlen((const char*)buf)); -} - -// Like upb_strcpy, but copies from a buffer and length. -INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { - memcpy(upb_string_getrwbuf(dest, len), src, len); -} - -// Replaces the contents of "dest" with the contents of "src". -INLINE void upb_strcpy(upb_string *dest, upb_string *src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src)); - upb_string_endread(src); -} - -// Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_string *dest, const void *src) { - // This does two passes over src, but that is necessary unless we want to - // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen((const char*)src)); -} - -// Returns a new string whose contents are a copy of s. -upb_string *upb_strdup(upb_string *s); - -// Like upb_strdup(), but duplicates a given buffer and length. -INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { - upb_string *s = upb_string_new(); - upb_strcpylen(s, src, len); - return s; -} - -// Like upb_strdup(), but duplicates a C NULL-terminated string. -INLINE upb_string *upb_strdupc(const char *src) { - return upb_strduplen(src, strlen(src)); -} - -// Returns a newly-allocated NULL-terminated copy of str. -char *upb_string_newcstr(upb_string *str); - -// Appends 'append' to 's' in-place, resizing s if necessary. -void upb_strcat(upb_string *s, upb_string *append); - -// Returns a new string that is a substring of the given string. 
-INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { - upb_string *str = upb_string_new(); - upb_string_substr(str, s, offset, len); - return str; -} - -// Reads an entire file into a newly-allocated string. -upb_string *upb_strreadfile(const char *filename); - -// Returns a new string with the contents of the given printf. -upb_string *upb_string_asprintf(const char *format, ...); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_strstream.c b/src/upb_strstream.c index 284a4d7..9e17d75 100644 --- a/src/upb_strstream.c +++ b/src/upb_strstream.c @@ -8,61 +8,45 @@ #include "upb_strstream.h" #include -#include "upb_string.h" /* upb_stringsrc **************************************************************/ -static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, - upb_strlen_t count, upb_status *status) { - upb_stringsrc *src = (upb_stringsrc*)_src; - if (src->offset == upb_string_len(src->str)) { - status->code = UPB_EOF; - return -1; - } else { - upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset); - memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read); - src->offset += to_read; - return to_read; - } +size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) { + upb_stringsrc *src = _src; + size_t bytes = src->len - ofs; + if (bytes == 0) s->code = UPB_EOF; + return bytes; } -static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, - upb_status *status) { - upb_stringsrc *src = (upb_stringsrc*)_src; - if (src->offset == upb_string_len(src->str)) { - status->code = UPB_EOF; - return false; - } else { - upb_strlen_t len = upb_string_len(src->str) - src->offset; - upb_string_substr(str, src->str, src->offset, len); - src->offset += len; - assert(src->offset == upb_string_len(src->str)); - return true; - } +void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) { + upb_stringsrc *src = _src; + memcpy(dst, src->str + src_ofs, 
len); +} + +const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) { + upb_stringsrc *src = _src; + *len = src->len - ofs; + return src->str + ofs; } void upb_stringsrc_init(upb_stringsrc *s) { static upb_bytesrc_vtbl vtbl = { - upb_stringsrc_read, - upb_stringsrc_getstr, + &upb_stringsrc_fetch, + &upb_stringsrc_read, + &upb_stringsrc_getptr, + NULL, NULL, NULL, NULL }; upb_bytesrc_init(&s->bytesrc, &vtbl); s->str = NULL; } -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { - if (str != s->str) { - upb_string_unref(s->str); - s->str = upb_string_getref(str); - } - s->offset = 0; -} - -void upb_stringsrc_uninit(upb_stringsrc *s) { - upb_string_unref(s->str); +void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) { + s->str = str; + s->len = len; } +void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; } upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { return &s->bytesrc; @@ -72,44 +56,49 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { /* upb_stringsink *************************************************************/ void upb_stringsink_uninit(upb_stringsink *s) { - upb_string_unref(s->str); + free(s->str); } // Resets the stringsink to a state where it will append to the given string. // The string must be newly created or recycled. The stringsink will take a // reference on the string, so the caller need not ensure that it outlives the // stringsink. A stringsink can be reset multiple times. -void upb_stringsink_reset(upb_stringsink *s, upb_string *str) { - if (str != s->str) { - upb_string_unref(s->str); - s->str = upb_string_getref(str); - } - // Resize to 0. 
- upb_string_getrwbuf(s->str, 0); +void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) { + free(s->str); + s->str = str; + s->len = 0; + s->size = size; } upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) { return &s->bytesink; } -static upb_strlen_t upb_stringsink_vprintf(upb_bytesink *_sink, upb_status *s, - const char *fmt, va_list args) { - (void)s; // No errors can occur. - upb_stringsink *sink = (upb_stringsink*)_sink; - return upb_string_vprintf_at(sink->str, upb_string_len(sink->str), fmt, args); +static int32_t upb_stringsink_vprintf(void *_s, upb_status *status, + const char *fmt, va_list args) { + (void)status; // TODO: report realloc() errors. + upb_stringsink *s = _s; + int ret = upb_vrprintf(&s->str, &s->size, s->len, fmt, args); + if (ret >= 0) s->len += ret; + return ret; } -static upb_strlen_t upb_stringsink_putstr(upb_bytesink *_sink, upb_string *str, - upb_status *s) { - (void)s; // No errors can occur. - upb_stringsink *sink = (upb_stringsink*)_sink; - upb_strcat(sink->str, str); - return upb_string_len(str); +bool upb_stringsink_write(void *_s, const char *buf, size_t len, + upb_status *status) { + (void)status; // TODO: report realloc() errors. 
+ upb_stringsink *s = _s; + if (s->len + len > s->size) { + while(s->len + len > s->size) s->size *= 2; + s->str = realloc(s->str, s->size); + } + memcpy(s->str + s->len, buf, len); + s->len += len; + return true; } void upb_stringsink_init(upb_stringsink *s) { static upb_bytesink_vtbl vtbl = { - upb_stringsink_putstr, + upb_stringsink_write, upb_stringsink_vprintf }; upb_bytesink_init(&s->bytesink, &vtbl); diff --git a/src/upb_strstream.h b/src/upb_strstream.h index e092b55..e57406e 100644 --- a/src/upb_strstream.h +++ b/src/upb_strstream.h @@ -21,8 +21,8 @@ extern "C" { struct _upb_stringsrc { upb_bytesrc bytesrc; - upb_string *str; - upb_strlen_t offset; + const char *str; + size_t len; }; typedef struct _upb_stringsrc upb_stringsrc; @@ -33,9 +33,9 @@ void upb_stringsrc_uninit(upb_stringsrc *s); // Resets the stringsrc to a state where it will vend the given string. The // stringsrc will take a reference on the string, so the caller need not ensure // that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); +void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len); -// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. +// Returns the upb_bytesrc* for this stringsrc. upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); @@ -43,7 +43,8 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); struct _upb_stringsink { upb_bytesink bytesink; - upb_string *str; + char *str; + size_t len, size; }; typedef struct _upb_stringsink upb_stringsink; @@ -51,11 +52,14 @@ typedef struct _upb_stringsink upb_stringsink; void upb_stringsink_init(upb_stringsink *s); void upb_stringsink_uninit(upb_stringsink *s); -// Resets the stringsink to a state where it will append to the given string. -// The string must be newly created or recycled. The stringsink will take a -// reference on the string, so the caller need not ensure that it outlives the -// stringsink. 
A stringsink can be reset multiple times. -void upb_stringsink_reset(upb_stringsink *s, upb_string *str); +// Resets the sink's string to "str", which the sink takes ownership of. +// "str" may be NULL, which will make the sink allocate a new string. +void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size); + +// Releases ownership of the returned string (which is "len" bytes long) and +// resets the internal string to be empty again (as if reset were called with +// NULL). +const char *upb_stringsink_release(upb_stringsink *s, size_t *len); // Returns the upb_bytesink* for this stringsrc. Invalidated by reset above. upb_bytesink *upb_stringsink_bytesink(); diff --git a/src/upb_table.c b/src/upb_table.c index a754097..fc9e9de 100644 --- a/src/upb_table.c +++ b/src/upb_table.c @@ -97,7 +97,7 @@ static uint32_t empty_intbucket(upb_inttable *table) // The insert routines have a lot more code duplication between int/string // variants than I would like, but there's just a bit too much that varies to // parameterize them. -static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) { +static void intinsert(upb_inttable *t, uint32_t key, const void *val) { assert(upb_inttable_lookup(t, key) == NULL); upb_inttable_value *table_val; if (_upb_inttable_isarrkey(t, key)) { @@ -160,7 +160,7 @@ static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) { } } -void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) { +void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) { if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) { //printf("RESIZE!\n"); // Need to resize. Allocate new table with double the size of however many @@ -181,7 +181,7 @@ void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) { void upb_inttable_compact(upb_inttable *t) { // Find the largest array part we can that satisfies the MIN_DENSITY // definition. 
For now we just count down powers of two. - upb_inttable_key_t largest_key = 0; + uint32_t largest_key = 0; for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); i = upb_inttable_next(t, i)) { largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i)); @@ -260,6 +260,8 @@ upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter) { /* upb_strtable ***************************************************************/ static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { + //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t)); + assert(i <= (int32_t)upb_table_size(&t->t)); return UPB_INDEX(t->t.entries, i, t->t.entry_size); } @@ -267,121 +269,134 @@ static uint32_t upb_strtable_size(upb_strtable *t) { return upb_table_size(&t->t); } -void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) { +void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) { + t->t.value_size = valuesize; + size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8); upb_table_init(&t->t, size, entsize); for (uint32_t i = 0; i < upb_table_size(&t->t); i++) { upb_strtable_entry *e = strent(t, i); - e->key = NULL; - e->next = UPB_END_OF_CHAIN; + e->hdr.key = NULL; + e->hdr.next = UPB_END_OF_CHAIN; } } void upb_strtable_free(upb_strtable *t) { - // Free refs from the strtable. - upb_strtable_entry *e = upb_strtable_begin(t); - for(; e; e = upb_strtable_next(t, e)) { - upb_string_unref(e->key); - } + // Free keys from the strtable. 
+ upb_strtable_iter i; + for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) + free((char*)upb_strtable_iter_key(&i)); upb_table_free(&t->t); } -static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) -{ - uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0); +static uint32_t strtable_bucket(upb_strtable *t, const char *key) { + uint32_t hash = MurmurHash2(key, strlen(key), 0); return (hash & t->t.mask); } -void *upb_strtable_lookup(upb_strtable *t, upb_string *key) -{ +void *upb_strtable_lookup(upb_strtable *t, const char *key) { uint32_t bucket = strtable_bucket(t, key); upb_strtable_entry *e; do { e = strent(t, bucket); - if(e->key && upb_streql(e->key, key)) return e; - } while((bucket = e->next) != UPB_END_OF_CHAIN); + if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val; + } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN); return NULL; } -static uint32_t empty_strbucket(upb_strtable *table) -{ +void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len) { + // TODO: improve. + char key2[len+1]; + memcpy(key2, key, len); + key2[len] = '\0'; + return upb_strtable_lookup(t, key2); +} + +static uint32_t empty_strbucket(upb_strtable *table) { // TODO: does it matter that this is biased towards the front of the table? for(uint32_t i = 0; i < upb_strtable_size(table); i++) { upb_strtable_entry *e = strent(table, i); - if(!e->key) return i; + if(!e->hdr.key) return i; } assert(false); return 0; } -static void strinsert(upb_strtable *t, upb_strtable_entry *e) -{ - assert(upb_strtable_lookup(t, e->key) == NULL); - e->key = upb_string_getref(e->key); +static void strinsert(upb_strtable *t, const char *key, const void *val) { + assert(upb_strtable_lookup(t, key) == NULL); t->t.count++; - uint32_t bucket = strtable_bucket(t, e->key); + uint32_t bucket = strtable_bucket(t, key); upb_strtable_entry *table_e = strent(t, bucket); - if(table_e->key) { /* Collision. 
*/ - if(bucket == strtable_bucket(t, table_e->key)) { + if(table_e->hdr.key) { /* Collision. */ + if(bucket == strtable_bucket(t, table_e->hdr.key)) { /* Existing element is in its main posisiton. Find an empty slot to * place our new element and append it to this key's chain. */ uint32_t empty_bucket = empty_strbucket(t); - while (table_e->next != UPB_END_OF_CHAIN) - table_e = strent(t, table_e->next); - table_e->next = empty_bucket; + while (table_e->hdr.next != UPB_END_OF_CHAIN) + table_e = strent(t, table_e->hdr.next); + table_e->hdr.next = empty_bucket; table_e = strent(t, empty_bucket); } else { /* Existing element is not in its main position. Move it to an empty * slot and put our element in its main position. */ uint32_t empty_bucket = empty_strbucket(t); - uint32_t evictee_bucket = strtable_bucket(t, table_e->key); + uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key); memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ upb_strtable_entry *evictee_e = strent(t, evictee_bucket); while(1) { - assert(evictee_e->key); - assert(evictee_e->next != UPB_END_OF_CHAIN); - if(evictee_e->next == bucket) { - evictee_e->next = empty_bucket; + assert(evictee_e->hdr.key); + assert(evictee_e->hdr.next != UPB_END_OF_CHAIN); + if(evictee_e->hdr.next == bucket) { + evictee_e->hdr.next = empty_bucket; break; } - evictee_e = strent(t, evictee_e->next); + evictee_e = strent(t, evictee_e->hdr.next); } /* table_e remains set to our mainpos. 
*/ } } - memcpy(table_e, e, t->t.entry_size); - table_e->next = UPB_END_OF_CHAIN; - //printf("Looking up, string=" UPB_STRFMT "...\n", UPB_STRARG(e->key)); - assert(upb_strtable_lookup(t, e->key) == table_e); + //fprintf(stderr, "val: %p\n", val); + //fprintf(stderr, "val size: %d\n", t->t.value_size); + memcpy(&table_e->val, val, t->t.value_size); + table_e->hdr.key = strdup(key); + table_e->hdr.next = UPB_END_OF_CHAIN; + //fprintf(stderr, "Looking up, string=%s...\n", key); + assert(upb_strtable_lookup(t, key) == &table_e->val); //printf("Yay!\n"); } -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e) -{ +void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) { if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { // Need to resize. New table of double the size, add old elements to it. //printf("RESIZE!!\n"); upb_strtable new_table; - upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); - upb_strtable_entry *old_e; - for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) - strinsert(&new_table, old_e); + upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size); + upb_strtable_iter i; + upb_strtable_begin(&i, t); + for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { + strinsert(&new_table, + upb_strtable_iter_key(&i), + upb_strtable_iter_value(&i)); + } upb_strtable_free(t); *t = new_table; } - strinsert(t, e); + strinsert(t, key, val); } -void *upb_strtable_begin(upb_strtable *t) { - return upb_strtable_next(t, strent(t, -1)); +void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t) { + i->e = strent(t, -1); + i->t = t; + upb_strtable_next(i); } -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { - upb_strtable_entry *end = strent(t, upb_strtable_size(t)); +void upb_strtable_next(upb_strtable_iter *i) { + upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t)); + upb_strtable_entry *cur = i->e; do { - cur = (void*)((char*)cur + 
t->t.entry_size); - if(cur == end) return NULL; - } while(cur->key == NULL); - return cur; + cur = (void*)((char*)cur + i->t->t.entry_size); + if(cur == end) { i->e = NULL; return; } + } while(cur->hdr.key == NULL); + i->e = cur; } #ifdef UPB_UNALIGNED_READS_OK diff --git a/src/upb_table.h b/src/upb_table.h index 631709c..376465b 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -18,14 +18,11 @@ #include #include "upb.h" -#include "upb_string.h" #ifdef __cplusplus extern "C" { #endif -typedef uint32_t upb_inttable_key_t; - #define UPB_END_OF_CHAIN (uint32_t)-1 typedef struct { @@ -34,7 +31,7 @@ typedef struct { } upb_inttable_value; typedef struct { - upb_inttable_key_t key; + uint32_t key; uint32_t next; // Internal chaining. } upb_inttable_header; @@ -48,8 +45,13 @@ typedef struct { // performance by letting us compare hashes before comparing lengths or the // strings themselves. typedef struct { - upb_string *key; // We own a ref. - uint32_t next; // Internal chaining. + char *key; // We own, nullz. TODO: store explicit len? + uint32_t next; // Internal chaining. +} upb_strtable_header; + +typedef struct { + upb_strtable_header hdr; + uint32_t val; // Val is at least 32 bits. } upb_strtable_entry; typedef struct { @@ -81,7 +83,7 @@ typedef struct { // when looked up! void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size); void upb_inttable_free(upb_inttable *table); -void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); // TODO: update +void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size); void upb_strtable_free(upb_strtable *table); // Number of values in the hash table. @@ -97,11 +99,13 @@ INLINE uint32_t upb_strtable_count(upb_strtable *t) { // not already exist in the hash table. The data will be copied from val into // the hashtable (the amount of data copied comes from value_size when the // table was constructed). 
Therefore the data at val may be freed once the -// call returns. For string tables, the table takes a ref on str. +// call returns. For string tables, the table takes ownership of the string. // // WARNING: the lowest bit of val is reserved and will be overwritten! -void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val); -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *ent); // TODO: update +void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val); +// TODO: may want to allow for more complex keys with custom hash/comparison +// functions. +void upb_strtable_insert(upb_strtable *t, const char *key, const void *val); void upb_inttable_compact(upb_inttable *t); INLINE void upb_strtable_clear(upb_strtable *t) { // TODO: improve. @@ -110,14 +114,14 @@ INLINE void upb_strtable_clear(upb_strtable *t) { upb_strtable_init(t, 8, entry_size); } -INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) { +INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, uint32_t k) { uint32_t bucket = k & t->t.mask; // Identity hash for ints. assert(bucket != UPB_END_OF_CHAIN); return bucket; } // Returns true if this key belongs in the array part of the table. -INLINE bool _upb_inttable_isarrkey(upb_inttable *t, upb_inttable_key_t k) { +INLINE bool _upb_inttable_isarrkey(upb_inttable *t, uint32_t k) { return (k < t->array_size); } @@ -162,21 +166,44 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size); } -void *upb_strtable_lookup(upb_strtable *t, upb_string *key); +void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len); +void *upb_strtable_lookup(upb_strtable *t, const char *key); + + +/* upb_strtable_iter **********************************************************/ + +// Strtable iteration. Order is undefined. Insertions invalidate iterators. 
+// upb_strtable_iter i; +// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) { +// const char *key = upb_strtable_iter_key(&i); +// const myval *val = upb_strtable_iter_value(&i); +// // ... +// } +typedef struct { + upb_strtable *t; + upb_strtable_entry *e; +} upb_strtable_iter; + +void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t); +void upb_strtable_next(upb_strtable_iter *i); +INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; } +INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) { + return i->e->hdr.key; +} +INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) { + return &i->e->val; +} + -// Provides iteration over the table. The order in which the entries are -// returned is undefined. Insertions invalidate iterators. -void *upb_strtable_begin(upb_strtable *t); -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur); +/* upb_inttable_iter **********************************************************/ -// Inttable iteration (should update strtable iteration to use this scheme too). -// The order is undefined. +// Inttable iteration. Order is undefined. Insertions invalidate iterators. // for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); // i = upb_inttable_next(t, i)) { // // ... 
// } typedef struct { - upb_inttable_key_t key; + uint32_t key; upb_inttable_value *value; bool array_part; } upb_inttable_iter; @@ -184,7 +211,7 @@ typedef struct { upb_inttable_iter upb_inttable_begin(upb_inttable *t); upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter); INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; } -INLINE upb_inttable_key_t upb_inttable_iter_key(upb_inttable_iter iter) { +INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) { return iter.key; } INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) { diff --git a/src/upb_textprinter.c b/src/upb_textprinter.c index ec76d56..14cce9b 100644 --- a/src/upb_textprinter.c +++ b/src/upb_textprinter.c @@ -21,12 +21,15 @@ struct _upb_textprinter { #define CHECK(x) if ((x) < 0) goto err; -static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str, +static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref, bool preserve_utf8) { // Based on CEscapeInternal() from Google's protobuf release. + // TODO; we could read directly fraom a bytesrc's buffer instead. // TODO; we could write directly into a bytesink's buffer instead. char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); - const char *src = upb_string_getrobuf(str), *end = src + upb_string_len(str); + char buf[strref->len], *src = buf; + char *end = src + strref->len; + upb_strref_read(strref, src); // I think hex is prettier and more useful, but proto2 uses octal; should // investigate whether it can parse hex also. 
@@ -35,8 +38,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str, for (; src < end; src++) { if (dstend - dst < 4) { - upb_string str = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf); - CHECK(upb_bytesink_putstr(p->bytesink, &str, &p->status)); + CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status)); dst = dstbuf; } @@ -64,8 +66,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str, last_hex_escape = is_hex_escape; } // Flush remaining data. - upb_string outstr = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf); - CHECK(upb_bytesink_putstr(p->bytesink, &outstr, &p->status)); + CHECK(upb_bytesink_write(p->bytesink, dst, dst - dstbuf, &p->status)); return 0; err: return -1; @@ -74,7 +75,7 @@ err: static int upb_textprinter_indent(upb_textprinter *p) { if(!p->single_line) for(int i = 0; i < p->indent_depth; i++) - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status)); return 0; err: return -1; @@ -82,9 +83,9 @@ err: static int upb_textprinter_endfield(upb_textprinter *p) { if(p->single_line) { - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); + CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status)); } else { - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status)); + CHECK(upb_bytesink_writestr(p->bytesink, "\n", &p->status)); } return 0; err: @@ -96,7 +97,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval, upb_textprinter *p = _p; upb_fielddef *f = upb_value_getfielddef(fval); upb_textprinter_indent(p); - CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); + CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name)); #define CASE(fmtstr, member) \ CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; switch(f->type) { @@ -118,12 +119,11 @@ static upb_flow_t 
upb_textprinter_value(void *_p, upb_value fval, CASE("%" PRIu32, uint32); case UPB_TYPE(ENUM): { upb_enumdef *enum_def = upb_downcast_enumdef(f->def); - upb_string *enum_label = - upb_enumdef_iton(enum_def, upb_value_getint32(val)); - if (enum_label) { + const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val)); + if (label) { // We found a corresponding string for this enum. Otherwise we fall // through to the int32 code path. - CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status)); + CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status)); break; } } @@ -134,12 +134,13 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval, case UPB_TYPE(BOOL): CASE("%hhu", bool); case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); - CHECK(upb_textprinter_putescaped(p, upb_value_getstr(val), + case UPB_TYPE(BYTES): { + CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status)); + CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val), f->type == UPB_TYPE(STRING))); - CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); + CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status)); break; + } } upb_textprinter_endfield(p); return UPB_CONTINUE; @@ -151,11 +152,10 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) { upb_textprinter *p = _p; upb_fielddef *f = upb_value_getfielddef(fval); upb_textprinter_indent(p); - bool ret = upb_bytesink_printf(p->bytesink, &p->status, - UPB_STRFMT " {", UPB_STRARG(f->name)); + bool ret = upb_bytesink_printf(p->bytesink, &p->status, "%s {", f->name); if (!ret) return UPB_SBREAK; if (!p->single_line) - upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); + upb_bytesink_writestr(p->bytesink, "\n", &p->status); p->indent_depth++; return UPB_CONTINUE_WITH(_p); } @@ -165,7 +165,7 @@ static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) { upb_textprinter *p = _p; 
p->indent_depth--; upb_textprinter_indent(p); - upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status); + upb_bytesink_writestr(p->bytesink, "}", &p->status); upb_textprinter_endfield(p); return UPB_CONTINUE; } diff --git a/src/upb_varint.h b/src/upb_varint.h index fb44cd9..87fca2b 100644 --- a/src/upb_varint.h +++ b/src/upb_varint.h @@ -83,16 +83,13 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r); // Template for a function that checks the first two bytes with branching // and dispatches 2-10 bytes with a separate function. -#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \ - uint64_t b = 0; \ - upb_decoderet r = {p, 0}; \ - memcpy(&b, r.p, 2); \ - if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \ - r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \ - r.p = p + 2; \ - if ((b & 0x8000) == 0) return r; \ - return decode_max8_function(r); \ +#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ +INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ + uint8_t *p = (uint8_t*)_p; \ + if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \ + upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)}; \ + if ((*(p + 1) & 0x80) == 0) return r; \ + return decode_max8_function(r); \ } UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright); -- cgit v1.2.3