diff options
author | Joshua Haberman <joshua@reverberate.org> | 2011-02-03 16:02:35 -0800 |
---|---|---|
committer | Joshua Haberman <joshua@reverberate.org> | 2011-02-03 16:02:35 -0800 |
commit | f07cd8ff1d2a5079a7ce3cc571b40c9e209175c9 (patch) | |
tree | a040c23f951328414d9e0160dc1583716292b989 /src | |
parent | 63daaaca4f750d9c1e88b2b3ca258912d58d4120 (diff) | |
parent | 8465e5e65014ac080d62855f8abfd44acdf7beb2 (diff) |
Merge branch 'src-refactoring'
Diffstat (limited to 'src')
-rw-r--r-- | src/upb.c | 49 | ||||
-rw-r--r-- | src/upb.h | 309 | ||||
-rw-r--r-- | src/upb_atomic.h | 192 | ||||
-rw-r--r-- | src/upb_data.c | 500 | ||||
-rw-r--r-- | src/upb_data.h | 552 | ||||
-rw-r--r-- | src/upb_decoder.c | 494 | ||||
-rw-r--r-- | src/upb_decoder.h | 56 | ||||
-rw-r--r-- | src/upb_def.c | 823 | ||||
-rw-r--r-- | src/upb_def.h | 302 | ||||
-rw-r--r-- | src/upb_encoder.c | 423 | ||||
-rw-r--r-- | src/upb_encoder.h | 73 | ||||
-rw-r--r-- | src/upb_inlinedefs.c | 20 | ||||
-rw-r--r-- | src/upb_sink.h | 155 | ||||
-rw-r--r-- | src/upb_string.h | 165 | ||||
-rw-r--r-- | src/upb_table.c | 411 | ||||
-rw-r--r-- | src/upb_table.h | 132 | ||||
-rw-r--r-- | src/upb_text.c | 121 | ||||
-rw-r--r-- | src/upb_text.h | 36 |
18 files changed, 0 insertions, 4813 deletions
diff --git a/src/upb.c b/src/upb.c deleted file mode 100644 index 146a9a5..0000000 --- a/src/upb.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - */ - -#include <stdarg.h> -#include <stddef.h> - -#include "upb.h" - -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(proto_type, wire_type, ctype) \ - [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \ - {alignof(ctype), sizeof(ctype), wire_type, #ctype}, - -upb_type_info upb_types[] = { - TYPE_INFO(DOUBLE, UPB_WIRE_TYPE_64BIT, double) - TYPE_INFO(FLOAT, UPB_WIRE_TYPE_32BIT, float) - TYPE_INFO(INT64, UPB_WIRE_TYPE_VARINT, int64_t) - TYPE_INFO(UINT64, UPB_WIRE_TYPE_VARINT, uint64_t) - TYPE_INFO(INT32, UPB_WIRE_TYPE_VARINT, int32_t) - TYPE_INFO(FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t) - TYPE_INFO(FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t) - TYPE_INFO(BOOL, UPB_WIRE_TYPE_VARINT, bool) - TYPE_INFO(MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*) - TYPE_INFO(GROUP, UPB_WIRE_TYPE_START_GROUP, void*) - TYPE_INFO(UINT32, UPB_WIRE_TYPE_VARINT, uint32_t) - TYPE_INFO(ENUM, UPB_WIRE_TYPE_VARINT, uint32_t) - TYPE_INFO(SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t) - TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t) - TYPE_INFO(SINT32, UPB_WIRE_TYPE_VARINT, int32_t) - TYPE_INFO(SINT64, UPB_WIRE_TYPE_VARINT, int64_t) - TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, upb_strptr) - TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, upb_strptr) -}; - -void upb_seterr(upb_status *status, enum upb_status_code code, - const char *msg, ...) -{ - if(upb_ok(status)) { // The first error is the most interesting. 
- status->code = code; - va_list args; - va_start(args, msg); - vsnprintf(status->msg, UPB_ERRORMSG_MAXLEN, msg, args); - va_end(args); - } -} diff --git a/src/upb.h b/src/upb.h deleted file mode 100644 index 4fb5773..0000000 --- a/src/upb.h +++ /dev/null @@ -1,309 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file contains shared definitions that are widely used across upb. - */ - -#ifndef UPB_H_ -#define UPB_H_ - -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> // only for size_t. -#include "descriptor_const.h" -#include "upb_atomic.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// inline if possible, emit standalone code if required. -#ifndef INLINE -#define INLINE static inline -#endif - -#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) -#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) -#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) - -// The maximum that any submessages can be nested. Matches proto2's limit. -#define UPB_MAX_NESTING 64 - -// The maximum number of fields that any one .proto type can have. Note that -// this is very different than the max field number. It is hard to imagine a -// scenario where more than 32k fields makes sense. -#define UPB_MAX_FIELDS (1<<15) -typedef int16_t upb_field_count_t; - -// Nested type names are separated by periods. -#define UPB_SYMBOL_SEPARATOR '.' - -// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType -#define UPB_SYMBOL_MAXLEN 128 - -// The longest chain that mutually-recursive types are allowed to form. For -// example, this is a type cycle of length 2: -// message A { -// B b = 1; -// } -// message B { -// A a = 1; -// } -#define UPB_MAX_TYPE_CYCLE_LEN 16 - -// The maximum depth that the type graph can have. 
Note that this setting does -// not automatically constrain UPB_MAX_NESTING, because type cycles allow for -// unlimited nesting if we do not limit it. -#define UPB_MAX_TYPE_DEPTH 64 - -/* Fundamental types and type constants. **************************************/ - -// A list of types as they are encoded on-the-wire. -enum upb_wire_type { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5 -}; - -typedef uint8_t upb_wire_type_t; - -// Value type as defined in a .proto file. eg. string, int32, etc. The -// integers that represent this are defined by descriptor.proto. Note that -// descriptor.proto reserves "0" for errors, and we use it to represent -// exceptional circumstances. -typedef uint8_t upb_field_type_t; - -// For referencing the type constants tersely. -#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type -#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type - -INLINE bool upb_issubmsgtype(upb_field_type_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} - -INLINE bool upb_isstringtype(upb_field_type_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} - -// Info for a given field type. -typedef struct { - uint8_t align; - uint8_t size; - upb_wire_type_t expected_wire_type; - char *ctype; -} upb_type_info; - -// A static array of info about all of the field types, indexed by type number. -extern upb_type_info upb_types[]; - -// The number of a field, eg. "optional string foo = 3". -typedef int32_t upb_field_number_t; - -// Label (optional, repeated, required) as defined in a .proto file. The -// values of this are defined by google.protobuf.FieldDescriptorProto.Label -// (from descriptor.proto). -typedef uint8_t upb_label_t; - -// A scalar (non-string) wire value. Used only for parsing unknown fields. 
-typedef union { - uint64_t varint; - uint64_t _64bit; - uint32_t _32bit; -} upb_wire_value; - -// A tag occurs before each value on-the-wire. -typedef struct { - upb_field_number_t field_number; - upb_wire_type_t wire_type; -} upb_tag; - - -/* Polymorphic values of .proto types *****************************************/ - -// INTERNAL-ONLY: never refer to these types with a tag ("union", "struct"). -// Always use the typedefs. -struct _upb_msg; - -typedef struct _upb_msg upb_msg; - -typedef upb_atomic_refcount_t upb_data; - -typedef uint32_t upb_strlen_t; - -struct upb_norefcount_string; -struct upb_refcounted_string; -typedef union { - // Must be first, for the UPB_STATIC_STRING_PTR_INIT() macro. - struct upb_norefcount_string *norefcount; - struct upb_refcounted_string *refcounted; - upb_data *base; -} upb_strptr; - -typedef uint32_t upb_arraylen_t; - -typedef union { - // Must be first, for the UPB_STATIC_ARRAY_PTR_INIT() macro. - struct upb_norefcount_array *norefcount; - struct upb_refcounted_array *refcounted; - upb_data *base; -} upb_arrayptr; - -// A single .proto value. The owner must have an out-of-band way of knowing -// the type, so that it knows which union member to use. -typedef union { - double _double; - float _float; - int32_t int32; - int64_t int64; - uint32_t uint32; - uint64_t uint64; - bool _bool; - upb_strptr str; - upb_arrayptr arr; - upb_msg *msg; - upb_data *data; -} upb_value; - -// A pointer to a .proto value. The owner must have an out-of-band way of -// knowing the type, so it knows which union member to use. 
-typedef union { - double *_double; - float *_float; - int32_t *int32; - int64_t *int64; - uint8_t *uint8; - uint32_t *uint32; - uint64_t *uint64; - bool *_bool; - upb_strptr *str; - upb_arrayptr *arr; - upb_msg **msg; - upb_data **data; - void *_void; -} upb_valueptr; - -INLINE upb_valueptr upb_value_addrof(upb_value *val) { - upb_valueptr ptr = {&val->_double}; - return ptr; -} - -/** - * Converts upb_value_ptr -> upb_value by reading from the pointer. We need to - * know the field type to perform this operation, because we need to know how - * much memory to copy. - */ -INLINE upb_value upb_value_read(upb_valueptr ptr, upb_field_type_t ft) { - upb_value val; - -#define CASE(t, member_name) \ - case UPB_TYPE(t): val.member_name = *ptr.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - default: break; - } - return val; - -#undef CASE -} - -/** - * Writes a upb_value to a upb_value_ptr location. We need to know the field - * type to perform this operation, because we need to know how much memory to - * copy. 
- */ -INLINE void upb_value_write(upb_valueptr ptr, upb_value val, - upb_field_type_t ft) { -#define CASE(t, member_name) \ - case UPB_TYPE(t): *ptr.member_name = val.member_name; break; - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - CASE(STRING, str) - CASE(BYTES, str) - CASE(MESSAGE, msg) - CASE(GROUP, msg) - default: break; - } - -#undef CASE -} - -// Status codes used as a return value. Codes >0 are not fatal and can be -// resumed. -enum upb_status_code { - UPB_STATUS_OK = 0, - - // The input byte stream ended in the middle of a record. - UPB_STATUS_NEED_MORE_DATA = 1, - - // An unrecoverable error occurred. - UPB_STATUS_ERROR = -1, - - // A varint went for 10 bytes without terminating. - UPB_ERROR_UNTERMINATED_VARINT = -2, - - // The max nesting level (UPB_MAX_NESTING) was exceeded. - UPB_ERROR_MAX_NESTING_EXCEEDED = -3 -}; - -#define UPB_ERRORMSG_MAXLEN 256 -typedef struct { - enum upb_status_code code; - char msg[UPB_ERRORMSG_MAXLEN]; -} upb_status; - -#define UPB_STATUS_INIT {UPB_STATUS_OK, ""} - -INLINE bool upb_ok(upb_status *status) { - return status->code == UPB_STATUS_OK; -} - -INLINE void upb_reset(upb_status *status) { - status->code = UPB_STATUS_OK; - status->msg[0] = '\0'; -} - -void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, - ...); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_H_ */ diff --git a/src/upb_atomic.h b/src/upb_atomic.h deleted file mode 100644 index c2cb8ba..0000000 --- a/src/upb_atomic.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- * - * Only a very small part of upb is thread-safe. Notably, individual - * messages, arrays, and strings are *not* thread safe for mutating. - * However, we do make message *metadata* such as upb_msgdef and - * upb_context thread-safe, and their ownership is tracked via atomic - * refcounting. This header implements the small number of atomic - * primitives required to support this. The primitives we implement - * are: - * - * - a reader/writer lock (wrappers around platform-provided mutexes). - * - an atomic refcount. - */ - -#ifndef UPB_ATOMIC_H_ -#define UPB_ATOMIC_H_ - -#include <stdbool.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* inline if possible, emit standalone code if required. */ -#ifndef INLINE -#define INLINE static inline -#endif - -#define UPB_THREAD_UNSAFE -#ifdef UPB_THREAD_UNSAFE - -/* Non-thread-safe implementations. ******************************************/ - -typedef struct { - int v; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->v = val; -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return a->v++ == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return --a->v == 0; -} - -INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { - return a->v; -} - -INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) { - a->v += val; - return a->v == 0; -} - -INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { - int ret = a->v; - a->v += val; - return ret; -} - -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } - -#endif - -/* Atomic refcount ************************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -/* 
Already defined above. */ - -#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 - -/* GCC includes atomic primitives. */ - -typedef struct { - volatile int v; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->v = val; - __sync_synchronize(); /* Ensure the initialized value is visible. */ -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) { - return __sync_add_and_fetch(&a->v, n) == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return __sync_sub_and_fetch(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->v, 0); -} - -INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) { - a->v = val; -} - -#elif defined(WIN32) - -/* Windows defines atomic increment/decrement. */ -#include <Windows.h> - -typedef struct { - volatile LONG val; -} upb_atomic_refcount_t; - -INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - InterlockedExchange(&a->val, val); -} - -INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return InterlockedIncrement(&a->val) == 1; -} - -INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return InterlockedDecrement(&a->val) == 0; -} - -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif - -/* Reader/Writer lock. ********************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -/* Already defined. */ - -#elif defined(UPB_USE_PTHREADS) - -#include <pthread.h> - -typedef struct { - pthread_rwlock_t lock; -} upb_rwlock_t; - -INLINE void upb_rwlock_init(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_init(&l->lock, NULL); -} - -INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { - /* TODO: check return value. 
*/ - pthread_rwlock_destroy(&l->lock); -} - -INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_rdlock(&l->lock); -} - -INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_wrlock(&l->lock); -} - -INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_unlock(&l->lock); -} - -#else -#error Reader/writer lock is not defined for your platform/CPU. \ - Implement it or compile with UPB_THREAD_UNSAFE. -#endif - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ATOMIC_H_ */ diff --git a/src/upb_data.c b/src/upb_data.c deleted file mode 100644 index 3b4f7ab..0000000 --- a/src/upb_data.c +++ /dev/null @@ -1,500 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include <stdlib.h> -#include "upb_data.h" -#include "upb_decoder.h" -#include "upb_def.h" - -static uint32_t round_up_to_pow2(uint32_t v) -{ - /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -/* upb_data *******************************************************************/ - -static void data_elem_unref(upb_valueptr p, upb_fielddef *f) { - if(upb_issubmsg(f)) { - upb_msg_unref(*p.msg, upb_downcast_msgdef(f->def)); - } else if(upb_isstring(f)) { - upb_string_unref(*p.str); - } else { - assert(false); - } -} - -static void data_unref(upb_valueptr p, upb_fielddef *f) { - if(upb_isarray(f)) { - upb_array_unref(*p.arr, f); - } else { - data_elem_unref(p, f); - } -} - -INLINE void data_init(upb_data *d, int flags) { - d->v = REFCOUNT_ONE | flags; -} - -static void check_not_frozen(upb_data *d) { - // On one hand I am reluctant to put abort() calls in a low-level library - // that are enabled in a production build. 
On the other hand, this is a bug - // in the client code that we cannot recover from, and it seems better to get - // the error here than later. - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) abort(); -} - - -/* upb_string *******************************************************************/ - -void _upb_string_setptr(upb_strptr s, char *ptr) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - s.refcounted->ptr = ptr; - else - s.norefcount->ptr = ptr; -} - -static void _upb_string_set_bytelen(upb_strptr s, upb_strlen_t newlen) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - s.refcounted->byte_len = newlen; - } else { - s.norefcount->byte_len = newlen; - } -} - -upb_strptr upb_string_new() { - upb_strptr s; - s.refcounted = malloc(sizeof(struct upb_refcounted_string)); - data_init(s.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - s.refcounted->byte_size = 0; - s.refcounted->byte_len = 0; - s.refcounted->ptr = NULL; - return s; -} - -static upb_strlen_t string_get_bytesize(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - return s.refcounted->byte_size; - } else { - return (s.norefcount->byte_size_and_flags & 0xFFFFFFF8) >> 3; - } -} - -static void string_set_bytesize(upb_strptr s, upb_strlen_t newsize) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) { - s.refcounted->byte_size = newsize; - } else { - s.norefcount->byte_size_and_flags &= 0x7; - s.norefcount->byte_size_and_flags |= (newsize << 3); - } -} - -void _upb_string_free(upb_strptr s) -{ - if(string_get_bytesize(s) != 0) free((void*)upb_string_getrobuf(s)); - free(s.base); -} - -void upb_string_resize(upb_strptr s, upb_strlen_t byte_len) { - check_not_frozen(s.base); - if(string_get_bytesize(s) < byte_len) { - // Need to resize. 
- size_t new_byte_size = round_up_to_pow2(byte_len); - _upb_string_setptr(s, realloc(_upb_string_getptr(s), new_byte_size)); - string_set_bytesize(s, new_byte_size); - } - _upb_string_set_bytelen(s, byte_len); -} - -upb_strptr upb_string_getref(upb_strptr s, int ref_flags) { - if(_upb_data_incref(s.base, ref_flags)) return s; - upb_strptr copy = upb_strdup(s); - if(ref_flags == UPB_REF_FROZEN) - upb_data_setflag(copy.base, UPB_DATA_FROZEN); - return copy; -} - -upb_strptr upb_strreadfile(const char *filename) { - FILE *f = fopen(filename, "rb"); - if(!f) return UPB_STRING_NULL; - if(fseek(f, 0, SEEK_END) != 0) goto error; - long size = ftell(f); - if(size < 0) goto error; - if(fseek(f, 0, SEEK_SET) != 0) goto error; - upb_strptr s = upb_string_new(); - char *buf = upb_string_getrwbuf(s, size); - if(fread(buf, size, 1, f) != 1) goto error; - fclose(f); - return s; - -error: - fclose(f); - return UPB_STRING_NULL; -} - -upb_strptr upb_strdupc(const char *src) { - upb_strptr copy = upb_string_new(); - upb_strlen_t len = strlen(src); - char *buf = upb_string_getrwbuf(copy, len); - memcpy(buf, src, len); - return copy; -} - -void upb_strcat(upb_strptr s, upb_strptr append) { - upb_strlen_t s_len = upb_strlen(s); - upb_strlen_t append_len = upb_strlen(append); - upb_strlen_t newlen = s_len + append_len; - memcpy(upb_string_getrwbuf(s, newlen) + s_len, - upb_string_getrobuf(append), append_len); -} - -upb_strptr upb_strslice(upb_strptr s, int offset, int len) { - upb_strptr slice = upb_string_new(); - len = UPB_MIN((upb_strlen_t)len, upb_strlen(s) - (upb_strlen_t)offset); - memcpy(upb_string_getrwbuf(slice, len), upb_string_getrobuf(s) + offset, len); - return slice; -} - -upb_strptr upb_strdup(upb_strptr s) { - upb_strptr copy = upb_string_new(); - upb_strcpy(copy, s); - return copy; -} - -int upb_strcmp(upb_strptr s1, upb_strptr s2) { - upb_strlen_t common_length = UPB_MIN(upb_strlen(s1), upb_strlen(s2)); - int common_diff = memcmp(upb_string_getrobuf(s1), 
upb_string_getrobuf(s2), - common_length); - return common_diff == - 0 ? ((int)upb_strlen(s1) - (int)upb_strlen(s2)) : common_diff; -} - - -/* upb_array ******************************************************************/ - -static void _upb_array_setptr(upb_arrayptr a, void *ptr) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - a.refcounted->elements._void = ptr; - else - a.norefcount->elements._void = ptr; -} - -static void _upb_array_setlen(upb_arrayptr a, upb_strlen_t newlen) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - a.refcounted->len = newlen; - } else { - a.norefcount->len = newlen; - } -} - -upb_arrayptr upb_array_new() { - upb_arrayptr a; - a.refcounted = malloc(sizeof(struct upb_refcounted_array)); - data_init(a.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - a.refcounted->size = 0; - a.refcounted->len = 0; - a.refcounted->elements._void = NULL; - return a; -} - -// ONLY handles refcounted arrays for the moment. -void _upb_array_free(upb_arrayptr a, upb_fielddef *f) -{ - if(upb_elem_ismm(f)) { - for(upb_arraylen_t i = 0; i < a.refcounted->size; i++) { - upb_valueptr p = _upb_array_getptr(a, f, i); - if(!*p.data) continue; - data_elem_unref(p, f); - } - } - if(a.refcounted->size != 0) free(a.refcounted->elements._void); - free(a.refcounted); -} - -static upb_arraylen_t array_get_size(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - return a.refcounted->size; - } else { - return (a.norefcount->base.v & 0xFFFFFFF8) >> 3; - } -} - -static void array_set_size(upb_arrayptr a, upb_arraylen_t newsize) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) { - a.refcounted->size = newsize; - } else { - a.norefcount->base.v &= 0x7; - a.norefcount->base.v |= (newsize << 3); - } -} - -void upb_array_resize(upb_arrayptr a, upb_fielddef *f, upb_strlen_t len) { - check_not_frozen(a.base); - size_t type_size = upb_types[f->type].size; - upb_arraylen_t old_size = array_get_size(a); - if(old_size < len) { - // Need to 
resize. - size_t new_size = round_up_to_pow2(len); - _upb_array_setptr(a, realloc(_upb_array_getptr_raw(a, 0, 0)._void, new_size * type_size)); - array_set_size(a, new_size); - memset(_upb_array_getptr_raw(a, old_size, type_size)._void, - 0, - (new_size - old_size) * type_size); - } - _upb_array_setlen(a, len); -} - - -/* upb_msg ********************************************************************/ - -static void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) { - msg->data[f->field_index/8] |= (1 << (f->field_index % 8)); -} - -upb_msg *upb_msg_new(upb_msgdef *md) { - upb_msg *msg = malloc(md->size); - memset(msg, 0, md->size); - data_init(&msg->base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); - upb_def_ref(UPB_UPCAST(md)); - return msg; -} - -// ONLY handles refcounted messages for the moment. -void _upb_msg_free(upb_msg *msg, upb_msgdef *md) -{ - for(int i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[i]; - upb_valueptr p = _upb_msg_getptr(msg, f); - if(!upb_field_ismm(f) || !*p.data) continue; - data_unref(p, f); - } - upb_def_unref(UPB_UPCAST(md)); - free(msg); -} - -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status) -{ - upb_decoder *d = upb_decoder_new(md); - upb_msgsink *s = upb_msgsink_new(md); - - upb_msgsink_reset(s, msg); - upb_decoder_reset(d, upb_msgsink_sink(s)); - upb_msg_clear(msg, md); - upb_decoder_decode(d, str, status); - - upb_decoder_free(d); - upb_msgsink_free(s); -} - -#if 0 -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status) -{ - upb_sizebuilder *sb = upb_sizebuilder_new(md); - upb_encoder *e = upb_encoder_new(md); - upb_strsink *sink = upb_strsink_new(); - - // Get sizes. We could avoid performing this step in some cases by having a - // bool in the msgdef indicating whether it or any of its children have - // submessages in the def (groups don't count). 
- upb_sizebuilder_reset(sb); - upb_msgsrc_produce(msg, md, upb_sizebuilder_sink(sb), true); - - upb_strsink_reset(); - upb_encoder_reset(e, sb, sink); - upb_msgsrc_produce(msg, md, sink, false); -} -#endif - -/* upb_msgsrc ****************************************************************/ - -static void _upb_msgsrc_produceval(upb_value v, upb_fielddef *f, upb_sink *sink, - bool reverse, upb_status *status) -{ - // TODO: We need to check status for failure, but how often? - if(upb_issubmsg(f)) { - upb_msgdef *md = upb_downcast_msgdef(f->def); - upb_sink_onstart(sink, f, status); - upb_msgsrc_produce(v.msg, md, sink, reverse, status); - upb_sink_onend(sink, f, status); - } else if(upb_isstring(f)) { - upb_sink_onstr(sink, f, v.str, 0, upb_strlen(v.str), status); - } else { - upb_sink_onvalue(sink, f, v, status); - } -} - -void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink, - bool reverse, upb_status *status) -{ - for(int i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[reverse ? md->num_fields - i - 1 : i]; - if(!upb_msg_has(msg, f)) continue; - upb_value v = upb_msg_get(msg, f); - if(upb_isarray(f)) { - upb_arrayptr arr = v.arr; - upb_arraylen_t len = upb_array_len(arr); - for(upb_arraylen_t j = 0; j < upb_array_len(arr); j++) { - upb_value elem = upb_array_get(arr, f, reverse ? len - j - 1 : j); - _upb_msgsrc_produceval(elem, f, sink, reverse, status); - } - } else { - _upb_msgsrc_produceval(v, f, sink, reverse, status); - } - } -} - - -/* upb_msgsink ***************************************************************/ - -typedef struct { - upb_msg *msg; - upb_msgdef *md; -} upb_msgsink_frame; - -struct upb_msgsink { - upb_sink base; - upb_msgdef *toplevel_msgdef; - upb_msgsink_frame stack[UPB_MAX_NESTING], *top; -}; - -/* Helper function that returns a pointer to where the next value for field "f" - * should be stored, taking into account whether f is an array that may need to - * be allocated or resized. 
*/ -static upb_valueptr get_valueptr(upb_msg *msg, upb_fielddef *f) -{ - upb_valueptr p = _upb_msg_getptr(msg, f); - if(upb_isarray(f)) { - if(!upb_msg_has(msg, f)) { - if(upb_array_isnull(*p.arr) || !upb_data_only(*p.data)) { - if(!upb_array_isnull(*p.arr)) - upb_array_unref(*p.arr, f); - *p.arr = upb_array_new(); - } - upb_array_truncate(*p.arr); - upb_msg_sethas(msg, f); - } else { - assert(!upb_array_isnull(*p.arr)); - } - upb_arraylen_t oldlen = upb_array_len(*p.arr); - upb_array_resize(*p.arr, f, oldlen + 1); - p = _upb_array_getptr(*p.arr, f, oldlen); - } - return p; -} - -// Callbacks for upb_sink. -// TODO: implement these in terms of public interfaces. - -static upb_sink_status _upb_msgsink_valuecb(upb_sink *s, upb_fielddef *f, - upb_value val, upb_status *status) -{ - (void)status; // No detectable errors can occur. - upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *msg = ms->top->msg; - upb_valueptr p = get_valueptr(msg, f); - upb_msg_sethas(msg, f); - upb_value_write(p, val, f->type); - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_strcb(upb_sink *s, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - (void)status; // No detectable errors can occur. - upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *msg = ms->top->msg; - upb_valueptr p = get_valueptr(msg, f); - upb_msg_sethas(msg, f); - if(end > upb_strlen(str)) abort(); /* TODO: support streaming. */ - if(upb_string_isnull(*p.str) || !upb_data_only(*p.data)) { - if(!upb_string_isnull(*p.str)) - upb_string_unref(*p.str); - *p.str = upb_string_new(); - } - upb_strcpylen(*p.str, upb_string_getrobuf(str) + start, end - start); - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_startcb(upb_sink *s, upb_fielddef *f, - upb_status *status) -{ - (void)status; // No detectable errors can occur. 
- upb_msgsink *ms = (upb_msgsink*)s; - upb_msg *oldmsg = ms->top->msg; - upb_valueptr p = get_valueptr(oldmsg, f); - ms->top++; - - if(upb_isarray(f) || !upb_msg_has(oldmsg, f)) { - upb_msgdef *md = upb_downcast_msgdef(f->def); - if(!*p.msg || !upb_data_only(*p.data)) { - if(*p.msg) - upb_msg_unref(*p.msg, md); - *p.msg = upb_msg_new(md); - } - upb_msg_clear(*p.msg, md); - upb_msg_sethas(oldmsg, f); - } - - ms->top->msg = *p.msg; - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_msgsink_endcb(upb_sink *s, upb_fielddef *f, - upb_status *status) -{ - (void)status; // No detectable errors can occur. - (void)f; // Unused. - upb_msgsink *ms = (upb_msgsink*)s; - ms->top--; - return UPB_SINK_CONTINUE; -} - -static upb_sink_callbacks _upb_msgsink_vtbl = { - _upb_msgsink_valuecb, - _upb_msgsink_strcb, - _upb_msgsink_startcb, - _upb_msgsink_endcb -}; - -// -// External upb_msgsink interface. -// - -upb_msgsink *upb_msgsink_new(upb_msgdef *md) -{ - upb_msgsink *ms = malloc(sizeof(*ms)); - upb_sink_init(&ms->base, &_upb_msgsink_vtbl); - ms->toplevel_msgdef = md; - return ms; -} - -void upb_msgsink_free(upb_msgsink *sink) -{ - free(sink); -} - -upb_sink *upb_msgsink_sink(upb_msgsink *sink) -{ - return &sink->base; -} - -void upb_msgsink_reset(upb_msgsink *ms, upb_msg *msg) -{ - ms->top = ms->stack; - ms->top->msg = msg; - ms->top->md = ms->toplevel_msgdef; -} diff --git a/src/upb_data.h b/src/upb_data.h deleted file mode 100644 index cdb7af2..0000000 --- a/src/upb_data.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file defines the in-memory format for messages, arrays, and strings - * (which are the three dynamically-allocated structures that make up all - * protobufs). - * - * The members of all structs should be considered private. Access should - * only happen through the provided functions. 
- * - * Unlike Google's protobuf, messages contain *pointers* to strings and arrays - * instead of including them by value. This makes unused strings and arrays - * use less memory, and lets the strings and arrays have multiple possible - * representations (for example, a string could be a slice). It also gives - * us more flexibility wrt refcounting. The cost is that when a field *is* - * being used, the net memory usage is one pointer more than if we had - * included the thing directly. */ - -#ifndef UPB_DATA_H -#define UPB_DATA_H - -#include <assert.h> -#include <string.h> -#include "upb.h" -#include "upb_atomic.h" -#include "upb_def.h" -#include "upb_sink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_data *******************************************************************/ - -// The "base class" of strings, arrays, and messages. Contains a few flags and -// possibly a reference count. None of the functions for upb_data are public, -// but some of the constants are. - -// typedef upb_atomic_refcount_t upb_data; - -// The flags in upb_data. -typedef enum { - // Set if the object itself was allocated with malloc() and should be freed - // with free(). This flag would be false if the object was allocated on the - // stack or is data from the static segment of an object file. Note that this - // flag does not apply to the data being referenced by a string or array. - // - // If this flag is false, UPB_FLAG_HAS_REFCOUNT must be false also; there is - // no sense refcounting something that does not need to be freed. - UPB_DATA_HEAPALLOCATED = 1, - - // Set if the object is frozen against modification. While an object is - // frozen, it is suitable for concurrent readonly access. Note that this - // flag alone is not a sufficient mechanism for preventing any kind of writes - // to the object's memory, because the object could still have a refcount. - UPB_DATA_FROZEN = (1<<1), - - // Set if the object has an embedded refcount. 
- UPB_DATA_REFCOUNTED = (1<<2) -} upb_data_flag; - -#define REFCOUNT_MASK 0xFFFFFFF8 -#define REFCOUNT_SHIFT 3 -#define REFCOUNT_ONE (1<<REFCOUNT_SHIFT) - -INLINE bool upb_data_hasflag(upb_data *d, upb_data_flag flag) { - // We read this unsynchronized, because the is_frozen flag (the only flag - // that can change during the life of a upb_data) may not change if the - // data has more than one owner. - return d->v & flag; -} - -// INTERNAL-ONLY -INLINE void upb_data_setflag(upb_data *d, upb_data_flag flag) { - d->v |= flag; -} - -INLINE uint32_t upb_data_getrefcount(upb_data *d) { - int data; - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) - data = upb_atomic_read(d); - else - data = d->v; - return (data & REFCOUNT_MASK) >> REFCOUNT_SHIFT; -} - -// Returns true if the given data has only one owner. -INLINE bool upb_data_only(upb_data *data) { - return !upb_data_hasflag(data, UPB_DATA_REFCOUNTED) || - upb_data_getrefcount(data) == 1; -} - -// Specifies the type of ref that is requested based on the kind of access the -// caller needs to the object. -typedef enum { - // Use when the client plans to perform read-only access to the object, and - // only in one thread at a time. This imposes the least requirements on the - // object; it can be either frozen or not. As a result, requesting a - // reference of this type never performs a copy unless the object has no - // refcount. - // - // A ref of this type can always be explicitly converted to frozen or - // unfrozen later. - UPB_REF_THREADUNSAFE_READONLY = 0, - - // Use when the client plans to perform read-only access, but from multiple - // threads concurrently. This will force the object to eagerly perform any - // parsing that may have been lazily deferred, and will force a copy if the - // object is not current frozen. - // - // Asking for a reference of this type is equivalent to: - // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); - // x = freeze(x); - // ...except it is more efficient. 
- UPB_REF_FROZEN = 1, - - // Use when the client plans to perform read/write access. As a result, the - // reference will not be thread-safe for concurrent reading *or* writing; the - // object must be externally synchronized if it is being accessed from more - // than one thread. This will force a copy if the object is currently frozen. - // - // Asking for a reference of this type is equivalent to: - // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); - // x = thaw(x); - // ...except it is more efficient. - UPB_REF_MUTABLE = 2 -} upb_reftype; - -// INTERNAL-ONLY FUNCTION: -// Attempts to increment the reference on d with the given type of ref. If -// this is not possible, returns false. -INLINE bool _upb_data_incref(upb_data *d, upb_reftype reftype) { - bool frozen = upb_data_hasflag(d, UPB_DATA_FROZEN); - if((reftype == UPB_REF_FROZEN && !frozen) || - (reftype == UPB_REF_MUTABLE && frozen) || - (upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED) && - !upb_data_hasflag(d, UPB_DATA_REFCOUNTED))) { - return false; - } - // Increment the ref. Only need to use atomic ops if the ref is frozen. - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) upb_atomic_add(d, REFCOUNT_ONE); - else d->v += REFCOUNT_ONE; - return true; -} - -// INTERNAL-ONLY FUNCTION: -// Releases a reference on d, returning true if the object should be deleted. -INLINE bool _upb_data_unref(upb_data *d) { - if(upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED)) { - // A heap-allocated object without a refcount should never be decref'd. - // Its owner owns it exlusively and should free it directly. - assert(upb_data_hasflag(d, UPB_DATA_REFCOUNTED)); - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) { - int32_t old_val = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE); - return (old_val & REFCOUNT_MASK) == REFCOUNT_ONE; - } else { - d->v -= REFCOUNT_ONE; - return (d->v & REFCOUNT_MASK) == 0; - } - } else { - // Non heap-allocated data never should be deleted. 
- return false; - } -} - -/* upb_string *****************************************************************/ - -// We have several different representations for string, depending on whether -// it has a refcount (and likely in the future, depending on whether it is a -// slice of another string). We could just have one representation with -// members that are sometimes unused, but this is wasteful in memory. The -// flags that are always part of the first word tell us which representation -// to use. -// -// In a way, this is like inheritance but instead of using a virtual pointer, -// we do switch/case in every "virtual" method. This may sound expensive but -// in many cases the different cases compile to exactly the same code, so there -// is no branch. - -struct upb_norefcount_string { - uint32_t byte_size_and_flags; - upb_strlen_t byte_len; - // We expect the data to be 8-bit clean (uint8_t), but char* is such an - // ingrained convention that we follow it. - char *ptr; -}; - -// Used for a string with a refcount. -struct upb_refcounted_string { - upb_data base; - upb_strlen_t byte_len; - char *ptr; - uint32_t byte_size; -}; - - -// Returns a newly constructed, refcounted string which starts out empty. -// Caller owns one ref on it. The returned string will not be frozen. -upb_strptr upb_string_new(void); - -// INTERNAL-ONLY: -// Frees the given string, alone with any memory the string owned. -void _upb_string_free(upb_strptr s); - -// Returns a string to which caller owns a ref, and contains the same contents -// as src. The returned value may be a copy of src, if the requested flags -// were incompatible with src's. -upb_strptr upb_string_getref(upb_strptr s, int ref_flags); - -#define UPB_STRING_NULL_INITIALIZER {NULL} -static const upb_strptr UPB_STRING_NULL = UPB_STRING_NULL_INITIALIZER; -INLINE bool upb_string_isnull(upb_strptr s) { return s.base == NULL; } - -// The caller releases a ref on src, which it must previously have owned a ref -// on. 
-INLINE void upb_string_unref(upb_strptr s) { - if(_upb_data_unref(s.base)) _upb_string_free(s); -} - -// The string is resized to byte_len. The string must not be frozen. -void upb_string_resize(upb_strptr s, upb_strlen_t len); - -// Returns a buffer to which the caller may write. The string is resized to -// byte_len (which may or may not trigger a reallocation). The string must not -// be frozen. -INLINE char *upb_string_getrwbuf(upb_strptr s, upb_strlen_t byte_len) { - upb_string_resize(s, byte_len); - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->ptr; - else - return s.norefcount->ptr; -} - -INLINE void upb_string_clear(upb_strptr s) { - upb_string_getrwbuf(s, 0); -} - -// INTERNAL-ONLY: -// Gets/sets the pointer. -INLINE char *_upb_string_getptr(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->ptr; - else - return s.norefcount->ptr; -} - -// Returns a buffer that the caller may use to read the current contents of -// the string. The number of bytes available is upb_strlen(s). -INLINE const char *upb_string_getrobuf(upb_strptr s) { - return _upb_string_getptr(s); -} - -// Returns the current length of the string. -INLINE upb_strlen_t upb_strlen(upb_strptr s) { - if(upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) - return s.refcounted->byte_len; - else - return s.norefcount->byte_len; -} - -/* upb_string library functions ***********************************************/ - -// Named like their <string.h> counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. - -// More efficient than upb_strcmp if all you need is to test equality. -INLINE bool upb_streql(upb_strptr s1, upb_strptr s2) { - upb_strlen_t len = upb_strlen(s1); - if(len != upb_strlen(s2)) { - return false; - } else { - return memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; - } -} - -// Like strcmp(). 
-int upb_strcmp(upb_strptr s1, upb_strptr s2); - -// Like upb_strcpy, but copies from a buffer and length. -INLINE void upb_strcpylen(upb_strptr dest, const void *src, upb_strlen_t len) { - memcpy(upb_string_getrwbuf(dest, len), src, len); -} - -// Replaces the contents of "dest" with the contents of "src". -INLINE void upb_strcpy(upb_strptr dest, upb_strptr src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_strlen(src)); -} - -// Like upb_strcpy, but copies from a NULL-terminated string. -INLINE void upb_strcpyc(upb_strptr dest, const char *src) { - // This does two passes over src, but that is necessary unless we want to - // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); -} - -// Returns a new string whose contents are a copy of s. -upb_strptr upb_strdup(upb_strptr s); - -// Like upb_strdup(), but duplicates a given buffer and length. -INLINE upb_strptr upb_strduplen(const void *src, upb_strlen_t len) { - upb_strptr s = upb_string_new(); - upb_strcpylen(s, src, len); - return s; -} - -// Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_strptr upb_strdupc(const char *src); - -// Appends 'append' to 's' in-place, resizing s if necessary. -void upb_strcat(upb_strptr s, upb_strptr append); - -// Returns a string that is a substring of the given string. Currently this -// returns a copy, but in the future this may return an object that references -// the original string data instead of copying it. Both now and in the future, -// the caller owns a ref on whatever is returned. -upb_strptr upb_strslice(upb_strptr s, int offset, int len); - -// Reads an entire file into a newly-allocated string (caller owns one ref). -upb_strptr upb_strreadfile(const char *filename); - -// Typedef for a read-only string that is allocated statically or on the stack. -// Initialize with the given macro, which must resolve to a const char*. You -// must not dynamically allocate this type. 
Example usage: -// -// upb_static_string mystr = UPB_STATIC_STRING_INIT("biscuits"); -// upb_strptr mystr_ptr = UPB_STATIC_STRING_PTR_INIT(mystr); -// -// If C99 compund literals are available, the much nicer UPB_STRLIT macro is -// available instead: -// -// upb_strtr mystr_ptr = UPB_STRLIT("biscuits"); -// -typedef struct upb_norefcount_string upb_static_string; -#define UPB_STATIC_STRING_INIT_LEN(str, len) {0 | UPB_DATA_FROZEN, len, str} -#define UPB_STATIC_STRING_INIT(str) UPB_STATIC_STRING_INIT_LEN(str, sizeof(str)-1) -#define UPB_STATIC_STRING_PTR_INIT(static_string) {&static_string} -#define UPB_STRLIT(str) (upb_strptr){&(upb_static_string)UPB_STATIC_STRING_INIT(str)} - -// Allows using upb_strings in printf, ie: -// upb_strptr str = UPB_STRLIT("Hello, World!\n"); -// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) upb_strlen(str), upb_string_getrobuf(str) -#define UPB_STRFMT "%.*s" - -/* upb_array ******************************************************************/ - -// The comments attached to upb_string above also apply here. -struct upb_norefcount_array { - upb_data base; // We co-opt the refcount for the size. 
- upb_arraylen_t len; - upb_valueptr elements; -}; - -struct upb_refcounted_array { - upb_data base; - upb_arraylen_t len; - upb_valueptr elements; - upb_arraylen_t size; -}; - -typedef struct upb_norefcount_array upb_static_array; -#define UPB_STATIC_ARRAY_INIT(arr, len) {{0 | UPB_DATA_FROZEN}, len, {._void=arr}} -#define UPB_STATIC_ARRAY_PTR_TYPED_INIT(static_arr) {{&static_arr}} - -#define UPB_ARRAY_NULL_INITIALIZER {NULL} -static const upb_arrayptr UPB_ARRAY_NULL = UPB_ARRAY_NULL_INITIALIZER; -INLINE bool upb_array_isnull(upb_arrayptr a) { return a.base == NULL; } -INLINE bool upb_array_ptreql(upb_arrayptr a1, upb_arrayptr a2) { - return a1.base == a2.base; -} - -#define UPB_MSG_ARRAYPTR(type) type ## _array -#define UPB_DEFINE_MSG_ARRAY(type) \ -typedef struct { upb_arrayptr ptr; } UPB_MSG_ARRAYPTR(type); \ -INLINE upb_arraylen_t type ## _array_len(UPB_MSG_ARRAYPTR(type) a) { \ - return upb_array_len(a.ptr); \ -} \ -INLINE type* type ## _array_get(UPB_MSG_ARRAYPTR(type) a, upb_arraylen_t elem) { \ - return *(type**)_upb_array_getptr_raw(a.ptr, elem, sizeof(void*))._void; \ -} - -// Constructs a newly-allocated, reference-counted array which starts out -// empty. Caller owns one ref on it. -upb_arrayptr upb_array_new(void); - -// Returns the current number of elements in the array. -INLINE size_t upb_array_len(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - return a.refcounted->len; - else - return a.norefcount->len; -} - -// INTERNAL-ONLY: -// Frees the given message and releases references on members. -void _upb_array_free(upb_arrayptr a, upb_fielddef *f); - -// INTERNAL-ONLY: -// Returns a pointer to the given elem. 
-INLINE upb_valueptr _upb_array_getptr_raw(upb_arrayptr a, upb_arraylen_t elem, - size_t type_size) { - upb_valueptr p; - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - p._void = &a.refcounted->elements.uint8[elem * type_size]; - else - p._void = &a.norefcount->elements.uint8[elem * type_size]; - return p; -} - -INLINE upb_valueptr _upb_array_getptr(upb_arrayptr a, upb_fielddef *f, - upb_arraylen_t elem) { - return _upb_array_getptr_raw(a, elem, upb_types[f->type].size); -} - -INLINE upb_value upb_array_get(upb_arrayptr a, upb_fielddef *f, - upb_arraylen_t elem) { - assert(elem < upb_array_len(a)); - return upb_value_read(_upb_array_getptr(a, f, elem), f->type); -} - -// The caller releases a ref on the given array, which it must previously have -// owned a ref on. -INLINE void upb_array_unref(upb_arrayptr a, upb_fielddef *f) { - if(_upb_data_unref(a.base)) _upb_array_free(a, f); -} - -#if 0 -// Returns an array to which caller owns a ref, and contains the same contents -// as src. The returned value may be a copy of src, if the requested flags -// were incompatible with src's. -INLINE upb_arrayptr upb_array_getref(upb_arrayptr src, int ref_flags); - -// Sets the given element in the array to val. The current length of the array -// must be greater than elem. If the field type is dynamic, the array will -// take a ref on val and release a ref on what was previously in the array. -INLINE void upb_array_set(upb_arrayptr a, upb_fielddef *f, int elem, - upb_value val); - - -// Note that array_append will attempt to take a reference on the given value, -// so to avoid a copy use append_default and get. 
-INLINE void upb_array_append(upb_arrayptr a, upb_fielddef *f, - upb_value val); -INLINE void upb_array_append_default(upb_arrayptr a, upb_fielddef *f, - upb_value val); -#endif - -INLINE void upb_array_truncate(upb_arrayptr a) { - if(upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) - a.refcounted->len = 0; - else - a.norefcount->len = 0; -} - - -/* upb_msg ********************************************************************/ - -// Note that some inline functions for upb_msg are defined in upb_def.h since -// they rely on the defs. - -struct _upb_msg { - upb_data base; - uint8_t data[4]; // We allocate the appropriate amount per message. -}; - -// Creates a new msg of the given type. -upb_msg *upb_msg_new(upb_msgdef *md); - -// INTERNAL-ONLY: -// Frees the given message and releases references on members. -void _upb_msg_free(upb_msg *msg, upb_msgdef *md); - -// INTERNAL-ONLY: -// Returns a pointer to the given field. -INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) { - upb_valueptr p; - p._void = &msg->data[f->byte_offset]; - return p; -} - -// Releases a references on msg. -INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) { - if(_upb_data_unref(&msg->base)) _upb_msg_free(msg, md); -} - -// Tests whether the given field is explicitly set, or whether it will return -// a default. -INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) { - return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0; -} - -// Returns the current value if set, or the default value if not set, of the -// specified field. The caller does *not* own a ref. -INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) { - if(upb_msg_has(msg, f)) { - return upb_value_read(_upb_msg_getptr(msg, f), f->type); - } else { - return f->default_value; - } -} - -// Sets the given field to the given value. The msg will take a ref on val, -// and will drop a ref on whatever was there before. 
-void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val); - -INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) { - memset(msg->data, 0, md->set_flags_bytes); -} - -// A convenience function for decoding an entire protobuf all at once, without -// having to worry about setting up the appropriate objects. -void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status); - -// A convenience function for encoding an entire protobuf all at once. If an -// error occurs, the null string is returned and the status object contains -// the error. -void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str, - upb_status *status); - - -/* upb_msgsrc *****************************************************************/ - -// A nonresumable, non-interruptable (but simple and fast) source for pushing -// the data of a upb_msg to a upb_sink. -void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink, - bool reverse, upb_status *status); - - -/* upb_msgsink ****************************************************************/ - -// A upb_msgsink can accept the data from a source and write it into a message. -struct upb_msgsink; -typedef struct upb_msgsink upb_msgsink; - -// Allocate and free a msgsink, respectively. -upb_msgsink *upb_msgsink_new(upb_msgdef *md); -void upb_msgsink_free(upb_msgsink *sink); - -// Returns the upb_sink (like an upcast). -upb_sink *upb_msgsink_sink(upb_msgsink *sink); - -// Resets the msgsink for the given msg. -void upb_msgsink_reset(upb_msgsink *sink, upb_msg *msg); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_decoder.c b/src/upb_decoder.c deleted file mode 100644 index 209db56..0000000 --- a/src/upb_decoder.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. 
- */ - -#include "upb_decoder.h" - -#include <inttypes.h> -#include <stddef.h> -#include <stdlib.h> -#include "upb_def.h" - -/* Functions to read wire values. *********************************************/ - -// These functions are internal to the decode, but might be moved into an -// internal header file if we at some point in the future opt to do code -// generation, because the generated code would want to inline these functions. -// The same applies to the functions to read .proto values below. - -const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status); - -// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). -INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - // We inline this common case (1-byte varints), if that fails we dispatch to - // the full (non-inlined) version. - if((*buf & 0x80) == 0) { - *val = *buf & 0x7f; - return buf + 1; - } else { - return upb_get_v_uint64_t_full(buf, end, val, status); - } -} - -// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit -// varint is not a true wire type. -INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - uint64_t val64; - const uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status); - *val = (uint32_t)val64; // Discard the high bits. - return ret; -} - -// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). 
-INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end, - uint32_t *val, upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } -#if UPB_UNALIGNED_READS_OK - *val = *(uint32_t*)buf; -#else -#define SHL(val, bits) ((uint32_t)val << bits) - *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24); -#undef SHL -#endif - return uint32_end; -} - -// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). -INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } -#if UPB_UNALIGNED_READS_OK - *val = *(uint64_t*)buf; -#else -#define SHL(val, bits) ((uint64_t)val << bits) - *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) | - SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56); -#undef SHL -#endif - return uint64_end; -} - -INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - for(; buf < (uint8_t*)end && (last & 0x80); buf++) - last = *buf; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - status->code = UPB_STATUS_NEED_MORE_DATA; - buf = end; - } else if(buf > maxend) { - status->code = UPB_ERROR_UNTERMINATED_VARINT; - buf = end; - } - return buf; -} - -INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - const uint8_t *uint32_end = buf + sizeof(uint32_t); - if(uint32_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint32_end; -} - -INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf, - const uint8_t *end, - upb_status *status) -{ - 
const uint8_t *uint64_end = buf + sizeof(uint64_t); - if(uint64_end > end) { - status->code = UPB_STATUS_NEED_MORE_DATA; - return end; - } - return uint64_end; -} - -/* Functions to read .proto values. *******************************************/ - -// Performs zig-zag decoding, which is used by sint32 and sint64. -INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } - -// Use macros to define a set of two functions for each .proto type: -// -// // Reads and converts a .proto value from buf, placing it in d. -// // "end" indicates the end of the current buffer (if the buffer does -// // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). -// // On success, a pointer will be returned to the first byte that was -// // not consumed. -// const uint8_t *upb_get_INT32(const uint8_t *buf, const uint8_t *end, -// int32_t *d, upb_status *status); -// -// // Given an already read wire value s (source), convert it to a .proto -// // value and return it. -// int32_t upb_wvtov_INT32(uint32_t s); -// -// These are the most efficient functions to call if you want to decode a value -// for a known type. 
- -#define WVTOV(type, wire_t, val_t) \ - INLINE val_t upb_wvtov_ ## type(wire_t s) - -#define GET(type, v_or_f, wire_t, val_t, member_name) \ - INLINE const uint8_t *upb_get_ ## type(const uint8_t *buf, const uint8_t *end, \ - val_t *d, upb_status *status) { \ - wire_t tmp = 0; \ - const uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \ - *d = upb_wvtov_ ## type(tmp); \ - return ret; \ - } - -#define T(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t); /* prototype for GET below */ \ - GET(type, v_or_f, wire_t, val_t, member_name) \ - WVTOV(type, wire_t, val_t) - -T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } -T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; } -T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; } -T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; } -T(DOUBLE, f, uint64_t, double, _double) { - upb_value v; - v.uint64 = s; - return v._double; -} -T(FLOAT, f, uint32_t, float, _float) { - upb_value v; - v.uint32 = s; - return v._float; -} - -#undef WVTOV -#undef GET -#undef T - -// Parses a tag, places the result in *tag. 
-INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - - -// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version -// handles 1 and 2 byte varints). -const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, - uint64_t *val, upb_status *status) -{ - const uint8_t *const maxend = buf + 10; - uint8_t last = 0x80; - *val = 0; - int bitpos; - - for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7) - *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos; - - if(buf >= end && buf <= maxend && (last & 0x80)) { - upb_seterr(status, UPB_STATUS_NEED_MORE_DATA, - "Provided data ended in the middle of a varint.\n"); - buf = end; - } else if(buf > maxend) { - upb_seterr(status, UPB_ERROR_UNTERMINATED_VARINT, - "Varint was unterminated after 10 bytes.\n"); - buf = end; - } - - return buf; -} - -const uint8_t *upb_decode_wire_value(uint8_t *buf, uint8_t *end, - upb_wire_type_t wt, upb_wire_value *wv, - upb_status *status) -{ - switch(wt) { - case UPB_WIRE_TYPE_VARINT: - return upb_get_v_uint64_t(buf, end, &wv->varint, status); - case UPB_WIRE_TYPE_64BIT: - return upb_get_f_uint64_t(buf, end, &wv->_64bit, status); - case UPB_WIRE_TYPE_32BIT: - return upb_get_f_uint32_t(buf, end, &wv->_32bit, status); - default: - status->code = UPB_STATUS_ERROR; // Doesn't handle delimited, groups. - return end; - } -} - -// Advances buf past the current wire value (of type wt), saving the result in -// outbuf. 
-static const uint8_t *skip_wire_value(const uint8_t *buf, const uint8_t *end, - upb_wire_type_t wt, upb_status *status) -{ - switch(wt) { - case UPB_WIRE_TYPE_VARINT: - return upb_skip_v_uint64_t(buf, end, status); - case UPB_WIRE_TYPE_64BIT: - return upb_skip_f_uint64_t(buf, end, status); - case UPB_WIRE_TYPE_32BIT: - return upb_skip_f_uint32_t(buf, end, status); - case UPB_WIRE_TYPE_START_GROUP: - // TODO: skip to matching end group. - case UPB_WIRE_TYPE_END_GROUP: - return buf; - default: - status->code = UPB_STATUS_ERROR; - return end; - } -} - -static const uint8_t *upb_decode_value(const uint8_t *buf, const uint8_t *end, - upb_field_type_t ft, upb_valueptr v, - upb_status *status) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_get_ ## t(buf, end, v.member_name, status); - - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: return end; - } - -#undef CASE -} - -// The decoder keeps a stack with one entry per level of recursion. -// upb_decoder_frame is one frame of that stack. -typedef struct { - upb_msgdef *msgdef; - upb_fielddef *field; - size_t end_offset; // For groups, 0. -} upb_decoder_frame; - -struct upb_decoder { - // Immutable state of the decoder. - upb_msgdef *toplevel_msgdef; - upb_sink *sink; - - // State pertaining to a particular decode (resettable). - // Stack entries store the offset where the submsg ends (for groups, 0). 
- upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; - size_t completed_offset; - void *udata; -}; - -upb_decoder *upb_decoder_new(upb_msgdef *msgdef) -{ - upb_decoder *d = malloc(sizeof(*d)); - d->toplevel_msgdef = msgdef; - d->limit = &d->stack[UPB_MAX_NESTING]; - return d; -} - -void upb_decoder_free(upb_decoder *d) -{ - free(d); -} - -void upb_decoder_reset(upb_decoder *d, upb_sink *sink) -{ - d->top = d->stack; - d->completed_offset = 0; - d->sink = sink; - d->top->msgdef = d->toplevel_msgdef; - // The top-level message is not delimited (we can keep receiving data for it - // indefinitely), so we treat it like a group. - d->top->end_offset = 0; -} - -static const void *get_msgend(upb_decoder *d, const uint8_t *start) -{ - if(d->top->end_offset > 0) - return start + (d->top->end_offset - d->completed_offset); - else - return (void*)UINTPTR_MAX; // group. -} - -static bool isgroup(const void *submsg_end) -{ - return submsg_end == (void*)UINTPTR_MAX; -} - -extern upb_wire_type_t upb_expected_wire_types[]; -// Returns true if wt is the correct on-the-wire type for ft. -INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { - // This doesn't currently support packed arrays. - return upb_types[ft].expected_wire_type == wt; -} - - -// Pushes a new stack frame for a submessage with the given len (which will -// be zero if the submessage is a group). 
// Pushes a new frame onto the decoder stack for the submessage or group field
// f.  f is recorded on the frame being left (pop() reads it back to report
// the end of the submessage) and d->top is advanced.  If the new top reaches
// d->limit, status is set to UPB_ERROR_MAX_NESTING_EXCEEDED and NULL is
// returned with d->top left advanced.  Otherwise the new frame's end_offset is
// set to d->completed_offset + submsg_len, its msgdef is taken from f->def,
// the sink is notified through upb_sink_onstart(), and get_msgend() for the
// new frame is returned.
//
// NOTE(review): upb_decoder_decode() reads d->top->msgdef right after calling
// push() and only checks status afterwards; on the NULL return d->top is at or
// past d->limit -- confirm that read stays in bounds.
static const uint8_t *push(upb_decoder *d, const uint8_t *start,
                           uint32_t submsg_len, upb_fielddef *f,
                           upb_status *status)
{
  d->top->field = f;
  d->top++;
  if(d->top >= d->limit) {
    upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED,
               "Nesting exceeded maximum (%d levels)\n",
               UPB_MAX_NESTING);
    return NULL;
  }
  upb_decoder_frame *frame = d->top;
  frame->end_offset = d->completed_offset + submsg_len;
  frame->msgdef = upb_downcast_msgdef(f->def);

  upb_sink_onstart(d->sink, f, status);
  return get_msgend(d, start);
}

// Pops a stack frame, returning a pointer for where the next submsg should
// end (or a pointer that is out of range for a group).  The sink is told that
// the field stored on the frame we return to has ended.
static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status)
{
  d->top--;
  upb_sink_onend(d->sink, d->top->field, status);
  return get_msgend(d, start);
}


// Decodes the bytes of str, reporting what it finds to d->sink.  Returns the
// number of bytes up to the end of the last fully decoded tag/value pair and
// adds that amount to d->completed_offset.  Decoding stops early when the sink
// returns something other than UPB_SINK_CONTINUE or when an error is recorded
// in status; the return value is computed the same way in every case.
size_t upb_decoder_decode(upb_decoder *d, upb_strptr str, upb_status *status)
{
  // buf is our current offset, moves from start to end.
  const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str);
  const uint8_t *const start = buf;  // ptr equivalent of d->completed_offset
  const uint8_t *const end = buf + upb_strlen(str);

  // When we have fully decoded a tag/value pair, we advance this.
  const uint8_t *completed = buf;

  const uint8_t *submsg_end = get_msgend(d, start);
  upb_msgdef *msgdef = d->top->msgdef;
  upb_sink_status sink_status = UPB_SINK_CONTINUE;

  // We need to check the status of operations that can fail, but we do so as
  // late as possible to avoid introducing branches that have to wait on
  // (status->code) which must be loaded from memory.  We must always check
  // before calling a user callback.
#define CHECK_STATUS() do { if(!upb_ok(status)) goto err; } while(0)

  // Main loop: executed once per tag/field pair.
  while(sink_status == UPB_SINK_CONTINUE && buf < end) {
    // Parse/handle tag.
    // NOTE(review): tag.wire_type is tested before status is checked;
    // presumably decode_tag() always writes *tag, even on failure -- confirm.
    upb_tag tag;
    buf = decode_tag(buf, end, &tag, status);
    if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
      CHECK_STATUS();
      if(!isgroup(submsg_end)) {
        upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current "
                   "message is not a group, byte offset: %zd",
                   d->completed_offset + (completed - start));
        goto err;
      }
      submsg_end = pop(d, start, status);
      msgdef = d->top->msgdef;
      completed = buf;
      continue;
    }

    // Look up field by tag number.  f is NULL for unknown fields.
    upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number);

    // Parse/handle field.
    if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
      int32_t delim_len;
      buf = upb_get_INT32(buf, end, &delim_len, status);
      CHECK_STATUS();  // Checking decode_tag() and upb_get_INT32().
      const uint8_t *delim_end = buf + delim_len;
      if(f && f->type == UPB_TYPE(MESSAGE)) {
        // The frame's length is measured from the start of this buffer.
        submsg_end = push(d, start, delim_end - start, f, status);
        msgdef = d->top->msgdef;
      } else {
        if(f && upb_isstringtype(f->type)) {
          int32_t str_start = buf - start;
          // NOTE(review): len holds str_start + delim_len, i.e. the end
          // offset of the string inside str rather than its length -- confirm
          // which of the two upb_sink_onstr() expects.
          uint32_t len = str_start + delim_len;
          sink_status = upb_sink_onstr(d->sink, f, str, str_start, len, status);
        } // else { TODO: packed arrays }
        // If field was not found, it is skipped silently.
        buf = delim_end;  // Could be >end.
      }
    } else {
      if(!f || !upb_check_type(tag.wire_type, f->type)) {
        // Unknown field or wire type that does not fit the field: skip it.
        buf = skip_wire_value(buf, end, tag.wire_type, status);
      } else if (f->type == UPB_TYPE(GROUP)) {
        // Groups have no length prefix, hence submsg_len 0.
        submsg_end = push(d, start, 0, f, status);
        msgdef = d->top->msgdef;
      } else {
        upb_value val;
        buf = upb_decode_value(buf, end, f->type, upb_value_addrof(&val),
                               status);
        CHECK_STATUS();  // Checking upb_decode_value().
        sink_status = upb_sink_onvalue(d->sink, f, val, status);
      }
    }
    CHECK_STATUS();

    // Close every submessage that ends exactly at the current position.
    while(buf >= submsg_end) {
      if(buf > submsg_end) {
        upb_seterr(status, UPB_STATUS_ERROR, "Expected submsg end offset "
                   "did not lie on a tag/value boundary.");
        goto err;
      }
      submsg_end = pop(d, start, status);
      msgdef = d->top->msgdef;
    }
    // while(buf < d->packed_end) { TODO: packed arrays }
    completed = buf;
  }

  // Normal exit falls through to the same accounting as the error path.
  size_t read;
err:
  read = (char*)completed - (char*)start;
  d->completed_offset += read;
  return read;
}

/*
 * diff --git a/src/upb_decoder.h b/src/upb_decoder.h
 * deleted file mode 100644
 * index b84c149..0000000
 * --- a/src/upb_decoder.h
 * +++ /dev/null
 * @@ -1,56 +0,0 @@
 */
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * upb_decoder implements a high performance, callback-based, stream-oriented
 * decoder (comparable to the SAX model in XML parsers).  For parsing protobufs
 * into in-memory messages (a more DOM-like model), see the routines in
 * upb_msg.h, which are layered on top of this decoder.
 *
 * TODO: the decoder currently does not support returning unknown values.  This
 * can easily be added when it is needed.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 */

#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_

#include <stdbool.h>
#include <stdint.h>
#include "upb.h"
#include "descriptor.h"

#ifdef __cplusplus
extern "C" {
#endif

/* upb_decoder *****************************************************************/

// A upb_decoder decodes the binary protocol buffer format, writing the data it
// decodes to a upb_sink.
struct upb_decoder;
typedef struct upb_decoder upb_decoder;

// Allocates and frees a upb_decoder, respectively.
upb_decoder *upb_decoder_new(upb_msgdef *md);
void upb_decoder_free(upb_decoder *p);

// Resets the internal state of an already-allocated decoder.
// This puts it in a state where it has not seen any data, and expects the
// next data to be from the beginning of a new protobuf.  Parsers must be reset
// before they can be used.  A decoder can be reset multiple times.
void upb_decoder_reset(upb_decoder *p, upb_sink *sink);

// Decodes protobuf data out of str, returning how much data was decoded.  The
// next call to upb_decoder_decode should begin with the first byte that was
// not decoded.  "status" indicates whether an error occurred.
//
// TODO: provide the following guarantee:
//   retval will always be >= len.
size_t upb_decoder_decode(upb_decoder *p, upb_strptr str, upb_status *status);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_DECODER_H_ */

/*
 * diff --git a/src/upb_def.c b/src/upb_def.c
 * deleted file mode 100644
 * index 7c9777d..0000000
 * --- a/src/upb_def.c
 * +++ /dev/null
 * @@ -1,823 +0,0 @@
 */
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
 */

#include <stdlib.h>
#include <limits.h>
#include "descriptor.h"
#include "upb_def.h"
#include "upb_data.h"

/* Rounds p up to a multiple of t; p is returned unchanged when it already is
 * one.  Both arguments are evaluated more than once, so they must be free of
 * side effects. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))

/* Divides numerator by denominator, rounding the quotient up.  Returns 0 when
 * numerator is zero or negative.  Assumes denominator is positive -- TODO
 * confirm for any caller that passes something other than a constant. */
static int div_round_up(int numerator, int denominator) {
  /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
  return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
}

/* upb_def ********************************************************************/

// Defs are reference counted, but can have cycles when types are
// self-recursive or mutually recursive, so we need to be capable of collecting
// the cycles.  In our situation defs are immutable (so cycles cannot be
// created or destroyed post-initialization).  We need to be thread-safe but
// want to avoid locks if at all possible and rely only on atomic operations.
//
// Our scheme is as follows.
// First we give each def a flag indicating whether it is part of a cycle or
// not.  Because defs are immutable, this flag will never change.  For acyclic
// defs, we can use a naive algorithm and avoid the overhead of dealing with
// cycles.  Most defs will be acyclic, and most cycles will be very short.
//
// For defs that participate in cycles we keep two reference counts.  One
// tracks references that come from outside the cycle (we call these external
// references), and is incremented and decremented like a regular refcount.
// The other is a cycle refcount, and works as follows.  Every cycle is
// considered distinct, even if two cycles share members.  For example, this
// graph has two distinct cycles:
//
//   A-->B-->C
//   ^   |   |
//   +---+---+
//
// The cycles in this graph are AB and ABC.  When A's external refcount
// transitions from 0->1, we say that A takes "cycle references" on both
// cycles.  Taking a cycle reference means incrementing the cycle refcount of
// all defs in the cycle.  Since A and B are common to both cycles, A and B's
// cycle refcounts will be incremented by two, and C's will be incremented by
// one.  Likewise, when A's external refcount transitions from 1->0, we
// decrement A and B's cycle refcounts by two and C's by one.  We collect a
// cyclic type when its cycle refcount drops to zero.  A precondition for this
// is that the external refcount has dropped to zero also.
//
// This algorithm is relatively cheap, since it only requires extra work when
// the external refcount on a cyclic type transitions from 0->1 or 1->0.
// Forward declarations for the type-specific destructors dispatched by
// def_free(); their definitions appear later in this file.
static void msgdef_free(upb_msgdef *m);
static void enumdef_free(upb_enumdef *e);
static void unresolveddef_free(struct _upb_unresolveddef *u);

// Frees def by dispatching on def->type to the matching destructor.  Service
// and extension defs are not implemented and trip an assertion, as does any
// unknown type value.
static void def_free(upb_def *def)
{
  switch(def->type) {
    case UPB_DEF_MSG:
      msgdef_free(upb_downcast_msgdef(def));
      break;
    case UPB_DEF_ENUM:
      enumdef_free(upb_downcast_enumdef(def));
      break;
    case UPB_DEF_SVC:
      assert(false);  /* Unimplemented. */
      break;
    case UPB_DEF_EXT:
      assert(false);  /* Unimplemented. */
      break;
    case UPB_DEF_UNRESOLVED:
      unresolveddef_free(upb_downcast_unresolveddef(def));
      break;
    default:
      assert(false);
  }
}

// Depth-first search for all cycles that include cycle_base.  Returns the
// number of paths from m that lead to cycle_base, which is equivalent to the
// number of cycles m is in that include cycle_base.
//
// cycle_base is NULL on the outermost call, in which case m itself becomes the
// base.  ref selects whether the path count is added to (true) or subtracted
// from (false) the cycle_refcount of every def visited.
//
// open_defs tracks the set of nodes that are currently being visited in the
// search so we can stop the search if we detect a cycle that does not involve
// cycle_base.  We can't color the nodes as we go by writing to a member of the
// def, because another thread could be performing the search concurrently.
// The search also gives up (returning 0) once UPB_MAX_TYPE_CYCLE_LEN defs are
// open, which is the capacity callers allocate for open_defs.
static int cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base,
                              upb_msgdef **open_defs, int num_open_defs,
                              bool ref) {
  bool found = false;
  for(int i = 0; i < num_open_defs; i++) {
    if(open_defs[i] == m) {
      // We encountered a cycle that did not involve cycle_base.
      found = true;
      break;
    }
  }

  if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) {
    return 0;
  } else if(m == cycle_base) {
    // Back at the base: exactly one path (this one) closes a cycle.
    return 1;
  } else {
    int path_count = 0;
    if(cycle_base == NULL) {
      cycle_base = m;
    } else {
      open_defs[num_open_defs++] = m;
    }
    // Only submessage fields whose target is flagged cyclic can lead back.
    for(int i = 0; i < m->num_fields; i++) {
      upb_fielddef *f = &m->fields[i];
      upb_def *def = f->def;
      if(upb_issubmsg(f) && def->is_cyclic) {
        upb_msgdef *sub_m = upb_downcast_msgdef(def);
        path_count += cycle_ref_or_unref(sub_m, cycle_base, open_defs,
                                         num_open_defs, ref);
      }
    }
    if(ref) {
      upb_atomic_add(&m->cycle_refcount, path_count);
    } else {
      // NOTE(review): the def is freed when upb_atomic_add() returns nonzero;
      // presumably that signals the count reached zero -- confirm against
      // upb_atomic.h.
      if(upb_atomic_add(&m->cycle_refcount, -path_count))
        def_free(UPB_UPCAST(m));
    }
    return path_count;
  }
}

// Called from upb_def_unref() (upb_def.h) when upb_atomic_unref() returns
// true.  Acyclic defs are freed immediately; cyclic defs release the cycle
// references they hold instead.
void _upb_def_reftozero(upb_def *def) {
  if(def->is_cyclic) {
    upb_msgdef *m = upb_downcast_msgdef(def);
    upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
    cycle_ref_or_unref(m, NULL, open_defs, 0, false);
  } else {
    def_free(def);
  }
}

// Called from upb_def_ref() (upb_def.h) for cyclic defs when upb_atomic_ref()
// returns true.  Takes cycle references on every cycle def belongs to.
void _upb_def_cyclic_ref(upb_def *def) {
  upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
  cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true);
}

// Initializes the members shared by every def: the type, a cleared is_cyclic
// flag and search depth, a frozen ref on fqname, and a refcount of 1.
static void upb_def_init(upb_def *def, enum upb_def_type type,
                         upb_strptr fqname) {
  def->type = type;
  def->is_cyclic = 0;  // We detect this later, after resolving refs.
  def->search_depth = 0;
  def->fqname = upb_string_getref(fqname, UPB_REF_FROZEN);
  upb_atomic_refcount_init(&def->refcount, 1);
}

// Releases what upb_def_init() acquired (the ref on fqname).
static void upb_def_uninit(upb_def *def) {
  upb_string_unref(def->fqname);
}

/* upb_unresolveddef **********************************************************/

// Placeholder def that stands in for a type reference until addfd() resolves
// it; name is the type name exactly as given in the descriptor.
typedef struct _upb_unresolveddef {
  upb_def base;
  upb_strptr name;
} upb_unresolveddef;

// Allocates a placeholder for the type named str.  The ref obtained on str is
// stored in def->name; upb_def_init() takes a further ref for base.fqname.
static upb_unresolveddef *upb_unresolveddef_new(upb_strptr str) {
  upb_unresolveddef *def = malloc(sizeof(*def));
  upb_strptr name = upb_string_getref(str, UPB_REF_THREADUNSAFE_READONLY);
  upb_def_init(&def->base, UPB_DEF_UNRESOLVED, name);
  def->name = name;
  return def;
}

// Drops both string refs taken by upb_unresolveddef_new() and frees def.
static void unresolveddef_free(struct _upb_unresolveddef *def) {
  upb_string_unref(def->name);
  upb_def_uninit(&def->base);
  free(def);
}

/* upb_fielddef ***************************************************************/

// Fills in f from the field descriptor fd: type, label, number and a frozen
// ref on the name.  When fd carries a type_name, f->def is set to a new, owned
// unresolved placeholder for it; otherwise f->def is NULL and unowned.  Other
// members of f are left untouched.
static void fielddef_init(upb_fielddef *f,
                          google_protobuf_FieldDescriptorProto *fd)
{
  f->type = fd->type;
  f->label = fd->label;
  f->number = fd->number;
  f->name = upb_string_getref(fd->name, UPB_REF_FROZEN);
  f->def = NULL;
  f->owned = false;
  // A type_name must be present exactly when the field type calls for a def.
  assert(fd->set_flags.has.type_name == upb_hasdef(f));
  if(fd->set_flags.has.type_name) {
    f->def = UPB_UPCAST(upb_unresolveddef_new(fd->type_name));
    f->owned = true;
  }
}

// Heap-allocates a fielddef and initializes it from fd.  Release it with
// fielddef_free().
static upb_fielddef *fielddef_new(google_protobuf_FieldDescriptorProto *fd)
{
  upb_fielddef *f = malloc(sizeof(*f));
  fielddef_init(f, fd);
  return f;
}

// Releases the refs held by f (its name, and its def when the field has one
// and owns the ref) without freeing f itself.
static void fielddef_uninit(upb_fielddef *f)
{
  upb_string_unref(f->name);
  if(upb_hasdef(f) && f->owned) {
    upb_def_unref(f->def);
  }
}

// Counterpart of fielddef_new(): releases f's refs and frees it.
static void fielddef_free(upb_fielddef *f) {
  fielddef_uninit(f);
  free(f);
}

// Copies src into dst member by member, then gives dst refs of its own: one on
// the name and, when the field type has a def, one on the def (which dst is
// then marked as owning).
static void fielddef_copy(upb_fielddef *dst, upb_fielddef *src)
{
  *dst = *src;
  dst->name = upb_string_getref(src->name, UPB_REF_FROZEN);
  if(upb_hasdef(src)) {
    upb_def_ref(dst->def);
    dst->owned = true;
  }
}

// Callback for sorting fields.
-static int compare_fields(upb_fielddef *f1, upb_fielddef *f2) { - // Required fields go before non-required. - bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED; - if(req1 != req2) { - return req2 - req1; - } else { - // Within required and non-required field lists, list in number order. - // TODO: consider ordering by data size to reduce padding. */ - return f1->number - f2->number; - } -} - -static int compare_fielddefs(const void *e1, const void *e2) { - return compare_fields(*(void**)e1, *(void**)e2); -} - -static int compare_fds(const void *e1, const void *e2) { - upb_fielddef f1, f2; - fielddef_init(&f1, *(void**)e1); - fielddef_init(&f2, *(void**)e2); - int ret = compare_fields(&f1, &f2); - fielddef_uninit(&f1); - fielddef_uninit(&f2); - return ret; -} - -void upb_fielddef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num) -{ - qsort(fds, num, sizeof(*fds), compare_fds); -} - -static void fielddef_sort(upb_fielddef **defs, size_t num) -{ - qsort(defs, num, sizeof(*defs), compare_fielddefs); -} - -/* upb_msgdef *****************************************************************/ - -static upb_msgdef *msgdef_new(upb_fielddef **fields, int num_fields, - upb_strptr fqname, upb_status *status) -{ - if(num_fields > UPB_MAX_FIELDS) { - upb_seterr(status, UPB_STATUS_ERROR, - "Tried to create a msgdef with more than %d fields", num_fields); - free(fields); - return NULL; - } - upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG, fqname); - upb_atomic_refcount_init(&m->cycle_refcount, 0); - upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent)); - - m->num_fields = num_fields; - m->set_flags_bytes = div_round_up(m->num_fields, 8); - // These are incremented in the loop. - m->num_required_fields = 0; - m->size = m->set_flags_bytes + 4; // 4 for the refcount. 
- m->fields = malloc(sizeof(upb_fielddef) * num_fields); - - size_t max_align = 0; - for(int i = 0; i < num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - upb_type_info *type_info = &upb_types[fields[i]->type]; - fielddef_copy(f, fields[i]); - - // General alignment rules are: each member must be at an address that is a - // multiple of that type's alignment. Also, the size of the structure as - // a whole must be a multiple of the greatest alignment of any member. */ - f->field_index = i; - size_t offset = ALIGN_UP(m->size, type_info->align); - f->byte_offset = offset - 4; // Offsets are relative to the refcount. - m->size = offset + type_info->size; - max_align = UPB_MAX(max_align, type_info->align); - if(f->label == UPB_LABEL(REQUIRED)) { - // We currently rely on the fact that required fields are always sorted - // to occur before non-required fields. - m->num_required_fields++; - } - - // Insert into the tables. - upb_itof_ent itof_ent = {{f->number, 0}, f}; - upb_ntof_ent ntof_ent = {{f->name, 0}, f}; - upb_inttable_insert(&m->itof, &itof_ent.e); - upb_strtable_insert(&m->ntof, &ntof_ent.e); - } - - if(max_align > 0) m->size = ALIGN_UP(m->size, max_align); - return m; -} - -static void msgdef_free(upb_msgdef *m) -{ - for (upb_field_count_t i = 0; i < m->num_fields; i++) - fielddef_uninit(&m->fields[i]); - free(m->fields); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); - upb_def_uninit(&m->base); - free(m); -} - -static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) { - (void)m; - if(f->owned) upb_def_unref(f->def); - f->def = def; - // We will later make the ref unowned if it is a part of a cycle. 
- f->owned = true; - upb_def_ref(def); -} - -/* upb_enumdef ****************************************************************/ - -typedef struct { - upb_strtable_entry e; - uint32_t value; -} ntoi_ent; - -typedef struct { - upb_inttable_entry e; - upb_strptr string; -} iton_ent; - -static upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed, - upb_strptr fqname) -{ - upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM, fqname); - int num_values = ed->set_flags.has.value ? - google_protobuf_EnumValueDescriptorProto_array_len(ed->value) : 0; - upb_strtable_init(&e->ntoi, num_values, sizeof(ntoi_ent)); - upb_inttable_init(&e->iton, num_values, sizeof(iton_ent)); - - for(int i = 0; i < num_values; i++) { - google_protobuf_EnumValueDescriptorProto *value = - google_protobuf_EnumValueDescriptorProto_array_get(ed->value, i); - ntoi_ent ntoi_ent = {{value->name, 0}, value->number}; - iton_ent iton_ent = {{value->number, 0}, value->name}; - upb_strtable_insert(&e->ntoi, &ntoi_ent.e); - upb_inttable_insert(&e->iton, &iton_ent.e); - } - return e; -} - -static void enumdef_free(upb_enumdef *e) { - upb_strtable_free(&e->ntoi); - upb_inttable_free(&e->iton); - upb_def_uninit(&e->base); - free(e); -} - -static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) { - iter->state = ent; - iter->name = ent->e.key; - iter->val = ent->value; -} - -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) { - // We could iterate over either table here; the choice is arbitrary. 
- ntoi_ent *ent = upb_strtable_begin(&e->ntoi); - iter->e = e; - fill_iter(iter, ent); -} - -void upb_enum_next(upb_enum_iter *iter) { - ntoi_ent *ent = iter->state; - assert(ent); - ent = upb_strtable_next(&iter->e->ntoi, &ent->e); - iter->state = ent; - if(ent) fill_iter(iter, ent); -} - -bool upb_enum_done(upb_enum_iter *iter) { - return iter->state == NULL; -} - -/* symtab internal ***********************************************************/ - -typedef struct { - upb_strtable_entry e; - upb_def *def; -} symtab_ent; - -/* Search for a character in a string, in reverse. */ -static int my_memrchr(char *data, char c, size_t len) -{ - int off = len-1; - while(off > 0 && data[off] != c) --off; - return off; -} - -/* Given a symbol and the base symbol inside which it is defined, find the - * symbol's definition in t. */ -static symtab_ent *resolve(upb_strtable *t, upb_strptr base, upb_strptr symbol) -{ - if(upb_strlen(base) + upb_strlen(symbol) + 1 >= UPB_SYMBOL_MAXLEN || - upb_strlen(symbol) == 0) return NULL; - - if(upb_string_getrobuf(symbol)[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' - upb_strptr sym_str = upb_strslice(symbol, 1, INT_MAX); - symtab_ent *e = upb_strtable_lookup(t, sym_str); - upb_string_unref(sym_str); - return e; - } else { - // Remove components from base until we find an entry or run out. - upb_strptr sym_str = upb_string_new(); - int baselen = upb_strlen(base); - while(1) { - // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol - upb_strlen_t len = baselen + upb_strlen(symbol) + 1; - char *buf = upb_string_getrwbuf(sym_str, len); - memcpy(buf, upb_string_getrobuf(base), baselen); - buf[baselen] = UPB_SYMBOL_SEPARATOR; - memcpy(buf + baselen + 1, upb_string_getrobuf(symbol), upb_strlen(symbol)); - - symtab_ent *e = upb_strtable_lookup(t, sym_str); - if (e) return e; - else if(baselen == 0) return NULL; /* No more scopes to try. 
*/ - - baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); - } - } -} - -/* Joins strings together, for example: - * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" - * Caller owns a ref on the returned string. */ -static upb_strptr join(upb_strptr base, upb_strptr name) { - upb_strptr joined = upb_strdup(base); - upb_strlen_t len = upb_strlen(joined); - if(len > 0) { - upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; - } - upb_strcat(joined, name); - return joined; -} - -static upb_strptr try_define(upb_strtable *t, upb_strptr base, - upb_strptr name, upb_status *status) -{ - if(upb_string_isnull(name)) { - upb_seterr(status, UPB_STATUS_ERROR, - "symbol in context '" UPB_STRFMT "' does not have a name", - UPB_STRARG(base)); - return UPB_STRING_NULL; - } - upb_strptr fqname = join(base, name); - if(upb_strtable_lookup(t, fqname)) { - upb_seterr(status, UPB_STATUS_ERROR, - "attempted to redefine symbol '" UPB_STRFMT "'", - UPB_STRARG(fqname)); - upb_string_unref(fqname); - return UPB_STRING_NULL; - } - return fqname; -} - -static void insert_enum(upb_strtable *t, - google_protobuf_EnumDescriptorProto *ed, - upb_strptr base, upb_status *status) -{ - upb_strptr name = ed->set_flags.has.name ? ed->name : UPB_STRING_NULL; - upb_strptr fqname = try_define(t, base, name, status); - if(upb_string_isnull(fqname)) return; - - symtab_ent e; - e.e.key = fqname; - e.def = UPB_UPCAST(enumdef_new(ed, fqname)); - upb_strtable_insert(t, &e.e); - upb_string_unref(fqname); -} - -static void insert_message(upb_strtable *t, google_protobuf_DescriptorProto *d, - upb_strptr base, bool sort, upb_status *status) -{ - upb_strptr name = d->set_flags.has.name ? d->name : UPB_STRING_NULL; - upb_strptr fqname = try_define(t, base, name, status); - if(upb_string_isnull(fqname)) return; - - int num_fields = d->set_flags.has.field ? 
- google_protobuf_FieldDescriptorProto_array_len(d->field) : 0; - symtab_ent e; - e.e.key = fqname; - - // Gather our list of fields, sorting if necessary. - upb_fielddef **fielddefs = malloc(sizeof(*fielddefs) * num_fields); - for (int i = 0; i < num_fields; i++) { - google_protobuf_FieldDescriptorProto *fd = - google_protobuf_FieldDescriptorProto_array_get(d->field, i); - fielddefs[i] = fielddef_new(fd); - } - if(sort) fielddef_sort(fielddefs, num_fields); - - // Create the msgdef with that list of fields. - e.def = UPB_UPCAST(msgdef_new(fielddefs, num_fields, fqname, status)); - - // Cleanup. - for (int i = 0; i < num_fields; i++) fielddef_free(fielddefs[i]); - free(fielddefs); - - if(!upb_ok(status)) goto error; - - upb_strtable_insert(t, &e.e); - - /* Add nested messages and enums. */ - if(d->set_flags.has.nested_type) - for(unsigned int i = 0; i < google_protobuf_DescriptorProto_array_len(d->nested_type); i++) - insert_message(t, google_protobuf_DescriptorProto_array_get(d->nested_type, i), fqname, sort, status); - - if(d->set_flags.has.enum_type) - for(unsigned int i = 0; i < google_protobuf_EnumDescriptorProto_array_len(d->enum_type); i++) - insert_enum(t, google_protobuf_EnumDescriptorProto_array_get(d->enum_type, i), fqname, status); - -error: - // Free the ref we got from try_define(). - upb_string_unref(fqname); -} - -static bool find_cycles(upb_msgdef *m, int search_depth, upb_status *status) -{ - if(search_depth > UPB_MAX_TYPE_DEPTH) { - // There are many situations in upb where we recurse over the type tree - // (like for example, right now) and an absurdly deep tree could cause us - // to stack overflow on systems with very limited stacks. - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at " - "depth %d in the type graph, which exceeds the maximum type " - "depth of %d.", UPB_UPCAST(m)->fqname, search_depth, - UPB_MAX_TYPE_DEPTH); - return false; - } else if(UPB_UPCAST(m)->search_depth == 1) { - // Cycle! 
- int cycle_len = search_depth - 1; - if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) { - upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved " - "in a cycle of length %d, which exceeds the maximum type " - "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len, - UPB_MAX_TYPE_CYCLE_LEN); - } - return true; - } else if(UPB_UPCAST(m)->search_depth > 0) { - // This was a cycle, but did not originate from the base of our search tree. - // We'll find it when we call find_cycles() on this node directly. - return false; - } else { - UPB_UPCAST(m)->search_depth = ++search_depth; - bool cycle_found = false; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - if(!upb_issubmsg(f)) continue; - upb_def *sub_def = f->def; - upb_msgdef *sub_m = upb_downcast_msgdef(sub_def); - if(find_cycles(sub_m, search_depth, status)) { - cycle_found = true; - UPB_UPCAST(m)->is_cyclic = true; - if(f->owned) { - upb_atomic_unref(&sub_def->refcount); - f->owned = false; - } - } - } - UPB_UPCAST(m)->search_depth = 0; - return cycle_found; - } -} - -static void addfd(upb_strtable *addto, upb_strtable *existingdefs, - google_protobuf_FileDescriptorProto *fd, bool sort, - upb_status *status) -{ - upb_strptr pkg; - if(fd->set_flags.has.package) { - pkg = upb_string_getref(fd->package, UPB_REF_FROZEN); - } else { - pkg = upb_string_new(); - } - - if(fd->set_flags.has.message_type) - for(unsigned int i = 0; i < google_protobuf_DescriptorProto_array_len(fd->message_type); i++) - insert_message(addto, google_protobuf_DescriptorProto_array_get(fd->message_type, i), pkg, sort, status); - - if(fd->set_flags.has.enum_type) - for(unsigned int i = 0; i < google_protobuf_EnumDescriptorProto_array_len(fd->enum_type); i++) - insert_enum(addto, google_protobuf_EnumDescriptorProto_array_get(fd->enum_type, i), pkg, status); - - upb_string_unref(pkg); - - if(!upb_ok(status)) { - // TODO: make sure we don't leak any memory in this case. 
- return; - } - - /* TODO: handle extensions and services. */ - - // Attempt to resolve all references. - symtab_ent *e; - for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - upb_strptr base = e->e.key; - for(upb_field_count_t i = 0; i < m->num_fields; i++) { - upb_fielddef *f = &m->fields[i]; - if(!upb_hasdef(f)) continue; // No resolving necessary. - upb_strptr name = upb_downcast_unresolveddef(f->def)->name; - symtab_ent *found = resolve(existingdefs, base, name); - if(!found) found = resolve(addto, base, name); - upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(!found) { - upb_seterr(status, UPB_STATUS_ERROR, - "could not resolve symbol '" UPB_STRFMT "'" - " in context '" UPB_STRFMT "'", - UPB_STRARG(name), UPB_STRARG(base)); - return; - } else if(found->def->type != expected) { - upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type"); - return; - } - upb_msgdef_resolve(m, f, found->def); - } - } - - // Deal with type cycles. - for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; - - // Do an initial pass over the graph to check that there are no cycles - // longer than the maximum length. We also mark all cyclic defs as such, - // and decrement refs on cyclic defs. - find_cycles(m, 0, status); - upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN]; - cycle_ref_or_unref(m, NULL, open_defs, 0, true); - } -} - -/* upb_symtab *****************************************************************/ - -upb_symtab *upb_symtab_new() -{ - upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_refcount_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent)); - upb_strtable_init(&s->psymtab, 16, sizeof(symtab_ent)); - - // Add descriptor.proto types to private symtable so we can parse descriptors. - // We know there is only 1. 
- google_protobuf_FileDescriptorProto *fd = - google_protobuf_FileDescriptorProto_array_get(upb_file_descriptor_set->file, 0); - upb_status status = UPB_STATUS_INIT; - addfd(&s->psymtab, &s->symtab, fd, false, &status); - if(!upb_ok(&status)) { - fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg); - assert(false); - return NULL; // Indicates that upb is buggy or corrupt. - } - upb_static_string name = - UPB_STATIC_STRING_INIT("google.protobuf.FileDescriptorSet"); - upb_strptr nameptr = UPB_STATIC_STRING_PTR_INIT(name); - symtab_ent *e = upb_strtable_lookup(&s->psymtab, nameptr); - assert(e); - s->fds_msgdef = upb_downcast_msgdef(e->def); - return s; -} - -static void free_symtab(upb_strtable *t) -{ - symtab_ent *e; - for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) - upb_def_unref(e->def); - upb_strtable_free(t); -} - -void _upb_symtab_free(upb_symtab *s) -{ - free_symtab(&s->symtab); - free_symtab(&s->psymtab); - upb_rwlock_destroy(&s->lock); - free(s); -} - -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type) -{ - upb_rwlock_rdlock(&s->lock); - int total = upb_strtable_count(&s->symtab); - // We may only use part of this, depending on how many symbols are of the - // correct type. 
- upb_def **defs = malloc(sizeof(*defs) * total); - symtab_ent *e = upb_strtable_begin(&s->symtab); - int i = 0; - for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { - upb_def *def = e->def; - assert(def); - if(type == UPB_DEF_ANY || def->type == type) - defs[i++] = def; - } - upb_rwlock_unlock(&s->lock); - *count = i; - for(i = 0; i < *count; i++) - upb_def_ref(defs[i]); - return defs; -} - -upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym) -{ - upb_rwlock_rdlock(&s->lock); - symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - - -upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) { - upb_rwlock_rdlock(&s->lock); - symtab_ent *e = resolve(&s->symtab, base, symbol); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); - return ret; -} - -void upb_symtab_addfds(upb_symtab *s, google_protobuf_FileDescriptorSet *fds, - upb_status *status) -{ - if(fds->set_flags.has.file) { - // Insert new symbols into a temporary table until we have verified that - // the descriptor is valid. - upb_strtable tmp; - upb_strtable_init(&tmp, 0, sizeof(symtab_ent)); - - { // Read lock scope - upb_rwlock_rdlock(&s->lock); - for(uint32_t i = 0; i < google_protobuf_FileDescriptorProto_array_len(fds->file); i++) { - addfd(&tmp, &s->symtab, google_protobuf_FileDescriptorProto_array_get(fds->file, i), true, status); - if(!upb_ok(status)) { - free_symtab(&tmp); - upb_rwlock_unlock(&s->lock); - return; - } - } - upb_rwlock_unlock(&s->lock); - } - - // Everything was successfully added, copy from the tmp symtable. 
- { // Write lock scope - upb_rwlock_wrlock(&s->lock); - symtab_ent *e; - for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) { - // We checked for duplicates when we had only the read lock, but it is - // theoretically possible that a duplicate symbol when we dropped the - // read lock to acquire a write lock. - if(upb_strtable_lookup(&s->symtab, e->e.key)) { - upb_seterr(status, UPB_STATUS_ERROR, "Attempted to insert duplicate " - "symbol: " UPB_STRFMT, UPB_STRARG(e->e.key)); - // To truly handle this situation we would need to remove any symbols - // from tmp that were successfully inserted into s->symtab. Because - // this case is exceedingly unlikely, and because our hashtable - // doesn't support deletions right now, we leave them in there, which - // means we must not call free_symtab(&s->symtab), so we will leak it. - break; - } - upb_strtable_insert(&s->symtab, &e->e); - } - upb_rwlock_unlock(&s->lock); - } - upb_strtable_free(&tmp); - } - return; -} - -void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status) -{ - upb_msg *fds = upb_msg_new(s->fds_msgdef); - upb_msg_decodestr(fds, s->fds_msgdef, desc, status); - if(!upb_ok(status)) return; - upb_symtab_addfds(s, (google_protobuf_FileDescriptorSet*)fds, status); - upb_msg_unref(fds, s->fds_msgdef); - return; -} diff --git a/src/upb_def.h b/src/upb_def.h deleted file mode 100644 index 25c7ff6..0000000 --- a/src/upb_def.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * Provides definitions of .proto constructs: - * - upb_msgdef: describes a "message" construct. - * - upb_fielddef: describes a message field. - * - upb_enumdef: describes an enum. - * (TODO: definitions of extensions and services). - * - * Defs are obtained from a upb_symtab object. 
A upb_symtab is empty when constructed, and definitions can be added by
 * supplying serialized descriptors.
 *
 * Defs are immutable and reference-counted.  Symbol tables reference any defs
 * that are the "current" definitions.  If an extension is loaded that adds a
 * field to an existing message, a new msgdef is constructed that includes the
 * new field and the old msgdef is unref'd.  The old msgdef will still be ref'd
 * by messages (if any) that were constructed with that msgdef.
 *
 * This file contains routines for creating and manipulating the definitions
 * themselves.  To create and manipulate actual messages, see upb_msg.h.
 */

#ifndef UPB_DEF_H_
#define UPB_DEF_H_

#include "upb_atomic.h"
#include "upb_table.h"

#ifdef __cplusplus
extern "C" {
#endif

/* upb_def: base class for defs  **********************************************/

// All the different kinds of defs we support.  These correspond 1:1 with
// declarations in a .proto file.
enum upb_def_type {
  UPB_DEF_MSG = 0,
  UPB_DEF_ENUM,
  UPB_DEF_SVC,
  UPB_DEF_EXT,
  // Internal-only, placeholder for a def that hasn't been resolved yet.
  UPB_DEF_UNRESOLVED,

  // For specifying that defs of any type are requested from getdefs.
  UPB_DEF_ANY = -1
};

// This typedef is more space-efficient than declaring an enum var directly.
typedef int8_t upb_def_type_t;

// Members common to every def.  Concrete defs embed this as their first
// member, named "base".
typedef struct {
  upb_strptr fqname;  // Fully qualified.
  upb_atomic_refcount_t refcount;
  upb_def_type_t type;

  // The is_cyclic flag could go in upb_msgdef instead of here, because only
  // messages can be involved in cycles.  However, putting it here is free
  // from a space perspective because structure alignment will otherwise leave
  // three bytes empty after type.  It also makes ref and unref more
  // efficient, because we don't have to downcast to msgdef before checking the
  // is_cyclic flag.
  bool is_cyclic;
  uint16_t search_depth;  // Used during initialization dfs.
} upb_def;

// These must not be called directly!
void _upb_def_cyclic_ref(upb_def *def);
void _upb_def_reftozero(upb_def *def);

// Call to ref/deref a def.  The slow paths run only when the atomic operation
// returns true -- presumably on the 0->1 and 1->0 transitions of the
// refcount; confirm against upb_atomic.h.  On ref, the slow path is taken
// for cyclic defs only.
INLINE void upb_def_ref(upb_def *def) {
  if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def);
}
INLINE void upb_def_unref(upb_def *def) {
  if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}

/* upb_fielddef ***************************************************************/

// A upb_fielddef describes a single field in a message.  It isn't a full def
// in the sense that it derives from upb_def.  It cannot stand on its own; it
// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
// It is also reference-counted.
typedef struct _upb_fielddef {
  upb_atomic_refcount_t refcount;
  upb_field_type_t type;
  upb_label_t label;
  upb_field_number_t number;
  upb_strptr name;
  upb_value default_value;

  // These are set only when this fielddef is part of a msgdef.
  uint32_t byte_offset;           // Where in a upb_msg to find the data.
  upb_field_count_t field_index;  // Indicates set bit.

  // For the case of an enum or a submessage, points to the def for that type.
  // We hold a ref on this def only while "owned" is true; upb_def.c clears
  // the flag (and gives up the ref) for fields that close a type cycle.
  bool owned;
  upb_def *def;
} upb_fielddef;

// A variety of tests about the type of a field.
INLINE bool upb_issubmsg(upb_fielddef *f) {
  return upb_issubmsgtype(f->type);
}
INLINE bool upb_isstring(upb_fielddef *f) {
  return upb_isstringtype(f->type);
}
INLINE bool upb_isarray(upb_fielddef *f) {
  return f->label == UPB_LABEL(REPEATED);
}
// Does the type of this field imply that it should contain an associated def?
-INLINE bool upb_hasdef(upb_fielddef *f) { - return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); -} - -INLINE bool upb_field_ismm(upb_fielddef *f) { - return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f); -} - -INLINE bool upb_elem_ismm(upb_fielddef *f) { - return upb_isstring(f) || upb_issubmsg(f); -} - -// Internal-only interface for the upb compiler. -// Sorts the given fielddefs in-place, according to what we think is an optimal -// ordering of fields. This can change from upb release to upb release. -struct google_protobuf_FieldDescriptorProto; -void upb_fielddef_sortfds(struct google_protobuf_FieldDescriptorProto **fds, - size_t num); - -/* upb_msgdef *****************************************************************/ - -struct google_protobuf_EnumDescriptorProto; -struct google_protobuf_DescriptorProto; - -// Structure that describes a single .proto message type. -typedef struct _upb_msgdef { - upb_def base; - upb_atomic_refcount_t cycle_refcount; - upb_msg *default_msg; // Message with all default values set. - size_t size; - upb_field_count_t num_fields; - uint32_t set_flags_bytes; - uint32_t num_required_fields; // Required fields have the lowest set bytemasks. - upb_fielddef *fields; // We have exclusive ownership of these. - - // Tables for looking up fields by number and name. - upb_inttable itof; // int to field - upb_strtable ntof; // name to field -} upb_msgdef; - -// Hash table entries for looking up fields by name or number. -typedef struct { - upb_inttable_entry e; - upb_fielddef *f; -} upb_itof_ent; -typedef struct { - upb_strtable_entry e; - upb_fielddef *f; -} upb_ntof_ent; - -// Looks up a field by name or number. While these are written to be as fast -// as possible, it will still be faster to cache the results of this lookup if -// possible. These return NULL if no such field is found. 
-INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) { - upb_itof_ent *e = - (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e)); - return e ? e->f : NULL; -} - -INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_strptr name) { - upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); - return e ? e->f : NULL; -} - -/* upb_enumdef ****************************************************************/ - -typedef struct _upb_enumdef { - upb_def base; - upb_strtable ntoi; - upb_inttable iton; -} upb_enumdef; - -typedef int32_t upb_enumval_t; - -// Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoi(upb_enumdef *e, upb_strptr name, upb_enumval_t *num); -upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num); - -// Iteration over name/value pairs. The order is undefined. -// upb_enum_iter i; -// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { -// // ... -// } -typedef struct { - upb_enumdef *e; - void *state; // Internal iteration state. - upb_strptr name; - upb_enumval_t val; -} upb_enum_iter; -void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e); -void upb_enum_next(upb_enum_iter *iter); -bool upb_enum_done(upb_enum_iter *iter); - -/* upb_symtab *****************************************************************/ - -// A SymbolTable is where upb_defs live. It is empty when first constructed. -// Clients add definitions to the symtab by supplying unserialized or -// serialized descriptors (as defined in descriptor.proto). -typedef struct { - upb_atomic_refcount_t refcount; - upb_rwlock_t lock; // Protects all members except the refcount. - upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience. - - // Our symbol tables; we own refs to the defs therein. - upb_strtable symtab; // The main symbol table. - upb_strtable psymtab; // Private symbols, for internal use. -} upb_symtab; - -// Initializes a upb_symtab. 
Contexts are not freed explicitly, but unref'd -// when the caller is done with them. -upb_symtab *upb_symtab_new(void); -void _upb_symtab_free(upb_symtab *s); // Must not be called directly! - -INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } -INLINE void upb_symtab_unref(upb_symtab *s) { - if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); -} - -// Resolves the given symbol using the rules described in descriptor.proto, -// namely: -// -// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like -// scoping rules are used to find the type (i.e. first the nested types -// within this message are searched, then within the parent, on up to the -// root namespace). -// -// If a def is found, the caller owns one ref on the returned def. Otherwise -// returns NULL. -upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol); - -// Find an entry in the symbol table with this exact name. If a def is found, -// the caller owns one ref on the returned def. Otherwise returns NULL. -upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym); - -// Gets an array of pointers to all currently active defs in this symtab. The -// caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are -// returned, otherwise only defs of the required type are returned. -upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type); - -// Adds the definitions in the given serialized descriptor to this symtab. All -// types that are referenced from desc must have previously been defined (or be -// defined in desc). desc may not attempt to define any names that are already -// defined in this symtab. Caller retains ownership of desc. status indicates -// whether the operation was successful or not, and the error message (if any). 
-void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status); - - -/* upb_def casts **************************************************************/ - -// Dynamic casts, for determining if a def is of a particular type at runtime. -#define UPB_DYNAMIC_CAST_DEF(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. */ \ - INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ - if(def->type != UPB_DEF_ ## upper) return NULL; \ - return (struct _upb_ ## lower*)def; \ - } -UPB_DYNAMIC_CAST_DEF(msgdef, MSG); -UPB_DYNAMIC_CAST_DEF(enumdef, ENUM); -UPB_DYNAMIC_CAST_DEF(svcdef, SVC); -UPB_DYNAMIC_CAST_DEF(extdef, EXT); -UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED); -#undef UPB_DYNAMIC_CAST_DEF - -// Downcasts, for when some wants to assert that a def is of a particular type. -// These are only checked if we are building debug. -#define UPB_DOWNCAST_DEF(lower, upper) \ - struct _upb_ ## lower; /* Forward-declare. */ \ - INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ - assert(def->type == UPB_DEF_ ## upper); \ - return (struct _upb_ ## lower*)def; \ - } -UPB_DOWNCAST_DEF(msgdef, MSG); -UPB_DOWNCAST_DEF(enumdef, ENUM); -UPB_DOWNCAST_DEF(svcdef, SVC); -UPB_DOWNCAST_DEF(extdef, EXT); -UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED); -#undef UPB_DOWNCAST_DEF - -#define UPB_UPCAST(ptr) (&(ptr)->base) - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_DEF_H_ */ diff --git a/src/upb_encoder.c b/src/upb_encoder.c deleted file mode 100644 index f1156a8..0000000 --- a/src/upb_encoder.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_encoder.h" - -#include <stdlib.h> -#include "descriptor.h" - -/* Functions for calculating sizes of wire values. 
****************************/ - -static size_t upb_v_uint64_t_size(uint64_t val) { - // Zero still takes one byte, and must be handled before the clz below - // because the result of __builtin_clzll(0) is undefined. - if(val == 0) return 1; -#ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); // 0-based. -#else - int high_bit = 0; - uint64_t tmp = val; - while(tmp >>= 1) high_bit++; -#endif - return high_bit / 7 + 1; -} - -static size_t upb_v_int32_t_size(int32_t val) { - // v_int32's are sign-extended to maintain wire compatibility with int64s. - return upb_v_uint64_t_size((int64_t)val); -} -static size_t upb_v_uint32_t_size(uint32_t val) { - return upb_v_uint64_t_size(val); -} -static size_t upb_f_uint64_t_size(uint64_t val) { - (void)val; // Length is independent of value. - return sizeof(uint64_t); -} -static size_t upb_f_uint32_t_size(uint32_t val) { - (void)val; // Length is independent of value. - return sizeof(uint32_t); -} - -// The biggest possible single value is a 10-byte varint. -#define UPB_MAX_ENCODED_SIZE 10 - - -/* Functions to write wire values. ********************************************/ - -// Since we know in advance the longest that the value could be, we always make -// sure that our buffer is long enough. This saves us from having to perform -// bounds checks. - -// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT). -static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val) -{ - do { - uint8_t byte = val & 0x7f; - val >>= 7; - if(val) byte |= 0x80; - *buf++ = byte; - } while(val); - return buf; -} - -// Puts an unsigned 32-bit varint, verbatim. Never uses the high 32 bits. -static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val) -{ - return upb_put_v_uint64_t(buf, val); -} - -// Puts a signed 32-bit varint, first sign-extending to 64-bits. We do this to -// maintain wire-compatibility with 64-bit signed integers.
-static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val) -{ - return upb_put_v_uint64_t(buf, (int64_t)val); -} - -// Writes val to buf as four bytes, least-significant byte first. -static void upb_put32(uint8_t *buf, uint32_t val) { - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24); -} - -// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). -static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val) -{ - uint8_t *uint32_end = buf + sizeof(uint32_t); -#if UPB_UNALIGNED_READS_OK - *(uint32_t*)buf = val; -#else - upb_put32(buf, val); -#endif - return uint32_end; -} - -// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). -static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val) -{ - uint8_t *uint64_end = buf + sizeof(uint64_t); -#if UPB_UNALIGNED_READS_OK - *(uint64_t*)buf = val; -#else - // Low half first, then the high half in the following four bytes. Both - // halves used to be written to buf, so the high half clobbered the low one - // and bytes 4-7 were never written. - upb_put32(buf, (uint32_t)val); - upb_put32(buf + sizeof(uint32_t), (uint32_t)(val >> 32)); -#endif - return uint64_end; -} - -/* Functions to write and calculate sizes for .proto values. ******************/ - -// Performs zig-zag encoding, which is used by sint32 and sint64. The left -// shift is done on the unsigned value because left-shifting a negative signed -// integer is undefined behavior. -static uint32_t upb_zzenc_32(int32_t n) { return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31); } -static uint64_t upb_zzenc_64(int64_t n) { return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63); } - -/* Use macros to define a set of three functions for each .proto type: - * - * // Converts and writes a .proto value into buf, which the caller must - * // have made large enough to hold the encoded value. Returns a pointer - * // one past the data that was written. - * uint8_t *upb_put_INT32(uint8_t *buf, int32_t val); - * - * // Returns the number of bytes required to encode val. - * size_t upb_get_INT32_size(int32_t val); - * - * // Given a .proto value s (source) convert it to a wire value.
- * uint32_t upb_vtowv_INT32(int32_t s); - */ - -#define VTOWV(type, wire_t, val_t) \ - static wire_t upb_vtowv_ ## type(val_t s) - -#define PUT(type, v_or_f, wire_t, val_t, member_name) \ - static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \ - wire_t tmp = upb_vtowv_ ## type(val); \ - return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \ - } - -#define T(type, v_or_f, wire_t, val_t, member_name) \ - static size_t upb_get_ ## type ## _size(val_t val) { \ - return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \ - } \ - VTOWV(type, wire_t, val_t); /* prototype for PUT below */ \ - PUT(type, v_or_f, wire_t, val_t, member_name) \ - VTOWV(type, wire_t, val_t) - -T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; } -T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; } -T(UINT32, v, uint32_t, uint32_t, uint32) { return s; } -T(UINT64, v, uint64_t, uint64_t, uint64) { return s; } -T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); } -T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); } -T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; } -T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; } -T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; } -T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; } -T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; } -T(DOUBLE, f, uint64_t, double, _double) { - upb_value v; - v._double = s; - return v.uint64; -} -T(FLOAT, f, uint32_t, float, _float) { - upb_value v; - v._float = s; - return v.uint32; -} -#undef VTOWV -#undef PUT -#undef T - -static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - 
CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return buf; - } -#undef CASE -} - -static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v) -{ -#define CASE(t, member_name) \ - case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name); - switch(ft) { - CASE(DOUBLE, _double) - CASE(FLOAT, _float) - CASE(INT32, int32) - CASE(INT64, int64) - CASE(UINT32, uint32) - CASE(UINT64, uint64) - CASE(SINT32, int32) - CASE(SINT64, int64) - CASE(FIXED32, uint32) - CASE(FIXED64, uint64) - CASE(SFIXED32, int32) - CASE(SFIXED64, int64) - CASE(BOOL, _bool) - CASE(ENUM, int32) - default: assert(false); return 0; - } -#undef CASE -} - -static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num, - upb_wire_type_t wt) -{ - return upb_put_UINT32(buf, wt | (num << 3)); -} - -static uint32_t _upb_get_tag_size(upb_field_number_t num) -{ - return upb_get_UINT32_size(num << 3); -} - - -/* upb_sizebuilder ************************************************************/ - -struct upb_sizebuilder { - // Accumulating size for the current level. - uint32_t size; - - // Stack of sizes for our current nesting. - uint32_t stack[UPB_MAX_NESTING], *top; - - // Vector of sizes. - uint32_t *sizes; - int sizes_len; - int sizes_size; - - upb_status status; -}; - -// upb_sink callbacks. -static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, - upb_status *status) -{ - (void)status; - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += _upb_get_value_size(f->type, val); - sb->size += size; - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - (void)status; - (void)str; // String data itself is not used. 
- upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(start >= 0) { - // A delimited field occupies its tag, its length prefix and the payload - // bytes themselves; the encoder emits all three. - uint32_t size = 0; - size += _upb_get_tag_size(f->number); - size += upb_get_UINT32_size(end - start); - size += end - start; - sb->size += size; - } - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - (void)status; - // For submessages the tag size and delimiter are calculated in endcb; here - // we only save the parent's running size and start counting the child. - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - *sb->top = sb->size; - sb->top++; - sb->size = 0; - } else { - assert(f->type == UPB_TYPE(GROUP)); - sb->size += _upb_get_tag_size(f->number); - } - return UPB_SINK_CONTINUE; -} - -static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_sizebuilder *sb = (upb_sizebuilder*)sink; - if(f->type == UPB_TYPE(MESSAGE)) { - if(sb->sizes_len == sb->sizes_size) { - // Grow the size vector before touching any other state, so that a - // failed allocation leaves sb unchanged. Doubling a zero capacity - // would never grow, so start from a small non-zero one. - int new_size = sb->sizes_size > 0 ? sb->sizes_size * 2 : 16; - uint32_t *new_sizes = realloc(sb->sizes, new_size * sizeof(*new_sizes)); - if(!new_sizes) { - upb_seterr(status, UPB_STATUS_ERROR, "Out of memory."); - return UPB_SINK_STOP; - } - sb->sizes = new_sizes; - sb->sizes_size = new_size; - } - sb->top--; - uint32_t child_size = sb->size; - uint32_t parent_size = *sb->top; - sb->sizes[sb->sizes_len++] = child_size; - // The size according to the parent includes the tag size and delimiter of - // the submessage. - parent_size += upb_get_UINT32_size(child_size); - parent_size += _upb_get_tag_size(f->number); - // Include size accumulated in parent before child began. - sb->size = child_size + parent_size; - } else { - assert(f->type == UPB_TYPE(GROUP)); - // As an optimization, we could just add this number twice in startcb, to - // avoid having to recalculate it.
- sb->size += _upb_get_tag_size(f->number); - } - return UPB_SINK_CONTINUE; -} - -upb_sink_callbacks _upb_sizebuilder_sink_vtbl = { - _upb_sizebuilder_valuecb, - _upb_sizebuilder_strcb, - _upb_sizebuilder_startcb, - _upb_sizebuilder_endcb -}; - - -/* upb_sink callbacks *********************************************************/ - -struct upb_encoder { - upb_sink base; - //upb_bytesink *bytesink; - uint32_t *sizes; - int size_offset; -}; - - -// Within one callback we may need to encode up to two separate values. -#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2) - -static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf, - size_t len, upb_status *status) -{ - // TODO: conjure a upb_strptr that points to buf. - //upb_strptr ptr; - (void)s; - (void)buf; - (void)status; - size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr); - if(written < len) { - // TODO: mark to skip "written" bytes next time. - return UPB_SINK_STOP; - } else { - return UPB_SINK_CONTINUE; - } -} - -static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f, - upb_value val, upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - upb_wire_type_t wt = upb_types[f->type].expected_wire_type; - // TODO: handle packed encoding. - ptr = _upb_put_tag(ptr, f->number, wt); - ptr = upb_encode_value(ptr, f->type, val); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(start >= 0) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, end - start); - } - // TODO: properly handle partially consumed strings and partially supplied - // strings. 
- _upb_encoder_push_buf(s, buf, ptr - buf, status); - return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status); -} - -static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type == UPB_TYPE(GROUP)) { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP); - } else { - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED); - ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]); - } - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f, - upb_status *status) -{ - upb_encoder *s = (upb_encoder*)sink; - uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf; - if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE; - ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP); - return _upb_encoder_push_buf(s, buf, ptr - buf, status); -} - -upb_sink_callbacks _upb_encoder_sink_vtbl = { - _upb_encoder_valuecb, - _upb_encoder_strcb, - _upb_encoder_startcb, - _upb_encoder_endcb -}; - diff --git a/src/upb_encoder.h b/src/upb_encoder.h deleted file mode 100644 index b4d0c98..0000000 --- a/src/upb_encoder.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Implements a upb_sink that writes protobuf data to the binary wire format. - * - * For messages that have any submessages, the encoder needs a buffer - * containing the submessage sizes, so they can be properly written at the - * front of each message. Note that groups do *not* have this requirement. - * - * Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details. 
- */ - -#ifndef UPB_ENCODER_H_ -#define UPB_ENCODER_H_ - -#include "upb.h" -#include "upb_sink.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_sizebuilder ************************************************************/ - -// A upb_sizebuilder performs a pre-pass on data to be serialized that gathers -// the sizes of submessages. This size data is required for serialization, -// because we have to know at the beginning of a submessage how many encoded -// bytes the submessage will represent. -struct upb_sizebuilder; -typedef struct upb_sizebuilder upb_sizebuilder; - -upb_sizebuilder *upb_sizebuilder_new(upb_msgdef *md); -void upb_sizebuilder_free(upb_sizebuilder *sb); - -void upb_sizebuilder_reset(upb_sizebuilder *sb); - -// Returns a sink that must be used to perform the pre-pass. Note that the -// pre-pass *must* occur in the opposite order from the actual encode that -// follows, and the data *must* be identical both times (except for the -// reversed order). -upb_sink *upb_sizebuilder_sink(upb_sizebuilder *sb); - - -/* upb_encoder ****************************************************************/ - -// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol -// buffer binary wire format. -struct upb_encoder; -typedef struct upb_encoder upb_encoder; - -upb_encoder *upb_encoder_new(upb_msgdef *md); -void upb_encoder_free(upb_encoder *e); - -// Resets the given upb_encoder such that it is ready to begin encoding. The -// upb_sizebuilder "sb" is used to determine submessage sizes; it must have -// previously been initialized by feeding it the same data in reverse order. -// "sb" may be null if and only if the data contains no submessages; groups -// are ok and do not require sizes to be precalculated. The upb_bytesink -// "out" is where the encoded output data will be sent. -// -// Both "sb" and "out" must live until the encoder is either reset or freed.
-void upb_encoder_reset(upb_encoder *e, upb_sizebuilder *sb, upb_bytesink *out); - -// The upb_sink to which data can be sent to be encoded. Note that this data -// must be identical to the data that was previously given to the sizebuilder -// (if any). -upb_sink *upb_encoder_sink(upb_encoder *e); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ENCODER_H_ */ diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c deleted file mode 100644 index 5db04f6..0000000 --- a/src/upb_inlinedefs.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * This file, if compiled, will contain standalone (non-inlined) versions of - * all inline functions defined in header files. We don't generally use this - * file since we use "static inline" for inline functions (which will put a - * standalone version of the function in any .o file that needs it, but - * compiling this file and dumping the object file will let us inspect how - * inline functions are compiled, so we keep it around. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#define INLINE -#include "upb.h" -#include "upb_data.h" -#include "upb_def.h" -#include "upb_parse.h" -#include "upb_table.h" -#include "upb_text.h" diff --git a/src/upb_sink.h b/src/upb_sink.h deleted file mode 100644 index 5dc5b52..0000000 --- a/src/upb_sink.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - * upb_sink is a general purpose interface for pushing the contents of a - * protobuf from one component to another in a streaming fashion. We call the - * component that calls a upb_sink a "source". By "pushing" we mean that the - * source calls into the sink; the opposite (where a sink calls into the - * source) is known as "pull". In the push model the source gets the main - * loop; in a pull model the sink does. 
- * - * This interface is used as general-purpose glue in upb. For example, the - * parser interface works by implementing a source. Likewise the serialization - * simply implements a sink. Copying one protobuf to another is just a matter - * of using one message as a source and another as a sink. - * - * In terms of efficiency, we would generally expect "push" to be faster if the - * source had more state to track, and "pull" to be faster if the sink had more - * state. The reason is that whoever has the main loop can keep state on the - * stack (and possibly even in callee-save registers), whereas the - * component that is "called into" always needs to reload its state from - * memory. - * - * In terms of programming complexity, it is easier and simpler to have the - * main loop, because you can store state in local variables. - * - * So the assumption inherent in using the push model is that sources are - * generally more complicated and stateful than consumers. For example, in the - * parser case, it has to deal with malformed input and associated errors; in - * comparison, the serializer deals with known-good input. - */ - -#ifndef UPB_SINK_H -#define UPB_SINK_H - -#include "upb_def.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Each of the upb_sink callbacks returns a status of this type. -typedef enum { - // The normal case, where the consumer wants to continue consuming. - UPB_SINK_CONTINUE, - - // The sink did not consume this value, and wants to halt further processing. - // If the source is resumable, it should save the current state so that when - // resumed, the value that was just provided will be replayed. - UPB_SINK_STOP, - - // The consumer wants to skip to the end of the current submessage and - // continue consuming. If we are at the top-level, the rest of the - // data is discarded.
- UPB_SINK_SKIP -} upb_sink_status; - - -typedef struct { - struct upb_sink_callbacks *vtbl; -} upb_sink; - -/* upb_sink callbacks *********************************************************/ - -// The value callback is called for a regular value (ie. not a string or -// submessage). -typedef upb_sink_status (*upb_value_cb)(upb_sink *s, upb_fielddef *f, - upb_value val, upb_status *status); - -// The string callback is called for string data. "str" is the string in which -// the data lives, but it may contain more data than the effective string. -// "start" and "end" indicate the substring of "str" that is the effective -// string. If "start" is <0, this string is a continuation of the previous -// string for this field. If end > upb_strlen(str) then there is more data to -// follow for this string. "end" can also be used as a hint for how much data -// follows, but this is only a hint and is not guaranteed. -// -// The data is supplied this way to give you the opportunity to reference this -// data instead of copying it (perhaps using upb_strslice), or to minimize -// copying if it is unavoidable. -typedef upb_sink_status (*upb_str_cb)(upb_sink *s, upb_fielddef *f, - upb_strptr str, - int32_t start, uint32_t end, - upb_status *status); - -// The start and end callbacks are called when a submessage begins and ends, -// respectively. The caller is responsible for ensuring that the nesting -// level never exceeds UPB_MAX_NESTING. -typedef upb_sink_status (*upb_start_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); -typedef upb_sink_status (*upb_end_cb)(upb_sink *s, upb_fielddef *f, - upb_status *status); - - -/* upb_sink implementation ****************************************************/ - -typedef struct upb_sink_callbacks { - upb_value_cb value_cb; - upb_str_cb str_cb; - upb_start_cb start_cb; - upb_end_cb end_cb; -} upb_sink_callbacks; - -// These macros implement a mini virtual function dispatch for upb_sink instances. 
-// This allows functions that call upb_sinks to just write: -// -// upb_sink_onvalue(sink, field, val, status); -// -// The macro will handle the virtual function lookup and dispatch. We could -// potentially define these later to also be capable of calling a C++ virtual -// method instead of doing the virtual dispatch manually. This would make it -// possible to write C++ sinks in a more natural style without loss of -// efficiency. We could have a flag in upb_sink defining whether it is a C -// sink or a C++ one. -// -// Arguments and expansions are parenthesized so that any expression may be -// passed. Note that "s" is evaluated twice, so it must not have side effects. -#define upb_sink_onvalue(s, f, val, status) ((s)->vtbl->value_cb((s), (f), (val), (status))) -#define upb_sink_onstr(s, f, str, start, end, status) ((s)->vtbl->str_cb((s), (f), (str), (start), (end), (status))) -#define upb_sink_onstart(s, f, status) ((s)->vtbl->start_cb((s), (f), (status))) -#define upb_sink_onend(s, f, status) ((s)->vtbl->end_cb((s), (f), (status))) - -// Initializes a plain C visitor with the given vtbl. The sink must have been -// allocated separately. -INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { - s->vtbl = vtbl; -} - - -/* upb_bytesink ***************************************************************/ - -// A upb_bytesink is like a upb_sink, but for bytes instead of structured -// protobuf data. Parsers implement upb_bytesink and push to a upb_sink, -// serializers do the opposite (implement upb_sink and push to upb_bytesink). -// -// The two simplest kinds of sinks are "write to string" and "write to FILE*". - -// A forward declaration solely for the benefit of declaring upb_byte_cb below. -// Always prefer upb_bytesink (without the "struct" keyword) instead. -struct _upb_bytesink; - -// The single bytesink callback; it takes the bytes to be written and returns -// how many were successfully written. If the return value is <0, the caller -// should stop processing.
-typedef int32_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str, - uint32_t start, uint32_t end, - upb_status *status); - -typedef struct _upb_bytesink { - upb_byte_cb *cb; -} upb_bytesink; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_string.h b/src/upb_string.h deleted file mode 100644 index 0516377..0000000 --- a/src/upb_string.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. - * - * This file defines a simple string type. The overriding goal of upb_string - * is to avoid memcpy(), malloc(), and free() wherever possible, while - * keeping both CPU and memory overhead low. Throughout upb there are - * situations where one wants to reference all or part of another string - * without copying. upb_string provides APIs for doing this. - * - * Characteristics of upb_string: - * - strings are reference-counted. - * - strings are logically immutable. - * - if a string has no other referents, it can be "recycled" into a new string - * without having to reallocate the upb_string. - * - strings can be substrings of other strings (owning a ref on the source - * string). - * - strings can refer to memory that they do not own, in which case we avoid - * copies if possible (the exact strategy for doing this can vary). - * - strings are not thread-safe by default, but can be made so by calling a - * function. This is not the default because it causes extra CPU overhead. - */ - -#ifndef UPB_STRING_H -#define UPB_STRING_H - -#include <assert.h> -#include <string.h> -#include "upb_atomic.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// All members of this struct are private, and may only be read/written through -// the associated functions. Also, strings may *only* be allocated on the heap.
-typedef struct _upb_string { - char *ptr; - uint32_t len; - uint32_t size; - upb_atomic_refcount_t refcount; - union { - // Used if this is a slice of another string. - struct _upb_string *src; - // Used if this string is referencing external unowned memory. - upb_stomic_refcount_t reader_count; - } extra; -} upb_string; - -// Returns a newly-created, empty, non-finalized string. When the string is no -// longer needed, it should be unref'd, never freed directly. -upb_string *upb_string_new(); - -// Releases a ref on the given string, which may free the memory. -void upb_string_unref(upb_string *str); - -// Returns a string with the same contents as "str". The caller owns a ref on -// the returned string, which may or may not be the same object as "str. -upb_string *upb_string_getref(upb_string *str); - -// Returns the length of the string. -INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; } - -// Use to read the bytes of the string. The caller *must* call -// upb_string_endread() after the data has been read. The window between -// upb_string_getrobuf() and upb_string_endread() should be kept as short -// as possible. No other functions may be called on the string during this -// window except upb_string_len(). -INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; } -INLINE void upb_string_endread(upb_string *str); - -// Attempts to recycle the string "str" so it may be reused and have different -// data written to it. The returned string is either "str" if it could be -// recycled or a newly created string if "str" has other references. -upb_string *upb_string_tryrecycle(upb_string *str); - -// The three options for setting the contents of a string. These may only be -// called when a string is first created or recycled; once other functions have -// been called on the string, these functions are not allowed until the string -// is recycled. 
- -// Gets a pointer suitable for writing to the string, which is guaranteed to -// have at least "len" bytes of data available. The size of the string will -// become "len". -char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len); - -// Sets the contents of "str" to be the given substring of "target_str", to -// which the caller must own a ref. -void upb_string_substr(upb_string *str, upb_string *target_str, - upb_strlen_t start, upb_strlen_t len); - -// Makes the string "str" a reference to the given string data. The caller -// guarantees that the given string data will not change or be deleted until -// a matching call to upb_string_detach(). -void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len); -void upb_string_detach(upb_string *str); - -/* upb_string library functions ***********************************************/ - -// Named like their <string.h> counterparts, these are all safe against buffer -// overflow. These only use the public upb_string interface. - -// More efficient than upb_strcmp if all you need is to test equality. -INLINE bool upb_streql(upb_string *s1, upb_string *s2) { - upb_strlen_t len = upb_string_len(s1); - if(len != upb_string_len(s2)) { - return false; - } else { - bool ret = - memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0; - upb_string_endread(s1); - upb_string_endread(s2); - return ret; - } -} - -// Like strcmp(). -int upb_strcmp(upb_string *s1, upb_string *s2); - -// Like upb_strcpy, but copies from a buffer and length. -INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) { - memcpy(upb_string_getrwbuf(dest, len), src, len); -} - -// Replaces the contents of "dest" with the contents of "src". -INLINE void upb_strcpy(upb_string *dest, upb_string *src) { - upb_strcpylen(dest, upb_string_getrobuf(src), upb_strlen(src)); - upb_string_endread(src); -} - -// Like upb_strcpy, but copies from a NULL-terminated string. 
-INLINE void upb_strcpyc(upb_string *dest, const char *src) { - // This does two passes over src, but that is necessary unless we want to - // repeatedly re-allocate dst, which seems worse. - upb_strcpylen(dest, src, strlen(src)); -} - -// Returns a new string whose contents are a copy of s. -upb_string *upb_strdup(upb_string *s); - -// Like upb_strdup(), but duplicates a given buffer and length. -INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) { - upb_string *s = upb_string_new(); - upb_strcpylen(s, src, len); - return s; -} - -// Like upb_strdup(), but duplicates a C NULL-terminated string. -upb_string *upb_strdupc(const char *src); - -// Appends 'append' to 's' in-place, resizing s if necessary. -void upb_strcat(upb_string *s, upb_string *append); - -// Returns a new string that is a substring of the given string. -upb_string *upb_strslice(upb_string *s, int offset, int len); - -// Reads an entire file into a newly-allocated string. -upb_string *upb_strreadfile(const char *filename); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_table.c b/src/upb_table.c deleted file mode 100644 index a477121..0000000 --- a/src/upb_table.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#include "upb_table.h" -#include "upb_data.h" - -#include <assert.h> -#include <stdlib.h> -#include <string.h> - -static const upb_inttable_key_t EMPTYENT = 0; -static const double MAX_LOAD = 0.85; - -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); - -/* We use 1-based indexes into the table so that 0 can be "NULL". 
*/ -static upb_inttable_entry *intent(upb_inttable *t, int32_t i) { - return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); -} -static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { - return UPB_INDEX(t->t.entries, i-1, t->t.entry_size); -} - -void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) -{ - t->count = 0; - t->entry_size = entry_size; - t->size_lg2 = 1; - while(size >>= 1) t->size_lg2++; - size_t bytes = upb_table_size(t) * t->entry_size; - t->mask = upb_table_size(t) - 1; - t->entries = malloc(bytes); - memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */ -} - -void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize) -{ - upb_table_init(&t->t, size, entsize); -} - -void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) -{ - upb_table_init(&t->t, size, entsize); -} - -void upb_table_free(upb_table *t) { free(t->entries); } -void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); } -void upb_strtable_free(upb_strtable *t) { - // Free refs from the strtable. - upb_strtable_entry *e = upb_strtable_begin(t); - for(; e; e = upb_strtable_next(t, e)) { - upb_string_unref(e->key); - } - upb_table_free(&t->t); -} - -static uint32_t strtable_bucket(upb_strtable *t, upb_strptr key) -{ - uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_strlen(key), 0); - return (hash & (upb_strtable_size(t)-1)) + 1; -} - -void *upb_strtable_lookup(upb_strtable *t, upb_strptr key) -{ - uint32_t bucket = strtable_bucket(t, key); - upb_strtable_entry *e; - do { - e = strent(t, bucket); - if(!upb_string_isnull(e->key) && upb_streql(e->key, key)) return e; - } while((bucket = e->next) != UPB_END_OF_CHAIN); - return NULL; -} - -static uint32_t empty_intbucket(upb_inttable *table) -{ - /* TODO: does it matter that this is biased towards the front of the table? 
*/ - for(uint32_t i = 1; i <= upb_inttable_size(table); i++) { - upb_inttable_entry *e = intent(table, i); - if(e->key == EMPTYENT) return i; - } - assert(false); - return 0; -} - -/* The insert routines have a lot more code duplication between int/string - * variants than I would like, but there's just a bit too much that varies to - * parameterize them. */ -static void intinsert(upb_inttable *t, upb_inttable_entry *e) -{ - assert(upb_inttable_lookup(t, e->key) == NULL); - t->t.count++; - uint32_t bucket = upb_inttable_bucket(t, e->key); - upb_inttable_entry *table_e = intent(t, bucket); - if(table_e->key != EMPTYENT) { /* Collision. */ - if(bucket == upb_inttable_bucket(t, table_e->key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_intbucket(t); - while (table_e->next != UPB_END_OF_CHAIN) - table_e = intent(t, table_e->next); - table_e->next = empty_bucket; - table_e = intent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. */ - uint32_t empty_bucket = empty_intbucket(t); - uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key); - memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_inttable_entry *evictee_e = intent(t, evictee_bucket); - while(1) { - assert(evictee_e->key != UPB_EMPTY_ENTRY); - assert(evictee_e->next != UPB_END_OF_CHAIN); - if(evictee_e->next == bucket) { - evictee_e->next = empty_bucket; - break; - } - evictee_e = intent(t, evictee_e->next); - } - /* table_e remains set to our mainpos. 
*/ - } - } - memcpy(table_e, e, t->t.entry_size); - table_e->next = UPB_END_OF_CHAIN; - assert(upb_inttable_lookup(t, e->key) == table_e); -} - -void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e) -{ - assert(e->key != 0); - if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) { - /* Need to resize. New table of double the size, add old elements to it. */ - upb_inttable new_table; - upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size); - new_table.t.count = t->t.count; - upb_inttable_entry *old_e; - for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e)) - intinsert(&new_table, old_e); - upb_inttable_free(t); - *t = new_table; - } - intinsert(t, e); -} - -static uint32_t empty_strbucket(upb_strtable *table) -{ - /* TODO: does it matter that this is biased towards the front of the table? */ - for(uint32_t i = 1; i <= upb_strtable_size(table); i++) { - upb_strtable_entry *e = strent(table, i); - if(upb_string_isnull(e->key)) return i; - } - assert(false); - return 0; -} - -static void strinsert(upb_strtable *t, upb_strtable_entry *e) -{ - assert(upb_strtable_lookup(t, e->key) == NULL); - e->key = upb_string_getref(e->key, UPB_REF_FROZEN); - t->t.count++; - uint32_t bucket = strtable_bucket(t, e->key); - upb_strtable_entry *table_e = strent(t, bucket); - if(!upb_string_isnull(table_e->key)) { /* Collision. */ - if(bucket == strtable_bucket(t, table_e->key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_strbucket(t); - while (table_e->next != UPB_END_OF_CHAIN) - table_e = strent(t, table_e->next); - table_e->next = empty_bucket; - table_e = strent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. 
*/ - uint32_t empty_bucket = empty_strbucket(t); - uint32_t evictee_bucket = strtable_bucket(t, table_e->key); - memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_strtable_entry *evictee_e = strent(t, evictee_bucket); - while(1) { - assert(!upb_string_isnull(evictee_e->key)); - assert(evictee_e->next != UPB_END_OF_CHAIN); - if(evictee_e->next == bucket) { - evictee_e->next = empty_bucket; - break; - } - evictee_e = strent(t, evictee_e->next); - } - /* table_e remains set to our mainpos. */ - } - } - memcpy(table_e, e, t->t.entry_size); - table_e->next = UPB_END_OF_CHAIN; - assert(upb_strtable_lookup(t, e->key) == table_e); -} - -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e) -{ - if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { - /* Need to resize. New table of double the size, add old elements to it. */ - upb_strtable new_table; - upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size); - upb_strtable_entry *old_e; - for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e)) - strinsert(&new_table, old_e); - upb_strtable_free(t); - *t = new_table; - } - strinsert(t, e); -} - -void *upb_inttable_begin(upb_inttable *t) { - return upb_inttable_next(t, intent(t, 0)); -} - -void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) { - upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1); - do { - cur = (void*)((char*)cur + t->t.entry_size); - if(cur == end) return NULL; - } while(cur->key == UPB_EMPTY_ENTRY); - return cur; -} - -void *upb_strtable_begin(upb_strtable *t) { - return upb_strtable_next(t, strent(t, 0)); -} - -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { - upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1); - do { - cur = (void*)((char*)cur + t->t.entry_size); - if(cur == end) return NULL; - } while(upb_string_isnull(cur->key)); - return cur; -} - -#ifdef UPB_UNALIGNED_READS_OK 
-//----------------------------------------------------------------------------- -// MurmurHash2, by Austin Appleby (released as public domain). -// Reformatted and C99-ified by Joshua Haberman. -// Note - This code makes a few assumptions about how your machine behaves - -// 1. We can read a 4-byte value from any address without crashing -// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t -// And it has a few limitations - -// 1. It will not work incrementally. -// 2. It will not produce the same results on little-endian and big-endian -// machines. -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) -{ - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - - // Initialize the hash to a 'random' value - uint32_t h = seed ^ len; - - // Mix 4 bytes at a time into the hash - const uint8_t * data = (const uint8_t *)key; - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - // Handle the last few bytes of the input array - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -#else // !UPB_UNALIGNED_READS_OK - -//----------------------------------------------------------------------------- -// MurmurHashAligned2, by Austin Appleby -// Same algorithm as MurmurHash2, but only does aligned reads - should be safer -// on certain platforms. 
-// Performance will be lower than MurmurHash2 - -#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - -static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) -{ - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - const uint8_t * data = (const uint8_t *)key; - uint32_t h = seed ^ len; - uint8_t align = (uintptr_t)data & 3; - - if(align && (len >= 4)) { - // Pre-load the temp registers - uint32_t t = 0, d = 0; - - switch(align) { - case 1: t |= data[2] << 16; - case 2: t |= data[1] << 8; - case 3: t |= data[0]; - } - - t <<= (8 * align); - - data += 4-align; - len -= 4-align; - - int32_t sl = 8 * (4-align); - int32_t sr = 8 * align; - - // Mix - - while(len >= 4) { - d = *(uint32_t *)data; - t = (t >> sr) | (d << sl); - - uint32_t k = t; - - MIX(h,k,m); - - t = d; - - data += 4; - len -= 4; - } - - // Handle leftover data in temp registers - - d = 0; - - if(len >= align) { - switch(align) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - } - - uint32_t k = (t >> sr) | (d << sl); - MIX(h,k,m); - - data += align; - len -= align; - - //---------- - // Handle tail bytes - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - } else { - switch(len) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - case 0: h ^= (t >> sr) | (d << sl); h *= m; - } - } - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } else { - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - MIX(h,k,m); - - data += 4; - len -= 4; - } - - //---------- - // Handle tail bytes - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } -} -#undef MIX - -#endif // UPB_UNALIGNED_READS_OK diff --git a/src/upb_table.h b/src/upb_table.h deleted file mode 100644 index 122aed3..0000000 --- a/src/upb_table.h +++ 
/dev/null @@ -1,132 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - * - * This file defines very fast int->struct (inttable) and string->struct - * (strtable) hash tables. The struct can be of any size, and it is stored - * in the table itself, for cache-friendly performance. - * - * The table uses internal chaining with Brent's variation (inspired by the - * Lua implementation of hash tables). The hash function for strings is - * Austin Appleby's "MurmurHash." - */ - -#ifndef UPB_TABLE_H_ -#define UPB_TABLE_H_ - -#include <assert.h> -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Note: the key cannot be zero! Zero is used by the implementation. */ -typedef uint32_t upb_inttable_key_t; - -#define UPB_END_OF_CHAIN (uint32_t)0 -#define UPB_EMPTY_ENTRY (uint32_t)0 - -typedef struct { - upb_inttable_key_t key; - uint32_t next; /* Internal chaining. */ -} upb_inttable_entry; - -// TODO: consider storing the hash in the entry. This would avoid the need to -// rehash on table resizes, but more importantly could possibly improve lookup -// performance by letting us compare hashes before comparing lengths or the -// strings themselves. -typedef struct { - upb_strptr key; // We own a frozen ref. - uint32_t next; // Internal chaining. -} upb_strtable_entry; - -typedef struct { - void *entries; - uint32_t count; /* How many elements are currently in the table? */ - uint16_t entry_size; /* How big is each entry? */ - uint8_t size_lg2; /* The table is 2^size_lg2 in size. */ - uint32_t mask; -} upb_table; - -typedef struct { - upb_table t; -} upb_strtable; - -typedef struct { - upb_table t; -} upb_inttable; - -/* Initialize and free a table, respectively. Specify the initial size - * with 'size' (the size will be increased as necessary). Entry size - * specifies how many bytes each entry in the table is. 
*/ -void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size); -void upb_inttable_free(upb_inttable *table); -void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); -void upb_strtable_free(upb_strtable *table); - -INLINE uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; } -INLINE uint32_t upb_inttable_size(upb_inttable *t) { - return upb_table_size(&t->t); -} -INLINE uint32_t upb_strtable_size(upb_strtable *t) { - return upb_table_size(&t->t); -} - -INLINE uint32_t upb_table_count(upb_table *t) { return t->count; } -INLINE uint32_t upb_inttable_count(upb_inttable *t) { - return upb_table_count(&t->t); -} -INLINE uint32_t upb_strtable_count(upb_strtable *t) { - return upb_table_count(&t->t); -} - -/* Inserts the given key into the hashtable with the given value. The key must - * not already exist in the hash table. The data will be copied from e into - * the hashtable (the amount of data copied comes from entry_size when the - * table was constructed). Therefore the data at val may be freed once the - * call returns. */ -void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e); -void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e); - -INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) { - return (k & t->t.mask) + 1; /* Identity hash for ints. */ -} - -/* Looks up key in this table. Inlined because this is in the critical path of - * decoding. We have the caller specify the entry_size because fixing this as - * a literal (instead of reading table->entry_size) gives the compiler more - * ability to optimize. 
*/ -INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key, - uint32_t entry_size) { - assert(key != 0); - uint32_t bucket = upb_inttable_bucket(t, key); - upb_inttable_entry *e; - do { - e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size); - if(e->key == key) return e; - } while((bucket = e->next) != UPB_END_OF_CHAIN); - return NULL; /* Not found. */ -} - -INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { - return upb_inttable_fastlookup(t, key, t->t.entry_size); -} - -void *upb_strtable_lookup(upb_strtable *t, upb_strptr key); - -/* Provides iteration over the table. The order in which the entries are - * returned is undefined. Insertions invalidate iterators. The _next - * functions return NULL when the end has been reached. */ -void *upb_inttable_begin(upb_inttable *t); -void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur); - -void *upb_strtable_begin(upb_strtable *t); -void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TABLE_H_ */ diff --git a/src/upb_text.c b/src/upb_text.c deleted file mode 100644 index 8662269..0000000 --- a/src/upb_text.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. 
- */ - -#include <inttypes.h> -#include "descriptor.h" -#include "upb_text.h" -#include "upb_data.h" - -void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file) -{ -#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break; - switch(type) { - case UPB_TYPE(DOUBLE): - CASE("%0.f", _double); - case UPB_TYPE(FLOAT): - CASE("%0.f", _float) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): - CASE("%" PRId64, int64) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - CASE("%" PRIu64, uint64) - case UPB_TYPE(INT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - CASE("%" PRId32, int32) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): - case UPB_TYPE(ENUM): - CASE("%" PRIu32, uint32); - case UPB_TYPE(BOOL): - CASE("%hhu", _bool); - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - /* TODO: escaping. */ - fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break; - } -} - -static void print_indent(upb_text_printer *p, FILE *stream) -{ - if(!p->single_line) - for(int i = 0; i < p->indent_depth; i++) - fprintf(stream, " "); -} - -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, - FILE *stream) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name)); - upb_text_printval(valtype, val, stream); - if(p->single_line) - fputc(' ', stream); - else - fputc('\n', stream); -} - -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream) -{ - print_indent(p, stream); - fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type)); - if(!p->single_line) fputc('\n', stream); - p->indent_depth++; -} - -void upb_text_pop(upb_text_printer *p, FILE *stream) -{ - p->indent_depth--; - print_indent(p, stream); - fprintf(stream, "}\n"); -} - -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream); - -static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md, - FILE *stream) 
-{ - for(upb_field_count_t i = 0; i < md->num_fields; i++) { - upb_fielddef *f = &md->fields[i]; - if(!upb_msg_has(msg, f)) continue; - upb_value v = upb_msg_get(msg, f); - if(upb_isarray(f)) { - upb_arrayptr arr = v.arr; - for(uint32_t j = 0; j < upb_array_len(arr); j++) { - upb_value elem = upb_array_get(arr, f, j); - printval(printer, elem, f, stream); - } - } else { - printval(printer, v, f, stream); - } - } -} - -static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f, - FILE *stream) -{ - if(upb_issubmsg(f)) { - upb_text_push(printer, f->name, stream); - printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream); - upb_text_pop(printer, stream); - } else { - upb_text_printfield(printer, f->name, f->type, v, stream); - } -} - - -void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line, - FILE *stream) -{ - upb_text_printer printer; - upb_text_printer_init(&printer, single_line); - printmsg(&printer, msg, md, stream); -} - diff --git a/src/upb_text.h b/src/upb_text.h deleted file mode 100644 index d89c9d6..0000000 --- a/src/upb_text.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. - */ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - -#include "upb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int indent_depth; - bool single_line; -} upb_text_printer; - -INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) { - p->indent_depth = 0; - p->single_line = single_line; -} -void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file); -void upb_text_printfield(upb_text_printer *p, upb_strptr name, - upb_field_type_t valtype, upb_value val, FILE *stream); -void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, - FILE *stream); -void upb_text_pop(upb_text_printer *p, FILE *stream); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_TEXT_H_ */ |