summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorJoshua Haberman <joshua@reverberate.org>2011-02-13 12:59:54 -0800
committerJoshua Haberman <joshua@reverberate.org>2011-02-13 12:59:54 -0800
commit6bdbb45e88e7b88b294dfb6e4cb493cbc3c8cf74 (patch)
tree0e00246fb124ebdf6a2210c816704c1d840e2138 /core
parentee84a7da167d2211066c4a663d41febdf9544438 (diff)
Merged core/ and stream/ -> src/. The split wasn't worth it.
Diffstat (limited to 'core')
-rw-r--r--core/upb.c75
-rw-r--r--core/upb.h262
-rw-r--r--core/upb_atomic.h189
-rw-r--r--core/upb_def.c1349
-rw-r--r--core/upb_def.h362
-rw-r--r--core/upb_glue.c54
-rw-r--r--core/upb_glue.h46
-rw-r--r--core/upb_msg.c253
-rw-r--r--core/upb_msg.h232
-rw-r--r--core/upb_stream.h276
-rw-r--r--core/upb_stream_vtbl.h295
-rw-r--r--core/upb_string.c143
-rw-r--r--core/upb_string.h360
-rw-r--r--core/upb_table.c411
-rw-r--r--core/upb_table.h133
15 files changed, 0 insertions, 4440 deletions
diff --git a/core/upb.c b/core/upb.c
deleted file mode 100644
index 897ca4e..0000000
--- a/core/upb.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include <string.h>
-
-#include "upb.h"
-#include "upb_string.h"
-
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, allows_delimited, inmemory_type) \
- {alignof(ctype), sizeof(ctype), wire_type, \
- (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \
- UPB_TYPE(inmemory_type), #ctype},
-
-const upb_type_info upb_types[] = {
- {0, 0, 0, 0, 0, ""}, // There is no type 0.
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1, DOUBLE) // DOUBLE
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1, FLOAT) // FLOAT
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // INT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1, UINT64) // UINT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // INT32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1, UINT64) // FIXED64
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1, UINT32) // FIXED32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1, BOOL) // BOOL
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // STRING
- TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0, MESSAGE) // GROUP
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, MESSAGE) // MESSAGE
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // BYTES
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, UINT32) // UINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, ENUM) // ENUM
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1, INT32) // SFIXED32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1, INT64) // SFIXED64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // SINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // SINT64
-};
-
-void upb_seterr(upb_status *status, enum upb_status_code code,
- const char *msg, ...) {
- status->code = code;
- upb_string_recycle(&status->str);
- va_list args;
- va_start(args, msg);
- upb_string_vprintf(status->str, msg, args);
- va_end(args);
-}
-
-void upb_copyerr(upb_status *to, upb_status *from)
-{
- to->code = from->code;
- if(from->str) to->str = upb_string_getref(from->str);
-}
-
-void upb_clearerr(upb_status *status) {
- status->code = UPB_OK;
- upb_string_recycle(&status->str);
-}
-
-void upb_printerr(upb_status *status) {
- if(status->str) {
- fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n",
- status->code, UPB_STRARG(status->str));
- } else {
- fprintf(stderr, "code: %d, no msg\n", status->code);
- }
-}
-
-void upb_status_uninit(upb_status *status) {
- upb_string_unref(status->str);
-}
diff --git a/core/upb.h b/core/upb.h
deleted file mode 100644
index 837fc52..0000000
--- a/core/upb.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * This file contains shared definitions that are widely used across upb.
- */
-
-#ifndef UPB_H_
-#define UPB_H_
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h> // only for size_t.
-#include <assert.h>
-#include "descriptor_const.h"
-#include "upb_atomic.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// inline if possible, emit standalone code if required.
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
-#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
-#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
-
-// The maximum that any submessages can be nested. Matches proto2's limit.
-#define UPB_MAX_NESTING 64
-
-// The maximum number of fields that any one .proto type can have. Note that
-// this is very different than the max field number. It is hard to imagine a
-// scenario where more than 32k fields makes sense.
-#define UPB_MAX_FIELDS (1<<15)
-typedef int16_t upb_field_count_t;
-
-// Nested type names are separated by periods.
-#define UPB_SYMBOL_SEPARATOR '.'
-
-// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType
-#define UPB_SYMBOL_MAXLEN 128
-
-// The longest chain that mutually-recursive types are allowed to form. For
-// example, this is a type cycle of length 2:
-// message A {
-// B b = 1;
-// }
-// message B {
-// A a = 1;
-// }
-#define UPB_MAX_TYPE_CYCLE_LEN 16
-
-// The maximum depth that the type graph can have. Note that this setting does
-// not automatically constrain UPB_MAX_NESTING, because type cycles allow for
-// unlimited nesting if we do not limit it.
-#define UPB_MAX_TYPE_DEPTH 64
-
-// The biggest possible single value is a 10-byte varint.
-#define UPB_MAX_ENCODED_SIZE 10
-
-
-/* Fundamental types and type constants. **************************************/
-
-// A list of types as they are encoded on-the-wire.
-enum upb_wire_type {
- UPB_WIRE_TYPE_VARINT = 0,
- UPB_WIRE_TYPE_64BIT = 1,
- UPB_WIRE_TYPE_DELIMITED = 2,
- UPB_WIRE_TYPE_START_GROUP = 3,
- UPB_WIRE_TYPE_END_GROUP = 4,
- UPB_WIRE_TYPE_32BIT = 5,
-
- // This isn't a real wire type, but we use this constant to describe varints
- // that are expected to be a maximum of 32 bits.
- UPB_WIRE_TYPE_32BIT_VARINT = 8
-};
-
-typedef uint8_t upb_wire_type_t;
-
-// Type of a field as defined in a .proto file. eg. string, int32, etc. The
-// integers that represent this are defined by descriptor.proto. Note that
-// descriptor.proto reserves "0" for errors, and we use it to represent
-// exceptional circumstances.
-typedef uint8_t upb_fieldtype_t;
-
-// For referencing the type constants tersely.
-#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
-#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type
-
-// Info for a given field type.
-typedef struct {
- uint8_t align;
- uint8_t size;
- upb_wire_type_t native_wire_type;
- uint8_t allowed_wire_types; // For packable fields, also allows delimited.
- uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
- char *ctype;
-} upb_type_info;
-
-// A static array of info about all of the field types, indexed by type number.
-extern const upb_type_info upb_types[];
-
-// The number of a field, eg. "optional string foo = 3".
-typedef int32_t upb_field_number_t;
-
-// Label (optional, repeated, required) as defined in a .proto file. The
-// values of this are defined by google.protobuf.FieldDescriptorProto.Label
-// (from descriptor.proto).
-typedef uint8_t upb_label_t;
-
-// A scalar (non-string) wire value. Used only for parsing unknown fields.
-typedef union {
- uint64_t varint;
- uint64_t _64bit;
- uint32_t _32bit;
-} upb_wire_value;
-
-/* Polymorphic values of .proto types *****************************************/
-
-struct _upb_string;
-typedef struct _upb_string upb_string;
-struct _upb_array;
-typedef struct _upb_array upb_array;
-struct _upb_msg;
-typedef struct _upb_msg upb_msg;
-struct _upb_bytesrc;
-typedef struct _upb_bytesrc upb_bytesrc;
-
-typedef int32_t upb_strlen_t;
-#define UPB_STRLEN_MAX INT32_MAX
-
-// The type of a upb_value. This is like a upb_fieldtype_t, but adds the
-// constant UPB_VALUETYPE_ARRAY to represent an array.
-typedef uint8_t upb_valuetype_t;
-#define UPB_VALUETYPE_ARRAY 32
-#define UPB_VALUETYPE_BYTESRC 32
-#define UPB_VALUETYPE_RAW 33
-
-// A single .proto value. The owner must have an out-of-band way of knowing
-// the type, so that it knows which union member to use.
-typedef struct {
- union {
- double _double;
- float _float;
- int32_t int32;
- int64_t int64;
- uint32_t uint32;
- uint64_t uint64;
- bool _bool;
- upb_string *str;
- upb_bytesrc *bytesrc;
- upb_msg *msg;
- upb_array *arr;
- upb_atomic_refcount_t *refcount;
- void *_void;
- } val;
-
- // In debug mode we carry the value type around also so we can check accesses
- // to be sure the right member is being read.
-#ifndef NDEBUG
- upb_valuetype_t type;
-#endif
-} upb_value;
-
-#ifdef NDEBUG
-#define SET_TYPE(dest, val)
-#else
-#define SET_TYPE(dest, val) dest = val
-#endif
-
-#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \
- INLINE ctype upb_value_get ## name(upb_value val) { \
- assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \
- return val.val.membername; \
- } \
- INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
- SET_TYPE(val->type, proto_type); \
- val->val.membername = cval; \
- }
-UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE));
-UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT));
-UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32));
-UPB_VALUE_ACCESSORS(enumval, int32, int32_t, UPB_TYPE(ENUM));
-UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
-UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
-UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
-UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
-UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING));
-UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE));
-UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY);
-UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC);
-
-INLINE void upb_value_setraw(upb_value *val, uint64_t cval) {
- SET_TYPE(val->type, UPB_VALUETYPE_RAW);
- val->val.uint64 = cval;
-}
-
-INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) {
- assert(val.type == UPB_TYPE(MESSAGE) ||
- val.type == UPB_TYPE(STRING) ||
- val.type == UPB_VALUETYPE_ARRAY);
- return val.val.refcount;
-}
-
-// Status codes used as a return value. Codes >0 are not fatal and can be
-// resumed.
-enum upb_status_code {
- // The operation completed successfully.
- UPB_OK = 0,
-
- // The bytesrc is at EOF and all data was read successfully.
- UPB_EOF = 1,
-
- // A read or write from a streaming src/sink could not be completed right now.
- UPB_TRYAGAIN = 2,
-
- // An unrecoverable error occurred.
- UPB_ERROR = -1,
-
- // A recoverable error occurred (for example, data of the wrong type was
- // encountered which we can skip over).
- // UPB_STATUS_RECOVERABLE_ERROR = -2
-};
-
-// TODO: consider adding error space and code, to let ie. errno be stored
-// as a proper code, or application-specific error codes.
-struct _upb_status {
- char code;
- upb_string *str;
-};
-
-typedef struct _upb_status upb_status;
-
-#define UPB_STATUS_INIT {UPB_OK, NULL}
-#define UPB_ERRORMSG_MAXLEN 256
-
-INLINE bool upb_ok(upb_status *status) {
- return status->code == UPB_OK;
-}
-
-INLINE void upb_status_init(upb_status *status) {
- status->code = UPB_OK;
- status->str = NULL;
-}
-
-void upb_status_uninit(upb_status *status);
-
-void upb_printerr(upb_status *status);
-void upb_clearerr(upb_status *status);
-void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg,
- ...);
-void upb_copyerr(upb_status *to, upb_status *from);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_H_ */
diff --git a/core/upb_atomic.h b/core/upb_atomic.h
deleted file mode 100644
index 1cd848b..0000000
--- a/core/upb_atomic.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * Only a very small part of upb is thread-safe. Notably, individual
- * messages, arrays, and strings are *not* thread safe for mutating.
- * However, we do make message *metadata* such as upb_msgdef and
- * upb_context thread-safe, and their ownership is tracked via atomic
- * refcounting. This header implements the small number of atomic
- * primitives required to support this. The primitives we implement
- * are:
- *
- * - a reader/writer lock (wrappers around platform-provided mutexes).
- * - an atomic refcount.
- */
-
-#ifndef UPB_ATOMIC_H_
-#define UPB_ATOMIC_H_
-
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* inline if possible, emit standalone code if required. */
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Non-thread-safe implementations. ******************************************/
-
-typedef struct {
- int v;
-} upb_atomic_refcount_t;
-
-INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
- a->v = val;
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
- return a->v++ == 0;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
- return --a->v == 0;
-}
-
-INLINE int upb_atomic_read(upb_atomic_refcount_t *a) {
- return a->v;
-}
-
-INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) {
- a->v += val;
- return a->v == 0;
-}
-
-INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) {
- int ret = a->v;
- a->v += val;
- return ret;
-}
-
-#endif
-
-/* Atomic refcount ************************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Already defined above. */
-
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
-
-/* GCC includes atomic primitives. */
-
-typedef struct {
- volatile int v;
-} upb_atomic_refcount_t;
-
-INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
- a->v = val;
- __sync_synchronize(); /* Ensure the initialized value is visible. */
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
- return __sync_fetch_and_add(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) {
- return __sync_add_and_fetch(&a->v, n) == 0;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
- return __sync_sub_and_fetch(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) {
- return __sync_fetch_and_add(&a->v, 0);
-}
-
-#elif defined(WIN32)
-
-/* Windows defines atomic increment/decrement. */
-#include <Windows.h>
-
-typedef struct {
- volatile LONG val;
-} upb_atomic_refcount_t;
-
-INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
- InterlockedExchange(&a->val, val);
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
- return InterlockedIncrement(&a->val) == 1;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
- return InterlockedDecrement(&a->val) == 0;
-}
-
-#else
-#error Atomic primitives not defined for your platform/CPU. \
- Implement them or compile with UPB_THREAD_UNSAFE.
-#endif
-
-INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) {
- return upb_atomic_read(a) == 1;
-}
-
-/* Reader/Writer lock. ********************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-typedef struct {
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
-
-#elif defined(UPB_USE_PTHREADS)
-
-#include <pthread.h>
-
-typedef struct {
- pthread_rwlock_t lock;
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_init(&l->lock, NULL);
-}
-
-INLINE void upb_rwlock_destroy(upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_destroy(&l->lock);
-}
-
-INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_rdlock(&l->lock);
-}
-
-INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_wrlock(&l->lock);
-}
-
-INLINE void upb_rwlock_unlock(upb_rwlock_t *l) {
- /* TODO: check return value. */
- pthread_rwlock_unlock(&l->lock);
-}
-
-#else
-#error Reader/writer lock is not defined for your platform/CPU. \
- Implement it or compile with UPB_THREAD_UNSAFE.
-#endif
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_ATOMIC_H_ */
diff --git a/core/upb_def.c b/core/upb_def.c
deleted file mode 100644
index 651afc1..0000000
--- a/core/upb_def.c
+++ /dev/null
@@ -1,1349 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
- */
-
-#include <stdlib.h>
-#include <stddef.h>
-#include "descriptor_const.h"
-#include "descriptor.h"
-#include "upb_def.h"
-
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-
-/* Rounds p up to the next multiple of t. */
-static size_t upb_align_up(size_t val, size_t align) {
- return val % align == 0 ? val : val + align - (val % align);
-}
-
-static int upb_div_round_up(int numerator, int denominator) {
- /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
- return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
-}
-
-/* Joins strings together, for example:
- * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
- * join("", "Baz") -> "Baz"
- * Caller owns a ref on the returned string. */
-static upb_string *upb_join(upb_string *base, upb_string *name) {
- if (!base || upb_string_len(base) == 0) {
- return upb_string_getref(name);
- } else {
- return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT,
- UPB_STRARG(base), UPB_STRARG(name));
- }
-}
-
-/* Search for a character in a string, in reverse. */
-static int my_memrchr(char *data, char c, size_t len)
-{
- int off = len-1;
- while(off > 0 && data[off] != c) --off;
- return off;
-}
-
-/* upb_def ********************************************************************/
-
-// Defs are reference counted, but can have cycles when types are
-// self-recursive or mutually recursive, so we need to be capable of collecting
-// the cycles. In our situation defs are immutable (so cycles cannot be
-// created or destroyed post-initialization). We need to be thread-safe but
-// want to avoid locks if at all possible and rely only on atomic operations.
-//
-// Our scheme is as follows. First we give each def a flag indicating whether
-// it is part of a cycle or not. Because defs are immutable, this flag will
-// never change. For acyclic defs, we can use a naive algorithm and avoid the
-// overhead of dealing with cycles. Most defs will be acyclic, and most cycles
-// will be very short.
-//
-// For defs that participate in cycles we keep two reference counts. One
-// tracks references that come from outside the cycle (we call these external
-// references), and is incremented and decremented like a regular refcount.
-// The other is a cycle refcount, and works as follows. Every cycle is
-// considered distinct, even if two cycles share members. For example, this
-// graph has two distinct cycles:
-//
-// A-->B-->C
-// ^ | |
-// +---+---+
-//
-// The cycles in this graph are AB and ABC. When A's external refcount
-// transitions from 0->1, we say that A takes "cycle references" on both
-// cycles. Taking a cycle reference means incrementing the cycle refcount of
-// all defs in the cycle. Since A and B are common to both cycles, A and B's
-// cycle refcounts will be incremented by two, and C's will be incremented by
-// one. Likewise, when A's external refcount transitions from 1->0, we
-// decrement A and B's cycle refcounts by two and C's by one. We collect a
-// cyclic type when its cycle refcount drops to zero. A precondition for this
-// is that the external refcount has dropped to zero also.
-//
-// This algorithm is relatively cheap, since it only requires extra work when
-// the external refcount on a cyclic type transitions from 0->1 or 1->0.
-
-static void upb_msgdef_free(upb_msgdef *m);
-static void upb_enumdef_free(upb_enumdef *e);
-static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
-
-static void upb_def_free(upb_def *def)
-{
- switch(def->type) {
- case UPB_DEF_MSG:
- upb_msgdef_free(upb_downcast_msgdef(def));
- break;
- case UPB_DEF_ENUM:
- upb_enumdef_free(upb_downcast_enumdef(def));
- break;
- case UPB_DEF_SVC:
- assert(false); /* Unimplemented. */
- break;
- case UPB_DEF_UNRESOLVED:
- upb_unresolveddef_free(upb_downcast_unresolveddef(def));
- break;
- default:
- assert(false);
- }
-}
-
-// Depth-first search for all cycles that include cycle_base. Returns the
-// number of paths from def that lead to cycle_base, which is equivalent to the
-// number of cycles def is in that include cycle_base.
-//
-// open_defs tracks the set of nodes that are currently being visited in the
-// search so we can stop the search if we detect a cycles that do not involve
-// cycle_base. We can't color the nodes as we go by writing to a member of the
-// def, because another thread could be performing the search concurrently.
-static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base,
- upb_msgdef **open_defs, int num_open_defs,
- bool ref) {
- bool found = false;
- for(int i = 0; i < num_open_defs; i++) {
- if(open_defs[i] == m) {
- // We encountered a cycle that did not involve cycle_base.
- found = true;
- break;
- }
- }
-
- if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) {
- return 0;
- } else if(m == cycle_base) {
- return 1;
- } else {
- int path_count = 0;
- if(cycle_base == NULL) {
- cycle_base = m;
- } else {
- open_defs[num_open_defs++] = m;
- }
- upb_msg_iter iter = upb_msg_begin(m);
- for(; !upb_msg_done(iter); iter = upb_msg_next(m, iter)) {
- upb_fielddef *f = upb_msg_iter_field(iter);
- upb_def *def = f->def;
- if(upb_issubmsg(f) && def->is_cyclic) {
- upb_msgdef *sub_m = upb_downcast_msgdef(def);
- path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs,
- num_open_defs, ref);
- }
- }
- if(ref) {
- upb_atomic_add(&m->cycle_refcount, path_count);
- } else {
- if(upb_atomic_add(&m->cycle_refcount, -path_count))
- upb_def_free(UPB_UPCAST(m));
- }
- return path_count;
- }
-}
-
-void _upb_def_reftozero(upb_def *def) {
- if(def->is_cyclic) {
- upb_msgdef *m = upb_downcast_msgdef(def);
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false);
- } else {
- upb_def_free(def);
- }
-}
-
-void _upb_def_cyclic_ref(upb_def *def) {
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true);
-}
-
-static void upb_def_init(upb_def *def, upb_deftype type) {
- def->type = type;
- def->is_cyclic = 0; // We detect this later, after resolving refs.
- def->search_depth = 0;
- def->fqname = NULL;
- upb_atomic_refcount_init(&def->refcount, 1);
-}
-
-static void upb_def_uninit(upb_def *def) {
- upb_string_unref(def->fqname);
-}
-
-
-/* upb_defbuilder ************************************************************/
-
-// A upb_defbuilder builds a list of defs by handling a parse of a protobuf in
-// the format defined in descriptor.proto. The output of a upb_defbuilder is
-// a list of upb_def* that possibly contain unresolved references.
-//
-// We use a separate object (upb_defbuilder) instead of having the defs handle
-// the parse themselves because we need to store state that is only necessary
-// during the building process itself.
-
-// When we are bootstrapping descriptor.proto, we must help the bare decoder out
-// by telling it when to descend into a submessage, because with the wire format
-// alone we cannot tell the difference between a submessage and a string.
-//
-// TODO: In the long-term, we should bootstrap from a serialization format that
-// contains this information, so we can remove this special-case code. This
-// would involve defining a serialization format very similar to the existing
-// protobuf format, but that contains more information about the wire type.
-#define BEGIN_SUBMSG 100
-
-// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
-typedef struct {
- upb_def **defs;
- uint32_t len;
- uint32_t size;
-} upb_deflist;
-
-static void upb_deflist_init(upb_deflist *l) {
- l->size = 8;
- l->defs = malloc(l->size * sizeof(void*));
- l->len = 0;
-}
-
-static void upb_deflist_uninit(upb_deflist *l) {
- for(uint32_t i = 0; i < l->len; i++)
- if(l->defs[i]) upb_def_unref(l->defs[i]);
- free(l->defs);
-}
-
-static void upb_deflist_push(upb_deflist *l, upb_def *d) {
- if(l->len == l->size) {
- l->size *= 2;
- l->defs = realloc(l->defs, l->size * sizeof(void*));
- }
- l->defs[l->len++] = d;
-}
-
-static upb_def *upb_deflist_last(upb_deflist *l) {
- return l->defs[l->len-1];
-}
-
-// Qualify the defname for all defs starting with offset "start" with "str".
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
- for(uint32_t i = start; i < l->len; i++) {
- upb_def *def = l->defs[i];
- upb_string *name = def->fqname;
- def->fqname = upb_join(str, name);
- upb_string_unref(name);
- }
-}
-
-// We keep a stack of all the messages scopes we are currently in, as well as
-// the top-level file scope. This is necessary to correctly qualify the
-// definitions that are contained inside. "name" tracks the name of the
-// message or package (a bare name -- not qualified by any enclosing scopes).
-typedef struct {
- upb_string *name;
- // Index of the first def that is under this scope. For msgdefs, the
- // msgdef itself is at start-1.
- int start;
-} upb_defbuilder_frame;
-
-struct _upb_defbuilder {
- upb_deflist defs;
- upb_defbuilder_frame stack[UPB_MAX_TYPE_DEPTH];
- int stack_len;
- upb_status status;
-
- uint32_t number;
- upb_string *name;
- bool saw_number;
- bool saw_name;
-
- upb_fielddef *f;
-};
-typedef struct _upb_defbuilder upb_defbuilder;
-
-// Forward declares for top-level file descriptors.
-static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b, upb_handlers *h);
-static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b,
- upb_handlers *h);
-
-
-static void upb_defbuilder_init(upb_defbuilder *b) {
- upb_deflist_init(&b->defs);
- upb_status_init(&b->status);
- b->stack_len = 0;
- b->name = NULL;
-}
-
-static void upb_defbuilder_uninit(upb_defbuilder *b) {
- upb_string_unref(b->name);
- upb_status_uninit(&b->status);
- upb_deflist_uninit(&b->defs);
-}
-
-static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
- if (b->stack_len <= 1) return NULL;
- int index = b->stack[b->stack_len-1].start - 1;
- assert(index >= 0);
- return upb_downcast_msgdef(b->defs.defs[index]);
-}
-
-static upb_def *upb_defbuilder_last(upb_defbuilder *b) {
- return upb_deflist_last(&b->defs);
-}
-
-// Start/end handlers for FileDescriptorProto and DescriptorProto (the two
-// entities that have names and can contain sub-definitions.
-void upb_defbuilder_startcontainer(upb_defbuilder *b) {
- upb_defbuilder_frame *f = &b->stack[b->stack_len++];
- f->start = b->defs.len;
- f->name = NULL;
-}
-
-void upb_defbuilder_endcontainer(upb_defbuilder *b) {
- upb_defbuilder_frame *f = &b->stack[--b->stack_len];
- upb_deflist_qualify(&b->defs, f->name, f->start);
- upb_string_unref(f->name);
-}
-
-void upb_defbuilder_setscopename(upb_defbuilder *b, upb_string *str) {
- upb_defbuilder_frame *f = &b->stack[b->stack_len-1];
- upb_string_unref(f->name);
- f->name = upb_string_getref(str);
-}
-
-// Handlers for google.protobuf.FileDescriptorProto.
-static upb_flow_t upb_defbuilder_FileDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_defbuilder_startcontainer(b);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_defbuilder_FileDescriptorProto_endmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_defbuilder_endcontainer(b);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_defbuilder_FileDescriptorProto_value(void *_b,
- upb_fielddef *f,
- upb_value val) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM:
- upb_defbuilder_setscopename(b, upb_value_getstr(val));
- break;
- case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM:
- case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
- return BEGIN_SUBMSG;
- }
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_defbuilder_FileDescriptorProto_startsubmsg(
- void *_b, upb_fielddef *f, upb_handlers *h) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM:
- upb_msgdef_register_DescriptorProto(b, h);
- return UPB_DELEGATE;
- case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
- upb_enumdef_register_EnumDescriptorProto(b, h);
- return UPB_DELEGATE;
- default:
- // TODO: services and extensions.
- return UPB_SKIPSUBMSG;
- }
-}
-
-static void upb_defbuilder_register_FileDescriptorProto(upb_defbuilder *b,
- upb_handlers *h) {
- static upb_handlerset handlers = {
- &upb_defbuilder_FileDescriptorProto_startmsg,
- &upb_defbuilder_FileDescriptorProto_endmsg,
- &upb_defbuilder_FileDescriptorProto_value,
- &upb_defbuilder_FileDescriptorProto_startsubmsg,
- };
- upb_register_handlerset(h, &handlers);
- upb_set_handler_closure(h, b, &b->status);
-}
-
-// Handlers for google.protobuf.FileDescriptorSet.
-static upb_flow_t upb_defbuilder_FileDescriptorSet_value(void *b,
- upb_fielddef *f,
- upb_value val) {
- (void)b;
- (void)val;
- switch(f->number) {
- case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM:
- return BEGIN_SUBMSG;
- }
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_defbuilder_FileDescriptorSet_startsubmsg(
- void *_b, upb_fielddef *f, upb_handlers *h) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM:
- upb_defbuilder_register_FileDescriptorProto(b, h);
- return UPB_DELEGATE;
- }
- return UPB_SKIPSUBMSG;
-}
-
-static void upb_defbuilder_register_FileDescriptorSet(
- upb_defbuilder *b, upb_handlers *h) {
- static upb_handlerset handlers = {
- NULL, // startmsg
- NULL, // endmsg
- &upb_defbuilder_FileDescriptorSet_value,
- &upb_defbuilder_FileDescriptorSet_startsubmsg,
- };
- upb_register_handlerset(h, &handlers);
- upb_set_handler_closure(h, b, &b->status);
-}
-
-
-/* upb_unresolveddef **********************************************************/
-
-// Unresolved defs are used as temporary placeholders for a def whose name has
-// not been resolved yet. During the name resolution step, all unresolved defs
-// are replaced with pointers to the actual def being referenced.
-typedef struct _upb_unresolveddef {
- upb_def base;
-
- // The target type name. This may or may not be fully qualified. It is
- // tempting to want to use base.fqname for this, but that will be qualified
- // which is inappropriate for a name we still have to resolve.
- upb_string *name;
-} upb_unresolveddef;
-
-// Is passed a ref on the string.
-static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) {
- upb_unresolveddef *def = malloc(sizeof(*def));
- upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
- def->name = upb_string_getref(str);
- return def;
-}
-
-static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
- upb_string_unref(def->name);
- upb_def_uninit(&def->base);
- free(def);
-}
-
-
-/* upb_enumdef ****************************************************************/
-
-static void upb_enumdef_free(upb_enumdef *e) {
- upb_enum_iter i;
- for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
- // Frees the ref taken when the string was parsed.
- upb_string_unref(upb_enum_iter_name(i));
- }
- upb_strtable_free(&e->ntoi);
- upb_inttable_free(&e->iton);
- upb_def_uninit(&e->base);
- free(e);
-}
-
-// google.protobuf.EnumValueDescriptorProto.
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- b->saw_number = false;
- b->saw_name = false;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_value(void *_b,
- upb_fielddef *f,
- upb_value val) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM:
- upb_string_unref(b->name);
- b->name = upb_string_getref(upb_value_getstr(val));
- b->saw_name = true;
- break;
- case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM:
- b->number = upb_value_getint32(val);
- b->saw_number = true;
- break;
- default:
- break;
- }
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumValueDescriptorProto_endmsg(void *_b) {
- upb_defbuilder *b = _b;
- if(!b->saw_number || !b->saw_name) {
- upb_seterr(&b->status, UPB_ERROR, "Enum value missing name or number.");
- return UPB_BREAK;
- }
- upb_ntoi_ent ntoi_ent = {{b->name, 0}, b->number};
- upb_iton_ent iton_ent = {{b->number, 0}, b->name};
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
- upb_inttable_insert(&e->iton, &iton_ent.e);
- // We don't unref "name" because we pass our ref to the iton entry of the
- // table. strtables can ref their keys, but the inttable doesn't know that
- // the value is a string.
- b->name = NULL;
- return UPB_CONTINUE;
-}
-
-static void upb_enumdef_register_EnumValueDescriptorProto(upb_defbuilder *b,
- upb_handlers *h) {
- static upb_handlerset handlers = {
- &upb_enumdef_EnumValueDescriptorProto_startmsg,
- &upb_enumdef_EnumValueDescriptorProto_endmsg,
- &upb_enumdef_EnumValueDescriptorProto_value,
- };
- upb_register_handlerset(h, &handlers);
- upb_set_handler_closure(h, b, &b->status);
-}
-
-// google.protobuf.EnumDescriptorProto.
-static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_enumdef *e = malloc(sizeof(*e));
- upb_def_init(&e->base, UPB_DEF_ENUM);
- upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
- upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
- upb_deflist_push(&b->defs, UPB_UPCAST(e));
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumDescriptorProto_endmsg(void *_b) {
- (void)_b;
- assert(upb_defbuilder_last((upb_defbuilder*)_b)->fqname != NULL);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_enumdef_EnumDescriptorProto_value(void *_b,
- upb_fielddef *f,
- upb_value val) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_FIELDNUM: {
- upb_enumdef *e = upb_downcast_enumdef(upb_defbuilder_last(b));
- upb_string_unref(e->base.fqname);
- e->base.fqname = upb_string_getref(upb_value_getstr(val));
- return UPB_CONTINUE;
- }
- case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM:
- return BEGIN_SUBMSG;
- default:
- return UPB_CONTINUE;
- }
-}
-
-static upb_flow_t upb_enumdef_EnumDescriptorProto_startsubmsg(void *_b,
- upb_fielddef *f,
- upb_handlers *h) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM:
- upb_enumdef_register_EnumValueDescriptorProto(b, h);
- return UPB_DELEGATE;
- default:
- return UPB_SKIPSUBMSG;
- }
-}
-
-static void upb_enumdef_register_EnumDescriptorProto(upb_defbuilder *b,
- upb_handlers *h) {
- static upb_handlerset handlers = {
- &upb_enumdef_EnumDescriptorProto_startmsg,
- &upb_enumdef_EnumDescriptorProto_endmsg,
- &upb_enumdef_EnumDescriptorProto_value,
- &upb_enumdef_EnumDescriptorProto_startsubmsg,
- };
- upb_register_handlerset(h, &handlers);
- upb_set_handler_closure(h, b, &b->status);
-}
-
-upb_enum_iter upb_enum_begin(upb_enumdef *e) {
- // We could iterate over either table here; the choice is arbitrary.
- return upb_inttable_begin(&e->iton);
-}
-
-upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
- assert(iter);
- return upb_inttable_next(&e->iton, &iter->e);
-}
-
-upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
- upb_iton_ent *e =
- (upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
- return e ? e->string : NULL;
-}
-
-
-/* upb_fielddef ***************************************************************/
-
-static void upb_fielddef_free(upb_fielddef *f) {
- upb_string_unref(f->name);
- if(f->owned) {
- upb_def_unref(f->def);
- }
- free(f);
-}
-
-static upb_flow_t upb_fielddef_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_fielddef *f = malloc(sizeof(*f));
- f->number = -1;
- f->name = NULL;
- f->def = NULL;
- f->owned = false;
- f->msgdef = upb_defbuilder_top(b);
- b->f = f;
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_endmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_fielddef *f = b->f;
- // TODO: verify that all required fields were present.
- assert(f->number != -1 && f->name != NULL);
- assert((f->def != NULL) == upb_hasdef(f));
-
- // Field was successfully read, add it as a field of the msgdef.
- upb_msgdef *m = upb_defbuilder_top(b);
- upb_itof_ent itof_ent = {{f->number, 0}, f};
- upb_ntof_ent ntof_ent = {{f->name, 0}, f};
- upb_inttable_insert(&m->itof, &itof_ent.e);
- upb_strtable_insert(&m->ntof, &ntof_ent.e);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM:
- b->f->type = upb_value_getenumval(val);
- break;
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM:
- b->f->label = upb_value_getenumval(val);
- break;
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM:
- b->f->number = upb_value_getint32(val);
- break;
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM:
- upb_string_unref(b->f->name);
- b->f->name = upb_string_getref(upb_value_getstr(val));
- break;
- case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: {
- upb_def_unref(b->f->def);
- b->f->def = UPB_UPCAST(upb_unresolveddef_new(upb_value_getstr(val)));
- b->f->owned = true;
- break;
- }
- }
- return UPB_CONTINUE;
-}
-
-static void upb_fielddef_register_FieldDescriptorProto(upb_defbuilder *b,
- upb_handlers *h) {
- static upb_handlerset handlers = {
- &upb_fielddef_startmsg,
- &upb_fielddef_endmsg,
- &upb_fielddef_value,
- };
- upb_register_handlerset(h, &handlers);
- upb_set_handler_closure(h, b, &b->status);
-}
-
-
-/* upb_msgdef *****************************************************************/
-
-static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) {
- // Sort by data size (ascending) to reduce padding.
- size_t size1 = upb_types[f1->type].size;
- size_t size2 = upb_types[f2->type].size;
- if (size1 != size2) return size1 - size2;
- // Otherwise return in number order (just so we get a reproduceable order.
- return f1->number - f2->number;
-}
-
-static int upb_compare_fields(const void *f1, const void *f2) {
- return upb_compare_typed_fields(*(void**)f1, *(void**)f2);
-}
-
-// google.protobuf.DescriptorProto.
-static upb_flow_t upb_msgdef_startmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_msgdef *m = malloc(sizeof(*m));
- upb_def_init(&m->base, UPB_DEF_MSG);
- upb_atomic_refcount_init(&m->cycle_refcount, 0);
- upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
- upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
- upb_deflist_push(&b->defs, UPB_UPCAST(m));
- upb_defbuilder_startcontainer(b);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_msgdef_endmsg(void *_b) {
- upb_defbuilder *b = _b;
- upb_msgdef *m = upb_defbuilder_top(b);
- if(!m->base.fqname) {
- upb_seterr(&b->status, UPB_ERROR, "Encountered message with no name.");
- return UPB_BREAK;
- }
-
- // Create an ordering over the fields.
- upb_field_count_t n = upb_msgdef_numfields(m);
- upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n);
- upb_field_count_t field = 0;
- upb_msg_iter i;
- for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- sorted_fields[field++]= upb_msg_iter_field(i);
- }
- qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
-
- // Assign offsets in the msg.
- m->set_flags_bytes = upb_div_round_up(n, 8);
- m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes;
-
- size_t max_align = 0;
- for (int i = 0; i < n; i++) {
- upb_fielddef *f = sorted_fields[i];
- const upb_type_info *type_info = &upb_types[f->type];
-
- // This identifies the set bit. When we implement is_initialized (a
- // general check about whether all required bits are set) we will probably
- // want to use a different ordering that puts all the required bits
- // together.
- f->field_index = i;
- f->set_bit_mask = 1 << (i % 8);
- f->set_bit_offset = i / 8;
-
- size_t size, align;
- if (upb_isarray(f)) {
- size = sizeof(void*);
- align = alignof(void*);
- } else {
- size = type_info->size;
- align = type_info->align;
- }
- // General alignment rules are: each member must be at an address that is a
- // multiple of that type's alignment. Also, the size of the structure as a
- // whole must be a multiple of the greatest alignment of any member.
- size_t offset = upb_align_up(m->size, align);
- // Offsets are relative to the end of the refcount.
- f->byte_offset = offset - sizeof(upb_atomic_refcount_t);
- m->size = offset + size;
- max_align = UPB_MAX(max_align, align);
- }
- free(sorted_fields);
-
- if (max_align > 0) m->size = upb_align_up(m->size, max_align);
-
- upb_defbuilder_endcontainer(b);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_msgdef_value(void *_b, upb_fielddef *f, upb_value val) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM: {
- upb_msgdef *m = upb_defbuilder_top(b);
- upb_string_unref(m->base.fqname);
- m->base.fqname = upb_string_getref(upb_value_getstr(val));
- upb_defbuilder_setscopename(b, upb_value_getstr(val));
- return UPB_CONTINUE;
- }
- case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM:
- case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM:
- case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
- return BEGIN_SUBMSG;
- default:
- // TODO: extensions.
- return UPB_CONTINUE;
- }
-}
-
-static upb_flow_t upb_msgdef_startsubmsg(void *_b, upb_fielddef *f,
- upb_handlers *h) {
- upb_defbuilder *b = _b;
- switch(f->number) {
- case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM:
- upb_fielddef_register_FieldDescriptorProto(b, h);
- return UPB_DELEGATE;
- case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM:
- upb_msgdef_register_DescriptorProto(b, h);
- return UPB_DELEGATE;
- case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
- upb_enumdef_register_EnumDescriptorProto(b, h);
- return UPB_DELEGATE;
- break;
- default:
- return UPB_SKIPSUBMSG;
- }
-}
-
-static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b,
- upb_handlers *h) {
- static upb_handlerset handlers = {
- &upb_msgdef_startmsg,
- &upb_msgdef_endmsg,
- &upb_msgdef_value,
- &upb_msgdef_startsubmsg,
- };
- upb_register_handlerset(h, &handlers);
- upb_set_handler_closure(h, b, &b->status);
-}
-
-static void upb_msgdef_free(upb_msgdef *m)
-{
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
- upb_fielddef_free(upb_msg_iter_field(i));
- upb_strtable_free(&m->ntof);
- upb_inttable_free(&m->itof);
- upb_def_uninit(&m->base);
- free(m);
-}
-
-static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
- (void)m;
- if(f->owned) upb_def_unref(f->def);
- f->def = def;
- // We will later make the ref unowned if it is a part of a cycle.
- f->owned = true;
- upb_def_ref(def);
-}
-
-upb_msg_iter upb_msg_begin(upb_msgdef *m) {
- return upb_inttable_begin(&m->itof);
-}
-
-upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
- return upb_inttable_next(&m->itof, &iter->e);
-}
-
-/* upb_symtab adding defs *****************************************************/
-
-// This is a self-contained group of functions that, given a list of upb_defs
-// whose references are not yet resolved, resolves references and adds them
-// atomically to a upb_symtab.
-
-typedef struct {
- upb_strtable_entry e;
- upb_def *def;
-} upb_symtab_ent;
-
-// Given a symbol and the base symbol inside which it is defined, find the
-// symbol's definition in t.
-static upb_symtab_ent *upb_resolve(upb_strtable *t,
- upb_string *base, upb_string *sym)
-{
- if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN ||
- upb_string_len(sym) == 0) return NULL;
-
- if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) {
- // Symbols starting with '.' are absolute, so we do a single lookup.
- // Slice to omit the leading '.'
- upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1);
- upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
- upb_string_unref(sym_str);
- return e;
- } else {
- // Remove components from base until we find an entry or run out.
- // TODO: This branch is totally broken, but currently not used.
- upb_string *sym_str = upb_string_new();
- int baselen = upb_string_len(base);
- while(1) {
- // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym
- upb_strlen_t len = baselen + upb_string_len(sym) + 1;
- char *buf = upb_string_getrwbuf(sym_str, len);
- memcpy(buf, upb_string_getrobuf(base), baselen);
- buf[baselen] = UPB_SYMBOL_SEPARATOR;
- memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym));
-
- upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
- if (e) return e;
- else if(baselen == 0) return NULL; // No more scopes to try.
-
- baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen);
- }
- }
-}
-
-// Performs a pass over the type graph to find all cycles that include m.
-static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
-{
- if(depth > UPB_MAX_TYPE_DEPTH) {
- // We have found a non-cyclic path from the base of the type tree that
- // exceeds the maximum allowed depth. There are many situations in upb
- // where we recurse over the type tree (like for example, right now) and an
- // absurdly deep tree could cause us to stack overflow on systems with very
- // limited stacks.
- upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was found at "
- "depth %d in the type graph, which exceeds the maximum type "
- "depth of %d.", UPB_UPCAST(m)->fqname, depth,
- UPB_MAX_TYPE_DEPTH);
- return false;
- } else if(UPB_UPCAST(m)->search_depth == 1) {
- // Cycle!
- int cycle_len = depth - 1;
- if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) {
- upb_seterr(status, UPB_ERROR, "Type " UPB_STRFMT " was involved "
- "in a cycle of length %d, which exceeds the maximum type "
- "cycle length of %d.", UPB_UPCAST(m)->fqname, cycle_len,
- UPB_MAX_TYPE_CYCLE_LEN);
- return false;
- }
- return true;
- } else if(UPB_UPCAST(m)->search_depth > 0) {
- // This was a cycle, but did not originate from the base of our search tree.
- // We'll find it when we call find_cycles() on this node directly.
- return false;
- } else {
- UPB_UPCAST(m)->search_depth = ++depth;
- bool cycle_found = false;
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if(!upb_issubmsg(f)) continue;
- upb_def *sub_def = f->def;
- upb_msgdef *sub_m = upb_downcast_msgdef(sub_def);
- if(upb_symtab_findcycles(sub_m, depth, status)) {
- cycle_found = true;
- UPB_UPCAST(m)->is_cyclic = true;
- if(f->owned) {
- upb_atomic_unref(&sub_def->refcount);
- f->owned = false;
- }
- }
- }
- UPB_UPCAST(m)->search_depth = 0;
- return cycle_found;
- }
-}
-
-// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab.
-bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
- upb_status *status)
-{
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
- upb_msgdef *m = upb_dyncast_msgdef(e->def);
- if(!m) continue;
- // Type names are resolved relative to the message in which they appear.
- upb_string *base = e->e.key;
-
- upb_msg_iter i;
- for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- if(!upb_hasdef(f)) continue; // No resolving necessary.
- upb_string *name = upb_downcast_unresolveddef(f->def)->name;
-
- // Resolve from either the tmptab (pending adds) or symtab (existing
- // defs). If both exist, prefer the pending add, because it will be
- // overwriting the existing def.
- upb_symtab_ent *found;
- if(!(found = upb_resolve(tmptab, base, name)) &&
- !(found = upb_resolve(symtab, base, name))) {
- upb_seterr(status, UPB_ERROR,
- "could not resolve symbol '" UPB_STRFMT "'"
- " in context '" UPB_STRFMT "'",
- UPB_STRARG(name), UPB_STRARG(base));
- return false;
- }
-
- // Check the type of the found def.
- upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
- if(found->def->type != expected) {
- upb_seterr(status, UPB_ERROR, "Unexpected type");
- return false;
- }
- upb_msgdef_resolve(m, f, found->def);
- }
- }
-
- // Deal with type cycles.
- for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
- upb_msgdef *m = upb_dyncast_msgdef(e->def);
- if(!m) continue;
- // The findcycles() call will decrement the external refcount of the
- upb_symtab_findcycles(m, 0, status);
- upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
- upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true);
- }
-
- return true;
-}
-
-// Given a list of defs, a list of extensions (in the future), and a flag
-// indicating whether the new defs can overwrite existing defs in the symtab,
-// attempts to add the given defs to the symtab. The whole operation either
-// succeeds or fails. Ownership of "defs" and "exts" is taken.
-bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs,
- bool allow_redef, upb_status *status)
-{
- upb_rwlock_wrlock(&s->lock);
-
- // Build a table of the defs we mean to add, for duplicate detection and name
- // resolution.
- upb_strtable tmptab;
- upb_strtable_init(&tmptab, num_defs, sizeof(upb_symtab_ent));
- for (int i = 0; i < num_defs; i++) {
- upb_def *def = defs[i];
- upb_symtab_ent e = {{def->fqname, 0}, def};
-
- // Redefinition is never allowed within a single FileDescriptorSet.
- // Additionally, we only allow overwriting of an existing definition if
- // allow_redef is set.
- if (upb_strtable_lookup(&tmptab, def->fqname) ||
- (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) {
- upb_seterr(status, UPB_ERROR, "Redefinition of symbol " UPB_STRFMT,
- UPB_STRARG(def->fqname));
- goto err;
- }
-
- // Pass ownership from the deflist to the strtable.
- upb_strtable_insert(&tmptab, &e.e);
- defs[i] = NULL;
- }
-
- // TODO: process the list of extensions by modifying entries from
- // tmptab in-place (copying them from the symtab first if necessary).
-
- if (!upb_resolverefs(&tmptab, &s->symtab, status)) goto err;
-
- // The defs in tmptab have been vetted, and can be added to the symtab
- // without causing errors. Now add all tmptab defs to the symtab,
- // overwriting (and releasing a ref on) any existing defs with the same
- // names. Ownership for tmptab defs passes from the tmptab to the symtab.
- upb_symtab_ent *tmptab_e;
- for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e;
- tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) {
- upb_symtab_ent *symtab_e =
- upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
- if(symtab_e) {
- upb_def_unref(symtab_e->def);
- symtab_e->def = tmptab_e->def;
- } else {
- upb_strtable_insert(&s->symtab, &tmptab_e->e);
- }
- }
-
- upb_rwlock_unlock(&s->lock);
- upb_strtable_free(&tmptab);
- return true;
-
-err:
- // We need to free all defs from "tmptab."
- upb_rwlock_unlock(&s->lock);
- for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
- e = upb_strtable_next(&tmptab, &e->e)) {
- upb_def_unref(e->def);
- }
- upb_strtable_free(&tmptab);
- for (int i = 0; i < num_defs; i++) upb_def_unref(defs[i]);
- return false;
-}
-
-
-/* upb_symtab public interface ************************************************/
-
-upb_symtab *upb_symtab_new()
-{
- upb_symtab *s = malloc(sizeof(*s));
- upb_atomic_refcount_init(&s->refcount, 1);
- upb_rwlock_init(&s->lock);
- upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
- s->fds_msgdef = NULL;
- return s;
-}
-
-static void upb_free_symtab(upb_strtable *t)
-{
- upb_symtab_ent *e;
- for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e))
- upb_def_unref(e->def);
- upb_strtable_free(t);
-}
-
-void _upb_symtab_free(upb_symtab *s)
-{
- upb_free_symtab(&s->symtab);
- upb_rwlock_destroy(&s->lock);
- free(s);
-}
-
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type)
-{
- upb_rwlock_rdlock(&s->lock);
- int total = upb_strtable_count(&s->symtab);
- // We may only use part of this, depending on how many symbols are of the
- // correct type.
- upb_def **defs = malloc(sizeof(*defs) * total);
- upb_symtab_ent *e = upb_strtable_begin(&s->symtab);
- int i = 0;
- for(; e; e = upb_strtable_next(&s->symtab, &e->e)) {
- upb_def *def = e->def;
- assert(def);
- if(type == UPB_DEF_ANY || def->type == type)
- defs[i++] = def;
- }
- upb_rwlock_unlock(&s->lock);
- *count = i;
- for(i = 0; i < *count; i++)
- upb_def_ref(defs[i]);
- return defs;
-}
-
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym)
-{
- upb_rwlock_rdlock(&s->lock);
- upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
- upb_def *ret = NULL;
- if(e) {
- ret = e->def;
- upb_def_ref(ret);
- }
- upb_rwlock_unlock(&s->lock);
- return ret;
-}
-
-
-upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) {
- upb_rwlock_rdlock(&s->lock);
- upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol);
- upb_def *ret = NULL;
- if(e) {
- ret = e->def;
- upb_def_ref(ret);
- }
- upb_rwlock_unlock(&s->lock);
- return ret;
-}
-
-void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status)
-{
- upb_defbuilder b;
- upb_defbuilder_init(&b);
- upb_handlers handlers;
- upb_handlers_init(&handlers);
- upb_defbuilder_register_FileDescriptorSet(&b, &handlers);
- upb_src_sethandlers(src, &handlers);
- upb_src_run(src, status);
- if (upb_ok(status))
- upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status);
- upb_defbuilder_uninit(&b);
- upb_handlers_uninit(&handlers);
-}
-
-
-/* upb_baredecoder ************************************************************/
-
-// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer
-// binary format. It is only used for bootstrapping. It can parse without
-// having a upb_msgdef, which is why it is useful for bootstrapping the first
-// msgdef. On the downside, it does not support:
-//
-// * having its input span multiple upb_strings.
-// * reading any field of the returned upb_fielddef's except f->number.
-// * keeping a pointer to the upb_fielddef* and reading it later (the same
-// upb_fielddef is reused over and over).
-// * detecting errors in the input (we trust that our input is known-good).
-// * skipping the rest of the submessage (UPB_SKIPSUBMSG).
-//
-// It also does not support any of the follow protobuf features:
-// * packed fields.
-// * groups.
-// * zig-zag-encoded types like sint32 and sint64.
-//
-// Since it cannot tell the difference between submessages and strings, it
-// always reports them as strings first, but if the value callback returns
-// UPB_TREAT_AS_SUBMSG this signals to the baredecoder that it should be
-// treated like a submessage instead.
-//
-// TODO: for bootstrapping we should define a slightly different wire format
-// that includes enough information to know the precise integer types and
-// that distinguishes between strings and submessages. This will allow
-// us to get rid of the UPB_TREAT_AS_SUBMSG hack. It will also allow us
-// to get rid of the upb_value_setraw() scheme, which would be more
-// complicated to support on big-endian machines.
-
-typedef struct {
- upb_src src;
- upb_string *input;
- upb_strlen_t offset;
- upb_dispatcher dispatcher;
-} upb_baredecoder;
-
-static uint64_t upb_baredecoder_readv64(upb_baredecoder *d)
-{
- const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset;
- const uint8_t *buf = start;
- uint8_t last = 0x80;
- uint64_t val = 0;
- for(int bitpos = 0; (last & 0x80); buf++, bitpos += 7)
- val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
- d->offset += buf - start;
- return val;
-}
-
-static uint32_t upb_baredecoder_readv32(upb_baredecoder *d)
-{
- return (uint32_t)upb_baredecoder_readv64(d); // Truncate.
-}
-
-static uint64_t upb_baredecoder_readf64(upb_baredecoder *d)
-{
- uint64_t val;
- memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 8);
- d->offset += 8;
- return val;
-}
-
-static uint32_t upb_baredecoder_readf32(upb_baredecoder *d)
-{
- uint32_t val;
- memcpy(&val, upb_string_getrobuf(d->input) + d->offset, 4);
- d->offset += 4;
- return val;
-}
-
-static void upb_baredecoder_sethandlers(upb_src *src, upb_handlers *handlers) {
- upb_baredecoder *d = (upb_baredecoder*)src;
- upb_dispatcher_reset(&d->dispatcher, handlers, false);
-}
-
-static void upb_baredecoder_run(upb_src *src, upb_status *status) {
- upb_baredecoder *d = (upb_baredecoder*)src;
- assert(!upb_handlers_isempty(&d->dispatcher.top->handlers));
- upb_string *str = NULL;
- upb_strlen_t stack[UPB_MAX_NESTING] = {UPB_STRLEN_MAX};
- upb_strlen_t *top = &stack[0];
- d->offset = 0;
-
-#define CHECK(x) if (x != UPB_CONTINUE && x != BEGIN_SUBMSG) goto err;
-
- CHECK(upb_dispatch_startmsg(&d->dispatcher));
- while(d->offset < upb_string_len(d->input)) {
- uint32_t key = upb_baredecoder_readv64(d);
- upb_fielddef f;
- f.number = key >> 3;
- upb_wire_type_t wt = key & 0x7;
- if(wt == UPB_WIRE_TYPE_DELIMITED) {
- uint32_t delim_len = upb_baredecoder_readv32(d);
- // We don't know if it's a string or a submessage; deliver first as
- // string.
- upb_string_recycle(&str);
- upb_string_substr(str, d->input, d->offset, delim_len);
- upb_value v;
- upb_value_setstr(&v, str);
- upb_flow_t ret = upb_dispatch_value(&d->dispatcher, &f, v);
- CHECK(ret);
- if(ret == BEGIN_SUBMSG) {
- // Should deliver as a submessage instead.
- CHECK(upb_dispatch_startsubmsg(&d->dispatcher, &f));
- *(++top) = d->offset + delim_len;
- } else {
- d->offset += delim_len;
- }
- } else {
- upb_value v;
- switch(wt) {
- case UPB_WIRE_TYPE_VARINT:
- upb_value_setraw(&v, upb_baredecoder_readv64(d));
- break;
- case UPB_WIRE_TYPE_64BIT:
- upb_value_setraw(&v, upb_baredecoder_readf64(d));
- break;
- case UPB_WIRE_TYPE_32BIT:
- upb_value_setraw(&v, upb_baredecoder_readf32(d));
- break;
- default:
- assert(false);
- abort();
- }
- CHECK(upb_dispatch_value(&d->dispatcher, &f, v));
- }
- // Detect end-of-submessage.
- while(d->offset >= *top) {
- CHECK(upb_dispatch_endsubmsg(&d->dispatcher));
- d->offset = *(top--);
- }
- }
- CHECK(upb_dispatch_endmsg(&d->dispatcher));
- upb_string_unref(str);
- return;
-
-err:
- upb_copyerr(status, d->dispatcher.top->handlers.status);
- upb_string_unref(str);
-}
-
-static upb_baredecoder *upb_baredecoder_new(upb_string *str) {
- static upb_src_vtbl vtbl = {
- &upb_baredecoder_sethandlers,
- &upb_baredecoder_run,
- };
- upb_baredecoder *d = malloc(sizeof(*d));
- upb_src_init(&d->src, &vtbl);
- d->input = upb_string_getref(str);
- d->offset = 0;
- upb_dispatcher_init(&d->dispatcher);
- return d;
-}
-
-static void upb_baredecoder_free(upb_baredecoder *d) {
- upb_string_unref(d->input);
- free(d);
-}
-
-static upb_src *upb_baredecoder_src(upb_baredecoder *d) {
- return &d->src;
-}
-
-void upb_symtab_add_descriptorproto(upb_symtab *symtab) {
- // For the moment we silently decline to perform the operation if the symbols
- // already exist in the symtab. Revisit this when we have a better story
- // about whether syms in a table can be replaced.
- if(symtab->fds_msgdef) upb_def_unref(UPB_UPCAST(symtab->fds_msgdef));
-
- upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str);
- upb_status status = UPB_STATUS_INIT;
- upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status);
- upb_baredecoder_free(decoder);
-
- if(!upb_ok(&status)) {
- // upb itself is corrupt.
- upb_printerr(&status);
- upb_clearerr(&status);
- upb_symtab_unref(symtab);
- abort();
- }
- upb_def *def = upb_symtab_lookup(
- symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
- if (!def || (symtab->fds_msgdef = upb_dyncast_msgdef(def)) == NULL) {
- // upb itself is corrupt.
- abort();
- }
- upb_def_unref(def); // The symtab already holds a ref on it.
- upb_status_uninit(&status);
-}
-
-upb_msgdef *upb_symtab_fds_def(upb_symtab *s) {
- assert(s->fds_msgdef != NULL);
- upb_def_ref(UPB_UPCAST(s->fds_msgdef));
- return s->fds_msgdef;
-}
diff --git a/core/upb_def.h b/core/upb_def.h
deleted file mode 100644
index 28cc258..0000000
--- a/core/upb_def.h
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details.
- *
- * Provides a mechanism for loading proto definitions from descriptors, and
- * data structures to represent those definitions. These form the protobuf
- * schema, and are used extensively throughout upb:
- * - upb_msgdef: describes a "message" construct.
- * - upb_fielddef: describes a message field.
- * - upb_enumdef: describes an enum.
- * (TODO: definitions of extensions and services).
- *
- * Defs are obtained from a upb_symtab object. A upb_symtab is empty when
- * constructed, and definitions can be added by supplying descriptors.
- *
- * Defs are immutable and reference-counted. Symbol tables reference any defs
- * that are the "current" definitions. If an extension is loaded that adds a
- * field to an existing message, a new msgdef is constructed that includes the
- * new field and the old msgdef is unref'd. The old msgdef will still be ref'd
- * by messages (if any) that were constructed with that msgdef.
- *
- * This file contains routines for creating and manipulating the definitions
- * themselves. To create and manipulate actual messages, see upb_msg.h.
- */
-
-#ifndef UPB_DEF_H_
-#define UPB_DEF_H_
-
-#include "upb_atomic.h"
-#include "upb_stream.h"
-#include "upb_table.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* upb_def: base class for defs **********************************************/
-
-// All the different kind of defs we support. These correspond 1:1 with
-// declarations in a .proto file.
-typedef enum {
- UPB_DEF_MSG = 0,
- UPB_DEF_ENUM,
- UPB_DEF_SVC,
- UPB_DEF_EXT,
- // Internal-only, placeholder for a def that hasn't be resolved yet.
- UPB_DEF_UNRESOLVED,
-
- // For specifying that defs of any type are requsted from getdefs.
- UPB_DEF_ANY = -1
-} upb_deftype;
-
-// This typedef is more space-efficient than declaring an enum var directly.
-typedef int8_t upb_deftype_t;
-
-typedef struct {
- upb_string *fqname; // Fully qualified.
- upb_atomic_refcount_t refcount;
- upb_deftype_t type;
-
- // The is_cyclic flag could go in upb_msgdef instead of here, because only
- // messages can be involved in cycles. However, putting them here is free
- // from a space perspective because structure alignment will otherwise leave
- // three bytes empty after type. It is also makes ref and unref more
- // efficient, because we don't have to downcast to msgdef before checking the
- // is_cyclic flag.
- bool is_cyclic;
- uint16_t search_depth; // Used during initialization dfs.
-} upb_def;
-
-// These must not be called directly!
-void _upb_def_cyclic_ref(upb_def *def);
-void _upb_def_reftozero(upb_def *def);
-
-// Call to ref/deref a def.
-INLINE void upb_def_ref(upb_def *def) {
- if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def);
-}
-INLINE void upb_def_unref(upb_def *def) {
- if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
-}
-
-/* upb_fielddef ***************************************************************/
-
-// A upb_fielddef describes a single field in a message. It isn't a full def
-// in the sense that it derives from upb_def. It cannot stand on its own; it
-// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
-// It is also reference-counted.
-typedef struct _upb_fielddef {
- upb_value default_value;
-
- upb_string *name;
-
- struct _upb_msgdef *msgdef;
-
- // For the case of an enum or a submessage, points to the def for that type.
- upb_def *def;
-
- upb_atomic_refcount_t refcount;
- uint32_t byte_offset; // Where in a upb_msg to find the data.
-
- // These are set only when this fielddef is part of a msgdef.
- upb_field_number_t number;
- upb_field_count_t field_index; // Indicates set bit.
-
- upb_fieldtype_t type;
- upb_label_t label;
- // True if we own a ref on "def" (above). This is true unless this edge is
- // part of a cycle.
- bool owned;
- uint8_t set_bit_mask;
- uint16_t set_bit_offset;
-} upb_fielddef;
-
-// A variety of tests about the type of a field.
-INLINE bool upb_issubmsg(upb_fielddef *f) {
- return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE);
-}
-INLINE bool upb_isstring(upb_fielddef *f) {
- return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES);
-}
-INLINE bool upb_isarray(upb_fielddef *f) {
- return f->label == UPB_LABEL(REPEATED);
-}
-// Does the type of this field imply that it should contain an associated def?
-INLINE bool upb_hasdef(upb_fielddef *f) {
- return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
-}
-
-INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) {
- if (upb_isarray(f)) {
- return UPB_VALUETYPE_ARRAY;
- } else {
- return f->type;
- }
-}
-
-INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) {
- assert(upb_isarray(f));
- return f->type;
-}
-
-INLINE bool upb_field_ismm(upb_fielddef *f) {
- return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
-}
-
-INLINE bool upb_elem_ismm(upb_fielddef *f) {
- return upb_isstring(f) || upb_issubmsg(f);
-}
-
-/* upb_msgdef *****************************************************************/
-
-// Structure that describes a single .proto message type.
-typedef struct _upb_msgdef {
- upb_def base;
- upb_atomic_refcount_t cycle_refcount;
- uint32_t size;
- uint32_t set_flags_bytes;
-
- // Tables for looking up fields by number and name.
- upb_inttable itof; // int to field
- upb_strtable ntof; // name to field
-} upb_msgdef;
-
-// Hash table entries for looking up fields by name or number.
-typedef struct {
- upb_inttable_entry e;
- upb_fielddef *f;
-} upb_itof_ent;
-typedef struct {
- upb_strtable_entry e;
- upb_fielddef *f;
-} upb_ntof_ent;
-
-// Looks up a field by name or number. While these are written to be as fast
-// as possible, it will still be faster to cache the results of this lookup if
-// possible. These return NULL if no such field is found.
-INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) {
- upb_itof_ent *e =
- (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e));
- return e ? e->f : NULL;
-}
-
-INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) {
- upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
- return e ? e->f : NULL;
-}
-
-INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) {
- return upb_strtable_count(&m->ntof);
-}
-
-// Iteration over fields. The order is undefined.
-// upb_msg_iter i;
-// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-// upb_fielddef *f = upb_msg_iter_field(i);
-// // ...
-// }
-typedef upb_itof_ent *upb_msg_iter;
-
-upb_msg_iter upb_msg_begin(upb_msgdef *m);
-upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
-INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; }
-
-INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
- return iter->f;
-}
-
-/* upb_enumdef ****************************************************************/
-
-typedef struct _upb_enumdef {
- upb_def base;
- upb_strtable ntoi;
- upb_inttable iton;
-} upb_enumdef;
-
-typedef struct {
- upb_strtable_entry e;
- uint32_t value;
-} upb_ntoi_ent;
-
-typedef struct {
- upb_inttable_entry e;
- upb_string *string;
-} upb_iton_ent;
-
-typedef int32_t upb_enumval_t;
-
-// Lookups from name to integer and vice-versa.
-bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
-// Caller does not own a ref on the returned string.
-upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
-
-// Iteration over name/value pairs. The order is undefined.
-// upb_enum_iter i;
-// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
-// // ...
-// }
-typedef upb_iton_ent *upb_enum_iter;
-
-upb_enum_iter upb_enum_begin(upb_enumdef *e);
-upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
-INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; }
-
-INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
- return iter->string;
-}
-INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
- return iter->e.key;
-}
-
-
-/* upb_symtab *****************************************************************/
-
-// A SymbolTable is where upb_defs live. It is empty when first constructed.
-// Clients add definitions to the symtab by supplying unserialized or
-// serialized descriptors (as defined in descriptor.proto).
-struct _upb_symtab {
- upb_atomic_refcount_t refcount;
- upb_rwlock_t lock; // Protects all members except the refcount.
- upb_strtable symtab; // The symbol table.
- upb_msgdef *fds_msgdef; // Msgdef for google.protobuf.FileDescriptorSet.
-};
-typedef struct _upb_symtab upb_symtab;
-
-// Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd
-// when the caller is done with them.
-upb_symtab *upb_symtab_new(void);
-void _upb_symtab_free(upb_symtab *s); // Must not be called directly!
-
-INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
-INLINE void upb_symtab_unref(upb_symtab *s) {
- if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s);
-}
-
-// Resolves the given symbol using the rules described in descriptor.proto,
-// namely:
-//
-// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like
-// scoping rules are used to find the type (i.e. first the nested types
-// within this message are searched, then within the parent, on up to the
-// root namespace).
-//
-// If a def is found, the caller owns one ref on the returned def. Otherwise
-// returns NULL.
-upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
-
-// Find an entry in the symbol table with this exact name. If a def is found,
-// the caller owns one ref on the returned def. Otherwise returns NULL.
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
-
-// Gets an array of pointers to all currently active defs in this symtab. The
-// caller owns the returned array (which is of length *count) as well as a ref
-// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are
-// returned, otherwise only defs of the required type are returned.
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type);
-
-// "fds" is a upb_src that will yield data from the
-// google.protobuf.FileDescriptorSet message type. It is not necessary that
-// the upb_def for FileDescriptorSet came from this symtab, but it must be
-// compatible with the official descriptor.proto, as published by Google.
-//
-// upb_symtab_addfds() adds all the definitions from the given
-// FileDescriptorSet and adds them to the symtab. status indicates whether the
-// operation was successful or not, and the error message (if any).
-//
-// TODO: should this allow redefinition? Either is possible, but which is
-// more useful? Maybe it should be an option.
-void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status);
-
-// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab.
-// This is necessary for bootstrapping, since these are the upb_defs that
-// specify other defs and allow them to be loaded.
-void upb_symtab_add_descriptorproto(upb_symtab *s);
-
-// Returns the upb_msgdef for google.protobuf.FileDescriptorSet, which the
-// caller owns a ref on. This is a convenience method that is equivalent to
-// looking up the symbol called "google.protobuf.FileDescriptorSet" yourself,
-// except that it only will return a def that was added by
-// upb_symtab_add_descriptorproto().
-upb_msgdef *upb_symtab_fds_def(upb_symtab *s);
-
-
-/* upb_def casts **************************************************************/
-
-// Dynamic casts, for determining if a def is of a particular type at runtime.
-#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
- struct _upb_ ## lower; /* Forward-declare. */ \
- INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
- if(def->type != UPB_DEF_ ## upper) return NULL; \
- return (struct _upb_ ## lower*)def; \
- }
-UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
-UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
-UPB_DYNAMIC_CAST_DEF(svcdef, SVC);
-UPB_DYNAMIC_CAST_DEF(extdef, EXT);
-UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
-#undef UPB_DYNAMIC_CAST_DEF
-
-// Downcasts, for when some wants to assert that a def is of a particular type.
-// These are only checked if we are building debug.
-#define UPB_DOWNCAST_DEF(lower, upper) \
- struct _upb_ ## lower; /* Forward-declare. */ \
- INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
- assert(def->type == UPB_DEF_ ## upper); \
- return (struct _upb_ ## lower*)def; \
- }
-UPB_DOWNCAST_DEF(msgdef, MSG);
-UPB_DOWNCAST_DEF(enumdef, ENUM);
-UPB_DOWNCAST_DEF(svcdef, SVC);
-UPB_DOWNCAST_DEF(extdef, EXT);
-UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
-#undef UPB_DOWNCAST_DEF
-
-#define UPB_UPCAST(ptr) (&(ptr)->base)
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_DEF_H_ */
diff --git a/core/upb_glue.c b/core/upb_glue.c
deleted file mode 100644
index 541827e..0000000
--- a/core/upb_glue.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
- */
-
-#include "upb_glue.h"
-#include "upb_msg.h"
-#include "upb_decoder.h"
-#include "upb_strstream.h"
-
-void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md,
- upb_status *status) {
- upb_stringsrc strsrc;
- upb_stringsrc_init(&strsrc);
- upb_stringsrc_reset(&strsrc, str);
-
- upb_decoder d;
- upb_decoder_init(&d, md);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc));
- upb_src *src = upb_decoder_src(&d);
-
- upb_msgpopulator p;
- upb_msgpopulator_init(&p);
- upb_msgpopulator_reset(&p, msg, md);
-
- upb_handlers h;
- upb_handlers_init(&h);
- upb_msgpopulator_register_handlers(&p, &h);
- upb_src_sethandlers(src, &h);
-
- upb_src_run(src, status);
-
- upb_stringsrc_uninit(&strsrc);
- upb_decoder_uninit(&d);
- upb_msgpopulator_uninit(&p);
- upb_handlers_uninit(&h);
-}
-
-void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
- upb_stringsrc strsrc;
- upb_stringsrc_init(&strsrc);
- upb_stringsrc_reset(&strsrc, str);
-
- upb_decoder d;
- upb_msgdef *fds_msgdef = upb_symtab_fds_def(symtab);
- upb_decoder_init(&d, fds_msgdef);
- upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc));
-
- upb_symtab_addfds(symtab, upb_decoder_src(&d), status);
- upb_stringsrc_uninit(&strsrc);
- upb_decoder_uninit(&d);
- upb_def_unref(UPB_UPCAST(fds_msgdef));
-}
diff --git a/core/upb_glue.h b/core/upb_glue.h
deleted file mode 100644
index ca32436..0000000
--- a/core/upb_glue.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * upb's core components like upb_decoder and upb_msg are carefully designed to
- * avoid depending on each other for maximum orthogonality. In other words,
- * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is
- * just one such structure. You can use upb_decoder without having to link in
- * upb_msg.
- *
- * However, for convenience we provide functions here for doing common
- * operations like deserializing protobuf binary format into a upb_msg. The
- * compromise is that this file drags in almost all of upb as a dependency,
- * which could be undesirable if you're trying to use a trimmed-down build of
- * upb.
- *
- * Copyright (c) 2011 Joshua Haberman. See LICENSE for details.
- */
-
-#ifndef UPB_GLUE_H
-#define UPB_GLUE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Forward-declares so we don't have to include everything in this .h file.
-// Clients should use the regular, typedef'd names (eg. upb_string).
-struct _upb_msg;
-struct _upb_msgdef;
-struct _upb_status;
-struct _upb_string;
-struct _upb_symtab;
-
-// Decodes the given string, which must be in protobuf binary format, to the
-// given upb_msg with msgdef "md", storing the status of the operation in "s".
-void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg,
- struct _upb_msgdef *md, struct _upb_status *s);
-
-void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str,
- struct _upb_status *status);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/core/upb_msg.c b/core/upb_msg.c
deleted file mode 100644
index 9dfbea4..0000000
--- a/core/upb_msg.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
- *
- * Data structure for storing a message of protobuf data.
- */
-
-#include "upb_msg.h"
-#include "upb_decoder.h"
-#include "upb_strstream.h"
-
-static uint32_t upb_round_up_pow2(uint32_t v) {
- // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
-}
-
-static void upb_elem_free(upb_value v, upb_fielddef *f) {
- switch(f->type) {
- case UPB_TYPE(MESSAGE):
- case UPB_TYPE(GROUP):
- _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def));
- break;
- case UPB_TYPE(STRING):
- case UPB_TYPE(BYTES):
- _upb_string_free(upb_value_getstr(v));
- break;
- default:
- abort();
- }
-}
-
-static void upb_elem_unref(upb_value v, upb_fielddef *f) {
- assert(upb_elem_ismm(f));
- upb_atomic_refcount_t *refcount = upb_value_getrefcount(v);
- if (refcount && upb_atomic_unref(refcount))
- upb_elem_free(v, f);
-}
-
-static void upb_field_free(upb_value v, upb_fielddef *f) {
- if (upb_isarray(f)) {
- _upb_array_free(upb_value_getarr(v), f);
- } else {
- upb_elem_free(v, f);
- }
-}
-
-static void upb_field_unref(upb_value v, upb_fielddef *f) {
- assert(upb_field_ismm(f));
- upb_atomic_refcount_t *refcount = upb_value_getrefcount(v);
- if (refcount && upb_atomic_unref(refcount))
- upb_field_free(v, f);
-}
-
-
-/* upb_array ******************************************************************/
-
-upb_array *upb_array_new(void) {
- upb_array *arr = malloc(sizeof(*arr));
- upb_atomic_refcount_init(&arr->refcount, 1);
- arr->size = 0;
- arr->len = 0;
- arr->ptr = NULL;
- return arr;
-}
-
-void upb_array_recycle(upb_array **_arr, upb_fielddef *f) {
- upb_array *arr = *_arr;
- if(arr && upb_atomic_only(&arr->refcount)) {
- arr->len = 0;
- } else {
- upb_array_unref(arr, f);
- *_arr = upb_array_new();
- }
-}
-
-void _upb_array_free(upb_array *arr, upb_fielddef *f) {
- if (upb_elem_ismm(f)) {
- // Need to release refs on sub-objects.
- upb_valuetype_t type = upb_elem_valuetype(f);
- for (upb_arraylen_t i = 0; i < arr->size; i++) {
- upb_valueptr p = _upb_array_getptr(arr, f, i);
- upb_elem_unref(upb_value_read(p, type), f);
- }
- }
- free(arr->ptr);
- free(arr);
-}
-
-void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) {
- size_t type_size = upb_types[f->type].size;
- upb_arraylen_t old_size = arr->size;
- if (old_size < len) {
- // Need to resize.
- size_t new_size = upb_round_up_pow2(len);
- arr->ptr = realloc(arr->ptr, new_size * type_size);
- arr->size = new_size;
- memset(arr->ptr + (old_size * type_size), 0,
- (new_size - old_size) * type_size);
- }
- arr->len = len;
-}
-
-
-/* upb_msg ********************************************************************/
-
-upb_msg *upb_msg_new(upb_msgdef *md) {
- upb_msg *msg = malloc(md->size);
- // Clear all set bits and cached pointers.
- memset(msg, 0, md->size);
- upb_atomic_refcount_init(&msg->refcount, 1);
- return msg;
-}
-
-void _upb_msg_free(upb_msg *msg, upb_msgdef *md) {
- // Need to release refs on all sub-objects.
- upb_msg_iter i;
- for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
- upb_fielddef *f = upb_msg_iter_field(i);
- upb_valueptr p = _upb_msg_getptr(msg, f);
- upb_valuetype_t type = upb_field_valuetype(f);
- if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f);
- }
- free(msg);
-}
-
-void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) {
- upb_msg *msg = *_msg;
- if(msg && upb_atomic_only(&msg->refcount)) {
- upb_msg_clear(msg, msgdef);
- } else {
- upb_msg_unref(msg, msgdef);
- *_msg = upb_msg_new(msgdef);
- }
-}
-
-INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
- msg->data[f->set_bit_offset] |= f->set_bit_mask;
-}
-
-static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
- upb_valueptr p = _upb_msg_getptr(msg, f);
- if (upb_isarray(f)) {
- // Create/recycle/resize the array if necessary, and find a pointer to
- // a newly-appended element.
- if (!upb_msg_has(msg, f)) {
- upb_array_recycle(p.arr, f);
- upb_msg_sethas(msg, f);
- }
- assert(*p.arr != NULL);
- upb_arraylen_t oldlen = upb_array_len(*p.arr);
- upb_array_resize(*p.arr, f, oldlen + 1);
- p = _upb_array_getptr(*p.arr, f, oldlen);
- }
- return p;
-}
-
-static void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) {
- upb_valueptr p = upb_msg_getappendptr(msg, f);
- if (upb_isstring(f)) {
- // We do:
- // - upb_string_recycle(), upb_string_substr() instead of
- // - upb_string_unref(), upb_string_getref()
- // because we can conveniently cache these upb_string objects in the
- // upb_msg, whereas the upb_src who is sending us these strings may not
- // have a good way of caching them. This saves the upb_src from allocating
- // new upb_strings all the time to give us.
- //
- // If you were using this to copy one upb_msg to another this would
- // allocate string objects whereas a upb_string_getref could have avoided
- // those allocations completely; if this is an issue, we could make it an
- // option of the upb_msgpopulator which behavior is desired.
- upb_string *src = upb_value_getstr(val);
- upb_string_recycle(p.str);
- upb_string_substr(*p.str, src, 0, upb_string_len(src));
- } else {
- upb_value_write(p, val, f->type);
- }
- upb_msg_sethas(msg, f);
-}
-
-upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) {
- upb_valueptr p = upb_msg_getappendptr(msg, f);
- if (upb_isarray(f) || !upb_msg_has(msg, f)) {
- upb_msg_recycle(p.msg, msgdef);
- upb_msg_sethas(msg, f);
- }
- return *p.msg;
-}
-
-
-/* upb_msgpopulator ***********************************************************/
-
-void upb_msgpopulator_init(upb_msgpopulator *p) {
- upb_status_init(&p->status);
-}
-
-void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md) {
- p->top = p->stack;
- p->limit = p->stack + sizeof(p->stack);
- p->top->msg = m;
- p->top->msgdef = md;
-}
-
-void upb_msgpopulator_uninit(upb_msgpopulator *p) {
- upb_status_uninit(&p->status);
-}
-
-static upb_flow_t upb_msgpopulator_value(void *_p, upb_fielddef *f, upb_value val) {
- upb_msgpopulator *p = _p;
- upb_msg_appendval(p->top->msg, f, val);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_msgpopulator_startsubmsg(void *_p, upb_fielddef *f,
- upb_handlers *delegate_to) {
- upb_msgpopulator *p = _p;
- (void)delegate_to;
- upb_msg *oldmsg = p->top->msg;
- if (++p->top == p->limit) {
- upb_seterr(&p->status, UPB_ERROR, "Exceeded maximum nesting");
- return UPB_BREAK;
- }
- upb_msgdef *msgdef = upb_downcast_msgdef(f->def);
- p->top->msgdef = msgdef;
- p->top->msg = upb_msg_appendmsg(oldmsg, f, msgdef);
- return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_msgpopulator_endsubmsg(void *_p) {
- upb_msgpopulator *p = _p;
- --p->top;
- return UPB_CONTINUE;
-}
-
-void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h) {
- static upb_handlerset handlerset = {
- NULL, // startmsg
- NULL, // endmsg
- &upb_msgpopulator_value,
- &upb_msgpopulator_startsubmsg,
- &upb_msgpopulator_endsubmsg,
- };
- upb_register_handlerset(h, &handlerset);
- upb_set_handler_closure(h, p, &p->status);
-}
diff --git a/core/upb_msg.h b/core/upb_msg.h
deleted file mode 100644
index 8a3c63f..0000000
--- a/core/upb_msg.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details.
- *
- * Data structure for storing a message of protobuf data. Unlike Google's
- * protobuf, upb_msg and upb_array are reference counted instead of having
- * exclusive ownership of their fields. This is a better match for dynamic
- * languages where statements like a.b = other_b are normal.
- *
- * upb's parsers and serializers could also be used to populate and serialize
- * other kinds of message objects (even one generated by Google's protobuf).
- */
-
-#ifndef UPB_MSG_H
-#define UPB_MSG_H
-
-#include "upb.h"
-#include "upb_def.h"
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// A pointer to a .proto value. The owner must have an out-of-band way of
-// knowing the type, so it knows which union member to use.
-typedef union {
- double *_double;
- float *_float;
- int32_t *int32;
- int64_t *int64;
- uint8_t *uint8;
- uint32_t *uint32;
- uint64_t *uint64;
- bool *_bool;
- upb_string **str;
- upb_msg **msg;
- upb_array **arr;
- void *_void;
-} upb_valueptr;
-
-INLINE upb_valueptr upb_value_addrof(upb_value *val) {
- upb_valueptr ptr = {&val->val._double};
- return ptr;
-}
-
-// Reads or writes a upb_value from an address represented by a upb_value_ptr.
-// We need to know the value type to perform this operation, because we need to
-// know how much memory to copy (and for big-endian machines, we need to know
-// where in the upb_value the data goes).
-//
-// For little endian-machines where we didn't mind overreading, we could make
-// upb_value_read simply use memcpy().
-INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
- upb_value val;
-
-#ifdef NDEBUG
-#define CASE(t, member_name) \
- case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break;
-#else
-#define CASE(t, member_name) \
- case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = upb_types[ft].inmemory_type; break;
-#endif
-
- switch(ft) {
- CASE(DOUBLE, _double)
- CASE(FLOAT, _float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, _bool)
- CASE(ENUM, int32)
- CASE(STRING, str)
- CASE(BYTES, str)
- CASE(MESSAGE, msg)
- CASE(GROUP, msg)
- case UPB_VALUETYPE_ARRAY:
- val.val.arr = *ptr.arr;
-#ifndef NDEBUG
- val.type = UPB_VALUETYPE_ARRAY;
-#endif
- break;
- default: assert(false);
- }
- return val;
-
-#undef CASE
-}
-
-INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
- upb_fieldtype_t ft) {
- if (ft == UPB_VALUETYPE_ARRAY) {
- assert(val.type == UPB_VALUETYPE_ARRAY);
- } else {
- assert(val.type == upb_types[ft].inmemory_type);
- }
-#define CASE(t, member_name) \
- case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;
-
- switch(ft) {
- CASE(DOUBLE, _double)
- CASE(FLOAT, _float)
- CASE(INT32, int32)
- CASE(INT64, int64)
- CASE(UINT32, uint32)
- CASE(UINT64, uint64)
- CASE(SINT32, int32)
- CASE(SINT64, int64)
- CASE(FIXED32, uint32)
- CASE(FIXED64, uint64)
- CASE(SFIXED32, int32)
- CASE(SFIXED64, int64)
- CASE(BOOL, _bool)
- CASE(ENUM, int32)
- CASE(STRING, str)
- CASE(BYTES, str)
- CASE(MESSAGE, msg)
- CASE(GROUP, msg)
- case UPB_VALUETYPE_ARRAY:
- *ptr.arr = val.val.arr;
- break;
- default: assert(false);
- }
-
-#undef CASE
-}
-
-/* upb_array ******************************************************************/
-
-typedef uint32_t upb_arraylen_t;
-struct _upb_array {
- upb_atomic_refcount_t refcount;
- // "len" and "size" are measured in elements, not bytes.
- upb_arraylen_t len;
- upb_arraylen_t size;
- char *ptr;
-};
-
-void _upb_array_free(upb_array *a, upb_fielddef *f);
-INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f,
- uint32_t elem) {
- upb_valueptr p;
- p._void = &a->ptr[elem * upb_types[f->type].size];
- return p;
-}
-
-upb_array *upb_array_new(void);
-
-INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) {
- if (a && upb_atomic_unref(&a->refcount)) _upb_array_free(a, f);
-}
-
-void upb_array_recycle(upb_array **arr, upb_fielddef *f);
-
-INLINE uint32_t upb_array_len(upb_array *a) {
- return a->len;
-}
-
-INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
- upb_arraylen_t i) {
- assert(i < upb_array_len(arr));
- return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
-}
-
-/* upb_msg ********************************************************************/
-
-struct _upb_msg {
- upb_atomic_refcount_t refcount;
- uint8_t data[4]; // We allocate the appropriate amount per message.
-};
-
-void _upb_msg_free(upb_msg *msg, upb_msgdef *md);
-
-INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) {
- upb_valueptr p;
- p._void = &msg->data[f->byte_offset];
- return p;
-}
-
-// Creates a new msg of the given type.
-upb_msg *upb_msg_new(upb_msgdef *md);
-
-// Unrefs the given message.
-INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
- if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
-}
-
-void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
-
-// Tests whether the given field is explicitly set, or whether it will return a
-// default.
-INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
- return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
-}
-
-INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
- return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
-}
-
-// Unsets all field values back to their defaults.
-INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
- memset(msg->data, 0, md->set_flags_bytes);
-}
-
-typedef struct {
- upb_msg *msg;
- upb_msgdef *msgdef;
-} upb_msgpopulator_frame;
-
-typedef struct {
- upb_msgpopulator_frame stack[UPB_MAX_NESTING], *top, *limit;
- upb_status status;
-} upb_msgpopulator;
-
-void upb_msgpopulator_init(upb_msgpopulator *p);
-void upb_msgpopulator_uninit(upb_msgpopulator *p);
-void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md);
-void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/core/upb_stream.h b/core/upb_stream.h
deleted file mode 100644
index 3f7c843..0000000
--- a/core/upb_stream.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * This file defines four general-purpose streaming data interfaces.
- *
- * - upb_handlers: represents a set of callbacks, very much like in XML's SAX
- * API, that a client can register to do a streaming tree traversal over a
- * stream of structured protobuf data, without knowing where that data is
- * coming from. There is only one upb_handlers type (it is not a virtual
- * base class), but the object lets you register any set of handlers.
- *
- * The upb_handlers interface supports delegation: when entering a submessage,
- * you can delegate to another set of upb_handlers instead of handling the
- * submessage yourself. This allows upb_handlers objects to *compose* -- you
- * can implement a set of upb_handlers without knowing or caring whether this
- * is the top-level message or not.
- *
- * The other interfaces are the C equivalent of "virtual base classes" that
- * anyone can implement:
- *
- * - upb_src: an interface that represents a source of streaming protobuf data.
- * It lets you register a set of upb_handlers, and then call upb_src_run(),
- * which pulls the protobuf data from somewhere and then calls the handlers.
- *
- * - upb_bytesrc: a pull interface for streams of bytes, basically an
- * abstraction of read()/fread(), but it avoids copies where possible.
- *
- * - upb_bytesink: push interface for streams of bytes, basically an
- * abstraction of write()/fwrite(), but it avoids copies where possible.
- *
- * All of the encoders and decoders are based on these generic interfaces,
- * which lets you write streaming algorithms that do not depend on a specific
- * serialization format; for example, you can write a pretty printer that works
- * with input that came from protobuf binary format, protobuf text format, or
- * even an in-memory upb_msg -- the pretty printer will not know the
- * difference.
- *
- * Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details.
- *
- */
-
-#ifndef UPB_STREAM_H
-#define UPB_STREAM_H
-
-#include "upb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Forward-declare. We can't include upb_def.h; it would be circular.
-struct _upb_fielddef;
-
-/* upb_handlers ***************************************************************/
-
-// upb_handlers define the interface by which a upb_src passes data to a
-// upb_sink.
-
-// Constants that a handler returns to indicate to its caller whether it should
-// continue or not.
-typedef enum {
- // Caller should continue sending values to the sink.
- UPB_CONTINUE,
-
- // Stop processing for now; check status for details. If no status was set,
- // a generic error will be returned. If the error is resumable, it is not
- // (yet) defined where processing will resume -- waiting for real-world
- // examples of resumable decoders and resume-requiring clients. upb_src
- // implementations that are not capable of resuming will override the return
- // status to be non-resumable if a resumable status was set by the handlers.
- UPB_BREAK,
-
- // Skips to the end of the current submessage (or if we are at the top
- // level, skips to the end of the entire message).
- UPB_SKIPSUBMSG,
-
- // When returned from a startsubmsg handler, indicates that the submessage
- // should be handled by a different set of handlers, which have been
- // registered on the provided upb_handlers object. This allows upb_handlers
- // objects to compose; a set of upb_handlers need not know whether it is the
- // top-level message or a sub-message. May not be returned from any other
- // callback.
- UPB_DELEGATE,
-} upb_flow_t;
-
-// upb_handlers
-struct _upb_handlers;
-typedef struct _upb_handlers upb_handlers;
-
-typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure);
-typedef upb_flow_t (*upb_endmsg_handler_t)(void *closure);
-typedef upb_flow_t (*upb_value_handler_t)(void *closure,
- struct _upb_fielddef *f,
- upb_value val);
-typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure,
- struct _upb_fielddef *f,
- upb_handlers *delegate_to);
-typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure);
-typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure,
- upb_field_number_t fieldnum,
- upb_value val);
-
-// An empty set of handlers, for convenient copy/paste:
-//
-// static upb_flow_t startmsg(void *closure) {
-// // Called when the top-level message begins.
-// return UPB_CONTINUE;
-// }
-//
-// static upb_flow_t endmsg(void *closure) {
-// // Called when the top-level message ends.
-// return UPB_CONTINUE;
-// }
-//
-// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) {
-// // Called for every value in the stream.
-// return UPB_CONTINUE;
-// }
-//
-// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f,
-// upb_handlers *delegate_to) {
-// // Called when a submessage begins; can delegate by returning UPB_DELEGATE.
-// return UPB_CONTINUE;
-// }
-//
-// static upb_flow_t endsubmsg(void *closure) {
-// // Called when a submessage ends.
-// return UPB_CONTINUE;
-// }
-//
-// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum,
-// upb_value val) {
-// // Called with an unknown value is encountered.
-// return UPB_CONTINUE;
-// }
-//
-// // Any handlers you don't need can be set to NULL.
-// static upb_handlerset handlers = {
-// startmsg,
-// endmsg,
-// value,
-// startsubmsg,
-// endsubmsg,
-// unknownval,
-// };
-typedef struct {
- upb_startmsg_handler_t startmsg;
- upb_endmsg_handler_t endmsg;
- upb_value_handler_t value;
- upb_startsubmsg_handler_t startsubmsg;
- upb_endsubmsg_handler_t endsubmsg;
- upb_unknownval_handler_t unknownval;
-} upb_handlerset;
-
-// Functions to register handlers on a upb_handlers object.
-INLINE void upb_handlers_init(upb_handlers *h);
-INLINE void upb_handlers_uninit(upb_handlers *h);
-INLINE void upb_handlers_reset(upb_handlers *h);
-INLINE bool upb_handlers_isempty(upb_handlers *h);
-INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set);
-
-// TODO: for clients that want to increase efficiency by preventing bytesrcs
-// from automatically being converted to strings in the value callback.
-// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs);
-
-// The closure will be passed to every handler. The status will be read by the
-// upb_src immediately after a handler has returned UPB_BREAK and used as the
-// overall upb_src status; it will not be referenced at any other time.
-INLINE void upb_set_handler_closure(upb_handlers *h, void *closure,
- upb_status *status);
-
-
-/* upb_src ********************************************************************/
-
-struct _upb_src;
-typedef struct _upb_src upb_src;
-
-// upb_src_sethandlers() must be called once and only once before upb_src_run()
-// is called. This sets up the callbacks that will handle the parse. A
-// upb_src that is fully initialized except for the call to
-// upb_src_sethandlers() is called "prepared" -- this is useful for library
-// functions that want to consume the output of a generic upb_src.
-// Calling sethandlers() multiple times is an error and will trigger an abort().
-INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers);
-
-// Runs the src, calling the callbacks that were registered with
-// upb_src_sethandlers(), and returning the status of the operation in
-// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a
-// non-blocking socket) or a resumable error; in both cases upb_src_run can be
-// called again later. TRYAGAIN could come from either the src (input buffers
-// are empty) or the handlers (output buffers are full).
-INLINE void upb_src_run(upb_src *src, upb_status *status);
-
-
-// A convenience object that a upb_src can use to invoke handlers. It
-// transparently handles delegation so that the upb_src needs only follow the
-// protocol as if delegation did not exist.
-struct _upb_dispatcher;
-typedef struct _upb_dispatcher upb_dispatcher;
-INLINE void upb_dispatcher_init(upb_dispatcher *d);
-INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h,
- bool supports_skip);
-INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d);
-INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d);
-INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
- struct _upb_fielddef *f);
-INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d);
-INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f,
- upb_value val);
-INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d,
- upb_field_number_t fieldnum,
- upb_value val);
-
-/* upb_bytesrc ****************************************************************/
-
-// Reads up to "count" bytes into "buf", returning the total number of bytes
-// read. If 0, indicates error and puts details in "status".
-INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf,
- upb_strlen_t count, upb_status *status);
-
-// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure
-// that "str" is created or just recycled. Returns "false" if no data was
-// returned, either due to error or EOF (check status for details).
-//
-// In comparison to upb_bytesrc_read(), this call can possibly alias existing
-// string data (which avoids a copy). On the other hand, if the data was *not*
-// already in an existing string, this copies it into a upb_string, and if the
-// data needs to be put in a specific range of memory (because eg. you need to
-// put it into a different kind of string object) then upb_bytesrc_get() could
-// save you a copy.
-INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str,
- upb_status *status);
-
-// A convenience function for getting all the remaining data in a upb_bytesrc
-// as a upb_string. Returns false and sets "status" if the operation fails.
-INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str,
- upb_status *status);
-INLINE bool upb_value_getfullstr(upb_value val, upb_string *str,
- upb_status *status) {
- return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status);
-}
-
-
-/* upb_bytesink ***************************************************************/
-
-struct _upb_bytesink;
-typedef struct _upb_bytesink upb_bytesink;
-
-// TODO: Figure out how buffering should be handled. Should the caller buffer
-// data and only call these functions when a buffer is full? Seems most
-// efficient, but then buffering has to be configured in the caller, which
-// could be anything, which makes it hard to have a standard interface for
-// controlling buffering.
-//
-// The downside of having the bytesink buffer is efficiency: the caller is
-// making more (virtual) function calls, and the caller can't arrange to have
-// a big contiguous buffer. The bytesink can do this, but will have to copy
-// to make the data contiguous.
-
-// Returns the number of bytes written.
-INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
- const char *fmt, ...);
-
-// Puts the given string, returning true if the operation was successful, otherwise
-// check "status" for details. Ownership of the string is *not* passed; if
-// the callee wants a reference he must call upb_string_getref() on it.
-INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str,
- upb_status *status);
-
-#include "upb_stream_vtbl.h"
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h
deleted file mode 100644
index e1f9cb8..0000000
--- a/core/upb_stream_vtbl.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * vtable declarations for types that are implementing any of the src or sink
- * interfaces. Only components that are implementing these interfaces need
- * to worry about this file.
- *
- * Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
- */
-
-#ifndef UPB_SRCSINK_VTBL_H_
-#define UPB_SRCSINK_VTBL_H_
-
-#include <assert.h>
-#include "upb_stream.h"
-#include "upb_string.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Typedefs for function pointers to all of the virtual functions.
-
-// upb_src
-typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers);
-typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status);
-
-// upb_bytesrc.
-typedef upb_strlen_t (*upb_bytesrc_read_fptr)(
- upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status);
-typedef bool (*upb_bytesrc_getstr_fptr)(
- upb_bytesrc *src, upb_string *str, upb_status *status);
-
-// upb_bytesink.
-typedef upb_strlen_t (*upb_bytesink_write_fptr)(
- upb_bytesink *bytesink, void *buf, upb_strlen_t count);
-typedef upb_strlen_t (*upb_bytesink_putstr_fptr)(
- upb_bytesink *bytesink, upb_string *str, upb_status *status);
-typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)(
- upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args);
-
-// Vtables for the above interfaces.
-typedef struct {
- upb_bytesrc_read_fptr read;
- upb_bytesrc_getstr_fptr getstr;
-} upb_bytesrc_vtbl;
-
-typedef struct {
- upb_bytesink_write_fptr write;
- upb_bytesink_putstr_fptr putstr;
- upb_bytesink_vprintf_fptr vprintf;
-} upb_bytesink_vtbl;
-
-typedef struct {
- upb_src_sethandlers_fptr sethandlers;
- upb_src_run_fptr run;
-} upb_src_vtbl;
-
-
-// "Base Class" definitions; components that implement these interfaces should
-// contain one of these structures.
-
-struct _upb_bytesrc {
- upb_bytesrc_vtbl *vtbl;
-};
-
-struct _upb_bytesink {
- upb_bytesink_vtbl *vtbl;
-};
-
-struct _upb_src {
- upb_src_vtbl *vtbl;
-};
-
-INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) {
- s->vtbl = vtbl;
-}
-
-INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) {
- s->vtbl = vtbl;
-}
-
-INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) {
- s->vtbl = vtbl;
-}
-
-// Implementation of virtual function dispatch.
-
-// upb_src
-INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) {
- src->vtbl->sethandlers(src, handlers);
-}
-
-INLINE void upb_src_run(upb_src *src, upb_status *status) {
- src->vtbl->run(src, status);
-}
-
-// upb_bytesrc
-INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf,
- upb_strlen_t count, upb_status *status) {
- return src->vtbl->read(src, buf, count, status);
-}
-
-INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str,
- upb_status *status) {
- return src->vtbl->getstr(src, str, status);
-}
-
-INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str,
- upb_status *status) {
- // We start with a getstr, because that could possibly alias data instead of
- // copying.
- if (!upb_bytesrc_getstr(src, str, status)) return false;
- // Trade-off between number of read calls and amount of overallocation.
- const size_t bufsize = 4096;
- do {
- upb_strlen_t len = upb_string_len(str);
- char *buf = upb_string_getrwbuf(str, len + bufsize);
- upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize, status);
- if (read < 0) return false;
- // Resize to proper size.
- upb_string_getrwbuf(str, len + read);
- } while (!status->code != UPB_EOF);
- return true;
-}
-
-
-// upb_bytesink
-INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf,
- upb_strlen_t count) {
- return sink->vtbl->write(sink, buf, count);
-}
-
-INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
- return sink->vtbl->putstr(sink, str, status);
-}
-
-INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) {
- va_list args;
- va_start(args, fmt);
- upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
- va_end(args);
- return ret;
-}
-
-// upb_handlers
-struct _upb_handlers {
- upb_handlerset *set;
- void *closure;
- upb_status *status; // We don't own this.
-};
-
-INLINE void upb_handlers_init(upb_handlers *h) {
- (void)h;
-}
-INLINE void upb_handlers_uninit(upb_handlers *h) {
- (void)h;
-}
-
-INLINE void upb_handlers_reset(upb_handlers *h) {
- h->set = NULL;
- h->closure = NULL;
-}
-
-INLINE bool upb_handlers_isempty(upb_handlers *h) {
- return !h->set && !h->closure;
-}
-
-INLINE upb_flow_t upb_nop(void *closure) {
- (void)closure;
- return UPB_CONTINUE;
-}
-
-INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) {
- (void)closure;
- (void)f;
- (void)val;
- return UPB_CONTINUE;
-}
-
-INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f,
- upb_handlers *delegate_to) {
- (void)closure;
- (void)f;
- (void)delegate_to;
- return UPB_CONTINUE;
-}
-
-INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
- upb_value val) {
- (void)closure;
- (void)fieldnum;
- (void)val;
- return UPB_CONTINUE;
-}
-
-INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) {
- if (!set->startmsg) set->startmsg = &upb_nop;
- if (!set->endmsg) set->endmsg = &upb_nop;
- if (!set->value) set->value = &upb_value_nop;
- if (!set->startsubmsg) set->startsubmsg = &upb_startsubmsg_nop;
- if (!set->endsubmsg) set->endsubmsg = &upb_nop;
- if (!set->unknownval) set->unknownval = &upb_unknownval_nop;
- h->set = set;
-}
-
-INLINE void upb_set_handler_closure(upb_handlers *h, void *closure,
- upb_status *status) {
- h->closure = closure;
- h->status = status;
-}
-
-// upb_dispatcher
-typedef struct {
- upb_handlers handlers;
- int depth;
-} upb_dispatcher_frame;
-
-struct _upb_dispatcher {
- upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit;
- bool supports_skip;
-};
-
-INLINE void upb_dispatcher_init(upb_dispatcher *d) {
- d->limit = d->stack + sizeof(d->stack);
-}
-
-INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h,
- bool supports_skip) {
- d->top = d->stack;
- d->top->depth = 1; // Never want to trigger end-of-delegation.
- d->top->handlers = *h;
- d->supports_skip = supports_skip;
-}
-
-INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) {
- assert(d->stack == d->top);
- return d->top->handlers.set->startmsg(d->top->handlers.closure);
-}
-
-INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) {
- assert(d->stack == d->top);
- return d->top->handlers.set->endmsg(d->top->handlers.closure);
-}
-
-// TODO: several edge cases to fix:
-// - delegated start returns UPB_BREAK, should replay the start on resume.
-// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg.
-INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
- struct _upb_fielddef *f) {
- upb_handlers handlers;
- upb_handlers_init(&handlers);
- upb_handlers_reset(&handlers);
- upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers);
- assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers));
- if (ret == UPB_DELEGATE) {
- ++d->top;
- d->top->handlers = handlers;
- d->top->depth = 0;
- ret = d->top->handlers.set->startmsg(d->top->handlers.closure);
- }
- if (ret == UPB_CONTINUE || !d->supports_skip) ++d->top->depth;
- upb_handlers_uninit(&handlers);
- return ret;
-}
-
-INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) {
- upb_flow_t ret;
- if (--d->top->depth == 0) {
- ret = d->top->handlers.set->endmsg(d->top->handlers.closure);
- if (ret != UPB_CONTINUE) return ret;
- --d->top;
- assert(d->top >= d->stack);
- }
- return d->top->handlers.set->endsubmsg(d->top->handlers.closure);
-}
-
-INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d,
- struct _upb_fielddef *f,
- upb_value val) {
- return d->top->handlers.set->value(d->top->handlers.closure, f, val);
-}
-
-INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d,
- upb_field_number_t fieldnum,
- upb_value val) {
- return d->top->handlers.set->unknownval(d->top->handlers.closure,
- fieldnum, val);
-}
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/core/upb_string.c b/core/upb_string.c
deleted file mode 100644
index 30ed88f..0000000
--- a/core/upb_string.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
- */
-
-#include "upb_string.h"
-
-#include <stdlib.h>
-#ifdef __GLIBC__
-#include <malloc.h>
-#elif defined(__APPLE__)
-#include <malloc/malloc.h>
-#endif
-
-static uint32_t upb_round_up_pow2(uint32_t v) {
- // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
-}
-
-upb_string *upb_string_new() {
- upb_string *str = malloc(sizeof(*str));
- str->ptr = NULL;
- str->cached_mem = NULL;
- str->len = 0;
-#ifndef UPB_HAVE_MSIZE
- str->size = 0;
-#endif
- str->src = NULL;
- upb_atomic_refcount_init(&str->refcount, 1);
- return str;
-}
-
-uint32_t upb_string_size(upb_string *str) {
-#ifdef __GLIBC__
- return malloc_usable_size(str->cached_mem);
-#elif defined(__APPLE__)
- return malloc_size(str->cached_mem);
-#else
- return str->size;
-#endif
-}
-
-void _upb_string_free(upb_string *str) {
- free(str->cached_mem);
- _upb_string_release(str);
- free(str);
-}
-
-char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
- // assert(str->ptr == NULL);
- upb_strlen_t size = upb_string_size(str);
- if (size < len) {
- size = upb_round_up_pow2(len);
- str->cached_mem = realloc(str->cached_mem, size);
-#ifndef UPB_HAVE_MSIZE
- str->size = size;
-#endif
- }
- str->len = len;
- str->ptr = str->cached_mem;
- return str->cached_mem;
-}
-
-void upb_string_substr(upb_string *str, upb_string *target_str,
- upb_strlen_t start, upb_strlen_t len) {
- if(str->ptr) *(char*)0 = 0;
- assert(str->ptr == NULL);
- str->src = upb_string_getref(target_str);
- str->ptr = upb_string_getrobuf(target_str) + start;
- str->len = len;
-}
-
-void upb_string_vprintf(upb_string *str, const char *format, va_list args) {
- // Try once without reallocating. We have to va_copy because we might have
- // to call vsnprintf again.
- uint32_t size = UPB_MAX(upb_string_size(str), 16);
- char *buf = upb_string_getrwbuf(str, size);
- va_list args_copy;
- va_copy(args_copy, args);
- uint32_t true_size = vsnprintf(buf, size, format, args_copy);
- va_end(args_copy);
-
- if (true_size >= size) {
- // Need to reallocate. We reallocate even if the sizes were equal,
- // because snprintf excludes the terminating NULL from its count.
- // We don't care about the terminating NULL, but snprintf might
- // bail out of printing even other characters if it doesn't have
- // enough space to write the NULL also.
- upb_string_recycle(&str);
- buf = upb_string_getrwbuf(str, true_size + 1);
- vsnprintf(buf, true_size + 1, format, args);
- }
- str->len = true_size;
-}
-
-upb_string *upb_string_asprintf(const char *format, ...) {
- upb_string *str = upb_string_new();
- va_list args;
- va_start(args, format);
- upb_string_vprintf(str, format, args);
- va_end(args);
- return str;
-}
-
-upb_string *upb_strdup(upb_string *s) {
- upb_string *str = upb_string_new();
- upb_strcpy(str, s);
- return str;
-}
-
-void upb_strcat(upb_string *s, upb_string *append) {
- uint32_t old_size = upb_string_len(s);
- uint32_t append_size = upb_string_len(append);
- uint32_t new_size = old_size + append_size;
- char *buf = upb_string_getrwbuf(s, new_size);
- memcpy(buf + old_size, upb_string_getrobuf(append), append_size);
-}
-
-upb_string *upb_strreadfile(const char *filename) {
- FILE *f = fopen(filename, "rb");
- if(!f) return NULL;
- if(fseek(f, 0, SEEK_END) != 0) goto error;
- long size = ftell(f);
- if(size < 0) goto error;
- if(fseek(f, 0, SEEK_SET) != 0) goto error;
- upb_string *s = upb_string_new();
- char *buf = upb_string_getrwbuf(s, size);
- if(fread(buf, size, 1, f) != 1) goto error;
- fclose(f);
- return s;
-
-error:
- fclose(f);
- return NULL;
-}
diff --git a/core/upb_string.h b/core/upb_string.h
deleted file mode 100644
index 0694a23..0000000
--- a/core/upb_string.h
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
- *
- * This file defines a simple string type which is length-delimited instead
- * of NULL-terminated, and which has useful sharing semantics.
- *
- * The overriding goal of upb_string is to avoid memcpy(), malloc(), and free()
- * wheverever possible, while keeping both CPU and memory overhead low.
- * Throughout upb there are situations where one wants to reference all or part
- * of another string without copying. upb_string provides APIs for doing this,
- * and allows the referenced string to be kept alive for as long as anyone is
- * referencing it.
- *
- * Characteristics of upb_string:
- * - strings are reference-counted.
- * - strings are immutable (can be mutated only when first created or recycled).
- * - if a string has no other referents, it can be "recycled" into a new string
- * without having to reallocate the upb_string.
- * - strings can be substrings of other strings (owning a ref on the source
- * string).
- *
- * Reference-counted strings have recently fallen out of favor because of the
- * performance impacts of doing thread-safe reference counting with atomic
- * operations. We side-step this issue by not performing atomic operations
- * unless the string has been marked thread-safe. Time will tell whether this
- * scheme is easy and convenient enough to be practical.
- *
- * Strings are expected to be 8-bit-clean, but "char*" is such an entrenched
- * idiom that we go with it instead of making our pointers uint8_t*.
- *
- * WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE
- * UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if
- * you are logically passing a reference to a upb_string to another thread
- * (which implies that the other thread must eventually call unref of recycle),
- * you have two options:
- *
- * - create a copy of the string that will be used in the other thread only.
- * - call upb_string_get_synchronized_ref(), which will make getref, unref, and
- * recycle thread-safe for this upb_string.
- */
-
-#ifndef UPB_STRING_H
-#define UPB_STRING_H
-
-#include <assert.h>
-#include <string.h>
-#include <stdarg.h>
-#include "upb_atomic.h"
-#include "upb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// All members of this struct are private, and may only be read/written through
-// the associated functions.
-struct _upb_string {
- // The string's refcount.
- upb_atomic_refcount_t refcount;
-
- // The pointer to our currently active data. This may be memory we own
- // or a pointer into memory we don't own.
- const char *ptr;
-
- // If non-NULL, this is a block of memory we own. We keep this cached even
- // if "ptr" is currently aliasing memory we don't own.
- char *cached_mem;
-
- // The effective length of the string (the bytes at ptr).
- int32_t len;
-#ifndef UPB_HAVE_MSIZE
- // How many bytes are allocated in cached_mem.
- //
- // Many platforms have a function that can tell you the size of a block
- // that was previously malloc'd. In this case we can avoid storing the
- // size explicitly.
- uint32_t size;
-#endif
-
- // Used if this is a slice of another string, NULL otherwise. We own a ref
- // on src.
- struct _upb_string *src;
-};
-
-// Internal-only initializer for upb_string instances.
-#ifdef UPB_HAVE_MSIZE
-#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, NULL}
-#else
-#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, 0, NULL}
-#endif
-
-// Special pseudo-refcounts for static/stack-allocated strings, respectively.
-#define _UPB_STRING_REFCOUNT_STATIC -1
-#define _UPB_STRING_REFCOUNT_STACK -2
-
-// Returns a newly-created, empty, non-finalized string. When the string is no
-// longer needed, it should be unref'd, never freed directly.
-upb_string *upb_string_new();
-
-// Internal-only; clients should call upb_string_unref().
-void _upb_string_free(upb_string *str);
-
-// Releases a ref on the given string, which may free the memory. "str"
-// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE
-// UNLESS THE STRING IS SYNCHRONIZED.
-INLINE void upb_string_unref(upb_string *str) {
- if (str && upb_atomic_read(&str->refcount) > 0 &&
- upb_atomic_unref(&str->refcount)) {
- _upb_string_free(str);
- }
-}
-
-static void _upb_string_release(upb_string *str) {
- if(str->src) {
- upb_string_unref(str->src);
- str->src = NULL;
- }
-}
-
-upb_string *upb_strdup(upb_string *s); // Forward-declare.
-
-// Returns a string with the same contents as "str". The caller owns a ref on
-// the returned string, which may or may not be the same object as "str.
-// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED!
-INLINE upb_string *upb_string_getref(upb_string *str) {
- int refcount = upb_atomic_read(&str->refcount);
- if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str);
- // We don't ref the special <0 refcount for static strings.
- if (refcount > 0) upb_atomic_ref(&str->refcount);
- return str;
-}
-
-// Returns the length of the string.
-INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
-
-// Use to read the bytes of the string. The caller *must* call
-// upb_string_endread() after the data has been read. The window between
-// upb_string_getrobuf() and upb_string_endread() should be kept as short as
-// possible, because any pending upb_string_detach() may be blocked until
-// upb_string_endread is called(). No other functions may be called on the
-// string during this window except upb_string_len().
-INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
-INLINE void upb_string_endread(upb_string *str) { (void)str; }
-
-// Convenience method for getting the end of the string. Calls
-// upb_string_getrobuf() so inherits the caveats of calling that function.
-INLINE const char *upb_string_getbufend(upb_string *str) {
- return upb_string_getrobuf(str) + upb_string_len(str);
-}
-
-// Attempts to recycle the string "str" so it may be reused and have different
-// data written to it. After the function returns, "str" points to a writable
-// string, which is either the original string if it had no other references
-// or a newly created string if it did have other references.
-//
-// As a special case, passing a pointer to NULL will allocate a new string.
-// This is convenient for the pattern:
-//
-// upb_string *str = NULL;
-// while (x) {
-// if (y) {
-// upb_string_recycle(&str);
-// upb_src_getstr(str);
-// }
-// }
-INLINE void upb_string_recycle(upb_string **_str) {
- upb_string *str = *_str;
- if(str && upb_atomic_only(&str->refcount)) {
- str->ptr = NULL;
- str->len = 0;
- _upb_string_release(str);
- } else {
- upb_string_unref(str);
- *_str = upb_string_new();
- }
-}
-
-
-// The options for setting the contents of a string. These may only be called
-// when a string is first created or recycled; once other functions have been
-// called on the string, these functions are not allowed until the string is
-// recycled.
-
-// Gets a pointer suitable for writing to the string, which is guaranteed to
-// have at least "len" bytes of data available. The size of the string will
-// become "len".
-char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
-
-// Replaces the contents of str with the contents of the given printf.
-void upb_string_vprintf(upb_string *str, const char *format, va_list args);
-INLINE void upb_string_printf(upb_string *str, const char *format, ...) {
- va_list args;
- va_start(args, format);
- upb_string_vprintf(str, format, args);
- va_end(args);
-}
-
-// Sets the contents of "str" to be the given substring of "target_str", to
-// which the caller must own a ref.
-void upb_string_substr(upb_string *str, upb_string *target_str,
- upb_strlen_t start, upb_strlen_t len);
-
-// Sketch of an API for allowing upb_strings to reference external, unowned
-// data. Waiting for a clear use case before actually implementing it.
-//
-// Makes the string "str" a reference to the given string data. The caller
-// guarantees that the given string data will not change or be deleted until a
-// matching call to upb_string_detach(), which may block until any concurrent
-// readers have finished reading. upb_string_detach() preserves the contents
-// of the string by copying the referenced data if there are any other
-// referents.
-// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
-// void upb_string_detach(upb_string *str);
-
-// Allows using upb_strings in printf, ie:
-// upb_strptr str = UPB_STRLIT("Hello, World!\n");
-// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */
-#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
-#define UPB_STRFMT "%.*s"
-
-// Macros for constructing upb_string objects statically or on the stack. These
-// can be used like:
-//
-// upb_string static_str = UPB_STATIC_STRING("Foo");
-//
-// int main() {
-// upb_string stack_str = UPB_STACK_STRING("Foo");
-// // Now:
-// // upb_streql(&static_str, &stack_str) == true
-// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true
-// }
-//
-// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays,
-// but you must not change the underlying data once you've passed the string on:
-//
-// void foo() {
-// char data[] = "ABC123";
-// upb_string stack_str = UPB_STACK_STR(data);
-// bar(&stack_str);
-// data[0] = "B"; // NOT ALLOWED!!
-// }
-//
-// TODO: should the stack business just be like attach/detach? The latter seems
-// more flexible, though it does require a stack allocation. Maybe put this off
-// until there is a clear use case.
-#define UPB_STATIC_STRING(str) \
- _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC)
-#define UPB_STATIC_STRING_LEN(str, len) \
- _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC)
-#define UPB_STACK_STRING(str) \
- _UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK)
-#define UPB_STACK_STRING_LEN(str, len) \
- _UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK)
-
-// A convenient way of specifying upb_strings as literals, like:
-//
-// upb_streql(UPB_STRLIT("expected"), other_str);
-//
-// However, this requires either C99 compound initializers or C++.
-// Must ONLY be called with a string literal as its argument!
-//#ifdef __cplusplus
-//namespace upb {
-//class String : public upb_string {
-// // This constructor must ONLY be called with a string literal.
-// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {}
-//};
-//}
-//#define UPB_STRLIT(str) upb::String(str)
-//#endif
-#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
-
-/* upb_string library functions ***********************************************/
-
-// Named like their <string.h> counterparts, these are all safe against buffer
-// overflow. For the most part these only use the public upb_string interface.
-
-// More efficient than upb_strcmp if all you need is to test equality.
-INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
- upb_strlen_t len = upb_string_len(s1);
- if(len != upb_string_len(s2)) {
- return false;
- } else {
- bool ret =
- memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
- upb_string_endread(s1);
- upb_string_endread(s2);
- return ret;
- }
-}
-
-// Like strcmp().
-int upb_strcmp(upb_string *s1, upb_string *s2);
-
-// Compare a upb_string with memory or a NULL-terminated C string.
-INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) {
- return len == upb_string_len(str) &&
- memcmp(upb_string_getrobuf(str), buf, len) == 0;
-}
-
-INLINE bool upb_streqlc(upb_string *str, const void *buf) {
- // Could be made one-pass.
- return upb_streqllen(str, buf, strlen((const char*)buf));
-}
-
-// Like upb_strcpy, but copies from a buffer and length.
-INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
- memcpy(upb_string_getrwbuf(dest, len), src, len);
-}
-
-// Replaces the contents of "dest" with the contents of "src".
-INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
- upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
- upb_string_endread(src);
-}
-
-// Like upb_strcpy, but copies from a NULL-terminated string.
-INLINE void upb_strcpyc(upb_string *dest, const void *src) {
- // This does two passes over src, but that is necessary unless we want to
- // repeatedly re-allocate dst, which seems worse.
- upb_strcpylen(dest, src, strlen((const char*)src));
-}
-
-// Returns a new string whose contents are a copy of s.
-upb_string *upb_strdup(upb_string *s);
-
-// Like upb_strdup(), but duplicates a given buffer and length.
-INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
- upb_string *s = upb_string_new();
- upb_strcpylen(s, src, len);
- return s;
-}
-
-// Like upb_strdup(), but duplicates a C NULL-terminated string.
-INLINE upb_string *upb_strdupc(const char *src) {
- return upb_strduplen(src, strlen(src));
-}
-
-// Appends 'append' to 's' in-place, resizing s if necessary.
-void upb_strcat(upb_string *s, upb_string *append);
-
-// Returns a new string that is a substring of the given string.
-INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) {
- upb_string *str = upb_string_new();
- upb_string_substr(str, s, offset, len);
- return str;
-}
-
-// Reads an entire file into a newly-allocated string.
-upb_string *upb_strreadfile(const char *filename);
-
-// Returns a new string with the contents of the given printf.
-upb_string *upb_string_asprintf(const char *format, ...);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/core/upb_table.c b/core/upb_table.c
deleted file mode 100644
index a6e0a56..0000000
--- a/core/upb_table.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- */
-
-#include "upb_table.h"
-#include "upb_string.h"
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-static const upb_inttable_key_t EMPTYENT = 0;
-static const double MAX_LOAD = 0.85;
-
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
-
-/* We use 1-based indexes into the table so that 0 can be "NULL". */
-static upb_inttable_entry *intent(upb_inttable *t, int32_t i) {
- return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
-}
-static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
- return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
-}
-
-void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size)
-{
- t->count = 0;
- t->entry_size = entry_size;
- t->size_lg2 = 0;
- while(size >>= 1) t->size_lg2++;
- size_t bytes = upb_table_size(t) * t->entry_size;
- t->mask = upb_table_size(t) - 1;
- t->entries = malloc(bytes);
- memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */
-}
-
-void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize)
-{
- upb_table_init(&t->t, size, entsize);
-}
-
-void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize)
-{
- upb_table_init(&t->t, size, entsize);
-}
-
-void upb_table_free(upb_table *t) { free(t->entries); }
-void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); }
-void upb_strtable_free(upb_strtable *t) {
- // Free refs from the strtable.
- upb_strtable_entry *e = upb_strtable_begin(t);
- for(; e; e = upb_strtable_next(t, e)) {
- upb_string_unref(e->key);
- }
- upb_table_free(&t->t);
-}
-
-static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
-{
- uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
- return (hash & (upb_strtable_size(t)-1)) + 1;
-}
-
-void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
-{
- uint32_t bucket = strtable_bucket(t, key);
- upb_strtable_entry *e;
- do {
- e = strent(t, bucket);
- if(e->key && upb_streql(e->key, key)) return e;
- } while((bucket = e->next) != UPB_END_OF_CHAIN);
- return NULL;
-}
-
-static uint32_t empty_intbucket(upb_inttable *table)
-{
- /* TODO: does it matter that this is biased towards the front of the table? */
- for(uint32_t i = 1; i <= upb_inttable_size(table); i++) {
- upb_inttable_entry *e = intent(table, i);
- if(e->key == EMPTYENT) return i;
- }
- assert(false);
- return 0;
-}
-
-/* The insert routines have a lot more code duplication between int/string
- * variants than I would like, but there's just a bit too much that varies to
- * parameterize them. */
-static void intinsert(upb_inttable *t, upb_inttable_entry *e)
-{
- assert(upb_inttable_lookup(t, e->key) == NULL);
- t->t.count++;
- uint32_t bucket = upb_inttable_bucket(t, e->key);
- upb_inttable_entry *table_e = intent(t, bucket);
- if(table_e->key != EMPTYENT) { /* Collision. */
- if(bucket == upb_inttable_bucket(t, table_e->key)) {
- /* Existing element is in its main posisiton. Find an empty slot to
- * place our new element and append it to this key's chain. */
- uint32_t empty_bucket = empty_intbucket(t);
- while (table_e->next != UPB_END_OF_CHAIN)
- table_e = intent(t, table_e->next);
- table_e->next = empty_bucket;
- table_e = intent(t, empty_bucket);
- } else {
- /* Existing element is not in its main position. Move it to an empty
- * slot and put our element in its main position. */
- uint32_t empty_bucket = empty_intbucket(t);
- uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key);
- memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
- upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
- while(1) {
- assert(evictee_e->key != UPB_EMPTY_ENTRY);
- assert(evictee_e->next != UPB_END_OF_CHAIN);
- if(evictee_e->next == bucket) {
- evictee_e->next = empty_bucket;
- break;
- }
- evictee_e = intent(t, evictee_e->next);
- }
- /* table_e remains set to our mainpos. */
- }
- }
- memcpy(table_e, e, t->t.entry_size);
- table_e->next = UPB_END_OF_CHAIN;
- assert(upb_inttable_lookup(t, e->key) == table_e);
-}
-
-void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e)
-{
- assert(e->key != 0);
- if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) {
- /* Need to resize. New table of double the size, add old elements to it. */
- upb_inttable new_table;
- upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size);
- new_table.t.count = t->t.count;
- upb_inttable_entry *old_e;
- for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e))
- intinsert(&new_table, old_e);
- upb_inttable_free(t);
- *t = new_table;
- }
- intinsert(t, e);
-}
-
-static uint32_t empty_strbucket(upb_strtable *table)
-{
- /* TODO: does it matter that this is biased towards the front of the table? */
- for(uint32_t i = 1; i <= upb_strtable_size(table); i++) {
- upb_strtable_entry *e = strent(table, i);
- if(!e->key) return i;
- }
- assert(false);
- return 0;
-}
-
-static void strinsert(upb_strtable *t, upb_strtable_entry *e)
-{
- assert(upb_strtable_lookup(t, e->key) == NULL);
- e->key = upb_string_getref(e->key);
- t->t.count++;
- uint32_t bucket = strtable_bucket(t, e->key);
- upb_strtable_entry *table_e = strent(t, bucket);
- if(table_e->key) { /* Collision. */
- if(bucket == strtable_bucket(t, table_e->key)) {
- /* Existing element is in its main posisiton. Find an empty slot to
- * place our new element and append it to this key's chain. */
- uint32_t empty_bucket = empty_strbucket(t);
- while (table_e->next != UPB_END_OF_CHAIN)
- table_e = strent(t, table_e->next);
- table_e->next = empty_bucket;
- table_e = strent(t, empty_bucket);
- } else {
- /* Existing element is not in its main position. Move it to an empty
- * slot and put our element in its main position. */
- uint32_t empty_bucket = empty_strbucket(t);
- uint32_t evictee_bucket = strtable_bucket(t, table_e->key);
- memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
- upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
- while(1) {
- assert(evictee_e->key);
- assert(evictee_e->next != UPB_END_OF_CHAIN);
- if(evictee_e->next == bucket) {
- evictee_e->next = empty_bucket;
- break;
- }
- evictee_e = strent(t, evictee_e->next);
- }
- /* table_e remains set to our mainpos. */
- }
- }
- memcpy(table_e, e, t->t.entry_size);
- table_e->next = UPB_END_OF_CHAIN;
- assert(upb_strtable_lookup(t, e->key) == table_e);
-}
-
-void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e)
-{
- if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
- /* Need to resize. New table of double the size, add old elements to it. */
- upb_strtable new_table;
- upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size);
- upb_strtable_entry *old_e;
- for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e))
- strinsert(&new_table, old_e);
- upb_strtable_free(t);
- *t = new_table;
- }
- strinsert(t, e);
-}
-
-void *upb_inttable_begin(upb_inttable *t) {
- return upb_inttable_next(t, intent(t, 0));
-}
-
-void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) {
- upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1);
- do {
- cur = (void*)((char*)cur + t->t.entry_size);
- if(cur == end) return NULL;
- } while(cur->key == UPB_EMPTY_ENTRY);
- return cur;
-}
-
-void *upb_strtable_begin(upb_strtable *t) {
- return upb_strtable_next(t, strent(t, 0));
-}
-
-void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) {
- upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1);
- do {
- cur = (void*)((char*)cur + t->t.entry_size);
- if(cur == end) return NULL;
- } while(cur->key == NULL);
- return cur;
-}
-
-#ifdef UPB_UNALIGNED_READS_OK
-//-----------------------------------------------------------------------------
-// MurmurHash2, by Austin Appleby (released as public domain).
-// Reformatted and C99-ified by Joshua Haberman.
-// Note - This code makes a few assumptions about how your machine behaves -
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
-// And it has a few limitations -
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and big-endian
-// machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
-{
- // 'm' and 'r' are mixing constants generated offline.
- // They're not really 'magic', they just happen to work well.
- const uint32_t m = 0x5bd1e995;
- const int32_t r = 24;
-
- // Initialize the hash to a 'random' value
- uint32_t h = seed ^ len;
-
- // Mix 4 bytes at a time into the hash
- const uint8_t * data = (const uint8_t *)key;
- while(len >= 4) {
- uint32_t k = *(uint32_t *)data;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
-
- data += 4;
- len -= 4;
- }
-
- // Handle the last few bytes of the input array
- switch(len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0]; h *= m;
- };
-
- // Do a few final mixes of the hash to ensure the last few
- // bytes are well-incorporated.
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
-}
-
-#else // !UPB_UNALIGNED_READS_OK
-
-//-----------------------------------------------------------------------------
-// MurmurHashAligned2, by Austin Appleby
-// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
-// on certain platforms.
-// Performance will be lower than MurmurHash2
-
-#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
-{
- const uint32_t m = 0x5bd1e995;
- const int32_t r = 24;
- const uint8_t * data = (const uint8_t *)key;
- uint32_t h = seed ^ len;
- uint8_t align = (uintptr_t)data & 3;
-
- if(align && (len >= 4)) {
- // Pre-load the temp registers
- uint32_t t = 0, d = 0;
-
- switch(align) {
- case 1: t |= data[2] << 16;
- case 2: t |= data[1] << 8;
- case 3: t |= data[0];
- }
-
- t <<= (8 * align);
-
- data += 4-align;
- len -= 4-align;
-
- int32_t sl = 8 * (4-align);
- int32_t sr = 8 * align;
-
- // Mix
-
- while(len >= 4) {
- d = *(uint32_t *)data;
- t = (t >> sr) | (d << sl);
-
- uint32_t k = t;
-
- MIX(h,k,m);
-
- t = d;
-
- data += 4;
- len -= 4;
- }
-
- // Handle leftover data in temp registers
-
- d = 0;
-
- if(len >= align) {
- switch(align) {
- case 3: d |= data[2] << 16;
- case 2: d |= data[1] << 8;
- case 1: d |= data[0];
- }
-
- uint32_t k = (t >> sr) | (d << sl);
- MIX(h,k,m);
-
- data += align;
- len -= align;
-
- //----------
- // Handle tail bytes
-
- switch(len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0]; h *= m;
- };
- } else {
- switch(len) {
- case 3: d |= data[2] << 16;
- case 2: d |= data[1] << 8;
- case 1: d |= data[0];
- case 0: h ^= (t >> sr) | (d << sl); h *= m;
- }
- }
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
- } else {
- while(len >= 4) {
- uint32_t k = *(uint32_t *)data;
-
- MIX(h,k,m);
-
- data += 4;
- len -= 4;
- }
-
- //----------
- // Handle tail bytes
-
- switch(len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0]; h *= m;
- };
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
- }
-}
-#undef MIX
-
-#endif // UPB_UNALIGNED_READS_OK
diff --git a/core/upb_table.h b/core/upb_table.h
deleted file mode 100644
index 20dae92..0000000
--- a/core/upb_table.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
- *
- * This file defines very fast int->struct (inttable) and string->struct
- * (strtable) hash tables. The struct can be of any size, and it is stored
- * in the table itself, for cache-friendly performance.
- *
- * The table uses internal chaining with Brent's variation (inspired by the
- * Lua implementation of hash tables). The hash function for strings is
- * Austin Appleby's "MurmurHash."
- */
-
-#ifndef UPB_TABLE_H_
-#define UPB_TABLE_H_
-
-#include <assert.h>
-#include "upb.h"
-#include "upb_string.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Note: the key cannot be zero! Zero is used by the implementation. */
-typedef uint32_t upb_inttable_key_t;
-
-#define UPB_END_OF_CHAIN (uint32_t)0
-#define UPB_EMPTY_ENTRY (uint32_t)0
-
-typedef struct {
- upb_inttable_key_t key;
- uint32_t next; /* Internal chaining. */
-} upb_inttable_entry;
-
-// TODO: consider storing the hash in the entry. This would avoid the need to
-// rehash on table resizes, but more importantly could possibly improve lookup
-// performance by letting us compare hashes before comparing lengths or the
-// strings themselves.
-typedef struct {
- upb_string *key; // We own a ref.
- uint32_t next; // Internal chaining.
-} upb_strtable_entry;
-
-typedef struct {
- void *entries;
- uint32_t count; /* How many elements are currently in the table? */
- uint16_t entry_size; /* How big is each entry? */
- uint8_t size_lg2; /* The table is 2^size_lg2 in size. */
- uint32_t mask;
-} upb_table;
-
-typedef struct {
- upb_table t;
-} upb_strtable;
-
-typedef struct {
- upb_table t;
-} upb_inttable;
-
-/* Initialize and free a table, respectively. Specify the initial size
- * with 'size' (the size will be increased as necessary). Entry size
- * specifies how many bytes each entry in the table is. */
-void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size);
-void upb_inttable_free(upb_inttable *table);
-void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size);
-void upb_strtable_free(upb_strtable *table);
-
-INLINE uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; }
-INLINE uint32_t upb_inttable_size(upb_inttable *t) {
- return upb_table_size(&t->t);
-}
-INLINE uint32_t upb_strtable_size(upb_strtable *t) {
- return upb_table_size(&t->t);
-}
-
-INLINE uint32_t upb_table_count(upb_table *t) { return t->count; }
-INLINE uint32_t upb_inttable_count(upb_inttable *t) {
- return upb_table_count(&t->t);
-}
-INLINE uint32_t upb_strtable_count(upb_strtable *t) {
- return upb_table_count(&t->t);
-}
-
-/* Inserts the given key into the hashtable with the given value. The key must
- * not already exist in the hash table. The data will be copied from e into
- * the hashtable (the amount of data copied comes from entry_size when the
- * table was constructed). Therefore the data at val may be freed once the
- * call returns. */
-void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e);
-void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e);
-
-INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) {
- return (k & t->t.mask) + 1; /* Identity hash for ints. */
-}
-
-/* Looks up key in this table. Inlined because this is in the critical path of
- * decoding. We have the caller specify the entry_size because fixing this as
- * a literal (instead of reading table->entry_size) gives the compiler more
- * ability to optimize. */
-INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
- uint32_t entry_size) {
- assert(key != 0);
- uint32_t bucket = upb_inttable_bucket(t, key);
- upb_inttable_entry *e;
- do {
- e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size);
- if(e->key == key) return e;
- } while((bucket = e->next) != UPB_END_OF_CHAIN);
- return NULL; /* Not found. */
-}
-
-INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
- return upb_inttable_fastlookup(t, key, t->t.entry_size);
-}
-
-void *upb_strtable_lookup(upb_strtable *t, upb_string *key);
-
-/* Provides iteration over the table. The order in which the entries are
- * returned is undefined. Insertions invalidate iterators. The _next
- * functions return NULL when the end has been reached. */
-void *upb_inttable_begin(upb_inttable *t);
-void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur);
-
-void *upb_strtable_begin(upb_strtable *t);
-void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* UPB_TABLE_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback