summaryrefslogtreecommitdiff
path: root/upb
diff options
context:
space:
mode:
Diffstat (limited to 'upb')
-rw-r--r--upb/atomic.h177
-rw-r--r--upb/bytestream.c265
-rw-r--r--upb/bytestream.h286
-rw-r--r--upb/def.c754
-rw-r--r--upb/def.h465
-rw-r--r--upb/descriptor.c529
-rw-r--r--upb/descriptor.h67
-rw-r--r--upb/descriptor.proto533
-rw-r--r--upb/descriptor_const.h349
-rw-r--r--upb/handlers.c311
-rw-r--r--upb/handlers.h373
-rw-r--r--upb/msg.c349
-rw-r--r--upb/msg.h270
-rw-r--r--upb/pb/decoder.c469
-rw-r--r--upb/pb/decoder.h99
-rw-r--r--upb/pb/decoder_x86.dasc694
-rw-r--r--upb/pb/encoder.c421
-rw-r--r--upb/pb/encoder.h58
-rw-r--r--upb/pb/glue.c129
-rw-r--r--upb/pb/glue.h62
-rw-r--r--upb/pb/jit_debug_elf_file.s7
-rw-r--r--upb/pb/textprinter.c199
-rw-r--r--upb/pb/textprinter.h31
-rw-r--r--upb/pb/varint.c54
-rw-r--r--upb/pb/varint.h142
-rw-r--r--upb/table.c574
-rw-r--r--upb/table.h225
-rw-r--r--upb/upb.c122
-rw-r--r--upb/upb.h238
29 files changed, 8252 insertions, 0 deletions
diff --git a/upb/atomic.h b/upb/atomic.h
new file mode 100644
index 0000000..53501b5
--- /dev/null
+++ b/upb/atomic.h
@@ -0,0 +1,177 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Only a very small part of upb is thread-safe. Notably, individual
+ * messages, arrays, and strings are *not* thread safe for mutating.
+ * However, we do make message *metadata* such as upb_msgdef and
+ * upb_context thread-safe, and their ownership is tracked via atomic
+ * refcounting. This header implements the small number of atomic
+ * primitives required to support this. The primitives we implement
+ * are:
+ *
+ * - a reader/writer lock (wrappers around platform-provided mutexes).
+ * - an atomic refcount.
+ */
+
+#ifndef UPB_ATOMIC_H_
+#define UPB_ATOMIC_H_
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* inline if possible, emit standalone code if required. */
+#ifndef INLINE
+#define INLINE static inline
+#endif
+
+// Until this stuff is actually working, make thread-unsafe the default.
+#define UPB_THREAD_UNSAFE
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* Non-thread-safe implementations. ******************************************/
+
+/* Plain, non-atomic counter used when UPB_THREAD_UNSAFE is defined. */
+typedef struct {
+  int v;
+} upb_atomic_t;
+
+#define UPB_ATOMIC_INIT(x) {x}
+
+/* Sets the counter to "val". */
+INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
+  a->v = val;
+}
+
+/* Adds one; returns true iff the counter was zero before the increment. */
+INLINE bool upb_atomic_ref(upb_atomic_t *a) {
+  int before = a->v;
+  a->v = before + 1;
+  return before == 0;
+}
+
+/* Subtracts one; returns true iff the counter is zero afterwards. */
+INLINE bool upb_atomic_unref(upb_atomic_t *a) {
+  a->v -= 1;
+  return a->v == 0;
+}
+
+/* Returns the current counter value. */
+INLINE int upb_atomic_read(upb_atomic_t *a) {
+  return a->v;
+}
+
+/* Adds "val"; returns true iff the counter is zero afterwards. */
+INLINE bool upb_atomic_add(upb_atomic_t *a, int val) {
+  int after = a->v + val;
+  a->v = after;
+  return after == 0;
+}
+
+#endif
+
+/* Atomic refcount ************************************************************/
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* Already defined above. */
+
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
+
+/* GCC includes atomic primitives. */
+
+/* Counter manipulated through GCC's __sync_* builtins. */
+typedef struct {
+ volatile int v;
+} upb_atomic_t;
+
+/* Stores "val" and then issues a full memory barrier. */
+INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
+ a->v = val;
+ __sync_synchronize(); /* Ensure the initialized value is visible. */
+}
+
+/* Atomically adds one; true iff the value fetched before the add was zero. */
+INLINE bool upb_atomic_ref(upb_atomic_t *a) {
+ return __sync_fetch_and_add(&a->v, 1) == 0;
+}
+
+/* Atomically adds "n"; true iff the value after the add is zero. */
+INLINE bool upb_atomic_add(upb_atomic_t *a, int n) {
+ return __sync_add_and_fetch(&a->v, n) == 0;
+}
+
+/* Atomically subtracts one; true iff the value after the subtract is zero. */
+INLINE bool upb_atomic_unref(upb_atomic_t *a) {
+ return __sync_sub_and_fetch(&a->v, 1) == 0;
+}
+
+/* Returns the current value of the counter (an atomic add of zero).
+ *
+ * The result type is int, matching the thread-unsafe implementation.  With a
+ * bool result every non-zero count collapsed to 1, so upb_atomic_only() was
+ * true for any non-zero refcount. */
+INLINE int upb_atomic_read(upb_atomic_t *a) {
+  return __sync_fetch_and_add(&a->v, 0);
+}
+
+#elif defined(WIN32)
+
+/* Windows defines atomic increment/decrement. */
+#include <Windows.h>
+
+/* Counter manipulated through the Win32 Interlocked* API. */
+typedef struct {
+  volatile LONG val;
+} upb_atomic_t;
+
+/* Atomically stores "val". */
+INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
+  InterlockedExchange(&a->val, val);
+}
+
+/* Atomically adds one; true iff the counter was zero before (InterlockedIncrement
+ * returns the new value, so "new == 1" means "old == 0"). */
+INLINE bool upb_atomic_ref(upb_atomic_t *a) {
+  return InterlockedIncrement(&a->val) == 1;
+}
+
+/* Atomically adds "n"; true iff the counter is zero afterwards.  Provided for
+ * parity with the other implementations in this header. */
+INLINE bool upb_atomic_add(upb_atomic_t *a, int n) {
+  /* InterlockedExchangeAdd returns the value held before the addition. */
+  return InterlockedExchangeAdd(&a->val, n) + n == 0;
+}
+
+/* Atomically subtracts one; true iff the counter is zero afterwards. */
+INLINE bool upb_atomic_unref(upb_atomic_t *a) {
+  return InterlockedDecrement(&a->val) == 0;
+}
+
+/* Returns the current counter value.  Required by upb_atomic_only(), which
+ * previously had nothing to call on this platform. */
+INLINE int upb_atomic_read(upb_atomic_t *a) {
+  /* Adding zero performs an atomic read of the current value. */
+  return (int)InterlockedExchangeAdd(&a->val, 0);
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU. \
+ Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+/* Returns true iff upb_atomic_read() reports exactly one for "a". */
+INLINE bool upb_atomic_only(upb_atomic_t *a) {
+  int count = upb_atomic_read(a);
+  return count == 1;
+}
+
+/* Reader/Writer lock. ********************************************************/
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* No-op lock for single-threaded builds.
+ *
+ * The dummy member is deliberate: a struct with no members is not valid ISO
+ * C, and under the GNU extension it has size 0 in C but size 1 in C++, so any
+ * struct embedding the lock would be laid out differently in the two
+ * languages this header supports. */
+typedef struct {
+  char unused;
+} upb_rwlock_t;
+
+/* Every operation ignores its argument and does nothing. */
+INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
+
+#elif defined(UPB_USE_PTHREADS)
+
+#include <pthread.h>
+
+/* Reader/writer lock that wraps a pthread_rwlock_t.  Each function below
+ * forwards to the pthread call of the same name and currently discards its
+ * return value, so failures are not reported to the caller. */
+typedef struct {
+ pthread_rwlock_t lock;
+} upb_rwlock_t;
+
+/* Initializes the lock with default attributes (NULL attr). */
+INLINE void upb_rwlock_init(upb_rwlock_t *l) {
+ /* TODO: check return value. */
+ pthread_rwlock_init(&l->lock, NULL);
+}
+
+/* Destroys the lock. */
+INLINE void upb_rwlock_destroy(upb_rwlock_t *l) {
+ /* TODO: check return value. */
+ pthread_rwlock_destroy(&l->lock);
+}
+
+/* Acquires the lock for reading. */
+INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) {
+ /* TODO: check return value. */
+ pthread_rwlock_rdlock(&l->lock);
+}
+
+/* Acquires the lock for writing. */
+INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) {
+ /* TODO: check return value. */
+ pthread_rwlock_wrlock(&l->lock);
+}
+
+/* Releases the lock, whichever way it was acquired. */
+INLINE void upb_rwlock_unlock(upb_rwlock_t *l) {
+ /* TODO: check return value. */
+ pthread_rwlock_unlock(&l->lock);
+}
+
+#else
+#error Reader/writer lock is not defined for your platform/CPU. \
+ Implement it or compile with UPB_THREAD_UNSAFE.
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_ATOMIC_H_ */
diff --git a/upb/bytestream.c b/upb/bytestream.c
new file mode 100644
index 0000000..846b8ee
--- /dev/null
+++ b/upb/bytestream.c
@@ -0,0 +1,265 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/bytestream.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+// We can make this configurable if necessary.
+#define BUF_SIZE 32768
+
+// Copies the bytes referenced by "r" into a freshly malloc()'d,
+// NUL-terminated buffer that the caller must free().  Returns NULL if the
+// allocation fails.
+char *upb_strref_dup(struct _upb_strref *r) {
+  // Widen before adding one: r->len is 32 bits, so "len + 1" could wrap to 0.
+  char *ret = malloc((size_t)r->len + 1);
+  if (!ret) return NULL;
+  upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret);
+  ret[r->len] = '\0';
+  return ret;
+}
+
+/* upb_stdio ******************************************************************/
+
+// bsearch() comparator: orders a stream offset (*_key, a uint64_t) against one
+// element of upb_stdio.bufs.  That array holds upb_stdio_buf pointers, so
+// "_elem" points at a pointer to a buffer, not at a buffer.  An offset matches
+// a buffer when both fall in the same BUF_SIZE-sized block.
+int upb_stdio_cmpbuf(const void *_key, const void *_elem) {
+  const uint64_t *ofs = _key;
+  const upb_stdio_buf *buf = *(upb_stdio_buf *const *)_elem;
+  uint64_t key_block = *ofs / BUF_SIZE;
+  uint64_t buf_block = buf->ofs / BUF_SIZE;
+  // Compare explicitly: the difference of two uint64_t values does not fit in
+  // an int and could come out with the wrong sign.
+  if (key_block < buf_block) return -1;
+  return key_block > buf_block;
+}
+
+// Returns the buffer whose block contains stream offset "ofs", or NULL if no
+// buffer matches.
+// NOTE(review): bsearch() requires s->bufs to be sorted by offset -- confirm
+// that whoever fills the array keeps it ordered.
+static upb_stdio_buf *upb_stdio_findbuf(upb_stdio *s, uint64_t ofs) {
+  // TODO: it is probably faster to linear search short lists, and to
+  // special-case the last one or two bufs.
+  upb_stdio_buf **slot =
+      bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf);
+  // bsearch() returns a pointer to the matching array slot, which itself
+  // holds the buffer pointer.
+  return slot ? *slot : NULL;
+}
+
+//static upb_strlen_t upb_stdio_read(void *src, uint32_t ofs, upb_buf *b,
+// upb_status *status) {
+// upb_stdio *stdio = (upb_stdio*)src;
+// size_t read = fread(buf, 1, BLOCK_SIZE, stdio->file);
+// if(read < (size_t)BLOCK_SIZE) {
+// // Error or EOF.
+// if(feof(stdio->file)) {
+// upb_seterr(status, UPB_EOF, "");
+// } else if(ferror(stdio->file)) {
+// upb_status_fromerrno(s);
+// return 0;
+// }
+// }
+// b->len = read;
+// stdio->next_ofs += read;
+// return stdio->next_ofs;
+//}
+
+// Placeholder for the bytesrc "fetch" callback: ignores all of its arguments
+// and always returns 0.  No data is read from the file here.
+size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
+ (void)src;
+ (void)ofs;
+ (void)s;
+
+ return 0;
+}
+
+// Copies "len" bytes starting at stream offset "src_ofs" into "dst", which
+// must have room for "len" bytes.  The range may span several buffers; each
+// one is looked up by absolute offset rather than by stepping a buffer
+// pointer.  Copying stops early if an offset is not covered by any buffer.
+void upb_stdio_read(void *src, uint64_t src_ofs, size_t len, char *dst) {
+  while (len > 0) {
+    upb_stdio_buf *buf = upb_stdio_findbuf(src, src_ofs);
+    // No buffer, or one that does not actually contain the offset: stop
+    // instead of reading out of bounds.
+    if (!buf || src_ofs < buf->ofs || src_ofs - buf->ofs >= BUF_SIZE) return;
+    size_t buf_ofs = (size_t)(src_ofs - buf->ofs);
+    // Never copy more than was asked for, nor past the end of this buffer.
+    size_t bytes = UPB_MIN(len, BUF_SIZE - buf_ofs);
+    memcpy(dst, &buf->data[buf_ofs], bytes);
+    src_ofs += bytes;
+    dst += bytes;
+    len -= bytes;
+  }
+}
+
+// Returns a pointer into the buffer that holds stream offset "ofs" and stores
+// in *len the number of bytes from there to the end of that buffer.  If no
+// buffer covers "ofs", returns NULL and sets *len to 0.
+const char *upb_stdio_getptr(void *src, uint64_t ofs, size_t *len) {
+  upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
+  if (!buf) {
+    *len = 0;
+    return NULL;
+  }
+  ofs -= buf->ofs;  // Now an offset within the buffer.
+  *len = BUF_SIZE - ofs;
+  return &buf->data[ofs];
+}
+
+// Increments the refcount of every buffer touched by the region
+// [ofs, ofs+len).  As before, the buffer containing "ofs" is ref'd even when
+// "len" is 0.  Stops early if an offset is not covered by any buffer.
+void upb_stdio_refregion(void *src, uint64_t ofs, size_t len) {
+  for (;;) {
+    upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
+    if (!buf || ofs < buf->ofs || ofs - buf->ofs >= BUF_SIZE) return;
+    ++buf->refcount;
+    // Bytes of the region that live in this buffer.
+    size_t avail = BUF_SIZE - (size_t)(ofs - buf->ofs);
+    if (len <= avail) return;
+    // Advance to the first byte of the next buffer.
+    len -= avail;
+    ofs += avail;
+  }
+}
+
+// Placeholder for the bytesrc "unrefregion" callback: ignores its arguments
+// and does nothing, so refcounts raised by upb_stdio_refregion() are not
+// lowered here.
+void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) {
+ (void)src;
+ (void)ofs;
+ (void)len;
+}
+
+#if 0
+upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
+ upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
+ upb_strlen_t len = upb_string_len(str);
+ upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
+ if(written < len) {
+ upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
+ return -1;
+ }
+ return written;
+}
+#endif
+
+// printf-style write to the FILE behind "sink" (which must be the "sink"
+// member of a upb_stdio).  Returns the number of bytes written, or -1 with
+// "status" set if vfprintf() fails.  The return type is signed so that -1 is
+// representable, matching upb_bytesink_vprintf_func.
+int32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
+                          const char *fmt, va_list args) {
+  // Recover the enclosing upb_stdio from the address of its "sink" member.
+  upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
+  int written = vfprintf(stdio->file, fmt, args);
+  if (written < 0) {
+    upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
+    return -1;
+  }
+  return written;
+}
+
+// Prepares "stdio" for use: installs the bytesrc vtable and puts every other
+// field into a known empty state, so that upb_stdio_uninit() is safe even if
+// no file was ever attached.  The bytesink side is not wired up yet (see the
+// commented-out code below).
+void upb_stdio_init(upb_stdio *stdio) {
+  static upb_bytesrc_vtbl bytesrc_vtbl = {
+    upb_stdio_fetch,
+    upb_stdio_read,
+    upb_stdio_getptr,
+    upb_stdio_refregion,
+    upb_stdio_unrefregion,
+    NULL,
+    NULL
+  };
+  upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
+
+  //static upb_bytesink_vtbl bytesink_vtbl = {
+  //  upb_stdio_putstr,
+  //  upb_stdio_vprintf
+  //};
+  //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
+
+  // No file and no buffers until upb_stdio_reset()/upb_stdio_open().
+  stdio->file = NULL;
+  stdio->should_close = false;
+  stdio->bufs = NULL;
+  stdio->nbuf = 0;
+  stdio->szbuf = 0;
+}
+
+// Attaches "file" to the object without taking responsibility for closing it.
+void upb_stdio_reset(upb_stdio* stdio, FILE *file) {
+  stdio->should_close = false;
+  stdio->file = file;
+}
+
+// Opens "filename" with fopen() mode "mode" and attaches it to "stdio", which
+// becomes responsible for closing it.  On failure "s" is set from errno and
+// "stdio" is left untouched.
+void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
+                    upb_status *s) {
+  FILE *f = fopen(filename, mode);
+  if (!f) {
+    upb_status_fromerrno(s);
+    return;
+  }
+  // Operate on the new handle: stdio->file is not assigned until the reset
+  // below and may still hold an old or uninitialized pointer.
+  setvbuf(f, NULL, _IONBF, 0);  // Disable buffering; we do our own.
+  upb_stdio_reset(stdio, f);
+  stdio->should_close = true;
+}
+
+// Closes the file if this object opened it, then forgets the handle.  The
+// fclose() result is dropped, so callers that care must flush beforehand.
+void upb_stdio_uninit(upb_stdio *stdio) {
+  // Can't report status; caller should flush() to ensure data is written.
+  if (stdio->should_close) {
+    fclose(stdio->file);
+  }
+  stdio->file = NULL;
+}
+
+// Accessors for the bytesrc and bytesink embedded in "stdio"; each returns the
+// address of the corresponding member.
+upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
+upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
+
+
+/* upb_stringsrc **************************************************************/
+
+// Returns how many bytes of the string remain at offset "ofs".  When "ofs" is
+// at or past the end, returns 0 and sets s->code to UPB_EOF (previously an
+// offset past the end wrapped around to a huge byte count).
+size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
+  upb_stringsrc *src = _src;
+  if (ofs >= src->len) {
+    s->code = UPB_EOF;
+    return 0;
+  }
+  return src->len - (size_t)ofs;
+}
+
+// Copies "len" bytes of the string, starting at "src_ofs", into "dst".  No
+// bounds check is made against the string's length.
+void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) {
+  upb_stringsrc *src = _src;
+  const char *from = src->str + src_ofs;
+  memcpy(dst, from, len);
+}
+
+// Returns a pointer to the string data at "ofs" and stores the number of
+// bytes from there to the end of the string in *len.
+const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) {
+  upb_stringsrc *src = _src;
+  const char *start = src->str + ofs;
+  *len = src->len - ofs;
+  return start;
+}
+
+// Installs the stringsrc vtable and leaves the source empty (no string,
+// length 0) until upb_stringsrc_reset() is called.  The region and object
+// ref/unref slots are left NULL.
+void upb_stringsrc_init(upb_stringsrc *s) {
+  static upb_bytesrc_vtbl vtbl = {
+    &upb_stringsrc_fetch,
+    &upb_stringsrc_read,
+    &upb_stringsrc_getptr,
+    NULL, NULL, NULL, NULL
+  };
+  upb_bytesrc_init(&s->bytesrc, &vtbl);
+  s->str = NULL;
+  s->len = 0;  // Keep len consistent with the NULL string.
+}
+
+// Points the source at the "len" bytes starting at "str".  Only the pointer
+// and length are stored; the bytes are not copied.
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
+ s->str = str;
+ s->len = len;
+}
+
+// Nothing to release: the stringsrc owns no resources.
+void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
+
+// Returns the address of the bytesrc embedded in "s".
+upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
+ return &s->bytesrc;
+}
+
+
+/* upb_stringsink *************************************************************/
+
+// Frees the sink's string buffer.  The pointer itself is not cleared.
+void upb_stringsink_uninit(upb_stringsink *s) {
+ free(s->str);
+}
+
+// Resets the stringsink so that it appends to "str", a malloc()'d buffer with
+// capacity "size" bytes.  The sink takes ownership of "str" (it may realloc()
+// it while writing and free()s it on uninit or on the next reset) and frees
+// the buffer it held before.  "str" may be NULL, in which case the capacity is
+// treated as 0.  The logical length restarts at 0.  A stringsink can be reset
+// multiple times.
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
+  // Resetting to the buffer already held must not free it out from under us.
+  if (str != s->str) free(s->str);
+  s->str = str;
+  s->len = 0;
+  // A NULL buffer has no capacity, whatever the caller passed.
+  s->size = str ? size : 0;
+}
+
+// Returns the address of the bytesink embedded in "s".
+upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) {
+ return &s->bytesink;
+}
+
+// bytesink "vprintf" callback: formats at the current end of the sink's
+// string via upb_vrprintf() and, when that returns a non-negative count,
+// advances the length by it.  The upb_vrprintf() result is returned as-is.
+static int32_t upb_stringsink_vprintf(void *_s, upb_status *status,
+                                      const char *fmt, va_list args) {
+  (void)status;  // TODO: report realloc() errors.
+  upb_stringsink *sink = _s;
+  int written = upb_vrprintf(&sink->str, &sink->size, sink->len, fmt, args);
+  if (written < 0) return written;
+  sink->len += written;
+  return written;
+}
+
+// bytesink "write" callback: appends "len" bytes from "buf" to the sink's
+// string, growing the buffer geometrically when needed.  Returns false with
+// "status" set, leaving the sink unchanged, if the buffer cannot be grown.
+bool upb_stringsink_write(void *_s, const char *buf, size_t len,
+                          upb_status *status) {
+  upb_stringsink *s = _s;
+  size_t needed = s->len + len;
+  if (needed > s->size) {
+    // Start from a non-zero capacity: doubling 0 would never terminate.
+    size_t newsize = s->size ? s->size : 16;
+    while (newsize < needed) {
+      if (newsize > SIZE_MAX / 2) {
+        newsize = needed;  // Doubling would overflow; take the exact size.
+        break;
+      }
+      newsize *= 2;
+    }
+    // Keep the old pointer until realloc() succeeds so it is not leaked.
+    char *newstr = realloc(s->str, newsize);
+    if (!newstr) {
+      upb_status_setf(status, UPB_ERROR, "Out of memory growing string sink.");
+      return false;
+    }
+    s->str = newstr;
+    s->size = newsize;
+  }
+  if (len > 0) memcpy(s->str + s->len, buf, len);
+  s->len += len;
+  return true;
+}
+
+// Installs the stringsink vtable and starts with no buffer: NULL string,
+// length 0, capacity 0.  The write and vprintf callbacks read len/size, so
+// they must not be left uninitialized.
+void upb_stringsink_init(upb_stringsink *s) {
+  static upb_bytesink_vtbl vtbl = {
+    upb_stringsink_write,
+    upb_stringsink_vprintf
+  };
+  upb_bytesink_init(&s->bytesink, &vtbl);
+  s->str = NULL;
+  s->len = 0;
+  s->size = 0;
+}
diff --git a/upb/bytestream.h b/upb/bytestream.h
new file mode 100644
index 0000000..2a6f7d2
--- /dev/null
+++ b/upb/bytestream.h
@@ -0,0 +1,286 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * This file contains upb_bytesrc and upb_bytesink, which are abstractions of
+ * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing
+ * semantics. They are virtual base classes so concrete implementations
+ * can get the data from a fd, a string, a cord, etc.
+ *
+ * Byte streams are NOT thread-safe! (Like f{read,write}_unlocked())
+ */
+
+#ifndef UPB_BYTESTREAM_H
+#define UPB_BYTESTREAM_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_bytesrc ****************************************************************/
+
+// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
+// they become available, and to preserve some trailing amount of data.
+//
+// The typedefs below are the signatures of the functions a concrete bytesrc
+// supplies through its vtable.  The inline wrappers in this header pass the
+// upb_bytesrc pointer itself as the first (void*) argument.
+typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
+typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
+typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
+typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t);
+typedef void upb_bytesrc_ref_func(void*);
+// Table of operations for one bytesrc implementation.  "ref"/"unref" may be
+// NULL for sources that cannot be ref'd (upb_bytesrc_tryref() checks "ref").
+typedef struct _upb_bytesrc_vtbl {
+ upb_bytesrc_fetch_func *fetch;
+ upb_bytesrc_read_func *read;
+ upb_bytesrc_getptr_func *getptr;
+ upb_bytesrc_refregion_func *refregion;
+ upb_bytesrc_refregion_func *unrefregion;
+ upb_bytesrc_ref_func *ref;
+ upb_bytesrc_ref_func *unref;
+} upb_bytesrc_vtbl;
+
+// Base object for all byte sources; holds only the vtable pointer.
+typedef struct {
+ upb_bytesrc_vtbl *vtbl;
+} upb_bytesrc;
+
+// Binds "src" to the implementation described by "vtbl".
+INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
+ src->vtbl = vtbl;
+}
+
+// Fetches at least one byte starting at ofs, returning the actual number of
+// bytes fetched (or 0 on error: see "s" for details). Gives caller a ref on
+// the fetched region. It is safe to re-fetch existing regions but only if
+// they are ref'd. "ofs" may not be greater than the end of the region that
+// was previously fetched.
+INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
+ return src->vtbl->fetch(src, ofs, s);
+}
+
+// Copies "len" bytes of data from offset src_ofs to "dst", which must be at
+// least "len" bytes long. The caller must own a ref on the given region.
+INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
+ char *dst) {
+ src->vtbl->read(src, src_ofs, len, dst);
+}
+
+// Returns a pointer to the bytesrc's internal buffer at "ofs".  "len" is
+// passed through to the implementation, which stores in *len how much
+// contiguous data is available at the returned pointer (this may be less than
+// the caller wants if the region is not contiguous).  The caller must own
+// refs on the entire region it reads through the pointer.  The returned
+// buffer is valid for as long as the region remains ref'd.
+//
+// TODO: is "len" really required here?
+INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
+ size_t *len) {
+ return src->vtbl->getptr(src, ofs, len);
+}
+
+// Gives the caller a ref on the given region.  The caller must know that the
+// given region is already ref'd.  Sources that do not track region refs leave
+// the vtable slot NULL (upb_stringsrc does); for those this is a no-op.
+INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
+  if (src->vtbl->refregion) src->vtbl->refregion(src, ofs, len);
+}
+
+// Releases a ref on the given region, which the caller must have previously
+// ref'd.  Sources that do not track region refs leave the vtable slot NULL
+// (upb_stringsrc does); for those this is a no-op.
+INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
+  if (src->vtbl->unrefregion) src->vtbl->unrefregion(src, ofs, len);
+}
+
+// Takes a ref on the bytesrc itself when the implementation provides a "ref"
+// operation.  Returns true if a ref was taken, false if the bytesrc is not
+// ref-able.
+INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) {
+  if (!src->vtbl->ref) return false;
+  src->vtbl->ref(src);
+  return true;
+}
+
+// Unref's the bytesrc itself. May only be called when upb_bytesrc_tryref()
+// has previously returned true.  The assert catches implementations that have
+// no "unref" operation; with NDEBUG such a call goes through a NULL pointer.
+INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
+ assert(src->vtbl->unref);
+ src->vtbl->unref(src);
+}
+
+/* upb_strref *****************************************************************/
+
+// The structure we pass for a string.
+typedef struct _upb_strref {
+ // Pointer to the string data. NULL if the string spans multiple input
+ // buffers (in which case upb_bytesrc_getptr() must be called to obtain
+ // the actual pointers).
+ const char *ptr;
+
+ // Bytesrc from which this string data comes. This is only guaranteed to be
+ // alive from inside the callback; however if the handler knows more about
+ // its type and how to prolong its life, it may do so.
+ upb_bytesrc *bytesrc;
+
+ // Offset in the bytesrc that represents the beginning of this string.
+ // NOTE(review): this is 32 bits while the bytesrc functions take 64-bit
+ // offsets, so offsets of 4GB and above cannot be represented here --
+ // confirm whether that limit is intended.
+ uint32_t stream_offset;
+
+ // Length of the string.
+ uint32_t len;
+
+ // Possibly add optional members here like start_line, start_column, etc.
+} upb_strref;
+
+// Copies the contents of the strref into a newly malloc()'d, NUL-terminated
+// string.
+char *upb_strref_dup(struct _upb_strref *r);
+
+
+/* upb_bytesink ***************************************************************/
+
+// Signatures of the functions a concrete bytesink supplies.  The inline
+// wrappers in this header pass the upb_bytesink pointer itself as the first
+// (void*) argument.
+typedef bool upb_bytesink_write_func(void*, const char*, size_t, upb_status*);
+typedef int32_t upb_bytesink_vprintf_func(
+ void*, upb_status*, const char *fmt, va_list args);
+
+// Table of operations for one bytesink implementation.
+typedef struct {
+ upb_bytesink_write_func *write;
+ upb_bytesink_vprintf_func *vprintf;
+} upb_bytesink_vtbl;
+
+// Base object for all byte sinks; holds only the vtable pointer.
+typedef struct {
+ upb_bytesink_vtbl *vtbl;
+} upb_bytesink;
+
+// Binds "sink" to the implementation described by "vtbl".
+INLINE void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) {
+ sink->vtbl = vtbl;
+}
+
+// Forwards "len" bytes at "buf" to the sink's write operation and returns its
+// result.
+INLINE bool upb_bytesink_write(upb_bytesink *sink, const char *buf, size_t len,
+ upb_status *s) {
+ return sink->vtbl->write(sink, buf, len, s);
+}
+
+// Writes the NUL-terminated string "str" (without its terminator); the length
+// is taken with strlen().
+INLINE bool upb_bytesink_writestr(upb_bytesink *sink, const char *str,
+ upb_status *s) {
+ return upb_bytesink_write(sink, str, strlen(str), s);
+}
+
+// printf-style formatted write to "sink".  Returns the number of bytes
+// written or -1 on error.
+INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
+                                   const char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  // Keep the result signed so the -1 error value is not round-tripped through
+  // an unsigned type.
+  int32_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
+  va_end(args);
+  return ret;
+}
+
+// OPT: add getappendbuf()
+// OPT: add writefrombytesrc()
+// TODO: add flush()
+
+
+/* upb_stdio ******************************************************************/
+
+// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but
+// more portable.
+//
+// Specifically, stdio functions acquire locks on every operation (unless you
+// use the f{read,write,...}_unlocked variants, which are not standard) and
+// performs redundant buffering (unless you disable it with setvbuf(), but we
+// can only do this on newly-opened filehandles).
+
+// One buffer of file data.
+typedef struct {
+ uint64_t ofs;       // Stream offset of the first byte held in "data".
+ uint32_t refcount;  // Incremented by upb_stdio_refregion().
+ char data[];        // Flexible array; presumably BUF_SIZE bytes -- TODO confirm.
+} upb_stdio_buf;
+
+// We use a single object for both bytesrc and bytesink for simplicity.
+// The object is still not thread-safe, and may only be used by one reader
+// and one writer at a time.
+typedef struct {
+ upb_bytesrc src;
+ upb_bytesink sink;
+ FILE *file;
+ bool should_close;      // True when upb_stdio_uninit() must fclose() "file".
+ upb_stdio_buf **bufs;   // Array of pointers to buffers, searched by offset.
+ uint32_t nbuf, szbuf;   // nbuf: entries searched in "bufs"; szbuf:
+                         // presumably its allocated capacity -- TODO confirm.
+} upb_stdio;
+
+void upb_stdio_init(upb_stdio *stdio);
+// Caller should call upb_stdio_flush prior to calling this to ensure that
+// all data is flushed, otherwise data can be silently dropped if an error
+// occurs flushing the remaining buffers.
+void upb_stdio_uninit(upb_stdio *stdio);
+
+// Resets the object to read/write to the given "file." The caller is
+// responsible for closing the file, which must outlive this object.
+void upb_stdio_reset(upb_stdio *stdio, FILE *file);
+
+// As an alternative to upb_stdio_reset(), initializes the object by opening a
+// file, and will handle closing it. This may result in more efficient I/O
+// than the previous since we can call setvbuf() to disable buffering.
+void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
+ upb_status *s);
+
+upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
+upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
+
+
+/* upb_stringsrc **************************************************************/
+
+// bytesrc/bytesink for a simple contiguous string.
+
+struct _upb_stringsrc {
+ upb_bytesrc bytesrc;
+ const char *str;  // Not owned; set by upb_stringsrc_reset().
+ size_t len;       // Number of bytes at "str".
+};
+typedef struct _upb_stringsrc upb_stringsrc;
+
+// Create/free a stringsrc.
+void upb_stringsrc_init(upb_stringsrc *s);
+void upb_stringsrc_uninit(upb_stringsrc *s);
+
+// Resets the stringsrc to a state where it will vend the given string.  The
+// stringsrc only stores the pointer and length (the data is neither copied
+// nor ref'd), so the caller must keep "str" alive for as long as the
+// stringsrc may read from it.  A stringsrc can be reset multiple times.
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
+
+// Returns the upb_bytesrc* for this stringsrc.
+upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
+
+
+/* upb_stringsink *************************************************************/
+
+// bytesink that accumulates everything written to it in a growable string.
+struct _upb_stringsink {
+ upb_bytesink bytesink;
+ char *str;         // Owned buffer (freed by reset/uninit).
+ size_t len, size;  // len: bytes written so far; size: capacity of "str".
+};
+typedef struct _upb_stringsink upb_stringsink;
+
+// Create/free a stringsink.
+void upb_stringsink_init(upb_stringsink *s);
+void upb_stringsink_uninit(upb_stringsink *s);
+
+// Resets the sink's string to "str", which the sink takes ownership of.
+// "str" may be NULL, which will make the sink allocate a new string.
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size);
+
+// Releases ownership of the returned string (which is "len" bytes long) and
+// resets the internal string to be empty again (as if reset were called with
+// NULL).
+const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
+
+// Returns the upb_bytesink* for this stringsink.  Invalidated by reset above.
+// The parameter list is spelled out so the prototype matches the definition;
+// an empty "()" means "no parameters" when this header is compiled as C++.
+upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/def.c b/upb/def.c
new file mode 100644
index 0000000..000b7f2
--- /dev/null
+++ b/upb/def.c
@@ -0,0 +1,754 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "upb/def.h"
+
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+
+// Starts "l" as an empty list with room for 8 defs.  The malloc() result is
+// not checked.
+void upb_deflist_init(upb_deflist *l) {
+  l->len = 0;
+  l->size = 8;
+  l->defs = malloc(l->size * sizeof(void*));
+}
+
+// Drops the list's ref on every def it holds, then frees the array.
+void upb_deflist_uninit(upb_deflist *l) {
+  uint32_t i = 0;
+  while (i < l->len) {
+    upb_def_unref(l->defs[i]);
+    i++;
+  }
+  free(l->defs);
+}
+
+// Appends "d" to the list, doubling the array's capacity when it is full.
+// The def pointer is stored as-is; no ref is taken here.
+void upb_deflist_push(upb_deflist *l, upb_def *d) {
+ if(l->len == l->size) {
+ l->size *= 2;
+ // NOTE(review): the realloc() result is not checked; on failure the old
+ // array pointer is overwritten with NULL.
+ l->defs = realloc(l->defs, l->size * sizeof(void*));
+ }
+ l->defs[l->len++] = d;
+}
+
+
+/* upb_def ********************************************************************/
+
+static void upb_msgdef_free(upb_msgdef *m);
+static void upb_enumdef_free(upb_enumdef *e);
+static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
+
+#ifndef NDEBUG
+// A def counts as mutable while it has no symtab.  Only compiled when
+// assertions are enabled, since it is used from assert().
+static bool upb_def_ismutable(upb_def *def) { return def->symtab == NULL; }
+#endif
+
+// Dispatches to the type-specific destructor for "def".  An unknown type
+// trips the assertion.
+static void upb_def_free(upb_def *def) {
+  if (def->type == UPB_DEF_MSG) {
+    upb_msgdef_free(upb_downcast_msgdef(def));
+  } else if (def->type == UPB_DEF_ENUM) {
+    upb_enumdef_free(upb_downcast_enumdef(def));
+  } else if (def->type == UPB_DEF_UNRESOLVED) {
+    upb_unresolveddef_free(upb_downcast_unresolveddef(def));
+  } else {
+    assert(false);
+  }
+}
+
+// Duplicates a message or enum def through the type-specific dup function.
+// Any other def type trips the assertion and yields NULL.
+upb_def *upb_def_dup(upb_def *def) {
+  if (def->type == UPB_DEF_MSG) {
+    return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef(def)));
+  }
+  if (def->type == UPB_DEF_ENUM) {
+    return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef(def)));
+  }
+  assert(false);
+  return NULL;
+}
+
+// Before a def joins a symtab, its refcount controls the def's own lifetime:
+// when the count falls to zero the def is deleted.  Once the def belongs to a
+// symtab, the symtab owns it and the refcount instead decides whether the def
+// holds a ref on the symtab.
+void upb_def_ref(upb_def *def) {
+  bool was_zero = upb_atomic_ref(&def->refcount);
+  // First ref on a symtab-owned def: pin the symtab on the caller's behalf.
+  if (was_zero && def->symtab) {
+    upb_symtab_ref(def->symtab);
+  }
+}
+
+// Hands ownership of "d" to symtab "s".  The def must hold at least one ref
+// on entry.
+static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) {
+ assert(upb_atomic_read(&d->refcount) > 0);
+ d->symtab = s;
+ // Drop one ref; if others remain, the def takes a ref on the symtab.
+ if (!upb_atomic_unref(&d->refcount)) upb_symtab_ref(s);
+ // Message defs additionally get their number->field table compacted.
+ upb_msgdef *m = upb_dyncast_msgdef(d);
+ if (m) upb_inttable_compact(&m->itof);
+}
+
+// Releases one ref on "def" (NULL is ignored).  When the count reaches zero,
+// a symtab-owned def releases its ref on the symtab; an unowned def is freed.
+void upb_def_unref(upb_def *def) {
+  if (!def) return;
+  if (!upb_atomic_unref(&def->refcount)) return;
+  if (!def->symtab) {
+    upb_def_free(def);
+    return;
+  }
+  upb_symtab_unref(def->symtab);
+  // Def might be deleted now.
+}
+
+// Initializes the common def header: one ref, the given type, no name and no
+// owning symtab.
+static void upb_def_init(upb_def *def, upb_deftype_t type) {
+  upb_atomic_init(&def->refcount, 1);
+  def->symtab = NULL;
+  def->fqname = NULL;
+  def->type = type;
+}
+
+// Releases what the common def header owns: the fully-qualified name string.
+static void upb_def_uninit(upb_def *def) {
+ //fprintf(stderr, "Freeing def: %p\n", def);
+ free(def->fqname);
+}
+
+
+/* upb_unresolveddef **********************************************************/
+
+// Unresolved defs are used as temporary placeholders for a def whose name has
+// not been resolved yet. During the name resolution step, all unresolved defs
+// are replaced with pointers to the actual def being referenced.
+typedef struct _upb_unresolveddef {
+ upb_def base;
+
+ // The target type name. This may or may not be fully qualified. It is
+ // tempting to want to use base.fqname for this, but that will be qualified
+ // which is inappropriate for a name we still have to resolve.
+ char *name;
+} upb_unresolveddef;
+
+// Creates an unresolved def that refers to type name "str".  The name is
+// copied with strdup(), so the caller keeps ownership of "str".  Neither the
+// malloc() nor the strdup() result is checked.
+static upb_unresolveddef *upb_unresolveddef_new(const char *str) {
+ upb_unresolveddef *def = malloc(sizeof(*def));
+ upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
+ def->name = strdup(str);
+ return def;
+}
+
+// Frees the copied name, the common header's contents, and the def itself.
+static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
+ free(def->name);
+ upb_def_uninit(&def->base);
+ free(def);
+}
+
+
+/* upb_enumdef ****************************************************************/
+
+// Allocates an empty enum def holding one ref, with empty name->number and
+// number->name tables and a default value of 0.  The malloc() result is not
+// checked.
+upb_enumdef *upb_enumdef_new() {
+  upb_enumdef *e = malloc(sizeof(*e));
+  upb_def_init(&e->base, UPB_DEF_ENUM);
+  upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
+  upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
+  // defaultval is read by upb_fielddef_resolve(); do not leave it
+  // indeterminate when upb_enumdef_setdefault() is never called.
+  e->defaultval = 0;
+  return e;
+}
+
+// Destroys an enum def: frees each value's name string, both lookup tables,
+// the common header's contents, and the def itself.
+static void upb_enumdef_free(upb_enumdef *e) {
+ upb_enum_iter i;
+ for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+ // Frees the ref taken when the string was parsed.
+ free(upb_enum_iter_name(i));
+ }
+ upb_strtable_free(&e->ntoi);
+ upb_inttable_free(&e->iton);
+ upb_def_uninit(&e->base);
+ free(e);
+}
+
+// Returns a new enum def containing the same name/number pairs and the same
+// default value as "e".
+upb_enumdef *upb_enumdef_dup(upb_enumdef *e) {
+  upb_enumdef *new_e = upb_enumdef_new();
+  upb_enum_iter i;
+  for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+    // The insertion must happen outside assert(): with NDEBUG the assert
+    // expression is not evaluated and the copy would come out empty.
+    bool added = upb_enumdef_addval(new_e, upb_enum_iter_name(i),
+                                    upb_enum_iter_number(i));
+    assert(added);
+    (void)added;  // Unused when assertions are compiled out.
+  }
+  new_e->defaultval = e->defaultval;
+  return new_e;
+}
+
+// Adds the pair (name, num) to the enum.  Returns false, changing nothing, if
+// either the name or the number is already present or if the name cannot be
+// copied.  The number->name table stores its own strdup()'d copy of "name".
+bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) {
+  if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL))
+    return false;
+  // NOTE(review): assumes upb_inttable_insert() copies sizeof(upb_iton_ent)
+  // bytes from its third argument, the same way upb_strtable_insert() is
+  // handed "&num" below -- confirm against table.h.  Under that contract the
+  // value passed must be an entry whose "str" member points at the copy, not
+  // the copied string itself.
+  upb_iton_ent ent;
+  memset(&ent, 0, sizeof(ent));
+  ent.str = strdup(name);
+  if (!ent.str) return false;
+  upb_strtable_insert(&e->ntoi, name, &num);
+  upb_inttable_insert(&e->iton, num, &ent);
+  return true;
+}
+
+// Sets the enum's default value.  Asserts that the def is still mutable
+// (i.e. has no symtab).
+void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) {
+ assert(upb_def_ismutable(UPB_UPCAST(e)));
+ e->defaultval = val;
+}
+
+// Starts an iteration over the enum's values, backed by the number->name
+// table.
+upb_enum_iter upb_enum_begin(upb_enumdef *e) {
+ // We could iterate over either table here; the choice is arbitrary.
+ return upb_inttable_begin(&e->iton);
+}
+
+// Advances "iter" to the next entry of the number->name table.
+upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
+ return upb_inttable_next(&e->iton, iter);
+}
+
+// Looks up the name registered for number "num"; NULL if there is none.
+const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
+  upb_iton_ent *ent = upb_inttable_fastlookup(&def->iton, num, sizeof(*ent));
+  if (!ent) return NULL;
+  return ent->str;
+}
+
+// Looks up the number for the "len"-byte name at "name".  Returns whether the
+// name exists; when it does and "num" is non-NULL, *num receives its value.
+bool upb_enumdef_ntoil(upb_enumdef *def, char *name, size_t len, int32_t *num) {
+  upb_ntoi_ent *ent = upb_strtable_lookupl(&def->ntoi, name, len);
+  if (ent && num) *num = ent->value;
+  return ent != NULL;
+}
+
+// Same as upb_enumdef_ntoil() for a NUL-terminated name.
+bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num) {
+ return upb_enumdef_ntoil(e, name, strlen(name), num);
+}
+
+
+/* upb_fielddef ***************************************************************/
+
+// Allocates a field def holding one ref, attached to no message, with label
+// OPTIONAL and type/number 0.  Its "fval" initially refers to the field
+// itself.  The malloc() result is not checked.
+// NOTE(review): "defaultval" is not set here, yet upb_fielddef_free() reads
+// it for string fields -- confirm that every string field gets a default
+// before it can be freed.
+upb_fielddef *upb_fielddef_new() {
+ upb_fielddef *f = malloc(sizeof(*f));
+ f->msgdef = NULL;
+ f->def = NULL;
+ upb_atomic_init(&f->refcount, 1);
+ f->finalized = false;
+ f->type = 0;
+ f->label = UPB_LABEL(OPTIONAL);
+ f->hasbit = 0;
+ f->offset = 0;
+ f->number = 0; // not a valid field number.
+ f->name = NULL;
+ f->accessor = NULL;
+ upb_value_setfielddef(&f->fval, f);
+ return f;
+}
+
+// Frees a field def: the default value's pointer when upb_isstring(f) holds,
+// then the name, then the field itself.  The field's "def" is not unref'd
+// here.
+static void upb_fielddef_free(upb_fielddef *f) {
+ if (upb_isstring(f)) {
+ free(upb_value_getptr(f->defaultval));
+ }
+ free(f->name);
+ free(f);
+}
+
+// Not implemented yet: does nothing, so the refcount is NOT incremented even
+// though upb_fielddef_unref() decrements it.
+void upb_fielddef_ref(upb_fielddef *f) {
+ // TODO.
+ (void)f;
+}
+
+// Releases one ref on "f" (NULL is ignored).  When the count reaches zero, a
+// field attached to a message releases a ref on that message; a detached
+// field is freed.
+void upb_fielddef_unref(upb_fielddef *f) {
+  // TODO.
+  if (!f) return;
+  if (!upb_atomic_unref(&f->refcount)) return;
+  if (!f->msgdef) {
+    upb_fielddef_free(f);
+    return;
+  }
+  upb_msgdef_unref(f->msgdef);
+  // fielddef might be deleted now.
+}
+
+// Returns a new field def that copies the message pointer, type, label,
+// number and name of "f".  If "f" refers to a def that has a fully-qualified
+// name, the copy gets an unresolved reference to that name.
+upb_fielddef *upb_fielddef_dup(upb_fielddef *f) {
+  upb_fielddef *newf = upb_fielddef_new();
+  // NOTE(review): the copy inherits f's msgdef pointer as before -- confirm
+  // whether a duplicate is meant to start out detached instead.
+  newf->msgdef = f->msgdef;
+  newf->type = f->type;
+  newf->label = f->label;
+  newf->number = f->number;
+  // Each field frees its own name, so the copy needs its own string.
+  newf->name = f->name ? strdup(f->name) : NULL;
+  // f->def is NULL until a type name is set, and fqname may be NULL too.
+  if (f->def && f->def->fqname) {
+    upb_fielddef_settypename(newf, f->def->fqname);
+  }
+  return newf;  // Return the copy, not the original.
+}
+
+static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
+ assert(upb_dyncast_unresolveddef(f->def));
+ upb_def_unref(f->def);
+ f->def = def;
+ if (f->type == UPB_TYPE(ENUM)) {
+ // Resolve the enum's default from a string to an integer.
+ char *str = upb_value_getptr(f->defaultval);
+ assert(str); // Should point to either a real default or the empty string.
+ upb_enumdef *e = upb_downcast_enumdef(f->def);
+ int32_t val = 0;
+ if (str[0] == '\0') {
+ upb_value_setint32(&f->defaultval, e->defaultval);
+ } else {
+ bool success = upb_enumdef_ntoi(e, str, &val);
+ free(str);
+ if (!success) {
+ upb_status_setf(s, UPB_ERROR, "Default enum value (%s) is not a "
+ "member of the enum", str);
+ return false;
+ }
+ upb_value_setint32(&f->defaultval, val);
+ }
+ }
+ return true;
+}
+
+// Sets the field number.  Asserts that the field is not attached to a
+// message.
+void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
+ assert(f->msgdef == NULL);
+ f->number = number;
+}
+
+// Sets the field name to a private copy of "name", releasing any name set
+// earlier.  Asserts that the field is not attached to a message.
+void upb_fielddef_setname(upb_fielddef *f, const char *name) {
+  assert(f->msgdef == NULL);
+  // Copy first so that passing the field's current name back in stays safe.
+  char *copy = strdup(name);
+  free(f->name);
+  f->name = copy;
+}
+
+// Each setter below stores its argument in the field and asserts that the
+// field has not been finalized.
+
+// Sets the field's type.
+void upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
+ assert(!f->finalized);
+ f->type = type;
+}
+
+// Sets the field's label.
+void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) {
+ assert(!f->finalized);
+ f->label = label;
+}
+// Sets the field's default value.  The value is stored as-is; any pointer it
+// carries is not copied.
+void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) {
+ assert(!f->finalized);
+ // TODO: string ownership?
+ f->defaultval = value;
+}
+
+// Sets the field's "fval".  The value is stored as-is.
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) {
+ assert(!f->finalized);
+ // TODO: string ownership?
+ f->fval = fval;
+}
+
+// Sets the accessor vtable used for this field.
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) {
+ assert(!f->finalized);
+ f->accessor = vtbl;
+}
+
+void upb_fielddef_settypename(upb_fielddef *f, const char *name) {
+ upb_def_unref(f->def);
+ f->def = UPB_UPCAST(upb_unresolveddef_new(name));
+}
+
+// qsort() comparator over an array of upb_fielddef* that orders fields by:
+//   1. value size (small to large), as given by upb_types[type].size.
+//   2. field number.
+// Returns a negative, zero or positive value as required by qsort().
+static int upb_fielddef_cmpval(const void *_f1, const void *_f2) {
+  const upb_fielddef *f1 = *(upb_fielddef *const *)_f1;
+  const upb_fielddef *f2 = *(upb_fielddef *const *)_f2;
+  size_t size1 = upb_types[f1->type].size;
+  size_t size2 = upb_types[f2->type].size;
+  // Compare explicitly instead of returning size1 - size2: the difference of
+  // two size_t values wraps and must not be narrowed to int.
+  if (size1 != size2) return size1 < size2 ? -1 : 1;
+  // Otherwise order by field number, again without subtracting.
+  return (f1->number > f2->number) - (f1->number < f2->number);
+}
+
+// qsort() comparator over an array of upb_fielddef* that orders fields by:
+//   1. required/optional (required fields first).
+//   2. field number.
+// Returns a negative, zero or positive value as required by qsort().
+static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) {
+  const upb_fielddef *f1 = *(upb_fielddef *const *)_f1;
+  const upb_fielddef *f2 = *(upb_fielddef *const *)_f2;
+  bool req1 = f1->label == UPB_LABEL(REQUIRED);
+  bool req2 = f2->label == UPB_LABEL(REQUIRED);
+  // A required field must sort *before* a non-required one.  The previous
+  // "req1 - req2" returned a positive value in that case, placing required
+  // fields last, and relied on size_t wraparound for the opposite case.
+  if (req1 != req2) return req1 ? -1 : 1;
+  // Otherwise order by field number.
+  return (f1->number > f2->number) - (f1->number < f2->number);
+}
+
+
+/* upb_msgdef *****************************************************************/
+
+// Allocates and initializes an empty msgdef: the base def is initialized as
+// UPB_DEF_MSG, both field lookup tables are created, and size, hasbit_bytes
+// and the extension range are zeroed.
+// Returns NULL if the allocation fails.
+upb_msgdef *upb_msgdef_new(void) {
+  upb_msgdef *m = malloc(sizeof(*m));
+  if (!m) return NULL;  // Previously dereferenced without a check.
+  upb_def_init(&m->base, UPB_DEF_MSG);
+  upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
+  upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+  m->size = 0;
+  m->hasbit_bytes = 0;
+  m->extension_start = 0;
+  m->extension_end = 0;
+  return m;
+}
+
+// Destroys a msgdef: frees every field it contains, then both lookup tables,
+// then the base def, and finally the msgdef itself.
+static void upb_msgdef_free(upb_msgdef *m) {
+  upb_msg_iter it = upb_msg_begin(m);
+  while (!upb_msg_done(it)) {
+    upb_fielddef_free(upb_msg_iter_field(it));
+    it = upb_msg_next(m, it);
+  }
+  upb_strtable_free(&m->ntof);
+  upb_inttable_free(&m->itof);
+  upb_def_uninit(&m->base);
+  free(m);
+}
+
+// Creates a new msgdef with the same size, hasbit_bytes and extension range
+// as "m", and adds a duplicate of each of m's fields to it.
+upb_msgdef *upb_msgdef_dup(upb_msgdef *m) {
+  upb_msgdef *copy = upb_msgdef_new();
+  copy->size = m->size;
+  copy->hasbit_bytes = m->hasbit_bytes;
+  copy->extension_start = m->extension_start;
+  copy->extension_end = m->extension_end;
+
+  upb_msg_iter it = upb_msg_begin(m);
+  while (!upb_msg_done(it)) {
+    upb_fielddef *field_copy = upb_fielddef_dup(upb_msg_iter_field(it));
+    upb_msgdef_addfield(copy, field_copy);
+    it = upb_msg_next(m, it);
+  }
+  return copy;
+}
+
+// Write accessors for msgdef layout members.  Each asserts that the def is
+// still mutable before storing the new value.
+
+// Sets the size recorded for the message.
+void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) {
+  assert(upb_def_ismutable(UPB_UPCAST(m)));
+  m->size = size;
+}
+
+// Sets the number of hasbit bytes.
+// NOTE(review): the parameter is uint16_t but the member is uint8_t, so
+// larger values are truncated on assignment -- confirm this is intended.
+void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) {
+  assert(upb_def_ismutable(UPB_UPCAST(m)));
+  m->hasbit_bytes = bytes;
+}
+
+// Sets the first tag number of the extension range.
+void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start) {
+  assert(upb_def_ismutable(UPB_UPCAST(m)));
+  m->extension_start = start;
+}
+
+// Sets the end tag number of the extension range.
+void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end) {
+  assert(upb_def_ismutable(UPB_UPCAST(m)));
+  m->extension_end = end;
+}
+
+// Adds field "f" to message "m", registering it under both its number and
+// its name.  Returns false (after unref'ing f) if m already has a field with
+// the same number or name; returns true otherwise.
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
+ assert(upb_atomic_read(&f->refcount) > 0);
+ // Drop the ref passed in by the caller.  If that was not the last ref, take
+ // a ref on m.
+ if (!upb_atomic_unref(&f->refcount)) upb_msgdef_ref(m);
+ if (upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) {
+ // NOTE(review): f's refcount was already decremented above, and any ref
+ // taken on m is not released here -- confirm upb_fielddef_unref() copes
+ // with this on the collision path.
+ upb_fielddef_unref(f);
+ return false;
+ }
+ assert(f->msgdef == NULL);
+ f->msgdef = m;
+ upb_itof_ent itof_ent = {0, f};
+ upb_inttable_insert(&m->itof, f->number, &itof_ent);
+ // NOTE(review): the value passed is &f (a pointer to the fielddef pointer),
+ // not a upb_ntof_ent -- presumably the table stores only the value portion
+ // of the entry; verify against upb_strtable_insert().
+ upb_strtable_insert(&m->ntof, f->name, &f);
+ return true;
+}
+
+// Integer division rounding up, for positive numerators; returns 0 when
+// numerator <= 0.
+/* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
+static int upb_div_round_up(int numerator, int denominator) {
+  if (numerator <= 0) {
+    return 0;
+  }
+  int whole = (numerator - 1) / denominator;
+  return whole + 1;
+}
+
+// Computes the default layout of "m": assigns a hasbit index and a byte
+// offset to every field that has an accessor, and sets m->hasbit_bytes and
+// m->size.  Fields without an accessor are left untouched.
+void upb_msgdef_layout(upb_msgdef *m) {
+  // Collect the fields that take part in the layout.
+  upb_fielddef **sorted = malloc(sizeof(*sorted) * upb_msgdef_numfields(m));
+  int count = 0;
+  for (upb_msg_iter it = upb_msg_begin(m); !upb_msg_done(it);
+       it = upb_msg_next(m, it)) {
+    upb_fielddef *f = upb_msg_iter_field(it);
+    if (!f->accessor) continue;
+    sorted[count] = f;
+    count++;
+  }
+
+  // The hasbits occupy the start of the message, one bit per laid-out field.
+  m->hasbit_bytes = upb_div_round_up(count, 8);
+  m->size = m->hasbit_bytes;  // + header_size?
+
+  // Hasbit indices follow the upb_fielddef_cmphasbit ordering.
+  qsort(sorted, count, sizeof(*sorted), upb_fielddef_cmphasbit);
+  for (int bit = 0; bit < count; bit++) {
+    sorted[bit]->hasbit = bit;
+  }
+
+  // Value offsets follow the upb_fielddef_cmpval ordering.
+  qsort(sorted, count, sizeof(*sorted), upb_fielddef_cmpval);
+  size_t max_align = 0;
+  for (int k = 0; k < count; k++) {
+    upb_fielddef *f = sorted[k];
+    const upb_type_info *info = &upb_types[f->type];
+    // Repeated fields are stored as a pointer regardless of element type.
+    bool is_seq = upb_isseq(f);
+    size_t size = is_seq ? sizeof(void*) : info->size;
+    size_t align = is_seq ? alignof(void*) : info->align;
+
+    // General alignment rules are: each member must be at an address that is
+    // a multiple of that type's alignment.  Also, the size of the structure
+    // as a whole must be a multiple of the greatest alignment of any member.
+    f->offset = upb_align_up(m->size, align);
+    m->size = f->offset + size;
+    max_align = UPB_MAX(max_align, align);
+  }
+  if (max_align > 0) m->size = upb_align_up(m->size, max_align);
+
+  free(sorted);
+}
+
+// Returns an iterator positioned at the start of m's number-to-field table.
+upb_msg_iter upb_msg_begin(upb_msgdef *m) {
+  upb_inttable *fields = &m->itof;
+  return upb_inttable_begin(fields);
+}
+
+// Returns the iterator that follows "iter" in m's number-to-field table.
+upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
+  upb_inttable *fields = &m->itof;
+  return upb_inttable_next(fields, iter);
+}
+
+
+/* upb_symtabtxn **************************************************************/
+
+// Value stored in the string tables of both the symtab and the symtabtxn
+// (both are initialized with sizeof(upb_symtab_ent)); the key is the def's
+// fqname.
+typedef struct {
+ upb_def *def;
+} upb_symtab_ent;
+
+// Initializes an empty transaction whose def table holds upb_symtab_ent
+// values.
+void upb_symtabtxn_init(upb_symtabtxn *t) {
+  upb_strtable *defs = &t->deftab;
+  upb_strtable_init(defs, 16, sizeof(upb_symtab_ent));
+}
+
+// Tears down a transaction: releases the transaction's ref on every def that
+// is still in it, then frees the table.
+void upb_symtabtxn_uninit(upb_symtabtxn *txn) {
+  upb_strtable *t = &txn->deftab;
+  upb_strtable_iter i;
+  for (upb_strtable_begin(&i, t); !upb_strtable_done(&i);
+       upb_strtable_next(&i)) {
+    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
+    // Release through the refcounting API instead of a raw free(): free()
+    // only releases the top-level struct, leaking everything the def owns
+    // (e.g. a msgdef's tables), and ignores other outstanding refs.
+    upb_def_unref(e->def);
+  }
+  upb_strtable_free(t);
+}
+
+// Adds "def" to the transaction, keyed by its fully-qualified name.  The
+// caller passes its ref on "def" to the transaction.
+//
+// Returns false, releasing the passed ref and leaving the transaction
+// unchanged, if a def with the same name is already present; returns true
+// otherwise.
+bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) {
+  if (upb_strtable_lookup(&t->deftab, def->fqname)) {
+    upb_def_unref(def);
+    return false;
+  }
+  upb_symtab_ent e = {def};
+  upb_strtable_insert(&t->deftab, def->fqname, &e);
+  return true;
+}
+
+#if 0
+err:
+ // We need to free all defs from "tmptab."
+ upb_rwlock_unlock(&s->lock);
+ for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
+ e = upb_strtable_next(&tmptab, &e->e)) {
+ upb_def_unref(e->def);
+ }
+ upb_strtable_free(&tmptab);
+ return false;
+#endif
+
+// Looks up the definition of "sym" in table "t".  "base" is the symbol inside
+// which "sym" appears.
+//
+// Returns NULL for an empty symbol.  Only absolute symbols (those starting
+// with UPB_SYMBOL_SEPARATOR) are supported; any other symbol hits
+// assert(false) and yields NULL.
+static upb_symtab_ent *upb_resolve(upb_strtable *t,
+                                   const char *base, const char *sym) {
+  if (sym[0] == '\0') return NULL;
+
+  if (sym[0] != UPB_SYMBOL_SEPARATOR) {
+    // Relative lookup would remove components from base until an entry is
+    // found or none remain.
+    // TODO: This branch is totally broken, but currently not used.
+    (void)base;
+    assert(false);
+    return NULL;
+  }
+
+  // Symbols starting with '.' are absolute, so a single lookup suffices;
+  // skip the leading '.'.
+  const char *absolute = sym + 1;
+  return upb_strtable_lookup(t, absolute);
+}
+
+// Iteration over a transaction's defs; thin wrappers over the upb_strtable
+// iterator on t->deftab.
+
+// Positions "i" at the first def of "t".
+void upb_symtabtxn_begin(upb_symtabtxn_iter *i, upb_symtabtxn *t) {
+  upb_strtable_begin(i, &t->deftab);
+}
+
+// Advances "i" to the next def.
+void upb_symtabtxn_next(upb_symtabtxn_iter *i) {
+  upb_strtable_next(i);
+}
+
+// Returns whether "i" has moved past the last def.
+bool upb_symtabtxn_done(upb_symtabtxn_iter *i) {
+  return upb_strtable_done(i);
+}
+
+// Returns the def at the iterator's current position.
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *i) {
+  const upb_symtab_ent *entry = upb_strtable_iter_value(i);
+  return entry->def;
+}
+
+
+/* upb_symtab public interface ************************************************/
+
+// Frees every def stored in table "t" (asserting that each has a refcount of
+// zero) and then the table itself.
+static void _upb_symtab_free(upb_strtable *t) {
+  upb_strtable_iter it;
+  for (upb_strtable_begin(&it, t); !upb_strtable_done(&it);
+       upb_strtable_next(&it)) {
+    const upb_symtab_ent *e = upb_strtable_iter_value(&it);
+    assert(upb_atomic_read(&e->def->refcount) == 0);
+    upb_def_free(e->def);
+  }
+  upb_strtable_free(t);
+}
+
+// Destroys a symtab: frees the active defs and their table, frees every def
+// on the olddefs list (asserting a zero refcount on each), destroys the lock,
+// releases the olddefs list and finally the symtab itself.
+static void upb_symtab_free(upb_symtab *s) {
+  _upb_symtab_free(&s->symtab);
+
+  uint32_t idx = 0;
+  while (idx < s->olddefs.len) {
+    upb_def *d = s->olddefs.defs[idx];
+    assert(upb_atomic_read(&d->refcount) == 0);
+    upb_def_free(d);
+    idx++;
+  }
+
+  upb_rwlock_destroy(&s->lock);
+  upb_deflist_uninit(&s->olddefs);
+  free(s);
+}
+
+// Releases one ref on "s" and frees the symtab when upb_atomic_unref()
+// reports true.  A NULL "s" is ignored.
+void upb_symtab_unref(upb_symtab *s) {
+  if (!s) return;
+  if (!upb_atomic_unref(&s->refcount)) return;
+  upb_symtab_free(s);
+}
+
+// Allocates an empty symtab with a refcount of 1, an initialized lock, an
+// empty symbol table and an empty olddefs list.
+// Returns NULL if the allocation fails.
+upb_symtab *upb_symtab_new(void) {
+  upb_symtab *s = malloc(sizeof(*s));
+  if (!s) return NULL;  // Previously dereferenced without a check.
+  upb_atomic_init(&s->refcount, 1);
+  upb_rwlock_init(&s->lock);
+  upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
+  upb_deflist_init(&s->olddefs);
+  return s;
+}
+
+// Returns a malloc()'d array of the defs in "s" whose type is "type" (or all
+// defs if type == UPB_DEF_ANY) and stores its length in *count.  A ref is
+// taken on every returned def.
+// Returns NULL with *count == 0 if the array cannot be allocated.
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
+  upb_rwlock_rdlock(&s->lock);
+  int total = upb_strtable_count(&s->symtab);
+  // We may only use part of this, depending on how many symbols are of the
+  // correct type.
+  upb_def **defs = malloc(sizeof(*defs) * total);
+  if (!defs && total > 0) {
+    upb_rwlock_unlock(&s->lock);
+    *count = 0;
+    return NULL;
+  }
+
+  int n = 0;
+  upb_strtable_iter iter;
+  for (upb_strtable_begin(&iter, &s->symtab); !upb_strtable_done(&iter);
+       upb_strtable_next(&iter)) {
+    const upb_symtab_ent *e = upb_strtable_iter_value(&iter);
+    upb_def *def = e->def;
+    assert(def);
+    if (type == UPB_DEF_ANY || def->type == type) {
+      // Take the ref while the lock is still held, as upb_symtab_lookup()
+      // and upb_symtab_resolve() do.  The previous code ref'd only after
+      // unlocking, leaving a window in which a def could be replaced.
+      upb_def_ref(def);
+      defs[n++] = def;
+    }
+  }
+  upb_rwlock_unlock(&s->lock);
+  *count = n;
+  return defs;
+}
+
+// Looks up the exact name "sym" in the symtab under the read lock.  Returns
+// the def with a new ref taken on it, or NULL if there is no such entry.
+upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym) {
+  upb_rwlock_rdlock(&s->lock);
+  upb_symtab_ent *entry = upb_strtable_lookup(&s->symtab, sym);
+  upb_def *found = entry ? entry->def : NULL;
+  if (entry) upb_def_ref(found);
+  upb_rwlock_unlock(&s->lock);
+  return found;
+}
+
+// Resolves "sym" relative to "base" via upb_resolve() under the read lock.
+// Returns the def with a new ref taken on it, or NULL if nothing was found.
+upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym) {
+  upb_rwlock_rdlock(&s->lock);
+  upb_symtab_ent *entry = upb_resolve(&s->symtab, base, sym);
+  upb_def *found = entry ? entry->def : NULL;
+  if (entry) upb_def_ref(found);
+  upb_rwlock_unlock(&s->lock);
+  return found;
+}
+
+// Depth-first search used by upb_symtab_commit() to find existing defs that
+// can reach a def being replaced by the transaction.
+//
+// "open_defs[0..n)" holds the defs on the current DFS path.  Returns true if
+// "def" is (now) present in txn->deftab: either it already was, or one of the
+// defs it reaches is, in which case a duplicate of "def" is inserted into the
+// transaction.  Returns false if "def" is already on the current path.
+//
+// NOTE(review): open_defs is appended to without a bounds check; the caller
+// in this file supplies UPB_MAX_TYPE_DEPTH slots.
+bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
+                    upb_symtabtxn *txn) {
+  // This linear search makes the DFS O(n^2) in the length of the paths.
+  // Could make this O(n) with a hash table, but n is small.
+  for (int i = 0; i < n; i++) {
+    if (def == open_defs[i]) return false;
+  }
+
+  bool needcopy = false;
+  upb_msgdef *m = upb_dyncast_msgdef(def);
+  if (m) {
+    upb_msg_iter i;
+    open_defs[n++] = def;
+    for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+      upb_fielddef *f = upb_msg_iter_field(i);
+      if (!upb_hasdef(f)) continue;
+      needcopy |= upb_symtab_dfs(f->def, open_defs, n, txn);
+    }
+  }
+
+  // Use def->fqname rather than m->base.fqname: m is NULL whenever def is not
+  // a msgdef (e.g. an enum reached through a field), and the old code
+  // dereferenced it unconditionally.
+  bool replacing = (upb_strtable_lookup(&txn->deftab, def->fqname) != NULL);
+  if (needcopy && !replacing) {
+    upb_symtab_ent e = {upb_def_dup(def)};
+    upb_strtable_insert(&txn->deftab, def->fqname, &e);
+    replacing = true;
+  }
+  return replacing;
+}
+
+// Moves every def in "txn" into symtab "s".
+//
+// Steps, all performed under the symtab's write lock:
+//   1. Existing defs that can reach a def being replaced are duplicated into
+//      the transaction (upb_symtab_dfs()).
+//   2. Every def-typed field of every msgdef in the transaction is resolved,
+//      preferring transaction defs over existing symtab defs, and the found
+//      def's type is checked against the field's type.
+//   3. The defs are moved into the symtab; any def they replace is pushed
+//      onto s->olddefs.
+// On success the transaction is cleared, upb_symtab_gc() is run and true is
+// returned.  On failure "status" is set, the lock is released and false is
+// returned without moving any def into the symtab.
+bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
+  upb_rwlock_wrlock(&s->lock);
+
+  // All existing defs that can reach defs that are being replaced must
+  // themselves be replaced with versions that will point to the new defs.
+  // Do a DFS -- any path that finds a new def must replace all ancestors.
+  upb_strtable *symtab = &s->symtab;
+  upb_strtable_iter i;
+  upb_strtable_begin(&i, symtab);
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
+    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
+    upb_symtab_dfs(e->def, open_defs, 0, txn);
+  }
+
+  // Resolve all refs.
+  upb_strtable *txntab = &txn->deftab;
+  upb_strtable_begin(&i, txntab);
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
+    upb_msgdef *m = upb_dyncast_msgdef(e->def);
+    if (!m) continue;
+    // Type names are resolved relative to the message in which they appear.
+    const char *base = m->base.fqname;
+
+    upb_msg_iter j;
+    for (j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) {
+      upb_fielddef *f = upb_msg_iter_field(j);
+      if (!upb_hasdef(f)) continue;  // No resolving necessary.
+      const char *name = upb_downcast_unresolveddef(f->def)->name;
+
+      // Resolve from either the txntab (pending adds) or symtab (existing
+      // defs).  If both exist, prefer the pending add, because it will be
+      // overwriting the existing def.
+      upb_symtab_ent *found = upb_resolve(txntab, base, name);
+      if (!found) found = upb_resolve(symtab, base, name);
+      if (!found) {
+        upb_status_setf(status, UPB_ERROR, "could not resolve symbol '%s' "
+                        "in context '%s'", name, base);
+        goto err;
+      }
+
+      // Check the type of the found def.
+      upb_deftype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
+      if (found->def->type != expected) {
+        upb_status_setf(status, UPB_ERROR, "Unexpected type");
+        goto err;
+      }
+      if (!upb_fielddef_resolve(f, found->def, status)) goto err;
+    }
+  }
+
+  // The defs in the transaction have been vetted, and can be moved to the
+  // symtab without causing errors.
+  upb_strtable_begin(&i, txntab);
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i);
+    upb_def_movetosymtab(tmptab_e->def, s);
+    upb_symtab_ent *symtab_e =
+        upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
+    if (symtab_e) {
+      // Replacing an existing def: keep the old one on olddefs.
+      upb_deflist_push(&s->olddefs, symtab_e->def);
+      symtab_e->def = tmptab_e->def;
+    } else {
+      upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e);
+    }
+  }
+
+  upb_strtable_clear(txntab);
+  upb_rwlock_unlock(&s->lock);
+  upb_symtab_gc(s);
+  return true;
+
+err:
+  // Every failure path must release the write lock; the previous code
+  // returned with it still held.
+  upb_rwlock_unlock(&s->lock);
+  return false;
+}
+
+// Placeholder: currently does nothing with "s".
+void upb_symtab_gc(upb_symtab *s) {
+  // TODO.
+  (void)s;
+}
diff --git a/upb/def.h b/upb/def.h
new file mode 100644
index 0000000..4a7a017
--- /dev/null
+++ b/upb/def.h
@@ -0,0 +1,465 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Provides a mechanism for creating and linking proto definitions.
+ * These form the protobuf schema, and are used extensively throughout upb:
+ * - upb_msgdef: describes a "message" construct.
+ * - upb_fielddef: describes a message field.
+ * - upb_enumdef: describes an enum.
+ * (TODO: definitions of services).
+ *
+ * These defs are mutable (and not thread-safe) when first created.
+ * Once they are added to a defbuilder (and later its symtab) they become
+ * immutable.
+ */
+
+#ifndef UPB_DEF_H_
+#define UPB_DEF_H_
+
+#include "upb/atomic.h"
+#include "upb/table.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _upb_symtab;
+typedef struct _upb_symtab upb_symtab;
+
+// All the different kinds of defs we support. These correspond 1:1 with
+// declarations in a .proto file.
+// The value is stored in upb_def.type and checked by the upb_dyncast_*() and
+// upb_downcast_*() helpers.
+typedef enum {
+ UPB_DEF_MSG = 1,
+ UPB_DEF_ENUM,
+ UPB_DEF_SERVICE, // Not yet implemented.
+
+ UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*()
+ UPB_DEF_UNRESOLVED = 99, // Internal-only.
+} upb_deftype_t;
+
+
+/* upb_def: base class for defs **********************************************/
+
+// Common header embedded as the first member ("base") of msgdefs and
+// enumdefs, which is what UPB_UPCAST() and the cast helpers rely on.
+typedef struct {
+ char *fqname; // Fully qualified.
+ upb_symtab *symtab; // Def is mutable iff symtab == NULL.
+ upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
+ upb_deftype_t type;
+} upb_def;
+
+// Call to ref/unref a def. Can be used at any time, but is not thread-safe
+// until the def is in a symtab. While a def is in a symtab, everything
+// reachable from that def (the symtab and all defs in the symtab) are
+// guaranteed to be alive.
+void upb_def_ref(upb_def *def);
+void upb_def_unref(upb_def *def);
+upb_def *upb_def_dup(upb_def *def);
+
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
+
+/* upb_fielddef ***************************************************************/
+
+// A upb_fielddef describes a single field in a message. It isn't a full def
+// in the sense that it derives from upb_def. It cannot stand on its own; it
+// must be part of a upb_msgdef. It is also reference-counted.
+typedef struct _upb_fielddef {
+ struct _upb_msgdef *msgdef; // Set by upb_msgdef_addfield().
+ upb_def *def; // if upb_hasdef(f)
+ upb_atomic_t refcount;
+ bool finalized; // Checked (asserted false) by most write accessors.
+
+ // The following fields may be modified until the def is finalized.
+ uint8_t type; // Use UPB_TYPE() constants.
+ uint8_t label; // Use UPB_LABEL() constants.
+ int16_t hasbit; // Assigned by upb_msgdef_layout().
+ uint16_t offset; // Assigned by upb_msgdef_layout().
+ int32_t number;
+ char *name; // upb_fielddef_setname() stores a strdup()'d copy here.
+ upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
+ upb_value fval;
+ struct _upb_accessor_vtbl *accessor;
+} upb_fielddef;
+
+upb_fielddef *upb_fielddef_new();
+void upb_fielddef_ref(upb_fielddef *f);
+void upb_fielddef_unref(upb_fielddef *f);
+upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
+
+// Read accessors. May be called any time.  Each one simply returns the
+// corresponding member of the fielddef.
+INLINE uint8_t upb_fielddef_type(upb_fielddef *f) {
+  return f->type;
+}
+INLINE uint8_t upb_fielddef_label(upb_fielddef *f) {
+  return f->label;
+}
+INLINE int32_t upb_fielddef_number(upb_fielddef *f) {
+  return f->number;
+}
+INLINE char *upb_fielddef_name(upb_fielddef *f) {
+  return f->name;
+}
+INLINE upb_value upb_fielddef_default(upb_fielddef *f) {
+  return f->defaultval;
+}
+INLINE upb_value upb_fielddef_fval(upb_fielddef *f) {
+  return f->fval;
+}
+INLINE bool upb_fielddef_finalized(upb_fielddef *f) {
+  return f->finalized;
+}
+INLINE struct _upb_msgdef *upb_fielddef_msgdef(upb_fielddef *f) {
+  return f->msgdef;
+}
+INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(upb_fielddef *f) {
+  return f->accessor;
+}
+
+// Only meaningful once the def is in a symtab (returns NULL otherwise, or for
+// a fielddef where !upb_hasdef(f)).
+upb_def *upb_fielddef_subdef(upb_fielddef *f);
+
+// Note: upb_fielddef_msgdef() is NULL until the fielddef has been added to a
+// msgdef.
+
+// Write accessors. "Number" and "name" must be set before the fielddef is
+// added to a msgdef. For the moment we do not allow these to be set once
+// the fielddef is added to a msgdef -- this could be relaxed in the future.
+void upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
+void upb_fielddef_setname(upb_fielddef *f, const char *name);
+
+// These writers may be called at any time prior to being put in a symtab.
+void upb_fielddef_settype(upb_fielddef *f, uint8_t type);
+void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
+void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
+// The name of the message or enum this field is referring to. Must be found
+// at name resolution time (when the symtabtxn is committed to the symtab).
+void upb_fielddef_settypename(upb_fielddef *f, const char *name);
+
+// A variety of tests about the type of a field.
+
+// True for GROUP and MESSAGE types.
+INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
+  if (type == UPB_TYPE(GROUP)) return true;
+  return type == UPB_TYPE(MESSAGE);
+}
+// True for STRING and BYTES types.
+INLINE bool upb_isstringtype(upb_fieldtype_t type) {
+  if (type == UPB_TYPE(STRING)) return true;
+  return type == UPB_TYPE(BYTES);
+}
+// True for every type that is neither a submessage nor a string type.
+INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
+  return !(upb_issubmsgtype(type) || upb_isstringtype(type));
+}
+// Field-based variants of the tests above.
+INLINE bool upb_issubmsg(upb_fielddef *f) {
+  return upb_issubmsgtype(f->type);
+}
+INLINE bool upb_isstring(upb_fielddef *f) {
+  return upb_isstringtype(f->type);
+}
+// True if the field's label is REPEATED.
+INLINE bool upb_isseq(upb_fielddef *f) {
+  return f->label == UPB_LABEL(REPEATED);
+}
+
+// Does the type of this field imply that it should contain an associated def?
+INLINE bool upb_hasdef(upb_fielddef *f) {
+  if (upb_issubmsg(f)) return true;
+  return f->type == UPB_TYPE(ENUM);
+}
+
+
+/* upb_msgdef *****************************************************************/
+
+// Structure that describes a single .proto message type.
+// "base" must stay the first member: the cast helpers convert a upb_def*
+// directly to a upb_msgdef*.
+typedef struct _upb_msgdef {
+ upb_def base;
+
+ // Tables for looking up fields by number and name.
+ upb_inttable itof; // int to field
+ upb_strtable ntof; // name to field
+
+ // The following fields may be modified until finalized.
+ uint16_t size; // Computed by upb_msgdef_layout() or set explicitly.
+ uint8_t hasbit_bytes; // NOTE(review): the setter takes uint16_t.
+ // The range of tag numbers used to store extensions.
+ uint32_t extension_start;
+ uint32_t extension_end;
+} upb_msgdef;
+
+// Hash table entries for looking up fields by name or number.
+// upb_itof_ent is the value type of upb_msgdef.itof; upb_ntof_ent is the
+// entry type of upb_msgdef.ntof.
+typedef struct {
+ bool junk; // upb_msgdef_addfield() initializes this to 0.
+ upb_fielddef *f;
+} upb_itof_ent;
+typedef struct {
+ upb_strtable_entry e;
+ upb_fielddef *f;
+} upb_ntof_ent;
+
+upb_msgdef *upb_msgdef_new();
+// Ref/unref a msgdef by forwarding to the generic upb_def functions on its
+// base.
+INLINE void upb_msgdef_unref(upb_msgdef *md) {
+  upb_def_unref(UPB_UPCAST(md));
+}
+INLINE void upb_msgdef_ref(upb_msgdef *md) {
+  upb_def_ref(UPB_UPCAST(md));
+}
+
+// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
+// the fields) but with any references to submessages broken and replaced with
+// just the name of the submessage. This can be put back into another symtab
+// and the names will be re-resolved in the new context.
+upb_msgdef *upb_msgdef_dup(upb_msgdef *m);
+
+// Read accessors. May be called at any time.  Each one simply returns the
+// corresponding member of the msgdef.
+INLINE uint16_t upb_msgdef_size(upb_msgdef *m) {
+  return m->size;
+}
+INLINE uint8_t upb_msgdef_hasbit_bytes(upb_msgdef *m) {
+  return m->hasbit_bytes;
+}
+INLINE uint32_t upb_msgdef_extension_start(upb_msgdef *m) {
+  return m->extension_start;
+}
+INLINE uint32_t upb_msgdef_extension_end(upb_msgdef *m) {
+  return m->extension_end;
+}
+
+// Write accessors. May only be called before the msgdef is in a symtab.
+void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
+void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
+void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start);
+void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end);
+
+// Adds a fielddef to a msgdef, and passes a ref on the field to the msgdef.
+// May only be done before the msgdef is in a symtab. The fielddef's name and
+// number must be set, and the message may not already contain any field with
+// this name or number -- if it does, the fielddef is unref'd and false is
+// returned. The fielddef may not already belong to another message.
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f);
+
+// Sets the layout of all fields according to default rules:
+// 1. Hasbits for required fields come first, then optional fields.
+// 2. Values are laid out in a way that respects alignment rules.
+// 3. The order is chosen to minimize memory usage.
+// This should only be called once all fielddefs have been added.
+// TODO: will likely want the ability to exclude strings/submessages/arrays.
+// TODO: will likely want the ability to define a header size.
+void upb_msgdef_layout(upb_msgdef *m);
+
+// Looks up a field by name or number. While these are written to be as fast
+// as possible, it will still be faster to cache the results of this lookup if
+// possible. These return NULL if no such field is found.
+INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
+  upb_itof_ent *ent = (upb_itof_ent*)
+      upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
+  if (!ent) return NULL;
+  return ent->f;
+}
+
+// Looks up a field of "m" by name.  Returns NULL if there is no such field.
+// "name" is only read, so it is taken as const char* (existing callers that
+// pass char* are unaffected).
+INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, const char *name) {
+  upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
+  return e ? e->f : NULL;
+}
+
+// Returns the number of fields in "m", taken from the entry count of the
+// name-to-field table.
+INLINE int upb_msgdef_numfields(upb_msgdef *m) {
+  upb_strtable *by_name = &m->ntof;
+  return upb_strtable_count(by_name);
+}
+
+// Iteration over fields. The order is undefined.
+// Iterators are invalidated when a field is added or removed.
+// upb_msg_iter i;
+// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+// upb_fielddef *f = upb_msg_iter_field(i);
+// // ...
+// }
+typedef upb_inttable_iter upb_msg_iter;
+
+upb_msg_iter upb_msg_begin(upb_msgdef *m);
+upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
+// Returns whether the field iterator has moved past the last field.
+INLINE bool upb_msg_done(upb_msg_iter iter) {
+  return upb_inttable_done(iter);
+}
+
+// Iterator accessor: returns the fielddef at the iterator's position.
+INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
+  upb_itof_ent *entry = (upb_itof_ent*)upb_inttable_iter_value(iter);
+  upb_fielddef *field = entry->f;
+  return field;
+}
+
+
+/* upb_enumdef ****************************************************************/
+
+// Describes a single .proto enum type.  "base" must stay the first member:
+// the cast helpers convert a upb_def* directly to a upb_enumdef*.
+typedef struct _upb_enumdef {
+ upb_def base;
+ upb_strtable ntoi; // name to integer
+ upb_inttable iton; // integer to name
+ int32_t defaultval;
+} upb_enumdef;
+
+// Entry type of upb_enumdef.ntoi.
+// NOTE(review): "value" is uint32_t while the enum API uses int32_t numbers.
+typedef struct {
+ upb_strtable_entry e;
+ uint32_t value;
+} upb_ntoi_ent;
+
+// Value type of upb_enumdef.iton.
+typedef struct {
+ bool junk;
+ char *str;
+} upb_iton_ent;
+
+upb_enumdef *upb_enumdef_new();
+// Ref/unref an enumdef by forwarding to the generic upb_def functions on its
+// base.
+INLINE void upb_enumdef_ref(upb_enumdef *e) {
+  upb_def_ref(UPB_UPCAST(e));
+}
+INLINE void upb_enumdef_unref(upb_enumdef *e) {
+  upb_def_unref(UPB_UPCAST(e));
+}
+upb_enumdef *upb_enumdef_dup(upb_enumdef *e);
+
+// Returns the enumdef's default value.
+INLINE int32_t upb_enumdef_default(upb_enumdef *e) {
+  return e->defaultval;
+}
+
+// May only be set before the enumdef is in a symtab.
+void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
+
+// Adds a value to the enumdef. Requires that no existing val has this
+// name or number (returns false and does not add if there is). May only
+// be called before the enumdef is in a symtab.
+bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
+
+// Lookups from name to integer and vice-versa.
+bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num);
+bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num);
+// Caller does not own the returned string.
+const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
+
+// Iteration over name/value pairs. The order is undefined.
+// Adding an enum val invalidates any iterators.
+// upb_enum_iter i;
+// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+// // ...
+// }
+typedef upb_inttable_iter upb_enum_iter;
+
+upb_enum_iter upb_enum_begin(upb_enumdef *e);
+upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
+// Returns whether the enum iterator has moved past the last value.
+INLINE bool upb_enum_done(upb_enum_iter iter) {
+  return upb_inttable_done(iter);
+}
+
+// Iterator accessors.
+// Returns the name stored at the iterator's position.
+INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
+  upb_iton_ent *entry = (upb_iton_ent*)upb_inttable_iter_value(iter);
+  return entry->str;
+}
+// Returns the number (the table key) at the iterator's position.
+INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
+  int32_t number = upb_inttable_iter_key(iter);
+  return number;
+}
+
+
+/* upb_symtabtxn **************************************************************/
+
+// A symbol table transaction is a map of defs that can be added to a symtab
+// in one single atomic operation that either succeeds or fails. Mutable defs
+// can be added to this map (and perhaps removed, in the future).
+//
+// A symtabtxn is not thread-safe.
+
+typedef struct {
+ upb_strtable deftab;
+} upb_symtabtxn;
+
+void upb_symtabtxn_init(upb_symtabtxn *t);
+void upb_symtabtxn_uninit(upb_symtabtxn *t);
+
+// Adds a def to the symtab. Caller passes a ref on the def to the symtabtxn.
+// The def's name must be set and there must not be any existing defs in the
+// symtabtxn with this name, otherwise false will be returned and no operation
+// will be performed (and the ref on the def will be released).
+bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def);
+
+// Gets the def (if any) that is associated with this name in the symtab.
+// Caller does *not* inherit a ref on the def.
+upb_def *upb_symtabtxn_get(upb_symtabtxn *t, char *name);
+
+// Iterate over the defs that are part of the transaction.
+// The order is undefined.
+// The iterator is invalidated by upb_symtabtxn_add().
+// upb_symtabtxn_iter i;
+// for(upb_symtabtxn_begin(&i, t); !upb_symtabtxn_done(&i);
+// upb_symtabtxn_next(&i)) {
+// upb_def *def = upb_symtabtxn_iter_def(&i);
+// }
+typedef upb_strtable_iter upb_symtabtxn_iter;
+
+void upb_symtabtxn_begin(upb_symtabtxn_iter* i, upb_symtabtxn *t);
+void upb_symtabtxn_next(upb_symtabtxn_iter *i);
+bool upb_symtabtxn_done(upb_symtabtxn_iter *i);
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *iter);
+
+
+/* upb_symtab *****************************************************************/
+
+// A SymbolTable is where upb_defs live. It is empty when first constructed.
+// Clients add definitions to the symtab (or replace existing definitions) by
+// using a upb_symtab_commit() or calling upb_symtab_add().
+
+// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
+typedef struct {
+ upb_def **defs; // The stored def pointers.
+ uint32_t len; // Number of defs in use (loop bound for iteration).
+ uint32_t size; // NOTE(review): presumably the allocated capacity -- confirm.
+} upb_deflist;
+
+void upb_deflist_init(upb_deflist *l);
+void upb_deflist_uninit(upb_deflist *l);
+void upb_deflist_push(upb_deflist *l, upb_def *d);
+
+// The symbol table itself.  Created by upb_symtab_new() with a refcount of 1
+// and destroyed when upb_symtab_unref() drops the last ref.
+struct _upb_symtab {
+ upb_atomic_t refcount;
+ upb_rwlock_t lock; // Protects all members except the refcount.
+ upb_strtable symtab; // The symbol table.
+ upb_deflist olddefs; // Defs that upb_symtab_commit() replaced.
+};
+
+upb_symtab *upb_symtab_new(void);
+// Takes an additional ref on the symtab.
+INLINE void upb_symtab_ref(upb_symtab *s) {
+  upb_atomic_ref(&s->refcount);
+}
+void upb_symtab_unref(upb_symtab *s);
+
+// Resolves the given symbol using the rules described in descriptor.proto,
+// namely:
+//
+// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like
+// scoping rules are used to find the type (i.e. first the nested types
+// within this message are searched, then within the parent, on up to the
+// root namespace).
+//
+// If a def is found, the caller owns one ref on the returned def. Otherwise
+// returns NULL.
+// TODO: make return const
+upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym);
+
+// Find an entry in the symbol table with this exact name. If a def is found,
+// the caller owns one ref on the returned def. Otherwise returns NULL.
+// TODO: make return const
+upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym);
+
+// Gets an array of pointers to all currently active defs in this symtab. The
+// caller owns the returned array (which is of length *count) as well as a ref
+// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are
+// returned, otherwise only defs of the required type are returned.
+// TODO: make return const
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *n, upb_deftype_t type);
+
+// Adds a single upb_def into the symtab. A ref on the def is passed to the
+// symtab. If any references cannot be resolved, false is returned and the
+// symtab is unchanged. The error (if any) is saved to status if non-NULL.
+bool upb_symtab_add(upb_symtab *s, upb_def *d, upb_status *status);
+
+// Adds the set of defs contained in the transaction to the symtab, clearing
+// the txn. The entire operation either succeeds or fails. If the operation
+// fails, the symtab is unchanged, false is returned, and status indicates
+// the error.
+bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *t, upb_status *status);
+
+// Frees defs that are no longer active in the symtab and are no longer
+// reachable. Such defs are not freed when they are replaced in the symtab
+// if they are still reachable from defs that are still referenced.
+void upb_symtab_gc(upb_symtab *s);
+
+
+/* upb_def casts **************************************************************/
+
+// Dynamic casts, for determining if a def is of a particular type at runtime.
+// UPB_DYNAMIC_CAST_DEF(lower, upper) defines upb_dyncast_<lower>(def), which
+// returns def cast to struct _upb_<lower>* when def->type is UPB_DEF_<upper>
+// and NULL otherwise.
+#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
+ struct _upb_ ## lower; /* Forward-declare. */ \
+ INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
+ if(def->type != UPB_DEF_ ## upper) return NULL; \
+ return (struct _upb_ ## lower*)def; \
+ }
+UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
+UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
+UPB_DYNAMIC_CAST_DEF(svcdef, SERVICE);
+UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
+#undef UPB_DYNAMIC_CAST_DEF
+
+// Downcasts, for when someone wants to assert that a def is of a particular
+// type.  These are only checked if we are building debug.
+// UPB_DOWNCAST_DEF(lower, upper) defines upb_downcast_<lower>(def), which
+// asserts def->type == UPB_DEF_<upper> and returns def cast to
+// struct _upb_<lower>*.
+#define UPB_DOWNCAST_DEF(lower, upper) \
+ struct _upb_ ## lower; /* Forward-declare. */ \
+ INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
+ assert(def->type == UPB_DEF_ ## upper); \
+ return (struct _upb_ ## lower*)def; \
+ }
+UPB_DOWNCAST_DEF(msgdef, MSG);
+UPB_DOWNCAST_DEF(enumdef, ENUM);
+UPB_DOWNCAST_DEF(svcdef, SERVICE);
+UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
+#undef UPB_DOWNCAST_DEF
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_DEF_H_ */
diff --git a/upb/descriptor.c b/upb/descriptor.c
new file mode 100644
index 0000000..48f0165
--- /dev/null
+++ b/upb/descriptor.c
@@ -0,0 +1,529 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "upb/def.h"
+#include "upb/descriptor.h"
+
+// Returns a newly allocated string that joins input strings together with a
+// '.' separator, for example:
+//   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
+//   join("", "Baz") -> "Baz"
+// A NULL or empty "base" yields a plain copy of "name".  The caller owns the
+// returned string and must free() it.  Returns NULL if "name" is NULL or if
+// memory cannot be allocated.
+static char *upb_join(char *base, char *name) {
+  if (!name) return NULL;
+  size_t name_len = strlen(name);
+  size_t base_len = base ? strlen(base) : 0;
+
+  if (base_len == 0) {
+    // Nothing to prepend: just duplicate "name" (including its terminator).
+    char *copy = malloc(name_len + 1);
+    if (copy) memcpy(copy, name, name_len + 1);
+    return copy;
+  }
+
+  // Room for base, the '.' separator, name, and the terminating NUL.
+  char *ret = malloc(base_len + name_len + 2);
+  if (!ret) return NULL;
+  memcpy(ret, base, base_len);
+  ret[base_len] = '.';
+  memcpy(ret + base_len + 1, name, name_len + 1);
+  return ret;
+}
+
+/* upb_descreader ************************************************************/
+
+// A upb_descreader builds a list of defs by handling a parse of a protobuf in
+// the format defined in descriptor.proto. The output of a upb_descreader is
+// a upb_symtabtxn.
+
+// Returns the final entry of "l", i.e. the def stored at index len-1.
+// The list must not be empty.
+static upb_def *upb_deflist_last(upb_deflist *l) {
+  upb_def *last = l->defs[l->len - 1];
+  return last;
+}
+
+// Prefixes the fqname of every def at index "start" or later with "str".
+// Each new name is built by upb_join() and the previous name is freed.
+static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
+  uint32_t idx = start;
+  while (idx < l->len) {
+    upb_def *d = l->defs[idx];
+    char *unqualified = d->fqname;
+    d->fqname = upb_join(str, unqualified);
+    free(unqualified);
+    idx++;
+  }
+}
+
+// Forward declares for top-level file descriptors.
+static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h);
+static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h);
+
+// Prepares "r" for use: records the destination transaction, empties the
+// scope stack, clears the scratch strings, and initializes the embedded def
+// list and status.
+void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) {
+  upb_deflist_init(&r->defs);
+  upb_status_init(&r->status);
+
+  r->default_string = NULL;
+  r->name = NULL;
+  r->stack_len = 0;
+  r->txn = txn;
+}
+
+// Releases everything "r" still owns: the scratch strings, the status, the
+// def list, and the name of every scope frame left on the stack.
+void upb_descreader_uninit(upb_descreader *r) {
+  free(r->name);
+  upb_status_uninit(&r->status);
+  upb_deflist_uninit(&r->defs);
+  free(r->default_string);
+
+  // Pop remaining frames from the top down, freeing each frame's name.
+  for (; r->stack_len > 0; r->stack_len--) {
+    free(r->stack[r->stack_len - 1].name);
+  }
+}
+
+// Returns the msgdef that owns the innermost scope frame: the def stored just
+// before that frame's "start" index.  Returns NULL when at most one frame is
+// on the stack.
+static upb_msgdef *upb_descreader_top(upb_descreader *r) {
+  if (r->stack_len > 1) {
+    int msg_index = r->stack[r->stack_len - 1].start - 1;
+    assert(msg_index >= 0);
+    return upb_downcast_msgdef(r->defs.defs[msg_index]);
+  }
+  return NULL;
+}
+
+// Returns the last def in the reader's def list.
+static upb_def *upb_descreader_last(upb_descreader *r) {
+  upb_deflist *list = &r->defs;
+  return upb_deflist_last(list);
+}
+
+// Start/end handlers for FileDescriptorProto and DescriptorProto (the two
+// entities that have names and can contain sub-definitions).
+//
+// Pushes a new, unnamed scope frame whose "start" is the current length of
+// the def list, so defs added from now on belong to this scope.
+void upb_descreader_startcontainer(upb_descreader *r) {
+  // r->stack is a fixed-size array; pushing onto a full stack would write out
+  // of bounds.  This function has no way to report an error, so the limit is
+  // enforced as a (debug-build) invariant.
+  assert(r->stack_len >= 0 &&
+         r->stack_len < (int)(sizeof(r->stack) / sizeof(r->stack[0])));
+  upb_descreader_frame *f = &r->stack[r->stack_len++];
+  f->start = r->defs.len;
+  f->name = NULL;
+}
+
+// Pops the innermost scope frame: qualifies every def added since the frame
+// was pushed with the frame's name, then frees that name.
+void upb_descreader_endcontainer(upb_descreader *r) {
+  r->stack_len--;
+  upb_descreader_frame *top = &r->stack[r->stack_len];
+  upb_deflist_qualify(&r->defs, top->name, top->start);
+  free(top->name);
+  top->name = NULL;
+}
+
+// Replaces the name of the innermost scope frame with "str", freeing any
+// previous name.  The frame stores "str" itself (no copy is made), so the
+// reader takes ownership of it.
+void upb_descreader_setscopename(upb_descreader *r, char *str) {
+  int top = r->stack_len - 1;
+  free(r->stack[top].name);
+  r->stack[top].name = str;
+}
+
+// Handlers for google.protobuf.FileDescriptorProto.
+
+// Start-of-message handler: opens a scope frame for the file.  "_r" is the
+// upb_descreader closure.
+static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) {
+  upb_descreader_startcontainer((upb_descreader*)_r);
+  return UPB_CONTINUE;
+}
+
+// End-of-message handler: closes the file's scope frame, qualifying the defs
+// it contains.  "status" is not used.
+static void upb_descreader_FileDescriptorProto_endmsg(void *_r,
+                                                      upb_status *status) {
+  upb_descreader *reader = _r;
+  (void)status;
+  upb_descreader_endcontainer(reader);
+}
+
+// Value handler for the "package" field: a copy of the string becomes the
+// name of the current (file-level) scope frame.  "fval" is not used.
+static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
+                                                             upb_value fval,
+                                                             upb_value val) {
+  upb_descreader *reader = _r;
+  char *package = upb_strref_dup(upb_value_getstrref(val));
+  (void)fval;
+  upb_descreader_setscopename(reader, package);
+  return UPB_CONTINUE;
+}
+
+// Creates the message handlers for FileDescriptorProto in "h": start/end
+// handlers, a value handler for "package", and sub-message handlers for
+// "message_type" and "enum_type".  Returns the new upb_mhandlers.
+static upb_mhandlers *upb_descreader_register_FileDescriptorProto(
+    upb_handlers *h) {
+#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM
+#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE
+  upb_mhandlers *file_h = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(file_h, &upb_descreader_FileDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(file_h, &upb_descreader_FileDescriptorProto_endmsg);
+
+  upb_fhandlers *package_h =
+      upb_mhandlers_newfhandlers(file_h, FNUM(PACKAGE), FTYPE(PACKAGE), false);
+  upb_fhandlers_setvalue(package_h, &upb_descreader_FileDescriptorProto_package);
+
+  upb_mhandlers *msg_h = upb_msgdef_register_DescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(file_h, FNUM(MESSAGE_TYPE),
+                                  FTYPE(MESSAGE_TYPE), true, msg_h);
+
+  upb_mhandlers *enum_h = upb_enumdef_register_EnumDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(file_h, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE),
+                                  true, enum_h);
+  // TODO: services, extensions
+  return file_h;
+}
+#undef FNUM
+#undef FTYPE
+
+// Handlers for google.protobuf.FileDescriptorSet.
+
+// End-of-message handler.  If "status" is ok, hands every def collected so
+// far (which are now guaranteed to be fully-qualified) to the txn and empties
+// the reader's def list.  If "status" is not ok, nothing is moved.
+static void upb_descreader_FileDescriptorSet_onendmsg(void *_r,
+                                                      upb_status *status) {
+  upb_descreader *r = _r;
+  if (!upb_ok(status)) return;
+
+  unsigned int i = 0;
+  while (i < r->defs.len) {
+    // TODO: check return for duplicate def.
+    upb_symtabtxn_add(r->txn, r->defs.defs[i]);
+    i++;
+  }
+  r->defs.len = 0;
+}
+
+// Creates the message handlers for FileDescriptorSet in "h": an end-message
+// handler plus sub-message handlers for the "file" field.  Returns the new
+// upb_mhandlers.
+static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) {
+#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM
+#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE
+  upb_mhandlers *set_h = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setendmsg(set_h, upb_descreader_FileDescriptorSet_onendmsg);
+
+  upb_mhandlers *file_h = upb_descreader_register_FileDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(set_h, FNUM(FILE), FTYPE(FILE), true, file_h);
+  return set_h;
+}
+#undef FNUM
+#undef FTYPE
+
+// Public entry point: clears h->should_jit and registers the
+// FileDescriptorSet handlers (and, through them, all nested ones) in "h".
+// Returns the top-level upb_mhandlers.
+upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) {
+  upb_mhandlers *top;
+  h->should_jit = false;
+  top = upb_descreader_register_FileDescriptorSet(h);
+  return top;
+}
+
+// google.protobuf.EnumValueDescriptorProto.
+
+// Start-of-message handler: forgets any name/number seen for a previous enum
+// value so that the end handler can detect missing fields.
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  reader->saw_name = false;
+  reader->saw_number = false;
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "name" field: stores a copy of the string in r->name
+// (freeing any previous one) and records that a name was seen.
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
+                                                            upb_value fval,
+                                                            upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  free(reader->name);
+  reader->saw_name = true;
+  reader->name = upb_strref_dup(upb_value_getstrref(val));
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "number" field: stores the int32 value in r->number
+// and records that a number was seen.
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r,
+                                                              upb_value fval,
+                                                              upb_value val) {
+  upb_descreader *reader = _r;
+  (void)fval;
+  reader->saw_number = true;
+  reader->number = upb_value_getint32(val);
+  return UPB_CONTINUE;
+}
+
+// End-of-message handler: adds the (name, number) pair just parsed to the
+// enumdef at the end of the def list.  Sets an error on "status" if either
+// field was missing.
+static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
+                                                        upb_status *status) {
+  upb_descreader *r = _r;
+  bool complete = r->saw_number && r->saw_name;
+  if (!complete) {
+    upb_status_setf(status, UPB_ERROR, "Enum value missing name or number.");
+    return;
+  }
+
+  upb_enumdef *enumdef = upb_downcast_enumdef(upb_descreader_last(r));
+  // The default value of an enum (in the absence of an explicit default) is
+  // its first listed value, i.e. the one added while the table is still empty.
+  if (upb_inttable_count(&enumdef->iton) == 0) {
+    upb_enumdef_setdefault(enumdef, r->number);
+  }
+  upb_enumdef_addval(enumdef, r->name, r->number);
+
+  free(r->name);
+  r->name = NULL;
+}
+
+// Creates the message handlers for EnumValueDescriptorProto in "h":
+// start/end handlers plus value handlers for "name" and "number".
+// Returns the new upb_mhandlers.
+static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
+    upb_handlers *h) {
+#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_mhandlers *value_h = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(value_h, &upb_enumdef_EnumValueDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(value_h, &upb_enumdef_EnumValueDescriptorProto_endmsg);
+
+  upb_fhandlers *name_h =
+      upb_mhandlers_newfhandlers(value_h, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(name_h, &upb_enumdef_EnumValueDescriptorProto_name);
+
+  upb_fhandlers *number_h =
+      upb_mhandlers_newfhandlers(value_h, FNUM(NUMBER), FTYPE(NUMBER), false);
+  upb_fhandlers_setvalue(number_h, &upb_enumdef_EnumValueDescriptorProto_number);
+  return value_h;
+}
+#undef FNUM
+#undef FTYPE
+
+// google.protobuf.EnumDescriptorProto.
+
+// Start-of-message handler: creates a fresh enumdef and appends it to the
+// reader's def list.
+static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  upb_deflist *list = &reader->defs;
+  upb_deflist_push(list, UPB_UPCAST(upb_enumdef_new()));
+  return UPB_CONTINUE;
+}
+
+// End-of-message handler: validates the enumdef at the end of the def list.
+// Sets an error on "status" if it has no name or, failing that, no values.
+static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
+  upb_descreader *r = _r;
+  upb_def *last = upb_descreader_last(r);
+  upb_enumdef *e = upb_downcast_enumdef(last);
+
+  if (last->fqname == NULL) {
+    upb_status_setf(status, UPB_ERROR, "Enum had no name.");
+  } else if (upb_inttable_count(&e->iton) == 0) {
+    upb_status_setf(status, UPB_ERROR, "Enum had no values.");
+  }
+}
+
+// Value handler for the "name" field: replaces the fqname of the enumdef at
+// the end of the def list with a copy of the string.
+static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
+                                                       upb_value fval,
+                                                       upb_value val) {
+  upb_descreader *reader = _r;
+  upb_enumdef *enumdef = upb_downcast_enumdef(upb_descreader_last(reader));
+  (void)fval;
+  free(enumdef->base.fqname);
+  enumdef->base.fqname = upb_strref_dup(upb_value_getstrref(val));
+  return UPB_CONTINUE;
+}
+
+// Creates the message handlers for EnumDescriptorProto in "h": start/end
+// handlers, a value handler for "name", and sub-message handlers for "value".
+// Returns the new upb_mhandlers.
+static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
+#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_mhandlers *enum_h = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(enum_h, &upb_enumdef_EnumDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(enum_h, &upb_enumdef_EnumDescriptorProto_endmsg);
+
+  upb_fhandlers *name_h =
+      upb_mhandlers_newfhandlers(enum_h, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(name_h, &upb_enumdef_EnumDescriptorProto_name);
+
+  upb_mhandlers *value_h = upb_enumdef_register_EnumValueDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(enum_h, FNUM(VALUE), FTYPE(VALUE), true,
+                                  value_h);
+  return enum_h;
+}
+#undef FNUM
+#undef FTYPE
+
+// Start-of-message handler for FieldDescriptorProto: allocates a new fielddef
+// and makes it the reader's current field (r->f).
+static upb_flow_t upb_fielddef_startmsg(void *_r) {
+  upb_fielddef *field = upb_fielddef_new();
+  ((upb_descreader*)_r)->f = field;
+  return UPB_CONTINUE;
+}
+
+// Converts the default value in string "str" into "d".  Takes ownership of
+// "str" (NULL means no default was specified): the string is either stored in
+// "d" (string, bytes and enum fields) or freed before returning.
+//
+// For numeric types a missing string is treated as "0", and for bool as
+// "false".  Message and group fields must not have a default at all.
+// Returns true on success, false if "str" is not a valid default for "type"
+// or if memory could not be allocated.
+static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) {
+  bool success = true;
+  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
+    // We'll keep the ref we had on it.  We include enums in this case because
+    // we need the enumdef to resolve the name, but we may not have it yet.
+    // We'll resolve it later.
+    if (!str) str = strdup("");
+    if (str)
+      upb_value_setptr(d, str);
+    else
+      success = false;  // Out of memory.
+  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
+    // We don't expect to get a default value.  Inspect the pointer before
+    // freeing it: its value is indeterminate once the memory is released.
+    if (str != NULL) success = false;
+    free(str);
+  } else if (type == UPB_TYPE(BOOL)) {
+    if (!str || strcmp(str, "false") == 0)
+      upb_value_setbool(d, false);
+    else if (strcmp(str, "true") == 0)
+      upb_value_setbool(d, true);
+    else
+      success = false;
+    free(str);
+  } else {
+    // The strto* functions need the string to be NUL-terminated.
+    if (!str) str = strdup("0");
+    if (!str) return false;  // Out of memory.
+    char *end = str;
+    // The strto* functions only ever *set* errno; clear it first so a stale
+    // ERANGE left behind by an earlier call is not mistaken for a failure here.
+    errno = 0;
+    // In every case below, "end == str" rejects input with no digits at all
+    // and "*end" rejects trailing garbage.
+    switch (type) {
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(SFIXED32): {
+        long val = strtol(str, &end, 0);
+        if (errno == ERANGE || end == str || *end ||
+            val > INT32_MAX || val < INT32_MIN)
+          success = false;
+        else
+          upb_value_setint32(d, val);
+        break;
+      }
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SINT64):
+      case UPB_TYPE(SFIXED64):
+        upb_value_setint64(d, strtoll(str, &end, 0));
+        if (errno == ERANGE || end == str || *end) success = false;
+        break;
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32): {
+        unsigned long val = strtoul(str, &end, 0);
+        if (errno == ERANGE || end == str || *end || val > UINT32_MAX)
+          success = false;
+        else
+          upb_value_setuint32(d, val);
+        break;
+      }
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64):
+        upb_value_setuint64(d, strtoull(str, &end, 0));
+        if (errno == ERANGE || end == str || *end) success = false;
+        break;
+      case UPB_TYPE(DOUBLE):
+        upb_value_setdouble(d, strtod(str, &end));
+        if (errno == ERANGE || end == str || *end) success = false;
+        break;
+      case UPB_TYPE(FLOAT):
+        upb_value_setfloat(d, strtof(str, &end));
+        if (errno == ERANGE || end == str || *end) success = false;
+        break;
+      default:
+        // Not a type handled here; as before, "d" is left untouched and the
+        // call still reports success.
+        break;
+    }
+    free(str);
+  }
+  return success;
+}
+
+// End-of-message handler for FieldDescriptorProto: adds the current field to
+// the enclosing msgdef, then converts the pending default string (if any) to
+// the field's type and installs it.  Sets an error on "status" if the default
+// cannot be converted; the field has already been added at that point.
+static void upb_fielddef_endmsg(void *_r, upb_status *status) {
+  upb_descreader *r = _r;
+  upb_fielddef *f = r->f;
+  // TODO: verify that all required fields were present.
+  assert(f->number != -1 && f->name != NULL);
+  assert((f->def != NULL) == upb_hasdef(f));
+
+  // Field was successfully read, add it as a field of the msgdef.
+  upb_msgdef_addfield(upb_descreader_top(r), f);
+
+  // Detach the pending default from the reader; parsedefault takes it over.
+  char *pending_default = r->default_string;
+  r->default_string = NULL;
+
+  upb_value val;
+  if (upb_fielddef_parsedefault(pending_default, &val, f->type)) {
+    upb_fielddef_setdefault(f, val);
+  } else {
+    // We don't worry too much about giving a great error message since the
+    // compiler should have ensured this was correct.
+    upb_status_setf(status, UPB_ERROR, "Error converting default value.");
+  }
+}
+
+// Value handler for the "type" field: sets the type of the current field
+// from the int32 value.  "fval" is not used.
+static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  upb_fielddef *field = reader->f;
+  (void)fval;
+  upb_fielddef_settype(field, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "label" field: sets the label of the current field
+// from the int32 value.  "fval" is not used.
+static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  upb_fielddef *field = reader->f;
+  (void)fval;
+  upb_fielddef_setlabel(field, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "number" field: sets the field number of the current
+// field from the int32 value.  "fval" is not used.
+static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  upb_fielddef *field = reader->f;
+  (void)fval;
+  upb_fielddef_setnumber(field, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "name" field: passes a temporary copy of the string
+// to upb_fielddef_setname() for the current field, then frees the copy.
+static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *reader = _r;
+  char *tmp = upb_strref_dup(upb_value_getstrref(val));
+  (void)fval;
+  upb_fielddef_setname(reader->f, tmp);
+  free(tmp);
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "type_name" field: passes a temporary copy of the
+// string to upb_fielddef_settypename() for the current field, then frees the
+// copy.
+static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
+                                          upb_value val) {
+  upb_descreader *reader = _r;
+  char *tmp = upb_strref_dup(upb_value_getstrref(val));
+  (void)fval;
+  upb_fielddef_settypename(reader->f, tmp);
+  free(tmp);
+  return UPB_CONTINUE;
+}
+
+// Value handler for the "default_value" field.  The text has to be converted
+// to the field's type, but the type may not be known yet, so a copy of the
+// raw string is stashed in r->default_string (replacing any earlier one) for
+// the end-of-message handler to convert.
+static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
+                                            upb_value val) {
+  upb_descreader *reader = _r;
+  char *previous = reader->default_string;
+  (void)fval;
+  free(previous);
+  reader->default_string = upb_strref_dup(upb_value_getstrref(val));
+  return UPB_CONTINUE;
+}
+
+// Creates the message handlers for FieldDescriptorProto in "h": start/end
+// handlers plus one value handler for each of the type, label, number, name,
+// type_name and default_value fields.  Returns the new upb_mhandlers.
+static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
+    upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg);
+  upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg);
+
+// Registers "handler" as the value handler for FieldDescriptorProto field
+// "name" on the local "m".  Expands to an expression; the call site supplies
+// the terminating semicolon.
+#define FIELD(name, handler) \
+  upb_fhandlers_setvalue( \
+      upb_mhandlers_newfhandlers(m, \
+          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \
+          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \
+          false), \
+      handler)
+  FIELD(TYPE, &upb_fielddef_ontype);
+  FIELD(LABEL, &upb_fielddef_onlabel);
+  FIELD(NUMBER, &upb_fielddef_onnumber);
+  FIELD(NAME, &upb_fielddef_onname);
+  FIELD(TYPE_NAME, &upb_fielddef_ontypename);
+  FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval);
+  return m;
+}
+// FIELD is the only helper macro defined above; undefine it so it does not
+// leak into the rest of the file.
+#undef FIELD
+
+
+// google.protobuf.DescriptorProto.
+
+// Start-of-message handler: appends a fresh msgdef to the def list and then
+// opens a scope frame for it, so the msgdef sits just before the frame's
+// "start" index.
+static upb_flow_t upb_msgdef_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  upb_def *msg = UPB_UPCAST(upb_msgdef_new());
+  upb_deflist_push(&reader->defs, msg);
+  upb_descreader_startcontainer(reader);
+  return UPB_CONTINUE;
+}
+
+// End-of-message handler: lays out the msgdef for the current scope and
+// closes its scope frame.  If the message never received a name, an error is
+// set on "status" instead and the frame is left on the stack.
+static void upb_msgdef_endmsg(void *_r, upb_status *status) {
+  upb_descreader *r = _r;
+  upb_msgdef *m = upb_descreader_top(r);
+  if (m->base.fqname) {
+    upb_msgdef_layout(m);
+    upb_descreader_endcontainer(r);
+  } else {
+    upb_status_setf(status, UPB_ERROR, "Encountered message with no name.");
+  }
+}
+
+// Value handler for the "name" field: a copy of the string becomes the
+// msgdef's fqname, and a second, separately allocated copy becomes the name
+// of the message's scope frame.  "fval" is not used.
+static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
+  upb_descreader *r = _r;
+  (void)fval;
+  assert(val.type == UPB_TYPE(STRING));
+  upb_msgdef *m = upb_descreader_top(r);
+  char *name = upb_strref_dup(upb_value_getstrref(val));
+  free(m->base.fqname);
+  m->base.fqname = name;
+  upb_descreader_setscopename(r, strdup(name));
+  return UPB_CONTINUE;
+}
+
+// Creates the message handlers for DescriptorProto in "h": start/end
+// handlers, a value handler for "name", and sub-message handlers for "field",
+// "enum_type" and "nested_type".  Returns the new upb_mhandlers.
+static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
+#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_mhandlers *msg_h = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(msg_h, &upb_msgdef_startmsg);
+  upb_mhandlers_setendmsg(msg_h, &upb_msgdef_endmsg);
+
+  upb_fhandlers *name_h =
+      upb_mhandlers_newfhandlers(msg_h, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(name_h, &upb_msgdef_onname);
+
+  upb_mhandlers *field_h = upb_fielddef_register_FieldDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(msg_h, FNUM(FIELD), FTYPE(FIELD), true,
+                                  field_h);
+
+  upb_mhandlers *enum_h = upb_enumdef_register_EnumDescriptorProto(h);
+  upb_mhandlers_newfhandlers_subm(msg_h, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE),
+                                  true, enum_h);
+
+  // DescriptorProto is self-recursive, so the nested_type field is linked
+  // back to these same handlers rather than to a new set.
+  upb_mhandlers_newfhandlers_subm(
+      msg_h, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, msg_h);
+
+  // TODO: extensions.
+  return msg_h;
+}
+#undef FNUM
+#undef FTYPE
+
diff --git a/upb/descriptor.h b/upb/descriptor.h
new file mode 100644
index 0000000..4d658fb
--- /dev/null
+++ b/upb/descriptor.h
@@ -0,0 +1,67 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Routines for building defs by parsing descriptors in descriptor.proto format.
+ * This only needs to use the public API of upb_symtab. Later we may also
+ * add routines for dumping a symtab to a descriptor.
+ */
+
+#ifndef UPB_DESCRIPTOR_H
+#define UPB_DESCRIPTOR_H
+
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_descreader ************************************************************/
+
+// upb_descreader reads a descriptor and puts defs in a upb_symtabtxn.
+
+// We keep a stack of all the message scopes we are currently in, as well as
+// the top-level file scope. This is necessary to correctly qualify the
+// definitions that are contained inside. "name" tracks the name of the
+// message or package (a bare name -- not qualified by any enclosing scopes).
+typedef struct {
+ // Heap-allocated scope name owned by the frame, or NULL if no name has been
+ // set yet. It is freed when the frame is popped or the reader is uninited.
+ char *name;
+ // Index of the first def that is under this scope. For msgdefs, the
+ // msgdef itself is at start-1.
+ int start;
+} upb_descreader_frame;
+
+// State for one descriptor parse. Initialize with upb_descreader_init() and
+// release with upb_descreader_uninit().
+typedef struct {
+ // Defs built so far. They are handed to "txn" when the enclosing
+ // FileDescriptorSet ends successfully.
+ upb_deflist defs;
+ // Destination transaction, as passed to upb_descreader_init().
+ upb_symtabtxn *txn;
+ // Scope stack; entries [0, stack_len) are live.
+ upb_descreader_frame stack[UPB_MAX_TYPE_DEPTH];
+ int stack_len;
+ upb_status status;
+
+ // Scratch state for the EnumValueDescriptorProto currently being parsed.
+ // NOTE(review): "number" is unsigned but is assigned from an int32 value --
+ // confirm negative enum numbers survive the round trip.
+ uint32_t number;
+ char *name;
+ bool saw_number;
+ bool saw_name;
+
+ // Raw default_value text of the field currently being parsed, or NULL. It
+ // is converted once the field's end-of-message handler runs.
+ char *default_string;
+
+ // Field currently being built; set when a FieldDescriptorProto starts.
+ upb_fielddef *f;
+} upb_descreader;
+
+// Creates a new descriptor builder that will add defs to the given txn.
+void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn);
+void upb_descreader_uninit(upb_descreader *r);
+
+// Registers handlers that will load descriptor data into a symtabtxn.
+// Pass the descreader as the closure. The messages will have
+// upb_msgdef_layout() called on them before adding to the txn.
+upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/descriptor.proto b/upb/descriptor.proto
new file mode 100644
index 0000000..233f879
--- /dev/null
+++ b/upb/descriptor.proto
@@ -0,0 +1,533 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// The messages in this file describe the definitions found in .proto files.
+// A valid .proto file can be translated directly to a FileDescriptorProto
+// without any other information (e.g. without reading its imports).
+
+
+
+package google.protobuf;
+option java_package = "com.google.protobuf";
+option java_outer_classname = "DescriptorProtos";
+
+// descriptor.proto must be optimized for speed because reflection-based
+// algorithms don't work during bootstrapping.
+option optimize_for = SPEED;
+
+// The protocol compiler can output a FileDescriptorSet containing the .proto
+// files it parses.
+message FileDescriptorSet {
+ repeated FileDescriptorProto file = 1;
+}
+
+// Describes a complete .proto file.
+message FileDescriptorProto {
+ optional string name = 1; // file name, relative to root of source tree
+ optional string package = 2; // e.g. "foo", "foo.bar", etc.
+
+ // Names of files imported by this file.
+ repeated string dependency = 3;
+
+ // All top-level definitions in this file.
+ repeated DescriptorProto message_type = 4;
+ repeated EnumDescriptorProto enum_type = 5;
+ repeated ServiceDescriptorProto service = 6;
+ repeated FieldDescriptorProto extension = 7;
+
+ optional FileOptions options = 8;
+
+ // This field contains optional information about the original source code.
+ // You may safely remove this entire field without harming runtime
+ // functionality of the descriptors -- the information is needed only by
+ // development tools.
+ optional SourceCodeInfo source_code_info = 9;
+}
+
+// Describes a message type.
+message DescriptorProto {
+ optional string name = 1;
+
+ repeated FieldDescriptorProto field = 2;
+ repeated FieldDescriptorProto extension = 6;
+
+ repeated DescriptorProto nested_type = 3;
+ repeated EnumDescriptorProto enum_type = 4;
+
+ message ExtensionRange {
+ optional int32 start = 1;
+ optional int32 end = 2;
+ }
+ repeated ExtensionRange extension_range = 5;
+
+ optional MessageOptions options = 7;
+}
+
+// Describes a field within a message.
+message FieldDescriptorProto {
+ enum Type {
+ // 0 is reserved for errors.
+ // Order is weird for historical reasons.
+ TYPE_DOUBLE = 1;
+ TYPE_FLOAT = 2;
+ TYPE_INT64 = 3; // Not ZigZag encoded. Negative numbers
+ // take 10 bytes. Use TYPE_SINT64 if negative
+ // values are likely.
+ TYPE_UINT64 = 4;
+ TYPE_INT32 = 5; // Not ZigZag encoded. Negative numbers
+ // take 10 bytes. Use TYPE_SINT32 if negative
+ // values are likely.
+ TYPE_FIXED64 = 6;
+ TYPE_FIXED32 = 7;
+ TYPE_BOOL = 8;
+ TYPE_STRING = 9;
+ TYPE_GROUP = 10; // Tag-delimited aggregate.
+ TYPE_MESSAGE = 11; // Length-delimited aggregate.
+
+ // New in version 2.
+ TYPE_BYTES = 12;
+ TYPE_UINT32 = 13;
+ TYPE_ENUM = 14;
+ TYPE_SFIXED32 = 15;
+ TYPE_SFIXED64 = 16;
+ TYPE_SINT32 = 17; // Uses ZigZag encoding.
+ TYPE_SINT64 = 18; // Uses ZigZag encoding.
+ };
+
+ enum Label {
+ // 0 is reserved for errors
+ LABEL_OPTIONAL = 1;
+ LABEL_REQUIRED = 2;
+ LABEL_REPEATED = 3;
+ // TODO(sanjay): Should we add LABEL_MAP?
+ };
+
+ optional string name = 1;
+ optional int32 number = 3;
+ optional Label label = 4;
+
+ // If type_name is set, this need not be set. If both this and type_name
+ // are set, this must be either TYPE_ENUM or TYPE_MESSAGE.
+ optional Type type = 5;
+
+ // For message and enum types, this is the name of the type. If the name
+ // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping
+ // rules are used to find the type (i.e. first the nested types within this
+ // message are searched, then within the parent, on up to the root
+ // namespace).
+ optional string type_name = 6;
+
+ // For extensions, this is the name of the type being extended. It is
+ // resolved in the same manner as type_name.
+ optional string extendee = 2;
+
+ // For numeric types, contains the original text representation of the value.
+ // For booleans, "true" or "false".
+ // For strings, contains the default text contents (not escaped in any way).
+ // For bytes, contains the C escaped value. All bytes >= 128 are escaped.
+ // TODO(kenton): Base-64 encode?
+ optional string default_value = 7;
+
+ optional FieldOptions options = 8;
+}
+
+// Describes an enum type.
+message EnumDescriptorProto {
+ optional string name = 1;
+
+ repeated EnumValueDescriptorProto value = 2;
+
+ optional EnumOptions options = 3;
+}
+
+// Describes a value within an enum.
+message EnumValueDescriptorProto {
+ optional string name = 1;
+ optional int32 number = 2;
+
+ optional EnumValueOptions options = 3;
+}
+
+// Describes a service.
+message ServiceDescriptorProto {
+ optional string name = 1;
+ repeated MethodDescriptorProto method = 2;
+
+ optional ServiceOptions options = 3;
+}
+
+// Describes a method of a service.
+message MethodDescriptorProto {
+ optional string name = 1;
+
+ // Input and output type names. These are resolved in the same way as
+ // FieldDescriptorProto.type_name, but must refer to a message type.
+ optional string input_type = 2;
+ optional string output_type = 3;
+
+ optional MethodOptions options = 4;
+}
+
+// ===================================================================
+// Options
+
+// Each of the definitions above may have "options" attached. These are
+// just annotations which may cause code to be generated slightly differently
+// or may contain hints for code that manipulates protocol messages.
+//
+// Clients may define custom options as extensions of the *Options messages.
+// These extensions may not yet be known at parsing time, so the parser cannot
+// store the values in them. Instead it stores them in a field in the *Options
+// message called uninterpreted_option. This field must have the same name
+// across all *Options messages. We then use this field to populate the
+// extensions when we build a descriptor, at which point all protos have been
+// parsed and so all extensions are known.
+//
+// Extension numbers for custom options may be chosen as follows:
+// * For options which will only be used within a single application or
+// organization, or for experimental options, use field numbers 50000
+// through 99999. It is up to you to ensure that you do not use the
+// same number for multiple options.
+// * For options which will be published and used publicly by multiple
+// independent entities, e-mail kenton@google.com to reserve extension
+// numbers. Simply tell me how many you need and I'll send you back a
+// set of numbers to use -- there's no need to explain how you intend to
+// use them. If this turns out to be popular, a web service will be set up
+// to automatically assign option numbers.
+
+
+message FileOptions {
+
+ // Sets the Java package where classes generated from this .proto will be
+ // placed. By default, the proto package is used, but this is often
+ // inappropriate because proto packages do not normally start with backwards
+ // domain names.
+ optional string java_package = 1;
+
+
+ // If set, all the classes from the .proto file are wrapped in a single
+ // outer class with the given name. This applies to both Proto1
+ // (equivalent to the old "--one_java_file" option) and Proto2 (where
+ // a .proto always translates to a single class, but you may want to
+ // explicitly choose the class name).
+ optional string java_outer_classname = 8;
+
+ // If set true, then the Java code generator will generate a separate .java
+ // file for each top-level message, enum, and service defined in the .proto
+ // file. Thus, these types will *not* be nested inside the outer class
+ // named by java_outer_classname. However, the outer class will still be
+ // generated to contain the file's getDescriptor() method as well as any
+ // top-level extensions defined in the file.
+ optional bool java_multiple_files = 10 [default=false];
+
+ // If set true, then the Java code generator will generate equals() and
+ // hashCode() methods for all messages defined in the .proto file. This is
+ // purely a speed optimization, as the AbstractMessage base class includes
+ // reflection-based implementations of these methods.
+ optional bool java_generate_equals_and_hash = 20 [default=false];
+
+ // Generated classes can be optimized for speed or code size.
+ enum OptimizeMode {
+ SPEED = 1; // Generate complete code for parsing, serialization,
+ // etc.
+ CODE_SIZE = 2; // Use ReflectionOps to implement these methods.
+ LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime.
+ }
+ optional OptimizeMode optimize_for = 9 [default=SPEED];
+
+
+
+
+ // Should generic services be generated in each language? "Generic" services
+ // are not specific to any particular RPC system. They are generated by the
+ // main code generators in each language (without additional plugins).
+ // Generic services were the only kind of service generation supported by
+ // early versions of proto2.
+ //
+ // Generic services are now considered deprecated in favor of using plugins
+ // that generate code specific to your particular RPC system. Therefore,
+ // these default to false. Old code which depends on generic services should
+ // explicitly set them to true.
+ optional bool cc_generic_services = 16 [default=false];
+ optional bool java_generic_services = 17 [default=false];
+ optional bool py_generic_services = 18 [default=false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message MessageOptions {
+ // Set true to use the old proto1 MessageSet wire format for extensions.
+ // This is provided for backwards-compatibility with the MessageSet wire
+ // format. You should not use this for any other reason: It's less
+ // efficient, has fewer features, and is more complicated.
+ //
+ // The message must be defined exactly as follows:
+ // message Foo {
+ // option message_set_wire_format = true;
+ // extensions 4 to max;
+ // }
+ // Note that the message cannot have any defined fields; MessageSets only
+ // have extensions.
+ //
+ // All extensions of your type must be singular messages; e.g. they cannot
+ // be int32s, enums, or repeated messages.
+ //
+ // Because this is an option, the above two restrictions are not enforced by
+ // the protocol compiler.
+ optional bool message_set_wire_format = 1 [default=false];
+
+ // Disables the generation of the standard "descriptor()" accessor, which can
+ // conflict with a field of the same name. This is meant to make migration
+ // from proto1 easier; new code should avoid fields named "descriptor".
+ optional bool no_standard_descriptor_accessor = 2 [default=false];
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message FieldOptions {
+ // The ctype option instructs the C++ code generator to use a different
+ // representation of the field than it normally would. See the specific
+ // options below. This option is not yet implemented in the open source
+ // release -- sorry, we'll try to include it in a future version!
+ optional CType ctype = 1 [default = STRING];
+ enum CType {
+ // Default mode.
+ STRING = 0;
+
+ CORD = 1;
+
+ STRING_PIECE = 2;
+ }
+ // The packed option can be enabled for repeated primitive fields to enable
+ // a more efficient representation on the wire. Rather than repeatedly
+ // writing the tag and type for each element, the entire array is encoded as
+ // a single length-delimited blob.
+ optional bool packed = 2;
+
+
+ // Is this field deprecated?
+ // Depending on the target platform, this can emit Deprecated annotations
+ // for accessors, or it will be completely ignored; at the very least, this
+ // is a formalization for deprecating fields.
+ optional bool deprecated = 3 [default=false];
+
+ // EXPERIMENTAL. DO NOT USE.
+ // For "map" fields, the name of the field in the enclosed type that
+ // is the key for this map. For example, suppose we have:
+ // message Item {
+ // required string name = 1;
+ // required string value = 2;
+ // }
+ // message Config {
+ // repeated Item items = 1 [experimental_map_key="name"];
+ // }
+ // In this situation, the map key for Item will be set to "name".
+ // TODO: Fully-implement this, then remove the "experimental_" prefix.
+ optional string experimental_map_key = 9;
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message EnumOptions {
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message EnumValueOptions {
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message ServiceOptions {
+
+ // Note: Field numbers 1 through 32 are reserved for Google's internal RPC
+ // framework. We apologize for hoarding these numbers to ourselves, but
+ // we were already using them long before we decided to release Protocol
+ // Buffers.
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+message MethodOptions {
+
+ // Note: Field numbers 1 through 32 are reserved for Google's internal RPC
+ // framework. We apologize for hoarding these numbers to ourselves, but
+ // we were already using them long before we decided to release Protocol
+ // Buffers.
+
+ // The parser stores options it doesn't recognize here. See above.
+ repeated UninterpretedOption uninterpreted_option = 999;
+
+ // Clients can define custom options in extensions of this message. See above.
+ extensions 1000 to max;
+}
+
+// A message representing an option the parser does not recognize. This only
+// appears in options protos created by the compiler::Parser class.
+// DescriptorPool resolves these when building Descriptor objects. Therefore,
+// options protos in descriptor objects (e.g. returned by Descriptor::options(),
+// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
+// in them.
+message UninterpretedOption {
+ // The name of the uninterpreted option. Each string represents a segment in
+ // a dot-separated name. is_extension is true iff a segment represents an
+ // extension (denoted with parentheses in options specs in .proto files).
+ // E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents
+ // "foo.(bar.baz).qux".
+ message NamePart {
+ required string name_part = 1;
+ required bool is_extension = 2;
+ }
+ repeated NamePart name = 2;
+
+ // The value of the uninterpreted option, in whatever type the tokenizer
+ // identified it as during parsing. Exactly one of these should be set.
+ optional string identifier_value = 3;
+ optional uint64 positive_int_value = 4;
+ optional int64 negative_int_value = 5;
+ optional double double_value = 6;
+ optional bytes string_value = 7;
+ optional string aggregate_value = 8;
+}
+
+// ===================================================================
+// Optional source code info
+
+// Encapsulates information about the original source file from which a
+// FileDescriptorProto was generated.
+message SourceCodeInfo {
+ // A Location identifies a piece of source code in a .proto file which
+ // corresponds to a particular definition. This information is intended
+ // to be useful to IDEs, code indexers, documentation generators, and similar
+ // tools.
+ //
+ // For example, say we have a file like:
+ // message Foo {
+ // optional string foo = 1;
+ // }
+ // Let's look at just the field definition:
+ // optional string foo = 1;
+ // ^ ^^ ^^ ^ ^^^
+ // a bc de f ghi
+ // We have the following locations:
+ // span path represents
+ // [a,i) [ 4, 0, 2, 0 ] The whole field definition.
+ // [a,b) [ 4, 0, 2, 0, 4 ] The label (optional).
+ // [c,d) [ 4, 0, 2, 0, 5 ] The type (string).
+ // [e,f) [ 4, 0, 2, 0, 1 ] The name (foo).
+ // [g,h) [ 4, 0, 2, 0, 3 ] The number (1).
+ //
+ // Notes:
+ // - A location may refer to a repeated field itself (i.e. not to any
+ // particular index within it). This is used whenever a set of elements are
+ // logically enclosed in a single code segment. For example, an entire
+ // extend block (possibly containing multiple extension definitions) will
+ // have an outer location whose path refers to the "extensions" repeated
+ // field without an index.
+ // - Multiple locations may have the same path. This happens when a single
+ // logical declaration is spread out across multiple places. The most
+ // obvious example is the "extend" block again -- there may be multiple
+ // extend blocks in the same scope, each of which will have the same path.
+ // - A location's span is not always a subset of its parent's span. For
+ // example, the "extendee" of an extension declaration appears at the
+ // beginning of the "extend" block and is shared by all extensions within
+ // the block.
+ // - Just because a location's span is a subset of some other location's span
+ // does not mean that it is a descendant. For example, a "group" defines
+ // both a type and a field in a single declaration. Thus, the locations
+ // corresponding to the type and field and their components will overlap.
+ // - Code which tries to interpret locations should probably be designed to
+ // ignore those that it doesn't understand, as more types of locations could
+ // be recorded in the future.
+ repeated Location location = 1;
+ message Location {
+ // Identifies which part of the FileDescriptorProto was defined at this
+ // location.
+ //
+ // Each element is a field number or an index. They form a path from
+ // the root FileDescriptorProto to the place where the definition occurs. For
+ // example, this path:
+ // [ 4, 3, 2, 7, 1 ]
+ // refers to:
+ // file.message_type(3) // 4, 3
+ // .field(7) // 2, 7
+ // .name() // 1
+ // This is because FileDescriptorProto.message_type has field number 4:
+ // repeated DescriptorProto message_type = 4;
+ // and DescriptorProto.field has field number 2:
+ // repeated FieldDescriptorProto field = 2;
+ // and FieldDescriptorProto.name has field number 1:
+ // optional string name = 1;
+ //
+ // Thus, the above path gives the location of a field name. If we removed
+ // the last element:
+ // [ 4, 3, 2, 7 ]
+ // this path refers to the whole field declaration (from the beginning
+ // of the label to the terminating semicolon).
+ repeated int32 path = 1 [packed=true];
+
+ // Always has exactly three or four elements: start line, start column,
+ // end line (optional, otherwise assumed same as start line), end column.
+ // These are packed into a single field for efficiency. Note that line
+ // and column numbers are zero-based -- typically you will want to add
+ // 1 to each before displaying to a user.
+ repeated int32 span = 2 [packed=true];
+
+ // TODO(kenton): Record comments appearing before and after the
+ // declaration.
+ }
+}
diff --git a/upb/descriptor_const.h b/upb/descriptor_const.h
new file mode 100644
index 0000000..228c95a
--- /dev/null
+++ b/upb/descriptor_const.h
@@ -0,0 +1,349 @@
+/* This file was generated by upbc (the upb compiler). Do not edit. */
+
+#ifndef SRC_DESCRIPTOR_CONST_C
+#define SRC_DESCRIPTOR_CONST_C
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Enums. */
+
+typedef enum google_protobuf_FieldOptions_CType {
+ GOOGLE_PROTOBUF_FIELDOPTIONS_STRING = 0,
+ GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2,
+ GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1
+} google_protobuf_FieldOptions_CType;
+
+typedef enum google_protobuf_FieldDescriptorProto_Type {
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
+} google_protobuf_FieldDescriptorProto_Type;
+
+typedef enum google_protobuf_FieldDescriptorProto_Label {
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
+ GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
+} google_protobuf_FieldDescriptorProto_Label;
+
+typedef enum google_protobuf_FileOptions_OptimizeMode {
+ GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,
+ GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2,
+ GOOGLE_PROTOBUF_FILEOPTIONS_LITE_RUNTIME = 3
+} google_protobuf_FileOptions_OptimizeMode;
+
+/* Constants for field names and numbers. */
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path"
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNUM 2
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNAME "span"
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNUM 2
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNAME "identifier_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNAME "positive_int_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDTYPE 4
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNUM 7
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNAME "package"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNUM 3
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNAME "dependency"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNAME "message_type"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNAME "input_type"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNAME "output_type"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNUM 4
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location"
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNUM 1
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNAME "start"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNUM 2
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNUM 2
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNAME "packed"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNUM 3
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNAME "deprecated"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNAME "experimental_map_key"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16
+#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNUM 17
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNAME "java_generic_services"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNUM 18
+#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNAME "py_generic_services"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNUM 20
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNAME "java_generate_equals_and_hash"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNAME "extendee"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM 3
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM 4
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNAME "label"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM 7
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* SRC_DESCRIPTOR_CONST_C */
diff --git a/upb/handlers.c b/upb/handlers.c
new file mode 100644
index 0000000..05300c0
--- /dev/null
+++ b/upb/handlers.c
@@ -0,0 +1,311 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include "upb/handlers.h"
+
+
+/* upb_mhandlers **************************************************************/
+
+// Allocates a upb_mhandlers with an initialized (empty) field table and no
+// startmsg/endmsg callbacks or table array set.  Aborts if the allocation
+// fails, in line with the abort()-on-error handling already used in this
+// file; previously a NULL result from malloc() was dereferenced immediately.
+static upb_mhandlers *upb_mhandlers_new(void) {
+  upb_mhandlers *m = malloc(sizeof(*m));
+  if (!m) abort();
+  upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers));
+  m->startmsg = NULL;
+  m->endmsg = NULL;
+  m->tablearray = NULL;
+  m->is_group = false;
+  return m;
+}
+
+// Creates the field-handlers entry for field number "n" of type "type" in m's
+// field table and returns a pointer to the entry found in the table afterwards.
+// The table key is (n << 3) | the native wire type recorded for "type" in
+// upb_types.  Aborts if an entry with that key is already present.
+static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
+ upb_fieldtype_t type,
+ bool repeated) {
+ uint32_t tag = n << 3 | upb_types[type].native_wire_type;
+ upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, tag);
+ // Registering the same tag twice is treated as a fatal programming error.
+ if (f) abort();
+ // Positional initializer: the order must match the upb_fhandlers declaration,
+ // which is not visible in this file.
+ // NOTE(review): the fourth member (repeated && primitive) is presumably the
+ // "packed"-style flag -- confirm against upb/handlers.h.
+ upb_fhandlers new_f = {false, type, repeated,
+ repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0),
+ n, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};
+ upb_inttable_insert(&m->fieldtab, tag, &new_f);
+ // Look the entry up again so the returned pointer comes from the table
+ // rather than pointing at the local new_f, which dies on return.
+ f = upb_inttable_lookup(&m->fieldtab, tag);
+ assert(f);
+ assert(f->type == type);
+ return f;
+}
+
+// Registers handlers for a non-submessage field.  MESSAGE and GROUP fields are
+// rejected by assertion; use upb_mhandlers_newfhandlers_subm() for those.
+upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
+                                          upb_fieldtype_t type, bool repeated) {
+  assert(type != UPB_TYPE(MESSAGE));
+  assert(type != UPB_TYPE(GROUP));
+  upb_fhandlers *fh = _upb_mhandlers_newfhandlers(m, n, type, repeated);
+  return fh;
+}
+
+// Registers handlers for a MESSAGE or GROUP field whose contents are handled
+// by "subm".  For groups, an additional UPB_TYPE_ENDGROUP entry with the same
+// field number is registered in subm's own field table.
+upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
+                                               upb_fieldtype_t type,
+                                               bool repeated,
+                                               upb_mhandlers *subm) {
+  assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP));
+  assert(subm);
+  upb_fhandlers *fh = _upb_mhandlers_newfhandlers(m, n, type, repeated);
+  fh->submsg = subm;
+  if (type != UPB_TYPE(GROUP)) {
+    return fh;
+  }
+  _upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false);
+  return fh;
+}
+
+
+/* upb_handlers ***************************************************************/
+
+// Allocates a upb_handlers object holding one reference, with room for four
+// message-handlers pointers and no messages registered yet.  Aborts if either
+// allocation fails (abort() is this file's existing response to fatal
+// errors); previously a failed malloc() led to a NULL dereference here or on
+// the first upb_handlers_newmhandlers() call.
+upb_handlers *upb_handlers_new() {
+  upb_handlers *h = malloc(sizeof(*h));
+  if (!h) abort();
+  upb_atomic_init(&h->refcount, 1);
+  h->msgs_len = 0;
+  h->msgs_size = 4;
+  h->msgs = malloc(h->msgs_size * sizeof(*h->msgs));
+  if (!h->msgs) abort();
+  h->should_jit = true;
+  return h;
+}
+
+// Takes an additional reference on h.
+void upb_handlers_ref(upb_handlers *h) {
+  upb_atomic_ref(&h->refcount);
+}
+
+// Drops one reference on h.  When upb_atomic_unref() reports true, every
+// registered upb_mhandlers (its field table, table array and the object
+// itself) is released, followed by the msgs array and h.
+void upb_handlers_unref(upb_handlers *h) {
+  if (!upb_atomic_unref(&h->refcount)) return;
+
+  for (int idx = 0; idx < h->msgs_len; idx++) {
+    upb_mhandlers *cur = h->msgs[idx];
+    upb_inttable_free(&cur->fieldtab);
+    free(cur->tablearray);
+    free(cur);
+  }
+  free(h->msgs);
+  free(h);
+}
+
+// Creates a new upb_mhandlers, appends it to h->msgs (doubling the array's
+// capacity when it is full) and returns it.  h keeps the pointer in h->msgs;
+// upb_handlers_unref() frees it when the last reference goes away.
+upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
+  if (h->msgs_len == h->msgs_size) {
+    h->msgs_size *= 2;
+    // Check the result before overwriting h->msgs: a failed realloc() used to
+    // store NULL here and the write below then went through a NULL pointer.
+    void *grown = realloc(h->msgs, h->msgs_size * sizeof(*h->msgs));
+    if (!grown) abort();
+    h->msgs = grown;
+  }
+  upb_mhandlers *mh = upb_mhandlers_new();
+  h->msgs[h->msgs_len++] = mh;
+  return mh;
+}
+
+// Value type of the string table that upb_regmsg_dfs() keys by a message's
+// fully-qualified name; it records the handlers already created for that
+// message type.
+typedef struct {
+ upb_mhandlers *mh;
+} upb_mtab_ent;
+
+// Depth-first registration of message "m" and every message type reachable
+// through its submessage fields.  A upb_mhandlers is created for m and recorded
+// in "mtab" under m's fully-qualified name *before* its fields are visited, so
+// a field that refers back to a type already being registered reuses the
+// existing handlers instead of recursing forever.  msgreg_cb (if non-NULL) is
+// invoked once per message and fieldreg_cb (if non-NULL) once per field.
+// Returns the handlers created for m.
+static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m,
+                                     upb_onmsgreg *msgreg_cb,
+                                     upb_onfieldreg *fieldreg_cb,
+                                     void *closure, upb_strtable *mtab) {
+  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
+  upb_mtab_ent e = {mh};
+  upb_strtable_insert(mtab, m->base.fqname, &e);
+  if (msgreg_cb) msgreg_cb(closure, mh, m);
+
+  upb_msg_iter i = upb_msg_begin(m);
+  while (!upb_msg_done(i)) {
+    upb_fielddef *f = upb_msg_iter_field(i);
+    upb_fhandlers *fh;
+    if (!upb_issubmsg(f)) {
+      fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f));
+    } else {
+      // The table lookup is necessary to break the DFS for type cycles.
+      upb_mtab_ent *seen = upb_strtable_lookup(mtab, f->def->fqname);
+      upb_mhandlers *sub_mh;
+      if (seen != NULL) {
+        sub_mh = seen->mh;
+      } else {
+        sub_mh = upb_regmsg_dfs(h, upb_downcast_msgdef(f->def), msgreg_cb,
+                                fieldreg_cb, closure, mtab);
+      }
+      fh = upb_mhandlers_newfhandlers_subm(
+          mh, f->number, f->type, upb_isseq(f), sub_mh);
+    }
+    if (fieldreg_cb) fieldreg_cb(closure, fh, f);
+    i = upb_msg_next(m, i);
+  }
+  return mh;
+}
+
+// Registers handlers in h for message "m" and for every message type reachable
+// from it, invoking the optional callbacks along the way.  A temporary string
+// table tracks the types already visited and is freed before returning.
+// Returns the handlers created for m itself.
+upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, upb_msgdef *m,
+                                      upb_onmsgreg *msgreg_cb,
+                                      upb_onfieldreg *fieldreg_cb,
+                                      void *closure) {
+  upb_strtable visited;
+  upb_strtable_init(&visited, 8, sizeof(upb_mtab_ent));
+  upb_mhandlers *root =
+      upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &visited);
+  upb_strtable_free(&visited);
+  return root;
+}
+
+
+/* upb_dispatcher *************************************************************/
+
+// Placeholder field handlers installed as the "f" of the bottom stack frame
+// (d->stack[0]) by upb_dispatcher_init().  It is typed as a non-repeated GROUP
+// with field number 0 and has no callbacks set.
+// The initializer is positional, so its order must match the upb_fhandlers
+// declaration.  NOTE(review): the NDEBUG switch presumably exists because the
+// value member carries an extra debug-only field when assertions are enabled
+// -- confirm against the upb_value definition.
+static upb_fhandlers toplevel_f = {
+ false, UPB_TYPE(GROUP), false, false, UPB_ATOMIC_INIT(0), 0,
+ NULL, NULL, // submsg
+#ifdef NDEBUG
+ {{0}},
+#else
+ {{0}, -1},
+#endif
+ NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};
+
+// Initializes dispatcher d to dispatch through handlers h.  Takes a reference
+// on h, compacts the field table of every registered message, installs the
+// placeholder toplevel_f in the bottom stack frame, records the nesting limit
+// and the skip/exit callbacks with their closure, and initializes d->status.
+void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
+                         upb_skip_handler *skip, upb_exit_handler *exit,
+                         void *srcclosure) {
+  upb_handlers_ref(h);
+  d->handlers = h;
+
+  int idx = 0;
+  while (idx < h->msgs_len) {
+    upb_inttable_compact(&h->msgs[idx]->fieldtab);
+    idx++;
+  }
+
+  d->stack[0].f = &toplevel_f;
+  d->limit = &d->stack[UPB_MAX_NESTING];
+  d->skip = skip;
+  d->exit = exit;
+  d->srcclosure = srcclosure;
+  upb_status_init(&d->status);
+}
+
+// Rewinds "d" to the top-level message (handlers->msgs[0]) with an empty
+// stack, and binds "closure" to the bottom frame. Returns that bottom frame.
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
+  upb_dispatcher_frame *bottom = d->stack;
+
+  d->msgent = d->handlers->msgs[0];
+  d->dispatch_table = &d->msgent->fieldtab;
+
+  bottom->closure = closure;
+  bottom->is_sequence = false;
+  d->top = bottom;
+  return bottom;
+}
+
+// Releases what upb_dispatcher_init() acquired: drops the ref on the handlers
+// and uninitializes the dispatcher's status object.
+void upb_dispatcher_uninit(upb_dispatcher *d) {
+ upb_handlers_unref(d->handlers);
+ upb_status_uninit(&d->status);
+}
+
+// Invokes the startmsg handler (if one is registered) of the current message
+// with the closure of the top frame. If the handler returns anything other
+// than UPB_CONTINUE the dispatcher is unwound with that flow value.
+void upb_dispatch_startmsg(upb_dispatcher *d) {
+  upb_flow_t flow = UPB_CONTINUE;
+  // The handler's return value must be captured: it used to be discarded,
+  // which made the unwind below unreachable.
+  if (d->msgent->startmsg) flow = d->msgent->startmsg(d->top->closure);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+}
+
+// Finishes the top-level message: the stack must already be back at its
+// bottom frame. Runs the endmsg handler (if any) with the dispatcher's
+// status, then copies that status into the caller's "status".
+void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
+  assert(d->top == d->stack);
+
+  upb_endmsg_handler *on_end = d->msgent->endmsg;
+  if (on_end != NULL) on_end(d->top->closure, &d->status);
+
+  // TODO: should we avoid this copy by passing client's status obj to cbs?
+  upb_status_copy(status, &d->status);
+}
+
+// Debug helper: writes one indentation unit to stderr per frame above the
+// bottom of the stack.
+void indent(upb_dispatcher *d) {
+  int level = 0;
+  while (level < (d->top - d->stack)) {
+    fprintf(stderr, " ");
+    level++;
+  }
+}
+
+// Debug helper: like indent(), but one level shallower.
+void indentm1(upb_dispatcher *d) {
+  int level = 0;
+  while (level < (d->top - d->stack - 1)) {
+    fprintf(stderr, " ");
+    level++;
+  }
+}
+
+// Opens a sequence (repeated field) frame for field "f".
+//
+// If the stack has no room for another frame, the status is set to
+// "Nesting too deep." and the dispatcher is unwound with UPB_BREAK.
+// Otherwise the startseq handler, if registered, supplies the closure for the
+// new frame; without a handler the parent's closure is inherited. A flow
+// other than UPB_CONTINUE unwinds instead of pushing. Returns the frame that
+// is on top afterwards; on the failure paths that value is only a
+// placeholder.
+upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
+                                            upb_fhandlers *f) {
+  if ((d->top + 1) >= d->limit) {
+    upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
+    _upb_dispatcher_unwind(d, UPB_BREAK);
+    return d->top;  // Dummy.
+  }
+
+  upb_sflow_t started;
+  if (f->startseq) {
+    started = f->startseq(d->top->closure, f->fval);
+  } else {
+    started = UPB_CONTINUE_WITH(d->top->closure);
+  }
+  if (started.flow != UPB_CONTINUE) {
+    _upb_dispatcher_unwind(d, started.flow);
+    return d->top;  // Dummy.
+  }
+
+  upb_dispatcher_frame *pushed = ++d->top;
+  pushed->f = f;
+  pushed->is_sequence = true;
+  pushed->closure = started.closure;
+  return pushed;
+}
+
+// Closes the sequence frame on top of the stack. The frame is popped first,
+// so the endseq handler (if any) runs with the parent frame's closure. If
+// the handler returns a flow other than UPB_CONTINUE the dispatcher is
+// unwound. Otherwise the current message and dispatch table are restored
+// from the new top frame: its field's submsg if it has one, else the
+// top-level message. Returns the frame that is on top afterwards.
+upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
+  assert(d->top > d->stack);
+  assert(d->top->is_sequence);
+  upb_fhandlers *f = d->top->f;
+  --d->top;
+  upb_flow_t flow = UPB_CONTINUE;
+  if (f->endseq) flow = f->endseq(d->top->closure, f->fval);
+  if (flow != UPB_CONTINUE) {
+    // A leftover debugging printf() used to write to stdout here; library
+    // code must not emit output on its caller's stdout.
+    _upb_dispatcher_unwind(d, flow);
+    return d->top;  // Dummy.
+  }
+  d->msgent = d->top->f->submsg ? d->top->f->submsg : d->handlers->msgs[0];
+  d->dispatch_table = &d->msgent->fieldtab;
+  return d->top;
+}
+
+// Opens a submessage frame for field "f".
+//
+// If the stack has no room for another frame, the status is set to
+// "Nesting too deep." and the dispatcher is unwound with UPB_BREAK.
+// Otherwise the startsubmsg handler, if registered, supplies the closure for
+// the new frame; without a handler the parent's closure is inherited. A flow
+// other than UPB_CONTINUE unwinds instead of pushing. After a successful
+// push, f->submsg becomes the current message and its startmsg handler is
+// dispatched. Returns the frame that is on top afterwards; on the failure
+// paths that value is only a placeholder.
+upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
+                                               upb_fhandlers *f) {
+  if ((d->top + 1) >= d->limit) {
+    upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
+    _upb_dispatcher_unwind(d, UPB_BREAK);
+    return d->top;  // Dummy.
+  }
+
+  upb_sflow_t started;
+  if (f->startsubmsg) {
+    started = f->startsubmsg(d->top->closure, f->fval);
+  } else {
+    started = UPB_CONTINUE_WITH(d->top->closure);
+  }
+  if (started.flow != UPB_CONTINUE) {
+    _upb_dispatcher_unwind(d, started.flow);
+    return d->top;  // Dummy.
+  }
+
+  upb_dispatcher_frame *pushed = ++d->top;
+  pushed->f = f;
+  pushed->is_sequence = false;
+  pushed->closure = started.closure;
+
+  // Switch the dispatcher over to the submessage's handlers.
+  d->msgent = f->submsg;
+  d->dispatch_table = &d->msgent->fieldtab;
+  upb_dispatch_startmsg(d);
+  return d->top;
+}
+
+// Closes the submessage frame on top of the stack. The submessage's endmsg
+// handler (if any) runs with the submessage's closure, the current message
+// is switched back to f->msg, the frame is popped, and the field's endsubmsg
+// handler (if any) runs with the parent frame's closure. If that handler
+// returns a flow other than UPB_CONTINUE the dispatcher is unwound. Returns
+// the frame that is on top afterwards.
+upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
+  assert(d->top > d->stack);
+  assert(!d->top->is_sequence);
+  upb_fhandlers *f = d->top->f;
+  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+  d->msgent = d->top->f->msg;
+  d->dispatch_table = &d->msgent->fieldtab;
+  --d->top;
+  upb_flow_t flow = UPB_CONTINUE;
+  // The handler's return value must be captured: it used to be discarded,
+  // which made the unwind below unreachable.
+  if (f->endsubmsg) flow = f->endsubmsg(d->top->closure, f->fval);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+  return d->top;
+}
+
+// Returns true when the top of the stack is the bottom frame, i.e. no
+// sequence or submessage frame is currently open.
+bool upb_dispatcher_stackempty(upb_dispatcher *d) {
+ return d->top == d->stack;
+}
+
+// Unwinds the dispatcher stack after a handler returned a flow other than
+// UPB_CONTINUE. Starting at the top frame, each frame's submessage endmsg
+// handler and field endsubmsg handler are run (when present) and the frame
+// is popped. With UPB_SKIPSUBMSG only one frame is popped; any other flow
+// pops everything. When the bottom frame is reached the data source's exit
+// callback (if any) is invoked and d->top is left unchanged.
+void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
+  upb_dispatcher_frame *frame = d->top;
+  while (1) {
+    upb_fhandlers *f = frame->f;
+    // Handlers are optional everywhere else in the dispatcher, and the bottom
+    // frame's placeholder field has no submsg at all, so check before calling.
+    if (f->submsg && f->submsg->endmsg) {
+      f->submsg->endmsg(frame->closure, &d->status);
+    }
+    if (f->endsubmsg) f->endsubmsg(frame->closure, f->fval);
+
+    // Test for the bottom frame before decrementing, so that no pointer
+    // before the start of d->stack is ever formed.
+    if (frame == d->stack) {
+      if (d->exit) d->exit(d->srcclosure);
+      return;
+    }
+    --frame;
+    d->top = frame;
+    if (flow == UPB_SKIPSUBMSG) return;
+  }
+}
diff --git a/upb/handlers.h b/upb/handlers.h
new file mode 100644
index 0000000..e3d91cf
--- /dev/null
+++ b/upb/handlers.h
@@ -0,0 +1,373 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010-2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb_handlers is a generic visitor-like interface for iterating over a stream
+ * of protobuf data. You can register function pointers that will be called
+ * for each message and/or field as the data is being parsed or iterated over,
+ * without having to know the source format that we are parsing from. This
+ * decouples the parsing logic from the processing logic.
+ */
+
+#ifndef UPB_HANDLERS_H
+#define UPB_HANDLERS_H
+
+#include <limits.h>
+#include "upb/upb.h"
+#include "upb/def.h"
+#include "upb/bytestream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Handlers protocol definition ***********************************************/
+
+// A upb_handlers object represents a graph of handlers. Each message can have
+// a set of handlers as well as a set of fields which themselves have handlers.
+// Fields that represent submessages or groups are linked to other message
+// handlers, so the overall set of handlers can form a graph structure (which
+// may be cyclic).
+//
+// The upb_mhandlers (message handlers) object can have the following handlers:
+//
+// static upb_flow_t startmsg(void *closure) {
+// // Called when the message begins. "closure" was supplied by our caller.
+// return UPB_CONTINUE;
+// }
+//
+// static void endmsg(void *closure, upb_status *status) {
+// // Called when processing of this message ends, whether in success or
+// // failure. "status" indicates the final status of processing, and can
+// // also be modified in-place to update the final status.
+// //
+// // Since this callback is guaranteed to always be called eventually, it
+// // can be used to free any resources that were allocated during processing.
+// }
+//
+// TODO: unknown field handler.
+//
+// The upb_fhandlers (field handlers) object can have the following handlers:
+//
+// static upb_flow_t value(void *closure, upb_value fval, upb_value val) {
+// // Called when the field's value is encountered. "fval" contains
+// // whatever value was bound to this field at registration time
+// // (for upb_register_all(), this will be the field's upb_fielddef*).
+// return UPB_CONTINUE;
+// }
+//
+// static upb_sflow_t startsubmsg(void *closure, upb_value fval) {
+// // Called when a submessage begins. The second element of the return
+// // value is the closure for the submessage.
+// return UPB_CONTINUE_WITH(closure);
+// }
+//
+// static upb_flow_t endsubmsg(void *closure, upb_value fval) {
+// // Called when a submessage ends.
+// return UPB_CONTINUE;
+// }
+//
+// static upb_sflow_t startseq(void *closure, upb_value fval) {
+// // Called when a sequence (repeated field) begins. The second element
+// // of the return value is the closure for the sequence.
+// return UPB_CONTINUE_WITH(closure);
+// }
+//
+// static upb_flow_t endseq(void *closure, upb_value fval) {
+// // Called when a sequence ends.
+// return UPB_CONTINUE;
+// }
+//
+// All handlers except the endmsg handler return a value from this enum, to
+// control whether parsing will continue or not. Handlers that also have to
+// hand back a closure (startsubmsg, startseq) return it wrapped in a
+// upb_sflow_t instead.
+typedef enum {
+ // Data source should continue calling callbacks.
+ UPB_CONTINUE = 0,
+
+ // Halt processing permanently (in a non-resumable way). The endmsg handlers
+ // for any currently open messages will be called which can supply a more
+ // specific status message. No further input data will be consumed.
+ UPB_BREAK,
+
+ // Skips to the end of the current submessage (or if we are at the top
+ // level, skips to the end of the entire message). In other words, it is
+ // like a UPB_BREAK that applies only to the current level.
+ //
+ // If you UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler will
+ // be called to perform cleanup and return a status. Returning
+ // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg,
+ // endmsg, or endsubmsg handlers.
+ //
+ // If UPB_SKIPSUBMSG is called from the top-level message, no further input
+ // data will be consumed.
+ UPB_SKIPSUBMSG,
+
+ // TODO: Add UPB_SUSPEND, for resumable producers/consumers.
+} upb_flow_t;
+
+// The startsubmsg handler needs to also pass a closure to the submsg. The
+// same type is returned by startseq handlers to supply the closure of the
+// sequence.
+typedef struct {
+ upb_flow_t flow; // Whether processing should continue.
+ void *closure; // Closure for the frame that is being opened.
+} upb_sflow_t;
+
+// Bundles a flow value and a closure into a upb_sflow_t.
+INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) {
+  upb_sflow_t packed;
+  packed.flow = flow;
+  packed.closure = closure;
+  return packed;
+}
+// Shorthands: continue with closure "c", or break with no closure.
+#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c)
+#define UPB_SBREAK UPB_SFLOW(UPB_BREAK, NULL)
+
+// Typedefs for all of the handler functions defined above.
+// "c"/"closure" is the closure of the frame the handler runs in; "fval" is
+// the value bound to the field when it was registered.
+typedef upb_flow_t (upb_startmsg_handler)(void *c);
+typedef void (upb_endmsg_handler)(void *c, upb_status *status);
+typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val);
+// Shared by the startsubmsg and startseq handlers.
+typedef upb_sflow_t (upb_startfield_handler)(void *closure, upb_value fval);
+// Shared by the endsubmsg and endseq handlers.
+typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval);
+
+
+/* upb_fhandlers **************************************************************/
+
+// A upb_fhandlers object represents the set of handlers associated with one
+// specific message field.
+//
+// NOTE: handlers.c initializes a static instance of this struct positionally,
+// so the member order below must not change without updating it.
+struct _upb_decoder;
+struct _upb_mhandlers;
+typedef struct _upb_fieldent {
+ bool junk; // NOTE(review): purpose not evident from this header.
+ upb_fieldtype_t type;
+ bool repeated;
+ bool is_repeated_primitive;
+ upb_atomic_t refcount; // See upb_fhandlers_ref()/upb_fhandlers_unref().
+ uint32_t number; // Field number.
+ // Read by upb_dispatch_endsubmsg() to restore the current message once this
+ // field's submessage ends -- presumably the handlers of the message that
+ // contains this field.
+ struct _upb_mhandlers *msg;
+ struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true.
+ upb_value fval; // Passed as "fval" to every handler of this field.
+ // Handler callbacks; the dispatcher treats NULL as "no handler".
+ upb_value_handler *value;
+ upb_startfield_handler *startsubmsg;
+ upb_endfield_handler *endsubmsg;
+ upb_startfield_handler *startseq;
+ upb_endfield_handler *endseq;
+ // NOTE(review): the jit_* labels and "decode" are presumably owned by the
+ // decoder/JIT -- confirm in upb/pb/decoder.c.
+ uint32_t jit_pclabel;
+ uint32_t jit_pclabel_notypecheck;
+ uint32_t jit_submsg_done_pclabel;
+ void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
+} upb_fhandlers;
+
+// fhandlers are created as part of a upb_handlers instance, but can be ref'd
+// and unref'd to prolong the life of the handlers.
+void upb_fhandlers_ref(upb_fhandlers *m);
+void upb_fhandlers_unref(upb_fhandlers *m);
+
+// upb_fhandlers accessors
+// Each UPB_FHANDLERS_ACCESSORS(name, type) invocation defines two inline
+// functions, upb_fhandlers_set<name>() and upb_fhandlers_get<name>(), that
+// write and read the upb_fhandlers member called <name>.
+#define UPB_FHANDLERS_ACCESSORS(name, type) \
+ INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \
+ INLINE type upb_fhandlers_get ## name(upb_fhandlers *f) { return f->name; }
+UPB_FHANDLERS_ACCESSORS(fval, upb_value)
+UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*)
+UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*)
+UPB_FHANDLERS_ACCESSORS(endsubmsg, upb_endfield_handler*)
+UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*)
+UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*)
+UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*)
+
+
+/* upb_mhandlers **************************************************************/
+
+// A upb_mhandlers object represents the set of handlers associated with a
+// message in the graph of messages.
+
+typedef struct _upb_mhandlers {
+ upb_atomic_t refcount; // See upb_mhandlers_ref()/upb_mhandlers_unref().
+ // Message-level callbacks; the dispatcher treats NULL as "no handler".
+ upb_startmsg_handler *startmsg;
+ upb_endmsg_handler *endmsg;
+ upb_inttable fieldtab; // Maps field number -> upb_fhandlers.
+ // NOTE(review): the jit_* labels, is_group, max_field_number and tablearray
+ // are presumably maintained by the decoder/JIT -- confirm in upb/pb/.
+ uint32_t jit_startmsg_pclabel;
+ uint32_t jit_endofbuf_pclabel;
+ uint32_t jit_endofmsg_pclabel;
+ uint32_t jit_unknownfield_pclabel;
+ bool is_group;
+ int32_t jit_parent_field_done_pclabel;
+ uint32_t max_field_number;
+ // Currently keyed on field number. Could also try keying it
+ // on encoded or decoded tag, or on encoded field number.
+ void **tablearray;
+} upb_mhandlers;
+
+// mhandlers are created as part of a upb_handlers instance, but can be ref'd
+// and unref'd to prolong the life of the handlers.
+void upb_mhandlers_ref(upb_mhandlers *m);
+void upb_mhandlers_unref(upb_mhandlers *m);
+
+// Creates a new field with the given name and number. There must not be an
+// existing field with either this name or number or abort() will be called.
+// TODO: this should take a name also.
+upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
+ upb_fieldtype_t type, bool repeated);
+// Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the
+// given submessage must not have any fields with this field number.
+upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
+ upb_fieldtype_t type,
+ bool repeated,
+ upb_mhandlers *subm);
+
+// upb_mhandlers accessors.
+// Each UPB_MHANDLERS_ACCESSORS(name, type) invocation defines two inline
+// functions, upb_mhandlers_set<name>() and upb_mhandlers_get<name>(), that
+// write and read the upb_mhandlers member called <name>.
+//
+// The invocations expand to complete function definitions, so they must not
+// be followed by ';' (a stray semicolon at file scope is not valid ISO C).
+#define UPB_MHANDLERS_ACCESSORS(name, type) \
+  INLINE void upb_mhandlers_set ## name(upb_mhandlers *m, type v){m->name = v;} \
+  INLINE type upb_mhandlers_get ## name(upb_mhandlers *m) { return m->name; }
+UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*)
+UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*)
+
+
+/* upb_handlers ***************************************************************/
+
+// A graph of message handlers. Ownership is shared through "refcount" (see
+// upb_handlers_ref()/upb_handlers_unref()).
+struct _upb_handlers {
+ upb_atomic_t refcount;
+ upb_mhandlers **msgs; // Array of message handlers, [0]=toplevel.
+ // msgs_len is the number of entries in msgs (upb_dispatcher_init() iterates
+ // up to it); msgs_size is presumably the allocated capacity -- confirm in
+ // handlers.c.
+ int msgs_len, msgs_size;
+ bool should_jit;
+};
+typedef struct _upb_handlers upb_handlers;
+
+// Creation and reference counting of upb_handlers objects.
+// upb_handlers_new() is declared with (void) so that it is a real prototype
+// in C; an empty parameter list would leave the arguments unchecked.
+upb_handlers *upb_handlers_new(void);
+void upb_handlers_ref(upb_handlers *h);
+void upb_handlers_unref(upb_handlers *h);
+
+// Appends a new message to the graph of handlers and returns it. This message
+// can be obtained later at index upb_handlers_msgcount()-1. All handlers will
+// be initialized to no-op handlers.
+upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h);
+upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index);
+
+// Convenience function for registering handlers for all messages and
+// fields in a msgdef and all its children. For every registered message
+// "msgreg_cb" will be called with the newly-created mhandlers, and likewise
+// "fieldreg_cb" will be called for every field with its newly-created
+// fhandlers. Either callback may be NULL, in which case it is skipped.
+// "closure" is passed through to both callbacks unchanged. Returns the
+// mhandlers created for "m".
+//
+// See upb_handlers_reghandlerset() below for an example.
+typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, upb_msgdef *m);
+// Note: despite the parameter names, "mh" here is the field's fhandlers and
+// "m" is the field's fielddef.
+typedef void upb_onfieldreg(void *closure, upb_fhandlers *mh, upb_fielddef *m);
+upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, upb_msgdef *m,
+ upb_onmsgreg *msgreg_cb,
+ upb_onfieldreg *fieldreg_cb,
+ void *closure);
+
+// Convenience function for registering a set of handlers for all messages and
+// fields in a msgdef and its children, with the fval bound to the upb_fielddef.
+// Any of the handlers may be NULL, in which case no callback will be set and
+// the nop callback will be used.
+//
+// The same startmsg/endmsg pair is applied to every message and the same
+// field handlers to every field (see upb_onmreg_hset()/upb_onfreg_hset()).
+typedef struct {
+ upb_startmsg_handler *startmsg;
+ upb_endmsg_handler *endmsg;
+ upb_value_handler *value;
+ upb_startfield_handler *startsubmsg;
+ upb_endfield_handler *endsubmsg;
+ upb_startfield_handler *startseq;
+ upb_endfield_handler *endseq;
+} upb_handlerset;
+
+// upb_onmsgreg callback used by upb_handlers_reghandlerset(): "c" is the
+// upb_handlerset, whose non-NULL message handlers are copied onto "mh".
+// The msgdef is not needed.
+INLINE void upb_onmreg_hset(void *c, upb_mhandlers *mh, upb_msgdef *m) {
+  upb_handlerset *set = (upb_handlerset*)c;
+  (void)m;
+  if (set->startmsg != NULL) upb_mhandlers_setstartmsg(mh, set->startmsg);
+  if (set->endmsg != NULL) upb_mhandlers_setendmsg(mh, set->endmsg);
+}
+// upb_onfieldreg callback used by upb_handlers_reghandlerset(): "c" is the
+// upb_handlerset, whose non-NULL field handlers are copied onto "fh". The
+// field's fval is bound to the fielddef "f" itself.
+INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, upb_fielddef *f) {
+  upb_handlerset *set = (upb_handlerset*)c;
+
+  if (set->value != NULL) upb_fhandlers_setvalue(fh, set->value);
+  if (set->startsubmsg != NULL) {
+    upb_fhandlers_setstartsubmsg(fh, set->startsubmsg);
+  }
+  if (set->endsubmsg != NULL) upb_fhandlers_setendsubmsg(fh, set->endsubmsg);
+  if (set->startseq != NULL) upb_fhandlers_setstartseq(fh, set->startseq);
+  if (set->endseq != NULL) upb_fhandlers_setendseq(fh, set->endseq);
+
+  upb_value bound;
+  upb_value_setfielddef(&bound, f);
+  upb_fhandlers_setfval(fh, bound);
+}
+// Registers "m" and its children with upb_handlers_regmsgdef(), applying the
+// handlers in "hs" to every message and field. Returns the mhandlers for "m".
+INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, upb_msgdef *m,
+                                                 upb_handlerset *hs) {
+  upb_mhandlers *root =
+      upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs);
+  return root;
+}
+
+
+/* upb_dispatcher *************************************************************/
+
+// upb_dispatcher can be used by sources of data to invoke the appropriate
+// handlers on a upb_handlers object. Besides maintaining the runtime stack of
+// closures and handlers, the dispatcher checks the return status of user
+// callbacks and properly handles statuses other than UPB_CONTINUE, invoking
+// "skip" or "exit" handlers on the underlying data source as appropriate.
+
+// One entry of the dispatcher's runtime stack. A frame is pushed for every
+// open sequence or submessage; the bottom frame represents the top-level
+// message.
+typedef struct {
+ upb_fhandlers *f; // Field whose sequence/submessage opened this frame.
+ void *closure; // Closure passed to handlers that run in this frame.
+
+ // Members to use as the data source requires.
+ void *srcclosure;
+ uint64_t end_ofs;
+ uint16_t msgindex;
+ uint16_t fieldindex;
+
+ bool is_sequence; // frame represents seq or submsg? (f might be both).
+ bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX (strings aren't pushed)
+} upb_dispatcher_frame;
+
+// Called when some of the input needs to be skipped. All frames from
+// top to bottom, inclusive, should be skipped.
+typedef void upb_skip_handler(void *, upb_dispatcher_frame *top,
+ upb_dispatcher_frame *bottom);
+typedef void upb_exit_handler(void *);
+
+// Runtime state of a dispatcher; set up by upb_dispatcher_init() and
+// upb_dispatcher_reset().
+typedef struct {
+ // "top" is the current frame; "limit" is the address one past the last
+ // frame of "stack".
+ upb_dispatcher_frame *top, *limit;
+
+ upb_handlers *handlers; // Ref'd for the lifetime of the dispatcher.
+
+ // Msg and dispatch table for the current level.
+ upb_mhandlers *msgent;
+ upb_inttable *dispatch_table;
+ // Callbacks into the data source, plus the closure handed to "exit".
+ upb_skip_handler *skip;
+ upb_exit_handler *exit;
+ void *srcclosure;
+
+ // Status passed to endmsg handlers and copied out by upb_dispatch_endmsg().
+ upb_status status;
+ // Stack.
+ upb_dispatcher_frame stack[UPB_MAX_NESTING];
+} upb_dispatcher;
+
+void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
+ upb_skip_handler *skip, upb_exit_handler *exit,
+ void *closure);
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
+void upb_dispatcher_uninit(upb_dispatcher *d);
+
+// Tests whether the runtime stack is in the base level message.
+bool upb_dispatcher_stackempty(upb_dispatcher *d);
+
+// Finds the handlers registered for field number "n" in the message the
+// dispatcher is currently positioned in, using its dispatch table.
+INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {
+  upb_fhandlers *found = (upb_fhandlers*)upb_inttable_fastlookup(
+      d->dispatch_table, n, sizeof(upb_fhandlers));
+  return found;
+}
+
+void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow);
+
+// Dispatch functions -- call the user handler and handle errors.
+// Delivers "val" to the value handler of "f", if one is registered, using the
+// closure of the top frame; a result other than UPB_CONTINUE unwinds the
+// dispatcher.
+INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
+                               upb_value val) {
+  if (!f->value) return;
+  upb_flow_t verdict = f->value(d->top->closure, f->fval, val);
+  if (verdict != UPB_CONTINUE) _upb_dispatcher_unwind(d, verdict);
+}
+void upb_dispatch_startmsg(upb_dispatcher *d);
+void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
+upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
+upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/msg.c b/upb/msg.c
new file mode 100644
index 0000000..a2b2cf7
--- /dev/null
+++ b/upb/msg.c
@@ -0,0 +1,349 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Data structure for storing a message of protobuf data.
+ */
+
+#include "upb/upb.h"
+#include "upb/msg.h"
+
+// Marks every field of "msg" as absent by zeroing its first
+// md->hasbit_bytes bytes. The field storage itself is left untouched.
+void upb_msg_clear(void *msg, upb_msgdef *md) {
+ memset(msg, 0, md->hasbit_bytes);
+ // TODO: set primitive fields to defaults?
+}
+
+// Reserves one more element of "type_size" bytes at the end of "a" and
+// returns a pointer to it. When the array is full its capacity is doubled
+// (starting at 8 elements) and the newly added capacity is zero-filled.
+//
+// Returns NULL if the array could not be grown; in that case "a" is left
+// exactly as it was (same buffer, same len and size).
+void *upb_stdarray_append(upb_stdarray *a, size_t type_size) {
+  assert(a->len <= a->size);
+  if (a->len == a->size) {
+    size_t old_size = a->size;
+    size_t new_size = old_size == 0 ? 8 : (old_size * 2);
+    // Never assign realloc()'s result straight to a->ptr: on failure that
+    // would leak the old buffer and leave a->size describing memory that
+    // does not exist.
+    void *grown = realloc(a->ptr, new_size * type_size);
+    if (grown == NULL) return NULL;
+    a->ptr = grown;
+    a->size = new_size;
+    memset(&a->ptr[old_size * type_size], 0, (new_size - old_size) * type_size);
+  }
+  return &a->ptr[a->len++ * type_size];
+}
+
+// NOTE: everything down to the matching #endif is compiled out. It sketches
+// how an in-memory message could be replayed through a upb_dispatcher:
+// upb_msg_runhandlers() sets up a dispatcher and upb_msg_dispatch() walks the
+// fields that are present, recursing into submessages via upb_msg_pushval().
+#if 0
+static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
+ upb_dispatcher *d);
+
+static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f,
+ upb_dispatcher *d, upb_fhandlers *hf) {
+ if (upb_issubmsg(f)) {
+ upb_msg *msg = upb_value_getmsg(val);
+ upb_dispatch_startsubmsg(d, hf);
+ upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d);
+ upb_dispatch_endsubmsg(d);
+ } else {
+ upb_dispatch_value(d, hf, val);
+ }
+ return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
+ upb_dispatcher *d) {
+ upb_msg_iter i;
+ for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
+ upb_fielddef *f = upb_msg_iter_field(i);
+ if (!upb_msg_has(msg, f)) continue;
+ upb_fhandlers *hf = upb_dispatcher_lookup(d, f->number);
+ if (!hf) continue;
+ upb_value val = upb_msg_get(msg, f);
+ if (upb_isarray(f)) {
+ upb_array *arr = upb_value_getarr(val);
+ for (uint32_t j = 0; j < upb_array_len(arr); ++j) {
+ upb_msg_pushval(upb_array_get(arr, f, j), f, d, hf);
+ }
+ } else {
+ upb_msg_pushval(val, f, d, hf);
+ }
+ }
+ return UPB_CONTINUE;
+}
+
+void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
+ void *closure, upb_status *status) {
+ upb_dispatcher d;
+ upb_dispatcher_init(&d, h, NULL, NULL, NULL);
+ upb_dispatcher_reset(&d, closure);
+
+ upb_dispatch_startmsg(&d);
+ upb_msg_dispatch(msg, md, &d);
+ upb_dispatch_endmsg(&d, status);
+
+ upb_dispatcher_uninit(&d);
+}
+#endif
+
+/* Standard writers. **********************************************************/
+
+// Sets the hasbit of the field bound to "fval" in message "_m". Fields with
+// a negative hasbit index have no hasbit, so nothing is written for them.
+void upb_stdmsg_sethas(void *_m, upb_value fval) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  if (f->hasbit < 0) return;
+  char *bits = _m;
+  bits[f->hasbit / 8] |= (1 << (f->hasbit % 8));
+}
+
+// Reports whether the field bound to "fval" is present in message "_m".
+// Fields with a negative hasbit index have no hasbit and always count as
+// present.
+bool upb_stdmsg_has(void *_m, upb_value fval) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  if (f->hasbit < 0) return true;
+  char *bits = _m;
+  return (bits[f->hasbit / 8] & (1 << (f->hasbit % 8))) != 0;
+}
+
+// UPB_ACCESSORS(type, ctype) generates the four standard accessors for one
+// upb_value flavor whose in-memory representation is "ctype":
+//
+// upb_stdmsg_set<type>() -- sets the field's hasbit and stores the value at
+// f->offset inside the message.
+// upb_stdmsg_set<type>_r() -- appends the value to a upb_stdarray (the
+// closure is the array, fval is ignored).
+// upb_stdmsg_get<type>() -- reads the value stored at f->offset.
+// upb_stdmsg_seqget<type>() -- reads the value an iterator points at.
+//
+// NOTE(review): the _r variant dereferences the result of
+// upb_stdarray_append() without checking it.
+#define UPB_ACCESSORS(type, ctype) \
+ upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \
+ upb_value val) { \
+ upb_fielddef *f = upb_value_getfielddef(fval); \
+ uint8_t *m = _m; \
+ upb_stdmsg_sethas(_m, fval); \
+ *(ctype*)&m[f->offset] = upb_value_get ## type(val); \
+ return UPB_CONTINUE; \
+ } \
+ \
+ upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval, \
+ upb_value val) { \
+ (void)_fval; \
+ ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype)); \
+ *p = upb_value_get ## type(val); \
+ return UPB_CONTINUE; \
+ } \
+ \
+ upb_value upb_stdmsg_get ## type(void *_m, upb_value fval) { \
+ uint8_t *m = _m; \
+ upb_fielddef *f = upb_value_getfielddef(fval); \
+ upb_value ret; \
+ upb_value_set ## type(&ret, *(ctype*)&m[f->offset]); \
+ return ret; \
+ } \
+ upb_value upb_stdmsg_seqget ## type(void *i) { \
+ upb_value val; \
+ upb_value_set ## type(&val, *(ctype*)i); \
+ return val; \
+ }
+
+UPB_ACCESSORS(double, double)
+UPB_ACCESSORS(float, float)
+UPB_ACCESSORS(int32, int32_t)
+UPB_ACCESSORS(int64, int64_t)
+UPB_ACCESSORS(uint32, uint32_t)
+UPB_ACCESSORS(uint64, uint64_t)
+UPB_ACCESSORS(bool, bool)
+UPB_ACCESSORS(ptr, void*)
+#undef UPB_ACCESSORS
+
+// Copies the string referenced by "src" into the upb_stdarray that "_dst"
+// points at (a upb_stdarray* slot). The array object is created on first
+// use; on later calls it is reused and its buffer only grows.
+static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
+  upb_stdarray **slot = _dst;
+  if (*slot == NULL) {
+    upb_stdarray *fresh = malloc(sizeof(*fresh));
+    fresh->size = 0;
+    fresh->ptr = NULL;
+    *slot = fresh;
+  }
+
+  upb_stdarray *buf = *slot;
+  buf->len = 0;
+
+  upb_strref *ref = upb_value_getstrref(src);
+  if (buf->size < ref->len) {
+    // Grow to exactly the length of the incoming string.
+    buf->size = ref->len;
+    buf->ptr = realloc(buf->ptr, buf->size);
+  }
+  buf->len = ref->len;
+  upb_bytesrc_read(ref->bytesrc, ref->stream_offset, ref->len, buf->ptr);
+}
+
+// Value handler for non-repeated string fields: sets the hasbit, then stores
+// the string in the slot at f->offset inside the message.
+upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  char *base = _m;
+  upb_stdmsg_sethas(_m, fval);
+  _upb_stdmsg_setstr(base + f->offset, val);
+  return UPB_CONTINUE;
+}
+
+// Value handler for repeated string fields: appends a pointer-sized slot to
+// the array "a" and stores the string there. "fval" is not used.
+upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
+  (void)fval;
+  void *slot = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
+  _upb_stdmsg_setstr(slot, val);
+  return UPB_CONTINUE;
+}
+
+// Strings are stored as pointers, so reading one is a plain pointer read.
+upb_value upb_stdmsg_getstr(void *m, upb_value fval) {
+  upb_value str = upb_stdmsg_getptr(m, fval);
+  return str;
+}
+
+// Sequence counterpart of upb_stdmsg_getstr(): reads the pointer that the
+// iterator "i" refers to.
+upb_value upb_stdmsg_seqgetstr(void *i) {
+  upb_value str = upb_stdmsg_seqgetptr(i);
+  return str;
+}
+
+// Allocates a zero-filled message of md->size bytes and clears it.
+// Returns NULL if the allocation fails; the allocation result used to be
+// passed to memset() unchecked.
+void *upb_stdmsg_new(upb_msgdef *md) {
+  void *m = calloc(1, md->size);
+  if (m == NULL) return NULL;
+  upb_msg_clear(m, md);
+  return m;
+}
+
+// Frees the sequence "s" (a upb_stdarray) that belongs to field "f",
+// including the submessages or strings its elements point to.
+//
+// The loop covers the whole capacity (a->size) rather than a->len. Slots
+// that were never handed out are zero-filled by upb_stdarray_append(), so
+// each element may be NULL and is checked before use.
+void upb_stdseq_free(void *s, upb_fielddef *f) {
+  upb_stdarray *a = s;
+  if (upb_issubmsg(f) || upb_isstring(f)) {
+    void **p = (void**)a->ptr;
+    for (uint32_t i = 0; i < a->size; i++) {
+      if (upb_issubmsg(f)) {
+        // upb_stdmsg_free() ignores NULL itself.
+        upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
+      } else {
+        upb_stdarray *str = p[i];
+        // Unused slots hold NULL; dereferencing them used to crash.
+        if (str == NULL) continue;
+        free(str->ptr);
+        free(str);
+      }
+    }
+  }
+  free(a->ptr);
+  free(a);
+}
+
+// Frees message "m" of type "md" together with everything it points to:
+// sequences, submessages and strings. A NULL "m" is ignored.
+void upb_stdmsg_free(void *m, upb_msgdef *md) {
+  if (m == NULL) return;
+
+  upb_msg_iter it = upb_msg_begin(md);
+  for (; !upb_msg_done(it); it = upb_msg_next(md, it)) {
+    upb_fielddef *f = upb_msg_iter_field(it);
+
+    // Only these kinds of field store a pointer that needs to be released.
+    bool has_pointer = upb_isseq(f) || upb_issubmsg(f) || upb_isstring(f);
+    if (!has_pointer) continue;
+
+    void *child = upb_value_getptr(upb_stdmsg_getptr(m, f->fval));
+    if (child == NULL) continue;
+
+    if (upb_isseq(f)) {
+      upb_stdseq_free(child, f);
+      continue;
+    }
+    if (upb_issubmsg(f)) {
+      upb_stdmsg_free(child, upb_downcast_msgdef(f->def));
+      continue;
+    }
+    upb_stdarray *str = child;
+    free(str->ptr);
+    free(str);
+  }
+  free(m);
+}
+
+// startseq handler for the standard message layout. The array for the field
+// lives in a pointer slot at f->offset. If the field is not yet marked
+// present, the array is created when the slot is empty, its length is reset
+// to zero, and the hasbit is set. The array becomes the closure of the
+// sequence.
+upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  char *base = _m;
+  upb_stdarray **slot = (void*)&base[f->offset];
+
+  if (upb_stdmsg_has(_m, fval)) return UPB_CONTINUE_WITH(*slot);
+
+  if (*slot == NULL) {
+    *slot = malloc(sizeof(**slot));
+    (*slot)->size = 0;
+    (*slot)->ptr = NULL;
+  }
+  (*slot)->len = 0;
+  upb_stdmsg_sethas(base, fval);
+  return UPB_CONTINUE_WITH(*slot);
+}
+
+// Makes "*m" a cleared message of type "md": an existing message is cleared
+// in place, an empty slot gets a newly allocated message.
+void upb_stdmsg_recycle(void **m, upb_msgdef *md) {
+  if (*m == NULL) {
+    *m = upb_stdmsg_new(md);
+    return;
+  }
+  upb_msg_clear(*m, md);
+}
+
+// startsubmsg handler for non-repeated submessage fields. The submessage
+// pointer lives at f->offset; if the field is not yet marked present the
+// submessage is recycled (cleared or allocated) and the hasbit is set. The
+// submessage becomes the closure of the new frame.
+upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  char *base = _m;
+  void **slot = (void*)&base[f->offset];
+
+  if (upb_stdmsg_has(base, fval)) return UPB_CONTINUE_WITH(*slot);
+
+  upb_stdmsg_recycle(slot, upb_downcast_msgdef(f->def));
+  upb_stdmsg_sethas(base, fval);
+  return UPB_CONTINUE_WITH(*slot);
+}
+
+// startsubmsg handler for repeated submessage fields: the closure "a" is the
+// array. A pointer-sized slot is appended and the message in it is recycled
+// (cleared or allocated); that message becomes the closure of the new frame.
+upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) {
+  assert(a != NULL);
+  upb_stdarray *arr = (upb_stdarray*)a;
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  void **slot = upb_stdarray_append(arr, sizeof(void*));
+  upb_stdmsg_recycle(slot, upb_downcast_msgdef(f->def));
+  return UPB_CONTINUE_WITH(*slot);
+}
+
+// Returns an iterator to the first element of the array "_a", or NULL when
+// the array is empty.
+void *upb_stdmsg_seqbegin(void *_a) {
+  upb_stdarray *arr = _a;
+  if (arr->len > 0) return arr->ptr;
+  return NULL;
+}
+
+// NEXTFUNC(size) defines upb_stdmsg_<size>byte_seqnext(): it advances "iter"
+// by <size> bytes and returns the new position, or NULL once that position
+// is no longer inside the first a->len elements of the array.
+#define NEXTFUNC(size) \
+ void *upb_stdmsg_ ## size ## byte_seqnext(void *_a, void *iter) { \
+ upb_stdarray *a = _a; \
+ void *next = (char*)iter + size; \
+ return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL; \
+ }
+
+NEXTFUNC(8)
+NEXTFUNC(4)
+NEXTFUNC(1)
+
+// Helpers for upb_stdmsg_accessor(). Each expands to a block that defines a
+// function-local static upb_accessor_vtbl and returns its address, so every
+// use site gets its own table.
+//
+// STDMSG(type) builds the table for a non-repeated field; STDMSG_R(type,
+// size) builds the table for a repeated field whose elements are "size"
+// bytes wide. The initializers are positional and must match the member
+// order of upb_accessor_vtbl.
+#define STDMSG(type) { static upb_accessor_vtbl vtbl = {NULL, &upb_stdmsg_startsubmsg, \
+ &upb_stdmsg_set ## type, &upb_stdmsg_has, &upb_stdmsg_get ## type, \
+ NULL, NULL, NULL}; return &vtbl; }
+#define STDMSG_R(type, size) { static upb_accessor_vtbl vtbl = { \
+ &upb_stdmsg_startseq, &upb_stdmsg_startsubmsg_r, &upb_stdmsg_set ## type ## _r, \
+ &upb_stdmsg_has, &upb_stdmsg_getptr, &upb_stdmsg_seqbegin, \
+ &upb_stdmsg_ ## size ## byte_seqnext, &upb_stdmsg_seqget ## type}; \
+ return &vtbl; }
+
+// Returns the standard accessor table for field "f", chosen by whether the
+// field is a sequence and by its type. Every case returns from inside the
+// STDMSG/STDMSG_R macro; types not listed in the switch fall through to the
+// final "return NULL".
+upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) {
+ if (upb_isseq(f)) {
+ switch (f->type) {
+ case UPB_TYPE(DOUBLE): STDMSG_R(double, 8)
+ case UPB_TYPE(FLOAT): STDMSG_R(float, 4)
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(FIXED64): STDMSG_R(uint64, 8)
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(SFIXED64):
+ case UPB_TYPE(SINT64): STDMSG_R(int64, 8)
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(SINT32):
+ case UPB_TYPE(ENUM):
+ case UPB_TYPE(SFIXED32): STDMSG_R(int32, 4)
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(FIXED32): STDMSG_R(uint32, 4)
+ case UPB_TYPE(BOOL): STDMSG_R(bool, 1)
+ // Strings and submessages are stored as pointers; the iterator stride is
+ // hard-coded to 8 bytes, which only matches 64-bit pointers.
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES):
+ case UPB_TYPE(GROUP):
+ case UPB_TYPE(MESSAGE): STDMSG_R(str, 8) // TODO: 32-bit
+ }
+ } else {
+ switch (f->type) {
+ case UPB_TYPE(DOUBLE): STDMSG(double)
+ case UPB_TYPE(FLOAT): STDMSG(float)
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(FIXED64): STDMSG(uint64)
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(SFIXED64):
+ case UPB_TYPE(SINT64): STDMSG(int64)
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(SINT32):
+ case UPB_TYPE(ENUM):
+ case UPB_TYPE(SFIXED32): STDMSG(int32)
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(FIXED32): STDMSG(uint32)
+ case UPB_TYPE(BOOL): STDMSG(bool)
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES):
+ case UPB_TYPE(GROUP):
+ case UPB_TYPE(MESSAGE): STDMSG(str)
+ }
+ }
+ return NULL;
+}
+
+// upb_onfieldreg callback: wires the field's accessor table (if it has one)
+// into its handlers and binds the fielddef's own fval. Fields without an
+// accessor are left untouched. The closure is not used.
+static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) {
+  (void)c;
+  if (!f->accessor) return;
+
+  upb_fhandlers_setstartseq(fh, f->accessor->appendseq);
+  upb_fhandlers_setvalue(fh, f->accessor->set);
+  upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg);
+  upb_fhandlers_setfval(fh, f->fval);
+}
+
+// Registers "m" and its children in "h", using each field's accessor table
+// as its handlers. No message-level callback and no closure are needed.
+// Returns the mhandlers created for "m".
+upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m) {
+  upb_mhandlers *root =
+      upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL);
+  return root;
+}
diff --git a/upb/msg.h b/upb/msg.h
new file mode 100644
index 0000000..625d805
--- /dev/null
+++ b/upb/msg.h
@@ -0,0 +1,270 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010-2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Routines for reading and writing message data to an in-memory structure,
+ * similar to a C struct.
+ *
+ * upb does not define one single message object that everyone must use.
+ * Rather it defines an abstract interface for reading and writing members
+ * of a message object, and all of the parsers and serializers use this
+ * abstract interface. This allows upb's parsers and serializers to be used
+ * regardless of what memory management scheme or synchronization model the
+ * application is using.
+ *
+ * A standard set of accessors is provided for doing simple reads and writes at
+ * a known offset into the message. These accessors should be used when
+ * possible, because they are specially optimized -- for example, the JIT can
+ * recognize them and emit specialized code instead of having to call the
+ * function at all. The application can substitute its own accessors when the
+ * standard accessors are not suitable.
+ */
+
+#ifndef UPB_MSG_H
+#define UPB_MSG_H
+
+#include <stdlib.h>
+#include "upb/def.h"
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_accessor ***************************************************************/
+
+// A upb_accessor is a table of function pointers for doing reads and writes
+// for one specific upb_fielddef. Each field has a separate accessor, which
+// lives in the fielddef.
+
+// Reader callbacks: given a message "m" and the field's fval, report whether
+// the field is present (has) or return its value (get).
+typedef bool upb_has_reader(void *m, upb_value fval);
+typedef upb_value upb_value_reader(void *m, upb_value fval);
+
+// Sequence iteration callbacks: seqbegin takes a sequence "s" and returns an
+// iterator, seqnext takes the sequence and an iterator and returns another
+// iterator, and seqget returns the value for an iterator. A NULL iterator
+// means iteration is finished (see upb_seq_done()).
+typedef void *upb_seqbegin_handler(void *s);
+typedef void *upb_seqnext_handler(void *s, void *iter);
+typedef upb_value upb_seqget_handler(void *iter);
+// A sequence iterator is exhausted exactly when it is NULL.
+INLINE bool upb_seq_done(void *iter) {
+  return !iter;
+}
+
+// Table of function pointers used to write and read a single field; see the
+// per-member comments for which kinds of field each entry applies to.
+typedef struct _upb_accessor_vtbl {
+ // Writers. These take an fval as a parameter because the callbacks are used
+ // as upb_handlers, but the fval is always the fielddef for that field.
+ upb_startfield_handler *appendseq; // Repeated fields only.
+ upb_startfield_handler *appendsubmsg; // Submsg fields (repeated or no).
+ upb_value_handler *set; // Scalar fields (repeated or no).
+
+ // Readers.
+ upb_has_reader *has; // Called by upb_msg_has().
+ upb_value_reader *get; // Called by upb_msg_get().
+ upb_seqbegin_handler *seqbegin; // Called by upb_seq_begin().
+ upb_seqnext_handler *seqnext; // Called by upb_seq_next().
+ upb_seqget_handler *seqget; // Called by upb_seq_get().
+} upb_accessor_vtbl;
+
+// Registers handlers for writing into a message of the given type.
+upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m);
+
+// Returns an stdmsg accessor for the given fielddef.
+upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
+
+
+/* upb_msg/upb_seq ************************************************************/
+
+// upb_msg and upb_seq allow for generic access to a message through its
+// accessor vtable. Note that these do *not* allow you to create, destroy, or
+// take references on the objects -- these operations are specifically outside
+// the scope of what the accessors define.
+
+// Clears all hasbits.
+// TODO: Add a separate function for setting primitive values back to their
+// defaults (but not strings, submessages, or arrays).
+void upb_msg_clear(void *msg, upb_msgdef *md);
+
+// Could add a method that recursively clears submessages, strings, and
+// arrays if desired. This could be a win if you wanted to merge without
+// needing hasbits, because during parsing you would never clear submessages
+// or arrays. Also this could be desired to provide proto2 operations on
+// generated messages.
+
+// Returns true iff field "f" has an accessor and that accessor's "has" reader
+// reports the field as present in message "m".
+INLINE bool upb_msg_has(void *m, upb_fielddef *f) {
+  if (!f->accessor) return false;
+  return f->accessor->has(m, f->fval);
+}
+
+// Reads field "f" of message "m" through the field's accessor.
+// Precondition (asserted): the field is known to be set, i.e.
+// upb_msg_has(m, f) is true.
+INLINE upb_value upb_msg_get(void *m, upb_fielddef *f) {
+  assert(upb_msg_has(m, f));
+  upb_value val = f->accessor->get(m, f->fval);
+  return val;
+}
+
+// Starts iterating over sequence "s" of field "f"; returns the accessor's
+// initial iterator. The field must have an accessor (asserted).
+INLINE void *upb_seq_begin(void *s, upb_fielddef *f) {
+  assert(f->accessor);
+  void *first = f->accessor->seqbegin(s);
+  return first;
+}
+
+// Advances "iter" within sequence "s"; must not be called on a finished
+// iterator (asserted).
+INLINE void *upb_seq_next(void *s, void *iter, upb_fielddef *f) {
+  assert(f->accessor);
+  assert(!upb_seq_done(iter));
+  void *following = f->accessor->seqnext(s, iter);
+  return following;
+}
+
+// Returns the value at "iter"; must not be called on a finished iterator
+// (asserted).
+INLINE upb_value upb_seq_get(void *iter, upb_fielddef *f) {
+  assert(f->accessor);
+  assert(!upb_seq_done(iter));
+  upb_value val = f->accessor->seqget(iter);
+  return val;
+}
+
+
+/* upb_msgvisitor *************************************************************/
+
+// A upb_msgvisitor reads data from an in-memory structure using its accessors,
+// pushing the results to a given set of upb_handlers.
+// TODO: not yet implemented.
+
+// One visited field: its handlers, its fielddef, and (for submessage fields)
+// an index identifying the submessage.
+// NOTE(review): msgindex presumably indexes upb_msgvisitor.messages -- confirm.
+typedef struct {
+ upb_fhandlers *fh;
+ upb_fielddef *f;
+ uint16_t msgindex; // Only when upb_issubmsg(f).
+} upb_msgvisitor_field;
+
+// The fields of one message type: an array of fields_len entries.
+typedef struct {
+ upb_msgvisitor_field *fields;
+ int fields_len;
+} upb_msgvisitor_msg;
+
+// One frame of visitor position state (message, field, and array indexes).
+typedef struct {
+ uint16_t msgindex;
+ uint16_t fieldindex;
+ uint32_t arrayindex; // UINT32_MAX if not an array frame.
+} upb_msgvisitor_frame;
+
+// The visitor itself: an array of messages_len message descriptions plus the
+// dispatcher member.
+typedef struct {
+ upb_msgvisitor_msg *messages;
+ int messages_len;
+ upb_dispatcher dispatcher;
+} upb_msgvisitor;
+
+// Initializes a msgvisitor that will push data from messages of the given
+// msgdef to the given set of handlers.
+void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
+void upb_msgvisitor_uninit(upb_msgvisitor *v);
+
+void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
+void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
+
+
+/* Standard writers. **********************************************************/
+
+// Allocates a new stdmsg.
+void *upb_stdmsg_new(upb_msgdef *md);
+
+// Recursively frees any strings or submessages that the message refers to.
+void upb_stdmsg_free(void *m, upb_msgdef *md);
+
+// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit.
+upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
+upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
+ uint16_t subm_size, uint8_t subm_setbytes);
+
+// Value writers for every in-memory type: write the data to a known offset
+// from the closure "c" and set the hasbit (if any).
+// TODO: can we get away with having only one for int64, uint64, double, etc?
+// The main thing in the way atm is that the upb_value is strongly typed
+// in debug mode.
+upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
+
+// Value writers for repeated fields: the closure points to a standard array
+// struct, appends the value to the end of the array, resizing with realloc()
+// if necessary.
+// Standard array struct used by the repeated-field writers below.
+// NOTE(review): "ptr" is presumably the realloc()-managed element storage
+// referred to in the comment above -- confirm in msg.c.
+typedef struct {
+ char *ptr;
+ uint32_t len; // Number of elements present.
+ uint32_t size; // Number of elements allocated.
+} upb_stdarray;
+
+upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
+
+// Writers for C strings (NULL-terminated): we can find a char* at a known
+// offset from the closure "c". Calls realloc() on the pointer to allocate
+// the memory (TODO: investigate whether checking malloc_usable_size() would
+// be cheaper than realloc()). Also sets the hasbit, if any.
+//
+// Since the string is NULL terminated and does not store an explicit length,
+// these are not suitable for binary data that can contain NULLs.
+upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for length-delimited strings: we explicitly store the length, so
+// the data can contain NULLs. Stores the data using upb_stdarray
+// which is located at a known offset from the closure "c" (note that it
+// is included inline rather than pointed to). Also sets the hasbit, if any.
+upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for startseq and startmsg which allocate (or reuse, if possible)
+// a sub data structure (upb_stdarray or a submessage, respectively),
+// setting the hasbit. If the hasbit is already set, the existing data
+// structure is used verbatim. If the hasbit is not already set, the pointer
+// is checked for NULL. If it is NULL, a new substructure is allocated,
+// cleared, and used. If it is not NULL, the existing substructure is
+// cleared and reused.
+//
+// If there is no hasbit, we always behave as if the hasbit was not set,
+// so any existing data for this array or submessage is cleared. In most
+// cases this will be fine since each array or non-repeated submessage should
+// occur at most once in the stream. But if the client is using "concatenation
+// as merging", it will want to make sure hasbits are allocated so merges can
+// happen appropriately.
+//
+// If there was a demand for the behavior that absence of a hasbit acts as if
+// the bit was always set, we could provide that also. But Clear() would need
+// to act recursively, which is less efficient since it requires an extra pass
+// over the tree.
+upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
+
+
+/* Standard readers. **********************************************************/
+
+bool upb_stdmsg_has(void *c, upb_value fval);
+void *upb_stdmsg_seqbegin(void *c);
+
+upb_value upb_stdmsg_getint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getdouble(void *c, upb_value fval);
+upb_value upb_stdmsg_getfloat(void *c, upb_value fval);
+upb_value upb_stdmsg_getbool(void *c, upb_value fval);
+upb_value upb_stdmsg_getptr(void *c, upb_value fval);
+
+void *upb_stdmsg_8byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_4byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_1byte_seqnext(void *c, void *iter);
+
+upb_value upb_stdmsg_seqgetint64(void *c);
+upb_value upb_stdmsg_seqgetint32(void *c);
+upb_value upb_stdmsg_seqgetuint64(void *c);
+upb_value upb_stdmsg_seqgetuint32(void *c);
+upb_value upb_stdmsg_seqgetdouble(void *c);
+upb_value upb_stdmsg_seqgetfloat(void *c);
+upb_value upb_stdmsg_seqgetbool(void *c);
+upb_value upb_stdmsg_seqgetptr(void *c);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
new file mode 100644
index 0000000..218c780
--- /dev/null
+++ b/upb/pb/decoder.c
@@ -0,0 +1,469 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include "upb/bytestream.h"
+#include "upb/msg.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/varint.h"
+
+// Used for frames that have no specific end offset: groups, repeated primitive
+// fields inside groups, and the top-level message.
+#define UPB_NONDELIMITED UINT32_MAX
+
+#ifdef UPB_USE_JIT_X64
+#define Dst_DECL upb_decoder *d
+#define Dst_REF (d->dynasm)
+#define Dst (d)
+#include "dynasm/dasm_proto.h"
+#include "upb/pb/decoder_x86.h"
+#endif
+
+// It's unfortunate that we have to micro-manage the compiler this way,
+// especially since this tuning is necessarily specific to one hardware
+// configuration. But empirically on a Core i7, performance increases 30-50%
+// with these annotations. Every instance where these appear, gcc 4.2.1 made
+// the wrong decision and degraded performance in benchmarks.
+#define FORCEINLINE static __attribute__((always_inline))
+#define NOINLINE static __attribute__((noinline))
+
+// Unwinds out of the decoder by jumping to the sigsetjmp() point stored in
+// d->exitjmp, delivering the value 1. Does not return.
+static void upb_decoder_exit(upb_decoder *d) {
+  siglongjmp(d->exitjmp, 1);
+}
+
+// Same as upb_decoder_exit(), but with a void* parameter so it can be passed
+// as a callback (see upb_dispatcher_init() in upb_decoder_initforhandlers()).
+static void upb_decoder_exit2(void *_d) {
+  upb_decoder_exit((upb_decoder *)_d);
+}
+// Records "msg" as a UPB_ERROR on the decoder's status and unwinds out of the
+// decoder via upb_decoder_exit(). Does not return.
+//
+// "msg" is passed as an argument to a "%s" format rather than as the format
+// string itself, so a message containing '%' cannot be misinterpreted as a
+// conversion specification.
+static void upb_decoder_abort(upb_decoder *d, const char *msg) {
+  upb_status_setf(d->status, UPB_ERROR, "%s", msg);
+  upb_decoder_exit(d);
+}
+
+/* Decoding/Buffering of wire types *******************************************/
+
+// Number of unread bytes between d->ptr and d->end in the current buffer.
+static size_t upb_decoder_bufleft(upb_decoder *d) {
+  return (size_t)(d->end - d->ptr);
+}
+// Consumes "len" bytes of the current buffer. The bytes must all be present
+// in the current buffer (asserted); this never pulls a new buffer.
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
+  assert(len <= (size_t)(d->end - d->ptr));
+  d->ptr = d->ptr + len;
+}
+
+// Returns the stream offset of the decoder's current position: the offset of
+// the start of the current buffer plus how far d->ptr is into it. When there
+// is no current pointer (d->ptr == NULL) this is just d->bufstart_ofs.
+size_t upb_decoder_offset(upb_decoder *d) {
+  if (!d->ptr) return (size_t)d->bufstart_ofs;
+  return (size_t)d->bufstart_ofs + (size_t)(d->ptr - d->buf);
+}
+
+// Recomputes d->delim_end for the top dispatcher frame. If the frame has an
+// end offset and that end falls inside the current buffer, delim_end is the
+// address of that end; otherwise it is UINTPTR_MAX so that it can never be
+// reached within this buffer.
+// NOTE(review): "no end" is tested as UINT64_MAX here, while upb_push() is
+// given the 32-bit UPB_NONDELIMITED (UINT32_MAX) -- confirm the width of
+// upb_dispatcher_frame.end_ofs.
+static void upb_decoder_setmsgend(upb_decoder *d) {
+  upb_dispatcher_frame *top = d->dispatcher.top;
+  size_t buflen = d->end - d->buf;
+  size_t delimlen = top->end_ofs - d->bufstart_ofs;
+  bool ends_in_buf = (top->end_ofs != UINT64_MAX) && (delimlen <= buflen);
+
+  if (!ends_in_buf) {
+    // Buffers must not run up against the end of memory.
+    assert((uintptr_t)d->end < UINTPTR_MAX);
+    d->delim_end = UINTPTR_MAX;
+    return;
+  }
+  d->delim_end = (uintptr_t)(d->buf + delimlen);
+}
+
+// Pulls the next buffer from the bytesrc. Should be called only when the
+// current buffer is completely empty.
+// Returns true if a new, non-empty buffer was installed. Returns false if the
+// fetch reported EOF; any other fetch error exits the decoder via
+// upb_decoder_exit().
+static bool upb_trypullbuf(upb_decoder *d) {
+ assert(upb_decoder_bufleft(d) == 0);
+ // Only fetch more data when the current buffer ends where our ref'd region
+ // ends; otherwise the next bytes are already covered by that region.
+ if (d->bufend_ofs == d->refend_ofs) {
+ d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
+ if (!upb_ok(d->status)) {
+ // Leave no usable buffer behind on failure.
+ d->ptr = NULL;
+ d->end = NULL;
+ if (upb_iseof(d->status)) return false;
+ upb_decoder_exit(d);
+ }
+ }
+ // The new buffer begins where the previous one ended.
+ d->bufstart_ofs = d->bufend_ofs;
+ size_t len;
+ d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
+ assert(len > 0);
+ d->bufend_ofs = d->bufstart_ofs + len;
+ d->ptr = d->buf;
+ d->end = d->buf + len;
+#ifdef UPB_USE_JIT_X64
+ // The JIT limit is kept 20 bytes short of the real end of the buffer.
+ d->jit_end = d->end - 20;
+#endif
+ // delim_end is an address within the buffer, so recompute it for the new one.
+ upb_decoder_setmsgend(d);
+ return true;
+}
+
+// Like upb_trypullbuf(), but running out of input is an error: aborts the
+// decoder with "Unexpected EOF" when no new buffer can be pulled.
+static void upb_pullbuf(upb_decoder *d) {
+  bool pulled = upb_trypullbuf(d);
+  if (pulled) return;
+  upb_decoder_abort(d, "Unexpected EOF");
+}
+
+// Marks everything up to d->ptr as completed. If the ref'd region still starts
+// before the current buffer, takes a ref on [bufstart_ofs, refend_ofs), drops
+// the ref on [refstart_ofs, refend_ofs), and moves refstart_ofs up to the
+// start of the current buffer.
+void upb_decoder_commit(upb_decoder *d) {
+  d->completed_ptr = d->ptr;
+  if (d->refstart_ofs >= d->bufstart_ofs) return;
+
+  // Drop our ref on the previous buf's region.
+  upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
+  upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
+  d->refstart_ofs = d->bufstart_ofs;
+}
+
+// Decodes a varint one byte at a time, pulling new buffers as needed, so it
+// works even when the varint spans a buffer seam. Consumes the bytes it reads
+// (d->ptr is advanced past the varint). Aborts the decoder with "Unterminated
+// varint" if the tenth byte still has its continuation bit set, or with
+// "Unexpected EOF" (via upb_pullbuf) if the input ends mid-varint.
+//
+// Each byte is read through d->ptr and consumed with upb_decoder_advance(),
+// so that upb_decoder_bufleft() reflects the bytes already read and callers
+// (which do not advance d->ptr themselves after this call) see the varint as
+// consumed.
+NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
+  uint8_t byte = 0x80;
+  uint64_t u64 = 0;
+  int bitpos;
+  for (bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
+    if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+    byte = (uint8_t)*d->ptr;
+    u64 |= (uint64_t)(byte & 0x7F) << bitpos;
+    upb_decoder_advance(d, 1);
+  }
+  // Ten bytes read and the last one still asks for more: malformed input.
+  if (bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
+  return u64;
+}
+
+// For tags and delimited lengths, which must be <=32bit and are usually small.
+// Decodes a varint that must fit in 32 bits and consumes it. One- and
+// two-byte varints are decoded inline when at least two bytes are buffered;
+// everything else goes through upb_decode_varint_slow(). Aborts the decoder
+// if the decoded value exceeds 0xffffffff.
+FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
+ const char *p = d->ptr;
+ uint32_t ret;
+ uint64_t u64;
+ // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
+ if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely.
+ ret = *p & 0x7f;
+ if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
+ ret |= (*p & 0x7f) << 7;
+ if ((*(p++) & 0x80) == 0) goto done; // likely
+slow:
+ // d->ptr has not been advanced yet on any path reaching here, so the slow
+ // decoder starts over from the first byte of the varint.
+ u64 = upb_decode_varint_slow(d);
+ if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
+ ret = (uint32_t)u64;
+ p = d->ptr; // Turn the next line into a nop.
+done:
+ // Fast path: consume the one or two bytes decoded above.
+ upb_decoder_advance(d, p - d->ptr);
+ return ret;
+}
+
+// Decodes a 32-bit varint into *val unless the input has ended normally.
+// Returns false (leaving *val untouched) when the buffer is empty and either
+// the current buffer ends at the top-level end offset or no further buffer
+// can be pulled; returns true otherwise.
+FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
+  // Check for our two normal end-of-message conditions.
+  if (upb_decoder_bufleft(d) == 0 &&
+      (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d))) {
+    return false;
+  }
+  *val = upb_decode_varint32(d);
+  return true;
+}
+
+// Decodes a varint of up to 64 bits and consumes it, choosing between three
+// strategies based on how many bytes are buffered.
+FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
+  size_t left = upb_decoder_bufleft(d);
+
+  if (left >= 10) {
+    // Fast case: a whole maximum-length varint is guaranteed to be buffered.
+    upb_decoderet fast = upb_vdecode_fast(d->ptr);
+    if (fast.p == NULL) upb_decoder_abort(d, "Unterminated varint");
+    upb_decoder_advance(d, fast.p - d->ptr);
+    return fast.val;
+  }
+
+  if (left > 0) {
+    // Intermediate case -- worth it?  Copy what we have into a scratch buffer
+    // padded with continuation bytes (0x80) and try the fast decoder on that.
+    char padded[10];
+    memset(padded, 0x80, 10);
+    memcpy(padded, d->ptr, left);
+    upb_decoderet partial = upb_vdecode_fast(padded);
+    if (partial.p != NULL) {
+      upb_decoder_advance(d, partial.p - padded);
+      return partial.val;
+    }
+  }
+
+  // Slow case -- varint spans buffer seam.
+  return upb_decode_varint_slow(d);
+}
+
+// Copies exactly "bytes" bytes of input into "buf" and consumes them. When
+// the value straddles a buffer seam, the copy is done piecewise, pulling a
+// new buffer (upb_pullbuf) each time the current one is exhausted; running
+// out of input aborts the decoder with "Unexpected EOF".
+FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
+  if (upb_decoder_bufleft(d) >= bytes) {
+    // Fast case: the whole value is in the current buffer.
+    memcpy(buf, d->ptr, bytes);
+    upb_decoder_advance(d, bytes);
+    return;
+  }
+
+  // Slow case: value spans buffer seam.
+  size_t read = 0;
+  while (1) {
+    // Never copy more than is still needed: a freshly pulled buffer usually
+    // holds more than the remainder of this value.
+    size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
+    if (avail > 0) memcpy(buf + read, d->ptr, avail);
+    upb_decoder_advance(d, avail);
+    read += avail;
+    if (read == bytes) break;
+    // Still short, so the current buffer is empty; fetch the next one.
+    upb_pullbuf(d);
+  }
+}
+
+// Reads a 4-byte fixed-width value from the input, in the byte order it has
+// in memory.
+FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
+  uint32_t raw;
+  upb_decode_fixed(d, (char*)&raw, sizeof(raw));
+  return raw;  // TODO: proper byte swapping
+}
+
+// Reads an 8-byte fixed-width value from the input, in the byte order it has
+// in memory.
+FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
+  uint64_t raw;
+  upb_decode_fixed(d, (char*)&raw, sizeof(raw));
+  return raw;  // TODO: proper byte swapping
+}
+
+// Decodes a length-delimited string: reads the length varint, records the
+// string's stream offset and length in d->strref, and consumes the string's
+// bytes. Returns &d->strref.
+//
+// A new buffer is pulled only when there are string bytes still to consume,
+// so a zero-length string that is the last thing in the input does not abort
+// with "Unexpected EOF".
+//
+// NOTE(review): when the string spans a buffer seam, d->strref.ptr is not
+// updated here -- presumably consumers then go through strref.bytesrc and
+// strref.stream_offset; confirm against upb_strref's contract.
+INLINE upb_strref *upb_decode_string(upb_decoder *d) {
+  uint32_t len = upb_decode_varint32(d);
+  d->strref.stream_offset = upb_decoder_offset(d);
+  d->strref.len = len;
+  if (len > 0 && upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+  if (upb_decoder_bufleft(d) >= len) {
+    // Fast case: the whole string is in the current buffer.
+    d->strref.ptr = d->ptr;
+    upb_decoder_advance(d, len);
+  } else {
+    // Slow case: skip over the string buffer by buffer.
+    while (1) {
+      size_t consume = UPB_MIN(upb_decoder_bufleft(d), len);
+      upb_decoder_advance(d, consume);
+      len -= consume;
+      if (len == 0) break;
+      upb_pullbuf(d);
+    }
+  }
+  return &d->strref;
+}
+
+// Starts a submessage for field "f" on the dispatcher, records "end" as the
+// new frame's end offset, and recomputes the delimited-region end.
+INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
+  upb_dispatcher_frame *frame = upb_dispatch_startsubmsg(&d->dispatcher, f);
+  frame->end_ofs = end;
+  upb_decoder_setmsgend(d);
+}
+
+
+/* Decoding of .proto types ***************************************************/
+
+// Technically, we are losing data if we see a 32-bit varint that is not
+// properly sign-extended. We could detect this and error about the data loss,
+// but proto2 does not do this, so we pass.
+
+// Defines upb_decode_<type>(): decodes one value with upb_decode_<wt>(),
+// converts it with "convfunc" (a cast type or a function name), stores it in
+// a upb_value via upb_value_set<valtype>(), and dispatches it for field "f".
+#define T(type, wt, valtype, convfunc) \
+ INLINE void upb_decode_ ## type(upb_decoder *d, upb_fhandlers *f) { \
+ upb_value val; \
+ upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \
+ upb_dispatch_value(&d->dispatcher, f, val); \
+ } \
+
+// Reinterprets the bits of "n" as a double (bit copy, no numeric conversion).
+static double upb_asdouble(uint64_t n) {
+  double result;
+  memcpy(&result, &n, 8);
+  return result;
+}
+
+// Reinterprets the bits of "n" as a float (bit copy, no numeric conversion).
+static float upb_asfloat(uint32_t n) {
+  float result;
+  memcpy(&result, &n, 4);
+  return result;
+}
+
+// Zigzag-decodes a 32-bit value: the low bit selects the sign mask, the
+// remaining bits carry the magnitude.
+static int32_t upb_zzdec_32(uint32_t n) {
+  uint32_t magnitude = n >> 1;
+  uint32_t signmask = (uint32_t)-(int32_t)(n & 1);
+  return (int32_t)(magnitude ^ signmask);
+}
+
+// Zigzag-decodes a 64-bit value; same scheme as upb_zzdec_32().
+static int64_t upb_zzdec_64(uint64_t n) {
+  uint64_t magnitude = n >> 1;
+  uint64_t signmask = (uint64_t)-(int64_t)(n & 1);
+  return (int64_t)(magnitude ^ signmask);
+}
+
+// One decode function per .proto type. Columns: .proto type, wire decoder
+// (upb_decode_<wt>), upb_value setter suffix, and the conversion applied to
+// the decoded wire value (a cast, or one of the helper functions above).
+T(INT32, varint, int32, int32_t)
+T(INT64, varint, int64, int64_t)
+T(UINT32, varint, uint32, uint32_t)
+T(UINT64, varint, uint64, uint64_t)
+T(FIXED32, fixed32, uint32, uint32_t)
+T(FIXED64, fixed64, uint64, uint64_t)
+T(SFIXED32, fixed32, int32, int32_t)
+T(SFIXED64, fixed64, int64, int64_t)
+T(BOOL, varint, bool, bool)
+T(ENUM, varint, int32, int32_t)
+T(DOUBLE, fixed64, double, upb_asdouble)
+T(FLOAT, fixed32, float, upb_asfloat)
+T(SINT32, varint, int32, upb_zzdec_32)
+T(SINT64, varint, int64, upb_zzdec_64)
+T(STRING, string, strref, upb_strref*)
+
+// Starts a group: pushes a submessage frame with no specific end offset.
+static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
+  const uint32_t no_end = UPB_NONDELIMITED;
+  upb_push(d, f, no_end);
+}
+// Ends the current group: ends the submessage on the dispatcher and
+// recomputes the delimited-region end. The field handlers argument is unused.
+static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
+  upb_dispatcher *disp = &d->dispatcher;
+  (void)f;
+  upb_dispatch_endsubmsg(disp);
+  upb_decoder_setmsgend(d);
+}
+// Starts a length-delimited submessage: reads the length prefix, then pushes
+// a frame that ends "len" bytes past the current stream position.
+//
+// The length is decoded into a local first because decoding it advances
+// d->ptr, and the position must be sampled after that; C does not order the
+// operands of "+". The end is expressed as a stream offset
+// (upb_decoder_offset), matching upb_decoder_setmsgend(), which subtracts
+// d->bufstart_ofs from the frame's end offset.
+static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
+  uint32_t len = upb_decode_varint32(d);
+  upb_push(d, f, upb_decoder_offset(d) + len);
+}
+
+
+/* The main decoding loop *****************************************************/
+
+// Closes every frame whose delimited region ends exactly at the current
+// position: ends the sequence or submessage on the dispatcher and recomputes
+// delim_end, repeating until the position is before delim_end. Aborts the
+// decoder with "Bad submessage end" if the position has gone past delim_end.
+static void upb_decoder_checkdelim(upb_decoder *d) {
+  for (;;) {
+    uintptr_t pos = (uintptr_t)d->ptr;
+    if (pos < d->delim_end) return;
+    if (pos > d->delim_end) upb_decoder_abort(d, "Bad submessage end");
+
+    if (d->dispatcher.top->is_sequence) {
+      upb_dispatch_endseq(&d->dispatcher);
+    } else {
+      upb_dispatch_endsubmsg(&d->dispatcher);
+    }
+    upb_decoder_setmsgend(d);
+  }
+}
+
+// With UPB_USE_JIT_X64: if JIT code exists, the dispatcher is at the bottom
+// of its stack, and d->ptr is before d->jit_end, runs the JIT-generated
+// decoder. Without it, does nothing.
+static void upb_decoder_enterjit(upb_decoder *d) {
+  (void)d;
+#ifdef UPB_USE_JIT_X64
+  bool at_toplevel = d->dispatcher.top == d->dispatcher.stack;
+  if (d->jit_code && at_toplevel && d->ptr < d->jit_end) {
+    // Decodes as many fields as possible, updating d->ptr appropriately,
+    // before falling through to the slow(er) path.
+    void (*jit_entry)(upb_decoder *d) = (void*)d->jit_code;
+    jit_entry(d);
+  }
+#endif
+}
+
+// Decodes tags until one belongs to a known field, and returns that field's
+// handlers; unknown fields are skipped according to their wire type. Returns
+// NULL when no further tag can be read (see upb_trydecode_varint32()).
+// Also starts/ends sequence frames as repeated fields begin and end.
+INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
+ while (1) {
+ uint32_t tag;
+ if (!upb_trydecode_varint32(d, &tag)) return NULL;
+ upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
+
+ // There are no explicit "startseq" or "endseq" markers in protobuf
+ // streams, so we have to infer them by noticing when a repeated field
+ // starts or ends.
+ if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+ upb_dispatch_endseq(&d->dispatcher);
+ upb_decoder_setmsgend(d);
+ }
+ if (f && f->repeated && d->dispatcher.top->f != f) {
+ // TODO: support packed.
+ assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
+ (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
+ // The sequence frame inherits the end offset of the enclosing frame.
+ uint32_t end = d->dispatcher.top->end_ofs;
+ upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
+ upb_decoder_setmsgend(d);
+ }
+ if (f) return f;
+
+ // Unknown field.
+ // NOTE(review): the fixed-width and delimited cases skip with
+ // upb_decoder_advance(), which asserts that the skipped bytes are all in
+ // the current buffer -- values spanning a buffer seam are not handled here.
+ switch (tag & 0x7) {
+ case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
+ case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
+ case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
+ case UPB_WIRE_TYPE_DELIMITED:
+ upb_decoder_advance(d, upb_decode_varint32(d)); break;
+ default:
+ upb_decoder_abort(d, "Invavlid wire type");
+ }
+ // TODO: deliver to unknown field callback.
+ upb_decoder_commit(d);
+ upb_decoder_checkdelim(d);
+ }
+}
+
+// Runs when decoding stops. Closes an open sequence frame, then interprets an
+// EOF status: with an empty dispatcher stack it is a normal end of input (the
+// status is cleared and the end-of-message is dispatched); otherwise the
+// input ended inside a submessage and the status becomes an error. Any
+// non-EOF status is left as it is.
+void upb_decoder_onexit(upb_decoder *d) {
+  if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
+
+  if (d->status->code != UPB_EOF) return;
+
+  if (upb_dispatcher_stackempty(&d->dispatcher)) {
+    // Normal end-of-file.
+    upb_status_clear(d->status);
+    upb_dispatch_endmsg(&d->dispatcher, d->status);
+  } else {
+    upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
+  }
+}
+
+// Runs the decoder until it exits, reporting through "status". The loop below
+// never returns normally: every exit (end of input or error) arrives as a
+// siglongjmp() to d->exitjmp, after which upb_decoder_onexit() finalizes the
+// status and this function returns.
+void upb_decoder_decode(upb_decoder *d, upb_status *status) {
+ // Returns 0 when first called; nonzero when re-entered via siglongjmp().
+ if (sigsetjmp(d->exitjmp, 0)) {
+ upb_decoder_onexit(d);
+ return;
+ }
+ d->status = status;
+ upb_dispatch_startmsg(&d->dispatcher);
+ while(1) { // Main loop: executed once per tag/field pair.
+ upb_decoder_checkdelim(d);
+ upb_decoder_enterjit(d);
+ // if (!d->dispatcher.top->is_packed)
+ upb_fhandlers *f = upb_decode_tag(d);
+ // No more tags: leave through the exit jump (does not return here).
+ if (!f) upb_decoder_exit2(d);
+ f->decode(d, f);
+ upb_decoder_commit(d);
+ }
+}
+
+// Skip callback passed to upb_dispatcher_init(). Currently a no-op: all
+// parameters are ignored and the intended implementation is compiled out
+// under "#if 0".
+static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
+ upb_dispatcher_frame *bottom) {
+ (void)top;
+ (void)bottom;
+ (void)_d;
+#if 0
+ upb_decoder *d = _d;
+ // TODO
+ if (bottom->end_offset == UPB_NONDELIMITED) {
+ // TODO: support skipping groups.
+ abort();
+ }
+ d->ptr = d->buf.ptr + bottom->end_offset;
+#endif
+}
+
+// Initializes decoder "d" to push parsed data to "handlers": sets up the
+// dispatcher (with upb_decoder_skip and upb_decoder_exit2 as callbacks),
+// builds the JIT code when enabled and requested by the handlers, and assigns
+// each field's decode function according to the field's type.
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
+  upb_dispatcher_init(
+      &d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
+#ifdef UPB_USE_JIT_X64
+  d->jit_code = NULL;
+  if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
+#endif
+  // Set function pointers for each field's decode function.
+  for (int msg_idx = 0; msg_idx < handlers->msgs_len; msg_idx++) {
+    upb_mhandlers *m = handlers->msgs[msg_idx];
+    upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
+    while (!upb_inttable_done(it)) {
+      upb_fhandlers *f = upb_inttable_iter_value(it);
+      switch (f->type) {
+        case UPB_TYPE(INT32):    f->decode = &upb_decode_INT32;    break;
+        case UPB_TYPE(INT64):    f->decode = &upb_decode_INT64;    break;
+        case UPB_TYPE(UINT32):   f->decode = &upb_decode_UINT32;   break;
+        case UPB_TYPE(UINT64):   f->decode = &upb_decode_UINT64;   break;
+        case UPB_TYPE(FIXED32):  f->decode = &upb_decode_FIXED32;  break;
+        case UPB_TYPE(FIXED64):  f->decode = &upb_decode_FIXED64;  break;
+        case UPB_TYPE(SFIXED32): f->decode = &upb_decode_SFIXED32; break;
+        case UPB_TYPE(SFIXED64): f->decode = &upb_decode_SFIXED64; break;
+        case UPB_TYPE(BOOL):     f->decode = &upb_decode_BOOL;     break;
+        case UPB_TYPE(ENUM):     f->decode = &upb_decode_ENUM;     break;
+        case UPB_TYPE(DOUBLE):   f->decode = &upb_decode_DOUBLE;   break;
+        case UPB_TYPE(FLOAT):    f->decode = &upb_decode_FLOAT;    break;
+        case UPB_TYPE(SINT32):   f->decode = &upb_decode_SINT32;   break;
+        case UPB_TYPE(SINT64):   f->decode = &upb_decode_SINT64;   break;
+        case UPB_TYPE(STRING):   f->decode = &upb_decode_STRING;   break;
+        case UPB_TYPE(BYTES):    f->decode = &upb_decode_STRING;   break;
+        case UPB_TYPE(GROUP):    f->decode = &upb_decode_GROUP;    break;
+        case UPB_TYPE(MESSAGE):  f->decode = &upb_decode_MESSAGE;  break;
+        case UPB_TYPE_ENDGROUP:  f->decode = &upb_endgroup;        break;
+      }
+      it = upb_inttable_next(&m->fieldtab, it);
+    }
+  }
+}
+
+// Initializes decoder "d" to write into messages of type "m": creates a fresh
+// handlers object, registers the msgdef's accessors on it, initializes the
+// decoder for those handlers, then drops this function's own ref on them.
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
+  upb_handlers *handlers = upb_handlers_new();
+
+  upb_accessors_reghandlers(handlers, m);
+  upb_decoder_initforhandlers(d, handlers);
+
+  upb_handlers_unref(handlers);
+}
+
+// Prepares the decoder to parse a new message from "bytesrc", covering stream
+// offsets [start_ofs, end_ofs), delivering to "closure". All offsets start at
+// start_ofs and there is no current buffer, so the first read pulls one.
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
+                       uint64_t end_ofs, void *closure) {
+  upb_dispatcher_frame *top = upb_dispatcher_reset(&d->dispatcher, closure);
+  top->end_ofs = end_ofs;
+  d->end_ofs = end_ofs;
+
+  d->refstart_ofs = d->refend_ofs = start_ofs;
+  d->bufstart_ofs = d->bufend_ofs = start_ofs;
+
+  d->bytesrc = bytesrc;
+  d->strref.bytesrc = bytesrc;
+
+  // Force a buffer pull.
+  d->buf = d->ptr = d->end = NULL;
+#ifdef UPB_USE_JIT_X64
+  d->jit_end = NULL;
+#endif
+  // But don't let end-of-message get triggered.
+  d->delim_end = UINTPTR_MAX;
+}
+
+// Releases what upb_decoder_initforhandlers() set up: the JIT code (when JIT
+// support is compiled in and the handlers requested it) and the dispatcher.
+void upb_decoder_uninit(upb_decoder *d) {
+#ifdef UPB_USE_JIT_X64
+ // Mirrors the condition under which upb_decoder_makejit() was called.
+ if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
+#endif
+ upb_dispatcher_uninit(&d->dispatcher);
+}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
new file mode 100644
index 0000000..3981359
--- /dev/null
+++ b/upb/pb/decoder.h
@@ -0,0 +1,99 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2010 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb_decoder implements a high performance, streaming decoder for protobuf
+ * data that works by implementing upb_src and getting its data from a
+ * upb_bytesrc.
+ *
+ * The decoder does not currently support non-blocking I/O, in the sense that
+ * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
+ * decoder when data becomes available again. Support for this could be added,
+ * but it would add complexity and perhaps cost efficiency also.
+ */
+
+#ifndef UPB_DECODER_H_
+#define UPB_DECODER_H_
+
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_decoder *****************************************************************/
+
+struct dasm_State;
+
+// All state for one decoder instance: where the input comes from, where the
+// parsed data goes, the current buffer and its position in the stream, and
+// the jump buffer used to exit the decode loop.
+typedef struct _upb_decoder {
+ upb_bytesrc *bytesrc; // Source of our serialized data.
+ upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
+ upb_status *status; // Where we will store any errors that occur.
+ upb_strref strref; // For passing string data to callbacks.
+
+ // Offsets for the region we currently have ref'd.
+ uint64_t refstart_ofs, refend_ofs;
+
+ // Current buffer and its stream offset.
+ const char *buf, *ptr, *end;
+ uint64_t bufstart_ofs, bufend_ofs;
+
+ // Stream offset for the end of the top-level message, if any.
+ uint64_t end_ofs;
+
+ // Buf offset as of which we've delivered callbacks; needed for rollback on
+ // UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
+ const char *completed_ptr;
+
+ // End of the delimited region, relative to ptr, or UINTPTR_MAX if not in
+ // this buf.
+ uintptr_t delim_end;
+
+#ifdef UPB_USE_JIT_X64
+ // For JIT, which doesn't do bounds checks in the middle of parsing a field.
+ const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end)
+
+ // JIT-generated machine code (else NULL).
+ char *jit_code;
+ size_t jit_size;
+ char *debug_info;
+
+ struct dasm_State *dynasm;
+#endif
+
+ // Target of the siglongjmp() used to leave the decode loop.
+ sigjmp_buf exitjmp;
+} upb_decoder;
+
+// Initializes/uninitializes a decoder for calling into the given handlers
+// (or to write into the given msgdef, given its accessors). Takes a ref
+// on the handlers or msgdef.
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h);
+
+// Equivalent to:
+// upb_accessors_reghandlers(h, m);
+// upb_decoder_initforhandlers(d, h);
+// except possibly more efficient, by using cached state in the msgdef.
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m);
+void upb_decoder_uninit(upb_decoder *d);
+
+// Resets the internal state of an already-allocated decoder. This puts it in a
+// state where it has not seen any data, and expects the next data to be from
+// the beginning of a new protobuf. Parsers must be reset before they can be
+// used. A decoder can be reset multiple times.
+//
+// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message.
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs,
+ uint64_t end_ofs, void *closure);
+
+void upb_decoder_decode(upb_decoder *d, upb_status *status);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_DECODER_H_ */
diff --git a/upb/pb/decoder_x86.dasc b/upb/pb/decoder_x86.dasc
new file mode 100644
index 0000000..19043c6
--- /dev/null
+++ b/upb/pb/decoder_x86.dasc
@@ -0,0 +1,694 @@
+|//
+|// upb - a minimalist implementation of protocol buffers.
+|//
+|// Copyright (c) 2011 Google Inc. See LICENSE for details.
+|// Author: Josh Haberman <jhaberman@gmail.com>
+|//
+|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
+|// generates code specialized to parsing the specific message and
+|// calling specific handlers.
+
+#define UPB_NONE -1
+#define UPB_MULTIPLE -2
+#define UPB_TOPLEVEL_ONE -3
+
+#include <sys/mman.h>
+#include "dynasm/dasm_proto.h"
+#include "dynasm/dasm_x86.h"
+
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// We map into the low 32 bits when we can, but if this is not available
+// (like on OS X) we take what we can get. It's not required for correctness,
+// it's just a performance thing that makes it more likely that our jumps
+// can be rel32 (i.e. within 32-bits of our pc) instead of the longer
+// sequence required for other jumps (see callp).
+#ifndef MAP_32BIT
+#define MAP_32BIT 0
+#endif
+
+// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
+// at runtime. GDB 7.x+ has defined an interface for doing this, and these
+// structure/function definitions are copied out of gdb/jit.h.
+//
+// We need to give GDB an ELF file at runtime describing the symbols we have
+// generated. To avoid implementing the ELF format, we generate an ELF file
+// at compile-time and compile it in as a character string. We can replace
+// a few key constants (address of JIT-ted function and its size) by looking
+// for a few magic numbers and doing a dumb string replacement.
+
+#ifndef __APPLE__
+#include "upb/pb/jit_debug_elf_file.h"
+
+// Values stored in gdb_jit_descriptor.action_flag (upb_reg_jit_gdb stores
+// GDB_JIT_REGISTER after linking a new entry).
+// NOTE(review): these declarations are copied from gdb/jit.h; names, values
+// and layout presumably must match what GDB expects -- confirm before
+// changing any of them.
+typedef enum
+{
+ GDB_JIT_NOACTION = 0,
+ GDB_JIT_REGISTER,
+ GDB_JIT_UNREGISTER
+} jit_actions_t;
+
+// One node of the doubly-linked list of registered symbol files.
+// symfile_addr/symfile_size describe the in-memory ELF image for the entry.
+typedef struct gdb_jit_entry {
+ struct gdb_jit_entry *next_entry;
+ struct gdb_jit_entry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+} gdb_jit_entry;
+
+// Head of the entry list, plus the action (a jit_actions_t value) that was
+// most recently applied to relevant_entry.
+typedef struct {
+ uint32_t version;
+ uint32_t action_flag;
+ gdb_jit_entry *relevant_entry;
+ gdb_jit_entry *first_entry;
+} gdb_jit_descriptor;
+
+// The single descriptor instance: version 1, no action, empty list.
+gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
+
+// Deliberately empty and never inlined; called after the descriptor has been
+// updated. The empty asm statement keeps the body from being optimized away.
+void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
+
+// Registers the JIT-ted code of decoder d with GDB.
+//
+// Copies the canned ELF image (upb_pb_jit_debug_elf_file_o) into
+// d->debug_info, replaces the placeholder words 0x12345678 and 0x321 with the
+// address and size of d->jit_code, links a new gdb_jit_entry at the head of
+// __jit_debug_descriptor's list and calls __jit_debug_register_code().
+//
+// Registration is best-effort: if memory cannot be allocated the function
+// returns without registering anything.
+void upb_reg_jit_gdb(upb_decoder *d) {
+  // Create debug info.
+  size_t elf_len = upb_pb_jit_debug_elf_file_o_len;
+  d->debug_info = malloc(elf_len);
+  if (!d->debug_info) return;
+  memcpy(d->debug_info, upb_pb_jit_debug_elf_file_o, elf_len);
+
+  // Only whole 64-bit words are scanned; a trailing partial word is left
+  // untouched.
+  uint64_t *p = (void*)d->debug_info;
+  uint64_t *words_end = p + elf_len / sizeof(*p);
+  for (; p < words_end; ++p) {
+    // else-if: a word that was just patched must not be matched again.
+    if (*p == 0x12345678) {
+      *p = (uintptr_t)d->jit_code;
+    } else if (*p == 0x321) {
+      *p = d->jit_size;
+    }
+  }
+
+  // Register the JIT-ted code with GDB.
+  gdb_jit_entry *e = malloc(sizeof(*e));
+  if (!e) return;  // d->debug_info is still freed by upb_decoder_freejit().
+  e->next_entry = __jit_debug_descriptor.first_entry;
+  e->prev_entry = NULL;
+  if (e->next_entry) e->next_entry->prev_entry = e;
+  e->symfile_addr = d->debug_info;
+  e->symfile_size = elf_len;
+  __jit_debug_descriptor.first_entry = e;
+  __jit_debug_descriptor.relevant_entry = e;
+  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
+  __jit_debug_register_code();
+}
+
+#else
+
+// Variant used when __APPLE__ is defined: no GDB registration is performed.
+void upb_reg_jit_gdb(upb_decoder *d) { (void)d; }
+
+#endif
+
+|.arch x64
+|.actionlist upb_jit_actionlist
+|.globals UPB_JIT_GLOBAL_
+|.globalnames upb_jit_globalnames
+|
+|// Calling conventions.
+|.define ARG1_64, rdi
+|.define ARG2_8, sil
+|.define ARG2_32, esi
+|.define ARG2_64, rsi
+|.define ARG3_8, dl
+|.define ARG3_32, edx
+|.define ARG3_64, rdx
+|
+|// Register allocation / type map.
+|// ALL of the code in this file uses these register allocations.
+|// When we "call" within this file, we do not use regular calling
+|// conventions, but of course when calling to user callbacks we must.
+|.define PTR, rbx
+|.define CLOSURE, r12
+|.type FRAME, upb_dispatcher_frame, r13
+|.type STRREF, upb_strref, r14
+|.type DECODER, upb_decoder, r15
+|
+|// Calls the C function at "addr" (a C expression evaluated when the code is
+|// emitted). Addresses below 0xffffffff get a direct "call &addr"; anything
+|// else is loaded into rax with mov64 and called indirectly, which clobbers
+|// rax.
+|.macro callp, addr
+|| if ((uintptr_t)addr < 0xffffffff) {
+ | call &addr
+|| } else {
+ | mov64 rax, (uintptr_t)addr
+ | call rax
+|| }
+|.endmacro
+|
+|// Checks PTR for end-of-buffer.
+|// When PTR >= DECODER->effective_end: group messages (m->is_group) leave the
+|// JIT through exit_jit, all other messages jump to m->jit_endofbuf_pclabel.
+|// "m" is a C expression (upb_mhandlers*) evaluated when the code is emitted.
+|.macro check_eob, m
+| cmp PTR, DECODER->effective_end
+|| if (m->is_group) {
+ | jae ->exit_jit
+|| } else {
+ | jae =>m->jit_endofbuf_pclabel
+|| }
+|.endmacro
+|
+|// Decodes varint from [PTR + offset] -> ARG3.
+|// Saves new pointer as rax.
+|// The bytes at [PTR + offset] must already be loaded into ecx by the caller
+|// (decode_varint does this). One- and two-byte varints are decoded inline;
+|// longer ones are handed to upb_vdecode_max8_fast with the pointer past the
+|// first two bytes in ARG1 and the partial value in ARG2. A zero rax after
+|// that call exits the JIT. Clobbers esi.
+|// NOTE(review): presumably upb_vdecode_max8_fast returns the value in rdx
+|// alongside the pointer in rax -- confirm against its definition.
+|.macro decode_loaded_varint, offset
+| // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
+| lea rax, [PTR + offset + 1]
+| mov ARG3_32, ecx
+| and ARG3_32, 0x7f
+| test cl, cl
+| jns >9
+| lea rax, [PTR + offset + 2]
+| movzx esi, ch
+| and esi, 0x7f
+| shl esi, 7
+| or ARG3_32, esi
+| test cx, cx
+| jns >9
+| mov ARG1_64, rax
+| mov ARG2_32, ARG3_32
+| callp upb_vdecode_max8_fast
+| test rax, rax
+| jz ->exit_jit // >10-byte varint.
+|9:
+|.endmacro
+|
+|// Loads four bytes from [PTR + offset] into ecx, decodes the varint found
+|// there into ARG3 (see decode_loaded_varint) and advances PTR past it.
+|.macro decode_varint, offset
+| mov ecx, dword [PTR + offset]
+| decode_loaded_varint offset
+| mov PTR, rax
+|.endmacro
+|
+|// Decode the tag -> edx.
+|// Could specialize this by avoiding the value masking: could just key the
+|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
+|// Currently only support tables where all entries are in the array part.
+|//
+|// Expects the bytes at PTR to be loaded in ecx already; PTR itself is not
+|// advanced here. At the jump, ecx holds the field number and edx the wire
+|// type. Field numbers above m->max_field_number exit the JIT.
+|.macro dyndispatch, m
+| decode_loaded_varint, 0
+| mov ecx, edx
+| shr ecx, 3
+| and edx, 0x7
+| cmp ecx, m->max_field_number // Bounds-check the field.
+| ja ->exit_jit // In the future; could be unknown label
+|| if ((uintptr_t)m->tablearray < 0xffffffff) {
+| mov rax, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
+|| } else {
+| mov64 rax, (uintptr_t)m->tablearray
+| mov rax, qword [rax + rcx*8]
+|| }
+| jmp rax // Dispatch: unpredictable jump.
+|.endmacro
+|
+|// Push a stack frame (not the CPU stack, the upb_decoder stack).
+|// Exits the JIT if the new frame would reach DECODER->dispatcher.limit.
+|// Fills in f, closure, end_ofs and is_sequence of the new frame and makes it
+|// both dispatcher.top and FRAME. Clobbers rax.
+|// Note that CLOSURE is loaded from rdx, not from the closure_ argument, so
+|// callers must have the new closure in rdx.
+|.macro pushframe, f, closure_, end_offset_, is_sequence_
+| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
+| cmp rax, qword DECODER->dispatcher.limit
+| jae ->exit_jit // Frame stack overflow.
+| mov qword FRAME:rax->f, f
+| mov qword FRAME:rax->closure, closure_
+| mov dword FRAME:rax->end_ofs, end_offset_
+| mov byte FRAME:rax->is_sequence, is_sequence_
+| mov CLOSURE, rdx
+| mov DECODER->dispatcher.top, rax
+| mov FRAME, rax
+|.endmacro
+|
+|// Pops the top upb_decoder stack frame: steps FRAME back by one frame,
+|// publishes it as dispatcher.top, recomputes the message end for the frame
+|// that is now current and reloads CLOSURE from it.
+|// The macro takes no parameter; the "m" passed to setmsgend is whatever "m"
+|// names at the place where popframe is used.
+|.macro popframe
+| sub FRAME, sizeof(upb_dispatcher_frame)
+| mov DECODER->dispatcher.top, FRAME
+| setmsgend m
+| mov CLOSURE, FRAME->closure
+|.endmacro
+|
+|// Recomputes DECODER->delim_end and DECODER->effective_end for the current
+|// FRAME. Groups get delim_end = all-ones and effective_end = jit_end; other
+|// messages get delim_end = buf + FRAME->end_ofs and
+|// effective_end = min(delim_end, jit_end). Clobbers rax and rsi.
+|.macro setmsgend, m
+| mov rsi, DECODER->jit_end
+|| if (m->is_group) {
+| mov64 rax, 0xffffffffffffffff
+| mov qword DECODER->delim_end, rax
+| mov DECODER->effective_end, rsi
+|| } else {
+| // Could store a correctly-biased version in the frame, at the cost of
+| // a larger stack.
+| mov eax, dword FRAME->end_ofs
+| add rax, qword DECODER->buf
+| mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs
+| cmp rax, rsi
+| jb >8
+| mov rax, rsi // effective_end = min(d->delim_end, d->jit_end)
+|8:
+| mov DECODER->effective_end, rax
+|| }
+|.endmacro
+|
+|// rcx contains the next bytes of input (loaded by the caller); compare them
+|// against "tag", but since it is a varint we must only compare as many
+|// bytes as actually have data. The result is left in the flags.
+|// For 3- and 5-byte tags rcx is masked in place, and the 5-byte case also
+|// clobbers rdx.
+|.macro checktag, tag
+|| switch (upb_value_size(tag)) {
+||   case 1:
+|      cmp cl, tag
+||     break;
+||   case 2:
+|      cmp cx, tag
+||     break;
+||   case 3:
+|      and ecx, 0xffffff  // 3 bytes
+|      cmp rcx, tag
+||     break;  // Was missing: fell through and emitted the 4-byte compare too.
+||   case 4:
+|      cmp ecx, tag
+||     break;
+||   case 5:
+|      mov64 rdx, 0xffffffffff  // 5 bytes
+|      and rcx, rdx
+|      cmp rcx, tag
+||     break;
+||   default: abort();
+|| }
+|.endmacro
+|
+|// Loads f->fval into ARG2, picking the shortest form: xor for zero, a 32-bit
+|// mov for values below 0xffffffff, and mov64 for everything else.
+|.macro loadfval, f
+|| if (f->fval.val.uint64 == 0) {
+| xor ARG2_32, ARG2_32
+|| } else if (f->fval.val.uint64 < 0xffffffff) {
+| mov ARG2_32, f->fval.val.uint64
+|| } else {
+| mov64 ARG2_64, f->fval.val.uint64
+|| }
+|.endmacro
+
+#include <stdlib.h>
+#include "upb/pb/varint.h"
+
+// Emits the JIT code that parses one field and calls its handlers.
+//
+//   d         decoder whose DynASM state receives the code.
+//   tag       varint-encoded tag of this field (the caller passes
+//             upb_vencode32() of the table key).
+//   next_tag  varint-encoded tag of the field emitted after this one, or 0
+//             when there is none.
+//   m         handlers of the message that contains the field.
+//   f         handlers of this field.
+//   next_f    handlers belonging to next_tag; only used if next_tag != 0.
+//
+// PTR should point to the beginning of the tag.
+static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
+ upb_mhandlers *m,
+ upb_fhandlers *f, upb_fhandlers *next_f) {
+ // Number of bytes the tag occupies on the wire; used as the offset of the
+ // value relative to PTR.
+ int tag_size = upb_value_size(tag);
+
+ // PC-label for the dispatch table.
+ // We check the wire type (which must be loaded in edx) because the
+ // table is keyed on field number, not type.
+ |=>f->jit_pclabel:
+ | cmp edx, (tag & 0x7)
+ | jne ->exit_jit // In the future: could be an unknown field or packed.
+ |=>f->jit_pclabel_notypecheck:
+ if (f->repeated) {
+ if (f->startseq) {
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+ | callp f->startseq
+ } else {
+ | mov rdx, CLOSURE
+ }
+ | mov esi, FRAME->end_ofs
+ | pushframe f, rdx, esi, true
+ }
+
+ |1: // Label for repeating this field.
+
+ // Decode the value into arg 3 for the callback.
+ switch (f->type) {
+ case UPB_TYPE(DOUBLE):
+ case UPB_TYPE(FIXED64):
+ case UPB_TYPE(SFIXED64):
+ | mov ARG3_64, qword [PTR + tag_size]
+ | add PTR, 8 + tag_size
+ break;
+
+ case UPB_TYPE(FLOAT):
+ case UPB_TYPE(FIXED32):
+ case UPB_TYPE(SFIXED32):
+ | mov ARG3_32, dword [PTR + tag_size]
+ | add PTR, 4 + tag_size
+ break;
+
+ case UPB_TYPE(BOOL):
+ // Can't assume it's one byte long, because bool must be wire-compatible
+ // with all of the varint integer types.
+ | decode_varint tag_size
+ | test ARG3_64, ARG3_64
+ | setne ARG3_8 // Other bytes left with val, should be ok.
+ break;
+
+ case UPB_TYPE(INT64):
+ case UPB_TYPE(UINT64):
+ case UPB_TYPE(INT32):
+ case UPB_TYPE(UINT32):
+ case UPB_TYPE(ENUM):
+ | decode_varint tag_size
+ break;
+
+ case UPB_TYPE(SINT64):
+ // 64-bit zig-zag decoding.
+ | decode_varint tag_size
+ | mov rax, ARG3_64
+ | shr ARG3_64, 1
+ | and rax, 1
+ | neg rax
+ | xor ARG3_64, rax
+ break;
+
+ case UPB_TYPE(SINT32):
+ // 32-bit zig-zag decoding.
+ | decode_varint tag_size
+ | mov eax, ARG3_32
+ | shr ARG3_32, 1
+ | and eax, 1
+ | neg eax
+ | xor ARG3_32, eax
+ break;
+
+ case UPB_TYPE(STRING):
+ case UPB_TYPE(BYTES):
+ // We only handle the case where the entire string is in our current
+ // buf, which sidesteps any security problems. The C path has more
+ // robust checks.
+ | decode_varint tag_size
+ | mov STRREF->len, ARG3_32
+ | mov STRREF->ptr, PTR
+ | mov rax, PTR
+ | sub rax, DECODER->buf
+ | add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs
+ | mov STRREF->stream_offset, eax
+ | add PTR, ARG3_64
+ | mov ARG3_64, STRREF
+ | cmp PTR, DECODER->effective_end
+ | ja ->exit_jit // Can't deliver, whole string not in buf.
+ break;
+
+ case UPB_TYPE_ENDGROUP: // A pseudo-type.
+ | add PTR, tag_size
+ | mov DECODER->ptr, PTR
+ | jmp =>m->jit_endofmsg_pclabel
+ // Nothing else is emitted for ENDGROUP: no value, no callbacks.
+ return;
+
+ // Will dispatch callbacks and call submessage in a second.
+ case UPB_TYPE(MESSAGE):
+ | decode_varint tag_size
+ break;
+ case UPB_TYPE(GROUP):
+ | add PTR, tag_size
+ break;
+
+ default: abort();
+ }
+ // Commit our work by advancing ptr.
+ // (If in the future we wanted to support a UPB_SUSPEND_AGAIN that
+ // suspends the decoder and redelivers the value later, we would
+ // need to adjust this to happen perhaps after the callback ran).
+ | mov DECODER->ptr, PTR
+
+ // Load closure and fval into arg registers.
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+
+ // Call callbacks.
+ if (upb_issubmsgtype(f->type)) {
+ // Call startsubmsg handler (if any).
+ // r12 is CLOSURE; its value is already in ARG1, so r12d is reused to keep
+ // the decoded length (ARG3) until pushframe reloads CLOSURE from rdx.
+ if (f->startsubmsg) {
+ // upb_sflow_t startsubmsg(void *closure, upb_value fval)
+ | mov r12d, ARG3_32
+ | callp f->startsubmsg
+ } else {
+ | mov rdx, CLOSURE
+ | mov r12d, ARG3_32
+ }
+ if (f->type == UPB_TYPE(MESSAGE)) {
+ | mov rsi, PTR
+ | sub rsi, DECODER->buf
+ | add esi, r12d // = (d->ptr - d->buf) + delim_len
+ } else {
+ assert(f->type == UPB_TYPE(GROUP));
+ | mov esi, UPB_NONDELIMITED
+ }
+ | pushframe f, rdx, esi, false
+
+ // Submessages reachable from several parent fields (UPB_MULTIPLE) are
+ // entered with "call"; all others with a plain jump.
+ upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
+ if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
+ | jmp =>sub_m->jit_startmsg_pclabel;
+ } else {
+ | call =>sub_m->jit_startmsg_pclabel;
+ }
+
+ |=>f->jit_submsg_done_pclabel:
+ | popframe
+
+ // Call endsubmsg handler (if any).
+ if (f->endsubmsg) {
+ // upb_flow_t endsubmsg(void *closure, upb_value fval);
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+ | callp f->endsubmsg
+ }
+ } else {
+ | callp f->value
+ }
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+
+ // Epilogue: load next tag, check for repeated field.
+ | check_eob m
+ | mov rcx, qword [PTR]
+ if (f->repeated) {
+ | checktag tag
+ | je <1
+ | popframe
+ if (f->endseq) {
+ | mov ARG1_64, CLOSURE
+ | loadfval f
+ | callp f->endseq
+ }
+ }
+ if (next_tag != 0) {
+ | checktag next_tag
+ | je =>next_f->jit_pclabel_notypecheck
+ }
+
+ // Fall back to dynamic dispatch. Replicate the dispatch
+ // here so we can learn what fields generally follow others.
+ | dyndispatch m
+ |1:
+}
+
+// qsort() comparator that orders uint32_t values ascending.
+// Returns a negative, zero or positive value as *a is less than, equal to or
+// greater than *b. The values are compared rather than subtracted, because
+// an unsigned difference converted to int has the wrong sign whenever the
+// operands are more than INT_MAX apart.
+static int upb_compare_uint32(const void *a, const void *b) {
+  // TODO: always put ENDGROUP at the end.
+  const uint32_t lhs = *(const uint32_t*)a;
+  const uint32_t rhs = *(const uint32_t*)b;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+// Emits the JIT code for one message type m: the start-of-message entry
+// point, one code section per field (in ascending key order, so that each
+// field can test directly for the tag of the field emitted after it), and
+// the end-of-buffer / end-of-message handling.
+//
+// A message without any fields gets no per-field code at all.
+static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
+  |=>m->jit_startmsg_pclabel:
+  // Call startmsg handler (if any):
+  if (m->startmsg) {
+    // upb_flow_t startmsg(void *closure);
+    | mov ARG1_64, FRAME->closure
+    | callp m->startmsg
+    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+  }
+
+  | setmsgend m
+  | check_eob m
+  | mov ecx, dword [PTR]
+  | dyndispatch m
+
+  // --------- New code section (does not fall through) ------------------------
+
+  // Emit code for parsing each field (dynamic dispatch contains pointers to
+  // all of these).
+
+  // Create an ordering over the fields (inttable ordering is undefined).
+  // An empty table is skipped entirely: there is no field to emit, and
+  // upb_decoder_jit_field() must never be handed a NULL field.
+  int num_keys = upb_inttable_count(&m->fieldtab);
+  if (num_keys > 0) {
+    uint32_t *keys = malloc(num_keys * sizeof(*keys));
+    if (!keys) abort();
+    int idx = 0;
+    for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+        i = upb_inttable_next(&m->fieldtab, i)) {
+      keys[idx++] = upb_inttable_iter_key(i);
+    }
+    qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
+
+    // Each field is emitted one iteration late, once its successor is known.
+    upb_fhandlers *last_f = NULL;
+    uint32_t last_tag = 0;
+    for(int i = 0; i < num_keys; i++) {
+      uint32_t key = keys[i];
+      upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
+      uint32_t tag = upb_vencode32(key);
+      if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
+      last_tag = tag;
+      last_f = f;
+    }
+    if (last_f) upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
+
+    free(keys);
+  }
+
+  // --------- New code section (does not fall through) ------------------------
+
+  // End-of-buf / end-of-message.
+  if (!m->is_group) {
+    // This case doesn't exist for groups, because there eob really means
+    // eob, so that case just exits the jit directly.
+    |=>m->jit_endofbuf_pclabel:
+    | cmp PTR, DECODER->delim_end
+    | jb ->exit_jit // We are at eob, but not end-of-submsg.
+  }
+
+  |=>m->jit_endofmsg_pclabel:
+  // We are at end-of-submsg: call endmsg handler (if any):
+  if (m->endmsg) {
+    // void endmsg(void *closure, upb_status *status) {
+    | mov ARG1_64, FRAME->closure
+    | lea ARG2_64, DECODER->dispatcher.status
+    | callp m->endmsg
+  }
+
+  // Return to whoever entered this message: "ret" if it was entered with
+  // "call" (several parents), otherwise a jump to the single parent field or
+  // out of the JIT for the top-level message.
+  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
+    | ret
+  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
+    | jmp ->exit_jit
+  } else {
+    | jmp =>m->jit_parent_field_done_pclabel
+  }
+
+}
+
+// Message printed by the pclabel-0 stub. It is reached through the dispatch
+// table with the wire type in edx and the field number in ecx, which are the
+// third and fourth integer argument registers for the fprintf call.
+static const char *dbgfmt =
+  "JIT encountered unknown field! wt=%d, fn=%d\n";
+
+// Emits the complete JIT program for d: the prologue that saves the
+// callee-saved registers and loads the fixed register allocation, the code
+// for every message in the handlers, the shared exit_jit epilogue, and the
+// pclabel-0 stub for fields that have no handler.
+static void upb_decoder_jit(upb_decoder *d) {
+  | push rbp
+  | mov rbp, rsp
+  | push r15
+  | push r14
+  | push r13
+  | push r12
+  | push rbx
+  | mov DECODER, ARG1_64
+  | mov FRAME, DECODER:ARG1_64->dispatcher.top
+  | lea STRREF, DECODER:ARG1_64->strref
+  | mov CLOSURE, FRAME->closure
+  | mov PTR, DECODER->ptr
+
+  upb_handlers *h = d->dispatcher.handlers;
+  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
+    | call =>h->msgs[0]->jit_startmsg_pclabel
+    | jmp ->exit_jit
+  }
+
+  // TODO: push return addresses for re-entry (will be necessary for multiple
+  // buffer support).
+  for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
+
+  |->exit_jit:
+  // Message code entered with "call =>..." can jump here while its return
+  // addresses are still on the stack. Re-derive rsp from rbp (five 8-byte
+  // pushes below the saved rbp) so the pops restore the saved registers.
+  | lea rsp, [rbp - 40]
+  | pop rbx
+  | pop r12
+  | pop r13
+  | pop r14
+  | pop r15
+  | leave
+  | ret
+  |=>0:
+  // Full 64-bit loads: these are host pointers and need not fit in 32 bits.
+  | mov64 rdi, (uintptr_t)stderr
+  | mov64 rsi, (uintptr_t)dbgfmt
+  | callp fprintf
+  | callp abort
+}
+
+// Hands field f three consecutive pclabels (entry with wire-type check, entry
+// without it, and "submessage done") and advances *pclabel_count past them.
+void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
+                                     uint32_t *pclabel_count) {
+  uint32_t next_label = *pclabel_count;
+  f->jit_pclabel = next_label++;
+  f->jit_pclabel_notypecheck = next_label++;
+  f->jit_submsg_done_pclabel = next_label++;
+  *pclabel_count = next_label;
+}
+
+// First labelling pass for message m: assigns its four message-level
+// pclabels, marks it as having no known parent, assigns labels to every
+// field, records the largest field number and allocates the dispatch array
+// (one slot per field number from 0 to max_field_number).
+//
+// The field table is keyed by tag, i.e. (field number << 3) | wire type, so
+// the field number is the key shifted right by three bits.
+void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {
+  m->jit_startmsg_pclabel = (*pclabel_count)++;
+  m->jit_endofbuf_pclabel = (*pclabel_count)++;
+  m->jit_endofmsg_pclabel = (*pclabel_count)++;
+  m->jit_unknownfield_pclabel = (*pclabel_count)++;
+  m->jit_parent_field_done_pclabel = UPB_NONE;
+  m->max_field_number = 0;
+  upb_inttable_iter i;
+  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+      i = upb_inttable_next(&m->fieldtab, i)) {
+    uint32_t key = upb_inttable_iter_key(i);
+    uint32_t fieldnum = key >> 3;  // Drop the wire-type bits of the tag.
+    m->max_field_number = UPB_MAX(m->max_field_number, fieldnum);
+    upb_fhandlers *f = upb_inttable_iter_value(i);
+    upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+  }
+  // XXX: Won't work for large field numbers; will need to use a upb_table.
+  m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
+  if (!m->tablearray) abort();
+}
+
+// Second labelling pass. For every submessage-typed field of m, records in
+// the submessage where control goes when it finishes: the field's
+// "submessage done" label while that field is the only parent seen so far,
+// or UPB_MULTIPLE as soon as a second parent shows up.
+void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
+  for(upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
+      !upb_inttable_done(it);
+      it = upb_inttable_next(&m->fieldtab, it)) {
+    upb_fhandlers *field = upb_inttable_iter_value(it);
+    if (!upb_issubmsgtype(field->type)) continue;
+
+    upb_mhandlers *child = upb_fhandlers_getsubmsg(field);
+    if (child->jit_parent_field_done_pclabel != UPB_NONE) {
+      child->jit_parent_field_done_pclabel = UPB_MULTIPLE;
+    } else {
+      child->jit_parent_field_done_pclabel = field->jit_submsg_done_pclabel;
+    }
+  }
+}
+
+// Builds the JIT-compiled decoder for d's handlers.
+//
+// Assigns pclabels, runs the DynASM passes, maps memory for the machine
+// code, registers it with GDB, encodes it, fills in every message's dispatch
+// table and finally makes the code read+execute only. The result is left in
+// d->jit_code / d->jit_size. Aborts if memory for the code cannot be obtained
+// or protected.
+void upb_decoder_makejit(upb_decoder *d) {
+  d->debug_info = NULL;
+
+  // Assign pclabels.
+  uint32_t pclabel_count = 1;
+  upb_handlers *h = d->dispatcher.handlers;
+  for (int i = 0; i < h->msgs_len; i++)
+    upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
+  for (int i = 0; i < h->msgs_len; i++)
+    upb_decoder_jit_assignmsglabs2(h->msgs[i]);
+
+  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
+    h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
+  }
+
+  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
+  if (!globals) abort();
+  dasm_init(d, 1);
+  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
+  dasm_growpc(d, pclabel_count);
+  dasm_setup(d, upb_jit_actionlist);
+
+  upb_decoder_jit(d);
+
+  dasm_link(d, &d->jit_size);
+
+  // Anonymous mappings take fd -1 for portability, and failure is reported
+  // as MAP_FAILED rather than NULL.
+  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
+                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  if (d->jit_code == MAP_FAILED) abort();
+
+  upb_reg_jit_gdb(d);
+
+  dasm_encode(d, d->jit_code);
+
+  // Create dispatch tables.
+  for (int i = 0; i < h->msgs_len; i++) {
+    upb_mhandlers *m = h->msgs[i];
+    for (uint32_t j = 0; j <= m->max_field_number; j++) {
+      // The field table is keyed by tag, so try every wire type for field j.
+      upb_fhandlers *f = NULL;
+      for (int k = 0; k < 8; k++) {
+        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
+        if (f) break;
+      }
+      if (f) {
+        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+      } else {
+        // Don't handle unknown fields yet.
+        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+      }
+    }
+  }
+
+  dasm_free(d);
+  free(globals);
+
+  // Code that cannot be made executable would fault on its first call.
+  if (mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ) != 0) abort();
+
+  // Debugging aid: dump the machine code. Best-effort, so an unwritable
+  // /tmp must not take the decoder down.
+  FILE *f = fopen("/tmp/machine-code", "wb");
+  if (f) {
+    fwrite(d->jit_code, d->jit_size, 1, f);
+    fclose(f);
+  }
+}
+
+// Releases what upb_decoder_makejit() set up for d: the GDB debug-info copy
+// and the mapping that holds the machine code.
+void upb_decoder_freejit(upb_decoder *d) {
+  free(d->debug_info);
+  munmap(d->jit_code, d->jit_size);
+  // TODO: unregister
+}
diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c
new file mode 100644
index 0000000..139dc88
--- /dev/null
+++ b/upb/pb/encoder.c
@@ -0,0 +1,421 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb_encoder.h"
+
+#include <stdlib.h>
+#include "descriptor.h"
+
+/* Functions for calculating sizes of wire values. ****************************/
+
+// Returns the number of bytes needed to encode val as a varint (1..10).
+// Zero is handled up front: it still takes one byte, and __builtin_clzll()
+// has an undefined result for a zero argument.
+static size_t upb_v_uint64_t_size(uint64_t val) {
+  if (val == 0) return 1;
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of the top set bit.
+#else
+  int high_bit = 0;
+  uint64_t tmp = val;
+  while(tmp >>= 1) high_bit++;
+#endif
+  // Each varint byte carries seven payload bits.
+  return high_bit / 7 + 1;
+}
+
+// Varint size of a signed 32-bit value. The value is sign-extended to 64
+// bits first, to stay wire-compatible with int64.
+static size_t upb_v_int32_t_size(int32_t val) {
+  const int64_t widened = val;
+  return upb_v_uint64_t_size((uint64_t)widened);
+}
+// Varint size of an unsigned 32-bit value (zero-extended).
+static size_t upb_v_uint32_t_size(uint32_t val) {
+  const uint64_t widened = val;
+  return upb_v_uint64_t_size(widened);
+}
+// Size of a fixed-width 64-bit wire value; the same for every value.
+static size_t upb_f_uint64_t_size(uint64_t val) {
+  return sizeof(val);
+}
+// Size of a fixed-width 32-bit wire value; the same for every value.
+static size_t upb_f_uint32_t_size(uint32_t val) {
+  return sizeof(val);
+}
+
+
+/* Functions to write wire values. ********************************************/
+
+// Since we know in advance the longest that the value could be, we always make
+// sure that our buffer is long enough. This saves us from having to perform
+// bounds checks.
+
+// Writes val at buf as a varint (wire type: UPB_WIRE_TYPE_VARINT) and
+// returns a pointer one past the last byte written.
+static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
+{
+  // Every seven-bit group that is followed by more data carries the
+  // continuation bit; the final group does not.
+  while (val > 0x7f) {
+    *buf++ = (uint8_t)((val & 0x7f) | 0x80);
+    val >>= 7;
+  }
+  *buf++ = (uint8_t)val;
+  return buf;
+}
+
+// Puts an unsigned 32-bit varint, verbatim. The value is zero-extended, so
+// the upper 32 bits of the 64-bit varint are never set.
+static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
+{
+  const uint64_t widened = val;
+  return upb_put_v_uint64_t(buf, widened);
+}
+
+// Puts a signed 32-bit varint after sign-extending it to 64 bits, which
+// keeps it wire-compatible with 64-bit signed integers.
+static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
+{
+  const int64_t widened = val;
+  return upb_put_v_uint64_t(buf, (uint64_t)widened);
+}
+
+// Stores val into buf[0..3], least-significant byte first.
+static void upb_put32(uint8_t *buf, uint32_t val) {
+  for (int i = 0; i < 4; i++) {
+    buf[i] = (uint8_t)(val >> (8 * i));
+  }
+}
+
+// Writes val at buf as a fixed-length 32-bit integer (wire type:
+// UPB_WIRE_TYPE_32BIT) and returns a pointer one past it.
+static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
+{
+#if UPB_UNALIGNED_READS_OK
+  *(uint32_t*)buf = val;
+#else
+  upb_put32(buf, val);
+#endif
+  return buf + sizeof(val);
+}
+
+// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
+// Returns a pointer one past the eight bytes written.
+static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
+{
+  uint8_t *uint64_end = buf + sizeof(uint64_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint64_t*)buf = val;
+#else
+  // Low half goes first, high half right behind it. The high half used to
+  // be written at buf as well, clobbering the low half and leaving bytes
+  // 4..7 unwritten.
+  upb_put32(buf, (uint32_t)val);
+  upb_put32(buf + sizeof(uint32_t), (uint32_t)(val >> 32));
+#endif
+  return uint64_end;
+}
+
+/* Functions to write and calculate sizes for .proto values. ******************/
+
+// Performs zig-zag encoding, which is used by sint32 and sint64:
+// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
+// The arithmetic is done on unsigned values, because shifting a negative
+// signed integer left is undefined. The second operand is all-ones for
+// negative n and zero otherwise.
+static uint32_t upb_zzenc_32(int32_t n) {
+  return ((uint32_t)n << 1) ^ (0 - (uint32_t)(n < 0));
+}
+static uint64_t upb_zzenc_64(int64_t n) {
+  return ((uint64_t)n << 1) ^ (0 - (uint64_t)(n < 0));
+}
+
+/* Use macros to define a set of three functions for each .proto type:
+ *
+ * // Converts and writes a .proto value into buf, which must already have
+ * // room for the longest possible encoding (no bounds check is performed).
+ * // Returns a pointer one past the data that was written.
+ * uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
+ *
+ * // Returns the number of bytes required to encode val.
+ * size_t upb_get_INT32_size(int32_t val);
+ *
+ * // Given a .proto value s (source) convert it to a wire value.
+ * uint32_t upb_vtowv_INT32(int32_t s);
+ */
+
+// Declarator of the .proto-value -> wire-value converter for "type". Used
+// both as a prototype (followed by ';') and as the head of the definition
+// whose body follows each T(...) line below.
+#define VTOWV(type, wire_t, val_t) \
+  static wire_t upb_vtowv_ ## type(val_t s)
+
+// Defines upb_put_<type>(): converts to the wire value, then writes it with
+// the varint ("v") or fixed ("f") writer for wire_t. member_name is unused.
+#define PUT(type, v_or_f, wire_t, val_t, member_name) \
+  static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \
+    wire_t tmp = upb_vtowv_ ## type(val); \
+    return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \
+  }
+
+// Defines upb_get_<type>_size() and upb_put_<type>(), and opens the
+// definition of upb_vtowv_<type>(). The size is computed from the wire value
+// so that it always matches what upb_put_<type>() writes; this matters for
+// the zig-zag types, and it keeps float/double values from being converted
+// to an integer. member_name is unused.
+#define T(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t);  /* prototype for the functions below */ \
+  static size_t upb_get_ ## type ## _size(val_t val) { \
+    return upb_ ## v_or_f ## _ ## wire_t ## _size(upb_vtowv_ ## type(val)); \
+  } \
+  PUT(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t)
+
+T(INT32, v, int32_t, int32_t, int32) { return (uint32_t)s; }
+T(INT64, v, uint64_t, int64_t, int64) { return (uint64_t)s; }
+T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
+T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
+T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzenc_32(s); }
+T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzenc_64(s); }
+T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
+T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
+T(SFIXED32, f, uint32_t, int32_t, int32) { return (uint32_t)s; }
+T(SFIXED64, f, uint64_t, int64_t, int64) { return (uint64_t)s; }
+T(BOOL, v, uint32_t, bool, _bool) { return (uint32_t)s; }
+T(ENUM, v, uint32_t, int32_t, int32) { return (uint32_t)s; }
+// Floating-point values are reinterpreted through upb_value, not converted.
+T(DOUBLE, f, uint64_t, double, _double) {
+  upb_value v;
+  v._double = s;
+  return v.uint64;
+}
+T(FLOAT, f, uint32_t, float, _float) {
+  upb_value v;
+  v._float = s;
+  return v.uint32;
+}
+#undef VTOWV
+#undef PUT
+#undef T
+
+// Writes v at buf in the wire encoding of field type ft, reading the
+// upb_value member that matches the type. Returns a pointer one past the
+// data written. Types not listed trip the assert and return buf unchanged.
+static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
+{
+#define PUT_AS(t, member_name) \
+  case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
+  switch(ft) {
+    PUT_AS(DOUBLE, _double)
+    PUT_AS(FLOAT, _float)
+    PUT_AS(INT32, int32)
+    PUT_AS(INT64, int64)
+    PUT_AS(UINT32, uint32)
+    PUT_AS(UINT64, uint64)
+    PUT_AS(SINT32, int32)
+    PUT_AS(SINT64, int64)
+    PUT_AS(FIXED32, uint32)
+    PUT_AS(FIXED64, uint64)
+    PUT_AS(SFIXED32, int32)
+    PUT_AS(SFIXED64, int64)
+    PUT_AS(BOOL, _bool)
+    PUT_AS(ENUM, int32)
+    default: assert(false); return buf;
+  }
+#undef PUT_AS
+}
+
+// Returns the encoded size in bytes of v for field type ft, reading the
+// upb_value member that matches the type. Types not listed trip the assert
+// and yield 0.
+static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
+{
+#define SIZE_AS(t, member_name) \
+  case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
+  switch(ft) {
+    SIZE_AS(DOUBLE, _double)
+    SIZE_AS(FLOAT, _float)
+    SIZE_AS(INT32, int32)
+    SIZE_AS(INT64, int64)
+    SIZE_AS(UINT32, uint32)
+    SIZE_AS(UINT64, uint64)
+    SIZE_AS(SINT32, int32)
+    SIZE_AS(SINT64, int64)
+    SIZE_AS(FIXED32, uint32)
+    SIZE_AS(FIXED64, uint64)
+    SIZE_AS(SFIXED32, int32)
+    SIZE_AS(SFIXED64, int64)
+    SIZE_AS(BOOL, _bool)
+    SIZE_AS(ENUM, int32)
+    default: assert(false); return 0;
+  }
+#undef SIZE_AS
+}
+
+// Writes the tag for field number num with wire type wt (wire type in the
+// low three bits) and returns a pointer one past it.
+static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
+                             upb_wire_type_t wt)
+{
+  return upb_put_UINT32(buf, wt | (num << 3));
+}
+
+// Returns the encoded size of the tag for field number num. The wire type
+// is left out: it only occupies the low three bits of the first byte.
+static uint32_t _upb_get_tag_size(upb_field_number_t num)
+{
+  return upb_get_UINT32_size(num << 3);
+}
+
+
+/* upb_sizebuilder ************************************************************/
+
+// State used by the _upb_sizebuilder_* sink callbacks to compute encoded
+// sizes.
+// NOTE(review): the callbacks cast their upb_sink* argument straight to
+// upb_sizebuilder*, yet unlike struct upb_encoder this struct has no leading
+// upb_sink member -- confirm how instances are created and passed.
+struct upb_sizebuilder {
+ // Accumulating size for the current level.
+ uint32_t size;
+
+ // Stack of sizes for our current nesting.
+ // "top" is where the next saved size is written (startcb stores through it
+ // and then increments it).
+ uint32_t stack[UPB_MAX_NESTING], *top;
+
+ // Vector of sizes.
+ // endcb appends one entry per finished submessage; sizes_len is the number
+ // of entries in use and sizes_size the allocated capacity.
+ uint32_t *sizes;
+ int sizes_len;
+ int sizes_size;
+
+ upb_status status;
+};
+
+// upb_sink callbacks.
+
+// Value callback: adds the size of the field's tag plus its encoded value to
+// the size of the current level.
+static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                                upb_value val,
+                                                upb_status *status)
+{
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  (void)status;
+  sb->size += _upb_get_tag_size(f->number) + _upb_get_value_size(f->type, val);
+  return UPB_SINK_CONTINUE;
+}
+
+// String callback: when start is non-negative, adds the size of the tag and
+// of the length prefix for (end - start) to the current level. The string
+// bytes themselves are not looked at or counted here.
+static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_strptr str,
+                                              int32_t start, uint32_t end,
+                                              upb_status *status)
+{
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  (void)str;  // String data itself is not used.
+  (void)status;
+  if(start < 0) return UPB_SINK_CONTINUE;
+
+  sb->size += _upb_get_tag_size(f->number) + upb_get_UINT32_size(end - start);
+  return UPB_SINK_CONTINUE;
+}
+
+// Start-of-submessage callback.
+// For a length-delimited message the parent's running size is saved on the
+// stack and counting restarts at zero; the tag and delimiter are accounted
+// for in endcb. For a group only the start tag is added.
+// Returns UPB_SINK_STOP, without touching *status, when the nesting is
+// deeper than the stack can hold.
+static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f,
+                                                upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type == UPB_TYPE(MESSAGE)) {
+    // Refuse to write past the end of the fixed-size stack.
+    if(sb->top >= sb->stack + UPB_MAX_NESTING) return UPB_SINK_STOP;
+    *sb->top = sb->size;
+    sb->top++;
+    sb->size = 0;
+  } else {
+    assert(f->type == UPB_TYPE(GROUP));
+    sb->size += _upb_get_tag_size(f->number);
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+// End-of-submessage callback.
+// For a length-delimited message: records the finished child's size in the
+// sizes vector, pops the parent's saved size and continues the parent with
+// saved size + tag + length prefix + child size. For a group only the end
+// tag is added.
+// Returns UPB_SINK_STOP, leaving all state untouched, if the sizes vector
+// cannot be grown.
+static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type == UPB_TYPE(MESSAGE)) {
+    // Grow before modifying anything else. Doubling alone never gets off a
+    // capacity of zero, and the old vector must survive a failed realloc.
+    if(sb->sizes_len == sb->sizes_size) {
+      int new_size = sb->sizes_size > 0 ? sb->sizes_size * 2 : 16;
+      uint32_t *new_sizes = realloc(sb->sizes, new_size * sizeof(*sb->sizes));
+      if(!new_sizes) return UPB_SINK_STOP;
+      sb->sizes = new_sizes;
+      sb->sizes_size = new_size;
+    }
+    sb->top--;
+    uint32_t child_size = sb->size;
+    uint32_t parent_size = *sb->top;
+    sb->sizes[sb->sizes_len++] = child_size;
+    // The size according to the parent includes the tag size and delimiter of
+    // the submessage.
+    parent_size += upb_get_UINT32_size(child_size);
+    parent_size += _upb_get_tag_size(f->number);
+    // Include size accumulated in parent before child began.
+    sb->size = child_size + parent_size;
+  } else {
+    assert(f->type == UPB_TYPE(GROUP));
+    // As an optimization, we could just add this number twice in startcb, to
+    // avoid having to recalculate it.
+    sb->size += _upb_get_tag_size(f->number);
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+// Callback table for the size builder, in the order value, string, start,
+// end.
+upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
+ _upb_sizebuilder_valuecb,
+ _upb_sizebuilder_strcb,
+ _upb_sizebuilder_startcb,
+ _upb_sizebuilder_endcb
+};
+
+
+/* upb_sink callbacks *********************************************************/
+
+// Encoder state. "base" comes first so the sink callbacks can cast their
+// upb_sink* argument to upb_encoder*.
+struct upb_encoder {
+ upb_sink base;
+ //upb_bytesink *bytesink;
+ // Submessage sizes; startcb reads them back to front, pre-decrementing
+ // size_offset for each length-delimited submessage.
+ uint32_t *sizes;
+ int size_offset;
+};
+
+
+// Within one callback we may need to encode up to two separate values.
+#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2)
+
+// Placeholder for handing len bytes at buf to the byte sink. The sink call
+// is not wired up yet: "written" is a hard-coded stand-in, so this returns
+// UPB_SINK_STOP whenever len exceeds it and UPB_SINK_CONTINUE otherwise.
+static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf,
+                                             size_t len, upb_status *status)
+{
+  // TODO: conjure a upb_strptr that points to buf.
+  //upb_strptr ptr;
+  (void)status;
+  (void)buf;
+  (void)s;
+  const size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr);
+  // TODO: mark to skip "written" bytes next time.
+  return (written < len) ? UPB_SINK_STOP : UPB_SINK_CONTINUE;
+}
+
+// Value callback: encodes the field's tag followed by its value into a
+// scratch buffer and pushes the result.
+static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                            upb_value val, upb_status *status)
+{
+  upb_encoder *enc = (upb_encoder*)sink;
+  uint8_t scratch[UPB_ENCODER_BUFSIZE];
+  // TODO: handle packed encoding.
+  uint8_t *end = _upb_put_tag(scratch, f->number,
+                              upb_types[f->type].expected_wire_type);
+  end = upb_encode_value(end, f->type, val);
+  return _upb_encoder_push_buf(enc, scratch, end - scratch, status);
+}
+
+// String callback: when start is non-negative, pushes the tag and the length
+// prefix (end - start), then pushes the string data. If pushing the header
+// does not return UPB_SINK_CONTINUE, that status is returned and the string
+// data is not pushed.
+static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_strptr str,
+                                          int32_t start, uint32_t end,
+                                          upb_status *status)
+{
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  if(start >= 0) {
+    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
+    ptr = upb_put_UINT32(ptr, end - start);
+  }
+  // TODO: properly handle partially consumed strings and partially supplied
+  // strings.
+  upb_sink_status header_status =
+      _upb_encoder_push_buf(s, buf, ptr - buf, status);
+  if(header_status != UPB_SINK_CONTINUE) return header_status;
+  return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status);
+}
+
+// Start-of-submessage callback: pushes a START_GROUP tag for groups, or a
+// DELIMITED tag followed by the next precomputed submessage size otherwise.
+static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f,
+                                            upb_status *status)
+{
+  upb_encoder *enc = (upb_encoder*)sink;
+  uint8_t scratch[UPB_ENCODER_BUFSIZE];
+  uint8_t *end = scratch;
+  if(f->type != UPB_TYPE(GROUP)) {
+    end = _upb_put_tag(end, f->number, UPB_WIRE_TYPE_DELIMITED);
+    // Sizes are consumed from the back of the vector.
+    end = upb_put_UINT32(end, enc->sizes[--enc->size_offset]);
+  } else {
+    end = _upb_put_tag(end, f->number, UPB_WIRE_TYPE_START_GROUP);
+  }
+  return _upb_encoder_push_buf(enc, scratch, end - scratch, status);
+}
+
+// End-of-submessage callback: only groups need anything written at the end
+// (an END_GROUP tag); every other type just continues.
+static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_status *status)
+{
+  if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE;
+
+  upb_encoder *enc = (upb_encoder*)sink;
+  uint8_t scratch[UPB_ENCODER_BUFSIZE];
+  uint8_t *end = _upb_put_tag(scratch, f->number, UPB_WIRE_TYPE_END_GROUP);
+  return _upb_encoder_push_buf(enc, scratch, end - scratch, status);
+}
+
+// Callback table for the encoder, in the order value, string, start, end.
+upb_sink_callbacks _upb_encoder_sink_vtbl = {
+ _upb_encoder_valuecb,
+ _upb_encoder_strcb,
+ _upb_encoder_startcb,
+ _upb_encoder_endcb
+};
+
diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h
new file mode 100644
index 0000000..64c5047
--- /dev/null
+++ b/upb/pb/encoder.h
@@ -0,0 +1,58 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2010 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Implements a set of upb_handlers that write protobuf data to the binary wire
+ * format.
+ *
+ * For messages that have any submessages, the encoder needs a buffer
+ * containing the submessage sizes, so they can be properly written at the
+ * front of each message. Note that groups do *not* have this requirement.
+ */
+
+#ifndef UPB_ENCODER_H_
+#define UPB_ENCODER_H_
+
+#include "upb.h"
+#include "upb_stream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_encoder ****************************************************************/
+
+// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
+// buffer binary wire format.
+struct upb_encoder;
+typedef struct upb_encoder upb_encoder;
+
+upb_encoder *upb_encoder_new(upb_msgdef *md);
+void upb_encoder_free(upb_encoder *e);
+
+// Resets the given upb_encoder such that it is ready to begin encoding,
+// outputting data to "bytesink" (which must live until the encoder is
+// reset or destroyed).
+void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
+
+// Returns the upb_sink to which data can be written. The sink is invalidated
+// when the encoder is reset or destroyed. Note that if the client wants to
+// encode any length-delimited submessages it must first call
+// upb_encoder_buildsizes() below.
+upb_sink *upb_encoder_sink(upb_encoder *e);
+
+// Call prior to pushing any data with embedded submessages. "src" must yield
+// exactly the same data as what will next be encoded, but in reverse order.
+// The encoder iterates over this data in order to determine the sizes of the
+// submessages. If any errors are returned by the upb_src, the status will
+// be saved in *status. If the client is sure that the upb_src will not throw
+// any errors, "status" may be NULL.
+void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_ENCODER_H_ */
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
new file mode 100644
index 0000000..3763ae0
--- /dev/null
+++ b/upb/pb/glue.c
@@ -0,0 +1,129 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/bytestream.h"
+#include "upb/descriptor.h"
+#include "upb/msg.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/glue.h"
+#include "upb/pb/textprinter.h"
+
+// Decodes the "len" bytes at "str" into "msg" using a decoder built for
+// msgdef "md".  The outcome of the decode is reported through "status".
+void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
+                  upb_status *status) {
+  upb_stringsrc input;
+  upb_decoder decoder;
+
+  // Wrap the caller's buffer in a string source.
+  upb_stringsrc_init(&input);
+  upb_stringsrc_reset(&input, str, len);
+
+  // Build a decoder for this message type and run it over the whole input.
+  upb_decoder_initformsgdef(&decoder, md);
+  upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&input), 0, UINT64_MAX,
+                    msg);
+  upb_decoder_decode(&decoder, status);
+
+  upb_stringsrc_uninit(&input);
+  upb_decoder_uninit(&decoder);
+}
+
+// Compiled out.  When enabled, this prints "msg" (of type "md") in text
+// format into "str" by running the textprinter handlers over the message.
+// NOTE(review): presumably disabled pending a port to the current
+// string/handlers API -- confirm before re-enabling.
+#if 0
+void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
+ bool single_line) {
+ upb_stringsink strsink;
+ upb_stringsink_init(&strsink);
+ upb_stringsink_reset(&strsink, str);
+
+ upb_textprinter *p = upb_textprinter_new();
+ upb_handlers *h = upb_handlers_new();
+ upb_textprinter_reghandlers(h, md);
+ upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line);
+
+ upb_status status = UPB_STATUS_INIT;
+ upb_msg_runhandlers(msg, md, h, p, &status);
+ // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be
+ // capable of returning an error.
+ assert(upb_ok(&status));
+ upb_status_uninit(&status);
+
+ upb_stringsink_uninit(&strsink);
+ upb_textprinter_free(p);
+ upb_handlers_unref(h);
+}
+#endif
+
+// Decodes the "len" bytes at "str" with the descriptor-reader handlers,
+// collecting the resulting defs in a symtab transaction.  Every message def
+// in the transaction is given the standard accessors and a layout, and if
+// "status" is still ok the transaction is committed to "symtab".
+// All temporary objects are released before returning, on every path.
+// TODO: read->load.
+void upb_read_descriptor(upb_symtab *symtab, const char *str, size_t len,
+                         upb_status *status) {
+  upb_stringsrc strsrc;
+  upb_stringsrc_init(&strsrc);
+  upb_stringsrc_reset(&strsrc, str, len);
+
+  upb_handlers *h = upb_handlers_new();
+  upb_descreader_reghandlers(h);
+
+  upb_decoder d;
+  upb_decoder_initforhandlers(&d, h);
+  upb_handlers_unref(h);
+  upb_descreader r;
+  upb_symtabtxn txn;
+  upb_symtabtxn_init(&txn);
+  upb_descreader_init(&r, &txn);
+  upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r);
+
+  upb_decoder_decode(&d, status);
+
+  // Set default accessors and layouts on all messages.
+  // for msgdef in symtabtxn:
+  upb_symtabtxn_iter i;
+  upb_symtabtxn_begin(&i, &txn);
+  for(; !upb_symtabtxn_done(&i); upb_symtabtxn_next(&i)) {
+    upb_def *def = upb_symtabtxn_iter_def(&i);
+    upb_msgdef *md = upb_dyncast_msgdef(def);
+    // Skip defs that are not messages.  Returning here instead would drop
+    // the remaining defs, skip the commit, and leak txn/r/strsrc/d.
+    if (!md) continue;
+    // For field in msgdef:
+    upb_msg_iter j;  // Distinct name: must not shadow the txn iterator.
+    for(j = upb_msg_begin(md); !upb_msg_done(j); j = upb_msg_next(md, j)) {
+      upb_fielddef *f = upb_msg_iter_field(j);
+      upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
+    }
+    upb_msgdef_layout(md);
+  }
+
+  if (upb_ok(status)) upb_symtab_commit(symtab, &txn, status);
+
+  upb_symtabtxn_uninit(&txn);
+  upb_descreader_uninit(&r);
+  upb_stringsrc_uninit(&strsrc);
+  upb_decoder_uninit(&d);
+}
+
+// Reads the whole of "filename" into a newly malloc()ed buffer.
+//
+// On success returns the buffer (the caller owns it and must free() it) and,
+// if "len" is non-NULL, stores the number of bytes read in *len.  An empty
+// file yields a valid buffer with *len == 0.  Returns NULL if the file cannot
+// be opened, sized or read, or if the allocation fails; nothing is leaked on
+// any of those paths.
+char *upb_readfile(const char *filename, size_t *len) {
+  char *buf = NULL;
+  FILE *f = fopen(filename, "rb");
+  if(!f) return NULL;
+  if(fseek(f, 0, SEEK_END) != 0) goto error;
+  long size = ftell(f);
+  if(size < 0) goto error;
+  if(fseek(f, 0, SEEK_SET) != 0) goto error;
+  // Never request zero bytes: malloc(0) is allowed to return NULL.
+  buf = malloc(size > 0 ? (size_t)size : 1);
+  if(!buf) goto error;
+  // fread() with a zero element size returns 0, which must not be mistaken
+  // for a read failure on an empty file.
+  if(size > 0 && fread(buf, size, 1, f) != 1) goto error;
+  fclose(f);
+  if (len) *len = size;
+  return buf;
+
+error:
+  free(buf);  // No-op while buf is still NULL.
+  fclose(f);
+  return NULL;
+}
+
+// Reads the file "fname" and feeds its contents to upb_read_descriptor().
+// If the file cannot be read, "status" is set to an error naming the file.
+void upb_read_descriptorfile(upb_symtab *symtab, const char *fname,
+                             upb_status *status) {
+  size_t nbytes;
+  char *contents = upb_readfile(fname, &nbytes);
+  if (contents) {
+    upb_read_descriptor(symtab, contents, nbytes, status);
+    free(contents);
+  } else {
+    upb_status_setf(status, UPB_ERROR, "Couldn't read file: %s", fname);
+  }
+}
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
new file mode 100644
index 0000000..5359120
--- /dev/null
+++ b/upb/pb/glue.h
@@ -0,0 +1,62 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb's core components like upb_decoder and upb_msg are carefully designed to
+ * avoid depending on each other for maximum orthogonality. In other words,
+ * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is
+ * just one such structure. A upb_msg can be serialized/deserialized into any
+ * format, protobuf binary format is just one such format.
+ *
+ * However, for convenience we provide functions here for doing common
+ * operations like deserializing protobuf binary format into a upb_msg. The
+ * compromise is that this file drags in almost all of upb as a dependency,
+ * which could be undesirable if you're trying to use a trimmed-down build of
+ * upb.
+ *
+ * While these routines are convenient, they do not reuse any encoding/decoding
+ * state. For example, if a decoder is JIT-based, it will be re-JITted every
+ * time these functions are called. For this reason, if you are parsing lots
+ * of data and efficiency is an issue, these may not be the best functions to
+ * use (though they are useful for prototyping, before optimizing).
+ */
+
+#ifndef UPB_GLUE_H
+#define UPB_GLUE_H
+
+#include <stdbool.h>
+#include "upb/upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Forward-declares so we don't have to include everything in this .h file.
+// Clients should use the regular, typedef'd names (eg. upb_string).
+struct _upb_msg;
+struct _upb_msgdef;
+struct _upb_symtab;
+
+// Decodes the given string, which must be in protobuf binary format, to the
+// given upb_msg with msgdef "md", storing the status of the operation in "s".
+void upb_strtomsg(const char *str, size_t len, void *msg,
+ struct _upb_msgdef *md, upb_status *s);
+
+//void upb_msgtotext(struct _upb_string *str, void *msg,
+// struct _upb_msgdef *md, bool single_line);
+
+void upb_read_descriptor(struct _upb_symtab *symtab, const char *str, size_t len,
+ upb_status *status);
+
+void upb_read_descriptorfile(struct _upb_symtab *symtab, const char *fname,
+ upb_status *status);
+
+char *upb_readfile(const char *filename, size_t *len);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/pb/jit_debug_elf_file.s b/upb/pb/jit_debug_elf_file.s
new file mode 100644
index 0000000..0b74630
--- /dev/null
+++ b/upb/pb/jit_debug_elf_file.s
@@ -0,0 +1,7 @@
+/* Defines a single global function symbol, upb_jit_compiled_decoder, in the
+ * text section and reserves 0x321 bytes for it.
+ * NOTE(review): presumably a template used to describe JIT-generated code to
+ * a debugger, with 0x321 being the size of one particular build of that code
+ * -- confirm. */
+ .file "JIT mcode"
+ .text
+upb_jit_compiled_decoder:
+ .globl upb_jit_compiled_decoder
+ .size upb_jit_compiled_decoder, 0x321
+ .type upb_jit_compiled_decoder STT_FUNC
+ .space 0x321
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
new file mode 100644
index 0000000..ce029d5
--- /dev/null
+++ b/upb/pb/textprinter.c
@@ -0,0 +1,199 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <ctype.h>
+#include <float.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include "upb/pb/textprinter.h"
+
+// State of one text printer.
+// NOTE(review): neither upb_textprinter_new() nor upb_textprinter_reset() in
+// this file initializes "status" -- confirm it is set up before use.
+struct _upb_textprinter {
+ upb_bytesink *bytesink; // Destination for all output; set by reset().
+ int indent_depth; // Current submessage nesting level.
+ bool single_line; // If true: no indentation, fields separated by " ".
+ upb_status status; // Passed to every bytesink write.
+};
+
+#define CHECK(x) if ((x) < 0) goto err;
+
+// Writes the bytes referenced by "strref" to p->bytesink with C-style
+// escaping.  Named escapes are used for \n \r \t \" \' and \\; any other
+// byte that is not printable is written as a 3-digit octal escape.  When
+// "preserve_utf8" is true, bytes >= 0x80 are passed through unescaped.
+// Returns 0 on success, -1 if a write to the bytesink fails.
+static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref,
+                                      bool preserve_utf8) {
+  // Based on CEscapeInternal() from Google's protobuf release.
+  // TODO: we could read directly from a bytesrc's buffer instead.
+  // TODO: we could write directly into a bytesink's buffer instead.
+
+  // Nothing to emit for an empty string; this also keeps the VLA below from
+  // being declared with zero length.
+  if (strref->len == 0) return 0;
+
+  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
+  // NOTE(review): stack usage grows with the string length.
+  char buf[strref->len], *src = buf;
+  char *end = src + strref->len;
+  upb_bytesrc_read(strref->bytesrc, strref->stream_offset, strref->len, buf);
+
+  // I think hex is prettier and more useful, but proto2 uses octal; should
+  // investigate whether it can parse hex also.
+  bool use_hex = false;
+  bool last_hex_escape = false; // true if last output char was \xNN
+
+  for (; src < end; src++) {
+    // The longest output for one input byte is a 4-character escape plus the
+    // NUL that sprintf() appends, so 5 bytes must be free.
+    if (dstend - dst < 5) {
+      CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
+      dst = dstbuf;
+    }
+
+    // <ctype.h> functions require a value representable as unsigned char.
+    unsigned char c = (unsigned char)*src;
+    bool is_hex_escape = false;
+    switch (*src) {
+      case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break;
+      case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break;
+      case '\t': *(dst++) = '\\'; *(dst++) = 't'; break;
+      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
+      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
+      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
+      default:
+        // Note that if we emit \xNN and the src character after that is a hex
+        // digit then that digit must be escaped too to prevent it being
+        // interpreted as part of the character code by C.
+        if ((!preserve_utf8 || c < 0x80) &&
+            (!isprint(c) || (last_hex_escape && isxdigit(c)))) {
+          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned)c);
+          is_hex_escape = use_hex;
+          dst += 4;
+        } else {
+          *(dst++) = *src;
+        }
+        break;
+    }
+    last_hex_escape = is_hex_escape;
+  }
+  // Flush remaining data, starting from the beginning of the buffer.
+  CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
+  return 0;
+err:
+  return -1;
+}
+
+// Emits one indentation unit per nesting level; does nothing in single-line
+// mode.  Returns 0 on success, -1 if a write to the bytesink fails.
+static int upb_textprinter_indent(upb_textprinter *p) {
+  if(p->single_line) return 0;
+  int remaining = p->indent_depth;
+  while(remaining-- > 0) {
+    CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
+  }
+  return 0;
+err:
+  return -1;
+}
+
+// Terminates a field: a space in single-line mode, a newline otherwise.
+// Returns 0 on success, -1 if the write to the bytesink fails.
+static int upb_textprinter_endfield(upb_textprinter *p) {
+  const char *terminator = p->single_line ? " " : "\n";
+  CHECK(upb_bytesink_writestr(p->bytesink, terminator, &p->status));
+  return 0;
+err:
+  return -1;
+}
+
+// Value handler: prints "<field name>: <value>" followed by the field
+// terminator.  "fval" carries the upb_fielddef, "val" the value.  Enums are
+// printed by name when the number has one, otherwise as an int32; strings
+// and bytes are quoted and escaped.
+// Returns UPB_CONTINUE on success and UPB_BREAK as soon as any write fails,
+// including the indentation and the field terminator.
+static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
+                                        upb_value val) {
+  upb_textprinter *p = _p;
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  CHECK(upb_textprinter_indent(p));
+  CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name));
+#define CASE(fmtstr, member) \
+    CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
+  switch(f->type) {
+    // TODO: figure out what we should really be doing for these
+    // floating-point formats.
+    case UPB_TYPE(DOUBLE):
+      CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", DBL_DIG, upb_value_getdouble(val))); break;
+    case UPB_TYPE(FLOAT):
+      CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", FLT_DIG+2, upb_value_getfloat(val))); break;
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SFIXED64):
+    case UPB_TYPE(SINT64):
+      CASE("%" PRId64, int64)
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      CASE("%" PRIu64, uint64)
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      CASE("%" PRIu32, uint32);
+    case UPB_TYPE(ENUM): {
+      upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
+      const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
+      if (label) {
+        // We found a corresponding string for this enum.  Otherwise we fall
+        // through to the int32 code path.
+        CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status));
+        break;
+      }
+    }
+    /* fallthrough */
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(SINT32):
+      CASE("%" PRId32, int32)
+    case UPB_TYPE(BOOL):
+      CASE("%hhu", bool);
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES): {
+      CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
+      CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
+                                       f->type == UPB_TYPE(STRING)));
+      CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
+      break;
+    }
+    default:
+      // Any other type: no value text is written (same as having no case).
+      break;
+  }
+#undef CASE
+  CHECK(upb_textprinter_endfield(p));
+  return UPB_CONTINUE;
+err:
+  return UPB_BREAK;
+}
+
+// Start-submessage handler: prints "<field name> {" (plus a newline unless in
+// single-line mode) and increases the nesting depth.
+// NOTE(review): the printf result is tested as a bool here, while the rest of
+// this file treats a negative result as failure (see CHECK) -- confirm which
+// convention upb_bytesink_printf() follows.
+static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
+  upb_textprinter *printer = _p;
+  upb_fielddef *field = upb_value_getfielddef(fval);
+  upb_textprinter_indent(printer);
+  bool wrote = upb_bytesink_printf(printer->bytesink, &printer->status,
+                                   "%s {", field->name);
+  if (wrote) {
+    if (!printer->single_line) {
+      upb_bytesink_writestr(printer->bytesink, "\n", &printer->status);
+    }
+    printer->indent_depth++;
+    return UPB_CONTINUE_WITH(_p);
+  }
+  return UPB_SBREAK;
+}
+
+// End-submessage handler: decreases the nesting depth, then prints the
+// closing "}" at the outer indentation followed by the field terminator.
+// Write results are not checked; the handler always returns UPB_CONTINUE.
+static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
+  upb_textprinter *printer = _p;
+  (void)fval;
+  printer->indent_depth -= 1;
+  upb_textprinter_indent(printer);
+  upb_bytesink_writestr(printer->bytesink, "}", &printer->status);
+  upb_textprinter_endfield(printer);
+  return UPB_CONTINUE;
+}
+
+// Allocates a text printer.  The result of malloc() is returned unchecked and
+// no field is initialized here; see upb_textprinter_reset().
+upb_textprinter *upb_textprinter_new() {
+  return malloc(sizeof(upb_textprinter));
+}
+
+// Releases a printer obtained from upb_textprinter_new().  The bytesink it
+// points to is not touched.
+void upb_textprinter_free(upb_textprinter *p) {
+ free(p);
+}
+
+// Points the printer at "sink", selects single-line or multi-line output and
+// resets the nesting depth to zero.  The "status" member is left as it was.
+void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
+                           bool single_line) {
+  p->indent_depth = 0;
+  p->single_line = single_line;
+  p->bytesink = sink;
+}
+
+// Registers the text printer's handlers for message type "m" on "h" and
+// returns whatever upb_handlers_reghandlerset() returns.  Only the value,
+// start-submessage and end-submessage handlers are provided.
+// NOTE(review): the initializer is positional and relies on the member order
+// of upb_handlerset, which is declared elsewhere.
+upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, upb_msgdef *m) {
+ upb_handlerset hset = {
+ NULL, // startmsg
+ NULL, // endmsg
+ upb_textprinter_value,
+ upb_textprinter_startsubmsg,
+ upb_textprinter_endsubmsg,
+ NULL, // startseq
+ NULL, // endseq
+ };
+ return upb_handlers_reghandlerset(h, m, &hset);
+}
diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h
new file mode 100644
index 0000000..9455208
--- /dev/null
+++ b/upb/pb/textprinter.h
@@ -0,0 +1,31 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#ifndef UPB_TEXT_H_
+#define UPB_TEXT_H_
+
+#include "upb/bytestream.h"
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _upb_textprinter;
+typedef struct _upb_textprinter upb_textprinter;
+
+upb_textprinter *upb_textprinter_new();
+void upb_textprinter_free(upb_textprinter *p);
+void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
+ bool single_line);
+upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, upb_msgdef *m);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_TEXT_H_ */
diff --git a/upb/pb/varint.c b/upb/pb/varint.c
new file mode 100644
index 0000000..45caec1
--- /dev/null
+++ b/upb/pb/varint.c
@@ -0,0 +1,54 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/pb/varint.h"
+
+// For an encoded varint "v", returns a value with exactly one bit set: the
+// lowest continuation-bit position (top bit of a byte) that is clear in v,
+// i.e. the top bit of the varint's final byte.  Subtracting one from the
+// result gives a mask covering only the bits that belong to the varint.
+// Returns 0 if every byte of v has its continuation bit set (unterminated).
+INLINE uint64_t upb_get_vstopbit(uint64_t v) {
+  const uint64_t payload_bits = 0x7f7f7f7f7f7f7f7fULL;
+  // Force every payload bit to 1 so the only zeros left are clear
+  // continuation bits.
+  uint64_t filled = v | payload_bits;
+  // Adding one carries through the trailing ones and sets the lowest zero
+  // bit; masking with ~filled isolates that bit.
+  uint64_t carried = filled + 1;
+  return carried & ~filled;
+}
+// Mask of all bits below the stop bit of encoded varint "v".
+INLINE uint64_t upb_get_vmask(uint64_t v) {
+  uint64_t stop_bit = upb_get_vstopbit(v);
+  return stop_bit - 1;
+}
+
+// Decodes up to 8 further varint bytes at r.p without per-byte branches and
+// merges them into r.val above bit 14.  Always loads 8 bytes from r.p.
+// Returns {NULL, 0} if none of those 8 bytes terminates the varint.
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+  uint64_t bits;
+  memcpy(&bits, r.p, sizeof(bits));
+  const uint64_t stop = upb_get_vstopbit(bits);
+  if (stop == 0) {
+    // Error: unterminated varint.
+    upb_decoderet fail = {(void*)0, 0};
+    return fail;
+  }
+  // Keep only the payload bits of the bytes that belong to this varint.
+  bits &= 0x7f7f7f7f7f7f7f7fULL;
+  bits &= stop - 1;
+  // Each step moves the lower half of every lane up (by 1, then 2, then 4
+  // bits) so the 7-bit groups become contiguous, ending 7 bits above bit 0.
+  bits += bits & 0x007f007f007f007fULL;
+  bits += 3 * (bits & 0x0000ffff0000ffffULL);
+  bits += 15 * (bits & 0x00000000ffffffffULL);
+  // The stop bit is the top bit of the last byte, so (index + 1) / 8 is the
+  // number of bytes consumed.
+  upb_decoderet ok = {r.p + ((__builtin_ctzll(stop) + 1) / 8),
+                      r.val | (bits << 7)};
+  return ok;
+}
+
+// Same contract as upb_vdecode_max8_massimino(), but the 7-bit groups are
+// packed downwards with shifts, leaving the payload at bit 0 before it is
+// merged into r.val above bit 14.  Always loads 8 bytes from r.p.
+// Returns {NULL, 0} if none of those 8 bytes terminates the varint.
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+  uint64_t bits;
+  memcpy(&bits, r.p, sizeof(bits));
+  const uint64_t stop = upb_get_vstopbit(bits);
+  if (stop == 0) {
+    // Error: unterminated varint.
+    upb_decoderet fail = {(void*)0, 0};
+    return fail;
+  }
+  // Drop everything at and above the stop bit.
+  bits &= (stop - 1);
+  // Close the gaps left by the continuation bits: 1 bit between bytes, then
+  // 2 bits between 16-bit lanes, then 4 bits between 32-bit lanes.
+  bits = ((bits & 0x7f007f007f007f00) >> 1) | (bits & 0x007f007f007f007f);
+  bits = ((bits & 0xffff0000ffff0000) >> 2) | (bits & 0x0000ffff0000ffff);
+  bits = ((bits & 0xffffffff00000000) >> 4) | (bits & 0x00000000ffffffff);
+  upb_decoderet ok = {r.p + ((__builtin_ctzll(stop) + 1) / 8),
+                      r.val | (bits << 14)};
+  return ok;
+}
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
new file mode 100644
index 0000000..1bbd193
--- /dev/null
+++ b/upb/pb/varint.h
@@ -0,0 +1,142 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A number of routines for varint manipulation (we keep them all around to
+ * have multiple approaches available for benchmarking).
+ */
+
+#ifndef UPB_VARINT_DECODER_H_
+#define UPB_VARINT_DECODER_H_
+
+#include <stdint.h>
+#include <string.h>
+#include "upb/upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Decoding *******************************************************************/
+
+// All decoding functions return this struct by value.
+typedef struct {
+ const char *p; // NULL if the varint was unterminated.
+ uint64_t val; // The decoded value (0 in the error results built in this file).
+} upb_decoderet;
+
+// A basic branch-based decoder, uses 32-bit values to get good performance
+// on 32-bit architectures (but performs well on 64-bits also).
+//
+// Reads up to 10 bytes from p, stopping at the first byte whose top bit is
+// clear.  On success returns the value and a pointer just past the varint.
+// If all 10 bytes have the top bit set, returns {NULL, 0}.
+INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
+ upb_decoderet r = {NULL, 0};
+ uint32_t low, high = 0;
+ uint32_t b;
+ // Bytes 1-4 fit entirely in the low word.
+ b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done;
+ b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
+ b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+ b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+ // Byte 5 straddles the two words: 4 bits go to low, 3 bits to high.
+ b = *(p++); low |= (b & 0x7f) << 28;
+ high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done;
+ b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done;
+ b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
+ b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
+ b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
+ b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
+ // Ten continuation bytes in a row: report failure (r.p is still NULL).
+ return r;
+
+done:
+ r.val = ((uint64_t)high << 32) | low;
+ r.p = p;
+ return r;
+}
+
+// Like the previous, but uses 64-bit values.
+//
+// Reads up to 10 bytes from p, stopping at the first byte whose top bit is
+// clear.  On success returns the value and a pointer just past the varint.
+// If all 10 bytes have the top bit set, returns {NULL, 0}.
+INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
+ uint64_t val;
+ uint64_t b;
+ upb_decoderet r = {(void*)0, 0};
+ b = *(p++); val = (b & 0x7f) ; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
+ b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
+ // Ten continuation bytes in a row: report failure (r.p is still NULL).
+ return r;
+
+done:
+ r.val = val;
+ r.p = p;
+ return r;
+}
+
+// Decodes a varint of at most 8 bytes without branching (except for error).
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
+
+// Another implementation of the previous.
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
+
+// Template for a function that checks the first two bytes with branching
+// and hands longer varints to a separate function.
+//
+// Generates upb_vdecode_check2_<name>(p).  One- and two-byte varints are
+// decoded inline.  Otherwise decode_max8_function is called with r.p
+// pointing at the third byte and r.val holding the 14 bits already decoded;
+// it reads the remaining bytes of the varint from there.
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \
+ uint8_t *p = (uint8_t*)_p; \
+ if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \
+ upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)}; \
+ if ((*(p + 1) & 0x80) == 0) return r; \
+ return decode_max8_function(r); \
+}
+
+// Instantiate one check2 decoder per max8 implementation; the template is
+// not needed after that.
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
+#undef UPB_VARINT_DECODER_CHECK2
+
+// Our canonical functions for decoding varints, based on the currently
+// favored best-performing implementations.
+//
+// Picks check2_massimino when long is 8 bytes wide, branch32 otherwise.
+INLINE upb_decoderet upb_vdecode_fast(const char *p) {
+  const int long_is_64bit = (sizeof(long) == 8);
+  return long_is_64bit ? upb_vdecode_check2_massimino(p)
+                       : upb_vdecode_branch32(p);
+}
+
+// The currently preferred max8 decoder; forwards to the massimino variant.
+INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
+ return upb_vdecode_max8_massimino(r);
+}
+
+
+/* Encoding *******************************************************************/
+
+// Returns the number of bytes needed to hold "val": the 1-based index of its
+// highest non-zero byte, or 1 when val is 0.  This counts whole 8-bit bytes,
+// not 7-bit varint groups.
+INLINE size_t upb_value_size(uint64_t val) {
+  // Handle zero first: the result of __builtin_clzll(0) is undefined, so it
+  // must not be evaluated at all for that input.
+  if (val == 0) return 1;
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based.
+#else
+  int high_bit = 0;
+  uint64_t tmp = val;
+  while(tmp >>= 1) high_bit++;
+#endif
+  return high_bit / 8 + 1;
+}
+
+// Encodes a 32-bit varint, *not* sign-extended.
+//
+// The encoded bytes are returned packed into a uint64_t, first byte in the
+// low 8 bits.  Zero encodes as 0.  A 32-bit value needs at most five bytes,
+// so the result always fits.
+INLINE uint64_t upb_vencode32(uint32_t val) {
+  uint64_t ret = 0;
+  for (int bitpos = 0; val; bitpos += 8, val >>= 7) {
+    // Another group follows, so set the continuation (top) bit of the
+    // previous byte.  All shifts are done in 64 bits: the fifth group sits
+    // at bit 32, where int / uint32_t shifts would be undefined.
+    if (bitpos > 0) ret |= (uint64_t)1 << (bitpos - 1);
+    ret |= (uint64_t)(val & 0x7f) << bitpos;
+  }
+  return ret;
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_VARINT_DECODER_H_ */
diff --git a/upb/table.c b/upb/table.c
new file mode 100644
index 0000000..71aca16
--- /dev/null
+++ b/upb/table.c
@@ -0,0 +1,574 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * There are a few printf's strewn throughout this file, uncommenting them
+ * can be useful for debugging.
+ */
+
+#include "upb/table.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const double MAX_LOAD = 0.85;
+
+// The minimum density (fraction of slots that are occupied) that we will
+// allow for the array part. This is a speed/memory-usage tradeoff (though
+// it's not straightforward because of cache effects). The lower this is, the
+// more memory we'll use.
+static const double MIN_DENSITY = 0.1;
+
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+
+/* Base table (shared code) ***************************************************/
+
+// Number of slots in the hash part; always a power of two (1 << size_lg2).
+static uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; }
+// Size in bytes of one hash entry.
+static size_t upb_table_entrysize(upb_table *t) { return t->entry_size; }
+// Size in bytes of one stored value.
+static size_t upb_table_valuesize(upb_table *t) { return t->value_size; }
+
+// Initializes the shared part of a table: picks the smallest power-of-two
+// slot count that is >= "size" (and at least 2), then allocates that many
+// entries of "entry_size" bytes.  The entries are left uninitialized and the
+// malloc() result is not checked.  value_size is not set here.
+void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) {
+  t->count = 0;
+  t->entry_size = entry_size;
+  for (t->size_lg2 = 1; upb_table_size(t) < size; t->size_lg2++) {
+    // Keep doubling until the table is big enough.
+  }
+  size_t bytes = upb_table_size(t) * t->entry_size;
+  t->mask = upb_table_size(t) - 1;
+  t->entries = malloc(bytes);
+}
+
+// Frees the entry storage allocated by upb_table_init().
+void upb_table_free(upb_table *t) { free(t->entries); }
+
+/* upb_inttable ***************************************************************/
+
+// Returns the address of hash entry "i", computed from the entry size.  No
+// bounds check is done; the iterator deliberately passes -1 and the table
+// size to obtain one-before-the-start and one-past-the-end addresses.
+static upb_inttable_entry *intent(upb_inttable *t, int32_t i) {
+ //printf("looking up int entry %d, size of entry: %d\n", i, t->t.entry_size);
+ return UPB_INDEX(t->t.entries, i, t->t.entry_size);
+}
+
+// Number of slots in the hash part of the inttable (excludes the array part).
+static uint32_t upb_inttable_hashtablesize(upb_inttable *t) {
+ return upb_table_size(&t->t);
+}
+
+// Initializes an inttable with an array part of "arrsize" slots (at least 1)
+// and a hash part with room for "hashsize" entries.  Every slot in both
+// parts starts out empty.  malloc() results are not checked.
+void upb_inttable_sizedinit(upb_inttable *t, uint32_t arrsize, uint32_t hashsize,
+                            uint16_t value_size) {
+  size_t entsize = _upb_inttable_entrysize(value_size);
+  upb_table_init(&t->t, hashsize, entsize);
+
+  // Hash part: mark every bucket as empty and unchained.
+  for (uint32_t bucket = 0; bucket < upb_table_size(&t->t); bucket++) {
+    upb_inttable_entry *ent = intent(t, bucket);
+    ent->hdr.key = 0;
+    ent->hdr.next = UPB_END_OF_CHAIN;
+    ent->val.has_entry = 0;
+  }
+  t->t.value_size = value_size;
+
+  // Always make the array part at least 1 long, so that we know key 0
+  // won't be in the hash part (which lets us speed up that code path).
+  t->array_size = UPB_MAX(1, arrsize);
+  t->array = malloc(upb_table_valuesize(&t->t) * t->array_size);
+  t->array_count = 0;
+
+  // Array part: mark every slot as empty.
+  for (uint32_t slot = 0; slot < t->array_size; slot++) {
+    upb_inttable_value *v =
+        UPB_INDEX(t->array, slot, upb_table_valuesize(&t->t));
+    v->has_entry = false;
+  }
+}
+
+// Initializes an inttable with no requested array part (sizedinit still
+// creates a 1-slot array) and a hash part sized for "hashsize" entries.
+void upb_inttable_init(upb_inttable *t, uint32_t hashsize, uint16_t value_size) {
+ upb_inttable_sizedinit(t, 0, hashsize, value_size);
+}
+
+// Frees both the hash part and the array part.  The upb_inttable struct
+// itself is not freed.
+void upb_inttable_free(upb_inttable *t) {
+ upb_table_free(&t->t);
+ free(t->array);
+}
+
+// Returns the index of the first unoccupied bucket in the hash part.  The
+// caller must ensure one exists; if none does, the assert fires (and 0 is
+// returned when asserts are compiled out).
+static uint32_t empty_intbucket(upb_inttable *table)
+{
+  // TODO: does it matter that this is biased towards the front of the table?
+  const uint32_t nbuckets = upb_inttable_hashtablesize(table);
+  uint32_t bucket = 0;
+  while (bucket < nbuckets && intent(table, bucket)->val.has_entry) bucket++;
+  if (bucket < nbuckets) return bucket;
+  assert(false);
+  return 0;
+}
+
+// The insert routines have a lot more code duplication between int/string
+// variants than I would like, but there's just a bit too much that varies to
+// parameterize them.
+//
+// Inserts "key" with a copy of the value at "val".  The key must not already
+// be present, and this function never resizes: the caller must ensure the
+// hash part has a free bucket.  Keys accepted by _upb_inttable_isarrkey() go
+// into the array part; all others go into the hash part, where each chain
+// starts at the main position (bucket) of its keys.
+static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
+ assert(upb_inttable_lookup(t, key) == NULL);
+ upb_inttable_value *table_val;
+ if (_upb_inttable_isarrkey(t, key)) {
+ table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
+ t->array_count++;
+ //printf("Inserting key %d to Array part! %p\n", key, table_val);
+ } else {
+ t->t.count++;
+ uint32_t bucket = _upb_inttable_bucket(t, key);
+ upb_inttable_entry *table_e = intent(t, bucket);
+ //printf("Hash part! Inserting into bucket %d?\n", bucket);
+ if(table_e->val.has_entry) { /* Collision. */
+ //printf("Collision!\n");
+ if(bucket == _upb_inttable_bucket(t, table_e->hdr.key)) {
+ /* Existing element is in its main position. Find an empty slot to
+ * place our new element and append it to this key's chain. */
+ uint32_t empty_bucket = empty_intbucket(t);
+ while (table_e->hdr.next != UPB_END_OF_CHAIN)
+ table_e = intent(t, table_e->hdr.next);
+ table_e->hdr.next = empty_bucket;
+ table_e = intent(t, empty_bucket);
+ } else {
+ /* Existing element is not in its main position. Move it to an empty
+ * slot and put our element in its main position. */
+ uint32_t empty_bucket = empty_intbucket(t);
+ uint32_t evictee_bucket = _upb_inttable_bucket(t, table_e->hdr.key);
+ memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
+ // Walk the evictee's chain from its main position to find the link that
+ // pointed at "bucket", and redirect it to the evictee's new home.
+ upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
+ while(1) {
+ assert(evictee_e->val.has_entry);
+ assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
+ if(evictee_e->hdr.next == bucket) {
+ evictee_e->hdr.next = empty_bucket;
+ break;
+ }
+ evictee_e = intent(t, evictee_e->hdr.next);
+ }
+ /* table_e remains set to our mainpos. */
+ }
+ }
+ //printf("Inserting! to:%p, copying to: %p\n", table_e, &table_e->val);
+ // In every case table_e is now the slot for the new element, which becomes
+ // the tail of its chain.
+ table_val = &table_e->val;
+ table_e->hdr.key = key;
+ table_e->hdr.next = UPB_END_OF_CHAIN;
+ }
+ // Copy the value first, then mark the slot occupied, so has_entry is true
+ // regardless of what the copied bytes contain.
+ memcpy(table_val, val, upb_table_valuesize(&t->t));
+ table_val->has_entry = true;
+ assert(upb_inttable_lookup(t, key) == table_val);
+}
+
+// Copies every element of "src" into "dst".  No resizing happens here: the
+// caller must have sized "dst" so that its hash part stays within MAX_LOAD,
+// which is asserted before each insertion.
+static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
+  upb_inttable_iter it = upb_inttable_begin(src);
+  while (!upb_inttable_done(it)) {
+    //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst));
+    assert((double)(upb_table_count(&dst->t)) /
+           upb_inttable_hashtablesize(dst) <= MAX_LOAD);
+    intinsert(dst, upb_inttable_iter_key(it), upb_inttable_iter_value(it));
+    it = upb_inttable_next(src, it);
+  }
+}
+
+// Inserts "key" with a copy of the value at "val", growing the table first if
+// one more hash entry would push the load factor above MAX_LOAD.
+void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
+  const double load_after =
+      (double)(t->t.count + 1) / upb_inttable_hashtablesize(t);
+  if(load_after > MAX_LOAD) {
+    //printf("RESIZE!\n");
+    // Rebuild into a fresh table sized for twice the current element count.
+    // The replacement is created without a requested array part even if the
+    // old table had one; a later upb_inttable_compact() can recreate it.
+    upb_inttable bigger;
+    //printf("Old table count=%d, size=%d\n", upb_inttable_count(t), upb_inttable_hashtablesize(t));
+    upb_inttable_init(&bigger, upb_inttable_count(t)*2, upb_table_valuesize(&t->t));
+    upb_inttable_insertall(&bigger, t);
+    upb_inttable_free(t);
+    *t = bigger;
+  }
+  intinsert(t, key, val);
+}
+
+// Rebuilds the table so that small keys live in a dense array part.
+//
+// Candidate array sizes are powers of two, tried from just above the largest
+// key downwards; the first one in which at least MIN_DENSITY of the slots
+// would be occupied wins.  If none qualifies the array part has size 1.  The
+// remaining keys go into a hash part sized to stay within MAX_LOAD.
+void upb_inttable_compact(upb_inttable *t) {
+  // Find the largest array part we can that satisfies the MIN_DENSITY
+  // definition.  For now we just count down powers of two.
+  uint32_t largest_key = 0;
+  for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
+      i = upb_inttable_next(t, i)) {
+    largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
+  }
+  // All power-of-two arithmetic is done in 64 bits: for keys above 2^30 the
+  // exponent reaches 31 or 32, which is undefined for a shift of an int.
+  int lg2_array = 0;
+  while (((uint64_t)1 << lg2_array) < largest_key) ++lg2_array;
+  ++lg2_array;  // Undo the first iteration.
+  uint64_t array_size = 1;
+  int array_count = 0;
+  while (lg2_array > 0) {
+    array_size = ((uint64_t)1 << --lg2_array);
+    //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));
+    if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) {
+      // Even if 100% of the keys were in the array part, an array of this
+      // size would not be dense enough.
+      continue;
+    }
+    array_count = 0;
+    for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
+        i = upb_inttable_next(t, i)) {
+      if (upb_inttable_iter_key(i) < array_size)
+        array_count++;
+    }
+    //printf("There would be %d things in that array\n", array_count);
+    if ((double)array_count / array_size >= MIN_DENSITY) break;
+  }
+  upb_inttable new_table;
+  int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
+  //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
+  upb_inttable_sizedinit(&new_table, (uint32_t)array_size, hash_size,
+                         upb_table_valuesize(&t->t));
+  //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
+  upb_inttable_insertall(&new_table, t);
+  upb_inttable_free(t);
+  *t = new_table;
+}
+
+// Returns an iterator positioned at the first element of "t".  It starts from
+// a "before the array part" state and advances once.
+// NOTE(review): the initializer is positional; it is assumed to set key,
+// value and array_part in that order -- confirm against upb_inttable_iter.
+upb_inttable_iter upb_inttable_begin(upb_inttable *t) {
+ upb_inttable_iter iter = {-1, NULL, true}; // -1 will overflow to 0 on the first iteration.
+ return upb_inttable_next(t, iter);
+}
+
+// Advances "iter" to the next occupied slot and returns the new iterator.
+// The array part is walked first, in key order, then the hash part in bucket
+// order.  When nothing is left, the returned iterator has value == NULL.
+upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter) {
+ const size_t hdrsize = sizeof(upb_inttable_header);
+ const size_t entsize = upb_table_entrysize(&t->t);
+ if (iter.array_part) {
+ while (++iter.key < t->array_size) {
+ //printf("considering value %d\n", iter.key);
+ iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size);
+ if (iter.value->has_entry) return iter;
+ }
+ //printf("Done with array part!\n");
+ iter.array_part = false;
+ // Point to the value of the table[-1] entry.
+ // This address is only used as a starting point and is never dereferenced:
+ // the loop below steps forward one entry before looking at anything.
+ iter.value = UPB_INDEX(intent(t, -1), 1, hdrsize);
+ }
+ // One-past-the-end address of the hash part, used as the stop marker.
+ void *end = intent(t, upb_inttable_hashtablesize(t));
+ // Point to the entry for the value that was previously in iter.
+ // (The value sits hdrsize bytes after the start of its entry.)
+ upb_inttable_entry *e = UPB_INDEX(iter.value, -1, hdrsize);
+ do {
+ e = UPB_INDEX(e, 1, entsize);
+ //printf("considering value %p (val: %p)\n", e, &e->val);
+ if(e == end) {
+ //printf("No values.\n");
+ iter.value = NULL;
+ return iter;
+ }
+ } while(!e->val.has_entry);
+ //printf("USING VALUE! %p\n", e);
+ iter.key = e->hdr.key;
+ iter.value = &e->val;
+ return iter;
+}
+
+
+/* upb_strtable ***************************************************************/
+
+// Returns a pointer to slot i of t's entry array, using the table's runtime
+// entry_size as the stride. Only the upper bound is asserted: i may equal the
+// table size (one-past-the-end) and the iterator code also passes -1 to get a
+// "before the first slot" position. Such pointers must not be dereferenced.
+static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
+ //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t));
+ assert(i <= (int32_t)upb_table_size(&t->t));
+ return UPB_INDEX(t->t.entries, i, t->t.entry_size);
+}
+
+// Returns the number of slots in t, as reported by upb_table_size() (this is
+// the capacity used for load-factor and iteration bounds, not the key count).
+static uint32_t upb_strtable_size(upb_strtable *t) {
+ return upb_table_size(&t->t);
+}
+
+// Initializes t as an empty string table whose values are valuesize bytes
+// each. Every slot is the entry header followed by the value, rounded up to a
+// multiple of 8 bytes. All slots start out unused (NULL key) and unchained.
+void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
+  t->t.value_size = valuesize;
+  const size_t slot_bytes =
+      upb_align_up(sizeof(upb_strtable_header) + valuesize, 8);
+  upb_table_init(&t->t, size, slot_bytes);
+
+  uint32_t slot = 0;
+  while (slot < upb_table_size(&t->t)) {
+    upb_strtable_header *hdr = &strent(t, slot)->hdr;
+    hdr->key = NULL;
+    hdr->next = UPB_END_OF_CHAIN;
+    ++slot;
+  }
+}
+
+// Releases everything owned by t: first each key string (the table holds its
+// own heap copies), then the underlying entry storage.
+void upb_strtable_free(upb_strtable *t) {
+  upb_strtable_iter it;
+  upb_strtable_begin(&it, t);
+  while (!upb_strtable_done(&it)) {
+    free((char*)upb_strtable_iter_key(&it));
+    upb_strtable_next(&it);
+  }
+  upb_table_free(&t->t);
+}
+
+// Maps a NUL-terminated key to its main slot: MurmurHash2 of the key bytes
+// (seed 0), reduced with the table's mask.
+static uint32_t strtable_bucket(upb_strtable *t, const char *key) {
+  return MurmurHash2(key, strlen(key), 0) & t->t.mask;
+}
+
+// Looks up a NUL-terminated key. Starts at the key's main slot and follows
+// the "next" chain until the key is found or the chain ends. Returns a
+// pointer to the stored value, or NULL if the key is not present.
+void *upb_strtable_lookup(upb_strtable *t, const char *key) {
+  uint32_t slot = strtable_bucket(t, key);
+  for (;;) {
+    upb_strtable_entry *ent = strent(t, slot);
+    const char *stored = ent->hdr.key;
+    if (stored != NULL && strcmp(stored, key) == 0) return &ent->val;
+    slot = ent->hdr.next;
+    if (slot == UPB_END_OF_CHAIN) return NULL;
+  }
+}
+
+// Looks up a key given as (pointer, length) rather than as a NUL-terminated
+// string. The key is copied into a temporary NUL-terminated buffer and passed
+// to upb_strtable_lookup(). Short keys use a fixed stack buffer; longer keys
+// use a heap buffer so that a large, caller-controlled len cannot overflow
+// the stack (the previous implementation used a variable-length array).
+// Returns the stored value pointer, or NULL if the key is absent or the
+// temporary buffer could not be allocated.
+void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len) {
+  // TODO: improve (avoid the copy entirely).
+  char stackbuf[256];
+  char *key2 = stackbuf;
+  if (len >= sizeof(stackbuf)) {
+    key2 = malloc(len + 1);
+    if (key2 == NULL) return NULL;
+  }
+  memcpy(key2, key, len);
+  key2[len] = '\0';
+  void *ret = upb_strtable_lookup(t, key2);
+  if (key2 != stackbuf) free(key2);
+  return ret;
+}
+
+// Returns the index of the first unused slot (NULL key), scanning from slot 0.
+// Callers must guarantee a free slot exists; otherwise the assert fires (and
+// 0 is returned when asserts are compiled out).
+static uint32_t empty_strbucket(upb_strtable *table) {
+  // TODO: does it matter that this is biased towards the front of the table?
+  const uint32_t nslots = upb_strtable_size(table);
+  uint32_t slot = 0;
+  while (slot < nslots) {
+    if (strent(table, slot)->hdr.key == NULL) return slot;
+    ++slot;
+  }
+  assert(false);
+  return 0;
+}
+
+// Inserts key -> val into t without checking the load factor; the caller
+// (upb_strtable_insert) is responsible for resizing first. The key must not
+// already be present. The table stores its own strdup() copy of key and
+// copies value_size bytes from val.
+//
+// On a collision with the key's main slot there are two cases: if the
+// occupant hashes to that same slot, the new entry goes in a free slot and is
+// appended to the occupant's chain; otherwise the occupant is moved to a free
+// slot (its chain predecessor is re-pointed) and the new entry takes the main
+// slot.
+// NOTE(review): the strdup() result is not checked for NULL.
+static void strinsert(upb_strtable *t, const char *key, const void *val) {
+ assert(upb_strtable_lookup(t, key) == NULL);
+ t->t.count++;
+ uint32_t bucket = strtable_bucket(t, key);
+ upb_strtable_entry *table_e = strent(t, bucket);
+ if(table_e->hdr.key) { /* Collision. */
+ if(bucket == strtable_bucket(t, table_e->hdr.key)) {
+ /* Existing element is in its main position. Find an empty slot to
+ * place our new element and append it to this key's chain. */
+ uint32_t empty_bucket = empty_strbucket(t);
+ while (table_e->hdr.next != UPB_END_OF_CHAIN)
+ table_e = strent(t, table_e->hdr.next);
+ table_e->hdr.next = empty_bucket;
+ table_e = strent(t, empty_bucket);
+ } else {
+ /* Existing element is not in its main position. Move it to an empty
+ * slot and put our element in its main position. */
+ uint32_t empty_bucket = empty_strbucket(t);
+ uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
+ memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
+ upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
+ /* Walk the evictee's chain from its main slot to find the link that
+ * pointed at the slot we are taking over, and redirect it. */
+ while(1) {
+ assert(evictee_e->hdr.key);
+ assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
+ if(evictee_e->hdr.next == bucket) {
+ evictee_e->hdr.next = empty_bucket;
+ break;
+ }
+ evictee_e = strent(t, evictee_e->hdr.next);
+ }
+ /* table_e remains set to our mainpos. */
+ }
+ }
+ //fprintf(stderr, "val: %p\n", val);
+ //fprintf(stderr, "val size: %d\n", t->t.value_size);
+ /* table_e now names the slot for the new entry; it ends its chain. */
+ memcpy(&table_e->val, val, t->t.value_size);
+ table_e->hdr.key = strdup(key);
+ table_e->hdr.next = UPB_END_OF_CHAIN;
+ //fprintf(stderr, "Looking up, string=%s...\n", key);
+ assert(upb_strtable_lookup(t, key) == &table_e->val);
+ //printf("Yay!\n");
+}
+
+// Inserts key -> val, growing the table first if one more entry would push
+// the load factor above MAX_LOAD. Growing builds a table with twice as many
+// slots, re-inserts every existing entry (which re-copies the keys), frees
+// the old table and replaces *t with the new one.
+void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
+  const double load_after_insert =
+      (double)(t->t.count + 1) / upb_strtable_size(t);
+  if (load_after_insert > MAX_LOAD) {
+    upb_strtable grown;
+    upb_strtable_init(&grown, upb_strtable_size(t) * 2, t->t.value_size);
+
+    upb_strtable_iter it;
+    for (upb_strtable_begin(&it, t); !upb_strtable_done(&it);
+         upb_strtable_next(&it)) {
+      strinsert(&grown, upb_strtable_iter_key(&it),
+                upb_strtable_iter_value(&it));
+    }
+
+    upb_strtable_free(t);
+    *t = grown;
+  }
+  strinsert(t, key, val);
+}
+
+// Positions iterator i on the first occupied slot of t. It starts from the
+// "slot -1" position and lets upb_strtable_next() step forward, so i ends up
+// done (i->e == NULL) when the table holds no keys.
+void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t) {
+ i->e = strent(t, -1);
+ i->t = t;
+ upb_strtable_next(i);
+}
+
+// Advances i to the next occupied slot (one with a non-NULL key), stepping by
+// the table's runtime entry size. Sets i->e to NULL once the end of the slot
+// array is reached.
+void upb_strtable_next(upb_strtable_iter *i) {
+  upb_strtable *tbl = i->t;
+  upb_strtable_entry *stop = strent(tbl, upb_strtable_size(tbl));
+  char *pos = (char*)i->e;
+  for (;;) {
+    pos += tbl->t.entry_size;
+    upb_strtable_entry *cand = (void*)pos;
+    if (cand == stop) {
+      i->e = NULL;
+      return;
+    }
+    if (cand->hdr.key != NULL) {
+      i->e = cand;
+      return;
+    }
+  }
+}
+
+#ifdef UPB_UNALIGNED_READS_OK
+//-----------------------------------------------------------------------------
+// MurmurHash2, by Austin Appleby (released as public domain).
+// Reformatted and C99-ified by Joshua Haberman.
+// Note - This code makes a few assumptions about how your machine behaves -
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
+// And it has a few limitations -
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+// machines.
+// Returns a 32-bit hash of the len bytes at key, seeded with seed.
+// This variant loads the input four bytes at a time through a uint32_t
+// pointer regardless of the address, which is why it is only compiled when
+// UPB_UNALIGNED_READS_OK is defined.
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
+{
+ // 'm' and 'r' are mixing constants generated offline.
+ // They're not really 'magic', they just happen to work well.
+ const uint32_t m = 0x5bd1e995;
+ const int32_t r = 24;
+
+ // Initialize the hash to a 'random' value
+ uint32_t h = seed ^ len;
+
+ // Mix 4 bytes at a time into the hash
+ const uint8_t * data = (const uint8_t *)key;
+ while(len >= 4) {
+ uint32_t k = *(uint32_t *)data;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ data += 4;
+ len -= 4;
+ }
+
+ // Handle the last few bytes of the input array
+ // (the cases intentionally fall through so every remaining byte is mixed).
+ switch(len) {
+ case 3: h ^= data[2] << 16; // fallthrough
+ case 2: h ^= data[1] << 8; // fallthrough
+ case 1: h ^= data[0]; h *= m;
+ };
+
+ // Do a few final mixes of the hash to ensure the last few
+ // bytes are well-incorporated.
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
+}
+
+#else // !UPB_UNALIGNED_READS_OK
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms.
+// Performance will be lower than MurmurHash2
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+// Returns a 32-bit hash of the len bytes at key, seeded with seed.
+// This variant only performs 4-byte loads from 4-byte-aligned addresses.
+// When key is misaligned (and at least 4 bytes long) it consumes the leading
+// bytes one at a time, then reads aligned words and stitches each output word
+// together from two neighbouring loads using the shifts sl/sr.
+// The switch statements below fall through between cases on purpose.
+static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
+{
+ const uint32_t m = 0x5bd1e995;
+ const int32_t r = 24;
+ const uint8_t * data = (const uint8_t *)key;
+ uint32_t h = seed ^ len;
+ // Offset of the input within a 4-byte word (0 means already aligned).
+ uint8_t align = (uintptr_t)data & 3;
+
+ if(align && (len >= 4)) {
+ // Pre-load the temp registers
+ uint32_t t = 0, d = 0;
+
+ switch(align) {
+ case 1: t |= data[2] << 16; // fallthrough
+ case 2: t |= data[1] << 8; // fallthrough
+ case 3: t |= data[0];
+ }
+
+ t <<= (8 * align);
+
+ // Skip to the next aligned address.
+ data += 4-align;
+ len -= 4-align;
+
+ int32_t sl = 8 * (4-align);
+ int32_t sr = 8 * align;
+
+ // Mix
+
+ while(len >= 4) {
+ d = *(uint32_t *)data;
+ t = (t >> sr) | (d << sl);
+
+ uint32_t k = t;
+
+ MIX(h,k,m);
+
+ t = d;
+
+ data += 4;
+ len -= 4;
+ }
+
+ // Handle leftover data in temp registers
+
+ d = 0;
+
+ if(len >= align) {
+ switch(align) {
+ case 3: d |= data[2] << 16; // fallthrough
+ case 2: d |= data[1] << 8; // fallthrough
+ case 1: d |= data[0];
+ }
+
+ uint32_t k = (t >> sr) | (d << sl);
+ MIX(h,k,m);
+
+ data += align;
+ len -= align;
+
+ //----------
+ // Handle tail bytes
+
+ switch(len) {
+ case 3: h ^= data[2] << 16; // fallthrough
+ case 2: h ^= data[1] << 8; // fallthrough
+ case 1: h ^= data[0]; h *= m;
+ };
+ } else {
+ switch(len) {
+ case 3: d |= data[2] << 16; // fallthrough
+ case 2: d |= data[1] << 8; // fallthrough
+ case 1: d |= data[0]; // fallthrough
+ case 0: h ^= (t >> sr) | (d << sl); h *= m;
+ }
+ }
+
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
+ } else {
+ // Input is already aligned, or shorter than 4 bytes (in which case the
+ // loop below does not execute and no word load happens).
+ while(len >= 4) {
+ uint32_t k = *(uint32_t *)data;
+
+ MIX(h,k,m);
+
+ data += 4;
+ len -= 4;
+ }
+
+ //----------
+ // Handle tail bytes
+
+ switch(len) {
+ case 3: h ^= data[2] << 16; // fallthrough
+ case 2: h ^= data[1] << 8; // fallthrough
+ case 1: h ^= data[0]; h *= m;
+ };
+
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
+ }
+}
+#undef MIX
+
+#endif // UPB_UNALIGNED_READS_OK
diff --git a/upb/table.h b/upb/table.h
new file mode 100644
index 0000000..376465b
--- /dev/null
+++ b/upb/table.h
@@ -0,0 +1,225 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * This file defines very fast int->struct (inttable) and string->struct
+ * (strtable) hash tables. The struct can be of any size, and it is stored
+ * in the table itself, for cache-friendly performance.
+ *
+ * The table uses internal chaining with Brent's variation (inspired by the
+ * Lua implementation of hash tables). The hash function for strings is
+ * Austin Appleby's "MurmurHash."
+ */
+
+#ifndef UPB_TABLE_H_
+#define UPB_TABLE_H_
+
+#include <assert.h>
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Sentinel stored in an entry's "next" field to mark the end of a collision
+// chain (all bits set, i.e. the largest uint32_t).
+#define UPB_END_OF_CHAIN (uint32_t)-1
+
+// Prefix shared by every value stored in an inttable: a one-bit "slot is
+// occupied" flag that the table owns. The user's value type overlays this.
+typedef struct {
+ bool has_entry:1;
+ // The rest of the bits are the user's.
+} upb_inttable_value;
+
+// Per-entry bookkeeping for the hash part of an inttable.
+typedef struct {
+ uint32_t key;
+ uint32_t next; // Internal chaining.
+ // (Index of the next entry in this chain, or UPB_END_OF_CHAIN.)
+} upb_inttable_header;
+
+// One hash-part entry of an inttable: header followed by the value. Only the
+// start of the value is declared here; the real value size is tracked at
+// runtime (upb_table.value_size / entry_size).
+typedef struct {
+ upb_inttable_header hdr;
+ upb_inttable_value val;
+} upb_inttable_entry;
+
+// TODO: consider storing the hash in the entry. This would avoid the need to
+// rehash on table resizes, but more importantly could possibly improve lookup
+// performance by letting us compare hashes before comparing lengths or the
+// strings themselves.
+// Per-entry bookkeeping for a strtable. A NULL key marks an unused slot.
+typedef struct {
+ char *key; // We own, nullz. TODO: store explicit len?
+ // ("nullz" = NUL-terminated; the table frees it in upb_strtable_free.)
+ uint32_t next; // Internal chaining.
+} upb_strtable_header;
+
+// One strtable entry: header followed by the value. "val" only marks where
+// the value begins; value_size bytes are actually stored there.
+typedef struct {
+ upb_strtable_header hdr;
+ uint32_t val; // Val is at least 32 bits.
+} upb_strtable_entry;
+
+// State common to both table kinds: the entry storage plus the runtime sizes
+// needed to index it, since entries are variable-sized per table.
+typedef struct {
+ void *entries; // Hash table.
+ uint32_t count; // Number of entries in the hash part.
+ uint32_t mask; // Mask to turn hash value -> bucket.
+ uint16_t entry_size; // Size of each entry.
+ uint16_t value_size; // Size of each value.
+ uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries.
+} upb_table;
+
+// String-keyed table; a thin wrapper around the common table state.
+typedef struct {
+ upb_table t;
+} upb_strtable;
+
+// Integer-keyed table with two parts: keys below array_size live in a flat
+// array indexed by key; all other keys live in the hash part (t).
+typedef struct {
+ upb_table t;
+ void *array; // Array part of the table.
+ uint32_t array_size; // Array part size.
+ uint32_t array_count; // Array part number of elements.
+} upb_inttable;
+
+// Initialize and free a table, respectively. Specify the initial size
+// with 'size' (the size will be increased as necessary). Value size
+// specifies how many bytes each value in the table is.
+//
+// WARNING! The lowest bit of every entry is reserved by the hash table.
+// It will always be overwritten when you insert, and must not be modified
+// when looked up!
+void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
+void upb_inttable_free(upb_inttable *table);
+void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
+void upb_strtable_free(upb_strtable *table);
+
+// Number of values in the hash table.
+// Returns the stored entry count of the hash part.
+INLINE uint32_t upb_table_count(upb_table *t) { return t->count; }
+// Returns the total number of entries: array part plus hash part.
+INLINE uint32_t upb_inttable_count(upb_inttable *t) {
+ return t->array_count + upb_table_count(&t->t);
+}
+// Returns the number of keys stored in the string table.
+INLINE uint32_t upb_strtable_count(upb_strtable *t) {
+ return upb_table_count(&t->t);
+}
+
+// Inserts the given key into the hashtable with the given value. The key must
+// not already exist in the hash table. The data will be copied from val into
+// the hashtable (the amount of data copied comes from value_size when the
+// table was constructed). Therefore the data at val may be freed once the
+// call returns. For string tables, the table stores its own copy of the key
+// string, so the caller keeps ownership of the string it passed in.
+//
+// WARNING: the lowest bit of val is reserved and will be overwritten!
+void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
+// TODO: may want to allow for more complex keys with custom hash/comparison
+// functions.
+void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
+void upb_inttable_compact(upb_inttable *t);
+// Removes every entry from t by freeing it and re-initializing it with 8
+// slots and the same value size as before.
+//
+// upb_strtable_init() takes the *value* size and derives the entry size from
+// it, so the value size (not the entry size) must be carried over; passing
+// the entry size would inflate the value size on every clear.
+INLINE void upb_strtable_clear(upb_strtable *t) {
+  // TODO: improve.
+  uint16_t value_size = t->t.value_size;
+  upb_strtable_free(t);
+  upb_strtable_init(t, 8, value_size);
+}
+
+// Returns the main hash-part slot for key k (k masked by the table's mask).
+// The assert guards against the result colliding with the chain sentinel.
+INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, uint32_t k) {
+ uint32_t bucket = k & t->t.mask; // Identity hash for ints.
+ assert(bucket != UPB_END_OF_CHAIN);
+ return bucket;
+}
+
+// Returns true if this key belongs in the array part of the table.
+// (Keys are array keys exactly when they are a valid index into the array.)
+INLINE bool _upb_inttable_isarrkey(upb_inttable *t, uint32_t k) {
+ return (k < t->array_size);
+}
+
+// Looks up key in this table, returning a pointer to the user's inserted data.
+// We have the caller specify the entry_size because fixing this as a literal
+// (instead of reading table->entry_size) gives the compiler more ability to
+// optimize.
+// Looks up key and returns a pointer to its stored value, or NULL if absent.
+// entry_size and value_size must match the sizes the table was built with;
+// they are parameters so that callers can pass compile-time constants.
+//
+// Keys below array_size are served from the array part, where a slot counts
+// only if its has_entry bit is set. Other keys are found by walking the
+// collision chain that starts at the key's main hash slot.
+// NOTE(review): the hash-part match compares keys only and does not test
+// has_entry; this relies on how unused hash entries are initialized, which is
+// not visible here.
+INLINE void *_upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
+                                      size_t entry_size, size_t value_size) {
+  if (_upb_inttable_isarrkey(t, key)) {
+    // Form the slot address only after the bounds check, so no out-of-range
+    // pointer is ever computed for hash-part keys.
+    upb_inttable_value *arrval =
+        (upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
+    return (arrval->has_entry) ? arrval : NULL;
+  }
+  uint32_t bucket = _upb_inttable_bucket(t, key);
+  upb_inttable_entry *e =
+      (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
+  while (1) {
+    if (e->hdr.key == key) {
+      return &e->val;
+    }
+    if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
+    e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
+  }
+}
+
+// Returns the size of one hash-part entry for the given value size: header
+// plus value, rounded up to a multiple of 8 bytes.
+INLINE size_t _upb_inttable_entrysize(size_t value_size) {
+ return upb_align_up(sizeof(upb_inttable_header) + value_size, 8);
+}
+
+// Lookup for callers that know value_size statically; the entry size is
+// derived from it instead of being read from the table. value_size must be
+// the size the table was created with.
+INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
+ uint32_t value_size) {
+ return _upb_inttable_fastlookup(t, key, _upb_inttable_entrysize(value_size), value_size);
+}
+
+// General lookup: uses the entry and value sizes recorded in the table.
+// Returns a pointer to the stored value, or NULL if key is absent.
+INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
+ return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size);
+}
+
+void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len);
+void *upb_strtable_lookup(upb_strtable *t, const char *key);
+
+
+/* upb_strtable_iter **********************************************************/
+
+// Strtable iteration. Order is undefined. Insertions invalidate iterators.
+// upb_strtable_iter i;
+// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+// const char *key = upb_strtable_iter_key(&i);
+// const myval *val = upb_strtable_iter_value(&i);
+// // ...
+// }
+// Iterator state: the table being walked and the current entry (NULL once
+// iteration is finished).
+typedef struct {
+ upb_strtable *t;
+ upb_strtable_entry *e;
+} upb_strtable_iter;
+
+void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t);
+void upb_strtable_next(upb_strtable_iter *i);
+// True once the iterator has moved past the last entry.
+INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; }
+// Returns the current entry's key (owned by the table). Must not be called
+// on a finished iterator, since it dereferences i->e.
+INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
+ return i->e->hdr.key;
+}
+// Returns a pointer to the current entry's value, stored inside the table.
+// Must not be called on a finished iterator.
+INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
+ return &i->e->val;
+}
+
+
+/* upb_inttable_iter **********************************************************/
+
+// Inttable iteration. Order is undefined. Insertions invalidate iterators.
+// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
+// i = upb_inttable_next(t, i)) {
+// // ...
+// }
+// Iterator state, passed and returned by value.
+typedef struct {
+ uint32_t key; // Key of the current entry.
+ upb_inttable_value *value; // Current value; NULL once iteration is done.
+ bool array_part; // True while still walking the array part.
+} upb_inttable_iter;
+
+upb_inttable_iter upb_inttable_begin(upb_inttable *t);
+upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter);
+// True once the iterator has moved past the last entry.
+INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; }
+// Returns the key of the entry the iterator currently refers to.
+INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
+ return iter.key;
+}
+// Returns a pointer to the current entry's value (NULL if iteration is done).
+INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {
+ return iter.value;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_TABLE_H_ */
diff --git a/upb/upb.c b/upb/upb.c
new file mode 100644
index 0000000..0ff082f
--- /dev/null
+++ b/upb/upb.c
@@ -0,0 +1,122 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "upb/descriptor_const.h"
+#include "upb/upb.h"
+#include "upb/bytestream.h"
+
+// Alignment of type t, computed as the offset of a t member that follows a
+// single char in a struct.
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+// Expands to one upb_type_info initializer (followed by a comma), in field
+// order: align, size, native_wire_type, inmemory_type, ctype (the C type's
+// name as a string).
+#define TYPE_INFO(wire_type, ctype, inmemory_type) \
+ {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
+
+// Per-field-type info. Each row's position in the array is its field type
+// number; the trailing comment names the type the row describes.
+const upb_type_info upb_types[] = {
+ TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake)
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64) // UINT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // INT32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64) // FIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32) // FIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL) // BOOL
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // STRING
+ TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE) // GROUP
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE) // MESSAGE
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // BYTES
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32) // UINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32) // ENUM
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32) // SFIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64) // SFIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // SINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // SINT64
+ TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, INT64) // Extra trailing row (was also labelled "SINT64"). NOTE(review): confirm what this row is for.
+};
+
+// A zeroed upb_value. In debug builds upb_value also carries a type tag,
+// which is set to -1 here.
+#ifdef NDEBUG
+upb_value UPB_NO_VALUE = {{0}};
+#else
+upb_value UPB_NO_VALUE = {{0}, -1};
+#endif
+
+// Puts status into the empty "OK, no message" state with no buffer allocated.
+// bufsize is zeroed together with buf: upb_status_setf() passes both to
+// upb_vrprintf(), which reads the size before writing anything, so leaving it
+// uninitialized would make the first formatted write use a garbage capacity.
+void upb_status_init(upb_status *status) {
+  status->buf = NULL;
+  status->bufsize = 0;
+  upb_status_clear(status);
+}
+
+// Releases the message buffer owned by status. The struct's fields are not
+// reset, so status must be re-initialized before it is used again.
+void upb_status_uninit(upb_status *status) {
+ free(status->buf);
+}
+
+// Sets the status code and a printf-style formatted message. The message is
+// written into the status's own buffer, which grows as needed. If formatting
+// or allocation fails, the code is still set but the message is left absent
+// (str == NULL) rather than pointing at a stale or unallocated buffer.
+void upb_status_setf(upb_status *s, enum upb_status_code code,
+                     const char *msg, ...) {
+  s->code = code;
+  va_list args;
+  va_start(args, msg);
+  int len = upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args);
+  va_end(args);
+  s->str = (len < 0) ? NULL : s->buf;
+}
+
+// Copies the code and message of "from" into "to". "to" keeps its own buffer,
+// which is grown if it cannot hold the message.
+//
+// Compared with the previous version:
+//  - to->str is always pointed at to->buf before copying. It used to be set
+//    only when a reallocation happened, so a cleared status whose buffer was
+//    already large enough was copied through a NULL str.
+//  - the realloc result is checked; on failure "to" keeps its old buffer and
+//    is left with no message.
+//  - the copy length comes from the NUL-terminated message itself instead of
+//    assuming from->str spans from->bufsize bytes.
+void upb_status_copy(upb_status *to, upb_status *from) {
+  to->code = from->code;
+  if (!from->str) {
+    to->str = NULL;
+    return;
+  }
+  size_t needed = strlen(from->str) + 1;  // Include the terminator.
+  if (to->buf == NULL || to->bufsize < needed) {
+    char *newbuf = realloc(to->buf, needed);
+    if (!newbuf) {
+      to->str = NULL;
+      return;
+    }
+    to->buf = newbuf;
+    to->bufsize = needed;
+  }
+  to->str = to->buf;
+  // memmove tolerates to == from (identical source and destination).
+  memmove(to->str, from->str, needed);
+}
+
+// Resets status to "OK, no message". The buffer (buf/bufsize) is left alone
+// so it can be reused by a later upb_status_setf().
+void upb_status_clear(upb_status *status) {
+ status->code = UPB_OK;
+ status->str = NULL;
+}
+
+// Writes a one-line description of status to f: the numeric code, followed
+// by the message when one is present.
+void upb_status_print(upb_status *status, FILE *f) {
+  if (status->str == NULL) {
+    fprintf(f, "code: %d, no msg\n", status->code);
+    return;
+  }
+  fprintf(f, "code: %d, msg: %s\n", status->code, status->str);
+}
+
+// Sets status to UPB_ERROR with the strerror() text for the current errno.
+// Call it right after the failing operation, before errno can change.
+void upb_status_fromerrno(upb_status *status) {
+ upb_status_setf(status, UPB_ERROR, "%s", strerror(errno));
+}
+
+// Formats fmt/args into *buf starting at byte offset ofs, growing the buffer
+// with realloc() when the result does not fit. *buf may be NULL when *size is
+// 0. On success *buf and *size describe the (possibly reallocated) buffer and
+// the return value is the length of the formatted text, not counting the
+// terminating NUL. Returns -1 if formatting or allocation fails; in that case
+// *buf and *size are left unchanged.
+//
+// Compared with the previous version:
+//  - a negative vsnprintf() result is reported as an error instead of being
+//    converted to a huge unsigned length;
+//  - ofs larger than *size no longer underflows the available-space count;
+//  - *size is only updated after realloc() has succeeded.
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
+                 const char *fmt, va_list args) {
+  // Try once without reallocating.  We have to va_copy because we might have
+  // to call vsnprintf again.
+  size_t avail = (*size > ofs) ? *size - ofs : 0;
+  va_list args_copy;
+  va_copy(args_copy, args);
+  int true_len = vsnprintf(avail ? *buf + ofs : NULL, avail, fmt, args_copy);
+  va_end(args_copy);
+  if (true_len < 0) return -1;
+
+  if ((size_t)true_len >= avail) {
+    // Need to print again, because some characters were truncated.  vsnprintf
+    // will not write the entire string unless you give it space to store the
+    // NULL terminator also.
+    size_t needed = ofs + (size_t)true_len + 1;
+    size_t newsize = *size;
+    while (newsize < needed) newsize = UPB_MAX(newsize * 2, 2);
+    char *newbuf = realloc(*buf, newsize);
+    if (!newbuf) return -1;
+    *buf = newbuf;
+    *size = newsize;
+    vsnprintf(newbuf + ofs, (size_t)true_len + 1, fmt, args);
+  }
+  return true_len;
+}
diff --git a/upb/upb.h b/upb/upb.h
new file mode 100644
index 0000000..153057d
--- /dev/null
+++ b/upb/upb.h
@@ -0,0 +1,238 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * This file contains shared definitions that are widely used across upb.
+ */
+
+#ifndef UPB_H_
+#define UPB_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h> // only for size_t.
+#include <assert.h>
+#include "descriptor_const.h"
+#include "atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// inline if possible, emit standalone code if required.
+#ifndef INLINE
+#define INLINE static inline
+#endif
+
+// NOTE: UPB_MAX and UPB_MIN evaluate one of their arguments twice; do not
+// pass expressions with side effects.
+#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+// Address of element i in an array at base whose elements are m bytes each
+// (for arrays whose element size is only known at runtime).
+#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
+
+// printf-shaped function that does nothing; DEBUGPRINTF maps to it when
+// NDEBUG is defined.
+INLINE void nop_printf(const char *fmt, ...) { (void)fmt; }
+
+#ifdef NDEBUG
+#define DEBUGPRINTF nop_printf
+#else
+#define DEBUGPRINTF printf
+#endif
+
+// Rounds val up to the next multiple of align.
+// (align must be non-zero; it is used as a divisor.)
+INLINE size_t upb_align_up(size_t val, size_t align) {
+  const size_t rem = val % align;
+  if (rem == 0) return val;
+  return val + (align - rem);
+}
+
+// The maximum that any submessages can be nested. Matches proto2's limit.
+// At the moment this specifies the size of several statically-sized arrays
+// and therefore setting it high will cause more memory to be used. Will
+// be replaced by a runtime-configurable limit and dynamically-resizing arrays.
+// TODO: make this a runtime-settable property of upb_handlers.
+#define UPB_MAX_NESTING 64
+
+// The maximum number of fields that any one .proto type can have. Note that
+// this is very different than the max field number. It is hard to imagine a
+// scenario where more than 2k fields (each with its own name and field number)
+// makes sense. The .proto file to describe it would be 2000 lines long and
+// contain 2000 unique names.
+//
+// With this limit we can store a has-bit offset in 8 bits (2**8 * 8 = 2048)
+// and we can store a value offset in 16 bits, since the maximum message
+// size is 16,640 bytes (2**8 has-bits + 2048 * 8-byte value). Note that
+// strings and arrays are not counted in this, only the *pointer* to them is.
+// An individual string or array is unaffected by this 16k byte limit.
+#define UPB_MAX_FIELDS (2048)
+
+// Nested type names are separated by periods.
+#define UPB_SYMBOL_SEPARATOR '.'
+
+// The longest chain that mutually-recursive types are allowed to form. For
+// example, this is a type cycle of length 2:
+// message A {
+// B b = 1;
+// }
+// message B {
+// A a = 1;
+// }
+#define UPB_MAX_TYPE_CYCLE_LEN 16
+
+// The maximum depth that the type graph can have. Note that this setting does
+// not automatically constrain UPB_MAX_NESTING, because type cycles allow for
+// unlimited nesting if we do not limit it. Many algorithms in upb call
+// recursive functions that traverse the type graph, so we must limit this to
+// avoid blowing the C stack.
+#define UPB_MAX_TYPE_DEPTH 64
+
+
+/* Fundamental types and type constants. **************************************/
+
+// A list of types as they are encoded on-the-wire.
+// (The numeric values are explicit because they are part of the encoding.)
+enum upb_wire_type {
+ UPB_WIRE_TYPE_VARINT = 0,
+ UPB_WIRE_TYPE_64BIT = 1,
+ UPB_WIRE_TYPE_DELIMITED = 2,
+ UPB_WIRE_TYPE_START_GROUP = 3,
+ UPB_WIRE_TYPE_END_GROUP = 4,
+ UPB_WIRE_TYPE_32BIT = 5,
+};
+
+// Type of a field as defined in a .proto file. eg. string, int32, etc. The
+// integers that represent this are defined by descriptor.proto. Note that
+// descriptor.proto reserves "0" for errors, and we use it to represent
+// exceptional circumstances.
+typedef uint8_t upb_fieldtype_t;
+
+// For referencing the type constants tersely.
+#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
+#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type
+
+// Info for a given field type.
+typedef struct {
+ uint8_t align; // Alignment of the C type, in bytes.
+ uint8_t size; // sizeof the C type.
+ uint8_t native_wire_type; // A upb_wire_type value.
+ uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
+ char *ctype; // Name of the C type as a string (filled from a string literal; treat as read-only).
+} upb_type_info;
+
+// A static array of info about all of the field types, indexed by type number.
+extern const upb_type_info upb_types[];
+
+
+/* upb_value ******************************************************************/
+
+struct _upb_strref;
+struct _upb_fielddef;
+
+// Special constants for the upb_value.type field. These must not conflict
+// with any members of FieldDescriptorProto.Type.
+#define UPB_TYPE_ENDGROUP 0
+#define UPB_VALUETYPE_FIELDDEF 32
+#define UPB_VALUETYPE_PTR 33
+
+// A single .proto value. The owner must have an out-of-band way of knowing
+// the type, so that it knows which union member to use.
+// Use the upb_value_get*/upb_value_set* accessors rather than touching the
+// union directly, so that debug builds can check the type tag.
+typedef struct {
+ union {
+ uint64_t uint64;
+ double _double;
+ float _float;
+ int32_t int32;
+ int64_t int64;
+ uint32_t uint32;
+ bool _bool;
+ struct _upb_strref *strref;
+ struct _upb_fielddef *fielddef;
+ void *_void;
+ } val;
+
+#ifndef NDEBUG
+ // In debug mode we carry the value type around also so we can check accesses
+ // to be sure the right member is being read.
+ // NOTE: this makes the struct layout differ between debug and NDEBUG builds.
+ char type;
+#endif
+} upb_value;
+
+#ifdef NDEBUG
+#define SET_TYPE(dest, val)
+#else
+#define SET_TYPE(dest, val) dest = val
+#endif
+
+// Defines upb_value_get<name>() and upb_value_set<name>() for one union
+// member. The getter asserts that the value's debug type tag equals
+// proto_type; the setter records proto_type through SET_TYPE, which expands
+// to nothing when NDEBUG is defined.
+#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \
+ INLINE ctype upb_value_get ## name(upb_value val) { \
+ assert(val.type == proto_type); \
+ return val.val.membername; \
+ } \
+ INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
+ SET_TYPE(val->type, proto_type); \
+ val->val.membername = cval; \
+ }
+UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE));
+UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT));
+UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32));
+UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
+UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
+UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
+UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
+UPB_VALUE_ACCESSORS(strref, strref, struct _upb_strref*, UPB_TYPE(STRING));
+UPB_VALUE_ACCESSORS(fielddef, fielddef, struct _upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
+UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
+
+extern upb_value UPB_NO_VALUE;
+
+
+/* upb_status *****************************************************************/
+
+// Status codes used as a return value. Codes >0 are not fatal and can be
+// resumed.
+// (Note that UPB_ERROR is negative, so the code needs a signed type.)
+enum upb_status_code {
+ // The operation completed successfully.
+ UPB_OK = 0,
+
+ // The bytesrc is at EOF and all data was read successfully.
+ UPB_EOF = 1,
+
+ // A read or write from a streaming src/sink could not be completed right now.
+ UPB_TRYAGAIN = 2,
+
+ // An unrecoverable error occurred.
+ UPB_ERROR = -1,
+};
+
+// TODO: consider adding error space and code, to let ie. errno be stored
+// as a proper code, or application-specific error codes.
+// Result of an operation: a status code plus an optional message.
+typedef struct {
+  // An enum upb_status_code value. Declared explicitly signed because
+  // UPB_ERROR is -1 and whether plain char is signed is implementation-
+  // defined; with an unsigned char the stored value would read back as 255.
+  signed char code;
+  char *str;  // NULL when no message is present.  NULL-terminated.
+  char *buf;  // Owned by the status.
+  size_t bufsize;  // Allocated size of buf, in bytes.
+} upb_status;
+
+#define UPB_STATUS_INIT {UPB_OK, NULL, NULL, 0}
+
+void upb_status_init(upb_status *status);
+void upb_status_uninit(upb_status *status);
+
+// True only for UPB_OK; UPB_EOF and UPB_TRYAGAIN are not "ok" by this test.
+INLINE bool upb_ok(upb_status *status) { return status->code == UPB_OK; }
+// True when the status code is UPB_EOF.
+INLINE bool upb_iseof(upb_status *status) { return status->code == UPB_EOF; }
+
+void upb_status_fromerrno(upb_status *status);
+void upb_status_print(upb_status *status, FILE *f);
+void upb_status_clear(upb_status *status);
+void upb_status_setf(upb_status *status, enum upb_status_code code,
+ const char *fmt, ...);
+void upb_status_copy(upb_status *to, upb_status *from);
+
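+// Like vasprintf, but uses *buf (which can be NULL) as a starting point and
+// reallocates it only if the new value will not fit. Output is written
+// starting at byte offset "ofs" in the buffer. "size" is updated to reflect
+// the allocated size of the buffer. Returns the length of the formatted
+// string (not counting the terminating NUL), or -1 on memory alloc failure.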
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
+ const char *fmt, va_list args);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback