29 files changed, 8252 insertions, 0 deletions
diff --git a/upb/atomic.h b/upb/atomic.h
new file mode 100644
index 0000000..53501b5
--- /dev/null
+++ b/upb/atomic.h
@@ -0,0 +1,177 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Only a very small part of upb is thread-safe.  Notably, individual
+ * messages, arrays, and strings are *not* thread safe for mutating.
+ * However, we do make message *metadata* such as upb_msgdef and
+ * upb_context thread-safe, and their ownership is tracked via atomic
+ * refcounting.  This header implements the small number of atomic
+ * primitives required to support this.  The primitives we implement
+ * are:
+ *
+ * - a reader/writer lock (wrappers around platform-provided mutexes).
+ * - an atomic refcount.
+ */
+
+#ifndef UPB_ATOMIC_H_
+#define UPB_ATOMIC_H_
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* inline if possible, emit standalone code if required. */
+#ifndef INLINE
+#define INLINE static inline
+#endif
+
+// Until this stuff is actually working, make thread-unsafe the default.
+#define UPB_THREAD_UNSAFE
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* Non-thread-safe implementations. ******************************************/
+
+/* Plain, non-atomic counter used when UPB_THREAD_UNSAFE is defined. */
+typedef struct {
+  int v;
+} upb_atomic_t;
+
+/* Static initializer for a upb_atomic_t holding the value x. */
+#define UPB_ATOMIC_INIT(x) {x}
+
+/* Sets the counter to val. */
+INLINE void upb_atomic_init(upb_atomic_t *a, int val) { a->v = val; }
+/* Increments the counter; returns true if it was 0 before the increment. */
+INLINE bool upb_atomic_ref(upb_atomic_t *a) { return a->v++ == 0; }
+/* Decrements the counter; returns true if it is 0 after the decrement. */
+INLINE bool upb_atomic_unref(upb_atomic_t *a) { return --a->v == 0; }
+/* Returns the current value of the counter. */
+INLINE int upb_atomic_read(upb_atomic_t *a) { return a->v; }
+/* Adds val to the counter; returns true if the result is 0. */
+INLINE bool upb_atomic_add(upb_atomic_t *a, int val) {
+  a->v += val;
+  return a->v == 0;
+}
+
+#endif
+
+/* Atomic refcount ************************************************************/
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* Already defined above. */
+
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
+
+/* GCC includes atomic primitives. */
+
+/* Counter manipulated with GCC's __sync builtins. */
+typedef struct {
+  volatile int v;
+} upb_atomic_t;
+
+/* Stores val with a plain write, then issues a full memory barrier. */
+INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
+  a->v = val;
+  __sync_synchronize();   /* Ensure the initialized value is visible. */
+}
+
+/* Atomically increments; returns true if the counter was 0 beforehand. */
+INLINE bool upb_atomic_ref(upb_atomic_t *a) {
+  return __sync_fetch_and_add(&a->v, 1) == 0;
+}
+
+/* Atomically adds n; returns true if the resulting value is 0. */
+INLINE bool upb_atomic_add(upb_atomic_t *a, int n) {
+  return __sync_add_and_fetch(&a->v, n) == 0;
+}
+
+/* Atomically decrements; returns true if the resulting value is 0. */
+INLINE bool upb_atomic_unref(upb_atomic_t *a) {
+  return __sync_sub_and_fetch(&a->v, 1) == 0;
+}
+
+/* Returns the current counter value, read via an atomic add of 0.
+ * The return type must be int, not bool: upb_atomic_only() compares the
+ * result against 1, and a bool would collapse every nonzero count to 1.
+ * This also matches the thread-unsafe implementation's signature. */
+INLINE int upb_atomic_read(upb_atomic_t *a) {
+  return __sync_fetch_and_add(&a->v, 0);
+}
+
+#elif defined(WIN32)
+
+/* Windows defines atomic increment/decrement. */
+#include <Windows.h>
+
+/* Counter stored as a LONG so it can be used with the Interlocked* API. */
+typedef struct {
+  volatile LONG val;
+} upb_atomic_t;
+
+/* Atomically stores val into the counter. */
+INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
+  InterlockedExchange(&a->val, val);
+}
+
+/* Atomically increments; returns true if the counter was 0 beforehand.
+ * InterlockedIncrement returns the new value, hence the comparison with 1. */
+INLINE bool upb_atomic_ref(upb_atomic_t *a) {
+  return InterlockedIncrement(&a->val) == 1;
+}
+
+/* Atomically adds n; returns true if the resulting value is 0.
+ * InterlockedExchangeAdd returns the value *before* the addition. */
+INLINE bool upb_atomic_add(upb_atomic_t *a, int n) {
+  return InterlockedExchangeAdd(&a->val, n) + n == 0;
+}
+
+/* Atomically decrements; returns true if the resulting value is 0. */
+INLINE bool upb_atomic_unref(upb_atomic_t *a) {
+  return InterlockedDecrement(&a->val) == 0;
+}
+
+/* Returns the current counter value (an atomic add of 0 yields the current
+ * value).  Required by upb_atomic_only(), which every platform must supply. */
+INLINE int upb_atomic_read(upb_atomic_t *a) {
+  return (int)InterlockedExchangeAdd(&a->val, 0);
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU.  \
+       Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+/* Returns true if the counter currently reads exactly 1 (a single
+ * outstanding reference). */
+INLINE bool upb_atomic_only(upb_atomic_t *a) {
+  return upb_atomic_read(a) == 1;
+}
+
+/* Reader/Writer lock. ********************************************************/
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* Placeholder lock for thread-unsafe builds; all operations on it are no-ops.
+ * An empty struct is not valid ISO C (it is a GNU extension) and its size
+ * differs between C and C++, which matters because this header is shared via
+ * extern "C".  One unused member keeps the type portable. */
+typedef struct {
+  char unused;
+} upb_rwlock_t;
+
+/* Thread-unsafe build: every lock operation below ignores its argument and
+ * does nothing. */
+INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
+
+#elif defined(UPB_USE_PTHREADS)
+
+#include <pthread.h>
+
+/* Reader/writer lock backed by a pthread_rwlock_t. */
+typedef struct {
+  pthread_rwlock_t lock;
+} upb_rwlock_t;
+
+/* Initializes the lock with default attributes (NULL attr). */
+INLINE void upb_rwlock_init(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_init(&l->lock, NULL);
+}
+
+/* Destroys the lock. */
+INLINE void upb_rwlock_destroy(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_destroy(&l->lock);
+}
+
+/* Acquires the lock for reading. */
+INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_rdlock(&l->lock);
+}
+
+/* Acquires the lock for writing. */
+INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_wrlock(&l->lock);
+}
+
+/* Releases the lock, whether it was held for reading or writing. */
+INLINE void upb_rwlock_unlock(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_unlock(&l->lock);
+}
+
+#else
+#error Reader/writer lock is not defined for your platform/CPU.  \
+       Implement it or compile with UPB_THREAD_UNSAFE.
+#endif
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_ATOMIC_H_ */
diff --git a/upb/bytestream.c b/upb/bytestream.c
new file mode 100644
index 0000000..846b8ee
--- /dev/null
+++ b/upb/bytestream.c
@@ -0,0 +1,265 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/bytestream.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+// We can make this configurable if necessary.
+#define BUF_SIZE 32768
+
+// Copies the bytes referenced by "r" out of its bytesrc into a freshly
+// malloc()'d, NUL-terminated buffer.  The caller owns the result and must
+// free() it.  Returns NULL if the allocation fails.
+char *upb_strref_dup(struct _upb_strref *r) {
+  // Widen before adding 1 so a maximal 32-bit length cannot wrap to 0.
+  char *ret = malloc((size_t)r->len + 1);
+  if (!ret) return NULL;
+  upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret);
+  ret[r->len] = '\0';
+  return ret;
+}
+
+/* upb_stdio ******************************************************************/
+
+// bsearch() comparator: orders a byte offset (*_key) against one slot of
+// upb_stdio.bufs by BUF_SIZE-sized block index.  "bufs" is an array of
+// pointers, so _elem points at a upb_stdio_buf* and is dereferenced once.
+// Compares explicitly instead of subtracting, because a 64-bit difference
+// does not fit in the int return value.
+int upb_stdio_cmpbuf(const void *_key, const void *_elem) {
+  const uint64_t *ofs = _key;
+  const upb_stdio_buf *buf = *(upb_stdio_buf *const *)_elem;
+  uint64_t key_block = *ofs / BUF_SIZE;
+  uint64_t buf_block = buf->ofs / BUF_SIZE;
+  if (key_block < buf_block) return -1;
+  if (key_block > buf_block) return 1;
+  return 0;
+}
+
+// Returns the buffer whose block contains stream offset "ofs", or NULL if
+// no such buffer is held.  bsearch() requires s->bufs to be sorted by offset.
+static upb_stdio_buf *upb_stdio_findbuf(upb_stdio *s, uint64_t ofs) {
+  // TODO: it is probably faster to linear search short lists, and to
+  // special-case the last one or two bufs.
+  upb_stdio_buf **slot;
+  if (s->nbuf == 0) return NULL;  // Nothing to search; bufs may be NULL.
+  slot = bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf);
+  return slot ? *slot : NULL;  // bsearch yields the slot, not the buffer.
+}
+
+//static upb_strlen_t upb_stdio_read(void *src, uint32_t ofs, upb_buf *b,
+//                                   upb_status *status) {
+//  upb_stdio *stdio = (upb_stdio*)src;
+//  size_t read = fread(buf, 1, BLOCK_SIZE, stdio->file);
+//  if(read < (size_t)BLOCK_SIZE) {
+//    // Error or EOF.
+//    if(feof(stdio->file)) {
+//      upb_seterr(status, UPB_EOF, "");
+//    } else if(ferror(stdio->file)) {
+//      upb_status_fromerrno(s);
+//      return 0;
+//    }
+//  }
+//  b->len = read;
+//  stdio->next_ofs += read;
+//  return stdio->next_ofs;
+//}
+
+// Placeholder bytesrc "fetch" callback for upb_stdio: ignores all of its
+// arguments and always returns 0 (no bytes fetched) without touching "s".
+size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
+  (void)src;
+  (void)ofs;
+  (void)s;
+
+  return 0;
+}
+
+// Copies exactly "len" bytes starting at stream offset "src_ofs" into "dst",
+// crossing as many BUF_SIZE buffers as needed.  Each chunk is clamped to
+// both the bytes remaining and the end of the current buffer, so "dst" is
+// never written past "len" bytes and a zero-length read copies nothing.
+// Buffers are looked up by offset for every chunk because upb_stdio_buf ends
+// in a flexible array member and therefore cannot be stepped with ++.
+// If part of the region is not buffered (the caller did not hold a ref on
+// it), copying stops early.
+void upb_stdio_read(void *src, uint64_t src_ofs, size_t len, char *dst) {
+  while (len > 0) {
+    upb_stdio_buf *buf = upb_stdio_findbuf(src, src_ofs);
+    if (!buf) return;
+    size_t buf_ofs = src_ofs - buf->ofs;
+    if (buf_ofs >= BUF_SIZE) return;  // Offset lies outside this buffer.
+    size_t bytes = UPB_MIN(len, BUF_SIZE - buf_ofs);
+    memcpy(dst, &buf->data[buf_ofs], bytes);
+    src_ofs += bytes;
+    dst += bytes;
+    len -= bytes;
+  }
+}
+
+// Returns a pointer into the internal buffer holding stream offset "ofs" and
+// stores in *len the number of contiguous bytes available from there to the
+// end of that buffer.  Returns NULL with *len == 0 if no buffer holds "ofs".
+const char *upb_stdio_getptr(void *src, uint64_t ofs, size_t *len) {
+  upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
+  if (!buf) {
+    *len = 0;
+    return NULL;
+  }
+  ofs -= buf->ofs;  // Convert to an offset within this buffer.
+  *len = BUF_SIZE - ofs;
+  return &buf->data[ofs];
+}
+
+// Takes one reference on every buffer overlapped by the byte range
+// [ofs, ofs+len).  A zero-length range references nothing.  The walk advances
+// one BUF_SIZE block at a time and re-looks-up each buffer, since
+// upb_stdio_buf ends in a flexible array member and cannot be stepped with
+// ++.  Stops early if a block in the range is not buffered.
+void upb_stdio_refregion(void *src, uint64_t ofs, size_t len) {
+  uint64_t end = ofs + len;
+  uint64_t pos = ofs;
+  while (pos < end) {
+    upb_stdio_buf *buf = upb_stdio_findbuf(src, pos);
+    if (!buf) return;
+    ++buf->refcount;
+    pos = (pos / BUF_SIZE + 1) * BUF_SIZE;  // First byte of the next block.
+  }
+}
+
+// Placeholder: currently ignores its arguments and does nothing, so buffer
+// refcounts are not decremented here.
+void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) {
+  (void)src;
+  (void)ofs;
+  (void)len;
+}
+
+#if 0
+upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
+  upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
+  upb_strlen_t len = upb_string_len(str);
+  upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
+  if(written < len) {
+    upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
+    return -1;
+  }
+  return written;
+}
+#endif
+
+// printf-style write to the FILE held by the upb_stdio that embeds "sink"
+// (recovered via offsetof).  Returns the count reported by vfprintf().  On
+// failure sets an error on "status" and returns -1, which becomes UINT32_MAX
+// because the return type is unsigned.
+// NOTE(review): upb_bytesink_vprintf_func is int32_t(void*, ...); this
+// signature differs and would need adjusting before being put in a vtbl.
+uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
+                           const char *fmt, va_list args) {
+  upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
+  int written = vfprintf(stdio->file, fmt, args);
+  if (written < 0) {
+    upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
+    return -1;
+  }
+  return written;
+}
+
+// Initializes "stdio": installs the bytesrc vtbl and puts every other field
+// in a known empty state (no file, nothing to close, no buffers), so that
+// upb_stdio_uninit() and the buffer lookups are safe even if
+// upb_stdio_reset()/upb_stdio_open() is never called.  The bytesink side is
+// not wired up yet (see the commented-out vtbl below).
+void upb_stdio_init(upb_stdio *stdio) {
+  static upb_bytesrc_vtbl bytesrc_vtbl = {
+    upb_stdio_fetch,
+    upb_stdio_read,
+    upb_stdio_getptr,
+    upb_stdio_refregion,
+    upb_stdio_unrefregion,
+    NULL,
+    NULL
+  };
+  upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
+  stdio->file = NULL;
+  stdio->should_close = false;
+  stdio->bufs = NULL;
+  stdio->nbuf = 0;
+  stdio->szbuf = 0;
+
+  //static upb_bytesink_vtbl bytesink_vtbl = {
+  //  upb_stdio_putstr,
+  //  upb_stdio_vprintf
+  //};
+  //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
+}
+
+// Points "stdio" at an already-open FILE and clears should_close, so
+// upb_stdio_uninit() will not fclose() it.
+// NOTE(review): a file previously opened via upb_stdio_open() is not closed
+// here before being replaced.
+void upb_stdio_reset(upb_stdio* stdio, FILE *file) {
+  stdio->file = file;
+  stdio->should_close = false;
+}
+
+// Opens "filename" with fopen() mode "mode" and attaches it to "stdio", which
+// then owns the handle (upb_stdio_uninit() will fclose() it).  On failure
+// the errno-derived error is stored in "s" and "stdio" is left untouched.
+void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
+                    upb_status *s) {
+  FILE *f = fopen(filename, mode);
+  if (!f) {
+    upb_status_fromerrno(s);
+    return;
+  }
+  // Disable buffering on the handle we just opened; we do our own.  This
+  // must use "f": stdio->file is not assigned until the reset below.
+  setvbuf(f, NULL, _IONBF, 0);
+  upb_stdio_reset(stdio, f);
+  stdio->should_close = true;
+}
+
+// Tears down "stdio".  The FILE is closed only when this object opened it
+// itself; the fclose() result is discarded because there is no way to report
+// it, so callers should flush beforehand.
+void upb_stdio_uninit(upb_stdio *stdio) {
+  if (stdio->should_close) {
+    fclose(stdio->file);
+  }
+  stdio->file = NULL;
+}
+
+// Accessors for the bytesrc / bytesink embedded in a upb_stdio.
+upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
+upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
+
+
+/* upb_stringsrc **************************************************************/
+
+// bytesrc "fetch" for a stringsrc: the whole string is always available, so
+// this returns the number of bytes from "ofs" to the end of the string.  At
+// or past the end it sets s->code to UPB_EOF and returns 0; comparing first
+// avoids the unsigned underflow of len - ofs when ofs > len.
+size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
+  upb_stringsrc *src = _src;
+  if (ofs >= src->len) {
+    s->code = UPB_EOF;
+    return 0;
+  }
+  return src->len - ofs;
+}
+
+// Copies "len" bytes starting at offset "src_ofs" of the string into "dst".
+// No check against src->len is made here.
+void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) {
+  upb_stringsrc *src = _src;
+  memcpy(dst, src->str + src_ofs, len);
+}
+
+// Returns a pointer to the byte at offset "ofs" of the string and stores the
+// number of bytes from there to the end (src->len - ofs) in *len.
+const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) {
+  upb_stringsrc *src = _src;
+  *len = src->len - ofs;
+  return src->str + ofs;
+}
+
+// Initializes "s" as an empty stringsrc: installs the vtbl (region and
+// object ref hooks are left NULL) and sets both the string pointer and the
+// length, so a fetch before upb_stringsrc_reset() sees a well-defined empty
+// source instead of an uninitialized length.
+void upb_stringsrc_init(upb_stringsrc *s) {
+  static upb_bytesrc_vtbl vtbl = {
+    &upb_stringsrc_fetch,
+    &upb_stringsrc_read,
+    &upb_stringsrc_getptr,
+    NULL, NULL, NULL, NULL
+  };
+  upb_bytesrc_init(&s->bytesrc, &vtbl);
+  s->str = NULL;
+  s->len = 0;
+}
+
+// Makes "s" vend the "len" bytes at "str".  Only the pointer is stored; the
+// data is not copied.
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
+  s->str = str;
+  s->len = len;
+}
+
+// Nothing to release: a stringsrc does not own its string.
+void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
+
+// Returns the bytesrc embedded in "s".
+upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
+  return &s->bytesrc;
+}
+
+
+/* upb_stringsink *************************************************************/
+
+// Frees the string buffer currently owned by the sink.
+void upb_stringsink_uninit(upb_stringsink *s) {
+  free(s->str);
+}
+
+// Resets the stringsink so that it appends to "str", a malloc()'d buffer
+// with a capacity of "size" bytes.  The sink takes ownership of "str": it
+// will free() or realloc() it, so the caller must not free it.  Any buffer
+// previously owned by the sink is freed.  "str" may be NULL, in which case
+// the capacity is treated as 0 and the sink allocates on the first write.
+// A stringsink can be reset multiple times.
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
+  // Re-installing the buffer the sink already owns must not free it.
+  if (str != s->str) free(s->str);
+  s->str = str;
+  s->len = 0;
+  s->size = str ? size : 0;
+}
+
+// Returns the bytesink embedded in "s".
+upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) {
+  return &s->bytesink;
+}
+
+// bytesink "vprintf" for a stringsink: hands the sink's buffer, capacity and
+// current length to upb_vrprintf() and, when that returns a non-negative
+// value, advances the length by it.  Returns upb_vrprintf()'s result as-is.
+// NOTE(review): presumably upb_vrprintf() grows the buffer as needed and
+// returns the number of bytes appended -- confirm against its definition.
+static int32_t upb_stringsink_vprintf(void *_s, upb_status *status,
+                                      const char *fmt, va_list args) {
+  (void)status;  // TODO: report realloc() errors.
+  upb_stringsink *s = _s;
+  int ret = upb_vrprintf(&s->str, &s->size, s->len, fmt, args);
+  if (ret >= 0) s->len += ret;
+  return ret;
+}
+
+// bytesink "write" for a stringsink: appends "len" bytes from "buf", growing
+// the buffer geometrically when needed.  Returns true on success.  If the
+// buffer cannot be grown, the existing contents are left intact, an error is
+// recorded in "status" (when non-NULL) and false is returned.
+bool upb_stringsink_write(void *_s, const char *buf, size_t len,
+                          upb_status *status) {
+  upb_stringsink *s = _s;
+  size_t needed = s->len + len;
+  if (needed > s->size) {
+    // Start from a nonzero capacity: doubling 0 would never terminate.
+    size_t newsize = s->size ? s->size : 16;
+    while (newsize < needed) {
+      if (newsize > (size_t)-1 / 2) {  // Doubling would overflow size_t.
+        newsize = needed;
+        break;
+      }
+      newsize *= 2;
+    }
+    // Keep the old pointer until realloc() succeeds so it is not leaked.
+    char *newstr = realloc(s->str, newsize);
+    if (!newstr) {
+      if (status) upb_status_setf(status, UPB_ERROR, "Out of memory.");
+      return false;
+    }
+    s->str = newstr;
+    s->size = newsize;
+  }
+  if (len > 0) memcpy(s->str + s->len, buf, len);
+  s->len += len;
+  return true;
+}
+
+// Initializes "s" as an empty stringsink: installs the vtbl and sets the
+// buffer to NULL with zero length and capacity, so writes issued before
+// upb_stringsink_reset() see well-defined state.
+void upb_stringsink_init(upb_stringsink *s) {
+  static upb_bytesink_vtbl vtbl = {
+    upb_stringsink_write,
+    upb_stringsink_vprintf
+  };
+  upb_bytesink_init(&s->bytesink, &vtbl);
+  s->str = NULL;
+  s->len = 0;
+  s->size = 0;
+}
diff --git a/upb/bytestream.h b/upb/bytestream.h
new file mode 100644
index 0000000..2a6f7d2
--- /dev/null
+++ b/upb/bytestream.h
@@ -0,0 +1,286 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * This file contains upb_bytesrc and upb_bytesink, which are abstractions of
+ * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing
+ * semantics.  They are virtual base classes so concrete implementations
+ * can get the data from a fd, a string, a cord, etc.
+ *
+ * Byte streams are NOT thread-safe!  (Like f{read,write}_unlocked())
+ */
+
+#ifndef UPB_BYTESTREAM_H
+#define UPB_BYTESTREAM_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_bytesrc ****************************************************************/
+
+// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
+// they become available, and to preserve some trailing amount of data.
+//
+// Callback signatures.  The first void* argument receives the upb_bytesrc*
+// the call was made on (see the upb_bytesrc_* wrappers).
+typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
+typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
+typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
+typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t);
+typedef void upb_bytesrc_ref_func(void*);
+typedef struct _upb_bytesrc_vtbl {
+  upb_bytesrc_fetch_func     *fetch;
+  upb_bytesrc_read_func      *read;
+  upb_bytesrc_getptr_func    *getptr;
+  upb_bytesrc_refregion_func *refregion;
+  upb_bytesrc_refregion_func *unrefregion;
+  upb_bytesrc_ref_func       *ref;    // NULL if the bytesrc is not ref-able.
+  upb_bytesrc_ref_func       *unref;
+} upb_bytesrc_vtbl;
+
+// Base "class" for byte sources: just a pointer to the implementation's
+// table of callbacks.
+typedef struct {
+  upb_bytesrc_vtbl  *vtbl;
+} upb_bytesrc;
+
+// Binds "src" to the given callback table.
+INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
+  src->vtbl = vtbl;
+}
+
+// Fetches at least one byte starting at ofs, returning the actual number of
+// bytes fetched (or 0 on error: see "s" for details).  Gives caller a ref on
+// the fetched region.  It is safe to re-fetch existing regions but only if
+// they are ref'd.  "ofs" may not be greater than the end of the region that
+// was previously fetched.
+INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
+  return src->vtbl->fetch(src, ofs, s);
+}
+
+// Copies "len" bytes of data from offset src_ofs to "dst", which must be at
+// least "len" bytes long.  The caller must own a ref on the given region.
+INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
+                             char *dst) {
+  src->vtbl->read(src, src_ofs, len, dst);
+}
+
+// Returns a pointer to the bytesrc's internal buffer at offset "ofs".  "len"
+// is passed through to the implementation, which uses it as an output: the
+// implementations in bytestream.c store the number of contiguous bytes
+// available at the returned pointer in *len.  The caller must own refs on
+// the region it reads.  The returned buffer is valid for as long as the
+// region remains ref'd.
+//
+// TODO: is "len" really required here?
+INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
+                                      size_t *len) {
+  return src->vtbl->getptr(src, ofs, len);
+}
+
+// Gives the caller a ref on the given region.  The caller must know that the
+// given region is already ref'd.  A bytesrc whose vtbl leaves "refregion"
+// NULL (as upb_stringsrc does) keeps no per-region refs, so the call is a
+// no-op there rather than a jump through a NULL function pointer.
+INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
+  if (src->vtbl->refregion) src->vtbl->refregion(src, ofs, len);
+}
+
+// Releases a ref on the given region, which the caller must have previously
+// ref'd.  A bytesrc whose vtbl leaves "unrefregion" NULL (as upb_stringsrc
+// does) keeps no per-region refs, so the call is a no-op there rather than a
+// jump through a NULL function pointer.
+INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
+  if (src->vtbl->unrefregion) src->vtbl->unrefregion(src, ofs, len);
+}
+
+// Takes a ref on the bytesrc object itself when the implementation supports
+// that (its vtbl has a "ref" hook).  Returns true if a ref was taken and
+// false if this bytesrc is not ref-able.
+INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) {
+  if (!src->vtbl->ref) return false;
+  src->vtbl->ref(src);
+  return true;
+}
+
+// Unref's the bytesrc itself.  May only be called when upb_bytesrc_tryref()
+// has previously returned true.  Asserts that the vtbl provides "unref".
+INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
+  assert(src->vtbl->unref);
+  src->vtbl->unref(src);
+}
+
+/* upb_strref *****************************************************************/
+
+// The structure we pass for a string.
+typedef struct _upb_strref {
+  // Pointer to the string data.  NULL if the string spans multiple input
+  // buffers (in which case upb_bytesrc_getptr() must be called to obtain
+  // the actual pointers).
+  const char *ptr;
+
+  // Bytesrc from which this string data comes.  This is only guaranteed to be
+  // alive from inside the callback; however if the handler knows more about
+  // its type and how to prolong its life, it may do so.
+  upb_bytesrc *bytesrc;
+
+  // Offset in the bytesrc that represents the beginning of this string.
+  // NOTE(review): this is 32 bits wide while the upb_bytesrc_* functions take
+  // 64-bit offsets, so offsets of 4GiB and above cannot be represented here.
+  uint32_t stream_offset;
+
+  // Length of the string.
+  uint32_t len;
+
+  // Possibly add optional members here like start_line, start_column, etc.
+} upb_strref;
+
+// Copies the contents of the strref into a newly-allocated, NULL-terminated
+// string.
+char *upb_strref_dup(struct _upb_strref *r);
+
+
+/* upb_bytesink ***************************************************************/
+
+// Callback signatures for byte sinks.  The first void* argument receives the
+// upb_bytesink* the call was made on (see the upb_bytesink_* wrappers).
+typedef bool upb_bytesink_write_func(void*, const char*, size_t, upb_status*);
+typedef int32_t upb_bytesink_vprintf_func(
+    void*, upb_status*, const char *fmt, va_list args);
+
+// Table of callbacks supplied by a bytesink implementation.
+typedef struct {
+  upb_bytesink_write_func   *write;
+  upb_bytesink_vprintf_func *vprintf;
+} upb_bytesink_vtbl;
+
+// Base "class" for byte sinks: just a pointer to the callback table.
+typedef struct {
+  upb_bytesink_vtbl *vtbl;
+} upb_bytesink;
+
+// Binds "sink" to the given callback table.
+INLINE void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) {
+  sink->vtbl = vtbl;
+}
+
+// Writes "len" bytes from "buf" to the sink via its "write" callback and
+// returns that callback's result.
+INLINE bool upb_bytesink_write(upb_bytesink *sink, const char *buf, size_t len,
+                               upb_status *s) {
+  return sink->vtbl->write(sink, buf, len, s);
+}
+
+// Writes the NUL-terminated string "str" (without the terminator).
+INLINE bool upb_bytesink_writestr(upb_bytesink *sink, const char *str,
+                                  upb_status *s) {
+  return upb_bytesink_write(sink, str, strlen(str), s);
+}
+
+// printf-style write to the sink via its "vprintf" callback.
+// Returns the number of bytes written or -1 on error.  The result is kept in
+// a signed variable so the -1 error value is passed through unchanged.
+INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
+                                   const char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  int32_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
+  va_end(args);
+  return ret;
+}
+
+// OPT: add getappendbuf()
+// OPT: add writefrombytesrc()
+// TODO: add flush()
+
+
+/* upb_stdio ******************************************************************/
+
+// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but
+// more portable.
+//
+// Specifically, stdio functions acquire locks on every operation (unless you
+// use the f{read,write,...}_unlocked variants, which are not standard) and
+// performs redundant buffering (unless you disable it with setvbuf(), but we
+// can only do this on newly-opened filehandles).
+
+// One buffered chunk of the stream.
+typedef struct {
+  uint64_t ofs;       // Stream offset of this chunk.
+  uint32_t refcount;  // Incremented by upb_stdio_refregion().
+  char data[];        // Chunk bytes (flexible array member).
+} upb_stdio_buf;
+
+// We use a single object for both bytesrc and bytesink for simplicity.
+// The object is still not thread-safe, and may only be used by one reader
+// and one writer at a time.
+typedef struct {
+  upb_bytesrc src;
+  upb_bytesink sink;
+  FILE *file;
+  bool should_close;     // True if upb_stdio_uninit() must fclose() "file".
+  upb_stdio_buf **bufs;  // Array of pointers to buffers, searched by offset.
+  uint32_t nbuf, szbuf;  // nbuf: entries in bufs.  szbuf: presumably the
+                         // allocated capacity of bufs -- TODO confirm.
+} upb_stdio;
+
+void upb_stdio_init(upb_stdio *stdio);
+// Caller should call upb_stdio_flush prior to calling this to ensure that
+// all data is flushed, otherwise data can be silently dropped if an error
+// occurs flushing the remaining buffers.
+void upb_stdio_uninit(upb_stdio *stdio);
+
+// Resets the object to read/write to the given "file."  The caller is
+// responsible for closing the file, which must outlive this object.
+void upb_stdio_reset(upb_stdio *stdio, FILE *file);
+
+// As an alternative to upb_stdio_reset(), initializes the object by opening a
+// file, and will handle closing it.  This may result in more efficient I/O
+// than the previous since we can call setvbuf() to disable buffering.
+void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
+                    upb_status *s);
+
+upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
+upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
+
+
+/* upb_stringsrc **************************************************************/
+
+// bytesrc/bytesink for a simple contiguous string.
+
+struct _upb_stringsrc {
+  upb_bytesrc bytesrc;
+  const char *str;
+  size_t len;
+};
+typedef struct _upb_stringsrc upb_stringsrc;
+
+// Create/free a stringsrc.
+void upb_stringsrc_init(upb_stringsrc *s);
+void upb_stringsrc_uninit(upb_stringsrc *s);
+
+// Resets the stringsrc to a state where it will vend the given string.  The
+// stringsrc only stores the pointer (the data is neither copied nor ref'd),
+// so the caller must keep "str" alive and unmodified for as long as the
+// stringsrc uses it.  A stringsrc can be reset multiple times.
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
+
+// Returns the upb_bytesrc* for this stringsrc.
+upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
+
+
+/* upb_stringsink *************************************************************/
+
+struct _upb_stringsink {
+  upb_bytesink bytesink;
+  char *str;
+  size_t len, size;
+};
+typedef struct _upb_stringsink upb_stringsink;
+
+// Create/free a stringsink.
+void upb_stringsink_init(upb_stringsink *s);
+void upb_stringsink_uninit(upb_stringsink *s);
+
+// Resets the sink's string to "str", which the sink takes ownership of.
+// "str" may be NULL, which will make the sink allocate a new string.
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size);
+
+// Releases ownership of the returned string (which is "len" bytes long) and
+// resets the internal string to be empty again (as if reset were called with
+// NULL).
+const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
+
+// Returns the upb_bytesink* for this stringsink.  Invalidated by reset above.
+// The parameter is spelled out so the prototype matches the definition; an
+// empty "()" would declare a zero-argument function when this header is
+// included from C++.
+upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/def.c b/upb/def.c
new file mode 100644
index 0000000..000b7f2
--- /dev/null
+++ b/upb/def.c
@@ -0,0 +1,754 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "upb/def.h"
+
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+
+// Initializes "l" as an empty list with room for 8 def pointers.
+void upb_deflist_init(upb_deflist *l) {
+  l->size = 8;
+  l->defs = malloc(l->size * sizeof(void*));
+  l->len = 0;
+}
+
+// Drops one ref on every def in the list, then frees the pointer array.
+void upb_deflist_uninit(upb_deflist *l) {
+  for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]);
+  free(l->defs);
+}
+
+// Appends "d", doubling the array's capacity first when it is full.  The
+// pointer is stored as given; no additional ref is taken here.
+// NOTE(review): malloc()/realloc() results are not checked in these helpers.
+void upb_deflist_push(upb_deflist *l, upb_def *d) {
+  if(l->len == l->size) {
+    l->size *= 2;
+    l->defs = realloc(l->defs, l->size * sizeof(void*));
+  }
+  l->defs[l->len++] = d;
+}
+
+
+/* upb_def ********************************************************************/
+
+static void upb_msgdef_free(upb_msgdef *m);
+static void upb_enumdef_free(upb_enumdef *e);
+static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
+
+#ifndef NDEBUG
+// A def counts as mutable while it has no symtab.  Only used in assert()s.
+static bool upb_def_ismutable(upb_def *def) { return def->symtab == NULL; }
+#endif
+
+// Frees "def" by dispatching on its type to the matching type-specific free
+// function.  Any other type value trips the assert.
+static void upb_def_free(upb_def *def) {
+  switch (def->type) {
+    case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break;
+    case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break;
+    case UPB_DEF_UNRESOLVED:
+        upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break;
+    default:
+      assert(false);
+  }
+}
+
+// Duplicates a msgdef or enumdef via the type-specific dup function and
+// returns the copy upcast to upb_def.  Other def types (including
+// unresolved defs) are not supported: they assert, and return NULL when
+// asserts are compiled out.
+upb_def *upb_def_dup(upb_def *def) {
+  switch (def->type) {
+    case UPB_DEF_MSG: return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef(def)));
+    case UPB_DEF_ENUM: return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef(def)));
+    default: assert(false); return NULL;
+  }
+}
+
+// Prior to being in a symtab, the def's refcount controls the lifetime of the
+// def itself.  If the refcount falls to zero, the def is deleted.  Once the
+// def belongs to a symtab, the def is owned by the symtab and its refcount
+// determines whether the def owns a ref on the symtab or not.
+//
+// Takes a ref on "def".  upb_atomic_ref() reports whether the count was 0
+// beforehand; in that case a def that belongs to a symtab also takes a ref
+// on that symtab.
+void upb_def_ref(upb_def *def) {
+  if (upb_atomic_ref(&def->refcount) && def->symtab)
+    upb_symtab_ref(def->symtab);
+}
+
+// Hands "d" over to symtab "s": records the symtab, then drops one ref from
+// the def.  If other refs remain afterwards, a ref is taken on the symtab.
+// For msgdefs the itof table is compacted as well.
+static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) {
+  assert(upb_atomic_read(&d->refcount) > 0);
+  d->symtab = s;
+  if (!upb_atomic_unref(&d->refcount)) upb_symtab_ref(s);
+  upb_msgdef *m = upb_dyncast_msgdef(d);
+  if (m) upb_inttable_compact(&m->itof);
+}
+
+// Releases one ref on "def" (NULL is accepted and ignored).  When the count
+// reaches zero, a def that belongs to a symtab releases its ref on that
+// symtab (after which the def may already be gone); a def without a symtab
+// is freed directly.
+void upb_def_unref(upb_def *def) {
+  if (def == NULL) return;
+  if (!upb_atomic_unref(&def->refcount)) return;  // Other refs remain.
+  if (def->symtab == NULL) {
+    upb_def_free(def);
+    return;
+  }
+  upb_symtab_unref(def->symtab);
+  // Def might be deleted now.
+}
+
+// Initializes the common def header: the given type, no name, no symtab and
+// a refcount of 1.
+static void upb_def_init(upb_def *def, upb_deftype_t type) {
+  def->type = type;
+  def->fqname = NULL;
+  def->symtab = NULL;
+  upb_atomic_init(&def->refcount, 1);
+}
+
+// Releases what the common def header owns (the fqname string).
+static void upb_def_uninit(upb_def *def) {
+  //fprintf(stderr, "Freeing def: %p\n", def);
+  free(def->fqname);
+}
+
+
+/* upb_unresolveddef **********************************************************/
+
+// Unresolved defs are used as temporary placeholders for a def whose name has
+// not been resolved yet.  During the name resolution step, all unresolved defs
+// are replaced with pointers to the actual def being referenced.
+typedef struct _upb_unresolveddef {
+  upb_def base;
+
+  // The target type name.  This may or may not be fully qualified.  It is
+  // tempting to want to use base.fqname for this, but that will be qualified
+  // which is inappropriate for a name we still have to resolve.
+  char *name;
+} upb_unresolveddef;
+
+// Creates an unresolved def naming "str".  The name is copied with strdup(),
+// so the caller keeps ownership of "str".
+// NOTE(review): malloc()/strdup() results are not checked.
+static upb_unresolveddef *upb_unresolveddef_new(const char *str) {
+  upb_unresolveddef *def = malloc(sizeof(*def));
+  upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
+  def->name = strdup(str);
+  return def;
+}
+
+// Frees the copied name, the common header's resources, and the def itself.
+static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
+  free(def->name);
+  upb_def_uninit(&def->base);
+  free(def);
+}
+
+
+/* upb_enumdef ****************************************************************/
+
+// Allocates an empty enumdef with a refcount of 1, empty name->number and
+// number->name tables, and a default value of 0.  The default must be
+// initialized here because upb_fielddef_resolve() reads e->defaultval even
+// if upb_enumdef_setdefault() was never called.
+upb_enumdef *upb_enumdef_new() {
+  upb_enumdef *e = malloc(sizeof(*e));
+  upb_def_init(&e->base, UPB_DEF_ENUM);
+  upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
+  upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
+  e->defaultval = 0;
+  return e;
+}
+
+// Frees an enumdef: first every name string reachable through the enum
+// iterator, then both lookup tables, the common header and the def itself.
+static void upb_enumdef_free(upb_enumdef *e) {
+  upb_enum_iter i;
+  for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+    // Frees the ref taken when the string was parsed.
+    free(upb_enum_iter_name(i));
+  }
+  upb_strtable_free(&e->ntoi);
+  upb_inttable_free(&e->iton);
+  upb_def_uninit(&e->base);
+  free(e);
+}
+
+// Returns a new enumdef containing the same name/number pairs as "e".
+// The insertion is performed outside assert(): with NDEBUG defined the
+// assert expression is not evaluated at all, which would otherwise yield an
+// empty copy.
+upb_enumdef *upb_enumdef_dup(upb_enumdef *e) {
+  upb_enumdef *new_e = upb_enumdef_new();
+  upb_enum_iter i;
+  for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+    bool ok = upb_enumdef_addval(new_e, upb_enum_iter_name(i),
+                                        upb_enum_iter_number(i));
+    assert(ok);  // Pairs taken from a valid enumdef cannot collide.
+    (void)ok;    // Silence "unused" warnings when asserts are compiled out.
+  }
+  return new_e;
+}
+
+// Adds the pair (name, num) to the enum.  Returns false without changing
+// anything if either the number or the name is already present; otherwise
+// inserts into both tables and returns true.  The number->name table gets
+// its own strdup()'d copy of "name".
+// NOTE(review): the strtable insert is passed a pointer to the value (&num)
+// while the inttable insert is passed the strdup()'d string itself; confirm
+// against upb_inttable_insert() that this matches the upb_iton_ent layout.
+bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) {
+  if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL))
+    return false;
+  upb_strtable_insert(&e->ntoi, name, &num);
+  upb_inttable_insert(&e->iton, num, strdup(name));
+  return true;
+}
+
+// Sets the enum's default value.  Asserts that the def is still mutable.
+void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) {
+  assert(upb_def_ismutable(UPB_UPCAST(e)));
+  e->defaultval = val;
+}
+
+// Starts an iteration over the enum's values (backed by the number->name
+// table).
+upb_enum_iter upb_enum_begin(upb_enumdef *e) {
+  // We could iterate over either table here; the choice is arbitrary.
+  return upb_inttable_begin(&e->iton);
+}
+
+// Advances an iterator obtained from upb_enum_begin().
+upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
+  return upb_inttable_next(&e->iton, iter);
+}
+
+// Looks up the name registered for "num".  Returns NULL if there is none.
+const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
+  upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
+  return e ? e->str : NULL;
+}
+
+// Looks up the number registered for the first "len" bytes of "name".
+// Returns false if the name is unknown.  On success returns true and, when
+// "num" is non-NULL, stores the number in *num.
+bool upb_enumdef_ntoil(upb_enumdef *def, char *name, size_t len, int32_t *num) {
+  upb_ntoi_ent *ent = upb_strtable_lookupl(&def->ntoi, name, len);
+  if (ent == NULL) return false;
+  if (num != NULL) *num = ent->value;
+  return true;
+}
+
+// Same as upb_enumdef_ntoil() for a NUL-terminated "name".
+bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num) {
+  return upb_enumdef_ntoil(e, name, strlen(name), num);
+}
+
+
+/* upb_fielddef ***************************************************************/
+
+// Allocates a fielddef with a refcount of 1 that belongs to no msgdef, has no
+// name or sub-def, is not finalized, and has label OPTIONAL.  fval is set to
+// point back at the fielddef itself.
+// NOTE(review): f->defaultval is not initialized here, and the malloc()
+// result is not checked.
+upb_fielddef *upb_fielddef_new() {
+  upb_fielddef *f = malloc(sizeof(*f));
+  f->msgdef = NULL;
+  f->def = NULL;
+  upb_atomic_init(&f->refcount, 1);
+  f->finalized = false;
+  f->type = 0;
+  f->label = UPB_LABEL(OPTIONAL);
+  f->hasbit = 0;
+  f->offset = 0;
+  f->number = 0;  // not a valid field number.
+  f->name = NULL;
+  f->accessor = NULL;
+  upb_value_setfielddef(&f->fval, f);
+  return f;
+}
+
+// Frees a fielddef: for string fields the default value's pointer is freed,
+// then the name and the fielddef itself.
+static void upb_fielddef_free(upb_fielddef *f) {
+  if (upb_isstring(f)) {
+    free(upb_value_getptr(f->defaultval));
+  }
+  free(f->name);
+  free(f);
+}
+
+// Not implemented yet: currently does nothing.
+// NOTE(review): upb_fielddef_unref() does decrement the refcount, so a
+// ref/unref pair is presently unbalanced.
+void upb_fielddef_ref(upb_fielddef *f) {
+  // TODO.
+  (void)f;
+}
+
+// Releases one ref on "f" (NULL is accepted and ignored).  When the count
+// reaches zero, a field that belongs to a msgdef releases a ref on that
+// msgdef (after which the fielddef may already be gone); a field without a
+// msgdef is freed directly.
+void upb_fielddef_unref(upb_fielddef *f) {
+  // TODO.
+  if (f == NULL) return;
+  if (!upb_atomic_unref(&f->refcount)) return;  // Other refs remain.
+  if (f->msgdef == NULL) {
+    upb_fielddef_free(f);
+  } else {
+    upb_msgdef_unref(f->msgdef);
+    // fielddef might be deleted now.
+  }
+}
+
+// Returns a new fielddef that copies the msgdef pointer, type, label, number
+// and name of "f".  The name is duplicated so the two fielddefs never free
+// the same string.  If "f" refers to a sub-def that has a fully-qualified
+// name, the copy gets an unresolved reference to that name.
+upb_fielddef *upb_fielddef_dup(upb_fielddef *f) {
+  upb_fielddef *newf = upb_fielddef_new();
+  newf->msgdef = f->msgdef;
+  newf->type = f->type;
+  newf->label = f->label;
+  newf->number = f->number;
+  newf->name = f->name ? strdup(f->name) : NULL;
+  // Fields of primitive type have no sub-def to carry over.
+  if (f->def && f->def->fqname) upb_fielddef_settypename(newf, f->def->fqname);
+  return newf;  // The copy, not the original.
+}
+
+static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
+  assert(upb_dyncast_unresolveddef(f->def));
+  upb_def_unref(f->def);
+  f->def = def;
+  if (f->type == UPB_TYPE(ENUM)) {
+    // Resolve the enum's default from a string to an integer.
+    char *str = upb_value_getptr(f->defaultval);
+    assert(str);  // Should point to either a real default or the empty string.
+    upb_enumdef *e = upb_downcast_enumdef(f->def);
+    int32_t val = 0;
+    if (str[0] == '\0') {
+      upb_value_setint32(&f->defaultval, e->defaultval);
+    } else {
+      bool success = upb_enumdef_ntoi(e, str, &val);
+      free(str);
+      if (!success) {
+        upb_status_setf(s, UPB_ERROR, "Default enum value (%s) is not a "
+                                      "member of the enum", str);
+        return false;
+      }
+      upb_value_setint32(&f->defaultval, val);
+    }
+  }
+  return true;
+}
+
+// Sets the field number.  Asserts that the field is not yet part of a msgdef.
+void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
+  assert(f->msgdef == NULL);
+  f->number = number;
+}
+
+// Sets the field name to a private copy of "name", releasing any name set
+// earlier.  Asserts that the field is not yet part of a msgdef.
+void upb_fielddef_setname(upb_fielddef *f, const char *name) {
+  assert(f->msgdef == NULL);
+  // Copy before freeing so that passing f->name itself remains safe.
+  char *copy = strdup(name);
+  free(f->name);
+  f->name = copy;
+}
+
+void upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
+  assert(!f->finalized);
+  f->type = type;
+}
+
+void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) {
+  assert(!f->finalized);
+  f->label = label;
+}
+void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) {  // Sets the default.
+  assert(!f->finalized);  // Writable only until the fielddef is finalized.
+  // TODO: string ownership?  The value is stored as-is; nothing is copied here.
+  f->defaultval = value;
+}
+
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) {  // Sets f->fval.
+  assert(!f->finalized);  // Writable only until the fielddef is finalized.
+  // TODO: string ownership?  The value is stored as-is; nothing is copied here.
+  f->fval = fval;
+}
+
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) {
+  assert(!f->finalized);  // Writable only until the fielddef is finalized.
+  f->accessor = vtbl;  // Stores the pointer only; upb_msgdef_layout() skips fields without one.
+}
+
+void upb_fielddef_settypename(upb_fielddef *f, const char *name) {
+  upb_def_unref(f->def);  // Drop the ref on whatever def was attached before.
+  f->def = UPB_UPCAST(upb_unresolveddef_new(name));  // Placeholder carrying `name`.
+}
+
+// Returns an ordering of fields based on:
+// 1. value size (small to large).
+// 2. field number.
+static int upb_fielddef_cmpval(const void *_f1, const void *_f2) {  // qsort() callback.
+  upb_fielddef *f1 = *(void**)_f1;
+  upb_fielddef *f2 = *(void**)_f2;
+  size_t size1 = upb_types[f1->type].size;
+  size_t size2 = upb_types[f2->type].size;
+  // Compare instead of subtracting: a size_t or int32_t difference need not
+  // fit in the int that qsort() expects.
+  if (size1 != size2) return size1 < size2 ? -1 : 1;
+  return (f1->number > f2->number) - (f1->number < f2->number);  // Number order.
+}
+
+// Returns an ordering of all fields based on:
+// 1. required/optional (required fields first).
+// 2. field number
+static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) {  // qsort() callback.
+  upb_fielddef *f1 = *(void**)_f1;
+  upb_fielddef *f2 = *(void**)_f2;
+  int req1 = f1->label == UPB_LABEL(REQUIRED);
+  int req2 = f2->label == UPB_LABEL(REQUIRED);
+  if (req1 != req2) return req2 - req1;  // Required fields sort first.
+  // Otherwise return in number order (compared, to avoid signed overflow).
+  return (f1->number > f2->number) - (f1->number < f2->number);
+}
+
+
+/* upb_msgdef *****************************************************************/
+
+upb_msgdef *upb_msgdef_new() {
+  // Builds an empty msgdef: no fields, zero size, empty extension range.
+  upb_msgdef *msg = malloc(sizeof(upb_msgdef));
+  msg->extension_start = msg->extension_end = 0;
+  msg->hasbit_bytes = 0;
+  msg->size = 0;
+  upb_def_init(&msg->base, UPB_DEF_MSG);
+  upb_strtable_init(&msg->ntof, 4, sizeof(upb_ntof_ent));
+  upb_inttable_init(&msg->itof, 4, sizeof(upb_itof_ent));
+  return msg;
+}
+
+static void upb_msgdef_free(upb_msgdef *m) {  // Frees m together with all its fields.
+  upb_msg_iter it = upb_msg_begin(m);
+  for (; !upb_msg_done(it); it = upb_msg_next(m, it))
+    upb_fielddef_free(upb_msg_iter_field(it));
+  upb_inttable_free(&m->itof);
+  upb_strtable_free(&m->ntof);
+  upb_def_uninit(&m->base);
+  free(m);
+}
+
+upb_msgdef *upb_msgdef_dup(upb_msgdef *m) {  // New msgdef with copies of m's fields.
+  upb_msgdef *copy = upb_msgdef_new();
+  copy->extension_end = m->extension_end;  // Scalars first, then the fields.
+  copy->extension_start = m->extension_start;
+  copy->hasbit_bytes = m->hasbit_bytes;
+  copy->size = m->size;
+  upb_msg_iter it = upb_msg_begin(m);
+  for (; !upb_msg_done(it); it = upb_msg_next(m, it))
+    upb_msgdef_addfield(copy, upb_fielddef_dup(upb_msg_iter_field(it)));
+  return copy;
+}
+
+void upb_msgdef_setsize(upb_msgdef *m, uint16_t size) {  // Overrides m->size.
+  assert(upb_def_ismutable(UPB_UPCAST(m)));  // Only while the def is mutable.
+  m->size = size;
+}
+
+void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes) {
+  assert(upb_def_ismutable(UPB_UPCAST(m)));  // Only while the def is mutable.
+  m->hasbit_bytes = bytes;  // NOTE(review): the member is uint8_t, so larger values truncate.
+}
+
+void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start) {
+  assert(upb_def_ismutable(UPB_UPCAST(m)));  // Only while the def is mutable.
+  m->extension_start = start;  // First tag number of the extension range.
+}
+
+void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end) {
+  assert(upb_def_ismutable(UPB_UPCAST(m)));  // Only while the def is mutable.
+  m->extension_end = end;  // End of the extension tag range.
+}
+
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {  // false on name/number clash.
+  assert(upb_atomic_read(&f->refcount) > 0);  // Caller must be passing us a ref.
+  if (!upb_atomic_unref(&f->refcount)) upb_msgdef_ref(m);  // f still ref'd: ref m too.
+  if (upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) {  // Duplicate.
+    upb_fielddef_unref(f);
+    return false;
+  }
+  assert(f->msgdef == NULL);  // A field may belong to at most one message.
+  f->msgdef = m;
+  upb_itof_ent itof_ent = {0, f};  // {junk, f}
+  upb_inttable_insert(&m->itof, f->number, &itof_ent);
+  upb_strtable_insert(&m->ntof, f->name, &f);  // NOTE(review): passes &f, not a upb_ntof_ent -- confirm.
+  return true;
+}
+
+static int upb_div_round_up(int numerator, int denominator) {
+  if (numerator <= 0) return 0;  // Non-positive numerators round up to 0.
+  return 1 + (numerator - 1) / denominator;  // Ceiling division for numerator > 0.
+}
+
+void upb_msgdef_layout(upb_msgdef *m) {  // Assigns hasbit, offset and total size.
+  // Create an ordering over the fields, but only include fields with accessors.
+  upb_fielddef **sorted_fields =
+      malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m));
+  int n = 0;  // Number of fields that have an accessor.
+  upb_msg_iter i;
+  for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+    upb_fielddef *f = upb_msg_iter_field(i);
+    if (f->accessor) sorted_fields[n++] = f;
+  }
+
+  m->hasbit_bytes = upb_div_round_up(n, 8);  // One bit per laid-out field.
+  m->size = m->hasbit_bytes;  // Values are placed after the hasbits.  + header_size?
+
+  // Assign hasbits.
+  qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit);
+  for (int i = 0; i < n; i++) {  // This i shadows the iterator declared above.
+    upb_fielddef *f = sorted_fields[i];
+    f->hasbit = i;  // Hasbit index == position in the hasbit ordering.
+  }
+
+  // Assign value offsets.
+  qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval);
+  size_t max_align = 0;  // Largest alignment seen; stays 0 when n == 0.
+  for (int i = 0; i < n; i++) {
+    upb_fielddef *f = sorted_fields[i];
+    const upb_type_info *type_info = &upb_types[f->type];
+    size_t size = type_info->size;
+    size_t align = type_info->align;
+    if (upb_isseq(f)) {  // Repeated fields are laid out as a single pointer.
+      size = sizeof(void*);
+      align = alignof(void*);
+    }
+
+    // General alignment rules are: each member must be at an address that is a
+    // multiple of that type's alignment.  Also, the size of the structure as a
+    // whole must be a multiple of the greatest alignment of any member.
+    f->offset = upb_align_up(m->size, align);
+    m->size = f->offset + size;  // NOTE(review): size and offset are uint16_t; no overflow check.
+    max_align = UPB_MAX(max_align, align);
+  }
+  if (max_align > 0) m->size = upb_align_up(m->size, max_align);
+
+  free(sorted_fields);
+}
+
+upb_msg_iter upb_msg_begin(upb_msgdef *m) {  // Starts iteration over m's fields.
+  return upb_inttable_begin(&m->itof);  // Fields are walked via the number->field table.
+}
+
+upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {  // Advances iter.
+  return upb_inttable_next(&m->itof, iter);  // Returns the new iterator by value.
+}
+
+
+/* upb_symtabtxn **************************************************************/
+
+typedef struct {  // Value stored in the symtab/symtabtxn string tables, keyed by fqname.
+  upb_def *def;
+} upb_symtab_ent;
+
+void upb_symtabtxn_init(upb_symtabtxn *t) {  // Prepares an empty transaction.
+  upb_strtable_init(&t->deftab, 16, sizeof(upb_symtab_ent));  // 16: initial size argument.
+}
+
+void upb_symtabtxn_uninit(upb_symtabtxn *txn) {  // Releases all pending defs and the table.
+  upb_strtable *t = &txn->deftab;
+  upb_strtable_iter i;
+  for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
+    upb_def_unref(e->def);  // Drop the txn's ref; a bare free() leaks the def's contents.
+  }
+  upb_strtable_free(t);
+}
+
+bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) {
+  // Reject a name that is already pending, releasing the ref we were passed.
+  if (upb_strtable_lookup(&t->deftab, def->fqname)) { upb_def_unref(def); return false; }
+  upb_symtab_ent e = {def};  // The table stores a copy of this entry.
+  upb_strtable_insert(&t->deftab, def->fqname, &e);
+  return true;
+}
+
+#if 0
+err:
+  // We need to free all defs from "tmptab."
+  upb_rwlock_unlock(&s->lock);
+  for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
+      e = upb_strtable_next(&tmptab, &e->e)) {
+    upb_def_unref(e->def);
+  }
+  upb_strtable_free(&tmptab);
+  return false;
+#endif
+
+// Given a symbol and the base symbol inside which it is defined, find the
+// symbol's definition in t.
+static upb_symtab_ent *upb_resolve(upb_strtable *t,
+                                   const char *base, const char *sym) {
+  if(sym[0] == '\0') return NULL;  // Empty symbol: nothing to look up.
+  if(sym[0] != UPB_SYMBOL_SEPARATOR) {
+    // A relative symbol would be resolved by removing components from base
+    // until an entry is found or base runs out.
+    // TODO: This branch is totally broken, but currently not used.
+    (void)base;
+    assert(false);
+    return NULL;
+  }
+  // A leading separator marks an absolute symbol, so one lookup of the
+  // remainder (separator skipped) is all that is needed.
+  return upb_strtable_lookup(t, sym + 1);
+}
+
+void upb_symtabtxn_begin(upb_symtabtxn_iter *i, upb_symtabtxn *t) {  // Starts iteration.
+  upb_strtable_begin(i, &t->deftab);  // Iterates the txn's def table directly.
+}
+void upb_symtabtxn_next(upb_symtabtxn_iter *i) { upb_strtable_next(i); }  // Advances i in place.
+bool upb_symtabtxn_done(upb_symtabtxn_iter *i) { return upb_strtable_done(i); }  // True at end.
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *i) {  // Def at the iterator position.
+  const upb_symtab_ent *e = upb_strtable_iter_value(i);
+  return e->def;  // No ref is taken here.
+}
+
+
+/* upb_symtab public interface ************************************************/
+
+static void _upb_symtab_free(upb_strtable *t) {  // Frees every def in t, then t.
+  upb_strtable_iter it;
+  for (upb_strtable_begin(&it, t); !upb_strtable_done(&it); upb_strtable_next(&it)) {
+    const upb_symtab_ent *ent = upb_strtable_iter_value(&it);
+    upb_def *d = ent->def;
+    assert(upb_atomic_read(&d->refcount) == 0);  // Nobody may still hold a ref.
+    upb_def_free(d);
+  }
+  upb_strtable_free(t);
+}
+
+static void upb_symtab_free(upb_symtab *s) {  // Tears down s and every def it holds.
+  _upb_symtab_free(&s->symtab);
+  for (uint32_t idx = 0; idx < s->olddefs.len; ++idx) {
+    upb_def *old = s->olddefs.defs[idx];  // A def that was replaced by a commit.
+    assert(upb_atomic_read(&old->refcount) == 0);
+    upb_def_free(old);
+  }
+  upb_deflist_uninit(&s->olddefs);
+  upb_rwlock_destroy(&s->lock);
+  free(s);
+}
+
+void upb_symtab_unref(upb_symtab *s) {
+  if (!s) return;  // Unref of NULL is a no-op.
+  // Destroy the symtab once the last reference has been dropped.
+  if (upb_atomic_unref(&s->refcount)) upb_symtab_free(s);
+}
+
+upb_symtab *upb_symtab_new() {  // Empty symtab whose refcount starts at 1.
+  upb_symtab *tab = malloc(sizeof(upb_symtab));
+  upb_deflist_init(&tab->olddefs);
+  upb_strtable_init(&tab->symtab, 16, sizeof(upb_symtab_ent));
+  upb_rwlock_init(&tab->lock);
+  upb_atomic_init(&tab->refcount, 1);
+  return tab;
+}
+
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
+  upb_rwlock_rdlock(&s->lock);
+  int total = upb_strtable_count(&s->symtab);
+  // We may only use part of this, depending on how many symbols are of the
+  // correct type.  The caller owns the array and one ref on each def in it.
+  upb_def **defs = malloc(sizeof(*defs) * total);
+  upb_strtable_iter iter;
+  upb_strtable_begin(&iter, &s->symtab);
+  int i = 0;
+  for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+    const upb_symtab_ent *e = upb_strtable_iter_value(&iter);
+    upb_def *def = e->def;
+    assert(def);
+    if(type != UPB_DEF_ANY && def->type != type) continue;  // Wrong kind.
+    upb_def_ref(def);  // Ref while the lock is held, as upb_symtab_lookup() does.
+    defs[i++] = def;
+  }
+  upb_rwlock_unlock(&s->lock);
+  *count = i;
+  return defs;
+}
+
+upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym) {
+  // Exact-name lookup.  When an entry exists its def is ref'd on behalf of
+  // the caller before the read lock is released; otherwise NULL is
+  // returned.
+  upb_rwlock_rdlock(&s->lock);
+  upb_symtab_ent *ent = upb_strtable_lookup(&s->symtab, sym);
+  upb_def *found = ent ? ent->def : NULL;
+  if (ent) upb_def_ref(found);
+  upb_rwlock_unlock(&s->lock);
+  return found;
+}
+
+upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym) {
+  // Scoped lookup via upb_resolve().  When an entry exists its def is ref'd
+  // on behalf of the caller before the read lock is released; otherwise
+  // NULL is returned.
+  upb_rwlock_rdlock(&s->lock);
+  upb_symtab_ent *ent = upb_resolve(&s->symtab, base, sym);
+  upb_def *found = ent ? ent->def : NULL;
+  if (ent) upb_def_ref(found);
+  upb_rwlock_unlock(&s->lock);
+  return found;
+}
+
+bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
+                    upb_symtabtxn *txn) {  // True if txn holds a replacement for def.
+  // This linear search makes the DFS O(n^2) in the length of the paths.
+  // Could make this O(n) with a hash table, but n is small.
+  for (int i = 0; i < n; i++) {
+    if (def == open_defs[i]) return false;  // Already on the current path (cycle).
+  }
+
+  bool needcopy = false;
+  upb_msgdef *m = upb_dyncast_msgdef(def);  // NULL unless def is a message.
+  if (m) {
+    upb_msg_iter i;
+    open_defs[n++] = def;  // NOTE(review): no bound check against the caller's array.
+    for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+      upb_fielddef *f = upb_msg_iter_field(i);
+      if (!upb_hasdef(f)) continue;
+      needcopy |= upb_symtab_dfs(f->def, open_defs, n, txn);
+    }
+  }
+
+  bool replacing = (upb_strtable_lookup(&txn->deftab, def->fqname) != NULL);
+  if (needcopy && !replacing) {
+    upb_symtab_ent e = {upb_def_dup(def)};
+    // def reaches something being replaced, so it needs a replacement too.
+    upb_strtable_insert(&txn->deftab, def->fqname, &e);
+    replacing = true;
+  }
+  return replacing;
+}
+
+bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
+  upb_rwlock_wrlock(&s->lock);  // Held until the end; every exit path unlocks.
+
+  // All existing defs that can reach defs that are being replaced must
+  // themselves be replaced with versions that will point to the new defs.
+  // Do a DFS -- any path that finds a new def must replace all ancestors.
+  upb_strtable *symtab = &s->symtab;
+  upb_strtable_iter i;
+  upb_strtable_begin(&i, symtab);
+  for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
+    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
+    upb_symtab_dfs(e->def, open_defs, 0, txn);
+  }
+
+  // Resolve all refs.
+  upb_strtable *txntab = &txn->deftab;
+  upb_strtable_begin(&i, txntab);
+  for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
+    upb_msgdef *m = upb_dyncast_msgdef(e->def);
+    if(!m) continue;
+    // Type names are resolved relative to the message in which they appear.
+    const char *base = m->base.fqname;
+
+    upb_msg_iter j;
+    for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) {
+      upb_fielddef *f = upb_msg_iter_field(j);
+      if(!upb_hasdef(f)) continue;  // No resolving necessary.
+      const char *name = upb_downcast_unresolveddef(f->def)->name;
+
+      // Resolve from either the txntab (pending adds) or symtab (existing
+      // defs).  If both exist, prefer the pending add, because it will be
+      // overwriting the existing def.
+      upb_symtab_ent *found;
+      if(!(found = upb_resolve(txntab, base, name)) &&
+         !(found = upb_resolve(symtab, base, name))) {
+        upb_status_setf(status, UPB_ERROR, "could not resolve symbol '%s' "
+                                           "in context '%s'", name, base);
+        goto err;
+      }
+
+      // Check the type of the found def.
+      upb_deftype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
+      if(found->def->type != expected) {
+        upb_status_setf(status, UPB_ERROR, "Unexpected type");
+        goto err;
+      }
+      if (!upb_fielddef_resolve(f, found->def, status)) goto err;
+    }
+  }
+
+  // The defs in the transaction have been vetted, and can be moved to the
+  // symtab without causing errors.
+  upb_strtable_begin(&i, txntab);
+  for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i);
+    upb_def_movetosymtab(tmptab_e->def, s);
+    upb_symtab_ent *symtab_e =
+        upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
+    if(symtab_e) {
+      upb_deflist_push(&s->olddefs, symtab_e->def);  // Keep the replaced def.
+      symtab_e->def = tmptab_e->def;
+    } else {
+      upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e);
+    }
+  }
+
+  upb_strtable_clear(txntab);
+  upb_rwlock_unlock(&s->lock);
+  upb_symtab_gc(s);
+  return true;
+
+err:  // Failure: release the write lock (the original returned with it held).
+  upb_rwlock_unlock(&s->lock);
+  return false;
+}
+
+void upb_symtab_gc(upb_symtab *s) {
+  (void)s;
+  // TODO.
+}
diff --git a/upb/def.h b/upb/def.h
new file mode 100644
index 0000000..4a7a017
--- /dev/null
+++ b/upb/def.h
@@ -0,0 +1,465 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Provides a mechanism for creating and linking proto definitions.
+ * These form the protobuf schema, and are used extensively throughout upb:
+ * - upb_msgdef: describes a "message" construct.
+ * - upb_fielddef: describes a message field.
+ * - upb_enumdef: describes an enum.
+ * (TODO: definitions of services).
+ *
+ * These defs are mutable (and not thread-safe) when first created.
+ * Once they are added to a defbuilder (and later its symtab) they become
+ * immutable.
+ */
+
+#ifndef UPB_DEF_H_
+#define UPB_DEF_H_
+
+#include "upb/atomic.h"
+#include "upb/table.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _upb_symtab;
+typedef struct _upb_symtab upb_symtab;
+
+// All the different kind of defs we support.  These correspond 1:1 with
+// declarations in a .proto file.
+typedef enum {
+  UPB_DEF_MSG = 1,          // A upb_msgdef.
+  UPB_DEF_ENUM,             // A upb_enumdef.
+  UPB_DEF_SERVICE,          // Not yet implemented.
+
+  UPB_DEF_ANY = -1,         // Wildcard for upb_symtab_get*()
+  UPB_DEF_UNRESOLVED = 99,  // Internal-only.
+} upb_deftype_t;
+
+
+/* upb_def: base class for defs  **********************************************/
+
+typedef struct {
+  char *fqname;     // Fully qualified.  Used as the key in symtab/symtabtxn tables.
+  upb_symtab *symtab;     // Def is mutable iff symtab == NULL.
+  upb_atomic_t refcount;  // Owns a ref on symtab iff (symtab && refcount > 0).
+  upb_deftype_t type;     // Kind of def; checked by the dyncast/downcast helpers.
+} upb_def;
+
+// Call to ref/unref a def.  Can be used at any time, but is not thread-safe
+// until the def is in a symtab.  While a def is in a symtab, everything
+// reachable from that def (the symtab and all defs in the symtab) are
+// guaranteed to be alive.
+void upb_def_ref(upb_def *def);
+void upb_def_unref(upb_def *def);
+upb_def *upb_def_dup(upb_def *def);
+
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
+
+/* upb_fielddef ***************************************************************/
+
+// A upb_fielddef describes a single field in a message.  It isn't a full def
+// in the sense that it derives from upb_def.  It cannot stand on its own; it
+// must be part of a upb_msgdef.  It is also reference-counted.
+typedef struct _upb_fielddef {
+  struct _upb_msgdef *msgdef;  // Owning message; set by upb_msgdef_addfield().
+  upb_def *def;  // if upb_hasdef(f)
+  upb_atomic_t refcount;
+  bool finalized;  // Once true, the setters below assert.
+
+  // The following fields may be modified until the def is finalized.
+  uint8_t type;          // Use UPB_TYPE() constants.
+  uint8_t label;         // Use UPB_LABEL() constants.
+  int16_t hasbit;        // Hasbit index; assigned by upb_msgdef_layout().
+  uint16_t offset;       // Value offset; assigned by upb_msgdef_layout().
+  int32_t number;        // Field number.
+  char *name;            // Copy made by upb_fielddef_setname().
+  upb_value defaultval;  // Only meaningful for non-repeated scalars and strings.
+  upb_value fval;
+  struct _upb_accessor_vtbl *accessor;  // Fields without one are skipped by layout.
+} upb_fielddef;
+
+upb_fielddef *upb_fielddef_new();
+void upb_fielddef_ref(upb_fielddef *f);
+void upb_fielddef_unref(upb_fielddef *f);
+upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
+
+// Read accessors.  May be called any time; each returns the stored member as-is.
+INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
+INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
+INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
+INLINE char *upb_fielddef_name(upb_fielddef *f) { return f->name; }
+INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
+INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
+INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
+INLINE struct _upb_msgdef *upb_fielddef_msgdef(upb_fielddef *f) {
+  return f->msgdef;  // The message f was added to via upb_msgdef_addfield().
+}
+INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(upb_fielddef *f) {
+  return f->accessor;  // As set by upb_fielddef_setaccessor().
+}
+
+// Only meaningful once the def is in a symtab (returns NULL otherwise, or for
+// a fielddef where !upb_hasdef(f)).
+upb_def *upb_fielddef_subdef(upb_fielddef *f);
+
+// Note: upb_fielddef_msgdef() is NULL until the fielddef has been added to a msgdef.
+
+// Write accessors.  "Number" and "name" must be set before the fielddef is
+// added to a msgdef.  For the moment we do not allow these to be set once
+// the fielddef is added to a msgdef -- this could be relaxed in the future.
+void upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
+void upb_fielddef_setname(upb_fielddef *f, const char *name);
+
+// These writers may be called at any time prior to being put in a symtab.
+void upb_fielddef_settype(upb_fielddef *f, uint8_t type);
+void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
+void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
+// The name of the message or enum this field is referring to.  Must be found
+// at name resolution time (when the symtabtxn is committed to the symtab).
+void upb_fielddef_settypename(upb_fielddef *f, const char *name);
+
+// A variety of tests about the type of a field (by type constant or by fielddef).
+INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
+  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
+}
+INLINE bool upb_isstringtype(upb_fieldtype_t type) {
+  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
+}
+INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
+  return !upb_issubmsgtype(type) && !upb_isstringtype(type);  // Everything else.
+}
+INLINE bool upb_issubmsg(upb_fielddef *f) { return upb_issubmsgtype(f->type); }
+INLINE bool upb_isstring(upb_fielddef *f) { return upb_isstringtype(f->type); }
+INLINE bool upb_isseq(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }
+
+// Does the type of this field imply that it should contain an associated def?
+INLINE bool upb_hasdef(upb_fielddef *f) {
+  return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);  // Submessage, group or enum.
+}
+
+
+/* upb_msgdef *****************************************************************/
+
+// Structure that describes a single .proto message type.
+typedef struct _upb_msgdef {
+  upb_def base;  // Must stay first: the def casts convert upb_def* <-> upb_msgdef*.
+
+  // Tables for looking up fields by number and name.
+  upb_inttable itof;  // int to field
+  upb_strtable ntof;  // name to field
+
+  // The following fields may be modified until finalized.
+  uint16_t size;         // Computed by upb_msgdef_layout() or set explicitly.
+  uint8_t hasbit_bytes;  // Computed by upb_msgdef_layout() or set explicitly.
+  // The range of tag numbers used to store extensions.
+  uint32_t extension_start;
+  uint32_t extension_end;
+} upb_msgdef;
+
+// Hash table entries for looking up fields by name or number.
+typedef struct {
+  bool junk;  // Initialised to 0 by upb_msgdef_addfield(); purpose not visible here.
+  upb_fielddef *f;
+} upb_itof_ent;
+typedef struct {
+  upb_strtable_entry e;  // String-table entry header.
+  upb_fielddef *f;
+} upb_ntof_ent;
+
+upb_msgdef *upb_msgdef_new();
+INLINE void upb_msgdef_unref(upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }  // Via base def.
+INLINE void upb_msgdef_ref(upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }  // Via base def.
+
+// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
+// the fields) but with any references to submessages broken and replaced with
+// just the name of the submessage.  This can be put back into another symtab
+// and the names will be re-resolved in the new context.
+upb_msgdef *upb_msgdef_dup(upb_msgdef *m);
+
+// Read accessors.  May be called at any time; each returns the stored member as-is.
+INLINE uint16_t upb_msgdef_size(upb_msgdef *m) { return m->size; }
+INLINE uint8_t upb_msgdef_hasbit_bytes(upb_msgdef *m) {
+  return m->hasbit_bytes;
+}
+INLINE uint32_t upb_msgdef_extension_start(upb_msgdef *m) {
+  return m->extension_start;
+}
+INLINE uint32_t upb_msgdef_extension_end(upb_msgdef *m) {
+  return m->extension_end;
+}
+
+// Write accessors.  May only be called before the msgdef is in a symtab.
+void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
+void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
+void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start);
+void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end);
+
+// Adds a fielddef to a msgdef, and passes a ref on the field to the msgdef.
+// May only be done before the msgdef is in a symtab.  The fielddef's name and
+// number must be set, and the message may not already contain any field with
+// this name or number -- if it does, the fielddef is unref'd and false is
+// returned.  The fielddef may not already belong to another message.
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f);
+
+// Sets the layout of all fields according to default rules:
+// 1. Hasbits for required fields come first, then optional fields.
+// 2. Values are laid out in a way that respects alignment rules.
+// 3. The order is chosen to minimize memory usage.
+// This should only be called once all fielddefs have been added.
+// TODO: will likely want the ability to exclude strings/submessages/arrays.
+// TODO: will likely want the ability to define a header size.
+void upb_msgdef_layout(upb_msgdef *m);
+
+// Looks up a field by name or number.  While these are written to be as fast
+// as possible, it will still be faster to cache the results of this lookup if
+// possible.  These return NULL if no such field is found.
+INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {  // Lookup by number.
+  upb_itof_ent *e = (upb_itof_ent*)
+      upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
+  return e ? e->f : NULL;  // NULL when no field has this number.
+}
+
+INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, const char *name) {  // Lookup by name.
+  upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
+  return e ? e->f : NULL;  // NULL when no field has this name.
+}
+
+INLINE int upb_msgdef_numfields(upb_msgdef *m) {  // Number of fields added to m.
+  return upb_strtable_count(&m->ntof);  // Counted from the name table.
+}
+
+// Iteration over fields.  The order is undefined.
+// Iterators are invalidated when a field is added or removed.
+//   upb_msg_iter i;
+//   for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+//     upb_fielddef *f = upb_msg_iter_field(i);
+//     // ...
+//   }
+typedef upb_inttable_iter upb_msg_iter;
+
+upb_msg_iter upb_msg_begin(upb_msgdef *m);
+upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
+INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }
+
+// Iterator accessor: the field at the iterator's current position.
+INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
+  upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
+  return ent->f;
+}
+
+
+/* upb_enumdef ****************************************************************/
+
+typedef struct _upb_enumdef {
+  upb_def base;        // Must stay first: the def casts convert upb_def* <-> upb_enumdef*.
+  upb_strtable ntoi;   // name to int
+  upb_inttable iton;   // int to name
+  int32_t defaultval;  // Used for enum fields whose default string is empty.
+} upb_enumdef;
+
+typedef struct {
+  upb_strtable_entry e;  // String-table entry header.
+  uint32_t value;
+} upb_ntoi_ent;
+
+typedef struct {
+  bool junk;  // Purpose not visible here.
+  char *str;  // Name of the enum value.
+} upb_iton_ent;
+
+upb_enumdef *upb_enumdef_new();
+INLINE void upb_enumdef_ref(upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }  // Via base def.
+INLINE void upb_enumdef_unref(upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }  // Via base def.
+upb_enumdef *upb_enumdef_dup(upb_enumdef *e);
+
+INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }  // Stored default.
+
+// May only be set before the enumdef is in a symtab.
+void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
+
+// Adds a value to the enumdef.  Requires that no existing val has this
+// name or number (returns false and does not add if there is).  May only
+// be called before the enumdef is in a symtab.
+bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
+
+// Lookups from name to integer and vice-versa.
+bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num);
+bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num);
+// Caller does not own the returned string.
+const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
+
+// Iteration over name/value pairs.  The order is undefined.
+// Adding an enum val invalidates any iterators.
+//   upb_enum_iter i;
+//   for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+//     // ...
+//   }
+typedef upb_inttable_iter upb_enum_iter;
+
+upb_enum_iter upb_enum_begin(upb_enumdef *e);
+upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
+INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
+
+// Iterator accessors: name and number of the value at the current position.
+INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
+  upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
+  return e->str;
+}
+INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
+  return upb_inttable_iter_key(iter);  // The table key is the enum number.
+}
+
+
+/* upb_symtabtxn **************************************************************/
+
+// A symbol table transaction is a map of defs that can be added to a symtab
+// in one single atomic operation that either succeeds or fails.  Mutable defs
+// can be added to this map (and perhaps removed, in the future).
+//
+// A symtabtxn is not thread-safe.
+
+typedef struct {
+  upb_strtable deftab;  // Pending defs, keyed by fully-qualified name.
+} upb_symtabtxn;
+
+void upb_symtabtxn_init(upb_symtabtxn *t);
+void upb_symtabtxn_uninit(upb_symtabtxn *t);
+
+// Adds a def to the symtabtxn.  Caller passes a ref on the def to the symtabtxn.
+// The def's name must be set and there must not be any existing defs in the
+// symtabtxn with this name, otherwise false will be returned and no operation
+// will be performed (and the ref on the def will be released).
+bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def);
+
+// Gets the def (if any) that is associated with this name in the symtabtxn.
+// Caller does *not* inherit a ref on the def.
+upb_def *upb_symtabtxn_get(upb_symtabtxn *t, char *name);
+
+// Iterate over the defs that are part of the transaction.
+// The order is undefined.
+// The iterator is invalidated by upb_symtabtxn_add().
+//   upb_symtabtxn_iter i;
+//   for(upb_symtabtxn_begin(&i, t); !upb_symtabtxn_done(&i);
+//       upb_symtabtxn_next(&i)) {
+//     upb_def *def = upb_symtabtxn_iter_def(&i);
+//   }
+typedef upb_strtable_iter upb_symtabtxn_iter;
+
+void upb_symtabtxn_begin(upb_symtabtxn_iter* i, upb_symtabtxn *t);
+void upb_symtabtxn_next(upb_symtabtxn_iter *i);
+bool upb_symtabtxn_done(upb_symtabtxn_iter *i);
+upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *iter);
+
+
+/* upb_symtab *****************************************************************/
+
+// A SymbolTable is where upb_defs live.  It is empty when first constructed.
+// Clients add definitions to the symtab (or replace existing definitions) by
+// calling upb_symtab_commit() or upb_symtab_add().
+
+// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
+typedef struct {
+  upb_def **defs;  // The stored def pointers.
+  uint32_t len;    // Number of defs currently stored.
+  uint32_t size;   // Presumably the allocated capacity -- TODO confirm in upb_deflist_push().
+} upb_deflist;
+
+void upb_deflist_init(upb_deflist *l);
+void upb_deflist_uninit(upb_deflist *l);
+void upb_deflist_push(upb_deflist *l, upb_def *d);
+
+struct _upb_symtab {
+  upb_atomic_t refcount;   // Starts at 1 in upb_symtab_new().
+  upb_rwlock_t lock;       // Protects all members except the refcount.
+  upb_strtable symtab;     // The symbol table.
+  upb_deflist olddefs;     // Defs replaced by a commit; freed with the symtab.
+};
+
+upb_symtab *upb_symtab_new(void);
+INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }  // Adds one ref.
+void upb_symtab_unref(upb_symtab *s);
+
+// Resolves the given symbol using the rules described in descriptor.proto,
+// namely:
+//
+//    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
+//    scoping rules are used to find the type (i.e. first the nested types
+//    within this message are searched, then within the parent, on up to the
+//    root namespace).
+//
+// If a def is found, the caller owns one ref on the returned def.  Otherwise
+// returns NULL.
+// TODO: make return const
+upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym);
+
+// Find an entry in the symbol table with this exact name.  If a def is found,
+// the caller owns one ref on the returned def.  Otherwise returns NULL.
+// TODO: make return const
+upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym);
+
+// Gets an array of pointers to all currently active defs in this symtab.  The
+// caller owns the returned array (which is of length *count) as well as a ref
+// to each symbol inside.  If type is UPB_DEF_ANY then defs of all types are
+// returned, otherwise only defs of the required type are returned.
+// TODO: make return const
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *n, upb_deftype_t type);
+
+// Adds a single upb_def into the symtab.  A ref on the def is passed to the
+// symtab.  If any references cannot be resolved, false is returned and the
+// symtab is unchanged.  The error (if any) is saved to status if non-NULL.
+bool upb_symtab_add(upb_symtab *s, upb_def *d, upb_status *status);
+
+// Adds the set of defs contained in the transaction to the symtab, clearing
+// the txn.  The entire operation either succeeds or fails.  If the operation
+// fails, the symtab is unchanged, false is returned, and status indicates
+// the error.
+bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *t, upb_status *status);
+
+// Frees defs that are no longer active in the symtab and are no longer
+// reachable.  Such defs are not freed when they are replaced in the symtab
+// if they are still reachable from defs that are still referenced.
+void upb_symtab_gc(upb_symtab *s);
+
+
+/* upb_def casts **************************************************************/
+
+// Dynamic casts, for determining if a def is of a particular type at runtime.
+#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
+  struct _upb_ ## lower;  /* Forward-declare. */ \
+  INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
+    if(def->type != UPB_DEF_ ## upper) return NULL;  /* Not this kind of def. */ \
+    return (struct _upb_ ## lower*)def;  /* The concrete type begins with its upb_def. */ \
+  }
+UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
+UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
+UPB_DYNAMIC_CAST_DEF(svcdef, SERVICE);
+UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
+#undef UPB_DYNAMIC_CAST_DEF
+
+// Downcasts, for when someone wants to assert that a def is of a particular type.
+// These are only checked if we are building debug.
+#define UPB_DOWNCAST_DEF(lower, upper) \
+  struct _upb_ ## lower;  /* Forward-declare. */ \
+  INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
+    assert(def->type == UPB_DEF_ ## upper);  /* Compiled out under NDEBUG. */ \
+    return (struct _upb_ ## lower*)def;  /* Unchecked in release builds. */ \
+  }
+UPB_DOWNCAST_DEF(msgdef, MSG);
+UPB_DOWNCAST_DEF(enumdef, ENUM);
+UPB_DOWNCAST_DEF(svcdef, SERVICE);
+UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
+#undef UPB_DOWNCAST_DEF
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_DEF_H_ */
diff --git a/upb/descriptor.c b/upb/descriptor.c
new file mode 100644
index 0000000..48f0165
--- /dev/null
+++ b/upb/descriptor.c
@@ -0,0 +1,529 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include "upb/def.h"
+#include "upb/descriptor.h"
+
+// Returns a newly allocated string joining the inputs with '.', for example:
+//   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz";  join("", "Baz") -> "Baz"
+// A NULL base acts like "".  Caller must free(); returns NULL on OOM.
+static char *upb_join(const char *base, const char *name) {
+  size_t baselen = base ? strlen(base) : 0;
+  size_t namelen = strlen(name);
+  size_t seplen = baselen > 0;  // The '.' is only emitted after a real base.
+  char *ret = malloc(baselen + seplen + namelen + 1);
+  if (!ret) return NULL;  // Don't write through a failed allocation.
+  if (baselen > 0) {
+    memcpy(ret, base, baselen);
+    ret[baselen] = '.';
+  }
+  memcpy(ret + baselen + seplen, name, namelen + 1);  // Copies the NUL too.
+  return ret;
+}
+
+/* upb_descreader  ************************************************************/
+
+// A upb_descreader builds a list of defs by handling a parse of a protobuf in
+// the format defined in descriptor.proto.  The output of a upb_descreader is
+// a upb_symtabtxn.
+
+static upb_def *upb_deflist_last(upb_deflist *l) {
+  return l->defs[l->len-1];  // No emptiness check: caller must ensure len > 0.
+}
+
+// Prefix the fqname of every def at index "start" or later with scope "str".
+static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
+  for (uint32_t idx = start; idx < l->len; ++idx) {
+    upb_def *entry = l->defs[idx];
+    char *unqualified = entry->fqname;
+    entry->fqname = upb_join(str, unqualified);
+    free(unqualified);  // The joined name is a separate allocation.
+  }
+}
+
+// Forward declares for top-level file descriptors.
+static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h);
+static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h);
+
+void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) {
+  upb_deflist_init(&r->defs);
+  upb_status_init(&r->status);
+  // Start with an empty scope stack and no pending name or default string.
+  r->stack_len = 0;
+  r->name = r->default_string = NULL;
+  r->txn = txn;
+}
+
+void upb_descreader_uninit(upb_descreader *r) {
+  free(r->name);
+  upb_status_uninit(&r->status);
+  upb_deflist_uninit(&r->defs);
+  free(r->default_string);
+  // Release the names of any scope frames that are still on the stack.
+  for (; r->stack_len > 0; r->stack_len--) {
+    free(r->stack[r->stack_len - 1].name);
+  }
+}
+
+static upb_msgdef *upb_descreader_top(upb_descreader *r) {
+  if (r->stack_len < 2) return NULL;  // Fewer than two frames: no msgdef.
+  const upb_descreader_frame *top = &r->stack[r->stack_len - 1];
+  assert(top->start >= 1);  // The msgdef sits just before its scope's defs.
+  return upb_downcast_msgdef(r->defs.defs[top->start - 1]);
+}
+
+static upb_def *upb_descreader_last(upb_descreader *r) {
+  return upb_deflist_last(&r->defs);  // Last entry of the reader's def list.
+}
+
+// Opens a scope for a FileDescriptorProto or DescriptorProto (the two entities
+// that have names and can contain sub-definitions), starting at the next def.
+void upb_descreader_startcontainer(upb_descreader *r) {
+  assert(r->stack_len < UPB_MAX_TYPE_DEPTH);  // Debug-only overflow guard.
+  r->stack[r->stack_len++] =
+      (upb_descreader_frame){.name = NULL, .start = r->defs.len};
+}
+
+void upb_descreader_endcontainer(upb_descreader *r) {
+  upb_descreader_frame *top = r->stack + --r->stack_len;  // Pop the scope.
+  upb_deflist_qualify(&r->defs, top->name, top->start);  // Prefix its defs.
+  free(top->name);
+  top->name = NULL;
+}
+
+void upb_descreader_setscopename(upb_descreader *r, char *str) {
+  char **slot = &r->stack[r->stack_len - 1].name;
+  free(*slot);  // Drop any name set earlier for the innermost scope.
+  *slot = str;  // The frame now owns str.
+}
+
+// Handlers for google.protobuf.FileDescriptorProto.
+static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) {
+  // The closure is the upb_descreader; each file opens a scope of its own.
+  upb_descreader_startcontainer((upb_descreader *)_r);
+  return UPB_CONTINUE;
+}
+
+static void upb_descreader_FileDescriptorProto_endmsg(void *_r,
+                                                      upb_status *status) {
+  (void)status;
+  // Closing the file scope prefixes its defs with the package name, if any.
+  upb_descreader_endcontainer((upb_descreader *)_r);
+}
+
+static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
+                                                             upb_value fval,
+                                                             upb_value val) {
+  (void)fval;
+  char *package = upb_strref_dup(upb_value_getstrref(val));
+  upb_descreader_setscopename(_r, package);  // The scope frame takes ownership.
+  return UPB_CONTINUE;
+}
+
+static upb_mhandlers *upb_descreader_register_FileDescriptorProto(
+    upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(m, &upb_descreader_FileDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(m, &upb_descreader_FileDescriptorProto_endmsg);
+  // FNUM/FTYPE name the GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_* field constants.
+#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM
+#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE
+  upb_fhandlers *f =
+      upb_mhandlers_newfhandlers(m, FNUM(PACKAGE), FTYPE(PACKAGE), false);
+  upb_fhandlers_setvalue(f, &upb_descreader_FileDescriptorProto_package);
+  // Top-level messages and enums are delegated to their own handler sets.
+  upb_mhandlers_newfhandlers_subm(m, FNUM(MESSAGE_TYPE), FTYPE(MESSAGE_TYPE), true,
+                                  upb_msgdef_register_DescriptorProto(h));
+  upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
+                                  upb_enumdef_register_EnumDescriptorProto(h));
+  // TODO: services, extensions
+  return m;
+}
+#undef FNUM
+#undef FTYPE
+
+// Handlers for google.protobuf.FileDescriptorSet.
+static void upb_descreader_FileDescriptorSet_onendmsg(void *_r,
+                                                      upb_status *status) {
+  upb_descreader *reader = _r;
+  // On a failed parse the defs are left in reader->defs.
+  if (!upb_ok(status)) return;
+  // Move all defs (which are now guaranteed to be fully-qualified) to the txn.
+  for (unsigned int n = 0; n < reader->defs.len; n++) {
+    // TODO: check return for duplicate def.
+    upb_symtabtxn_add(reader->txn, reader->defs.defs[n]);
+  }
+  reader->defs.len = 0;
+}
+
+static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setendmsg(m, upb_descreader_FileDescriptorSet_onendmsg);
+  // FNUM/FTYPE name the GOOGLE_PROTOBUF_FILEDESCRIPTORSET_* field constants.
+#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM
+#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE
+  upb_mhandlers_newfhandlers_subm(m, FNUM(FILE), FTYPE(FILE), true,
+                                   upb_descreader_register_FileDescriptorProto(h));
+  return m;
+}
+#undef FNUM
+#undef FTYPE
+
+upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) {
+  h->should_jit = false;  // NOTE(review): JIT is turned off here; reason not stated.
+  return upb_descreader_register_FileDescriptorSet(h);  // Root message type.
+}
+
+// google.protobuf.EnumValueDescriptorProto.
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  // Neither the name nor the number of this value has been seen yet.
+  reader->saw_name = reader->saw_number = false;
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
+                                                            upb_value fval,
+                                                            upb_value val) {
+  (void)fval;
+  upb_descreader *reader = _r;
+  reader->saw_name = true;
+  free(reader->name);  // Discard any name that is still pending.
+  reader->name = upb_strref_dup(upb_value_getstrref(val));
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r,
+                                                              upb_value fval,
+                                                              upb_value val) {
+  (void)fval;
+  upb_descreader *reader = _r;
+  reader->saw_number = true;
+  reader->number = upb_value_getint32(val);  // Consumed by the value's endmsg.
+  return UPB_CONTINUE;
+}
+
+static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
+                                                        upb_status *status) {
+  upb_descreader *reader = _r;
+  if (reader->saw_number && reader->saw_name) {
+    upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(reader));
+    // The default value of an enum (in the absence of an explicit default) is
+    // its first listed value, i.e. the one that arrives while iton is empty.
+    if (upb_inttable_count(&e->iton) == 0) {
+      upb_enumdef_setdefault(e, reader->number);
+    }
+    upb_enumdef_addval(e, reader->name, reader->number);
+    free(reader->name);
+    reader->name = NULL;
+  } else {
+    upb_status_setf(status, UPB_ERROR, "Enum value missing name or number.");
+  }
+}
+
+static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
+    upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumValueDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(m, &upb_enumdef_EnumValueDescriptorProto_endmsg);
+  // FNUM/FTYPE name the GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_* constants.
+#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_fhandlers *f;
+  f = upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_name);
+  // The endmsg handler reports an error unless both name and number arrived.
+  f = upb_mhandlers_newfhandlers(m, FNUM(NUMBER), FTYPE(NUMBER), false);
+  upb_fhandlers_setvalue(f, &upb_enumdef_EnumValueDescriptorProto_number);
+  return m;
+}
+#undef FNUM
+#undef FTYPE
+
+// google.protobuf.EnumDescriptorProto.
+static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
+  // A new enumdef joins the def list; later handlers find it via "last".
+  upb_deflist_push(&((upb_descreader *)_r)->defs, UPB_UPCAST(upb_enumdef_new()));
+  return UPB_CONTINUE;
+}
+
+static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
+  // The enum being validated is the reader's last def.  It must have both a
+  // name and at least one value; only the first problem found is reported.
+  upb_def *def = upb_descreader_last((upb_descreader *)_r);
+  upb_enumdef *e = upb_downcast_enumdef(def);
+  if (def->fqname == NULL) {
+    upb_status_setf(status, UPB_ERROR, "Enum had no name.");
+  } else if (upb_inttable_count(&e->iton) == 0) {
+    upb_status_setf(status, UPB_ERROR, "Enum had no values.");
+  }
+  // Nothing else to do here on success.
+}
+
+static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
+                                                       upb_value fval,
+                                                       upb_value val) {
+  (void)fval;
+  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(_r));
+  char **fqname = &e->base.fqname;
+  free(*fqname);  // Replace any name stored earlier.
+  *fqname = upb_strref_dup(upb_value_getstrref(val));
+  return UPB_CONTINUE;
+}
+
+static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(m, &upb_enumdef_EnumDescriptorProto_startmsg);
+  upb_mhandlers_setendmsg(m, &upb_enumdef_EnumDescriptorProto_endmsg);
+  // FNUM/FTYPE name the GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_* field constants.
+#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_fhandlers *f =
+      upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(f, &upb_enumdef_EnumDescriptorProto_name);
+  // Each enum value is handled by the EnumValueDescriptorProto handlers.
+  upb_mhandlers_newfhandlers_subm(m, FNUM(VALUE), FTYPE(VALUE), true,
+                               upb_enumdef_register_EnumValueDescriptorProto(h));
+  return m;
+}
+#undef FNUM
+#undef FTYPE
+
+static upb_flow_t upb_fielddef_startmsg(void *_r) {
+  // Each FieldDescriptorProto is accumulated into a fresh fielddef in r->f.
+  ((upb_descreader *)_r)->f = upb_fielddef_new();
+  return UPB_CONTINUE;
+}
+
+// Converts the default value in string "str" into "d".  Takes ownership of
+// str (NULL means no default was given).  Returns true on success.
+static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) {
+  bool success = true;
+  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
+    // We'll keep the ref we had on it.  We include enums in this case because
+    // we need the enumdef to resolve the name, but we may not have it yet.
+    // We'll resolve it later.
+    if (!str) str = strdup("");
+    upb_value_setptr(d, str);
+  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
+    // We don't expect to get a default value.  Test before freeing: the
+    // value of a pointer is indeterminate once it has been passed to free().
+    success = (str == NULL);
+    free(str);
+  } else if (type == UPB_TYPE(BOOL)) {
+    if (!str || strcmp(str, "false") == 0)
+      upb_value_setbool(d, false);
+    else if (strcmp(str, "true") == 0)
+      upb_value_setbool(d, true);
+    else
+      success = false;
+    free(str);
+  } else {
+    const char *text = str ? str : "0";  // A missing default parses as zero.
+    char *end = NULL;  // Stays NULL if "type" matches none of the cases below.
+    // strto* never clear errno, so drop any stale ERANGE before converting.
+    errno = 0;
+    switch (type) {
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(SFIXED32): {
+        long val = strtol(text, &end, 0);
+        if (val > INT32_MAX || val < INT32_MIN)
+          success = false;
+        else
+          upb_value_setint32(d, val);
+        break;
+      }
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SINT64):
+      case UPB_TYPE(SFIXED64):
+        upb_value_setint64(d, strtoll(text, &end, 0));
+        break;
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32): {
+        unsigned long val = strtoul(text, &end, 0);
+        if (val > UINT32_MAX)
+          success = false;
+        else
+          upb_value_setuint32(d, val);
+        break;
+      }
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64):
+        upb_value_setuint64(d, strtoull(text, &end, 0));
+        break;
+      case UPB_TYPE(DOUBLE):
+        upb_value_setdouble(d, strtod(text, &end));
+        break;
+      case UPB_TYPE(FLOAT):
+        upb_value_setfloat(d, strtof(text, &end));
+        break;
+    }
+    // Reject out-of-range values, input with no digits, and trailing garbage.
+    if (end && (errno == ERANGE || end == text || *end)) success = false;
+    free(str);
+  }
+  return success;
+}
+
+static void upb_fielddef_endmsg(void *_r, upb_status *status) {
+  upb_descreader *r = _r;
+  upb_fielddef *f = r->f;
+  // TODO: verify that all required fields were present.
+  assert(f->number != -1 && f->name != NULL);
+  assert((f->def != NULL) == upb_hasdef(f));
+
+  // Field was successfully read, add it as a field of the msgdef.
+  upb_msgdef_addfield(upb_descreader_top(r), f);
+  // Detach the pending default text from the reader before parsing it.
+  char *text = r->default_string;
+  r->default_string = NULL;
+  upb_value parsed;
+  if (upb_fielddef_parsedefault(text, &parsed, f->type)) {
+    upb_fielddef_setdefault(f, parsed);
+  } else {
+    // We don't worry too much about giving a great error message since the
+    // compiler should have ensured this was correct.
+    upb_status_setf(status, UPB_ERROR, "Error converting default value.");
+  }
+}
+
+static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) {
+  (void)fval;
+  upb_fielddef *field = ((upb_descreader *)_r)->f;  // Field being read.
+  upb_fielddef_settype(field, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) {
+  (void)fval;
+  upb_fielddef *field = ((upb_descreader *)_r)->f;  // Field being read.
+  upb_fielddef_setlabel(field, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) {
+  (void)fval;
+  upb_fielddef *field = ((upb_descreader *)_r)->f;  // Field being read.
+  upb_fielddef_setnumber(field, upb_value_getint32(val));
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
+  (void)fval;
+  upb_fielddef *field = ((upb_descreader *)_r)->f;
+  char *copy = upb_strref_dup(upb_value_getstrref(val));
+  upb_fielddef_setname(field, copy);
+  free(copy);  // The temporary is released as soon as setname returns.
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
+                                          upb_value val) {
+  (void)fval;
+  upb_fielddef *field = ((upb_descreader *)_r)->f;
+  char *copy = upb_strref_dup(upb_value_getstrref(val));
+  upb_fielddef_settypename(field, copy);
+  free(copy);  // The temporary is released as soon as settypename returns.
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
+                                            upb_value val) {
+  (void)fval;
+  // Have to convert from string to the correct type, but we might not know the
+  // type yet, so only the text is stored here, replacing any earlier one.
+  char **pending = &((upb_descreader *)_r)->default_string;
+  free(*pending);
+  *pending = upb_strref_dup(upb_value_getstrref(val));
+  return UPB_CONTINUE;
+}
+
+static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
+    upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg);
+  upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg);
+  // FIELD(name, handler): register "handler" as the value handler for "name".
+#define FIELD(name, handler) \
+  upb_fhandlers_setvalue( \
+      upb_mhandlers_newfhandlers(m, \
+          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \
+          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \
+          false), \
+      handler)
+  FIELD(TYPE, &upb_fielddef_ontype);
+  FIELD(LABEL, &upb_fielddef_onlabel);
+  FIELD(NUMBER, &upb_fielddef_onnumber);
+  FIELD(NAME, &upb_fielddef_onname);
+  FIELD(TYPE_NAME, &upb_fielddef_ontypename);
+  FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval);
+  return m;
+}
+// FIELD is the only helper macro defined above; don't let it leak further.
+#undef FIELD
+
+
+// google.protobuf.DescriptorProto.
+static upb_flow_t upb_msgdef_startmsg(void *_r) {
+  upb_descreader *reader = _r;
+  upb_def *fresh = UPB_UPCAST(upb_msgdef_new());  // Lands at index start-1.
+  upb_deflist_push(&reader->defs, fresh);
+  upb_descreader_startcontainer(reader);
+  return UPB_CONTINUE; }
+
+static void upb_msgdef_endmsg(void *_r, upb_status *status) {
+  upb_descreader *reader = _r;
+  upb_msgdef *msg = upb_descreader_top(reader);
+  if (msg->base.fqname) {
+    upb_msgdef_layout(msg);
+    upb_descreader_endcontainer(reader);
+  } else {
+    // NOTE(review): the scope frame opened in startmsg is not popped here.
+    upb_status_setf(status, UPB_ERROR, "Encountered message with no name.");
+  }
+}
+
+static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
+  (void)fval;
+  assert(val.type == UPB_TYPE(STRING));
+  upb_descreader *reader = _r;
+  upb_msgdef *msg = upb_descreader_top(reader);
+  free(msg->base.fqname);
+  msg->base.fqname = upb_strref_dup(upb_value_getstrref(val));
+  upb_descreader_setscopename(reader, strdup(msg->base.fqname));  // Own copy.
+  return UPB_CONTINUE;
+}
+
+static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
+  upb_mhandlers *m = upb_handlers_newmhandlers(h);
+  upb_mhandlers_setstartmsg(m, &upb_msgdef_startmsg);
+  upb_mhandlers_setendmsg(m, &upb_msgdef_endmsg);
+  // FNUM/FTYPE name the GOOGLE_PROTOBUF_DESCRIPTORPROTO_* field constants.
+#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM
+#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE
+  upb_fhandlers *f =
+      upb_mhandlers_newfhandlers(m, FNUM(NAME), FTYPE(NAME), false);
+  upb_fhandlers_setvalue(f, &upb_msgdef_onname);
+  // Fields and nested enums are delegated to their own handler sets.
+  upb_mhandlers_newfhandlers_subm(m, FNUM(FIELD), FTYPE(FIELD), true,
+                                  upb_fielddef_register_FieldDescriptorProto(h));
+  upb_mhandlers_newfhandlers_subm(m, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE), true,
+                                  upb_enumdef_register_EnumDescriptorProto(h));
+
+  // DescriptorProto is self-recursive, so we must link the definition.
+  upb_mhandlers_newfhandlers_subm(
+      m, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, m);
+
+  // TODO: extensions.
+  return m;
+}
+#undef FNUM
+#undef FTYPE
+
diff --git a/upb/descriptor.h b/upb/descriptor.h
new file mode 100644
index 0000000..4d658fb
--- /dev/null
+++ b/upb/descriptor.h
@@ -0,0 +1,67 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Routines for building defs by parsing descriptors in descriptor.proto format.
+ * This only needs to use the public API of upb_symtab.  Later we may also
+ * add routines for dumping a symtab to a descriptor.
+ */
+
+#ifndef UPB_DESCRIPTOR_H
+#define UPB_DESCRIPTOR_H
+
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_descreader  ************************************************************/
+
+// upb_descreader reads a descriptor and puts defs in a upb_symtabtxn.
+
+// We keep a stack of all the message scopes we are currently in, as well as
+// the top-level file scope.  This is necessary to correctly qualify the
+// definitions that are contained inside.  "name" tracks the name of the
+// message or package (a bare name -- not qualified by any enclosing scopes).
+typedef struct {
+  char *name;  // Heap-allocated and owned by the frame; NULL until set.
+  // Index of the first def that is under this scope.  For msgdefs, the
+  // msgdef itself is at start-1.
+  int start;
+} upb_descreader_frame;
+
+typedef struct {
+  upb_deflist defs;  // Defs read so far, not yet added to the txn.
+  upb_symtabtxn *txn;  // Destination for the defs.
+  upb_descreader_frame stack[UPB_MAX_TYPE_DEPTH];
+  int stack_len;  // Number of entries of "stack" currently in use.
+  upb_status status;
+  // State for the enum value currently being read.
+  uint32_t number;
+  char *name;
+  bool saw_number;
+  bool saw_name;
+  // Default-value text of the field currently being read, or NULL.
+  char *default_string;
+  // The field currently being read.
+  upb_fielddef *f;
+} upb_descreader;
+
+// Creates a new descriptor builder that will add defs to the given txn.
+void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn);
+void upb_descreader_uninit(upb_descreader *r);
+
+// Registers handlers that will load descriptor data into a symtabtxn.
+// Pass the descreader as the closure.  The messages will have
+// upb_msgdef_layout() called on them before adding to the txn.
+upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/descriptor.proto b/upb/descriptor.proto
new file mode 100644
index 0000000..233f879
--- /dev/null
+++ b/upb/descriptor.proto
@@ -0,0 +1,533 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// The messages in this file describe the definitions found in .proto files.
+// A valid .proto file can be translated directly to a FileDescriptorProto
+// without any other information (e.g. without reading its imports).
+
+
+
+package google.protobuf;
+option java_package = "com.google.protobuf";
+option java_outer_classname = "DescriptorProtos";
+
+// descriptor.proto must be optimized for speed because reflection-based
+// algorithms don't work during bootstrapping.
+option optimize_for = SPEED;
+
+// The protocol compiler can output a FileDescriptorSet containing the .proto
+// files it parses.
+message FileDescriptorSet {
+  repeated FileDescriptorProto file = 1;
+}
+
+// Describes a complete .proto file.
+message FileDescriptorProto {
+  optional string name = 1;       // file name, relative to root of source tree
+  optional string package = 2;    // e.g. "foo", "foo.bar", etc.
+
+  // Names of files imported by this file.
+  repeated string dependency = 3;
+
+  // All top-level definitions in this file.
+  repeated DescriptorProto message_type = 4;
+  repeated EnumDescriptorProto enum_type = 5;
+  repeated ServiceDescriptorProto service = 6;
+  repeated FieldDescriptorProto extension = 7;
+
+  optional FileOptions options = 8;
+
+  // This field contains optional information about the original source code.
+  // You may safely remove this entire field without harming runtime
+  // functionality of the descriptors -- the information is needed only by
+  // development tools.
+  optional SourceCodeInfo source_code_info = 9;
+}
+
+// Describes a message type.
+message DescriptorProto {
+  optional string name = 1;
+
+  repeated FieldDescriptorProto field = 2;
+  repeated FieldDescriptorProto extension = 6;
+
+  repeated DescriptorProto nested_type = 3;
+  repeated EnumDescriptorProto enum_type = 4;
+
+  message ExtensionRange {
+    optional int32 start = 1;
+    optional int32 end = 2;
+  }
+  repeated ExtensionRange extension_range = 5;
+
+  optional MessageOptions options = 7;
+}
+
+// Describes a field within a message.
+message FieldDescriptorProto {
+  enum Type {
+    // 0 is reserved for errors.
+    // Order is weird for historical reasons.
+    TYPE_DOUBLE         = 1;
+    TYPE_FLOAT          = 2;
+    TYPE_INT64          = 3;   // Not ZigZag encoded.  Negative numbers
+                               // take 10 bytes.  Use TYPE_SINT64 if negative
+                               // values are likely.
+    TYPE_UINT64         = 4;
+    TYPE_INT32          = 5;   // Not ZigZag encoded.  Negative numbers
+                               // take 10 bytes.  Use TYPE_SINT32 if negative
+                               // values are likely.
+    TYPE_FIXED64        = 6;
+    TYPE_FIXED32        = 7;
+    TYPE_BOOL           = 8;
+    TYPE_STRING         = 9;
+    TYPE_GROUP          = 10;  // Tag-delimited aggregate.
+    TYPE_MESSAGE        = 11;  // Length-delimited aggregate.
+
+    // New in version 2.
+    TYPE_BYTES          = 12;
+    TYPE_UINT32         = 13;
+    TYPE_ENUM           = 14;
+    TYPE_SFIXED32       = 15;
+    TYPE_SFIXED64       = 16;
+    TYPE_SINT32         = 17;  // Uses ZigZag encoding.
+    TYPE_SINT64         = 18;  // Uses ZigZag encoding.
+  };
+
+  enum Label {
+    // 0 is reserved for errors
+    LABEL_OPTIONAL      = 1;
+    LABEL_REQUIRED      = 2;
+    LABEL_REPEATED      = 3;
+    // TODO(sanjay): Should we add LABEL_MAP?
+  };
+
+  optional string name = 1;
+  optional int32 number = 3;
+  optional Label label = 4;
+
+  // If type_name is set, this need not be set.  If both this and type_name
+  // are set, this must be either TYPE_ENUM or TYPE_MESSAGE.
+  optional Type type = 5;
+
+  // For message and enum types, this is the name of the type.  If the name
+  // starts with a '.', it is fully-qualified.  Otherwise, C++-like scoping
+  // rules are used to find the type (i.e. first the nested types within this
+  // message are searched, then within the parent, on up to the root
+  // namespace).
+  optional string type_name = 6;
+
+  // For extensions, this is the name of the type being extended.  It is
+  // resolved in the same manner as type_name.
+  optional string extendee = 2;
+
+  // For numeric types, contains the original text representation of the value.
+  // For booleans, "true" or "false".
+  // For strings, contains the default text contents (not escaped in any way).
+  // For bytes, contains the C escaped value.  All bytes >= 128 are escaped.
+  // TODO(kenton):  Base-64 encode?
+  optional string default_value = 7;
+
+  optional FieldOptions options = 8;
+}
+
+// Describes an enum type.
+message EnumDescriptorProto {
+  optional string name = 1;
+
+  repeated EnumValueDescriptorProto value = 2;
+
+  optional EnumOptions options = 3;
+}
+
+// Describes a value within an enum.
+message EnumValueDescriptorProto {
+  optional string name = 1;
+  optional int32 number = 2;
+
+  optional EnumValueOptions options = 3;
+}
+
+// Describes a service.
+message ServiceDescriptorProto {
+  optional string name = 1;
+  repeated MethodDescriptorProto method = 2;
+
+  optional ServiceOptions options = 3;
+}
+
+// Describes a method of a service.
+message MethodDescriptorProto {
+  optional string name = 1;
+
+  // Input and output type names.  These are resolved in the same way as
+  // FieldDescriptorProto.type_name, but must refer to a message type.
+  optional string input_type = 2;
+  optional string output_type = 3;
+
+  optional MethodOptions options = 4;
+}
+
+// ===================================================================
+// Options
+
+// Each of the definitions above may have "options" attached.  These are
+// just annotations which may cause code to be generated slightly differently
+// or may contain hints for code that manipulates protocol messages.
+//
+// Clients may define custom options as extensions of the *Options messages.
+// These extensions may not yet be known at parsing time, so the parser cannot
+// store the values in them.  Instead it stores them in a field in the *Options
+// message called uninterpreted_option. This field must have the same name
+// across all *Options messages. We then use this field to populate the
+// extensions when we build a descriptor, at which point all protos have been
+// parsed and so all extensions are known.
+//
+// Extension numbers for custom options may be chosen as follows:
+// * For options which will only be used within a single application or
+//   organization, or for experimental options, use field numbers 50000
+//   through 99999.  It is up to you to ensure that you do not use the
+//   same number for multiple options.
+// * For options which will be published and used publicly by multiple
+//   independent entities, e-mail kenton@google.com to reserve extension
+//   numbers.  Simply tell me how many you need and I'll send you back a
+//   set of numbers to use -- there's no need to explain how you intend to
+//   use them.  If this turns out to be popular, a web service will be set up
+//   to automatically assign option numbers.
+
+
+message FileOptions {
+
+  // Sets the Java package where classes generated from this .proto will be
+  // placed.  By default, the proto package is used, but this is often
+  // inappropriate because proto packages do not normally start with backwards
+  // domain names.
+  optional string java_package = 1;
+
+
+  // If set, all the classes from the .proto file are wrapped in a single
+  // outer class with the given name.  This applies to both Proto1
+  // (equivalent to the old "--one_java_file" option) and Proto2 (where
+  // a .proto always translates to a single class, but you may want to
+  // explicitly choose the class name).
+  optional string java_outer_classname = 8;
+
+  // If set true, then the Java code generator will generate a separate .java
+  // file for each top-level message, enum, and service defined in the .proto
+  // file.  Thus, these types will *not* be nested inside the outer class
+  // named by java_outer_classname.  However, the outer class will still be
+  // generated to contain the file's getDescriptor() method as well as any
+  // top-level extensions defined in the file.
+  optional bool java_multiple_files = 10 [default=false];
+
+  // If set true, then the Java code generator will generate equals() and
+  // hashCode() methods for all messages defined in the .proto file. This is
+  // purely a speed optimization, as the AbstractMessage base class includes
+  // reflection-based implementations of these methods.
+  optional bool java_generate_equals_and_hash = 20 [default=false];
+
+  // Generated classes can be optimized for speed or code size.
+  enum OptimizeMode {
+    SPEED = 1;        // Generate complete code for parsing, serialization,
+                      // etc.
+    CODE_SIZE = 2;    // Use ReflectionOps to implement these methods.
+    LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime.
+  }
+  optional OptimizeMode optimize_for = 9 [default=SPEED];
+
+
+
+
+  // Should generic services be generated in each language?  "Generic" services
+  // are not specific to any particular RPC system.  They are generated by the
+  // main code generators in each language (without additional plugins).
+  // Generic services were the only kind of service generation supported by
+  // early versions of proto2.
+  //
+  // Generic services are now considered deprecated in favor of using plugins
+  // that generate code specific to your particular RPC system.  Therefore,
+  // these default to false.  Old code which depends on generic services should
+  // explicitly set them to true.
+  optional bool cc_generic_services = 16 [default=false];
+  optional bool java_generic_services = 17 [default=false];
+  optional bool py_generic_services = 18 [default=false];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+message MessageOptions {  // Options written as "option ...;" inside a message.
+  // Set true to use the old proto1 MessageSet wire format for extensions.
+  // This is provided for backwards-compatibility with the MessageSet wire
+  // format.  You should not use this for any other reason:  It's less
+  // efficient, has fewer features, and is more complicated.
+  //
+  // The message must be defined exactly as follows:
+  //   message Foo {
+  //     option message_set_wire_format = true;
+  //     extensions 4 to max;
+  //   }
+  // Note that the message cannot have any defined fields; MessageSets only
+  // have extensions.
+  //
+  // All extensions of your type must be singular messages; e.g. they cannot
+  // be int32s, enums, or repeated messages.
+  //
+  // Because this is an option, the above two restrictions are not enforced by
+  // the protocol compiler.
+  optional bool message_set_wire_format = 1 [default=false];
+
+  // Disables the generation of the standard "descriptor()" accessor, which can
+  // conflict with a field of the same name.  This is meant to make migration
+  // from proto1 easier; new code should avoid fields named "descriptor".
+  optional bool no_standard_descriptor_accessor = 2 [default=false];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+message FieldOptions {  // Options written in [...] after a field declaration.
+  // The ctype option instructs the C++ code generator to use a different
+  // representation of the field than it normally would.  See the specific
+  // options below.  This option is not yet implemented in the open source
+  // release -- sorry, we'll try to include it in a future version!
+  optional CType ctype = 1 [default = STRING];
+  enum CType {
+    // Default mode.
+    STRING = 0;
+
+    CORD = 1;
+
+    STRING_PIECE = 2;
+  }
+  // The packed option can be enabled for repeated primitive fields to enable
+  // a more efficient representation on the wire. Rather than repeatedly
+  // writing the tag and type for each element, the entire array is encoded as
+  // a single length-delimited blob.
+  optional bool packed = 2;
+
+
+  // Is this field deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for accessors, or it will be completely ignored; at the very least, this
+  // is a formalization for deprecating fields.
+  optional bool deprecated = 3 [default=false];
+
+  // EXPERIMENTAL.  DO NOT USE.
+  // For "map" fields, the name of the field in the enclosed type that
+  // is the key for this map.  For example, suppose we have:
+  //   message Item {
+  //     required string name = 1;
+  //     required string value = 2;
+  //   }
+  //   message Config {
+  //     repeated Item items = 1 [experimental_map_key="name"];
+  //   }
+  // In this situation, the map key for Item will be set to "name".
+  // TODO: Fully-implement this, then remove the "experimental_" prefix.
+  optional string experimental_map_key = 9;
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+message EnumOptions {
+  // Declares no named options of its own, only the generic hooks below.
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+message EnumValueOptions {  // Declares no named options, only generic hooks.
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+message ServiceOptions {
+  // Declares no named options of its own, only the generic hooks below.
+  // Note:  Field numbers 1 through 32 are reserved for Google's internal RPC
+  //   framework.  We apologize for hoarding these numbers to ourselves, but
+  //   we were already using them long before we decided to release Protocol
+  //   Buffers.
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+message MethodOptions {
+  // Declares no named options of its own, only the generic hooks below.
+  // Note:  Field numbers 1 through 32 are reserved for Google's internal RPC
+  //   framework.  We apologize for hoarding these numbers to ourselves, but
+  //   we were already using them long before we decided to release Protocol
+  //   Buffers.
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// A message representing an option the parser does not recognize. This only
+// appears in options protos created by the compiler::Parser class.
+// DescriptorPool resolves these when building Descriptor objects. Therefore,
+// options protos in descriptor objects (e.g. returned by Descriptor::options(),
+// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
+// in them.
+message UninterpretedOption {
+  // The name of the uninterpreted option.  Each string represents a segment in
+  // a dot-separated name.  is_extension is true iff a segment represents an
+  // extension (denoted with parentheses in options specs in .proto files).
+  // E.g., { ["foo", false], ["bar.baz", true], ["qux", false] } represents
+  // "foo.(bar.baz).qux".
+  message NamePart {
+    required string name_part = 1;
+    required bool is_extension = 2;
+  }
+  repeated NamePart name = 2;
+
+  // The value of the uninterpreted option, in whatever type the tokenizer
+  // identified it as during parsing. Exactly one of these should be set.
+  optional string identifier_value = 3;
+  optional uint64 positive_int_value = 4;
+  optional int64 negative_int_value = 5;
+  optional double double_value = 6;
+  optional bytes string_value = 7;
+  optional string aggregate_value = 8;
+}
+
+// ===================================================================
+// Optional source code info
+
+// Encapsulates information about the original source file from which a
+// FileDescriptorProto was generated.
+message SourceCodeInfo {
+  // A Location identifies a piece of source code in a .proto file which
+  // corresponds to a particular definition.  This information is intended
+  // to be useful to IDEs, code indexers, documentation generators, and similar
+  // tools.
+  //
+  // For example, say we have a file like:
+  //   message Foo {
+  //     optional string foo = 1;
+  //   }
+  // Let's look at just the field definition:
+  //   optional string foo = 1;
+  //   ^       ^^     ^^  ^  ^^^
+  //   a       bc     de  f  ghi
+  // We have the following locations:
+  //   span   path               represents
+  //   [a,i)  [ 4, 0, 2, 0 ]     The whole field definition.
+  //   [a,b)  [ 4, 0, 2, 0, 4 ]  The label (optional).
+  //   [c,d)  [ 4, 0, 2, 0, 5 ]  The type (string).
+  //   [e,f)  [ 4, 0, 2, 0, 1 ]  The name (foo).
+  //   [g,h)  [ 4, 0, 2, 0, 3 ]  The number (1).
+  //
+  // Notes:
+  // - A location may refer to a repeated field itself (i.e. not to any
+  //   particular index within it).  This is used whenever a set of elements is
+  //   logically enclosed in a single code segment.  For example, an entire
+  //   extend block (possibly containing multiple extension definitions) will
+  //   have an outer location whose path refers to the "extensions" repeated
+  //   field without an index.
+  // - Multiple locations may have the same path.  This happens when a single
+  //   logical declaration is spread out across multiple places.  The most
+  //   obvious example is the "extend" block again -- there may be multiple
+  //   extend blocks in the same scope, each of which will have the same path.
+  // - A location's span is not always a subset of its parent's span.  For
+  //   example, the "extendee" of an extension declaration appears at the
+  //   beginning of the "extend" block and is shared by all extensions within
+  //   the block.
+  // - Just because a location's span is a subset of some other location's span
+  //   does not mean that it is a descendant.  For example, a "group" defines
+  //   both a type and a field in a single declaration.  Thus, the locations
+  //   corresponding to the type and field and their components will overlap.
+  // - Code which tries to interpret locations should probably be designed to
+  //   ignore those that it doesn't understand, as more types of locations could
+  //   be recorded in the future.
+  repeated Location location = 1;
+  message Location {
+    // Identifies which part of the FileDescriptorProto was defined at this
+    // location.
+    //
+    // Each element is a field number or an index.  They form a path from
+    // the root FileDescriptorProto to the place where the definition occurs.
+    // For example, this path:
+    //   [ 4, 3, 2, 7, 1 ]
+    // refers to:
+    //   file.message_type(3)  // 4, 3
+    //       .field(7)         // 2, 7
+    //       .name()           // 1
+    // This is because FileDescriptorProto.message_type has field number 4:
+    //   repeated DescriptorProto message_type = 4;
+    // and DescriptorProto.field has field number 2:
+    //   repeated FieldDescriptorProto field = 2;
+    // and FieldDescriptorProto.name has field number 1:
+    //   optional string name = 1;
+    //
+    // Thus, the above path gives the location of a field name.  If we removed
+    // the last element:
+    //   [ 4, 3, 2, 7 ]
+    // this path refers to the whole field declaration (from the beginning
+    // of the label to the terminating semicolon).
+    repeated int32 path = 1 [packed=true];
+
+    // Always has exactly three or four elements: start line, start column,
+    // end line (optional, otherwise assumed same as start line), end column.
+    // These are packed into a single field for efficiency.  Note that line
+    // and column numbers are zero-based -- typically you will want to add
+    // 1 to each before displaying to a user.
+    repeated int32 span = 2 [packed=true];
+
+    // TODO(kenton):  Record comments appearing before and after the
+    // declaration.
+  }
+}
diff --git a/upb/descriptor_const.h b/upb/descriptor_const.h
new file mode 100644
index 0000000..228c95a
--- /dev/null
+++ b/upb/descriptor_const.h
@@ -0,0 +1,349 @@
+/* This file was generated by upbc (the upb compiler).  Do not edit. */
+
+#ifndef SRC_DESCRIPTOR_CONST_C
+#define SRC_DESCRIPTOR_CONST_C
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Enums. */
+
+typedef enum google_protobuf_FieldOptions_CType {  /* FieldOptions.CType */
+  GOOGLE_PROTOBUF_FIELDOPTIONS_STRING = 0,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2,  /* Not in numeric order. */
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1
+} google_protobuf_FieldOptions_CType;
+
+typedef enum google_protobuf_FieldDescriptorProto_Type {  /* Field type ids. */
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
+} google_protobuf_FieldDescriptorProto_Type;  /* Used by __FIELDTYPE below. */
+
+typedef enum google_protobuf_FieldDescriptorProto_Label {  /* Field labels. */
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
+} google_protobuf_FieldDescriptorProto_Label;
+
+typedef enum google_protobuf_FileOptions_OptimizeMode {  /* For optimize_for. */
+  GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,         /* Generate complete code. */
+  GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2,     /* Use ReflectionOps. */
+  GOOGLE_PROTOBUF_FILEOPTIONS_LITE_RUNTIME = 3   /* MessageLite runtime. */
+} google_protobuf_FileOptions_OptimizeMode;
+
+/* Per-field constants: number, name, and type (a FieldDescriptorProto_Type value). */
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path"
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNUM 2
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDNAME "span"
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNUM 2
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDNAME "identifier_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDNAME "positive_int_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE__FIELDTYPE 4
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNUM 7
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDNAME "package"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNUM 3
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDNAME "dependency"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDNAME "message_type"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDNAME "input_type"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDNAME "output_type"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNUM 4
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location"
+#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNUM 1
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDNAME "start"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNUM 2
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNUM 2
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDNAME "packed"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNUM 3
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDNAME "deprecated"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDNAME "experimental_map_key"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16
+#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNUM 17
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDNAME "java_generic_services"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNUM 18
+#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDNAME "py_generic_services"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNUM 20
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDNAME "java_generate_equals_and_hash"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDNAME "extendee"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNUM 3
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNUM 4
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDNAME "label"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNUM 7
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* SRC_DESCRIPTOR_CONST_C */
diff --git a/upb/handlers.c b/upb/handlers.c
new file mode 100644
index 0000000..05300c0
--- /dev/null
+++ b/upb/handlers.c
@@ -0,0 +1,311 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include "upb/handlers.h"
+
+
+/* upb_mhandlers **************************************************************/
+
+// Allocates a upb_mhandlers with an initialized (empty) field table and with
+// no startmsg/endmsg handlers and no table array.  Calls abort() if the
+// allocation fails, because callers use the result without checking it.
+static upb_mhandlers *upb_mhandlers_new() {
+  upb_mhandlers *m = malloc(sizeof(*m));
+  if (!m) abort();  // A NULL result used to be dereferenced just below.
+  upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers));
+  m->startmsg = NULL;
+  m->endmsg = NULL;
+  m->tablearray = NULL;
+  m->is_group = false;
+  return m;
+}
+
+// Adds an entry for field number "n" to m's field table and returns a pointer
+// to the stored entry.  The table key is the tag built from "n" and the native
+// wire type of "type".  Calls abort() if an entry with that tag already exists.
+static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
+                                                  upb_fieldtype_t type,
+                                                  bool repeated) {
+  const uint32_t tag = n << 3 | upb_types[type].native_wire_type;
+  if (upb_inttable_lookup(&m->fieldtab, tag)) abort();
+
+  // Every member is spelled out; handlers, submsg, JIT labels and decode all
+  // start out as NULL/0.
+  upb_fhandlers new_f = {
+    .junk = false,
+    .type = type,
+    .repeated = repeated,
+    .is_repeated_primitive = repeated && upb_isprimitivetype(type),
+    .refcount = UPB_ATOMIC_INIT(0),
+    .number = n,
+    .msg = m,
+    .submsg = NULL,
+    .fval = UPB_NO_VALUE,
+    .value = NULL,
+    .startsubmsg = NULL,
+    .endsubmsg = NULL,
+    .startseq = NULL,
+    .endseq = NULL,
+    .jit_pclabel = 0,
+    .jit_pclabel_notypecheck = 0,
+    .jit_submsg_done_pclabel = 0,
+    .decode = NULL,
+  };
+  upb_inttable_insert(&m->fieldtab, tag, &new_f);
+
+  // Hand back the table's own copy, not the local template.
+  upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, tag);
+  assert(f);
+  assert(f->type == type);
+  return f;
+}
+
+// Registers handlers for a field that is not a submessage: "type" must be
+// neither MESSAGE nor GROUP (enforced with assert()).  Returns the new entry.
+upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
+                                          upb_fieldtype_t type, bool repeated) {
+  assert(type != UPB_TYPE(MESSAGE));
+  assert(type != UPB_TYPE(GROUP));
+  upb_fhandlers *fh = _upb_mhandlers_newfhandlers(m, n, type, repeated);
+  return fh;
+}
+
+// Registers handlers for MESSAGE or GROUP field "n" of "m" and links the entry
+// to "subm", the handlers for the field's contents.  For a GROUP an ENDGROUP
+// entry with the same field number is additionally registered on "subm".
+upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
+                                               upb_fieldtype_t type,
+                                               bool repeated,
+                                               upb_mhandlers *subm) {
+  assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP));
+  assert(subm);
+  const bool is_group = (type == UPB_TYPE(GROUP));
+
+  upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated);
+  f->submsg = subm;
+  if (is_group) {
+    _upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false);
+  }
+  return f;
+}
+
+
+/* upb_handlers ***************************************************************/
+
+// Creates an empty upb_handlers with a reference count of 1 and room for four
+// message handlers.  Calls abort() if either allocation fails; previously a
+// NULL result was written through unchecked.
+upb_handlers *upb_handlers_new() {
+  upb_handlers *h = malloc(sizeof(*h));
+  if (!h) abort();
+  upb_atomic_init(&h->refcount, 1);
+  h->msgs_len = 0;
+  h->msgs_size = 4;
+  h->msgs = malloc(h->msgs_size * sizeof(*h->msgs));
+  if (!h->msgs) abort();
+  h->should_jit = true;
+  return h;
+}
+
+// Takes one more reference on "h".
+void upb_handlers_ref(upb_handlers *h) {
+  upb_atomic_ref(&h->refcount);
+}
+
+// Drops one reference on "h".  Once upb_atomic_unref() reports that the count
+// reached zero, releases every registered upb_mhandlers (its field table, its
+// table array and the object itself), then the msgs array and "h".
+void upb_handlers_unref(upb_handlers *h) {
+  if (!upb_atomic_unref(&h->refcount)) return;
+
+  upb_mhandlers **cur = h->msgs;
+  upb_mhandlers **end = h->msgs + h->msgs_len;
+  for (; cur < end; ++cur) {
+    upb_mhandlers *mh = *cur;
+    upb_inttable_free(&mh->fieldtab);
+    free(mh->tablearray);
+    free(mh);
+  }
+  free(h->msgs);
+  free(h);
+}
+
+// Appends a freshly created upb_mhandlers to h->msgs, doubling the array's
+// capacity first when it is full, and returns the new object.  Calls abort()
+// if the array cannot be grown.
+upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
+  if (h->msgs_len == h->msgs_size) {
+    int new_size = h->msgs_size * 2;
+    // Keep the realloc() result in a temporary: assigning it straight to
+    // h->msgs stored NULL on failure and the write below then crashed.
+    upb_mhandlers **grown = realloc(h->msgs, new_size * sizeof(*h->msgs));
+    if (!grown) abort();
+    h->msgs = grown;
+    h->msgs_size = new_size;
+  }
+  upb_mhandlers *mh = upb_mhandlers_new();
+  h->msgs[h->msgs_len++] = mh;
+  return mh;
+}
+
+typedef struct {  // Value type of the string table used by upb_regmsg_dfs().
+  upb_mhandlers *mh;  // Handlers already created for the message the key names.
+} upb_mtab_ent;
+
+// Depth-first worker for upb_handlers_regmsgdef().  Creates the upb_mhandlers
+// for "m", records it in "mtab" under m's fully-qualified name, and then
+// registers every field of "m", recursing into submessage types that are not
+// in "mtab" yet.  "msgreg_cb" and "fieldreg_cb" (either may be NULL) are
+// called with "closure" for each message / field registered.  Returns the
+// handlers created for "m".
+static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m,
+                                     upb_onmsgreg *msgreg_cb,
+                                     upb_onfieldreg *fieldreg_cb,
+                                     void *closure, upb_strtable *mtab) {
+  upb_mhandlers *mh = upb_handlers_newmhandlers(h);
+  upb_mtab_ent self_ent = {mh};
+  upb_strtable_insert(mtab, m->base.fqname, &self_ent);
+  if (msgreg_cb != NULL) msgreg_cb(closure, mh, m);
+
+  upb_msg_iter it = upb_msg_begin(m);
+  while (!upb_msg_done(it)) {
+    upb_fielddef *f = upb_msg_iter_field(it);
+    upb_fhandlers *fh;
+    if (!upb_issubmsg(f)) {
+      fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f));
+    } else {
+      // Reusing an entry that is already in the table is what stops the
+      // recursion when message types refer to each other in a cycle.
+      upb_mtab_ent *known = upb_strtable_lookup(mtab, f->def->fqname);
+      upb_mhandlers *sub_mh =
+          known ? known->mh
+                : upb_regmsg_dfs(h, upb_downcast_msgdef(f->def), msgreg_cb,
+                                 fieldreg_cb, closure, mtab);
+      fh = upb_mhandlers_newfhandlers_subm(
+          mh, f->number, f->type, upb_isseq(f), sub_mh);
+    }
+    if (fieldreg_cb != NULL) fieldreg_cb(closure, fh, f);
+    it = upb_msg_next(m, it);
+  }
+  return mh;
+}
+
+// Registers handlers on "h" for "m" and for every message type reachable from
+// it, invoking the optional callbacks along the way.  Returns the handlers
+// created for "m" itself.
+upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, upb_msgdef *m,
+                                      upb_onmsgreg *msgreg_cb,
+                                      upb_onfieldreg *fieldreg_cb,
+                                      void *closure) {
+  // Table of messages registered so far; only needed while walking.
+  upb_strtable visited;
+  upb_strtable_init(&visited, 8, sizeof(upb_mtab_ent));
+  upb_mhandlers *toplevel =
+      upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &visited);
+  upb_strtable_free(&visited);
+  return toplevel;
+}
+
+
+/* upb_dispatcher *************************************************************/
+
+static upb_fhandlers toplevel_f = {  // Placeholder field installed in the bottom stack frame by upb_dispatcher_init().
+  false, UPB_TYPE(GROUP), false, false, UPB_ATOMIC_INIT(0), 0,  // junk, type, repeated, is_repeated_primitive, refcount, number
+  NULL, NULL, // msg, submsg
+#ifdef NDEBUG
+  {{0}},  // fval
+#else
+  {{0}, -1},  // fval; presumably upb_value has an extra member in non-NDEBUG builds -- TODO confirm
+#endif
+  NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};  // five handlers, three JIT labels, decode
+
+// Binds "d" to the handlers in "h" (taking a reference on "h"), stores the
+// data source's skip/exit callbacks and closure, compacts the field table of
+// every registered message, and initializes the bottom stack frame, the stack
+// limit and the status object.
+void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
+                         upb_skip_handler *skip, upb_exit_handler *exit,
+                         void *srcclosure) {
+  upb_handlers_ref(h);
+  d->handlers = h;
+
+  // Callbacks into the data source and the closure handed to them.
+  d->skip = skip;
+  d->exit = exit;
+  d->srcclosure = srcclosure;
+
+  int i = 0;
+  while (i < h->msgs_len) {
+    upb_inttable_compact(&h->msgs[i]->fieldtab);
+    i++;
+  }
+
+  d->stack[0].f = &toplevel_f;
+  d->limit = &d->stack[UPB_MAX_NESTING];
+  upb_status_init(&d->status);
+}
+
+// Rewinds "d" to the bottom stack frame, gives that frame "closure", and makes
+// the first registered message (msgs[0]) the current one.  Returns the bottom
+// frame.
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
+  upb_dispatcher_frame *base = d->stack;
+  base->closure = closure;
+  base->is_sequence = false;
+  d->top = base;
+
+  d->msgent = d->handlers->msgs[0];
+  d->dispatch_table = &d->msgent->fieldtab;
+  return base;
+}
+
+// Drops the reference on the handlers held by "d" and uninitializes its
+// status object.
+void upb_dispatcher_uninit(upb_dispatcher *d) {
+  upb_handlers *held = d->handlers;
+  upb_handlers_unref(held);
+  upb_status_uninit(&d->status);
+}
+
+// Calls the current message's startmsg handler, if one is registered, with the
+// closure of the top stack frame.  Any result other than UPB_CONTINUE is
+// handed to _upb_dispatcher_unwind().
+void upb_dispatch_startmsg(upb_dispatcher *d) {
+  upb_flow_t flow = UPB_CONTINUE;
+  // Keep the handler's result: it used to be thrown away, which left "flow"
+  // at UPB_CONTINUE forever and made the check below dead code.
+  if (d->msgent->startmsg) flow = d->msgent->startmsg(d->top->closure);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+}
+
+// Finishes the top-level message: calls its endmsg handler (if any) with the
+// dispatcher's status and then copies that status into "status".  Must be
+// called with only the bottom frame on the stack.
+void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
+  assert(d->top == d->stack);
+  upb_endmsg_handler *on_end = d->msgent->endmsg;
+  if (on_end != NULL) on_end(d->top->closure, &d->status);
+  // TODO: should we avoid this copy by passing client's status obj to cbs?
+  upb_status_copy(status, &d->status);
+}
+
+// Debugging aid: writes one space to stderr for every frame above the bottom
+// of the dispatcher stack.
+void indent(upb_dispatcher *d) {
+  for (upb_dispatcher_frame *fr = d->stack; fr < d->top; fr++) {
+    fprintf(stderr, " ");
+  }
+}
+
+// Debugging aid: like indent(), but writes one space fewer.
+void indentm1(upb_dispatcher *d) {
+  for (upb_dispatcher_frame *fr = d->stack + 1; fr < d->top; fr++) {
+    fprintf(stderr, " ");
+  }
+}
+
+// Begins a sequence for field "f": calls f's startseq handler (if any) and, if
+// it says to continue, pushes a sequence frame carrying the closure the
+// handler returned.  Without a handler the parent's closure is reused.  When
+// the stack is full or the handler does not return UPB_CONTINUE, the
+// dispatcher is unwound and the frame returned is only a placeholder.
+upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
+                                            upb_fhandlers *f) {
+  upb_dispatcher_frame *parent = d->top;
+  if (parent + 1 >= d->limit) {
+    upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
+    _upb_dispatcher_unwind(d, UPB_BREAK);
+    return d->top;  // Dummy.
+  }
+
+  upb_sflow_t sflow;
+  if (f->startseq) {
+    sflow = f->startseq(parent->closure, f->fval);
+  } else {
+    sflow = UPB_CONTINUE_WITH(parent->closure);
+  }
+  if (sflow.flow != UPB_CONTINUE) {
+    _upb_dispatcher_unwind(d, sflow.flow);
+    return d->top;  // Dummy.
+  }
+
+  upb_dispatcher_frame *frame = ++d->top;
+  frame->f = f;
+  frame->is_sequence = true;
+  frame->closure = sflow.closure;
+  return frame;
+}
+
+// Ends the sequence on top of the stack: pops its frame, calls the field's
+// endseq handler (if any) with the parent frame's closure, and restores the
+// current message and dispatch table from the parent frame.  If the handler
+// does not return UPB_CONTINUE the dispatcher is unwound and the frame
+// returned is only a placeholder.
+upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
+  //indentm1(d);
+  //fprintf(stderr, "END SEQ\n");
+  assert(d->top > d->stack);
+  assert(d->top->is_sequence);
+  upb_fhandlers *f = d->top->f;
+  --d->top;
+  upb_flow_t flow = UPB_CONTINUE;
+  if (f->endseq) flow = f->endseq(d->top->closure, f->fval);
+  if (flow != UPB_CONTINUE) {
+    // A leftover debugging printf() to stdout was removed here; library code
+    // must not write to the application's standard output.
+    _upb_dispatcher_unwind(d, flow);
+    return d->top;  // Dummy.
+  }
+  // A parent field without a submsg means we are back in the top-level
+  // message, msgs[0].
+  d->msgent = d->top->f->submsg ? d->top->f->submsg : d->handlers->msgs[0];
+  d->dispatch_table = &d->msgent->fieldtab;
+  return d->top;
+}
+
+// Begins a submessage for field "f": calls f's startsubmsg handler (if any)
+// and, if it says to continue, pushes a non-sequence frame carrying the
+// closure the handler returned, switches the current message and dispatch
+// table to f->submsg and dispatches its startmsg.  Without a handler the
+// parent's closure is reused.  When the stack is full or the handler does not
+// return UPB_CONTINUE, the dispatcher is unwound and the frame returned is
+// only a placeholder.
+upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
+                                               upb_fhandlers *f) {
+  upb_dispatcher_frame *parent = d->top;
+  if (parent + 1 >= d->limit) {
+    upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
+    _upb_dispatcher_unwind(d, UPB_BREAK);
+    return d->top;  // Dummy.
+  }
+
+  upb_sflow_t sflow;
+  if (f->startsubmsg) {
+    sflow = f->startsubmsg(parent->closure, f->fval);
+  } else {
+    sflow = UPB_CONTINUE_WITH(parent->closure);
+  }
+  if (sflow.flow != UPB_CONTINUE) {
+    _upb_dispatcher_unwind(d, sflow.flow);
+    return d->top;  // Dummy.
+  }
+
+  upb_dispatcher_frame *frame = ++d->top;
+  frame->f = f;
+  frame->is_sequence = false;
+  frame->closure = sflow.closure;
+
+  d->msgent = f->submsg;
+  d->dispatch_table = &d->msgent->fieldtab;
+  upb_dispatch_startmsg(d);
+  // Re-read d->top instead of returning "frame": upb_dispatch_startmsg() is
+  // allowed to unwind the stack.
+  return d->top;
+}
+
+// Ends the submessage on top of the stack: calls the submessage's endmsg
+// handler (if any), switches back to the message the field belongs to, pops
+// the frame and calls the field's endsubmsg handler (if any) with the parent
+// frame's closure.  A handler result other than UPB_CONTINUE is handed to
+// _upb_dispatcher_unwind().  Returns the frame that is on top afterwards.
+upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
+  //indentm1(d);
+  //fprintf(stderr, "END SUBMSG\n");
+  assert(d->top > d->stack);
+  assert(!d->top->is_sequence);
+  upb_fhandlers *f = d->top->f;
+  if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+  d->msgent = d->top->f->msg;
+  d->dispatch_table = &d->msgent->fieldtab;
+  --d->top;
+  upb_flow_t flow = UPB_CONTINUE;
+  // Keep the handler's result (as upb_dispatch_endseq() does); it used to be
+  // discarded, so the unwind below could never run.
+  if (f->endsubmsg) flow = f->endsubmsg(d->top->closure, f->fval);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+  return d->top;
+}
+
+// True when only the bottom frame is on the dispatcher stack.
+bool upb_dispatcher_stackempty(upb_dispatcher *d) {
+  const bool at_base = (d->stack == d->top);
+  return at_base;
+}
+
+// Unwinds the dispatcher stack after a handler returned something other than
+// UPB_CONTINUE.  Starting at the top frame, calls the endmsg handler of the
+// frame's submessage and the endsubmsg handler of the frame's field (where
+// present), then pops the frame.  For UPB_SKIPSUBMSG only one frame is
+// popped; otherwise unwinding continues.  When the bottom frame has been
+// processed, the data source's exit callback is invoked instead of popping.
+//
+// NOTE(review): sequence frames are treated exactly like submessage frames
+// here (as before); confirm whether endseq should be called for them instead.
+void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
+  upb_dispatcher_frame *frame = d->top;
+  while (1) {
+    upb_fhandlers *f = frame->f;
+    // Each of these may be NULL: the bottom frame's placeholder field has no
+    // submsg, and handlers are NULL until one is registered.  They used to be
+    // called unconditionally.
+    if (f->submsg && f->submsg->endmsg) {
+      f->submsg->endmsg(frame->closure, &d->status);
+    }
+    if (f->endsubmsg) f->endsubmsg(frame->closure, f->fval);
+    // Test before decrementing so that no pointer before d->stack is formed.
+    if (frame == d->stack) { d->exit(d->srcclosure); return; }
+    --frame;
+    d->top = frame;
+    if (flow == UPB_SKIPSUBMSG) return;
+  }
+}
diff --git a/upb/handlers.h b/upb/handlers.h
new file mode 100644
index 0000000..e3d91cf
--- /dev/null
+++ b/upb/handlers.h
@@ -0,0 +1,373 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010-2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb_handlers is a generic visitor-like interface for iterating over a stream
+ * of protobuf data.  You can register function pointers that will be called
+ * for each message and/or field as the data is being parsed or iterated over,
+ * without having to know the source format that we are parsing from.  This
+ * decouples the parsing logic from the processing logic.
+ */
+
+#ifndef UPB_HANDLERS_H
+#define UPB_HANDLERS_H
+
+#include <limits.h>
+#include "upb/upb.h"
+#include "upb/def.h"
+#include "upb/bytestream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Handlers protocol definition ***********************************************/
+
+// A upb_handlers object represents a graph of handlers.  Each message can have
+// a set of handlers as well as a set of fields which themselves have handlers.
+// Fields that represent submessages or groups are linked to other message
+// handlers, so the overall set of handlers can form a graph structure (which
+// may be cyclic).
+//
+// The upb_mhandlers (message handlers) object can have the following handlers:
+//
+//   static upb_flow_t startmsg(void *closure) {
+//     // Called when the message begins.  "closure" was supplied by our caller.
+//     return UPB_CONTINUE;
+//   }
+//
+//   static void endmsg(void *closure, upb_status *status) {
+//     // Called when processing of this message ends, whether in success or
+//     // failure.  "status" indicates the final status of processing, and can
+//     // also be modified in-place to update the final status.
+//     //
+//     // Since this callback is guaranteed to always be called eventually, it
+//     // can be used to free any resources that were allocated during processing.
+//   }
+//
+//   TODO: unknown field handler.
+//
+// The upb_fhandlers (field handlers) object can have the following handlers:
+//
+//   static upb_flow_t value(void *closure, upb_value fval, upb_value val) {
+//     // Called when the field's value is encountered.  "fval" contains
+//     // whatever value was bound to this field at registration time
+//     // (for upb_register_all(), this will be the field's upb_fielddef*).
+//     return UPB_CONTINUE;
+//   }
+//
+//   static upb_sflow_t startsubmsg(void *closure, upb_value fval) {
+//     // Called when a submessage begins.  The second element of the return
+//     // value is the closure for the submessage.
+//     return UPB_CONTINUE_WITH(closure);
+//   }
+//
+//   static upb_flow_t endsubmsg(void *closure, upb_value fval) {
+//     // Called when a submessage ends.
+//     return UPB_CONTINUE;
+//   }
+//
+//   static upb_sflow_t startseq(void *closure, upb_value fval) {
+//     // Called when a sequence (repeated field) begins.  The second element
+//     // of the return value is the closure for the sequence.
+//     return UPB_CONTINUE_WITH(closure);
+//   }
+//
+//   static upb_flow_t endseq(void *closure, upb_value fval) {
+//     // Called when a sequence ends.
+//     return UPB_CONTINUE;
+//   }
+//
+// All handlers except the endmsg handler return a value from this enum, to
+// control whether parsing will continue or not.
+typedef enum {  // Handler return value telling the data source how to proceed.
+  // Data source should continue calling callbacks.
+  UPB_CONTINUE = 0,
+
+  // Halt processing permanently (in a non-resumable way).  The endmsg handlers
+  // for any currently open messages will be called which can supply a more
+  // specific status message.  No further input data will be consumed.
+  UPB_BREAK,
+
+  // Skips to the end of the current submessage (or if we are at the top
+  // level, skips to the end of the entire message).  In other words, it is
+  // like a UPB_BREAK that applies only to the current level.
+  //
+  // If you return UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler
+  // will be called to perform cleanup and return a status.  Returning
+  // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg,
+  // endmsg, or endsubmsg handlers.
+  //
+  // If UPB_SKIPSUBMSG is returned for the top-level message, no further input
+  // data will be consumed.
+  UPB_SKIPSUBMSG,
+
+  // TODO: Add UPB_SUSPEND, for resumable producers/consumers.
+} upb_flow_t;
+
+// The startsubmsg handler needs to also pass a closure to the submsg.
+typedef struct {
+  upb_flow_t flow;  // Flow decision, as for handlers that return a plain upb_flow_t.
+  void *closure;  // Closure for the submessage or sequence being started.
+} upb_sflow_t;
+
+// Packs a flow decision and a closure into a upb_sflow_t.
+INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) {
+  upb_sflow_t packed;
+  packed.flow = flow;
+  packed.closure = closure;
+  return packed;
+}
+#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c)  // Continue, with "c" as the closure.
+#define UPB_SBREAK UPB_SFLOW(UPB_BREAK, NULL)  // UPB_BREAK for handlers returning upb_sflow_t.
+
+// Typedefs for all of the handler functions defined above.
+typedef upb_flow_t (upb_startmsg_handler)(void *c);  // startmsg
+typedef void (upb_endmsg_handler)(void *c, upb_status *status);  // endmsg
+typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val);  // value
+typedef upb_sflow_t (upb_startfield_handler)(void *closure, upb_value fval);  // startsubmsg, startseq
+typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval);  // endsubmsg, endseq
+
+
+/* upb_fhandlers **************************************************************/
+
+// A upb_fhandlers object represents the set of handlers associated with one
+// specific message field.
+struct _upb_decoder;
+struct _upb_mhandlers;
+typedef struct _upb_fieldent {
+  bool junk;  // NOTE(review): purpose not evident here; handlers.c initializes it to false.
+  upb_fieldtype_t type;  // Field type given at registration.
+  bool repeated;  // "repeated" flag given at registration.
+  bool is_repeated_primitive;  // repeated && upb_isprimitivetype(type).
+  upb_atomic_t refcount;  // Starts at 0; presumably for upb_fhandlers_ref/unref -- TODO confirm.
+  uint32_t number;  // Field number.
+  struct _upb_mhandlers *msg;  // Message handlers this field was registered on.
+  struct _upb_mhandlers *submsg;  // Set iff upb_issubmsgtype(type) == true.
+  upb_value fval;  // Bound value handed to this field's handlers as "fval".
+  upb_value_handler *value;  // This and the four handlers below are NULL until registered.
+  upb_startfield_handler *startsubmsg;
+  upb_endfield_handler *endsubmsg;
+  upb_startfield_handler *startseq;
+  upb_endfield_handler *endseq;
+  uint32_t jit_pclabel;  // jit_* members: presumably JIT decoder bookkeeping -- TODO confirm.
+  uint32_t jit_pclabel_notypecheck;
+  uint32_t jit_submsg_done_pclabel;
+  void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);  // NULL at registration; NOTE(review): assigned elsewhere.
+} upb_fhandlers;
+
+// fhandlers are created as part of a upb_handlers instance, but can be ref'd
+// and unref'd to prolong the life of the handlers.
+void upb_fhandlers_ref(upb_fhandlers *m);
+void upb_fhandlers_unref(upb_fhandlers *m);
+
+// upb_fhandlers accessors: each use below defines upb_fhandlers_set<name>() and upb_fhandlers_get<name>().
+#define UPB_FHANDLERS_ACCESSORS(name, type) \
+  INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \
+  INLINE type upb_fhandlers_get ## name(upb_fhandlers *f) { return f->name; }
+UPB_FHANDLERS_ACCESSORS(fval, upb_value)
+UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*)
+UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*)
+UPB_FHANDLERS_ACCESSORS(endsubmsg, upb_endfield_handler*)
+UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*)
+UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*)
+UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*)
+
+
+/* upb_mhandlers **************************************************************/
+
+// A upb_mhandlers object represents the set of handlers associated with a
+// message in the graph of messages.
+
+typedef struct _upb_mhandlers {
+  upb_atomic_t refcount;  // NOTE(review): not initialized by upb_mhandlers_new() in handlers.c.
+  upb_startmsg_handler *startmsg;  // NULL until a handler is registered.
+  upb_endmsg_handler *endmsg;  // NULL until a handler is registered.
+  upb_inttable fieldtab;  // Maps tag (field number << 3 | wire type) -> upb_fhandlers.
+  uint32_t jit_startmsg_pclabel;  // jit_* members: presumably JIT decoder bookkeeping -- TODO confirm.
+  uint32_t jit_endofbuf_pclabel;
+  uint32_t jit_endofmsg_pclabel;
+  uint32_t jit_unknownfield_pclabel;
+  bool is_group;  // Initialized to false by upb_mhandlers_new().
+  int32_t jit_parent_field_done_pclabel;
+  uint32_t max_field_number;
+  // Currently keyed on field number.  Could also try keying it
+  // on encoded or decoded tag, or on encoded field number.
+  void **tablearray;  // Starts out NULL; released with free() in upb_handlers_unref().
+} upb_mhandlers;
+
+// mhandlers are created as part of a upb_handlers instance, but can be ref'd
+// and unref'd to prolong the life of the handlers.
+void upb_mhandlers_ref(upb_mhandlers *m);
+void upb_mhandlers_unref(upb_mhandlers *m);
+
+// Creates a new field with the given name and number.  There must not be an
+// existing field with either this name or number or abort() will be called.
+// TODO: this should take a name also.
+upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
+                                          upb_fieldtype_t type, bool repeated);
+// Like the previous but for MESSAGE or GROUP fields.  For GROUP fields, the
+// given submessage must not have any fields with this field number.
+upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
+                                               upb_fieldtype_t type,
+                                               bool repeated,
+                                               upb_mhandlers *subm);
+
+// upb_mhandlers accessors: each use of the macro defines
+// upb_mhandlers_set<name>() and upb_mhandlers_get<name>().
+#define UPB_MHANDLERS_ACCESSORS(name, type) \
+  INLINE void upb_mhandlers_set ## name(upb_mhandlers *m, type v){m->name = v;} \
+  INLINE type upb_mhandlers_get ## name(upb_mhandlers *m) { return m->name; }
+// No ';' after the uses: the macro expands to complete function definitions,
+// so a trailing ';' is a stray empty declaration that ISO C does not allow.
+UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*)
+UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*)
+
+
+/* upb_handlers ***************************************************************/
+
+struct _upb_handlers {
+  upb_atomic_t refcount;  // Managed by upb_handlers_ref()/upb_handlers_unref().
+  upb_mhandlers **msgs;  // Array of message handlers, [0]=toplevel.
+  int msgs_len, msgs_size;  // Entries in use / entries allocated in "msgs".
+  bool should_jit;  // Set to true by upb_handlers_new(); NOTE(review): consumer not visible here.
+};
+typedef struct _upb_handlers upb_handlers;
+
+upb_handlers *upb_handlers_new();
+void upb_handlers_ref(upb_handlers *h);
+void upb_handlers_unref(upb_handlers *h);
+
+// Appends a new message to the graph of handlers and returns it.  This message
+// can be obtained later at index upb_handlers_msgcount()-1.  All handlers will
+// be initialized to no-op handlers.
+upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h);
+upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index);
+
+// Convenience function for registering handlers for all messages and
+// fields in a msgdef and all its children.  For every registered message
+// "msgreg_cb" will be called with the newly-created mhandlers, and likewise
+// with "fieldreg_cb"
+//
+// See upb_handlers_reghandlerset() below for an example.
+typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, upb_msgdef *m);
+typedef void upb_onfieldreg(void *closure, upb_fhandlers *mh, upb_fielddef *m);
+upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, upb_msgdef *m,
+                                      upb_onmsgreg *msgreg_cb,
+                                      upb_onfieldreg *fieldreg_cb,
+                                      void *closure);
+
+// Convenience function for registering a set of handlers for all messages and
+// fields in a msgdef and its children, with the fval bound to the upb_fielddef.
+// Any of the handlers may be NULL, in which case no callback will be set and
+// the nop callback will be used.
+typedef struct {  // Any member may be NULL, in which case nothing is registered for it.
+  upb_startmsg_handler *startmsg;  // Applied to every message by upb_onmreg_hset().
+  upb_endmsg_handler *endmsg;  // Applied to every message by upb_onmreg_hset().
+  upb_value_handler *value;  // This and the members below are applied to every field by upb_onfreg_hset().
+  upb_startfield_handler *startsubmsg;
+  upb_endfield_handler *endsubmsg;
+  upb_startfield_handler *startseq;
+  upb_endfield_handler *endseq;
+} upb_handlerset;
+
+// Message-registration callback used by upb_handlers_reghandlerset(): "c" is
+// the upb_handlerset, whose non-NULL startmsg/endmsg handlers are installed
+// on "mh".  "m" is not used.
+INLINE void upb_onmreg_hset(void *c, upb_mhandlers *mh, upb_msgdef *m) {
+  upb_handlerset *set = (upb_handlerset*)c;
+  (void)m;
+  if (set->startmsg != NULL) {
+    upb_mhandlers_setstartmsg(mh, set->startmsg);
+  }
+  if (set->endmsg != NULL) {
+    upb_mhandlers_setendmsg(mh, set->endmsg);
+  }
+}
+// Field-registration callback used by upb_handlers_reghandlerset(): "c" is
+// the upb_handlerset, whose non-NULL field handlers are installed on "fh".
+// The field's bound value (fval) is always set to the upb_fielddef "f".
+INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, upb_fielddef *f) {
+  upb_handlerset *set = (upb_handlerset*)c;
+
+  upb_value bound;
+  upb_value_setfielddef(&bound, f);
+  upb_fhandlers_setfval(fh, bound);
+
+  if (set->value != NULL) upb_fhandlers_setvalue(fh, set->value);
+  if (set->startsubmsg != NULL) {
+    upb_fhandlers_setstartsubmsg(fh, set->startsubmsg);
+  }
+  if (set->endsubmsg != NULL) upb_fhandlers_setendsubmsg(fh, set->endsubmsg);
+  if (set->startseq != NULL) upb_fhandlers_setstartseq(fh, set->startseq);
+  if (set->endseq != NULL) upb_fhandlers_setendseq(fh, set->endseq);
+}
+// Registers "m" and its children on "h", applying the handlers in "hs" to
+// every message and field.  Returns the handlers created for "m".
+INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, upb_msgdef *m,
+                                                 upb_handlerset *hs) {
+  upb_onmsgreg *on_msg = &upb_onmreg_hset;
+  upb_onfieldreg *on_field = &upb_onfreg_hset;
+  return upb_handlers_regmsgdef(h, m, on_msg, on_field, hs);
+}
+
+
+/* upb_dispatcher *************************************************************/
+
+// upb_dispatcher can be used by sources of data to invoke the appropriate
+// handlers on a upb_handlers object.  Besides maintaining the runtime stack of
+// closures and handlers, the dispatcher checks the return status of user
+// callbacks and properly handles statuses other than UPB_CONTINUE, invoking
+// "skip" or "exit" handlers on the underlying data source as appropriate.
+
+typedef struct {
+  upb_fhandlers *f;  // Field whose sequence or submessage this frame represents.
+  void *closure;  // Closure passed to handlers called while this frame is on top.
+
+  // Members to use as the data source requires.
+  void *srcclosure;
+  uint64_t end_ofs;
+  uint16_t msgindex;
+  uint16_t fieldindex;
+
+  bool is_sequence;   // frame represents seq or submsg? (f might be both).
+  bool is_packed;     // !upb_issubmsg(f) && end_ofs != UINT64_MAX (strings aren't pushed)
+} upb_dispatcher_frame;
+
+// Called when some of the input needs to be skipped.  All frames from
+// top to bottom, inclusive, should be skipped.
+typedef void upb_skip_handler(void *, upb_dispatcher_frame *top,
+                              upb_dispatcher_frame *bottom);
+typedef void upb_exit_handler(void *);
+
+typedef struct {
+  upb_dispatcher_frame *top, *limit;  // Current frame; limit is &stack[UPB_MAX_NESTING].
+
+  upb_handlers *handlers;  // Referenced from upb_dispatcher_init() until upb_dispatcher_uninit().
+
+  // Msg and dispatch table for the current level.
+  upb_mhandlers *msgent;
+  upb_inttable *dispatch_table;
+  upb_skip_handler *skip;  // Data-source callback supplied to upb_dispatcher_init().
+  upb_exit_handler *exit;  // Called by _upb_dispatcher_unwind() once the bottom frame is reached.
+  void *srcclosure;  // Argument passed to "exit".
+
+  // Stack.
+  upb_status status;  // Passed to endmsg handlers; copied out by upb_dispatch_endmsg().
+  upb_dispatcher_frame stack[UPB_MAX_NESTING];
+} upb_dispatcher;
+
+void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
+                         upb_skip_handler *skip, upb_exit_handler *exit,
+                         void *closure);
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
+void upb_dispatcher_uninit(upb_dispatcher *d);
+
+// Tests whether the runtime stack is in the base level message.
+bool upb_dispatcher_stackempty(upb_dispatcher *d);
+
+// Looks up key "n" in the dispatch table of the current message and returns
+// the lookup result as a upb_fhandlers pointer.
+INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {
+  upb_inttable *table = d->dispatch_table;
+  upb_fhandlers *found =
+      (upb_fhandlers*)upb_inttable_fastlookup(table, n, sizeof(upb_fhandlers));
+  return found;
+}
+
+void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow);
+
+// Dispatch functions -- call the user handler and handle errors.
+// Delivers "val" to f's value handler, if one is registered, using the
+// closure of the top stack frame.  A result other than UPB_CONTINUE is handed
+// to _upb_dispatcher_unwind().
+INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
+                               upb_value val) {
+  if (!f->value) return;
+  upb_flow_t flow = f->value(d->top->closure, f->fval, val);
+  if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+}
+void upb_dispatch_startmsg(upb_dispatcher *d);
+void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
+upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
+upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/msg.c b/upb/msg.c
new file mode 100644
index 0000000..a2b2cf7
--- /dev/null
+++ b/upb/msg.c
@@ -0,0 +1,349 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Data structure for storing a message of protobuf data.
+ */
+
+#include "upb/upb.h"
+#include "upb/msg.h"
+
+// Clears every hasbit of "msg" by zeroing its first md->hasbit_bytes bytes.
+// Field storage beyond that region is left untouched.
+void upb_msg_clear(void *msg, upb_msgdef *md) {
+  size_t hasbit_len = md->hasbit_bytes;
+  memset(msg, 0, hasbit_len);
+  // TODO: set primitive fields to defaults?
+}
+
+// Reserves one more element of "type_size" bytes at the end of "a" and returns
+// a pointer to it.  When the array is full its capacity starts at 8 elements
+// and doubles afterwards; newly added capacity is zero-filled.
+void *upb_stdarray_append(upb_stdarray *a, size_t type_size) {
+  assert(a->len <= a->size);
+  if (a->len == a->size) {
+    size_t prev_cap = a->size;
+    a->size = (prev_cap == 0) ? 8 : (prev_cap * 2);
+    a->ptr = realloc(a->ptr, a->size * type_size);
+    // Zero only the tail that realloc() just added.
+    char *fresh = &a->ptr[prev_cap * type_size];
+    memset(fresh, 0, (a->size - prev_cap) * type_size);
+  }
+  char *slot = &a->ptr[a->len * type_size];
+  a->len++;
+  return slot;
+}
+
+// Disabled code: a recursive walk that pushes every set field of a message
+// (including array elements and submessages) to a upb_dispatcher.  It is
+// compiled out and kept for reference only.
+// NOTE(review): presumably superseded by the upb_msgvisitor declared in msg.h
+// (marked "not yet implemented" there) -- confirm before deleting.
+#if 0
+static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
+                                   upb_dispatcher *d);
+
+static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f,
+                                  upb_dispatcher *d, upb_fhandlers *hf) {
+  if (upb_issubmsg(f)) {
+    upb_msg *msg = upb_value_getmsg(val);
+    upb_dispatch_startsubmsg(d, hf);
+    upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d);
+    upb_dispatch_endsubmsg(d);
+  } else {
+    upb_dispatch_value(d, hf, val);
+  }
+  return UPB_CONTINUE;
+}
+
+static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
+                                   upb_dispatcher *d) {
+  upb_msg_iter i;
+  for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
+    upb_fielddef *f = upb_msg_iter_field(i);
+    if (!upb_msg_has(msg, f)) continue;
+    upb_fhandlers *hf = upb_dispatcher_lookup(d, f->number);
+    if (!hf) continue;
+    upb_value val = upb_msg_get(msg, f);
+    if (upb_isarray(f)) {
+      upb_array *arr = upb_value_getarr(val);
+      for (uint32_t j = 0; j < upb_array_len(arr); ++j) {
+        upb_msg_pushval(upb_array_get(arr, f, j), f, d, hf);
+      }
+    } else {
+      upb_msg_pushval(val, f, d, hf);
+    }
+  }
+  return UPB_CONTINUE;
+}
+
+void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
+                         void *closure, upb_status *status) {
+  upb_dispatcher d;
+  upb_dispatcher_init(&d, h, NULL, NULL, NULL);
+  upb_dispatcher_reset(&d, closure);
+
+  upb_dispatch_startmsg(&d);
+  upb_msg_dispatch(msg, md, &d);
+  upb_dispatch_endmsg(&d, status);
+
+  upb_dispatcher_uninit(&d);
+}
+#endif
+
+/* Standard writers. **********************************************************/
+
+// Sets the hasbit of the field described by "fval" inside message "_m".
+// Fields with a negative hasbit index have no hasbit and are left alone.
+void upb_stdmsg_sethas(void *_m, upb_value fval) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  if (f->hasbit < 0) return;
+  char *bytes = _m;
+  bytes[f->hasbit / 8] |= (1 << (f->hasbit % 8));
+}
+
+// Reports whether the field described by "fval" is set in message "_m".
+// A field without a hasbit (negative index) always reports as set.
+bool upb_stdmsg_has(void *_m, upb_value fval) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  if (f->hasbit < 0) return true;
+  char *bytes = _m;
+  return (bytes[f->hasbit / 8] & (1 << (f->hasbit % 8))) != 0;
+}
+
+// Stamps out the four standard accessors for one in-memory C type:
+//   upb_stdmsg_set<type>     -- sets the hasbit and stores the value at
+//                               f->offset inside the message.
+//   upb_stdmsg_set<type>_r   -- appends the value to the upb_stdarray passed
+//                               as the closure (the fval is ignored).
+//   upb_stdmsg_get<type>     -- reads the value at f->offset; it does not
+//                               consult the hasbit.
+//   upb_stdmsg_seqget<type>  -- reads the value an array iterator points at.
+// "type" is the upb_value accessor suffix; "ctype" is the C type kept in
+// memory.
+#define UPB_ACCESSORS(type, ctype)                                            \
+  upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval,                \
+                                     upb_value val) {                         \
+    upb_fielddef *f = upb_value_getfielddef(fval);                            \
+    uint8_t *m = _m;                                                          \
+    upb_stdmsg_sethas(_m, fval);                                              \
+    *(ctype*)&m[f->offset] = upb_value_get ## type(val);                      \
+    return UPB_CONTINUE;                                                      \
+  }                                                                           \
+                                                                              \
+  upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval,           \
+                                          upb_value val) {                    \
+    (void)_fval;                                                              \
+    ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype));          \
+    *p = upb_value_get ## type(val);                                          \
+    return UPB_CONTINUE;                                                      \
+  }                                                                           \
+                                                                              \
+  upb_value upb_stdmsg_get ## type(void *_m, upb_value fval) {                \
+    uint8_t *m = _m;                                                          \
+    upb_fielddef *f = upb_value_getfielddef(fval);                            \
+    upb_value ret;                                                            \
+    upb_value_set ## type(&ret, *(ctype*)&m[f->offset]);                      \
+    return ret;                                                               \
+  }                                                                           \
+  upb_value upb_stdmsg_seqget ## type(void *i) {                              \
+    upb_value val;                                                            \
+    upb_value_set ## type(&val, *(ctype*)i);                                  \
+    return val;                                                               \
+  }
+
+// One accessor family per in-memory representation.  The "ptr" family is
+// reused below for strings, submessages and arrays, which are all stored as
+// pointers.
+UPB_ACCESSORS(double, double)
+UPB_ACCESSORS(float, float)
+UPB_ACCESSORS(int32, int32_t)
+UPB_ACCESSORS(int64, int64_t)
+UPB_ACCESSORS(uint32, uint32_t)
+UPB_ACCESSORS(uint64, uint64_t)
+UPB_ACCESSORS(bool, bool)
+UPB_ACCESSORS(ptr, void*)
+#undef UPB_ACCESSORS
+
+// Copies the string referenced by "src" into the upb_stdarray whose pointer is
+// stored at "_dst".  The array object is allocated on first use, and its byte
+// buffer only ever grows, so repeated writes reuse the same storage.
+static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
+  upb_stdarray **slot = _dst;
+  upb_stdarray *str = *slot;
+  if (str == NULL) {
+    // First write to this slot: create an empty array object.
+    str = malloc(sizeof(*str));
+    str->size = 0;
+    str->ptr = NULL;
+    *slot = str;
+  }
+  str->len = 0;
+  upb_strref *ref = upb_value_getstrref(src);
+  if (ref->len > str->size) {
+    // Grow the buffer to exactly the incoming length.
+    str->size = ref->len;
+    str->ptr = realloc(str->ptr, str->size);
+  }
+  str->len = ref->len;
+  upb_bytesrc_read(ref->bytesrc, ref->stream_offset, ref->len, str->ptr);
+}
+
+// Value handler for non-repeated string fields: sets the hasbit and copies the
+// string into the array pointed to by the slot at f->offset.
+upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  upb_stdmsg_sethas(_m, fval);
+  char *slot = (char*)_m + f->offset;
+  _upb_stdmsg_setstr(slot, val);
+  return UPB_CONTINUE;
+}
+
+// Value handler for repeated string fields: appends one pointer-sized slot to
+// the array "a" and copies the string into it.  The fval is unused.
+upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
+  (void)fval;
+  void *slot = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
+  _upb_stdmsg_setstr(slot, val);
+  return UPB_CONTINUE;
+}
+
+// Strings are stored as pointers, so reading one is just a pointer read.
+upb_value upb_stdmsg_getstr(void *m, upb_value fval) {
+  upb_value str = upb_stdmsg_getptr(m, fval);
+  return str;
+}
+
+// Array elements for strings are pointers; read the one "i" points at.
+upb_value upb_stdmsg_seqgetstr(void *i) {
+  upb_value str = upb_stdmsg_seqgetptr(i);
+  return str;
+}
+
+// Allocates a zero-filled message of md->size bytes with all hasbits cleared.
+// Returns NULL if the allocation fails (previously the NULL result was handed
+// straight to memset()).
+void *upb_stdmsg_new(upb_msgdef *md) {
+  void *m = calloc(1, md->size);
+  if (m == NULL) return NULL;
+  upb_msg_clear(m, md);
+  return m;
+}
+
+// Frees the standard array "s" that backs repeated field "f", together with
+// everything its elements own.
+//
+// For submessage and string fields every allocated slot (up to a->size, not
+// just a->len) is visited, because slots past "len" may still hold objects
+// kept for reuse.  Slots that were never used are NULL (upb_stdarray_append
+// zero-fills new capacity) and are skipped.
+void upb_stdseq_free(void *s, upb_fielddef *f) {
+  upb_stdarray *a = s;
+  if (upb_issubmsg(f) || upb_isstring(f)) {
+    void **p = (void**)a->ptr;
+    for (uint32_t i = 0; i < a->size; i++) {
+      if (upb_issubmsg(f)) {
+        // upb_stdmsg_free() tolerates NULL itself.
+        upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
+      } else {
+        upb_stdarray *str = p[i];
+        if (str == NULL) continue;  // slot never populated
+        free(str->ptr);
+        free(str);
+      }
+    }
+  }
+  free(a->ptr);
+  free(a);
+}
+
+// Frees message "m" and, recursively, every array, submessage and string it
+// points to.  A NULL message is ignored.
+void upb_stdmsg_free(void *m, upb_msgdef *md) {
+  if (!m) return;
+  upb_msg_iter it;
+  for (it = upb_msg_begin(md); !upb_msg_done(it); it = upb_msg_next(md, it)) {
+    upb_fielddef *f = upb_msg_iter_field(it);
+    // Only sequences, submessages and strings are stored as owned pointers.
+    bool owns_heap = upb_isseq(f) || upb_issubmsg(f) || upb_isstring(f);
+    if (!owns_heap) continue;
+    void *child = upb_value_getptr(upb_stdmsg_getptr(m, f->fval));
+    if (!child) continue;
+    if (upb_isseq(f)) {
+      upb_stdseq_free(child, f);
+      continue;
+    }
+    if (upb_issubmsg(f)) {
+      upb_stdmsg_free(child, upb_downcast_msgdef(f->def));
+      continue;
+    }
+    // Plain string: free the byte buffer, then the array object.
+    upb_stdarray *str = child;
+    free(str->ptr);
+    free(str);
+  }
+  free(m);
+}
+
+// Start-sequence handler: returns the array for a repeated field as the new
+// closure.  If the hasbit is already set the existing array is used as-is;
+// otherwise the array is created if missing, emptied, and the hasbit is set.
+upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) {
+  char *m = _m;
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  upb_stdarray **slot = (void*)&m[f->offset];
+  if (!upb_stdmsg_has(_m, fval)) {
+    upb_stdarray *arr = *slot;
+    if (arr == NULL) {
+      arr = malloc(sizeof(*arr));
+      arr->size = 0;
+      arr->ptr = NULL;
+      *slot = arr;
+    }
+    arr->len = 0;  // keep the capacity, drop the contents
+    upb_stdmsg_sethas(m, fval);
+  }
+  return UPB_CONTINUE_WITH(*slot);
+}
+
+// Makes *m a usable, cleared message: allocates one if the slot is empty,
+// otherwise clears the hasbits of the existing message.
+void upb_stdmsg_recycle(void **m, upb_msgdef *md) {
+  if (*m == NULL) {
+    *m = upb_stdmsg_new(md);
+    return;
+  }
+  upb_msg_clear(*m, md);
+}
+
+// Start-submessage handler for non-repeated fields: returns the submessage
+// stored at f->offset as the new closure.  When the hasbit is not yet set the
+// submessage is first recycled (allocated or cleared) and the hasbit is set.
+upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) {
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  char *base = _m;
+  void **subm = (void*)&base[f->offset];
+  bool already_set = upb_stdmsg_has(base, fval);
+  if (!already_set) {
+    upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
+    upb_stdmsg_sethas(base, fval);
+  }
+  return UPB_CONTINUE_WITH(*subm);
+}
+
+// Start-submessage handler for repeated fields: appends a pointer slot to the
+// array "a", recycles whatever message that slot holds (allocating one if it
+// is empty) and returns it as the new closure.
+upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) {
+  assert(a != NULL);
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  upb_stdarray *arr = a;
+  void **slot = upb_stdarray_append(arr, sizeof(void*));
+  upb_stdmsg_recycle(slot, upb_downcast_msgdef(f->def));
+  return UPB_CONTINUE_WITH(*slot);
+}
+
+// Returns an iterator to the first element of array "_a", or NULL when the
+// array is empty.
+void *upb_stdmsg_seqbegin(void *_a) {
+  upb_stdarray *arr = _a;
+  if (arr->len == 0) return NULL;
+  return arr->ptr;
+}
+
+// Defines upb_stdmsg_<size>byte_seqnext(): advances an element iterator by
+// "size" bytes and returns NULL once it reaches a->ptr + a->len * size (one
+// past the last element in use).
+#define NEXTFUNC(size) \
+  void *upb_stdmsg_ ## size ## byte_seqnext(void *_a, void *iter) {      \
+    upb_stdarray *a = _a;                                                \
+    void *next = (char*)iter + size;                                     \
+    return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL;  \
+  }
+
+// One stepping function per element width used by the standard accessors.
+NEXTFUNC(8)
+NEXTFUNC(4)
+NEXTFUNC(1)
+
+// Case bodies for upb_stdmsg_accessor().  Each expands to a block holding a
+// function-local static vtable and a return of its address.  Slot order
+// follows upb_accessor_vtbl: appendseq, appendsubmsg, set, has, get, seqbegin,
+// seqnext, seqget.
+//
+// STDMSG(type): non-repeated field -- no appendseq and no sequence readers.
+// STDMSG_R(type, size): repeated field -- "get" returns the array pointer and
+// "size" selects which <size>byte_seqnext stepping function is used.
+#define STDMSG(type) { static upb_accessor_vtbl vtbl = {NULL, &upb_stdmsg_startsubmsg, \
+  &upb_stdmsg_set ## type, &upb_stdmsg_has, &upb_stdmsg_get ## type, \
+  NULL, NULL, NULL}; return &vtbl; }
+#define STDMSG_R(type, size) { static upb_accessor_vtbl vtbl = { \
+  &upb_stdmsg_startseq, &upb_stdmsg_startsubmsg_r, &upb_stdmsg_set ## type ## _r, \
+  &upb_stdmsg_has, &upb_stdmsg_getptr, &upb_stdmsg_seqbegin, \
+  &upb_stdmsg_ ## size ## byte_seqnext, &upb_stdmsg_seqget ## type}; \
+  return &vtbl; }
+
+// Returns the standard accessor vtable for field "f", chosen by f->type and by
+// whether the field is a sequence (upb_isseq).  Wire types that share an
+// in-memory representation share a case body and therefore a vtable.
+// Returns NULL if f->type matches none of the listed cases.
+//
+// NOTE(review): repeated string/bytes/group/message fields hard-code the
+// 8-byte iterator step, while their elements are appended with
+// sizeof(void*); the two disagree wherever pointers are not 8 bytes (see the
+// TODO below).
+upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) {
+  if (upb_isseq(f)) {
+    switch (f->type) {
+      case UPB_TYPE(DOUBLE): STDMSG_R(double, 8)
+      case UPB_TYPE(FLOAT): STDMSG_R(float, 4)
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64): STDMSG_R(uint64, 8)
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SFIXED64):
+      case UPB_TYPE(SINT64): STDMSG_R(int64, 8)
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(ENUM):
+      case UPB_TYPE(SFIXED32): STDMSG_R(int32, 4)
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32): STDMSG_R(uint32, 4)
+      case UPB_TYPE(BOOL): STDMSG_R(bool, 1)
+      case UPB_TYPE(STRING):
+      case UPB_TYPE(BYTES):
+      case UPB_TYPE(GROUP):
+      case UPB_TYPE(MESSAGE): STDMSG_R(str, 8)  // TODO: 32-bit
+    }
+  } else {
+    switch (f->type) {
+      case UPB_TYPE(DOUBLE): STDMSG(double)
+      case UPB_TYPE(FLOAT): STDMSG(float)
+      case UPB_TYPE(UINT64):
+      case UPB_TYPE(FIXED64): STDMSG(uint64)
+      case UPB_TYPE(INT64):
+      case UPB_TYPE(SFIXED64):
+      case UPB_TYPE(SINT64): STDMSG(int64)
+      case UPB_TYPE(INT32):
+      case UPB_TYPE(SINT32):
+      case UPB_TYPE(ENUM):
+      case UPB_TYPE(SFIXED32): STDMSG(int32)
+      case UPB_TYPE(UINT32):
+      case UPB_TYPE(FIXED32): STDMSG(uint32)
+      case UPB_TYPE(BOOL): STDMSG(bool)
+      case UPB_TYPE(STRING):
+      case UPB_TYPE(BYTES):
+      case UPB_TYPE(GROUP):
+      case UPB_TYPE(MESSAGE): STDMSG(str)
+    }
+  }
+  // Unknown field type.
+  return NULL;
+}
+
+// Field-registration callback: wires the field's accessor writers (startseq,
+// value, startsubmsg) and its fval into the field handlers.  Fields without
+// an accessor are left with no handlers.
+static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) {
+  (void)c;
+  if (!f->accessor) return;
+  upb_fhandlers_setstartseq(fh, f->accessor->appendseq);
+  upb_fhandlers_setvalue(fh, f->accessor->set);
+  upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg);
+  upb_fhandlers_setfval(fh, f->fval);
+}
+
+// Registers message "m" with handlers "h", using upb_accessors_onfreg as the
+// per-field registration callback, and returns the resulting message handlers.
+upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m) {
+  upb_mhandlers *mh =
+      upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL);
+  return mh;
+}
diff --git a/upb/msg.h b/upb/msg.h
new file mode 100644
index 0000000..625d805
--- /dev/null
+++ b/upb/msg.h
@@ -0,0 +1,270 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010-2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Routines for reading and writing message data to an in-memory structure,
+ * similar to a C struct.
+ *
+ * upb does not define one single message object that everyone must use.
+ * Rather it defines an abstract interface for reading and writing members
+ * of a message object, and all of the parsers and serializers use this
+ * abstract interface.  This allows upb's parsers and serializers to be used
+ * regardless of what memory management scheme or synchronization model the
+ * application is using.
+ *
+ * A standard set of accessors is provided for doing simple reads and writes at
+ * a known offset into the message.  These accessors should be used when
+ * possible, because they are specially optimized -- for example, the JIT can
+ * recognize them and emit specialized code instead of having to call the
+ * function at all.  The application can substitute its own accessors when the
+ * standard accessors are not suitable.
+ */
+
+#ifndef UPB_MSG_H
+#define UPB_MSG_H
+
+#include <stdlib.h>
+#include "upb/def.h"
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_accessor ***************************************************************/
+
+// A upb_accessor is a table of function pointers for doing reads and writes
+// for one specific upb_fielddef.  Each field has a separate accessor, which
+// lives in the fielddef.
+
+typedef bool upb_has_reader(void *m, upb_value fval);
+typedef upb_value upb_value_reader(void *m, upb_value fval);
+
+typedef void *upb_seqbegin_handler(void *s);
+typedef void *upb_seqnext_handler(void *s, void *iter);
+typedef upb_value upb_seqget_handler(void *iter);
+// A NULL iterator marks the end of a sequence.
+INLINE bool upb_seq_done(void *iter) {
+  return !iter;
+}
+
+typedef struct _upb_accessor_vtbl {
+  // Writers.  These take an fval as a parameter because the callbacks are used
+  // as upb_handlers, but the fval is always the fielddef for that field.
+  upb_startfield_handler *appendseq;     // Repeated fields only.
+  upb_startfield_handler *appendsubmsg;  // Submsg fields (repeated or no).
+  upb_value_handler      *set;           // Scalar fields (repeated or no).
+
+  // Readers.
+  upb_has_reader         *has;
+  upb_value_reader       *get;
+  upb_seqbegin_handler   *seqbegin;
+  upb_seqnext_handler    *seqnext;
+  upb_seqget_handler     *seqget;
+} upb_accessor_vtbl;
+
+// Registers handlers for writing into a message of the given type.
+upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m);
+
+// Returns an stdmsg accessor for the given fielddef.
+upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
+
+
+/* upb_msg/upb_seq ************************************************************/
+
+// upb_msg and upb_seq allow for generic access to a message through its
+// accessor vtable.  Note that these do *not* allow you to create, destroy, or
+// take references on the objects -- these operations are specifically outside
+// the scope of what the accessors define.
+
+// Clears all hasbits.
+// TODO: Add a separate function for setting primitive values back to their
+// defaults (but not strings, submessages, or arrays).
+void upb_msg_clear(void *msg, upb_msgdef *md);
+
+// Could add a method that recursively clears submessages, strings, and
+// arrays if desired.  This could be a win if you wanted to merge without
+// needing hasbits, because during parsing you would never clear submessages
+// or arrays.  Also this could be desired to provide proto2 operations on
+// generated messages.
+
+// Reports whether field "f" is set in message "m".  A field that has no
+// accessor is never considered set.
+INLINE bool upb_msg_has(void *m, upb_fielddef *f) {
+  if (!f->accessor) return false;
+  return f->accessor->has(m, f->fval);
+}
+
+// Reads field "f" of message "m" through the field's accessor.
+// May only be called for fields that are known to be set.
+INLINE upb_value upb_msg_get(void *m, upb_fielddef *f) {
+  assert(upb_msg_has(m, f));
+  upb_value result = f->accessor->get(m, f->fval);
+  return result;
+}
+
+// Returns an iterator to the first element of sequence "s" for field "f".
+INLINE void *upb_seq_begin(void *s, upb_fielddef *f) {
+  assert(f->accessor);
+  void *first = f->accessor->seqbegin(s);
+  return first;
+}
+// Advances "iter" to the next element of sequence "s".  Must not be called
+// with an iterator that is already done.
+INLINE void *upb_seq_next(void *s, void *iter, upb_fielddef *f) {
+  assert(f->accessor);
+  assert(!upb_seq_done(iter));
+  void *following = f->accessor->seqnext(s, iter);
+  return following;
+}
+// Reads the element "iter" currently points at.  Must not be called with an
+// iterator that is already done.
+INLINE upb_value upb_seq_get(void *iter, upb_fielddef *f) {
+  assert(f->accessor);
+  assert(!upb_seq_done(iter));
+  upb_value element = f->accessor->seqget(iter);
+  return element;
+}
+
+
+/* upb_msgvisitor *************************************************************/
+
+// A upb_msgvisitor reads data from an in-memory structure using its accessors,
+// pushing the results to a given set of upb_handlers.
+// TODO: not yet implemented.
+
+typedef struct {
+  upb_fhandlers *fh;
+  upb_fielddef *f;
+  uint16_t msgindex;  // Only when upb_issubmsg(f).
+} upb_msgvisitor_field;
+
+typedef struct {
+  upb_msgvisitor_field *fields;
+  int fields_len;
+} upb_msgvisitor_msg;
+
+typedef struct {
+  uint16_t msgindex;
+  uint16_t fieldindex;
+  uint32_t arrayindex;  // UINT32_MAX if not an array frame.
+} upb_msgvisitor_frame;
+
+typedef struct {
+  upb_msgvisitor_msg *messages;
+  int messages_len;
+  upb_dispatcher dispatcher;
+} upb_msgvisitor;
+
+// Initializes a msgvisitor that will push data from messages of the given
+// msgdef to the given set of handlers.
+void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
+void upb_msgvisitor_uninit(upb_msgvisitor *v);
+
+void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
+void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
+
+
+/* Standard writers. **********************************************************/
+
+// Allocates a new stdmsg.
+void *upb_stdmsg_new(upb_msgdef *md);
+
+// Recursively frees any strings or submessages that the message refers to.
+void upb_stdmsg_free(void *m, upb_msgdef *md);
+
+// "hasbit" must be <= UPB_MAX_FIELDS.  If it is <0, this field has no hasbit.
+upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
+upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
+                                   uint16_t subm_size, uint8_t subm_setbytes);
+
+// Value writers for every in-memory type: write the data to a known offset
+// from the closure "c" and set the hasbit (if any).
+// TODO: can we get away with having only one for int64, uint64, double, etc?
+// The main thing in the way atm is that the upb_value is strongly typed
+// in debug mode.
+upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
+
+// Value writers for repeated fields: the closure points to a standard array
+// struct, appends the value to the end of the array, resizing with realloc()
+// if necessary.
+typedef struct {
+  char *ptr;
+  uint32_t len;   // Number of elements present.
+  uint32_t size;  // Number of elements allocated.
+} upb_stdarray;
+
+upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
+
+// Writers for C strings (NULL-terminated): we can find a char* at a known
+// offset from the closure "c".  Calls realloc() on the pointer to allocate
+// the memory (TODO: investigate whether checking malloc_usable_size() would
+// be cheaper than realloc()).  Also sets the hasbit, if any.
+//
+// Since the string is NULL terminated and does not store an explicit length,
+// these are not suitable for binary data that can contain NULLs.
+upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for length-delimited strings: we explicitly store the length, so
+// the data can contain NULLs.  Stores the data in a heap-allocated
+// upb_stdarray whose pointer is located at a known offset from the closure
+// "c" (the array is allocated on first use and reused afterwards).  Also sets
+// the hasbit, if any.
+upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
+upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
+
+// Writers for startseq and startmsg which allocate (or reuse, if possible)
+// a sub data structure (upb_stdarray or a submessage, respectively),
+// setting the hasbit.  If the hasbit is already set, the existing data
+// structure is used verbatim.  If the hasbit is not already set, the pointer
+// is checked for NULL.  If it is NULL, a new substructure is allocated,
+// cleared, and used.  If it is not NULL, the existing substructure is
+// cleared and reused.
+//
+// If there is no hasbit, we always behave as if the hasbit was not set,
+// so any existing data for this array or submessage is cleared.  In most
+// cases this will be fine since each array or non-repeated submessage should
+// occur at most once in the stream.  But if the client is using "concatenation
+// as merging", it will want to make sure hasbits are allocated so merges can
+// happen appropriately.
+//
+// If there was a demand for the behavior that absence of a hasbit acts as if
+// the bit was always set, we could provide that also.  But Clear() would need
+// to act recursively, which is less efficient since it requires an extra pass
+// over the tree.
+upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
+upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
+
+
+/* Standard readers. **********************************************************/
+
+bool upb_stdmsg_has(void *c, upb_value fval);
+void *upb_stdmsg_seqbegin(void *c);
+
+upb_value upb_stdmsg_getint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint64(void *c, upb_value fval);
+upb_value upb_stdmsg_getuint32(void *c, upb_value fval);
+upb_value upb_stdmsg_getdouble(void *c, upb_value fval);
+upb_value upb_stdmsg_getfloat(void *c, upb_value fval);
+upb_value upb_stdmsg_getbool(void *c, upb_value fval);
+upb_value upb_stdmsg_getptr(void *c, upb_value fval);
+
+void *upb_stdmsg_8byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_4byte_seqnext(void *c, void *iter);
+void *upb_stdmsg_1byte_seqnext(void *c, void *iter);
+
+upb_value upb_stdmsg_seqgetint64(void *c);
+upb_value upb_stdmsg_seqgetint32(void *c);
+upb_value upb_stdmsg_seqgetuint64(void *c);
+upb_value upb_stdmsg_seqgetuint32(void *c);
+upb_value upb_stdmsg_seqgetdouble(void *c);
+upb_value upb_stdmsg_seqgetfloat(void *c);
+upb_value upb_stdmsg_seqgetbool(void *c);
+upb_value upb_stdmsg_seqgetptr(void *c);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
new file mode 100644
index 0000000..218c780
--- /dev/null
+++ b/upb/pb/decoder.c
@@ -0,0 +1,469 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include "upb/bytestream.h"
+#include "upb/msg.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/varint.h"
+
+// Used for frames that have no specific end offset: groups, repeated primitive
+// fields inside groups, and the top-level message.
+#define UPB_NONDELIMITED UINT32_MAX
+
+#ifdef UPB_USE_JIT_X64
+#define Dst_DECL upb_decoder *d
+#define Dst_REF (d->dynasm)
+#define Dst (d)
+#include "dynasm/dasm_proto.h"
+#include "upb/pb/decoder_x86.h"
+#endif
+
+// It's unfortunate that we have to micro-manage the compiler this way,
+// especially since this tuning is necessarily specific to one hardware
+// configuration.  But empirically on a Core i7, performance increases 30-50%
+// with these annotations.  Every instance where these appear, gcc 4.2.1 made
+// the wrong decision and degraded performance in benchmarks.
+#define FORCEINLINE static __attribute__((always_inline))
+#define NOINLINE static __attribute__((noinline))
+
+// Leaves the decoder by jumping to the context saved in d->exitjmp; control
+// does not return to the caller.
+static void upb_decoder_exit(upb_decoder *d) { siglongjmp(d->exitjmp, 1); }
+// Same as upb_decoder_exit(), but takes the decoder as an untyped pointer so
+// that it can be used as a callback.
+static void upb_decoder_exit2(void *_d) {
+  upb_decoder_exit((upb_decoder*)_d);
+}
+static void upb_decoder_abort(upb_decoder *d, const char *msg) {
+  upb_status_setf(d->status, UPB_ERROR, msg);
+  upb_decoder_exit(d);
+}
+
+/* Decoding/Buffering of wire types *******************************************/
+
+// Number of unread bytes left in the current buffer.
+static size_t upb_decoder_bufleft(upb_decoder *d) {
+  return (size_t)(d->end - d->ptr);
+}
+// Consumes "len" bytes of the current buffer; the caller must ensure that
+// many bytes are available.
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
+  assert(upb_decoder_bufleft(d) >= len);
+  d->ptr = d->ptr + len;
+}
+
+// Returns the stream offset of the decoder's read position: the offset of the
+// current buffer's start plus the distance read into it.  When there is no
+// current buffer (d->ptr is NULL) only the buffer start offset is returned.
+size_t upb_decoder_offset(upb_decoder *d) {
+  size_t base = d->bufstart_ofs;
+  return d->ptr ? base + (d->ptr - d->buf) : base;
+}
+
+// Recomputes d->delim_end, the address within the current buffer at which the
+// frame on top of the dispatcher stack ends.  If that end falls inside the
+// current buffer, delim_end points at it; otherwise delim_end is set to
+// UINTPTR_MAX, which no buffer pointer can reach.
+//
+// NOTE(review): UPB_NONDELIMITED is defined as UINT32_MAX and upb_push() takes
+// a uint32_t end, yet the sentinel tested here is UINT64_MAX -- confirm which
+// value non-delimited frames actually carry in end_ofs.
+static void upb_decoder_setmsgend(upb_decoder *d) {
+  upb_dispatcher_frame *f = d->dispatcher.top;
+  // Distance from the start of the current buffer to the end of the frame.
+  size_t delimlen = f->end_ofs - d->bufstart_ofs;
+  size_t buflen = d->end - d->buf;
+  if (f->end_ofs != UINT64_MAX && delimlen <= buflen) {
+    d->delim_end = (uintptr_t)(d->buf + delimlen);
+  } else {
+    // Buffers must not run up against the end of memory.
+    assert((uintptr_t)d->end < UINTPTR_MAX);
+    d->delim_end = UINTPTR_MAX;
+  }
+}
+
+// Pulls the next buffer from the bytesrc.  Should be called only when the
+// current buffer is completely empty.
+//
+// Returns true once d->buf, d->ptr and d->end describe a fresh, non-empty
+// buffer.  Returns false if fetching more data left d->status at EOF (d->ptr
+// and d->end are NULL in that case); any other fetch failure exits the decoder
+// through upb_decoder_exit().
+static bool upb_trypullbuf(upb_decoder *d) {
+  assert(upb_decoder_bufleft(d) == 0);
+  // The current buffer ends where the fetched region ends: fetch more first.
+  if (d->bufend_ofs == d->refend_ofs) {
+    d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
+    if (!upb_ok(d->status)) {
+      d->ptr = NULL;
+      d->end = NULL;
+      if (upb_iseof(d->status)) return false;
+      upb_decoder_exit(d);
+    }
+  }
+  // The new buffer starts exactly where the previous one ended.
+  d->bufstart_ofs = d->bufend_ofs;
+  size_t len;
+  d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
+  assert(len > 0);
+  d->bufend_ofs = d->bufstart_ofs + len;
+  d->ptr = d->buf;
+  d->end = d->buf + len;
+#ifdef UPB_USE_JIT_X64
+  // NOTE(review): the 20-byte margin presumably keeps JIT-generated code from
+  // reading past the buffer -- confirm against the JIT.
+  d->jit_end = d->end - 20;
+#endif
+  upb_decoder_setmsgend(d);
+  return true;
+}
+
+static void upb_pullbuf(upb_decoder *d) {
+  if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
+}
+
+// Marks everything up to the current read position as completed and, if the
+// referenced region still begins before the current buffer, shrinks it so
+// that it starts at the current buffer.
+void upb_decoder_commit(upb_decoder *d) {
+  d->completed_ptr = d->ptr;
+  // Nothing to release unless our reference starts before this buffer.
+  if (d->refstart_ofs >= d->bufstart_ofs) return;
+  // Take the ref on the narrower region before dropping the old one.
+  upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
+  upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
+  d->refstart_ofs = d->bufstart_ofs;
+}
+
+// Byte-at-a-time varint decoder that can cross buffer boundaries.
+//
+// Reads up to 10 bytes, pulling a new buffer whenever the current one is
+// exhausted, and consumes every byte it reads by advancing d->ptr.  (The
+// previous version read through a private pointer and never advanced d->ptr,
+// so the buffer-left check never changed and callers saw no input consumed.)
+// Aborts decoding if the tenth byte still has its continuation bit set.
+NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
+  uint8_t byte = 0x80;
+  uint64_t u64 = 0;
+  int bitpos;
+  for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
+    if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+    byte = (uint8_t)*d->ptr;
+    u64 |= ((uint64_t)byte & 0x7F) << bitpos;
+    upb_decoder_advance(d, 1);
+  }
+  if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
+  return u64;
+}
+
+// For tags and delimited lengths, which must be <=32bit and are usually small.
+//
+// One- and two-byte varints are decoded inline.  Anything longer, or any
+// varint starting within two bytes of the end of the buffer, is handed to
+// upb_decode_varint_slow().  Aborts decoding if the value does not fit in 32
+// bits.
+FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
+  const char *p = d->ptr;
+  uint32_t ret;
+  uint64_t u64;
+  // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
+  if (upb_decoder_bufleft(d) < 2) goto slow;  // unlikely.
+  ret = *p & 0x7f;
+  if ((*(p++) & 0x80) == 0) goto done;  // predictable if fields are in order
+  ret |= (*p & 0x7f) << 7;
+  if ((*(p++) & 0x80) == 0) goto done;  // likely
+slow:
+  // The slow path starts over from d->ptr; "p" is discarded.
+  u64 = upb_decode_varint_slow(d);
+  if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
+  ret = (uint32_t)u64;
+  p = d->ptr;  // Turn the next line into a nop.
+done:
+  upb_decoder_advance(d, p - d->ptr);
+  return ret;
+}
+
+// Decodes a 32-bit varint into *val unless the input has ended cleanly.
+// Returns false (leaving *val untouched) when the buffer is empty and either
+// the buffer ends at d->end_ofs or no further buffer can be pulled.
+FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
+  if (upb_decoder_bufleft(d) == 0) {
+    // Check for our two normal end-of-message conditions.
+    bool at_stream_end = (d->bufend_ofs == d->end_ofs);
+    if (at_stream_end || !upb_trypullbuf(d)) return false;
+  }
+  *val = upb_decode_varint32(d);
+  return true;
+}
+
+// Decodes a 64-bit varint, choosing among three strategies by how many bytes
+// remain in the current buffer.
+FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
+  size_t avail = upb_decoder_bufleft(d);
+  if (avail >= 10) {
+    // Fast case: a whole maximum-length varint fits in the buffer.
+    upb_decoderet fast = upb_vdecode_fast(d->ptr);
+    if (fast.p == NULL) upb_decoder_abort(d, "Unterminated varint");
+    upb_decoder_advance(d, fast.p - d->ptr);
+    return fast.val;
+  }
+  if (avail > 0) {
+    // Intermediate case -- worth it?
+    // Copy what is left into a scratch buffer padded with continuation
+    // bytes, so the fast decoder only succeeds if the varint ends within the
+    // bytes we really have.
+    char padded[10];
+    memset(padded, 0x80, 10);
+    memcpy(padded, d->ptr, avail);
+    upb_decoderet partial = upb_vdecode_fast(padded);
+    if (partial.p != NULL) {
+      upb_decoder_advance(d, partial.p - padded);
+      return partial.val;
+    }
+  }
+  // Slow case -- varint spans buffer seam.
+  return upb_decode_varint_slow(d);
+}
+
+// Reads exactly "bytes" bytes of fixed-width data into "buf".
+//
+// If the value straddles a buffer boundary it is assembled piecewise, pulling
+// a new buffer each time the current one runs dry.  (The previous slow path
+// never pulled a buffer, so it looped forever once the current buffer was
+// drained.)  Running out of input aborts decoding via upb_pullbuf().
+FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
+  if (upb_decoder_bufleft(d) >= bytes) {
+    // Fast case.
+    memcpy(buf, d->ptr, bytes);
+    upb_decoder_advance(d, bytes);
+  } else {
+    // Slow case.
+    size_t read = 0;
+    while (read < bytes) {
+      if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+      size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
+      memcpy(buf + read, d->ptr, avail);
+      upb_decoder_advance(d, avail);
+      read += avail;
+    }
+  }
+}
+
+// Reads a 4-byte fixed-width value; the bytes are used in host order.
+FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
+  uint32_t raw;
+  upb_decode_fixed(d, (char*)&raw, sizeof(raw));
+  return raw;  // TODO: proper byte swapping
+}
+// Reads an 8-byte fixed-width value; the bytes are used in host order.
+FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
+  uint64_t raw;
+  upb_decode_fixed(d, (char*)&raw, sizeof(raw));
+  return raw;  // TODO: proper byte swapping
+}
+
+// Reads a length prefix and skips over that many bytes of string data,
+// returning d->strref filled in with the string's stream offset and length.
+// strref.ptr is set only when the whole string lies in the current buffer.
+INLINE upb_strref *upb_decode_string(upb_decoder *d) {
+  uint32_t remaining = upb_decode_varint32(d);
+  d->strref.stream_offset = upb_decoder_offset(d);
+  d->strref.len = remaining;
+  if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+  if (upb_decoder_bufleft(d) >= remaining) {
+    // Fast case.
+    d->strref.ptr = d->ptr;
+    upb_decoder_advance(d, remaining);
+    return &d->strref;
+  }
+  // Slow case: consume the string buffer by buffer.
+  for (;;) {
+    size_t chunk = UPB_MIN(upb_decoder_bufleft(d), remaining);
+    upb_decoder_advance(d, chunk);
+    remaining -= chunk;
+    if (remaining == 0) break;
+    upb_pullbuf(d);
+  }
+  return &d->strref;
+}
+
+// Opens a submessage frame for field "f", records "end" as the frame's end
+// offset, and recomputes the delimiter for the new top frame.
+INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
+  upb_dispatcher_frame *fr = upb_dispatch_startsubmsg(&d->dispatcher, f);
+  fr->end_ofs = end;
+  upb_decoder_setmsgend(d);
+}
+
+
+/* Decoding of .proto types ***************************************************/
+
+// Technically, we are losing data if we see a 32-bit varint that is not
+// properly sign-extended.  We could detect this and error about the data loss,
+// but proto2 does not do this, so we pass.
+
+#define T(type, wt, valtype, convfunc) /* Defines upb_decode_<type>(). */ \
+  INLINE void upb_decode_ ## type(upb_decoder *d, upb_fhandlers *f) { \
+    upb_value val; \
+    upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); /* convfunc: cast or function */ \
+    upb_dispatch_value(&d->dispatcher, f, val); /* Hand the value to f's handler. */ \
+  } \
+
+static double  upb_asdouble(uint64_t n) { double out; memcpy(&out, &n, sizeof(out)); return out; }  // Bit-for-bit copy.
+static float   upb_asfloat(uint32_t n)  { float out; memcpy(&out, &n, sizeof(out)); return out; }  // Bit-for-bit copy.
+static int32_t upb_zzdec_32(uint32_t n) { uint32_t sign = n & 1; return (n >> 1) ^ -(int32_t)sign; }  // Zig-zag decode.
+static int64_t upb_zzdec_64(uint64_t n) { uint64_t sign = n & 1; return (n >> 1) ^ -(int64_t)sign; }  // Zig-zag decode.
+
+T(INT32,    varint,  int32,  int32_t)   // convfunc is a type here, so the macro expands to a plain cast.
+T(INT64,    varint,  int64,  int64_t)
+T(UINT32,   varint,  uint32, uint32_t)
+T(UINT64,   varint,  uint64, uint64_t)
+T(FIXED32,  fixed32, uint32, uint32_t)
+T(FIXED64,  fixed64, uint64, uint64_t)
+T(SFIXED32, fixed32, int32,  int32_t)
+T(SFIXED64, fixed64, int64,  int64_t)
+T(BOOL,     varint,  bool,   bool)
+T(ENUM,     varint,  int32,  int32_t)
+T(DOUBLE,   fixed64, double, upb_asdouble)  // Reinterprets the 64 raw bits as a double.
+T(FLOAT,    fixed32, float,  upb_asfloat)   // Reinterprets the 32 raw bits as a float.
+T(SINT32,   varint,  int32,  upb_zzdec_32)  // Zig-zag decoded.
+T(SINT64,   varint,  int64,  upb_zzdec_64)  // Zig-zag decoded.
+T(STRING,   string,  strref, upb_strref*)   // Passes on the strref returned by upb_decode_string().
+
+static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {  // Start-group tag: enter the group's frame.
+  upb_push(d, f, UPB_NONDELIMITED);  // Groups carry no length prefix, so the frame has no end offset.
+}
+static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {  // Decode function installed for the ENDGROUP pseudo-type.
+  (void)f;  // Unused; kept so the signature matches the other decode functions.
+  upb_dispatch_endsubmsg(&d->dispatcher);
+  upb_decoder_setmsgend(d);
+}
+static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
+  // Read the length first: decoding it moves d->ptr, and C leaves the operand
+  // evaluation order of "+" unspecified if both appear in one expression.
+  uint32_t len = upb_decode_varint32(d);
+  upb_push(d, f, len + (d->ptr - d->buf));  // End = offset after the prefix + len.
+}
+
+/* The main decoding loop *****************************************************/
+
+static void upb_decoder_checkdelim(upb_decoder *d) {
+  for (;;) {  // Close every frame that ends exactly at the current position.
+    uintptr_t pos = (uintptr_t)d->ptr;
+    if (pos < d->delim_end) return;
+    if (pos > d->delim_end) upb_decoder_abort(d, "Bad submessage end");
+    if (!d->dispatcher.top->is_sequence) {
+      upb_dispatch_endsubmsg(&d->dispatcher);
+    } else {
+      upb_dispatch_endseq(&d->dispatcher);
+    }
+    upb_decoder_setmsgend(d);
+  }
+}
+
+static void upb_decoder_enterjit(upb_decoder *d) {  // Runs the JIT-generated decoder when one exists and applies.
+  (void)d;  // Keeps non-JIT builds free of an unused-parameter warning.
+#ifdef UPB_USE_JIT_X64
+  if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {  // Only from the bottom frame.
+    // Decodes as many fields as possible, updating d->ptr appropriately,
+    // before falling through to the slow(er) path.
+    void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
+    upb_jit_decode(d);
+  }
+#endif
+}
+
+INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
+  // Returns the handlers of the next known field, skipping unknown fields on
+  // the way, or NULL when no further tag could be read.
+  while (1) {
+    uint32_t tag;
+    if (!upb_trydecode_varint32(d, &tag)) return NULL;
+    upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
+    // There are no explicit "startseq" or "endseq" markers in protobuf
+    // streams, so we have to infer them by noticing when a repeated field
+    // starts or ends.
+    if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+      upb_dispatch_endseq(&d->dispatcher);
+      upb_decoder_setmsgend(d);
+    }
+    if (f && f->repeated && d->dispatcher.top->f != f) {
+      // TODO: support packed.
+      assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
+             (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
+      uint32_t end = d->dispatcher.top->end_ofs;  // The sequence inherits its parent's end.
+      upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
+      upb_decoder_setmsgend(d);
+    }
+    if (f) return f;
+
+    // Unknown field: skip its payload according to the wire type.
+    switch (tag & 0x7) {
+      case UPB_WIRE_TYPE_VARINT:    upb_decode_varint(d); break;
+      case UPB_WIRE_TYPE_32BIT:     upb_decoder_advance(d, 4); break;
+      case UPB_WIRE_TYPE_64BIT:     upb_decoder_advance(d, 8); break;
+      case UPB_WIRE_TYPE_DELIMITED:
+        upb_decoder_advance(d, upb_decode_varint32(d)); break;
+      default:
+        upb_decoder_abort(d, "Invalid wire type");
+    }
+    // TODO: deliver to unknown field callback.
+    upb_decoder_commit(d);
+    upb_decoder_checkdelim(d);
+  }
+}
+
+void upb_decoder_onexit(upb_decoder *d) {
+  // A repeated field left open at exit still gets its endseq callback.
+  if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
+  if (d->status->code != UPB_EOF) return;  // Some other status: keep it as is.
+  if (!upb_dispatcher_stackempty(&d->dispatcher)) {
+    upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
+    return;
+  }
+  upb_status_clear(d->status);  // Normal end-of-file.
+  upb_dispatch_endmsg(&d->dispatcher, d->status);
+}
+
+void upb_decoder_decode(upb_decoder *d, upb_status *status) {  // Decodes until exit; results are reported through *status.
+  if (sigsetjmp(d->exitjmp, 0)) {  // Nonzero: control came back here through a jump to d->exitjmp.
+    upb_decoder_onexit(d);
+    return;
+  }
+  d->status = status;
+  upb_dispatch_startmsg(&d->dispatcher);
+  while(1) { // Main loop: executed once per tag/field pair.
+    upb_decoder_checkdelim(d);
+    upb_decoder_enterjit(d);
+    // if (!d->dispatcher.top->is_packed)
+    upb_fhandlers *f = upb_decode_tag(d);
+    if (!f) upb_decoder_exit2(d);  // Presumably does not return (f is used unchecked below) -- confirm.
+    f->decode(d, f);  // Per-type decode function installed by upb_decoder_initforhandlers().
+    upb_decoder_commit(d);
+  }
+}
+
+static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,  // Skip callback handed to upb_dispatcher_init().
+                             upb_dispatcher_frame *bottom) {
+  (void)top;
+  (void)bottom;
+  (void)_d;  // Currently a no-op: the real implementation below is compiled out.
+#if 0
+  upb_decoder *d = _d;
+  // TODO
+  if (bottom->end_offset == UPB_NONDELIMITED) {
+    // TODO: support skipping groups.
+    abort();
+  }
+  d->ptr = d->buf.ptr + bottom->end_offset;
+#endif
+}
+
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
+  upb_dispatcher_init(
+      &d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
+#ifdef UPB_USE_JIT_X64
+  d->jit_code = NULL;
+  if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
+#endif
+  // Set function pointers for each field's decode function.
+  for (int msg_idx = 0; msg_idx < handlers->msgs_len; msg_idx++) {
+    upb_mhandlers *m = handlers->msgs[msg_idx];
+    for(upb_inttable_iter it = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(it);
+        it = upb_inttable_next(&m->fieldtab, it)) {  // "it" no longer shadows the message index.
+      upb_fhandlers *f = upb_inttable_iter_value(it);
+      switch (f->type) {  // Types not listed here leave f->decode untouched.
+        case UPB_TYPE(INT32):    f->decode = &upb_decode_INT32;    break;
+        case UPB_TYPE(INT64):    f->decode = &upb_decode_INT64;    break;
+        case UPB_TYPE(UINT32):   f->decode = &upb_decode_UINT32;   break;
+        case UPB_TYPE(UINT64):   f->decode = &upb_decode_UINT64;   break;
+        case UPB_TYPE(FIXED32):  f->decode = &upb_decode_FIXED32;  break;
+        case UPB_TYPE(FIXED64):  f->decode = &upb_decode_FIXED64;  break;
+        case UPB_TYPE(SFIXED32): f->decode = &upb_decode_SFIXED32; break;
+        case UPB_TYPE(SFIXED64): f->decode = &upb_decode_SFIXED64; break;
+        case UPB_TYPE(BOOL):     f->decode = &upb_decode_BOOL;     break;
+        case UPB_TYPE(ENUM):     f->decode = &upb_decode_ENUM;     break;
+        case UPB_TYPE(DOUBLE):   f->decode = &upb_decode_DOUBLE;   break;
+        case UPB_TYPE(FLOAT):    f->decode = &upb_decode_FLOAT;    break;
+        case UPB_TYPE(SINT32):   f->decode = &upb_decode_SINT32;   break;
+        case UPB_TYPE(SINT64):   f->decode = &upb_decode_SINT64;   break;
+        case UPB_TYPE(STRING):   f->decode = &upb_decode_STRING;   break;
+        case UPB_TYPE(BYTES):    f->decode = &upb_decode_STRING;   break;
+        case UPB_TYPE(GROUP):    f->decode = &upb_decode_GROUP;    break;
+        case UPB_TYPE(MESSAGE):  f->decode = &upb_decode_MESSAGE;  break;
+        case UPB_TYPE_ENDGROUP:  f->decode = &upb_endgroup;        break;
+      }
+    }
+  }
+}
+
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
+  upb_handlers *handlers = upb_handlers_new();  // Scratch handlers set for m.
+  upb_accessors_reghandlers(handlers, m);
+  upb_decoder_initforhandlers(d, handlers);
+  upb_handlers_unref(handlers);  // Release the handle created above.
+}
+
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
+                       uint64_t end_ofs, void *closure) {
+  // Fresh dispatcher stack; its first frame ends where the message ends.
+  upb_dispatcher_frame *bottom = upb_dispatcher_reset(&d->dispatcher, closure);
+  bottom->end_ofs = end_ofs;
+  d->end_ofs = end_ofs;
+  // Input source, shared by the decoder and the strref it hands out.
+  d->bytesrc = bytesrc;
+  d->strref.bytesrc = bytesrc;
+  // Nothing is ref'd or buffered yet: both ranges are empty at start_ofs.
+  d->refstart_ofs = d->refend_ofs = start_ofs;
+  d->bufstart_ofs = d->bufend_ofs = start_ofs;
+  d->buf = d->ptr = NULL;
+  d->end = NULL;  // Force a buffer pull.
+#ifdef UPB_USE_JIT_X64
+  d->jit_end = NULL;
+#endif
+  d->delim_end = UINTPTR_MAX;  // But don't let end-of-message get triggered.
+}
+
+void upb_decoder_uninit(upb_decoder *d) {  // Releases what upb_decoder_initforhandlers() set up.
+#ifdef UPB_USE_JIT_X64
+  if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);  // Same condition under which the JIT was built.
+#endif
+  upb_dispatcher_uninit(&d->dispatcher);
+}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
new file mode 100644
index 0000000..3981359
--- /dev/null
+++ b/upb/pb/decoder.h
@@ -0,0 +1,99 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb_decoder implements a high performance, streaming decoder for protobuf
+ * data that works by implementing upb_src and getting its data from a
+ * upb_bytesrc.
+ *
+ * The decoder does not currently support non-blocking I/O, in the sense that
+ * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
+ * decoder when data becomes available again.  Support for this could be added,
+ * but it would add complexity and perhaps cost efficiency also.
+ */
+
+#ifndef UPB_DECODER_H_
+#define UPB_DECODER_H_
+
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_decoder *****************************************************************/
+
+struct dasm_State;
+
+typedef struct _upb_decoder {
+  upb_bytesrc *bytesrc;       // Source of our serialized data.
+  upb_dispatcher dispatcher;  // Dispatcher to which we push parsed data.
+  upb_status *status;         // Where we will store any errors that occur.
+  upb_strref strref;          // For passing string data to callbacks.
+
+  // Offsets for the region we currently have ref'd.
+  uint64_t refstart_ofs, refend_ofs;
+
+  // Current buffer and its stream offset.
+  const char *buf, *ptr, *end;
+  uint64_t bufstart_ofs, bufend_ofs;
+
+  // Stream offset for the end of the top-level message, if any.
+  uint64_t end_ofs;
+
+  // Buf offset as of which we've delivered callbacks; needed for rollback on
+  // UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
+  const char *completed_ptr;
+
+  // End of the delimited region, as an address that is compared directly
+  // against ptr, or UINTPTR_MAX if not in this buf.
+  uintptr_t delim_end;
+
+#ifdef UPB_USE_JIT_X64
+  // For JIT, which doesn't do bounds checks in the middle of parsing a field.
+  const char *jit_end, *effective_end;  // == MIN(jit_end, submsg_end)
+
+  // JIT-generated machine code (else NULL).
+  char *jit_code;
+  size_t jit_size;
+  char *debug_info;  // Patched ELF image registered with GDB, or NULL.
+
+  struct dasm_State *dynasm;
+#endif
+
+  sigjmp_buf exitjmp;  // Jump target set by upb_decoder_decode() for leaving the decode loop.
+} upb_decoder;
+
+// Initializes/uninitializes a decoder for calling into the given handlers
+// (or to write into the given msgdef, given its accessors).  Takes a ref
+// on the handlers or msgdef.
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h);
+
+// Equivalent to:
+//   upb_accessors_reghandlers(h, m);
+//   upb_decoder_initforhandlers(d, h);
+// except possibly more efficient, by using cached state in the msgdef.
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m);
+void upb_decoder_uninit(upb_decoder *d);
+
+// Resets the internal state of an already-allocated decoder.  This puts it in a
+// state where it has not seen any data, and expects the next data to be from
+// the beginning of a new protobuf.  Parsers must be reset before they can be
+// used.  A decoder can be reset multiple times.
+//
+// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message.
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs,
+                       uint64_t end_ofs, void *closure);
+
+void upb_decoder_decode(upb_decoder *d, upb_status *status);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_DECODER_H_ */
diff --git a/upb/pb/decoder_x86.dasc b/upb/pb/decoder_x86.dasc
new file mode 100644
index 0000000..19043c6
--- /dev/null
+++ b/upb/pb/decoder_x86.dasc
@@ -0,0 +1,694 @@
+|//
+|// upb - a minimalist implementation of protocol buffers.
+|//
+|// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+|// Author: Josh Haberman <jhaberman@gmail.com>
+|//
+|// JIT compiler for upb_decoder on x86.  Given a upb_handlers object,
+|// generates code specialized to parsing the specific message and
+|// calling specific handlers.
+
+#define UPB_NONE -1
+#define UPB_MULTIPLE -2
+#define UPB_TOPLEVEL_ONE -3
+
+#include <sys/mman.h>
+#include "dynasm/dasm_proto.h"
+#include "dynasm/dasm_x86.h"
+
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// We map into the low 32 bits when we can, but if this is not available
+// (like on OS X) we take what we can get.  It's not required for correctness,
+// it's just a performance thing that makes it more likely that our jumps
+// can be rel32 (i.e. within 32-bits of our pc) instead of the longer
+// sequence required for other jumps (see callp).
+#ifndef MAP_32BIT
+#define MAP_32BIT 0
+#endif
+
+// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
+// at runtime.  GDB 7.x+ has defined an interface for doing this, and these
+// structure/function definitions are copied out of gdb/jit.h.
+//
+// We need to give GDB an ELF file at runtime describing the symbols we have
+// generated.  To avoid implementing the ELF format, we generate an ELF file
+// at compile-time and compile it in as a character string.  We can replace
+// a few key constants (address of JIT-ted function and its size) by looking
+// for a few magic numbers and doing a dumb string replacement.
+
+#ifndef __APPLE__
+#include "upb/pb/jit_debug_elf_file.h"
+
+typedef enum  // Values stored in gdb_jit_descriptor.action_flag.
+{
+  GDB_JIT_NOACTION = 0,
+  GDB_JIT_REGISTER,
+  GDB_JIT_UNREGISTER
+} jit_actions_t;
+
+typedef struct gdb_jit_entry {  // One registered symbol file, kept in a doubly linked list.
+  struct gdb_jit_entry *next_entry;
+  struct gdb_jit_entry *prev_entry;
+  const char *symfile_addr;  // Start of the in-memory ELF image.
+  uint64_t symfile_size;     // Size of that image in bytes.
+} gdb_jit_entry;
+
+typedef struct {  // Global descriptor the debugger inspects.
+  uint32_t version;
+  uint32_t action_flag;           // A jit_actions_t value.
+  gdb_jit_entry *relevant_entry;  // Entry the current action refers to.
+  gdb_jit_entry *first_entry;     // Head of the list of entries.
+} gdb_jit_descriptor;
+
+gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
+
+void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }  // Deliberately empty hook; called after the descriptor is updated.
+
+void upb_reg_jit_gdb(upb_decoder *d) {
+  // Create debug info: a private copy of the ELF template, patched below.
+  size_t elf_len = upb_pb_jit_debug_elf_file_o_len;
+  gdb_jit_entry *e = malloc(sizeof(*e));
+  d->debug_info = malloc(elf_len);
+  if (!e || !d->debug_info) { free(e); free(d->debug_info); d->debug_info = NULL; return; }  // Out of memory: skip registration.
+  memcpy(d->debug_info, upb_pb_jit_debug_elf_file_o, elf_len);
+  uint64_t *p = (void*)d->debug_info;
+  for (; (char*)(p+1) <= d->debug_info + elf_len; ++p) {  // Replace the magic placeholders.
+    if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
+    if (*p == 0x321) { *p = d->jit_size; }
+  }
+  // Register the JIT-ted code with GDB: link the entry in at the list head.
+  e->next_entry = __jit_debug_descriptor.first_entry;
+  e->prev_entry = NULL;
+  if (e->next_entry) e->next_entry->prev_entry = e;
+  e->symfile_addr = d->debug_info;
+  e->symfile_size = elf_len;
+  __jit_debug_descriptor.first_entry = e;
+  __jit_debug_descriptor.relevant_entry = e;
+  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
+  __jit_debug_register_code();
+}
+
+#else
+
+void upb_reg_jit_gdb(upb_decoder *d) {  // __APPLE__ build: no GDB registration is performed.
+  (void)d;
+}
+
+#endif
+
+|.arch x64
+|.actionlist upb_jit_actionlist
+|.globals UPB_JIT_GLOBAL_
+|.globalnames upb_jit_globalnames
+|
+|// Calling conventions.
+|.define ARG1_64,   rdi
+|.define ARG2_8,    sil
+|.define ARG2_32,   esi
+|.define ARG2_64,   rsi
+|.define ARG3_8,    dl
+|.define ARG3_32,   edx
+|.define ARG3_64,   rdx
+|
+|// Register allocation / type map.
+|// ALL of the code in this file uses these register allocations.
+|// When we "call" within this file, we do not use regular calling
+|// conventions, but of course when calling to user callbacks we must.
+|.define PTR,       rbx
+|.define CLOSURE,   r12
+|.type   FRAME,     upb_dispatcher_frame, r13
+|.type   STRREF,    upb_strref, r14
+|.type   DECODER,   upb_decoder, r15
+|// Calls addr: a direct call if the address is below 0xffffffff, otherwise through rax.
+|.macro callp, addr
+|| if ((uintptr_t)addr < 0xffffffff) {
+     |  call   &addr
+|| } else {
+     |  mov64  rax, (uintptr_t)addr
+     |  call   rax
+|| }
+|.endmacro
+|
+|// Checks PTR against DECODER->effective_end: groups leave the JIT, other messages jump to their end-of-buffer label.
+|.macro check_eob, m
+|  cmp   PTR, DECODER->effective_end
+|| if (m->is_group) {
+     |  jae  ->exit_jit
+|| } else {
+     |  jae  =>m->jit_endofbuf_pclabel
+|| }
+|.endmacro
+|
+|// Decodes varint from [PTR + offset] -> ARG3; its first bytes must already be loaded in ecx.
+|// Saves new pointer as rax.
+|.macro decode_loaded_varint, offset
+|  // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
+|  lea    rax, [PTR + offset + 1]
+|  mov    ARG3_32, ecx
+|  and    ARG3_32, 0x7f
+|  test   cl, cl
+|  jns    >9                    // Continuation bit clear: one-byte varint.
+|  lea    rax, [PTR + offset + 2]
+|  movzx  esi, ch
+|  and    esi, 0x7f
+|  shl    esi, 7
+|  or     ARG3_32, esi
+|  test   cx, cx
+|  jns    >9                    // Second byte ends the varint.
+|  mov    ARG1_64, rax
+|  mov    ARG2_32, ARG3_32
+|  callp  upb_vdecode_max8_fast
+|  test   rax, rax
+|  jz     ->exit_jit   // >10-byte varint.
+|9:
+|.endmacro
+|// Loads 4 bytes from [PTR + offset], decodes a varint from them into ARG3 and moves PTR past it.
+|.macro decode_varint, offset
+|  mov    ecx, dword [PTR + offset]
+|  decode_loaded_varint offset
+|  mov    PTR, rax
+|.endmacro
+|
+|// Decode the tag (its bytes already loaded in ecx) -> edx, then jump through m->tablearray.
+|// Could specialize this by avoiding the value masking: could just key the
+|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
+|// Currently only support tables where all entries are in the array part.
+|.macro dyndispatch, m
+|  decode_loaded_varint, 0
+|  mov  ecx, edx
+|  shr  ecx, 3                     // ecx = field number.
+|  and  edx, 0x7                   // edx = wire type, checked at the field's label.
+|  cmp  ecx, m->max_field_number  // Bounds-check the field.
+|  ja   ->exit_jit                // In the future; could be unknown label
+|| if ((uintptr_t)m->tablearray < 0xffffffff) {
+|    mov  rax, qword [rcx*8 + m->tablearray]  // TODO: support hybrid array/hash tables.
+|| } else {
+|    mov64  rax, (uintptr_t)m->tablearray
+|    mov  rax, qword [rax + rcx*8]
+|| }
+|  jmp  rax  // Dispatch: unpredictable jump.
+|.endmacro
+|
+|// Push a stack frame (not the CPU stack, the upb_decoder stack).
+|.macro pushframe, f, closure_, end_offset_, is_sequence_
+|  lea   rax, [FRAME + sizeof(upb_dispatcher_frame)]  // rax for shorter addressing.
+|  cmp   rax, qword DECODER->dispatcher.limit
+|  jae   ->exit_jit  // Frame stack overflow.
+|  mov   qword FRAME:rax->f, f
+|  mov   qword FRAME:rax->closure, closure_
+|  mov   dword FRAME:rax->end_ofs, end_offset_
+|  mov   byte FRAME:rax->is_sequence, is_sequence_
+|  mov   CLOSURE, rdx  // NOTE: reads rdx rather than closure_; both call sites in this file pass rdx.
+|  mov   DECODER->dispatcher.top, rax
+|  mov   FRAME, rax
+|.endmacro
+|
+|.macro popframe  // Pops the top decoder frame and restores the parent's state.
+|  sub   FRAME, sizeof(upb_dispatcher_frame)
+|  mov   DECODER->dispatcher.top, FRAME
+|  setmsgend  m  // "m" is not a macro parameter: it is taken from the C scope at the expansion site.
+|  mov   CLOSURE, FRAME->closure
+|.endmacro
+|
+|.macro setmsgend, m  // Recomputes delim_end and effective_end for the frame in FRAME.
+|    mov    rsi, DECODER->jit_end
+|| if (m->is_group) {
+|    mov64  rax, 0xffffffffffffffff  // Groups have no delimited end.
+|    mov    qword DECODER->delim_end, rax
+|    mov    DECODER->effective_end, rsi
+|| } else {
+|    // Could store a correctly-biased version in the frame, at the cost of
+|    // a larger stack.
+|    mov    eax, dword FRAME->end_ofs
+|    add    rax, qword DECODER->buf
+|    mov    DECODER->delim_end, rax  // delim_end = d->buf + f->end_ofs
+|    cmp    rax, rsi
+|    jb     >8
+|    mov    rax, rsi                  // effective_end = min(d->delim_end, d->jit_end)
+|8:
+|    mov    DECODER->effective_end, rax
+|| }
+|.endmacro
+|
+|// rcx contains the raw bytes at PTR; compare them against "tag", but since it
+|// is a varint we must only compare as many bytes as actually have data.
+|.macro checktag, tag
+|| switch (upb_value_size(tag)) {
+||    case 1:
+|       cmp   cl, tag
+||      break;
+||    case 2:
+|       cmp   cx, tag
+||      break;
+||    case 3:
+|       and   ecx, 0xffffff  // 3 bytes
+|       cmp   rcx, tag
+||      break;
+||    case 4:
+|       cmp   ecx, tag
+||      break;
+||    case 5:
+|       mov64 rdx, 0xffffffffff  // 5 bytes
+|       and   rcx, rdx
+|       cmp   rcx, tag
+||      break;
+||    default: abort();
+||  }
+|.endmacro
+|// Loads f->fval into ARG2: xor for zero, a 32-bit mov below 0xffffffff, mov64 otherwise.
+|.macro loadfval, f
+|| if (f->fval.val.uint64 == 0) {
+|    xor     ARG2_32, ARG2_32
+|| } else if (f->fval.val.uint64 < 0xffffffff) {
+|    mov     ARG2_32, f->fval.val.uint64
+|| } else {
+|    mov64   ARG2_64, f->fval.val.uint64
+|| }
+|.endmacro
+
+#include <stdlib.h>
+#include "upb/pb/varint.h"
+
+// Emits the parsing code for field f of message m.  PTR should point to the beginning of the tag.
+static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
+                                  upb_mhandlers *m,
+                                  upb_fhandlers *f, upb_fhandlers *next_f) {
+  int tag_size = upb_value_size(tag);  // Used below as the number of bytes to step over the tag.
+
+  // PC-label for the dispatch table.
+  // We check the wire type (which must be loaded in edx) because the
+  // table is keyed on field number, not type.
+  |=>f->jit_pclabel:
+  |  cmp  edx, (tag & 0x7)
+  |  jne  ->exit_jit     // In the future: could be an unknown field or packed.
+  |=>f->jit_pclabel_notypecheck:
+  if (f->repeated) {
+    if (f->startseq) {
+      |  mov   ARG1_64, CLOSURE
+      |  loadfval f
+      |  callp f->startseq
+    } else {
+      |  mov   rdx, CLOSURE
+    }
+    |  mov   esi, FRAME->end_ofs
+    |  pushframe  f, rdx, esi, true
+  }
+
+  |1:  // Label for repeating this field.
+
+  // Decode the value into arg 3 for the callback.
+  switch (f->type) {
+    case UPB_TYPE(DOUBLE):
+    case UPB_TYPE(FIXED64):
+    case UPB_TYPE(SFIXED64):
+      |  mov  ARG3_64, qword [PTR + tag_size]
+      |  add  PTR, 8 + tag_size
+      break;
+
+    case UPB_TYPE(FLOAT):
+    case UPB_TYPE(FIXED32):
+    case UPB_TYPE(SFIXED32):
+      |  mov  ARG3_32, dword [PTR + tag_size]
+      |  add  PTR, 4 + tag_size
+      break;
+
+    case UPB_TYPE(BOOL):
+      // Can't assume it's one byte long, because bool must be wire-compatible
+      // with all of the varint integer types.
+      |  decode_varint  tag_size
+      |  test  ARG3_64, ARG3_64
+      |  setne ARG3_8   // Other bytes left with val, should be ok.
+      break;
+
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(ENUM):
+      |  decode_varint  tag_size
+      break;
+
+    case UPB_TYPE(SINT64):
+      // 64-bit zig-zag decoding.
+      |  decode_varint  tag_size
+      |  mov  rax, ARG3_64
+      |  shr  ARG3_64, 1
+      |  and  rax, 1
+      |  neg  rax
+      |  xor  ARG3_64, rax
+      break;
+
+    case UPB_TYPE(SINT32):
+      // 32-bit zig-zag decoding.
+      |  decode_varint  tag_size
+      |  mov  eax, ARG3_32
+      |  shr  ARG3_32, 1
+      |  and  eax, 1
+      |  neg  eax
+      |  xor  ARG3_32, eax
+      break;
+
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES):
+      // We only handle the case where the entire string is in our current
+      // buf, which sidesteps any security problems.  The C path has more
+      // robust checks.
+      |  decode_varint  tag_size
+      |  mov  STRREF->len, ARG3_32
+      |  mov  STRREF->ptr, PTR
+      |  mov  rax, PTR
+      |  sub  rax, DECODER->buf
+      |  add  eax, DECODER->bufstart_ofs  // = d->ptr - d->buf + d->bufstart_ofs
+      |  mov  STRREF->stream_offset, eax
+      |  add  PTR, ARG3_64
+      |  mov  ARG3_64, STRREF
+      |  cmp  PTR, DECODER->effective_end
+      |  ja   ->exit_jit   // Can't deliver, whole string not in buf.
+      break;
+
+    case UPB_TYPE_ENDGROUP:  // A pseudo-type.
+      |  add  PTR, tag_size
+      |  mov  DECODER->ptr, PTR
+      |  jmp  =>m->jit_endofmsg_pclabel
+      return;
+
+    // Will dispatch callbacks and call submessage in a second.
+    case UPB_TYPE(MESSAGE):
+      |  decode_varint  tag_size
+      break;
+    case UPB_TYPE(GROUP):
+      |  add  PTR, tag_size
+      break;
+
+    default: abort();
+  }
+  // Commit our work by advancing ptr.
+  // (If in the future we wanted to support a UPB_SUSPEND_AGAIN that
+  // suspends the decoder and redelivers the value later, we would
+  // need to adjust this to happen perhaps after the callback ran).
+  |  mov   DECODER->ptr, PTR
+
+  // Load closure and fval into arg registers.
+  |  mov   ARG1_64, CLOSURE
+  |  loadfval f
+
+  // Call callbacks.
+  if (upb_issubmsgtype(f->type)) {
+    // Call startsubmsg handler (if any).
+    if (f->startsubmsg) {
+      // upb_sflow_t startsubmsg(void *closure, upb_value fval)
+      |  mov   r12d, ARG3_32
+      |  callp f->startsubmsg
+    } else {
+      |  mov   rdx, CLOSURE
+      |  mov   r12d, ARG3_32
+    }
+    if (f->type == UPB_TYPE(MESSAGE)) {
+      |   mov   rsi, PTR
+      |   sub   rsi, DECODER->buf
+      |   add   esi, r12d   // = (d->ptr - d->buf) + delim_len
+    } else {
+      assert(f->type == UPB_TYPE(GROUP));
+      |   mov   esi, UPB_NONDELIMITED
+    }
+    |  pushframe  f, rdx, esi, false
+
+    upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
+    if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
+      |  jmp   =>sub_m->jit_startmsg_pclabel;
+    } else {
+      |  call  =>sub_m->jit_startmsg_pclabel;
+    }
+
+    |=>f->jit_submsg_done_pclabel:
+    |   popframe
+
+    // Call endsubmsg handler (if any).
+    if (f->endsubmsg) {
+      // upb_flow_t endsubmsg(void *closure, upb_value fval);
+      |  mov   ARG1_64, CLOSURE
+      |  loadfval  f
+      |  callp f->endsubmsg
+    }
+  } else {
+    |  callp  f->value
+  }
+  // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+
+  // Epilogue: load next tag, check for repeated field.
+  |  check_eob   m
+  |  mov         rcx, qword [PTR]
+  if (f->repeated) {
+    |  checktag  tag
+    |  je  <1
+    |  popframe
+    if (f->endseq) {
+      |  mov   ARG1_64, CLOSURE
+      |  loadfval f
+      |  callp f->endseq
+    }
+  }
+  if (next_tag != 0) {  // A next_tag of 0 means there is no following field to try first.
+    |  checktag  next_tag
+    |  je  =>next_f->jit_pclabel_notypecheck
+  }
+
+  // Fall back to dynamic dispatch.  Replicate the dispatch
+  // here so we can learn what fields generally follow others.
+  |  dyndispatch  m
+  |1:
+}
+
+static int upb_compare_uint32(const void *a, const void *b) {  // qsort() comparator for uint32_t keys.
+  const uint32_t x = *(const uint32_t*)a, y = *(const uint32_t*)b;  // TODO: always put ENDGROUP at the end.
+  return (x > y) - (x < y);  // Not x - y: that has the wrong sign for keys more than INT_MAX apart.
+}
+
+static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
+  |=>m->jit_startmsg_pclabel:
+  // Call startmsg handler (if any):
+  if (m->startmsg) {
+    // upb_flow_t startmsg(void *closure);
+    |  mov   ARG1_64, FRAME->closure
+    |  callp m->startmsg
+    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+  }
+
+  |  setmsgend  m
+  |  check_eob   m
+  |  mov    ecx, dword [PTR]
+  |  dyndispatch m
+
+  // --------- New code section (does not fall through) ------------------------
+
+  // Emit code for parsing each field (dynamic dispatch contains pointers to
+  // all of these).
+
+  // Create an ordering over the fields (inttable ordering is undefined).
+  int num_keys = upb_inttable_count(&m->fieldtab);
+  uint32_t *keys = malloc(num_keys * sizeof(*keys));
+  int idx = 0;
+  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+      i = upb_inttable_next(&m->fieldtab, i)) {
+    keys[idx++] = upb_inttable_iter_key(i);
+  }
+  qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
+
+  // Each field is emitted one iteration late so that it knows its successor.
+  upb_fhandlers *last_f = NULL;
+  uint32_t last_tag = 0;
+  for(int i = 0; i < num_keys; i++) {
+    uint32_t key = keys[i];
+    upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
+    uint32_t tag = upb_vencode32(key);
+    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
+    last_tag = tag;
+    last_f = f;
+  }
+  // The final field has no successor.  Guarded: a message without any fields
+  // leaves last_f NULL, and upb_decoder_jit_field() dereferences it.
+  if (last_f) upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
+  free(keys);
+
+  // --------- New code section (does not fall through) ------------------------
+
+  // End-of-buf / end-of-message.
+  if (!m->is_group) {
+    // This case doesn't exist for groups, because there eob really means
+    // eob, so that case just exits the jit directly.
+    |=>m->jit_endofbuf_pclabel:
+    |  cmp  PTR, DECODER->delim_end
+    |  jb   ->exit_jit    // We are at eob, but not end-of-submsg.
+  }
+
+  |=>m->jit_endofmsg_pclabel:
+  // We are at end-of-submsg: call endmsg handler (if any):
+  if (m->endmsg) {
+    // void endmsg(void *closure, upb_status *status) {
+    |  mov   ARG1_64, FRAME->closure
+    |  lea   ARG2_64, DECODER->dispatcher.status
+    |  callp m->endmsg
+  }
+
+  // Return to the parent: ret when several fields reference this message,
+  // leave the JIT for the top-level message, else jump to the single parent.
+  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
+    |  ret
+  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
+    |  jmp  ->exit_jit
+  } else {
+    |  jmp  =>m->jit_parent_field_done_pclabel
+  }
+}
+
+static const char *dbgfmt =  // Message printed by the unknown-field stub at pclabel 0 below.
+    "JIT encountered unknown field!  wt=%d, fn=%d\n";
+
+static void upb_decoder_jit(upb_decoder *d) {  // Emits the whole JIT program: prologue, per-message code, epilogue.
+  |  push  rbp
+  |  mov   rbp, rsp
+  |  push  r15
+  |  push  r14
+  |  push  r13
+  |  push  r12
+  |  push  rbx
+  |  mov   DECODER, ARG1_64
+  |  mov   FRAME, DECODER:ARG1_64->dispatcher.top
+  |  lea   STRREF, DECODER:ARG1_64->strref
+  |  mov   CLOSURE, FRAME->closure
+  |  mov   PTR, DECODER->ptr
+
+  upb_handlers *h = d->dispatcher.handlers;
+  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {  // Top-level code ends in ret, so call it.
+    |  call  =>h->msgs[0]->jit_startmsg_pclabel
+    |  jmp   ->exit_jit
+  }
+
+  // TODO: push return addresses for re-entry (will be necessary for multiple
+  // buffer support).
+  for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
+
+  |->exit_jit:  // Common exit: restore the registers pushed in the prologue, in reverse order.
+  |  pop   rbx
+  |  pop   r12
+  |  pop   r13
+  |  pop   r14
+  |  pop   r15
+  |  leave
+  |  ret
+  |=>0:  // pclabel 0: dispatch-table target for field numbers without a handler (see upb_decoder_makejit).
+  |  mov rdi, stderr
+  |  mov rsi, dbgfmt
+  |  callp  fprintf  // NOTE(review): the wt/fn arguments for dbgfmt are not loaded explicitly here -- confirm.
+  |  callp  abort
+}
+
+void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,  // Reserves three consecutive pclabels for field f.
+                                     uint32_t *pclabel_count) {
+  f->jit_pclabel = (*pclabel_count)++;              // Entry that checks the wire type first.
+  f->jit_pclabel_notypecheck = (*pclabel_count)++;  // Entry that skips the wire-type check.
+  f->jit_submsg_done_pclabel = (*pclabel_count)++;  // Where a submessage continues after finishing.
+}
+
+void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {  // First pass: labels and table for m.
+  m->jit_startmsg_pclabel = (*pclabel_count)++;
+  m->jit_endofbuf_pclabel = (*pclabel_count)++;
+  m->jit_endofmsg_pclabel = (*pclabel_count)++;
+  m->jit_unknownfield_pclabel = (*pclabel_count)++;
+  m->jit_parent_field_done_pclabel = UPB_NONE;  // Resolved by upb_decoder_jit_assignmsglabs2().
+  m->max_field_number = 0;
+  upb_inttable_iter i;
+  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+      i = upb_inttable_next(&m->fieldtab, i)) {
+    uint32_t key = upb_inttable_iter_key(i);
+    m->max_field_number = UPB_MAX(m->max_field_number, key);  // NOTE(review): keys look like full tags ((fieldnum << 3) | wiretype, cf. the lookups in upb_decoder_makejit), making this a loose bound -- confirm.
+    upb_fhandlers *f = upb_inttable_iter_value(i);
+    upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+  }
+  // XXX: Won't work for large field numbers; will need to use a upb_table.
+  m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));  // Filled in by upb_decoder_makejit(); result is not checked.
+}
+
+// Second pass: for messages that have only one parent, link them to the field
+// from which they are called.
+void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
+  upb_inttable_iter it = upb_inttable_begin(&m->fieldtab);
+  for (; !upb_inttable_done(it); it = upb_inttable_next(&m->fieldtab, it)) {
+    upb_fhandlers *field = upb_inttable_iter_value(it);
+    if (!upb_issubmsgtype(field->type)) continue;  // Only submessage fields link.
+    upb_mhandlers *child = upb_fhandlers_getsubmsg(field);
+    // The first field referencing a submessage becomes its return target; a
+    // second reference marks it as shared between several parents.
+    if (child->jit_parent_field_done_pclabel != UPB_NONE) {
+      child->jit_parent_field_done_pclabel = UPB_MULTIPLE;
+    } else {
+      child->jit_parent_field_done_pclabel = field->jit_submsg_done_pclabel;
+    }
+  }
+}
+
+void upb_decoder_makejit(upb_decoder *d) {
+  d->debug_info = NULL;
+
+  // Assign pclabels.
+  uint32_t pclabel_count = 1;
+  upb_handlers *h = d->dispatcher.handlers;
+  for (int i = 0; i < h->msgs_len; i++)
+    upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
+  for (int i = 0; i < h->msgs_len; i++)
+    upb_decoder_jit_assignmsglabs2(h->msgs[i]);
+
+  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
+    h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
+  }
+
+  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
+  if (!globals) { d->jit_code = NULL; d->jit_size = 0; return; }  // Out of memory: the JIT stays off.
+  dasm_init(d, 1);
+  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
+  dasm_growpc(d, pclabel_count);
+  dasm_setup(d, upb_jit_actionlist);
+  upb_decoder_jit(d);
+  dasm_link(d, &d->jit_size);
+  // Anonymous mappings portably take fd -1; failure is MAP_FAILED, not NULL.
+  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
+                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  if (d->jit_code == MAP_FAILED) {  // No code memory: a NULL jit_code keeps upb_decoder_enterjit() out.
+    d->jit_code = NULL;
+    d->jit_size = 0;
+    dasm_free(d);
+    free(globals);
+    return;
+  }
+  upb_reg_jit_gdb(d);
+  dasm_encode(d, d->jit_code);
+
+  // Create dispatch tables.
+  for (int i = 0; i < h->msgs_len; i++) {
+    upb_mhandlers *m = h->msgs[i];
+    for (uint32_t j = 0; j <= m->max_field_number; j++) {
+      upb_fhandlers *f = NULL;
+      for (int k = 0; k < 8; k++) {
+        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
+        if (f) break;
+      }
+      if (f) {
+        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+      } else {
+        // Don't handle unknown fields yet.
+        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+      }
+    }
+  }
+  dasm_free(d);
+  free(globals);
+  mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);
+  FILE *f = fopen("/tmp/machine-code", "wb");  // Debugging dump; best-effort only.
+  if (f) { fwrite(d->jit_code, d->jit_size, 1, f); fclose(f); }
+}
+
+void upb_decoder_freejit(upb_decoder *d) {  // Releases what upb_decoder_makejit() allocated for d.
+  munmap(d->jit_code, d->jit_size);
+  free(d->debug_info);  // May be NULL; free(NULL) is a no-op.
+  // TODO: unregister
+}
diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c
new file mode 100644
index 0000000..139dc88
--- /dev/null
+++ b/upb/pb/encoder.c
@@ -0,0 +1,421 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb_encoder.h"
+
+#include <stdlib.h>
+#include "descriptor.h"
+
+/* Functions for calculating sizes of wire values. ****************************/
+
+// Returns the number of bytes (1-10) needed to encode val as a varint.
+static size_t upb_v_uint64_t_size(uint64_t val) {
+#ifdef __GNUC__
+  int high_bit = val ? 63 - __builtin_clzll(val) : 0;  // clz(0) is undefined.
+#else
+  int high_bit = 0;
+  for (uint64_t tmp = val; tmp >>= 1;) high_bit++;
+#endif
+  return high_bit / 7 + 1;  // 7 payload bits per byte; zero takes one byte.
+}
+
+// Negative int32s sign-extend to 64 bits, matching the int64 wire encoding.
+static size_t upb_v_int32_t_size(int32_t val) {
+  return upb_v_uint64_t_size((uint64_t)(int64_t)val);
+}
+static size_t upb_v_uint32_t_size(uint32_t val) {
+  return upb_v_uint64_t_size((uint64_t)val);
+}
+// Fixed-width wire values always occupy the full width of their type.
+static size_t upb_f_uint64_t_size(uint64_t val) {
+  return sizeof val;  // Unevaluated operand; the value itself is irrelevant.
+}
+// 32-bit fixed-width values likewise always take four bytes.
+static size_t upb_f_uint32_t_size(uint32_t val) {
+  return sizeof val;
+}
+
+
+/* Functions to write wire values. ********************************************/
+
+// Since we know in advance the longest that the value could be, we always make
+// sure that our buffer is long enough.  This saves us from having to perform
+// bounds checks.
+
+// Writes val as a varint (wire type: UPB_WIRE_TYPE_VARINT) and returns the
+// position just past the last byte written (at most 10 bytes are used).
+static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
+{
+  while (val >= 0x80) {
+    *buf++ = (uint8_t)(val | 0x80);  // Low 7 bits plus the continuation bit.
+    val >>= 7;
+  }
+  *buf++ = (uint8_t)val;  // Final byte has the continuation bit clear.
+  return buf;
+}
+
+// Puts an unsigned 32-bit varint, verbatim (the high 32 bits stay zero).
+static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
+{
+  return upb_put_v_uint64_t(buf, (uint64_t)val);
+}
+
+// Puts a signed 32-bit varint, sign-extended to 64 bits first so that it stays
+// wire-compatible with 64-bit signed integers (negatives take 10 bytes).
+static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
+{
+  return upb_put_v_uint64_t(buf, (uint64_t)(int64_t)val);
+}
+
+// Stores val into buf[0..3], least-significant byte first.
+static void upb_put32(uint8_t *buf, uint32_t val) {
+  for (int i = 0; i < 4; i++)
+    buf[i] = (uint8_t)(val >> (8 * i));
+}
+
+// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
+// Returns a pointer just past the four bytes written.
+static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
+{
+#if UPB_UNALIGNED_READS_OK
+  *(uint32_t*)buf = val;  // NOTE(review): stores in host byte order.
+#else
+  upb_put32(buf, val);
+#endif
+  return buf + sizeof(uint32_t);
+}
+
+// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
+// Returns a pointer just past the eight bytes written.
+static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
+{
+#if UPB_UNALIGNED_READS_OK
+  *(uint64_t*)buf = val;  // NOTE(review): stores in host byte order.
+#else
+  // Little-endian: low word goes to buf[0..3], high word to buf[4..7].
+  upb_put32(buf, (uint32_t)val);
+  upb_put32(buf + sizeof(uint32_t), (uint32_t)(val >> 32));
+#endif
+  return buf + sizeof(uint64_t);
+}
+
+/* Functions to write and calculate sizes for .proto values. ******************/
+
+// Zig-zag encoding (sint32/sint64); all arithmetic is unsigned to avoid UB.
+static uint32_t upb_zzenc_32(int32_t n) { return ((uint32_t)n << 1) ^ -(uint32_t)(n < 0); }
+static uint64_t upb_zzenc_64(int64_t n) { return ((uint64_t)n << 1) ^ -(uint64_t)(n < 0); }
+
+/* Use macros to define a set of three functions for each .proto type:
+ *
+ *  // Converts a .proto value to its wire value and writes it to buf, which
+ *  // the caller guarantees is large enough.  Returns a pointer one past the
+ *  // data that was written.
+ *  uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
+ *
+ *  // Returns the number of bytes upb_put_INT32() writes for val; the size
+ *  // is taken from the converted wire value (e.g. after zig-zag encoding).
+ *  size_t upb_get_INT32_size(int32_t val);
+ *
+ *  // Given a .proto value s (source) convert it to a wire value.
+ *  int32_t upb_vtowv_INT32(int32_t s);
+ */
+
+#define VTOWV(type, wire_t, val_t) \
+  static wire_t upb_vtowv_ ## type(val_t s)
+
+#define PUT(type, v_or_f, wire_t, val_t, member_name) \
+  static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \
+    wire_t tmp = upb_vtowv_ ## type(val); \
+    return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \
+  }
+
+#define T(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t);  /* prototype for the functions below */ \
+  static size_t upb_get_ ## type ## _size(val_t val) { \
+    return upb_ ## v_or_f ## _ ## wire_t ## _size(upb_vtowv_ ## type(val)); \
+  } \
+  PUT(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t)
+
+T(INT32,    v,  int32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(INT64,    v, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(UINT32,   v, uint32_t, uint32_t, uint32)  { return s;               }
+T(UINT64,   v, uint64_t, uint64_t, uint64)  { return s;               }
+T(SINT32,   v, uint32_t, int32_t,  int32)   { return upb_zzenc_32(s); }
+T(SINT64,   v, uint64_t, int64_t,  int64)   { return upb_zzenc_64(s); }
+T(FIXED32,  f, uint32_t, uint32_t, uint32)  { return s;               }
+T(FIXED64,  f, uint64_t, uint64_t, uint64)  { return s;               }
+T(SFIXED32, f, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(SFIXED64, f, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(BOOL,     v, uint32_t, bool,     _bool)   { return (uint32_t)s;     }
+T(ENUM,     v, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(DOUBLE,   f, uint64_t, double,   _double) {
+  upb_value v;
+  v._double = s;
+  return v.uint64;
+}
+T(FLOAT,    f, uint32_t, float,    _float)  {
+  upb_value v;
+  v._float = s;
+  return v.uint32;
+}
+#undef VTOWV
+#undef PUT
+#undef T
+
+// Writes v, interpreted according to field type ft, to buf in wire format.
+static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
+{
+#define PUT_AS(t, m) case UPB_TYPE(t): return upb_put_ ## t(buf, v.m);
+  switch(ft) {
+    PUT_AS(DOUBLE,   _double)
+    PUT_AS(FLOAT,    _float)
+    PUT_AS(INT32,    int32)
+    PUT_AS(INT64,    int64)
+    PUT_AS(UINT32,   uint32)
+    PUT_AS(UINT64,   uint64)
+    PUT_AS(SINT32,   int32)
+    PUT_AS(SINT64,   int64)
+    PUT_AS(FIXED32,  uint32)
+    PUT_AS(FIXED64,  uint64)
+    PUT_AS(SFIXED32, int32)
+    PUT_AS(SFIXED64, int64)
+    PUT_AS(BOOL,     _bool)
+    PUT_AS(ENUM,     int32)
+    default: assert(false); return buf;
+  }
+#undef PUT_AS
+}
+
+// Returns the number of bytes v occupies on the wire as a value of type ft.
+static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
+{
+#define SIZE_AS(t, m) case UPB_TYPE(t): return upb_get_ ## t ## _size(v.m);
+  switch(ft) {
+    SIZE_AS(DOUBLE,   _double)
+    SIZE_AS(FLOAT,    _float)
+    SIZE_AS(INT32,    int32)
+    SIZE_AS(INT64,    int64)
+    SIZE_AS(UINT32,   uint32)
+    SIZE_AS(UINT64,   uint64)
+    SIZE_AS(SINT32,   int32)
+    SIZE_AS(SINT64,   int64)
+    SIZE_AS(FIXED32,  uint32)
+    SIZE_AS(FIXED64,  uint64)
+    SIZE_AS(SFIXED32, int32)
+    SIZE_AS(SFIXED64, int64)
+    SIZE_AS(BOOL,     _bool)
+    SIZE_AS(ENUM,     int32)
+    default: assert(false); return 0;
+  }
+#undef SIZE_AS
+}
+
+// Writes the tag for field "num" with wire type "wt" as a varint.
+static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
+                             upb_wire_type_t wt) {
+  return upb_put_UINT32(buf, (num << 3) | wt);
+}
+
+// The low three (wire type) bits never change a tag's varint length.
+static uint32_t _upb_get_tag_size(upb_field_number_t num) {
+  return upb_get_UINT32_size(num << 3);
+}
+
+
+/* upb_sizebuilder ************************************************************/
+
+struct upb_sizebuilder {
+  // Bytes accumulated so far for the message currently being sized.
+  uint32_t size;
+
+  // Parent totals saved while inside submessages; top is the next free slot.
+  uint32_t stack[UPB_MAX_NESTING], *top;
+
+  // Sizes of completed submessages, appended by endcb as each one closes.
+  uint32_t *sizes;
+  int sizes_len;   // Entries of sizes in use.
+  int sizes_size;  // Allocated capacity of sizes, in entries.
+
+  upb_status status;
+};
+
+// upb_sink callbacks.
+// Adds the encoded size of one scalar field (tag + value) to the running total.
+static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                                upb_value val,
+                                                upb_status *status)
+{
+  upb_sizebuilder *builder = (upb_sizebuilder*)sink;
+  const uint32_t tag_bytes = _upb_get_tag_size(f->number);
+  const uint32_t value_bytes = _upb_get_value_size(f->type, val);
+  (void)status;  // Nothing here can fail.
+  builder->size += tag_bytes + value_bytes;
+  return UPB_SINK_CONTINUE;
+}
+
+// Accounts for a string field's tag and length prefix when start >= 0.
+static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_strptr str,
+                                              int32_t start, uint32_t end,
+                                              upb_status *status)
+{
+  upb_sizebuilder *builder = (upb_sizebuilder*)sink;
+  (void)status;
+  (void)str;  // Only the string's extent matters, not its bytes.
+  // NOTE(review): only the tag and length prefix are counted; the payload
+  // bytes (end - start) are never added to the total -- confirm intended.
+  if(start < 0) return UPB_SINK_CONTINUE;
+  builder->size += _upb_get_tag_size(f->number);
+  builder->size += upb_get_UINT32_size(end - start);
+  return UPB_SINK_CONTINUE;
+}
+
+// Begins sizing a submessage (fresh total) or a group (counts its start tag).
+static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f,
+                                                upb_status *status)
+{
+  upb_sizebuilder *builder = (upb_sizebuilder*)sink;
+  (void)status;
+  if(f->type != UPB_TYPE(MESSAGE)) {
+    assert(f->type == UPB_TYPE(GROUP));
+    builder->size += _upb_get_tag_size(f->number);
+    return UPB_SINK_CONTINUE;
+  }
+  // NOTE(review): nothing stops top from running past stack[UPB_MAX_NESTING].
+  *builder->top++ = builder->size;
+  builder->size = 0;
+  return UPB_SINK_CONTINUE;
+}
+
+// Closes a submessage (records its size, folds it into the parent) or a group.
+static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type == UPB_TYPE(MESSAGE)) {
+    if(sb->sizes_len == sb->sizes_size) {
+      // Grow first so a failure leaves sb intact; also handles zero capacity.
+      int cap = sb->sizes_size > 0 ? sb->sizes_size * 2 : 16;
+      uint32_t *grown = realloc(sb->sizes, cap * sizeof(*grown));
+      if(!grown) return UPB_SINK_STOP;  // NOTE(review): best available signal.
+      sb->sizes = grown;
+      sb->sizes_size = cap;
+    }
+    sb->top--;
+    uint32_t child_size = sb->size;
+    sb->sizes[sb->sizes_len++] = child_size;
+    // The parent resumes from its saved total plus the whole submessage:
+    // tag, length delimiter, and the child's own bytes.
+    sb->size = *sb->top + _upb_get_tag_size(f->number) +
+               upb_get_UINT32_size(child_size) + child_size;
+  } else {
+    assert(f->type == UPB_TYPE(GROUP));
+    sb->size += _upb_get_tag_size(f->number);  // The END_GROUP tag.
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
+  _upb_sizebuilder_valuecb,  // Scalars: tag size plus value size.
+  _upb_sizebuilder_strcb,    // Strings: tag size plus length-prefix size.
+  _upb_sizebuilder_startcb,  // Submessage/group start.
+  _upb_sizebuilder_endcb     // Submessage/group end; records submessage size.
+};
+
+
+/* upb_sink callbacks *********************************************************/
+
+struct upb_encoder {
+  upb_sink base;      // Callbacks cast their upb_sink* argument to upb_encoder*.
+  //upb_bytesink *bytesink;
+  uint32_t *sizes;    // Submessage sizes, read back-to-front by startcb.
+  int size_offset;    // One past the next entry of sizes to consume.
+};
+
+
+// Scratch-buffer size for one callback, which may need to encode up to two
+// separate values (e.g. a tag followed by a value or length).
+#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2)
+
+static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf,
+                                             size_t len, upb_status *status)
+{
+  // TODO: conjure a upb_strptr that points to buf and hand it to the bytesink.
+  // Until then this is a stub: "written" is a placeholder and no data moves.
+  (void)s;
+  (void)buf;
+  (void)status;
+  size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr);
+  if(written < len) {
+    // TODO: mark to skip "written" bytes next time.
+    return UPB_SINK_STOP;
+  } else {
+    return UPB_SINK_CONTINUE;
+  }
+}
+
+// Emits one scalar field: its tag followed by the encoded value.
+static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                            upb_value val, upb_status *status)
+{
+  // TODO: handle packed encoding.
+  uint8_t out[UPB_ENCODER_BUFSIZE];
+  uint8_t *end = _upb_put_tag(out, f->number,
+                              upb_types[f->type].expected_wire_type);
+  end = upb_encode_value(end, f->type, val);
+  return _upb_encoder_push_buf((upb_encoder*)sink, out, end - out, status);
+}
+
+static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_strptr str,
+                                          int32_t start, uint32_t end,
+                                          upb_status *status)
+{
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  if(start >= 0) {  // Tag and length prefix are emitted only in this case.
+    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
+    ptr = upb_put_UINT32(ptr, end - start);
+  }
+  // TODO: properly handle partially consumed and partially supplied strings.
+  upb_sink_status ret = _upb_encoder_push_buf(s, buf, ptr - buf, status);
+  if(ret != UPB_SINK_CONTINUE) return ret;  // Header refused: send no payload.
+  return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status);
+}
+
+static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f,
+                                            upb_status *status)
+{
+  upb_encoder *enc = (upb_encoder*)sink;
+  uint8_t out[UPB_ENCODER_BUFSIZE], *end;
+  if(f->type != UPB_TYPE(GROUP)) {  // Length-prefixed submessage.
+    end = _upb_put_tag(out, f->number, UPB_WIRE_TYPE_DELIMITED);
+    end = upb_put_UINT32(end, enc->sizes[--enc->size_offset]);
+  } else {
+    end = _upb_put_tag(out, f->number, UPB_WIRE_TYPE_START_GROUP);
+  }
+  return _upb_encoder_push_buf(enc, out, end - out, status);
+}
+
+static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_status *status)
+{
+  // Only groups get a closing tag; nothing is emitted for other field types.
+  if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE;
+  uint8_t out[UPB_ENCODER_BUFSIZE];
+  uint8_t *end = _upb_put_tag(out, f->number, UPB_WIRE_TYPE_END_GROUP);
+  return _upb_encoder_push_buf((upb_encoder*)sink, out, end - out, status);
+}
+
+upb_sink_callbacks _upb_encoder_sink_vtbl = {
+  _upb_encoder_valuecb,  // Scalars: tag followed by the encoded value.
+  _upb_encoder_strcb,    // Strings: tag and length, then the string bytes.
+  _upb_encoder_startcb,  // START_GROUP tag, or tag + size for submessages.
+  _upb_encoder_endcb     // END_GROUP tag (groups only).
+};
+
diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h
new file mode 100644
index 0000000..64c5047
--- /dev/null
+++ b/upb/pb/encoder.h
@@ -0,0 +1,58 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Implements a set of upb_handlers that write protobuf data to the binary wire
+ * format.
+ *
+ * For messages that have any submessages, the encoder needs a buffer
+ * containing the submessage sizes, so they can be properly written at the
+ * front of each message.  Note that groups do *not* have this requirement.
+ */
+
+#ifndef UPB_ENCODER_H_
+#define UPB_ENCODER_H_
+
+#include "upb.h"
+#include "upb_stream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_encoder ****************************************************************/
+
+// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
+// buffer binary wire format.
+struct upb_encoder;
+typedef struct upb_encoder upb_encoder;
+
+upb_encoder *upb_encoder_new(upb_msgdef *md);
+void upb_encoder_free(upb_encoder *e);
+
+// Resets the given upb_encoder such that it is ready to begin encoding,
+// outputting data to "bytesink" (which must live until the encoder is
+// reset or destroyed).
+void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
+
+// Returns the upb_sink to which data can be written.  The sink is invalidated
+// when the encoder is reset or destroyed.  Note that if the client wants to
+// encode any length-delimited submessages it must first call
+// upb_encoder_buildsizes() below.
+upb_sink *upb_encoder_sink(upb_encoder *e);
+
+// Call prior to pushing any data with embedded submessages.  "src" must yield
+// exactly the same data as what will next be encoded, but in reverse order.
+// The encoder iterates over this data in order to determine the sizes of the
+// submessages.  If any errors are returned by the upb_src, the status will
+// be saved in *status.  If the client is sure that the upb_src will not throw
+// any errors, "status" may be NULL.
+void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_ENCODER_H_ */
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
new file mode 100644
index 0000000..3763ae0
--- /dev/null
+++ b/upb/pb/glue.c
@@ -0,0 +1,129 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/bytestream.h"
+#include "upb/descriptor.h"
+#include "upb/msg.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/glue.h"
+#include "upb/pb/textprinter.h"
+
+// Decodes the protobuf binary data in str[0..len) into msg (of type md).
+void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
+                  upb_status *status) {
+  upb_stringsrc src;
+  upb_decoder decoder;
+  upb_stringsrc_init(&src);
+  upb_stringsrc_reset(&src, str, len);
+  upb_decoder_initformsgdef(&decoder, md);
+  upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&src), 0, UINT64_MAX, msg);
+  upb_decoder_decode(&decoder, status);
+
+  upb_stringsrc_uninit(&src);
+  upb_decoder_uninit(&decoder);
+}
+
+#if 0  // NOTE(review): compiled out; its glue.h prototype is commented out too.
+void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
+                   bool single_line) {
+  upb_stringsink strsink;
+  upb_stringsink_init(&strsink);
+  upb_stringsink_reset(&strsink, str);
+
+  upb_textprinter *p = upb_textprinter_new();
+  upb_handlers *h = upb_handlers_new();
+  upb_textprinter_reghandlers(h, md);
+  upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line);
+
+  upb_status status = UPB_STATUS_INIT;
+  upb_msg_runhandlers(msg, md, h, p, &status);
+  // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be
+  // capable of returning an error.
+  assert(upb_ok(&status));
+  upb_status_uninit(&status);
+
+  upb_stringsink_uninit(&strsink);
+  upb_textprinter_free(p);
+  upb_handlers_unref(h);
+}
+#endif  // 0
+
+// TODO: read->load.
+// Loads the serialized descriptor in str[0..len) and commits it to symtab.
+void upb_read_descriptor(upb_symtab *symtab, const char *str, size_t len,
+                         upb_status *status) {
+  upb_stringsrc strsrc;
+  upb_stringsrc_init(&strsrc);
+  upb_stringsrc_reset(&strsrc, str, len);
+
+  upb_handlers *h = upb_handlers_new();
+  upb_descreader_reghandlers(h);
+
+  upb_decoder d;
+  upb_decoder_initforhandlers(&d, h);
+  upb_handlers_unref(h);
+  upb_descreader r;
+  upb_symtabtxn txn;
+  upb_symtabtxn_init(&txn);
+  upb_descreader_init(&r, &txn);
+  upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r);
+
+  upb_decoder_decode(&d, status);
+
+  // Set default accessors and layouts on every msgdef in the transaction.
+  upb_symtabtxn_iter i;
+  upb_symtabtxn_begin(&i, &txn);
+  for(; !upb_symtabtxn_done(&i); upb_symtabtxn_next(&i)) {
+    upb_def *def = upb_symtabtxn_iter_def(&i);
+    upb_msgdef *md = upb_dyncast_msgdef(def);
+    // Non-message defs need no accessors or layout; move on to the next def.
+    if (!md) continue;
+    upb_msg_iter j;
+    for(j = upb_msg_begin(md); !upb_msg_done(j); j = upb_msg_next(md, j)) {
+      upb_fielddef *f = upb_msg_iter_field(j);
+      upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
+    }
+    upb_msgdef_layout(md);
+  }
+
+  if (upb_ok(status)) upb_symtab_commit(symtab, &txn, status);
+
+  upb_symtabtxn_uninit(&txn);
+  upb_descreader_uninit(&r);
+  upb_stringsrc_uninit(&strsrc);
+  upb_decoder_uninit(&d);
+}
+
+// Reads a whole file into a malloc'd buffer (caller frees); NULL on error.
+char *upb_readfile(const char *filename, size_t *len) {
+  char *buf = NULL;
+  FILE *f = fopen(filename, "rb");
+  if(!f) return NULL;
+  long size = fseek(f, 0, SEEK_END) == 0 ? ftell(f) : -1;
+  if(size < 0 || fseek(f, 0, SEEK_SET) != 0) goto error;
+  buf = malloc(size ? size : 1);  // Never malloc(0): it may return NULL.
+  if(!buf || (size && fread(buf, size, 1, f) != 1)) goto error;
+  fclose(f);
+  if (len) *len = size;
+  return buf;
+error:
+  free(buf);  // NULL-safe; releases the buffer if the read itself failed.
+  fclose(f);
+  return NULL;
+}
+
+// Reads the file fname and loads its contents via upb_read_descriptor().
+void upb_read_descriptorfile(upb_symtab *symtab, const char *fname,
+                             upb_status *status) {
+  size_t nbytes;
+  char *contents = upb_readfile(fname, &nbytes);
+  if (contents)
+    upb_read_descriptor(symtab, contents, nbytes, status);
+  else
+    upb_status_setf(status, UPB_ERROR, "Couldn't read file: %s", fname);
+  free(contents);  // No-op when the read failed.
+}
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
new file mode 100644
index 0000000..5359120
--- /dev/null
+++ b/upb/pb/glue.h
@@ -0,0 +1,62 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb's core components like upb_decoder and upb_msg are carefully designed to
+ * avoid depending on each other for maximum orthogonality.  In other words,
+ * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is
+ * just one such structure.  A upb_msg can be serialized/deserialized into any
+ * format, protobuf binary format is just one such format.
+ *
+ * However, for convenience we provide functions here for doing common
+ * operations like deserializing protobuf binary format into a upb_msg.  The
+ * compromise is that this file drags in almost all of upb as a dependency,
+ * which could be undesirable if you're trying to use a trimmed-down build of
+ * upb.
+ *
+ * While these routines are convenient, they do not reuse any encoding/decoding
+ * state.  For example, if a decoder is JIT-based, it will be re-JITted every
+ * time these functions are called.  For this reason, if you are parsing lots
+ * of data and efficiency is an issue, these may not be the best functions to
+ * use (though they are useful for prototyping, before optimizing).
+ */
+
+#ifndef UPB_GLUE_H
+#define UPB_GLUE_H
+
+#include <stdbool.h>
+#include "upb/upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Forward-declares so we don't have to include everything in this .h file.
+// Clients should use the regular, typedef'd names (eg. upb_string).
+struct _upb_msg;
+struct _upb_msgdef;
+struct _upb_symtab;
+
+// Decodes the given string, which must be in protobuf binary format, to the
+// given upb_msg with msgdef "md", storing the status of the operation in "s".
+void upb_strtomsg(const char *str, size_t len, void *msg,
+                  struct _upb_msgdef *md, upb_status *s);
+
+//void upb_msgtotext(struct _upb_string *str, void *msg,
+//                   struct _upb_msgdef *md, bool single_line);
+
+void upb_read_descriptor(struct _upb_symtab *symtab, const char *str, size_t len,
+                         upb_status *status);
+
+void upb_read_descriptorfile(struct _upb_symtab *symtab, const char *fname,
+                             upb_status *status);
+
+char *upb_readfile(const char *filename, size_t *len);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/pb/jit_debug_elf_file.s b/upb/pb/jit_debug_elf_file.s
new file mode 100644
index 0000000..0b74630
--- /dev/null
+++ b/upb/pb/jit_debug_elf_file.s
@@ -0,0 +1,7 @@
+   .file "JIT mcode"
+   .text
+upb_jit_compiled_decoder:
+   .globl upb_jit_compiled_decoder
+   .size upb_jit_compiled_decoder, 0x321
+   .type upb_jit_compiled_decoder STT_FUNC
+   .space 0x321
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
new file mode 100644
index 0000000..ce029d5
--- /dev/null
+++ b/upb/pb/textprinter.c
@@ -0,0 +1,199 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <ctype.h>
+#include <float.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include "upb/pb/textprinter.h"
+
+struct _upb_textprinter {
+  upb_bytesink *bytesink;  // Destination for all output; set by reset().
+  int indent_depth;        // Current submessage nesting level.
+  bool single_line;        // If set, fields end with " " instead of "\n".
+  upb_status status;       // Passed to every bytesink call.
+};
+// Jumps to the enclosing function's "err" label when x is negative.
+#define CHECK(x) if ((x) < 0) goto err;
+
+// Writes strref's bytes with C-style escaping.  Returns 0, or -1 on error.
+static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref,
+                                      bool preserve_utf8) {
+  // Based on CEscapeInternal() from Google's protobuf release.
+  // TODO: we could read/write the bytesrc's/bytesink's buffers directly.
+  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
+  char buf[strref->len + 1], *src = buf;  // +1: a zero-length VLA is undefined.
+  char *end = src + strref->len;
+  upb_bytesrc_read(strref->bytesrc, strref->stream_offset, strref->len, buf);
+
+  // I think hex is prettier and more useful, but proto2 uses octal; should
+  // investigate whether it can parse hex also.
+  bool use_hex = false;
+  bool last_hex_escape = false; // true if last output char was \xNN
+
+  for (; src < end; src++) {
+    if (dstend - dst < 5) {  // Longest escape: 4 chars plus sprintf's NUL.
+      CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
+      dst = dstbuf;
+    }
+    const uint8_t c = (uint8_t)*src;  // <ctype.h> requires unsigned char values.
+    bool is_hex_escape = false;
+    switch (*src) {
+      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
+      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
+      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
+      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
+      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
+      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
+      default:
+        // Note that if we emit \xNN and the src character after that is a hex
+        // digit then that digit must be escaped too to prevent it being
+        // interpreted as part of the character code by C.
+        if ((!preserve_utf8 || c < 0x80) &&
+            (!isprint(c) || (last_hex_escape && isxdigit(c)))) {
+          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), c);
+          is_hex_escape = use_hex;
+          dst += 4;
+        } else {
+          *(dst++) = *src; break;
+        }
+    }
+    last_hex_escape = is_hex_escape;
+  }
+  // Flush remaining data, starting from the beginning of the staging buffer.
+  CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
+  return 0;
+err:
+  return -1;
+}
+
+// Emits two spaces per nesting level (nothing in single-line mode).
+static int upb_textprinter_indent(upb_textprinter *p) {
+  if(p->single_line) return 0;
+  for(int level = 0; level < p->indent_depth; level++) {
+    if(upb_bytesink_writestr(p->bytesink, "  ", &p->status) < 0) return -1;
+  }
+  return 0;
+}
+
+// Terminates a field: a space in single-line mode, otherwise a newline.
+// Returns 0 on success or -1 if the sink reports an error.
+static int upb_textprinter_endfield(upb_textprinter *p) {
+  const char *sep = p->single_line ? " " : "\n";
+  // A negative result from the sink is treated as failure.
+  if(upb_bytesink_writestr(p->bytesink, sep, &p->status) < 0) {
+    return -1;
+  }
+  return 0;
+}
+
+// Prints one scalar/string field as "name: value".  Returns UPB_BREAK on error.
+static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
+                                        upb_value val) {
+  upb_textprinter *p = _p;
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  CHECK(upb_textprinter_indent(p));
+  CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name));
+#define CASE(fmtstr, member) \
+    CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
+  switch(f->type) {
+    // TODO: figure out what we should really do for floating-point formats.
+    case UPB_TYPE(DOUBLE):
+      CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", DBL_DIG, upb_value_getdouble(val))); break;
+    case UPB_TYPE(FLOAT):
+      CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", FLT_DIG+2, upb_value_getfloat(val))); break;
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SFIXED64):
+    case UPB_TYPE(SINT64):
+      CASE("%" PRId64, int64)
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      CASE("%" PRIu64, uint64)
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      CASE("%" PRIu32, uint32);
+    case UPB_TYPE(ENUM): {
+      upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
+      const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
+      if (label) {
+        // Known value: print its name.  Unknown values print as int32 below.
+        CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status));
+        break;
+      }
+    }  // fallthrough
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(SINT32):
+      CASE("%" PRId32, int32)
+    case UPB_TYPE(BOOL):
+      CASE("%hhu", bool);
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES): {
+      CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
+      CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
+                                       f->type == UPB_TYPE(STRING)));
+      CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
+      break;
+    }
+  }
+#undef CASE
+  CHECK(upb_textprinter_endfield(p));
+  return UPB_CONTINUE;
+err:
+  return UPB_BREAK;
+}
+
+static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
+  upb_textprinter *p = _p;
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  // Stop on the first failed write ("%s {" always emits at least two bytes).
+  if (upb_textprinter_indent(p) < 0 ||
+      upb_bytesink_printf(p->bytesink, &p->status, "%s {", f->name) <= 0 ||
+      (!p->single_line && upb_bytesink_writestr(p->bytesink, "\n", &p->status) < 0))
+    return UPB_SBREAK;
+  p->indent_depth++;
+  return UPB_CONTINUE_WITH(_p);
+}
+
+static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
+  (void)fval;
+  upb_textprinter *p = _p;
+  p->indent_depth--;
+  if (upb_textprinter_indent(p) < 0 ||
+      upb_bytesink_writestr(p->bytesink, "}", &p->status) < 0 ||
+      upb_textprinter_endfield(p) < 0) return UPB_BREAK;  // Propagate sink errors.
+  return UPB_CONTINUE;
+}
+
+// Allocates an uninitialized printer; upb_textprinter_reset() sets it up.
+upb_textprinter *upb_textprinter_new() {
+  return malloc(sizeof(upb_textprinter));
+}
+
+void upb_textprinter_free(upb_textprinter *p) {
+  free(p);  // Only the printer itself is released.
+}
+
+void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
+                           bool single_line) {
+  p->indent_depth = 0;  // Always restart at the top level.
+  p->single_line = single_line;
+  p->bytesink = sink;
+}
+
+upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, upb_msgdef *m) {
+  upb_handlerset text_handlers = {  // Positional; unused hooks stay NULL.
+    NULL,                         // startmsg
+    NULL,                         // endmsg
+    upb_textprinter_value,        // value
+    upb_textprinter_startsubmsg,  // startsubmsg
+    upb_textprinter_endsubmsg,    // endsubmsg
+    NULL,                         // startseq
+    NULL,                         // endseq
+  };
+  return upb_handlers_reghandlerset(h, m, &text_handlers);
+}
diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h
new file mode 100644
index 0000000..9455208
--- /dev/null
+++ b/upb/pb/textprinter.h
@@ -0,0 +1,31 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#ifndef UPB_TEXT_H_
+#define UPB_TEXT_H_
+
+#include "upb/bytestream.h"
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _upb_textprinter;
+typedef struct _upb_textprinter upb_textprinter;
+
+upb_textprinter *upb_textprinter_new();
+void upb_textprinter_free(upb_textprinter *p);
+void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
+                           bool single_line);
+upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, upb_msgdef *m);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_TEXT_H_ */
diff --git a/upb/pb/varint.c b/upb/pb/varint.c
new file mode 100644
index 0000000..45caec1
--- /dev/null
+++ b/upb/pb/varint.c
@@ -0,0 +1,54 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/pb/varint.h"
+
+// Given an encoded varint v, returns an integer with a single bit set: the
+// (cleared) continuation bit of the varint's final byte.  Subtracting one
+// gives a mask covering exactly the bits below that position.  Returns 0 if
+// every byte of v has its continuation bit set (unterminated varint).
+INLINE uint64_t upb_get_vstopbit(uint64_t v) {
+  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;  // Only bit 7s can be 0.
+  return (filled + 1) & ~filled;  // Isolate the lowest zero bit.
+}
+INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; }
+
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+  uint64_t b;
+  memcpy(&b, r.p, sizeof(b));  // Always reads 8 bytes starting at r.p.
+  uint64_t stop_bit = upb_get_vstopbit(b);
+  b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);  // Payload bits only.
+  b +=       b & 0x007f007f007f007fULL;   // Doubles even bytes: up 1 bit.
+  b +=  3 * (b & 0x0000ffff0000ffffULL);  // x4 on even 16-bit lanes: up 2.
+  b += 15 * (b & 0x00000000ffffffffULL);  // x16 on the low 32 bits: up 4.
+  if (stop_bit == 0) {
+    // Error: unterminated varint.
+    upb_decoderet err_r = {(void*)0, 0};
+    return err_r;
+  }
+  // stop_bit is bit 7 of the final byte, so (ctz + 1) / 8 is the byte count.
+  // NOTE(review): presumably r.val already holds 14 decoded bits -- confirm.
+  upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                        r.val | (b << 7)};
+  return my_r;
+}
+
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+  uint64_t b;
+  memcpy(&b, r.p, sizeof(b));  // Always reads 8 bytes starting at r.p.
+  uint64_t stop_bit = upb_get_vstopbit(b);
+  b &= (stop_bit - 1);  // Drop everything at and above the stop bit.
+  b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);  // 7 -> 14
+  b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);  // 14 -> 28
+  b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);  // 28 -> 56
+  if (stop_bit == 0) {
+    // Error: unterminated varint.
+    upb_decoderet err_r = {(void*)0, 0};
+    return err_r;
+  }
+  // stop_bit is bit 7 of the final byte, so (ctz + 1) / 8 is the byte count.
+  // NOTE(review): presumably r.val already holds 14 decoded bits -- confirm.
+  upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                        r.val | (b << 14)};
+  return my_r;
+}
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
new file mode 100644
index 0000000..1bbd193
--- /dev/null
+++ b/upb/pb/varint.h
@@ -0,0 +1,142 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A number of routines for varint manipulation (we keep them all around to
+ * have multiple approaches available for benchmarking).
+ */
+
+#ifndef UPB_VARINT_DECODER_H_
+#define UPB_VARINT_DECODER_H_
+
+#include <stdint.h>
+#include <string.h>
+#include "upb/upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Decoding *******************************************************************/
+
+// All decoding functions return this struct by value.  On success p points
+// just past the decoded varint and val holds the decoded value.
+typedef struct {
+  const char *p;  // NULL if the varint was unterminated.
+  uint64_t val;
+} upb_decoderet;
+
+// A basic branch-based decoder, uses 32-bit values to get good performance
+// on 32-bit architectures (but performs well on 64-bits also).
+//
+// Reads up to 10 bytes from p, stopping at the first byte whose high bit is
+// clear.  Returns {pointer past the varint, value}, or {NULL, 0} if the 10th
+// byte still has its continuation bit set.
+INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
+  upb_decoderet r = {NULL, 0};
+  uint32_t low, high = 0;
+  uint32_t b;
+  // Each byte is masked with 0x7f / 0x80 before use, so any sign extension
+  // from a signed char is harmless.
+  b = *(p++); low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  // The fifth byte straddles the two halves: its low 4 bits complete 'low',
+  // its upper 3 payload bits start 'high'.
+  b = *(p++); low  |= (b & 0x7f) << 28;
+              high  = (b & 0x7f) >>  4; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) <<  3; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
+  // Ten continuation bits in a row: unterminated, r.p is still NULL.
+  return r;
+
+done:
+  r.val = ((uint64_t)high << 32) | low;
+  r.p = p;
+  return r;
+}
+
+// Like the previous, but uses 64-bit values.
+//
+// Reads up to 10 bytes from p, stopping at the first byte whose high bit is
+// clear.  Returns {pointer past the varint, value}, or {NULL, 0} if the 10th
+// byte still has its continuation bit set.
+INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
+  uint64_t val;
+  uint64_t b;
+  upb_decoderet r = {(void*)0, 0};
+  // Each byte is masked with 0x7f / 0x80 before use, so any sign extension
+  // from a signed char is harmless.
+  b = *(p++); val  = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
+  // Ten continuation bits in a row: unterminated, r.p is still NULL.
+  return r;
+
+done:
+  r.val = val;
+  r.p = p;
+  return r;
+}
+
+// Decodes a varint of at most 8 bytes without branching (except for error).
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
+
+// Another implementation of the previous.
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
+
+// Template for a function that checks the first two bytes with branching
+// and dispatches 2-10 bytes with a separate function.
+//
+// The generated function is named upb_vdecode_check2_<name>.  One- and
+// two-byte varints are returned directly; otherwise decode_max8_function is
+// called with r.p pointing past the first two bytes and r.val holding the
+// 14 bits decoded so far.
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {           \
+  uint8_t *p = (uint8_t*)_p;                                                 \
+  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \
+  upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)};        \
+  if ((*(p + 1) & 0x80) == 0) return r;                                      \
+  return decode_max8_function(r);                                            \
+}
+
+// Instantiate one checked decoder per max8 implementation.
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
+#undef UPB_VARINT_DECODER_CHECK2
+
+// Our canonical functions for decoding varints, based on the currently
+// favored best-performing implementations.
+INLINE upb_decoderet upb_vdecode_fast(const char *p) {
+  // check2_massimino when long is 8 bytes wide, branch32 otherwise.
+  return (sizeof(long) == 8) ? upb_vdecode_check2_massimino(p)
+                             : upb_vdecode_branch32(p);
+}
+
+// Canonical decoder for the tail of a varint; currently the massimino
+// implementation.
+INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
+  upb_decoderet decoded = upb_vdecode_max8_massimino(r);
+  return decoded;
+}
+
+
+/* Encoding *******************************************************************/
+
+// Returns the number of bytes needed to hold the significant bits of val,
+// i.e. the index of its highest non-zero byte plus one.  A value of 0 still
+// occupies one byte.
+INLINE size_t upb_value_size(uint64_t val) {
+  // Handle 0 up front: the result of __builtin_clzll(0) is undefined, so it
+  // must never be evaluated with a zero argument.
+  if (val == 0) return 1;
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of top set bit.
+#else
+  int high_bit = 0;
+  uint64_t tmp = val;
+  while(tmp >>= 1) high_bit++;
+#endif
+  return high_bit / 8 + 1;
+}
+
+// Encodes a 32-bit varint, *not* sign-extended.  The encoded bytes are packed
+// into the returned integer, first byte in the least-significant position.
+// A 32-bit value can need 5 varint bytes (40 bits), so every shift is done in
+// 64 bits; shifting an int or uint32_t by 31/32 here would be undefined.
+INLINE uint64_t upb_vencode32(uint32_t val) {
+  uint64_t ret = 0;
+  for (int bitpos = 0; val; bitpos+=8, val >>=7) {
+    // Set the continuation (top) bit of the previously emitted byte.
+    if (bitpos > 0) ret |= ((uint64_t)1 << (bitpos-1));
+    ret |= (uint64_t)(val & 0x7f) << bitpos;
+  }
+  return ret;
+}
+
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_VARINT_DECODER_H_ */
diff --git a/upb/table.c b/upb/table.c
new file mode 100644
index 0000000..71aca16
--- /dev/null
+++ b/upb/table.c
@@ -0,0 +1,574 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * There are a few printf's strewn throughout this file, uncommenting them
+ * can be useful for debugging.
+ */
+
+#include "upb/table.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const double MAX_LOAD = 0.85;
+
+// The minimum density (occupied fraction) we will allow for an array part.
+// This is a speed/memory-usage tradeoff (though it's not straightforward
+// because of cache effects).  The lower this is, the more memory we'll use.
+static const double MIN_DENSITY = 0.1;
+
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+
+/* Base table (shared code) ***************************************************/
+
+// Number of buckets in the hash part (2^size_lg2).
+static uint32_t upb_table_size(upb_table *t) {
+  return 1 << t->size_lg2;
+}
+
+// Size in bytes of each hash-part entry.
+static size_t upb_table_entrysize(upb_table *t) {
+  return t->entry_size;
+}
+
+// Size in bytes of each stored value.
+static size_t upb_table_valuesize(upb_table *t) {
+  return t->value_size;
+}
+
+// Sets up the shared base table with room for at least 'size' entries of
+// 'entry_size' bytes each.  The bucket count is rounded up to a power of two
+// (minimum 2).  The entries themselves are left uninitialized, and value_size
+// is not touched; both are the caller's responsibility.
+void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) {
+  t->count = 0;
+  t->entry_size = entry_size;
+  for (t->size_lg2 = 1; upb_table_size(t) < size; t->size_lg2++) {
+    // Keep doubling until the table is big enough.
+  }
+  const uint32_t nbuckets = upb_table_size(t);
+  t->mask = nbuckets - 1;
+  size_t bytes = nbuckets * t->entry_size;
+  t->entries = malloc(bytes);
+}
+
+// Releases the entry storage allocated by upb_table_init().
+void upb_table_free(upb_table *t) {
+  free(t->entries);
+}
+
+/* upb_inttable ***************************************************************/
+
+// Address of the i-th entry in the hash part of t.  No bounds checking is
+// done: the iterator deliberately passes -1 and the table size.
+static upb_inttable_entry *intent(upb_inttable *t, int32_t i) {
+  upb_table *base = &t->t;
+  return UPB_INDEX(base->entries, i, base->entry_size);
+}
+
+// Bucket count of the hash part of t (the array part is not included).
+static uint32_t upb_inttable_hashtablesize(upb_inttable *t) {
+  upb_table *hash = &t->t;
+  return upb_table_size(hash);
+}
+
+// Initializes t with an array part of 'arrsize' slots (at least 1) and a hash
+// part sized for 'hashsize' entries, each value being 'value_size' bytes.
+// Every slot in both parts starts out marked empty.
+void upb_inttable_sizedinit(upb_inttable *t, uint32_t arrsize, uint32_t hashsize,
+                            uint16_t value_size) {
+  // Hash part.
+  upb_table_init(&t->t, hashsize, _upb_inttable_entrysize(value_size));
+  const uint32_t nbuckets = upb_table_size(&t->t);
+  for (uint32_t bucket = 0; bucket < nbuckets; bucket++) {
+    upb_inttable_entry *ent = intent(t, bucket);
+    ent->hdr.key = 0;
+    ent->hdr.next = UPB_END_OF_CHAIN;
+    ent->val.has_entry = 0;
+  }
+  t->t.value_size = value_size;
+
+  // Array part.  Always make it at least 1 long, so that we know key 0
+  // won't be in the hash part (which lets us speed up that code path).
+  t->array_size = UPB_MAX(1, arrsize);
+  t->array_count = 0;
+  t->array = malloc(upb_table_valuesize(&t->t) * t->array_size);
+  for (uint32_t slot = 0; slot < t->array_size; slot++) {
+    upb_inttable_value *v = UPB_INDEX(t->array, slot, upb_table_valuesize(&t->t));
+    v->has_entry = false;
+  }
+}
+
+// Initializes t with a hash part sized for 'hashsize' entries and only the
+// minimal array part.
+void upb_inttable_init(upb_inttable *t, uint32_t hashsize, uint16_t value_size) {
+  const uint32_t no_array = 0;
+  upb_inttable_sizedinit(t, no_array, hashsize, value_size);
+}
+
+// Frees both the array part and the hash part of t.
+void upb_inttable_free(upb_inttable *t) {
+  free(t->array);
+  upb_table_free(&t->t);
+}
+
+// Scans the hash part from the front and returns the index of the first
+// unoccupied bucket.  The caller must guarantee that one exists.
+static uint32_t empty_intbucket(upb_inttable *table)
+{
+  // TODO: does it matter that this is biased towards the front of the table?
+  uint32_t bucket = 0;
+  while (bucket < upb_inttable_hashtablesize(table)) {
+    if (!intent(table, bucket)->val.has_entry) return bucket;
+    bucket++;
+  }
+  assert(false);
+  return 0;
+}
+
+// The insert routines have a lot more code duplication between int/string
+// variants than I would like, but there's just a bit too much that varies to
+// parameterize them.
+//
+// Inserts (key, val) into t without checking the load factor; the key must
+// not already be present and, for hash-part keys, an empty bucket must exist.
+// Keys below array_size go straight into the array part; all others go into
+// the hash part, where collisions are resolved by internal chaining.
+static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
+  assert(upb_inttable_lookup(t, key) == NULL);
+  upb_inttable_value *table_val;
+  if (_upb_inttable_isarrkey(t, key)) {
+    table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
+    t->array_count++;
+    //printf("Inserting key %d to Array part! %p\n", key, table_val);
+  } else {
+    t->t.count++;
+    uint32_t bucket = _upb_inttable_bucket(t, key);
+    upb_inttable_entry *table_e = intent(t, bucket);
+    //printf("Hash part!  Inserting into bucket %d?\n", bucket);
+    if(table_e->val.has_entry) {  /* Collision. */
+      //printf("Collision!\n");
+      if(bucket == _upb_inttable_bucket(t, table_e->hdr.key)) {
+        /* Existing element is in its main position.  Find an empty slot to
+         * place our new element and append it to this key's chain. */
+        uint32_t empty_bucket = empty_intbucket(t);
+        while (table_e->hdr.next != UPB_END_OF_CHAIN)
+          table_e = intent(t, table_e->hdr.next);
+        table_e->hdr.next = empty_bucket;
+        table_e = intent(t, empty_bucket);
+      } else {
+        /* Existing element is not in its main position.  Move it to an empty
+         * slot and put our element in its main position. */
+        uint32_t empty_bucket = empty_intbucket(t);
+        uint32_t evictee_bucket = _upb_inttable_bucket(t, table_e->hdr.key);
+        memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
+        /* Walk the evictee's chain from its main position to find the link
+         * that pointed at 'bucket', and redirect it to the new location. */
+        upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
+        while(1) {
+          assert(evictee_e->val.has_entry);
+          assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
+          if(evictee_e->hdr.next == bucket) {
+            evictee_e->hdr.next = empty_bucket;
+            break;
+          }
+          evictee_e = intent(t, evictee_e->hdr.next);
+        }
+        /* table_e remains set to our mainpos. */
+      }
+    }
+    //printf("Inserting!  to:%p, copying to: %p\n", table_e, &table_e->val);
+    table_val = &table_e->val;
+    table_e->hdr.key = key;
+    table_e->hdr.next = UPB_END_OF_CHAIN;
+  }
+  // Copy the caller's value, then set the occupancy flag, which lives inside
+  // the value itself and so overwrites whatever the caller had in that bit.
+  memcpy(table_val, val, upb_table_valuesize(&t->t));
+  table_val->has_entry = true;
+  assert(upb_inttable_lookup(t, key) == table_val);
+}
+
+// Copies every element of src into dst.  The caller must have sized dst so
+// that no resize becomes necessary along the way.
+static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
+  upb_inttable_iter i = upb_inttable_begin(src);
+  while (!upb_inttable_done(i)) {
+    assert((double)(upb_table_count(&dst->t)) /
+                    upb_inttable_hashtablesize(dst) <= MAX_LOAD);
+    intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i));
+    i = upb_inttable_next(src, i);
+  }
+}
+
+// Public insert: grows the table first if one more hash-part entry would push
+// the load factor past MAX_LOAD, then inserts (key, val).
+void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
+  const bool needs_resize =
+      (double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD;
+  if (needs_resize) {
+    // Rebuild into a fresh table sized at twice the current element count.
+    // The replacement has no array part even if the old table had one; if/when
+    // the user calls upb_inttable_compact() again, one is created then.
+    upb_inttable grown;
+    const uint32_t new_size = upb_inttable_count(t)*2;
+    upb_inttable_init(&grown, new_size, upb_table_valuesize(&t->t));
+    upb_inttable_insertall(&grown, t);
+    upb_inttable_free(t);
+    *t = grown;
+  }
+  intinsert(t, key, val);
+}
+
+// Rebuilds t so that as many keys as possible live in a dense array part:
+// picks the largest power-of-two array size whose occupancy would be at least
+// MIN_DENSITY, then re-inserts every element into a freshly sized table.
+void upb_inttable_compact(upb_inttable *t) {
+  // Find the largest array part we can that satisfies the MIN_DENSITY
+  // definition.  For now we just count down powers of two.
+  uint32_t largest_key = 0;
+  for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
+      i = upb_inttable_next(t, i)) {
+    largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
+  }
+  // Keys can be as large as 2^32-1, for which lg2_array reaches 32; do the
+  // shift in 64 bits so it is well defined regardless of the width of long.
+  int lg2_array = 0;
+  while (((uint64_t)1 << lg2_array) < largest_key) ++lg2_array;
+  ++lg2_array;  // Undo the first iteration.
+  // The array size is stored in a uint32_t, so 2^31 slots is the largest
+  // power of two we can consider.
+  if (lg2_array > 32) lg2_array = 32;
+  size_t array_size = 1;
+  int array_count = 0;
+  while (lg2_array > 0) {
+    // Shift an unsigned value: 1 << 31 would overflow a signed int.
+    array_size = ((size_t)1 << --lg2_array);
+    //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));
+    if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) {
+      // Even if 100% of the keys were in the array part, an array of this
+      // size would not be dense enough.
+      continue;
+    }
+    array_count = 0;
+    for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
+        i = upb_inttable_next(t, i)) {
+      if (upb_inttable_iter_key(i) < array_size)
+        array_count++;
+    }
+    //printf("There would be %d things in that array\n", array_count);
+    if ((double)array_count / array_size >= MIN_DENSITY) break;
+  }
+  upb_inttable new_table;
+  // Size the hash part so the remaining elements (plus one) stay under
+  // MAX_LOAD.
+  int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
+  //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
+  upb_inttable_sizedinit(&new_table, array_size, hash_size,
+                         upb_table_valuesize(&t->t));
+  //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
+  upb_inttable_insertall(&new_table, t);
+  upb_inttable_free(t);
+  *t = new_table;
+}
+
+// Returns an iterator positioned at the first element of t; if t has no
+// elements the returned iterator is already done.
+upb_inttable_iter upb_inttable_begin(upb_inttable *t) {
+  // Start one slot before the array part: key -1 wraps around to 0 when
+  // upb_inttable_next() pre-increments it.
+  upb_inttable_iter start;
+  start.key = -1;
+  start.value = NULL;
+  start.array_part = true;
+  return upb_inttable_next(t, start);
+}
+
+// Advances iter to the next occupied slot of t: first the remaining slots of
+// the array part, then the entries of the hash part.  The returned iterator
+// has value == NULL once every element has been visited.
+upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter) {
+  const size_t hdrsize = sizeof(upb_inttable_header);
+  const size_t entsize = upb_table_entrysize(&t->t);
+  if (iter.array_part) {
+    // In the array part the key is simply the slot index.
+    while (++iter.key < t->array_size) {
+      //printf("considering value %d\n", iter.key);
+      iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size);
+      if (iter.value->has_entry) return iter;
+    }
+    //printf("Done with array part!\n");
+    iter.array_part = false;
+    // Point to the value of the table[-1] entry.
+    iter.value = UPB_INDEX(intent(t, -1), 1, hdrsize);
+  }
+  void *end = intent(t, upb_inttable_hashtablesize(t));
+  // Point to the entry for the value that was previously in iter.
+  upb_inttable_entry *e = UPB_INDEX(iter.value, -1, hdrsize);
+  // Step forward one entry at a time until an occupied one (or the end of the
+  // hash part) is found.
+  do {
+    e = UPB_INDEX(e, 1, entsize);
+    //printf("considering value %p (val: %p)\n", e, &e->val);
+    if(e == end) {
+      //printf("No values.\n");
+      iter.value = NULL;
+      return iter;
+    }
+  } while(!e->val.has_entry);
+  //printf("USING VALUE! %p\n", e);
+  iter.key = e->hdr.key;
+  iter.value = &e->val;
+  return iter;
+}
+
+
+/* upb_strtable ***************************************************************/
+
+// Address of the i-th entry of t.  i may be -1 or the table size (the
+// iterator uses both as sentinels); anything larger trips the assert.
+static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
+  upb_table *base = &t->t;
+  assert(i <= (int32_t)upb_table_size(&t->t));
+  return UPB_INDEX(base->entries, i, base->entry_size);
+}
+
+// Bucket count of the string table t.
+static uint32_t upb_strtable_size(upb_strtable *t) {
+  upb_table *base = &t->t;
+  return upb_table_size(base);
+}
+
+// Initializes t with room for at least 'size' entries whose values are
+// 'valuesize' bytes each.  Every bucket starts out empty (NULL key).
+void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
+  t->t.value_size = valuesize;
+  // Each entry is the header plus the value, rounded up to a multiple of 8.
+  upb_table_init(&t->t, size,
+                 upb_align_up(sizeof(upb_strtable_header) + valuesize, 8));
+  uint32_t bucket = 0;
+  while (bucket < upb_table_size(&t->t)) {
+    upb_strtable_entry *ent = strent(t, bucket++);
+    ent->hdr.next = UPB_END_OF_CHAIN;
+    ent->hdr.key = NULL;
+  }
+}
+
+// Destroys t: frees every key string held by the table, then the entry
+// storage itself.
+void upb_strtable_free(upb_strtable *t) {
+  upb_strtable_iter it;
+  upb_strtable_begin(&it, t);
+  while (!upb_strtable_done(&it)) {
+    free((char*)upb_strtable_iter_key(&it));
+    upb_strtable_next(&it);
+  }
+  upb_table_free(&t->t);
+}
+
+// Main-position bucket for 'key': its MurmurHash2 (seed 0) masked down to the
+// table size.
+static uint32_t strtable_bucket(upb_strtable *t, const char *key) {
+  const size_t keylen = strlen(key);
+  const uint32_t hash = MurmurHash2(key, keylen, 0);
+  return hash & t->t.mask;
+}
+
+// Looks up the NUL-terminated 'key' in t.  Returns a pointer to the stored
+// value, or NULL if the key is not present.
+void *upb_strtable_lookup(upb_strtable *t, const char *key) {
+  uint32_t bucket = strtable_bucket(t, key);
+  for (;;) {
+    upb_strtable_entry *e = strent(t, bucket);
+    if (e->hdr.key != NULL && strcmp(e->hdr.key, key) == 0) return &e->val;
+    // Follow the collision chain until it runs out.
+    bucket = e->hdr.next;
+    if (bucket == UPB_END_OF_CHAIN) return NULL;
+  }
+}
+
+// Like upb_strtable_lookup(), but 'key' need not be NUL-terminated; its
+// length is given by 'len'.  Returns a pointer to the stored value, or NULL
+// if the key is not present (or if a temporary copy of a long key could not
+// be allocated).
+void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len) {
+  // TODO: improve.
+  // Build a NUL-terminated copy of the key.  Short keys use a fixed stack
+  // buffer; longer ones go on the heap so that a caller-supplied length can
+  // never overflow the stack (as an unbounded variable-length array could).
+  char stackbuf[128];
+  char *key2 = stackbuf;
+  if (len >= sizeof(stackbuf)) {
+    key2 = malloc(len + 1);
+    if (key2 == NULL) return NULL;
+  }
+  memcpy(key2, key, len);
+  key2[len] = '\0';
+  // The result points into the table, not into key2, so key2 can be released.
+  void *ret = upb_strtable_lookup(t, key2);
+  if (key2 != stackbuf) free(key2);
+  return ret;
+}
+
+// Scans the table from the front and returns the index of the first bucket
+// with no key.  The caller must guarantee that one exists.
+static uint32_t empty_strbucket(upb_strtable *table) {
+  // TODO: does it matter that this is biased towards the front of the table?
+  uint32_t bucket = 0;
+  while (bucket < upb_strtable_size(table)) {
+    if (strent(table, bucket)->hdr.key == NULL) return bucket;
+    bucket++;
+  }
+  assert(false);
+  return 0;
+}
+
+// Inserts (key, val) into t without checking the load factor; the key must
+// not already be present and an empty bucket must exist.  The key string is
+// duplicated with strdup(), so the table holds its own copy.  Collisions are
+// resolved by internal chaining.
+static void strinsert(upb_strtable *t, const char *key, const void *val) {
+  assert(upb_strtable_lookup(t, key) == NULL);
+  t->t.count++;
+  uint32_t bucket = strtable_bucket(t, key);
+  upb_strtable_entry *table_e = strent(t, bucket);
+  if(table_e->hdr.key) {  /* Collision. */
+    if(bucket == strtable_bucket(t, table_e->hdr.key)) {
+      /* Existing element is in its main position.  Find an empty slot to
+       * place our new element and append it to this key's chain. */
+      uint32_t empty_bucket = empty_strbucket(t);
+      while (table_e->hdr.next != UPB_END_OF_CHAIN)
+        table_e = strent(t, table_e->hdr.next);
+      table_e->hdr.next = empty_bucket;
+      table_e = strent(t, empty_bucket);
+    } else {
+      /* Existing element is not in its main position.  Move it to an empty
+       * slot and put our element in its main position. */
+      uint32_t empty_bucket = empty_strbucket(t);
+      uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
+      memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
+      /* Walk the evictee's chain from its main position to find the link
+       * that pointed at 'bucket', and redirect it to the new location. */
+      upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
+      while(1) {
+        assert(evictee_e->hdr.key);
+        assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
+        if(evictee_e->hdr.next == bucket) {
+          evictee_e->hdr.next = empty_bucket;
+          break;
+        }
+        evictee_e = strent(t, evictee_e->hdr.next);
+      }
+      /* table_e remains set to our mainpos. */
+    }
+  }
+  //fprintf(stderr, "val: %p\n", val);
+  //fprintf(stderr, "val size: %d\n", t->t.value_size);
+  memcpy(&table_e->val, val, t->t.value_size);
+  table_e->hdr.key = strdup(key);
+  table_e->hdr.next = UPB_END_OF_CHAIN;
+  //fprintf(stderr, "Looking up, string=%s...\n", key);
+  assert(upb_strtable_lookup(t, key) == &table_e->val);
+  //printf("Yay!\n");
+}
+
+// Public insert: doubles the table first if one more entry would push the
+// load factor past MAX_LOAD, then inserts (key, val).
+void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
+  const bool needs_resize =
+      (double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD;
+  if (needs_resize) {
+    // Build a table of double the size and re-insert every old element.
+    // strinsert() duplicates each key, so the old table (keys included) can
+    // be freed afterwards.
+    upb_strtable grown;
+    upb_strtable_init(&grown, upb_strtable_size(t)*2, t->t.value_size);
+    upb_strtable_iter it;
+    upb_strtable_begin(&it, t);
+    while (!upb_strtable_done(&it)) {
+      strinsert(&grown, upb_strtable_iter_key(&it), upb_strtable_iter_value(&it));
+      upb_strtable_next(&it);
+    }
+    upb_strtable_free(t);
+    *t = grown;
+  }
+  strinsert(t, key, val);
+}
+
+// Positions *i at the first element of t; if t has no elements the iterator
+// is immediately done.
+void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t) {
+  i->t = t;
+  // Start one entry before the table; upb_strtable_next() steps forward first.
+  i->e = strent(t, -1);
+  upb_strtable_next(i);
+}
+
+// Advances *i to the next bucket that holds a key, or marks the iterator done
+// (i->e == NULL) when the end of the table is reached.
+void upb_strtable_next(upb_strtable_iter *i) {
+  upb_strtable *tab = i->t;
+  upb_strtable_entry *end = strent(tab, upb_strtable_size(tab));
+  upb_strtable_entry *cur = i->e;
+  for (;;) {
+    cur = (void*)((char*)cur + tab->t.entry_size);
+    if (cur == end) { i->e = NULL; return; }
+    if (cur->hdr.key != NULL) break;
+  }
+  i->e = cur;
+}
+
+#ifdef UPB_UNALIGNED_READS_OK
+//-----------------------------------------------------------------------------
+// MurmurHash2, by Austin Appleby (released as public domain).
+// Reformatted and C99-ified by Joshua Haberman.
+// Note - This code makes a few assumptions about how your machine behaves -
+//   1. We can read a 4-byte value from any address without crashing
+//   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
+// And it has a few limitations -
+//   1. It will not work incrementally.
+//   2. It will not produce the same results on little-endian and big-endian
+//      machines.
+//
+// Returns the 32-bit hash of the 'len' bytes at 'key', seeded with 'seed'.
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+  const uint32_t m = 0x5bd1e995;
+  const int32_t r = 24;
+
+  // Initialize the hash to a 'random' value
+  uint32_t h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+  const uint8_t * data = (const uint8_t *)key;
+  while(len >= 4) {
+    // Direct (possibly unaligned) 4-byte load; see assumption 1 above.
+    uint32_t k = *(uint32_t *)data;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+  // (each case intentionally falls through to the next).
+  switch(len) {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0]; h *= m;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+
+#else // !UPB_UNALIGNED_READS_OK
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms.
+// Performance will be lower than MurmurHash2
+
+// One mixing round: folds the 32-bit chunk k into the hash h.  Uses the
+// variable 'r' from the enclosing scope and modifies both h and k.
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+// Returns the 32-bit hash of the 'len' bytes at 'key', seeded with 'seed'.
+static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
+{
+  const uint32_t m = 0x5bd1e995;
+  const int32_t r = 24;
+  const uint8_t * data = (const uint8_t *)key;
+  uint32_t h = seed ^ len;
+  // Offset of 'data' from the previous 4-byte boundary (0 when aligned).
+  uint8_t align = (uintptr_t)data & 3;
+
+  if(align && (len >= 4)) {
+    // Pre-load the temp registers
+    uint32_t t = 0, d = 0;
+
+    // Read the leading bytes one at a time, up to the next aligned address
+    // (each case intentionally falls through to the next).
+    switch(align) {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    // Shift amounts used to stitch each chunk together from the tail of the
+    // previous aligned word (t) and the head of the current one (d).
+    int32_t sl = 8 * (4-align);
+    int32_t sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4) {
+      d = *(uint32_t *)data;
+      t = (t >> sr) | (d << sl);
+
+      uint32_t k = t;
+
+      MIX(h,k,m);
+
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    d = 0;
+
+    if(len >= align) {
+      // Enough bytes remain to complete one more full chunk
+      // (cases intentionally fall through).
+      switch(align) {
+        case 3: d |= data[2] << 16;
+        case 2: d |= data[1] << 8;
+        case 1: d |= data[0];
+      }
+
+      uint32_t k = (t >> sr) | (d << sl);
+      MIX(h,k,m);
+
+      data += align;
+      len -= align;
+
+      //----------
+      // Handle tail bytes
+
+      switch(len) {
+        case 3: h ^= data[2] << 16;
+        case 2: h ^= data[1] << 8;
+        case 1: h ^= data[0]; h *= m;
+      };
+    } else {
+      // Fewer than 'align' bytes remain: fold them in together with what is
+      // left in t (cases intentionally fall through).
+      switch(len) {
+        case 3: d |= data[2] << 16;
+        case 2: d |= data[1] << 8;
+        case 1: d |= data[0];
+        case 0: h ^= (t >> sr) | (d << sl); h *= m;
+      }
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  } else {
+    // 'data' is already 4-byte aligned, or the input is shorter than 4 bytes
+    // (in which case the loop below does not run): same steps as the plain
+    // MurmurHash2.
+    while(len >= 4) {
+      uint32_t k = *(uint32_t *)data;
+
+      MIX(h,k,m);
+
+      data += 4;
+      len -= 4;
+    }
+
+    //----------
+    // Handle tail bytes
+
+    switch(len) {
+      case 3: h ^= data[2] << 16;
+      case 2: h ^= data[1] << 8;
+      case 1: h ^= data[0]; h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+}
+#undef MIX
+
+#endif // UPB_UNALIGNED_READS_OK
diff --git a/upb/table.h b/upb/table.h
new file mode 100644
index 0000000..376465b
--- /dev/null
+++ b/upb/table.h
@@ -0,0 +1,225 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * This file defines very fast int->struct (inttable) and string->struct
+ * (strtable) hash tables.  The struct can be of any size, and it is stored
+ * in the table itself, for cache-friendly performance.
+ *
+ * The table uses internal chaining with Brent's variation (inspired by the
+ * Lua implementation of hash tables).  The hash function for strings is
+ * Austin Appleby's "MurmurHash."
+ */
+
+#ifndef UPB_TABLE_H_
+#define UPB_TABLE_H_
+
+#include <assert.h>
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define UPB_END_OF_CHAIN (uint32_t)-1
+
+// Prefix of every inttable value: a one-bit flag the table uses to mark
+// whether the slot is occupied.
+typedef struct {
+  bool has_entry:1;
+  // The rest of the bits are the user's.
+} upb_inttable_value;
+
+// Bookkeeping stored in front of each value in the hash part.
+typedef struct {
+  uint32_t key;
+  uint32_t next;  // Internal chaining.
+} upb_inttable_header;
+
+// One hash-part entry: the header followed by the value.  Only the start of
+// the value is declared here; the real entry size is upb_table.entry_size.
+typedef struct {
+  upb_inttable_header hdr;
+  upb_inttable_value val;
+} upb_inttable_entry;
+
+// TODO: consider storing the hash in the entry.  This would avoid the need to
+// rehash on table resizes, but more importantly could possibly improve lookup
+// performance by letting us compare hashes before comparing lengths or the
+// strings themselves.
+//
+// Bookkeeping stored in front of each value.  A NULL key marks an empty
+// bucket.
+typedef struct {
+  char *key;         // We own, nullz. TODO: store explicit len?
+  uint32_t next;     // Internal chaining.
+} upb_strtable_header;
+
+// One strtable entry: the header followed by the value.  Only the first 32
+// bits of the value are declared here; the real entry size is
+// upb_table.entry_size.
+typedef struct {
+  upb_strtable_header hdr;
+  uint32_t val;      // Val is at least 32 bits.
+} upb_strtable_entry;
+
+// State shared by both table flavors (inttable and strtable).
+typedef struct {
+  void *entries;        // Hash table.
+  uint32_t count;       // Number of entries in the hash part.
+  uint32_t mask;        // Mask to turn hash value -> bucket.
+  uint16_t entry_size;  // Size of each entry.
+  uint16_t value_size;  // Size of each value.
+  uint8_t size_lg2;     // Size of the hash table part is 2^size_lg2 entries.
+} upb_table;
+
+// String-keyed table: just the shared base table.
+typedef struct {
+  upb_table t;
+} upb_strtable;
+
+// Integer-keyed table: keys below array_size live in a directly indexed
+// array part, all other keys in the hash part (t).
+typedef struct {
+  upb_table t;
+  void *array;           // Array part of the table.
+  uint32_t array_size;   // Array part size.
+  uint32_t array_count;  // Array part number of elements.
+} upb_inttable;
+
+// Initialize and free a table, respectively.  Specify the initial size
+// with 'size' (the size will be increased as necessary).  Value size
+// specifies how many bytes each value in the table is.
+//
+// WARNING!  The lowest bit of every entry is reserved by the hash table.
+// It will always be overwritten when you insert, and must not be modified
+// when looked up!
+void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
+void upb_inttable_free(upb_inttable *table);
+void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
+void upb_strtable_free(upb_strtable *table);
+
+// Number of values in the hash table.
+INLINE uint32_t upb_table_count(upb_table *t) {
+  return t->count;
+}
+
+// Total number of elements in an inttable: array part plus hash part.
+INLINE uint32_t upb_inttable_count(upb_inttable *t) {
+  const uint32_t in_array = t->array_count;
+  return in_array + upb_table_count(&t->t);
+}
+
+// Number of elements in a strtable.
+INLINE uint32_t upb_strtable_count(upb_strtable *t) {
+  upb_table *base = &t->t;
+  return upb_table_count(base);
+}
+
+// Inserts the given key into the hashtable with the given value.  The key must
+// not already exist in the hash table.  The data will be copied from val into
+// the hashtable (the amount of data copied comes from value_size when the
+// table was constructed).  Therefore the data at val may be freed once the
+// call returns.  For string tables, the table takes ownership of the string.
+//
+// WARNING: the lowest bit of val is reserved and will be overwritten!
+void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
+// TODO: may want to allow for more complex keys with custom hash/comparison
+// functions.
+void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
+void upb_inttable_compact(upb_inttable *t);
+// Removes every element from t, leaving an empty table with the same value
+// size as before.
+INLINE void upb_strtable_clear(upb_strtable *t) {
+  // TODO: improve.
+  // Re-initialize with the *value* size: upb_strtable_init() derives the
+  // entry size from it.  Passing entry_size here would inflate value_size on
+  // every clear and make later inserts copy past the caller's value buffer.
+  uint16_t value_size = t->t.value_size;
+  upb_strtable_free(t);
+  upb_strtable_init(t, 8, value_size);
+}
+
+// Main-position bucket for key k in the hash part of t.
+INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, uint32_t k) {
+  // Ints hash to themselves, so just mask down to the table size.
+  const uint32_t bucket = t->t.mask & k;
+  assert(bucket != UPB_END_OF_CHAIN);
+  return bucket;
+}
+
+// Tells whether key k is stored in the array part of t (true) rather than in
+// the hash part (false).
+INLINE bool _upb_inttable_isarrkey(upb_inttable *t, uint32_t k) {
+  const uint32_t limit = t->array_size;
+  return k < limit;
+}
+
+// Looks up key in this table, returning a pointer to the user's inserted data.
+// We have the caller specify the entry_size because fixing this as a literal
+// (instead of reading table->entry_size) gives the compiler more ability to
+// optimize.
+//
+// Returns NULL if the key is not present.  entry_size and value_size must
+// match the sizes the table was created with.
+INLINE void *_upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
+                                      size_t entry_size, size_t value_size) {
+  // The address is computed before the range check; it is only dereferenced
+  // if the key really falls inside the array part.
+  upb_inttable_value *arrval =
+      (upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
+  if (_upb_inttable_isarrkey(t, key)) {
+    //DEBUGPRINTF("array lookup for key %d, &val=%p, has_entry=%d\n", key, val, val->has_entry);
+    return (arrval->has_entry) ? arrval : NULL;
+  }
+  uint32_t bucket = _upb_inttable_bucket(t, key);
+  upb_inttable_entry *e =
+      (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
+  //DEBUGPRINTF("looking in first bucket %d, entry size=%zd, addr=%p\n", bucket, entry_size, e);
+  // Walk the collision chain.  Keys are compared without testing has_entry:
+  // this path is only reached for keys >= array_size.
+  // NOTE(review): this relies on empty buckets never matching a looked-up
+  // key -- confirm against how the table initializes empty entries.
+  while (1) {
+    //DEBUGPRINTF("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key);
+    if (e->hdr.key == key) {
+      //DEBUGPRINTF("returning val from hash part\n");
+      return &e->val;
+    }
+    if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
+    //DEBUGPRINTF("looking in bucket %d\n", bucket);
+    e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
+  }
+}
+
+// Size of one hash-part entry for values of value_size bytes: the header plus
+// the value, rounded up to a multiple of 8.
+INLINE size_t _upb_inttable_entrysize(size_t value_size) {
+  const size_t unpadded = sizeof(upb_inttable_header) + value_size;
+  return upb_align_up(unpadded, 8);
+}
+
+// Lookup with a caller-supplied value size; the entry size is derived from
+// it instead of being read from the table.
+INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
+                                      uint32_t value_size) {
+  const size_t entry_size = _upb_inttable_entrysize(value_size);
+  return _upb_inttable_fastlookup(t, key, entry_size, value_size);
+}
+
+// General-purpose lookup: uses the entry and value sizes recorded in the
+// table itself.  Returns NULL if key is not present.
+INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
+  upb_table *base = &t->t;
+  return _upb_inttable_fastlookup(t, key, base->entry_size, base->value_size);
+}
+
+void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len);
+void *upb_strtable_lookup(upb_strtable *t, const char *key);
+
+
+/* upb_strtable_iter **********************************************************/
+
+// Strtable iteration.  Order is undefined.  Insertions invalidate iterators.
+//   upb_strtable_iter i;
+//   for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+//     const char *key = upb_strtable_iter_key(&i);
+//     const myval *val = upb_strtable_iter_value(&i);
+//     // ...
+//   }
+typedef struct {
+  upb_strtable *t;        // Table being iterated.
+  upb_strtable_entry *e;  // Current entry; NULL once iteration is done.
+} upb_strtable_iter;
+
+void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t);
+void upb_strtable_next(upb_strtable_iter *i);
+// True once the iterator has moved past the last element.
+INLINE bool upb_strtable_done(upb_strtable_iter *i) {
+  return i->e == NULL;
+}
+
+// Key of the element the iterator currently points at.
+INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
+  upb_strtable_entry *cur = i->e;
+  return cur->hdr.key;
+}
+
+// Pointer to the value of the element the iterator currently points at.
+INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
+  upb_strtable_entry *cur = i->e;
+  return &cur->val;
+}
+
+
+/* upb_inttable_iter **********************************************************/
+
+// Inttable iteration.  Order is undefined.  Insertions invalidate iterators.
+//   for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
+//       i = upb_inttable_next(t, i)) {
+//     // ...
+//   }
+typedef struct {
+  uint32_t key;               // Key of the current element.
+  upb_inttable_value *value;  // Current value; NULL once iteration is done.
+  bool array_part;            // True while still walking the array part.
+} upb_inttable_iter;
+
+upb_inttable_iter upb_inttable_begin(upb_inttable *t);
+upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter);
+// True once the iterator has moved past the last element.
+INLINE bool upb_inttable_done(upb_inttable_iter iter) {
+  return iter.value == NULL;
+}
+
+// Key of the element the iterator currently points at.
+INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
+  const uint32_t current_key = iter.key;
+  return current_key;
+}
+
+// Pointer to the value of the element the iterator currently points at.
+INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {
+  upb_inttable_value *current = iter.value;
+  return current;
+}
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_TABLE_H_ */
diff --git a/upb/upb.c b/upb/upb.c
new file mode 100644
index 0000000..0ff082f
--- /dev/null
+++ b/upb/upb.c
@@ -0,0 +1,122 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "upb/descriptor_const.h"
+#include "upb/upb.h"
+#include "upb/bytestream.h"
+
+// Pre-C11 stand-in for alignof: the offset of a member of type t placed right
+// after a single char is used as t's alignment.
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+// Expands to one upb_type_info initializer followed by a comma: alignment and
+// size of ctype, the native wire type, the UPB_TYPE() constant for
+// inmemory_type, and ctype spelled out as a string.
+#define TYPE_INFO(wire_type, ctype, inmemory_type) \
+    {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
+
+// One row per field type.  A row's position in the array is the type number it
+// describes (row 0 is a fake ENDGROUP type); the trailing comment names it.
+const upb_type_info upb_types[] = {
+  TYPE_INFO(UPB_WIRE_TYPE_END_GROUP,   void*,     MESSAGE)   // ENDGROUP (fake)
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       double,    DOUBLE)    // DOUBLE
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       float,     FLOAT)     // FLOAT
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64)     // INT64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint64_t,  UINT64)    // UINT64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32)     // INT32
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       uint64_t,  UINT64)    // FIXED64
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       uint32_t,  UINT32)    // FIXED32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      bool,      BOOL)      // BOOL
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING)    // STRING
+  TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*,     MESSAGE)   // GROUP
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     MESSAGE)   // MESSAGE
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING)    // BYTES
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  UINT32)    // UINT32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  INT32)     // ENUM
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       int32_t,   INT32)     // SFIXED32
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       int64_t,   INT64)     // SFIXED64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32)     // SINT32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64)     // SINT64
+  // NOTE(review): this is row 19, one past SINT64 (row 18).  It was labelled
+  // "SINT64" like the row above, which looks like a copy-paste slip; confirm
+  // which pseudo-type this entry is meant to describe.
+  TYPE_INFO(UPB_WIRE_TYPE_END_GROUP,   void*,     INT64)     // (row 19, unlabelled)
+};
+
+// Placeholder upb_value with a zeroed payload.  Debug builds (NDEBUG not
+// defined) also carry a type tag in upb_value, which is set to -1 here.
+// NOTE(review): -1 presumably matches no accessor's tag, so reading this value
+// through a typed getter should trip its assert -- confirm against
+// descriptor_const.h.
+#ifdef NDEBUG
+upb_value UPB_NO_VALUE = {{0}};
+#else
+upb_value UPB_NO_VALUE = {{0}, -1};
+#endif
+
+// Puts a status into its initial state: code UPB_OK, no message, and no
+// message buffer.  Leaves the status in the same state as UPB_STATUS_INIT.
+void upb_status_init(upb_status *status) {
+  status->buf = NULL;
+  // bufsize must agree with buf.  upb_status_setf() hands both to
+  // upb_vrprintf(), which reads the size before deciding whether to grow the
+  // buffer; leaving it uninitialized would make that read undefined.
+  status->bufsize = 0;
+  upb_status_clear(status);
+}
+
+// Frees the message buffer owned by the status.  No field is reset, so the
+// status must be initialized again before it is reused.
+void upb_status_uninit(upb_status *status) {
+  char *owned = status->buf;
+  free(owned);
+}
+
+// Stores code in the status and formats a printf-style message into the
+// status-owned buffer (from offset 0, growing the buffer if needed).  The
+// message pointer is then aimed at that buffer.  The result of upb_vrprintf()
+// is not inspected.
+void upb_status_setf(upb_status *s, enum upb_status_code code,
+                     const char *msg, ...) {
+  va_list ap;
+
+  s->code = code;
+
+  va_start(ap, msg);
+  upb_vrprintf(&s->buf, &s->bufsize, 0, msg, ap);
+  va_end(ap);
+
+  s->str = s->buf;
+}
+
+// Copies the code and message of "from" into "to".
+//
+// The message is copied into the buffer owned by "to", which is grown if it is
+// too small.  If "from" has no message, or the buffer cannot be grown, "to"
+// ends up with the code set and no message (to->str == NULL); its existing
+// buffer is kept for later reuse.
+void upb_status_copy(upb_status *to, upb_status *from) {
+  if (to == from) return;  // Nothing to do; also avoids an overlapping memcpy.
+
+  to->code = from->code;
+  if (!from->str) {
+    to->str = NULL;
+    return;
+  }
+
+  // Copy exactly the message (including its terminating NUL) instead of the
+  // whole source buffer, whose tail may never have been written.
+  size_t len = strlen(from->str) + 1;
+  if (to->bufsize < len) {
+    // Keep the old buffer reachable if realloc fails so it is not leaked.
+    char *grown = realloc(to->buf, len);
+    if (!grown) {
+      to->str = NULL;
+      return;
+    }
+    to->buf = grown;
+    to->bufsize = len;
+  }
+
+  // Always re-point str at the buffer: a cleared status has str == NULL even
+  // when it already owns a large enough buffer.
+  to->str = to->buf;
+  memcpy(to->str, from->str, len);
+}
+
+// Resets the status to "OK, no message".  The message buffer and its recorded
+// size are not touched, so the buffer stays available for the next message.
+void upb_status_clear(upb_status *status) {
+  status->str = NULL;
+  status->code = UPB_OK;
+}
+
+// Writes a one-line description of the status to f: the numeric code, followed
+// by the message when one is present.
+void upb_status_print(upb_status *status, FILE *f) {
+  if (!status->str) {
+    fprintf(f, "code: %d, no msg\n", status->code);
+    return;
+  }
+  fprintf(f, "code: %d, msg: %s\n", status->code, status->str);
+}
+
+// Marks the status as UPB_ERROR, using the strerror() text for the current
+// errno as the message.
+void upb_status_fromerrno(upb_status *status) {
+  const char *errmsg = strerror(errno);
+  upb_status_setf(status, UPB_ERROR, "%s", errmsg);
+}
+
+// Formats fmt/args into *buf starting at byte offset ofs, growing the buffer
+// with realloc() when the text (plus its terminating NUL) does not fit.
+//
+//   buf   in/out: heap buffer, may point to NULL when *size is 0.
+//   size  in/out: allocated size of *buf; updated only when the buffer grows.
+//   ofs   byte offset in *buf where the formatted text starts.
+//
+// Returns the length of the formatted text, excluding the NUL.  Returns -1 if
+// formatting fails or memory cannot be allocated; in that case *buf and *size
+// still describe the original, valid allocation.
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
+                 const char *fmt, va_list args) {
+  // Room available at *buf + ofs.  An offset at or past the end means none;
+  // computing it this way avoids unsigned underflow.
+  size_t avail = (*size > ofs) ? *size - ofs : 0;
+
+  // Try once without reallocating.  We have to va_copy because we might have
+  // to call vsnprintf again.
+  va_list args_copy;
+  va_copy(args_copy, args);
+  int n = vsnprintf(avail ? *buf + ofs : NULL, avail, fmt, args_copy);
+  va_end(args_copy);
+  if (n < 0) return -1;  // Formatting error; nothing sensible to size against.
+
+  size_t true_len = (size_t)n;
+  if (true_len >= avail) {
+    // The output was truncated: vsnprintf only writes the whole string when it
+    // also has room for the terminating NUL.  Grow by doubling (minimum 2).
+    size_t needed = ofs + true_len + 1;
+    size_t newsize = *size;
+    while (newsize < needed) {
+      if (newsize > (size_t)-1 / 2) {
+        newsize = needed;  // Doubling would overflow; take the exact size.
+        break;
+      }
+      newsize = newsize < 2 ? 2 : newsize * 2;
+    }
+
+    char *newbuf = realloc(*buf, newsize);
+    if (!newbuf) return -1;
+
+    // Publish the new buffer and size together, and only after realloc has
+    // succeeded, so a failure never leaves *size larger than the allocation.
+    *buf = newbuf;
+    *size = newsize;
+    vsnprintf(newbuf + ofs, true_len + 1, fmt, args);
+  }
+  return n;
+}
diff --git a/upb/upb.h b/upb/upb.h
new file mode 100644
index 0000000..153057d
--- /dev/null
+++ b/upb/upb.h
@@ -0,0 +1,238 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * This file contains shared definitions that are widely used across upb.
+ */
+
+#ifndef UPB_H_
+#define UPB_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>  // for FILE (used by upb_status_print) and size_t.
+#include <assert.h>
+#include "descriptor_const.h"
+#include "atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// inline if possible, emit standalone code if required.
+#ifndef INLINE
+#define INLINE static inline
+#endif
+
+// Larger / smaller of two values.  These are macros, so one of the arguments
+// is evaluated twice: do not pass expressions with side effects.
+#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+// Address (as void*) of the byte at offset i*m from base, e.g. element i of an
+// array whose elements are m bytes apart.
+#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
+
+// Takes printf-style arguments and does nothing with them.  DEBUGPRINTF maps
+// to this function when NDEBUG is defined.
+INLINE void nop_printf(const char *fmt, ...) {
+  (void)fmt;  // Deliberately unused.
+}
+
+// Debug tracing: DEBUGPRINTF(...) is printf in debug builds and a no-op
+// (nop_printf) when NDEBUG is defined.  The arguments are still evaluated in
+// both cases because nop_printf is a real function call.
+#ifdef NDEBUG
+#define DEBUGPRINTF nop_printf
+#else
+#define DEBUGPRINTF printf
+#endif
+
+// Returns the smallest multiple of align that is >= val.  align is used as a
+// divisor and therefore must be nonzero.
+INLINE size_t upb_align_up(size_t val, size_t align) {
+  const size_t excess = val % align;
+  if (excess == 0) return val;
+  return val + align - excess;
+}
+
+// The maximum that any submessages can be nested.  Matches proto2's limit.
+// At the moment this specifies the size of several statically-sized arrays
+// and therefore setting it high will cause more memory to be used.  Will
+// be replaced by a runtime-configurable limit and dynamically-resizing arrays.
+// TODO: make this a runtime-settable property of upb_handlers.
+#define UPB_MAX_NESTING 64
+
+// The maximum number of fields that any one .proto type can have.  Note that
+// this is very different than the max field number.  It is hard to imagine a
+// scenario where more than 2k fields (each with its own name and field number)
+// makes sense.  The .proto file to describe it would be 2000 lines long and
+// contain 2000 unique names.
+//
+// With this limit we can store a has-bit offset in 8 bits (2**8 * 8 = 2048)
+// and we can store a value offset in 16 bits, since the maximum message
+// size is 16,640 bytes (2**8 bytes of has-bits + 2048 8-byte values).  Note
+// that strings and arrays are not counted in this, only the *pointer* to them
+// is.  An individual string or array is unaffected by this 16k byte limit.
+#define UPB_MAX_FIELDS (2048)
+
+// Nested type names are separated by periods.
+#define UPB_SYMBOL_SEPARATOR '.'
+
+// The longest chain that mutually-recursive types are allowed to form.  For
+// example, this is a type cycle of length 2:
+//   message A {
+//     B b = 1;
+//   }
+//   message B {
+//     A a = 1;
+//   }
+#define UPB_MAX_TYPE_CYCLE_LEN 16
+
+// The maximum depth that the type graph can have.  Note that this setting does
+// not automatically constrain UPB_MAX_NESTING, because type cycles allow for
+// unlimited nesting if we do not limit it.  Many algorithms in upb call
+// recursive functions that traverse the type graph, so we must limit this to
+// avoid blowing the C stack.
+#define UPB_MAX_TYPE_DEPTH 64
+
+
+/* Fundamental types and type constants. **************************************/
+
+// A list of types as they are encoded on-the-wire.  The numeric values are
+// assigned explicitly.  The per-member notes list the field types whose native
+// wire type it is, according to the upb_types table in upb.c.
+enum upb_wire_type {
+  UPB_WIRE_TYPE_VARINT      = 0,  // int32/64, uint32/64, sint32/64, bool, enum
+  UPB_WIRE_TYPE_64BIT       = 1,  // double, fixed64, sfixed64
+  UPB_WIRE_TYPE_DELIMITED   = 2,  // string, bytes, message
+  UPB_WIRE_TYPE_START_GROUP = 3,  // group
+  UPB_WIRE_TYPE_END_GROUP   = 4,  // the fake ENDGROUP type (type number 0)
+  UPB_WIRE_TYPE_32BIT       = 5,  // float, fixed32, sfixed32
+};
+
+// Type of a field as defined in a .proto file.  eg. string, int32, etc.  The
+// integers that represent this are defined by descriptor.proto.  Note that
+// descriptor.proto reserves "0" for errors, and we use it to represent
+// exceptional circumstances.
+typedef uint8_t upb_fieldtype_t;
+
+// For referencing the type constants tersely.  The argument is token-pasted
+// onto a fixed prefix, e.g. UPB_TYPE(INT32) expands to
+// GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 and UPB_LABEL(X) to
+// GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_X.
+// NOTE(review): those constants presumably come from descriptor_const.h.
+#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
+#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type
+
+// Info for a given field type.  Instances are built by the TYPE_INFO macro in
+// upb.c, in exactly this member order.
+typedef struct {
+  uint8_t align;            // Alignment of the C type that holds the value.
+  uint8_t size;             // sizeof() that C type.
+  uint8_t native_wire_type; // An enum upb_wire_type value.
+  uint8_t inmemory_type;    // For example, INT32, SINT32, and SFIXED32 -> INT32
+  // Name of the C type as a string (e.g. "int32_t").
+  // NOTE(review): always initialized from a string literal, so this could be
+  // "const char *" if no caller relies on the non-const type.
+  char *ctype;
+} upb_type_info;
+
+// A static array of info about all of the field types, indexed by type number.
+extern const upb_type_info upb_types[];
+
+
+/* upb_value ******************************************************************/
+
+struct _upb_strref;
+struct _upb_fielddef;
+
+// Special constants for the upb_value.type field.  These must not conflict
+// with any members of FieldDescriptorProto.Type.
+#define UPB_TYPE_ENDGROUP 0
+#define UPB_VALUETYPE_FIELDDEF 32
+#define UPB_VALUETYPE_PTR 33
+
+// A single .proto value.  The owner must have an out-of-band way of knowing
+// the type, so that it knows which union member to use.
+//
+// Read and write the members through the upb_value_get*/upb_value_set*
+// accessors generated further down rather than touching "val" directly; in
+// debug builds they also maintain and check the type tag.
+typedef struct {
+  union {
+    uint64_t uint64;
+    double _double;
+    float _float;
+    int32_t int32;
+    int64_t int64;
+    uint32_t uint32;
+    bool _bool;
+    struct _upb_strref *strref;
+    struct _upb_fielddef *fielddef;
+    void *_void;
+  } val;
+
+#ifndef NDEBUG
+  // In debug mode we carry the value type around also so we can check accesses
+  // to be sure the right member is being read.  Note that this makes the
+  // struct layout differ between debug and NDEBUG builds.
+  char type;
+#endif
+} upb_value;
+
+// SET_TYPE(dest, val) assigns val to dest in debug builds and expands to
+// nothing under NDEBUG, where upb_value has no type tag to assign to.
+#ifdef NDEBUG
+#define SET_TYPE(dest, val)
+#else
+#define SET_TYPE(dest, val) dest = val
+#endif
+
+// Generates a typed getter/setter pair for one upb_value union member:
+//
+//   ctype upb_value_get<name>(upb_value val);
+//   void  upb_value_set<name>(upb_value *val, ctype cval);
+//
+// membername is the union member accessed and proto_type is the tag recorded
+// for it.  In debug builds the setter stores the tag and the getter asserts
+// that the stored tag matches; under NDEBUG both checks compile away.
+//
+// The invocations below deliberately have no trailing semicolon: each one
+// expands to complete function definitions, and a stray ';' at file scope is
+// an empty declaration that ISO C does not permit.
+#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \
+  INLINE ctype upb_value_get ## name(upb_value val) { \
+    assert(val.type == proto_type); \
+    return val.val.membername; \
+  } \
+  INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
+    SET_TYPE(val->type, proto_type); \
+    val->val.membername = cval; \
+  }
+UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE))
+UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT))
+UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32))
+UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64))
+UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32))
+UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64))
+UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL))
+UPB_VALUE_ACCESSORS(strref, strref, struct _upb_strref*, UPB_TYPE(STRING))
+UPB_VALUE_ACCESSORS(fielddef, fielddef, struct _upb_fielddef*, UPB_VALUETYPE_FIELDDEF)
+UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR)
+
+extern upb_value UPB_NO_VALUE;
+
+
+/* upb_status *****************************************************************/
+
+// Status codes used as a return value.  Codes >0 are not fatal and can be
+// resumed.  UPB_ERROR is negative, so anything that stores a status code needs
+// a signed type to round-trip it.
+enum upb_status_code {
+  // The operation completed successfully.
+  UPB_OK = 0,
+
+  // The bytesrc is at EOF and all data was read successfully.
+  UPB_EOF = 1,
+
+  // A read or write from a streaming src/sink could not be completed right now.
+  UPB_TRYAGAIN = 2,
+
+  // An unrecoverable error occurred.
+  UPB_ERROR = -1,
+};
+
+// Result of an operation: a status code plus an optional human-readable
+// message.  Initialize with UPB_STATUS_INIT or upb_status_init() and release
+// with upb_status_uninit().
+//
+// TODO: consider adding error space and code, to let ie. errno be stored
+// as a proper code, or application-specific error codes.
+typedef struct {
+  // One of enum upb_status_code.  Explicitly signed: UPB_ERROR is -1, and
+  // plain char is unsigned on some ABIs, where the value would read back as
+  // 255 and never compare equal to UPB_ERROR.
+  signed char code;
+  char *str;  // NULL when no message is present.  NUL-terminated.
+  char *buf;  // Owned by the status.
+  size_t bufsize;  // Allocated size of buf, in bytes.
+} upb_status;
+
+#define UPB_STATUS_INIT {UPB_OK, NULL, NULL, 0}
+
+// Runtime equivalent of UPB_STATUS_INIT: resets status to UPB_OK with no
+// message.  Must not be called on a status that already owns a buffer.
+void upb_status_init(upb_status *status);
+// Frees the message buffer owned by status.  Call once per initialized status.
+void upb_status_uninit(upb_status *status);
+
+// True if the status code is UPB_OK.
+INLINE bool upb_ok(upb_status *status) {
+  return UPB_OK == status->code;
+}
+// True if the status code is UPB_EOF.
+INLINE bool upb_iseof(upb_status *status) {
+  return UPB_EOF == status->code;
+}
+
+// Sets status to UPB_ERROR with the strerror() text for the current errno.
+void upb_status_fromerrno(upb_status *status);
+// Writes a one-line description of status (code and message, if any) to f.
+void upb_status_print(upb_status *status, FILE *f);
+// Resets status to UPB_OK with no message; the message buffer is kept.
+void upb_status_clear(upb_status *status);
+// Sets the code and a printf-style formatted message.
+void upb_status_setf(upb_status *status, enum upb_status_code code,
+                     const char *fmt, ...);
+// Copies the code and message (if any) of "from" into "to".
+void upb_status_copy(upb_status *to, upb_status *from);
+
+// Like vasprintf, but uses *buf (which can be NULL) as a starting point and
+// reallocates it only if the new value will not fit.  Output is written
+// starting at byte offset "ofs" within *buf.  "size" is updated to reflect the
+// allocated size of the buffer.  Returns the length of the formatted text
+// (not counting the terminating NUL), or -1 on memory alloc failure.
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
+                 const char *fmt, va_list args);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_H_ */