From d29f80d6f320143363fb101a9e94f89c17788468 Mon Sep 17 00:00:00 2001
From: Joshua Haberman <joshua@reverberate.org>
Date: Thu, 3 Jun 2010 19:55:39 -0700
Subject: More incremental work.

---
 src/upb_encoder.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'src/upb_encoder.c')

diff --git a/src/upb_encoder.c b/src/upb_encoder.c
index f1156a8..304a423 100644
--- a/src/upb_encoder.c
+++ b/src/upb_encoder.c
@@ -38,9 +38,6 @@ static size_t upb_f_uint32_t_size(uint32_t val) {
   return sizeof(uint32_t);
 }
 
-// The biggest possible single value is a 10-byte varint.
-#define UPB_MAX_ENCODED_SIZE 10
-
 
 /* Functions to write wire values. ********************************************/
 
-- 
cgit v1.2.3


From 28ec9a1fa0f9b1d741920dfa8afc91fa2532c43d Mon Sep 17 00:00:00 2001
From: Joshua Haberman <joshua@reverberate.org>
Date: Fri, 9 Jul 2010 20:20:33 -0700
Subject: Split src/ into core/ and stream/.

---
 Makefile               |   22 +-
 README                 |    9 +-
 core/upb.c             |   67 ++++
 core/upb.h             |  207 ++++++++++
 core/upb_atomic.h      |  185 +++++++++
 core/upb_def.c         | 1022 ++++++++++++++++++++++++++++++++++++++++++++++++
 core/upb_def.h         |  302 ++++++++++++++
 core/upb_stream.h      |  121 ++++++
 core/upb_stream_vtbl.h |   93 +++++
 core/upb_string.c      |   47 +++
 core/upb_string.h      |  194 +++++++++
 core/upb_table.c       |  411 +++++++++++++++++++
 core/upb_table.h       |  133 +++++++
 gen-deps.sh            |    2 +-
 src/upb.c              |   67 ----
 src/upb.h              |  207 ----------
 src/upb_atomic.h       |  185 ---------
 src/upb_byteio.h       |   43 --
 src/upb_decoder.c      |  577 ---------------------------
 src/upb_decoder.h      |   53 ---
 src/upb_def.c          | 1022 ------------------------------------------------
 src/upb_def.h          |  302 --------------
 src/upb_encoder.c      |  420 --------------------
 src/upb_encoder.h      |   56 ---
 src/upb_inlinedefs.c   |   20 -
 src/upb_stream.h       |  121 ------
 src/upb_stream_vtbl.h  |   93 -----
 src/upb_string.c       |   47 ---
 src/upb_string.h       |  194 ---------
 src/upb_table.c        |  411 -------------------
 src/upb_table.h        |  133 -------
 src/upb_text.c         |  121 ------
 src/upb_text.h         |   36 --
 stream/upb_byteio.h    |   43 ++
 stream/upb_decoder.c   |  577 +++++++++++++++++++++++++++
 stream/upb_decoder.h   |   53 +++
 stream/upb_encoder.c   |  420 ++++++++++++++++++++
 stream/upb_encoder.h   |   56 +++
 stream/upb_text.c      |  121 ++++++
 stream/upb_text.h      |   36 ++
 40 files changed, 4107 insertions(+), 4122 deletions(-)
 create mode 100644 core/upb.c
 create mode 100644 core/upb.h
 create mode 100644 core/upb_atomic.h
 create mode 100644 core/upb_def.c
 create mode 100644 core/upb_def.h
 create mode 100644 core/upb_stream.h
 create mode 100644 core/upb_stream_vtbl.h
 create mode 100644 core/upb_string.c
 create mode 100644 core/upb_string.h
 create mode 100644 core/upb_table.c
 create mode 100644 core/upb_table.h
 delete mode 100644 src/upb.c
 delete mode 100644 src/upb.h
 delete mode 100644 src/upb_atomic.h
 delete mode 100644 src/upb_byteio.h
 delete mode 100644 src/upb_decoder.c
 delete mode 100644 src/upb_decoder.h
 delete mode 100644 src/upb_def.c
 delete mode 100644 src/upb_def.h
 delete mode 100644 src/upb_encoder.c
 delete mode 100644 src/upb_encoder.h
 delete mode 100644 src/upb_inlinedefs.c
 delete mode 100644 src/upb_stream.h
 delete mode 100644 src/upb_stream_vtbl.h
 delete mode 100644 src/upb_string.c
 delete mode 100644 src/upb_string.h
 delete mode 100644 src/upb_table.c
 delete mode 100644 src/upb_table.h
 delete mode 100644 src/upb_text.c
 delete mode 100644 src/upb_text.h
 create mode 100644 stream/upb_byteio.h
 create mode 100644 stream/upb_decoder.c
 create mode 100644 stream/upb_decoder.h
 create mode 100644 stream/upb_encoder.c
 create mode 100644 stream/upb_encoder.h
 create mode 100644 stream/upb_text.c
 create mode 100644 stream/upb_text.h

(limited to 'src/upb_encoder.c')

diff --git a/Makefile b/Makefile
index 1ba7400..ca4f940 100644
--- a/Makefile
+++ b/Makefile
@@ -27,13 +27,13 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $
 CC=gcc
 CXX=g++
 CFLAGS=-std=c99
-INCLUDE=-Idescriptor -Isrc -Itests -I.
+INCLUDE=-Idescriptor -Icore -Itests -I.
 CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags))
 LDLIBS=-lpthread
 
-LIBUPB=src/libupb.a
-LIBUPB_PIC=src/libupb_pic.a
-LIBUPB_SHARED=src/libupb.so
+LIBUPB=core/libupb.a
+LIBUPB_PIC=core/libupb_pic.a
+LIBUPB_SHARED=core/libupb.so
 ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC)
 all: $(ALL)
 clean:
@@ -45,16 +45,16 @@ clean:
 	rm -rf tools/upbc deps
 	cd lang_ext/python && python setup.py clean --all
 
-# The core library (src/libupb.a)
-SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c src/upb_string.c \
+# The core library (core/libupb.a)
+SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \
     descriptor/descriptor.c
 # Parts of core that are yet to be converted.
 OTHERSRC=src/upb_encoder.c src/upb_text.c
 # Override the optimization level for upb_def.o, because it is not in the
 # critical path but gets very large when -O3 is used.
-src/upb_def.o: src/upb_def.c
+core/upb_def.o: core/upb_def.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $<
-src/upb_def.lo: src/upb_def.c
+core/upb_def.lo: core/upb_def.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC
 
 
@@ -91,7 +91,7 @@ TESTS=tests/tests \
     tests/t.test_vs_proto2.googlemessage1 \
     tests/t.test_vs_proto2.googlemessage2 \
     tests/test.proto.pb
-$(TESTS): src/libupb.a
+$(TESTS): core/libupb.a
 
 #VALGRIND=valgrind --leak-check=full --error-exitcode=1 
 VALGRIND=
@@ -129,10 +129,10 @@ tests/test_table: tests/test_table.cc
 	# Includes <hash_set> which is a deprecated header.
 	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< $(LIBUPB)
 
-tests/tests: src/libupb.a
+tests/tests: core/libupb.a
 
 # Tools
-tools/upbc: src/libupb.a
+tools/upbc: core/libupb.a
 
 # Benchmarks
 UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
diff --git a/README b/README
index e869667..efa3ddb 100644
--- a/README
+++ b/README
@@ -10,6 +10,11 @@ ROADMAP OF THE SOURCE
 
 benchmark/
   Benchmarks of upb and other protocol buffer implementations.
+core/
+  The core source directory.  Builds into core/libupb.a.  Contains only the
+  very core library, which is capable of loading descriptors given the
+  appropriate decoder.  Does not even contain decoders for the standard
+  formats like the protobuf text and binary formats.
 descriptor/
   Files that describe the format of Protocol Buffer "descriptors", which are
   protocol buffers that describe the format of other protocol buffers.  These
@@ -19,8 +24,8 @@ labs/
   about alternate ways of implementing things.  When possible, these are
   benchmarked by the tests in benchmark/.  We also test these with the tests
   in tests/, to ensure that the alternate implementations are actually correct.
-src/
-  The core source directory.  builds into src/libupb.a.
+stream/
+  Implementations of streaming protobuf encoders and decoders.
 tests/
   Unit tests.
 tools/
diff --git a/core/upb.c b/core/upb.c
new file mode 100644
index 0000000..a98512d
--- /dev/null
+++ b/core/upb.c
@@ -0,0 +1,67 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ */
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "upb.h"
+#include "upb_string.h"
+
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+#define TYPE_INFO(wire_type, ctype, allows_delimited) \
+    {alignof(ctype), sizeof(ctype), wire_type, \
+     (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \
+     #ctype},
+
+upb_type_info upb_types[] = {
+  {0, 0, 0, 0, ""}, // There is no type 0.
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       double,    1)    // DOUBLE
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       float,     1)    // FLOAT
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   1)    // INT64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint64_t,  1)    // UINT64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   1)    // INT32
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       uint64_t,  1)    // FIXED64
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       uint32_t,  1)    // FIXED32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      bool,      1)    // BOOL
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     1)    // STRING
+  TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*,     0)    // GROUP
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     1)    // MESSAGE
+  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     1)    // BYTES
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  1)    // UINT32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  1)    // ENUM
+  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       int32_t,   1)    // SFIXED32
+  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       int64_t,   1)    // SFIXED64
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   1)    // SINT32
+  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   1)    // SINT64
+};
+
+void upb_seterr(upb_status *status, enum upb_status_code code,
+                const char *msg, ...)
+{
+  if(upb_ok(status)) {  // The first error is the most interesting.
+    status->str = upb_string_new();
+    char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN);
+    status->code = code;
+    va_list args;
+    va_start(args, msg);
+    vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args);
+    va_end(args);
+  }
+}
+
+void upb_copyerr(upb_status *to, upb_status *from)
+{
+  to->code = from->code;
+  to->str = upb_string_getref(from->str);
+}
+
+void upb_reset(upb_status *status) {
+  status->code = UPB_STATUS_OK;
+  upb_string_unref(status->str);
+  status->str = NULL;
+}
diff --git a/core/upb.h b/core/upb.h
new file mode 100644
index 0000000..230e638
--- /dev/null
+++ b/core/upb.h
@@ -0,0 +1,207 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ * This file contains shared definitions that are widely used across upb.
+ */
+
+#ifndef UPB_H_
+#define UPB_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>  // only for size_t.
+#include "descriptor_const.h"
+#include "upb_atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// inline if possible, emit standalone code if required.
+#ifndef INLINE
+#define INLINE static inline
+#endif
+
+#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
+
+// The maximum that any submessages can be nested.  Matches proto2's limit.
+#define UPB_MAX_NESTING 64
+
+// The maximum number of fields that any one .proto type can have.  Note that
+// this is very different than the max field number.  It is hard to imagine a
+// scenario where more than 32k fields makes sense.
+#define UPB_MAX_FIELDS (1<<15)
+typedef int16_t upb_field_count_t;
+
+// Nested type names are separated by periods.
+#define UPB_SYMBOL_SEPARATOR '.'
+
+// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType
+#define UPB_SYMBOL_MAXLEN 128
+
+// The longest chain that mutually-recursive types are allowed to form.  For
+// example, this is a type cycle of length 2:
+//   message A {
+//     B b = 1;
+//   }
+//   message B {
+//     A a = 1;
+//   }
+#define UPB_MAX_TYPE_CYCLE_LEN 16
+
+// The maximum depth that the type graph can have.  Note that this setting does
+// not automatically constrain UPB_MAX_NESTING, because type cycles allow for
+// unlimited nesting if we do not limit it.
+#define UPB_MAX_TYPE_DEPTH 64
+
+// The biggest possible single value is a 10-byte varint.
+#define UPB_MAX_ENCODED_SIZE 10
+
+
+/* Fundamental types and type constants. **************************************/
+
+// A list of types as they are encoded on-the-wire.
+enum upb_wire_type {
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5,
+
+  // This isn't a real wire type, but we use this constant to describe varints
+  // that are expected to be a maximum of 32 bits.
+  UPB_WIRE_TYPE_32BIT_VARINT = 8
+};
+
+typedef uint8_t upb_wire_type_t;
+
+// Value type as defined in a .proto file.  eg. string, int32, etc.  The
+// integers that represent this are defined by descriptor.proto.  Note that
+// descriptor.proto reserves "0" for errors, and we use it to represent
+// exceptional circumstances.
+typedef uint8_t upb_field_type_t;
+
+// For referencing the type constants tersely.
+#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
+#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type
+
+INLINE bool upb_issubmsgtype(upb_field_type_t type) {
+  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
+}
+
+INLINE bool upb_isstringtype(upb_field_type_t type) {
+  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
+}
+
+// Info for a given field type.
+typedef struct {
+  uint8_t align;
+  uint8_t size;
+  upb_wire_type_t native_wire_type;
+  uint8_t allowed_wire_types;  // For packable fields, also allows delimited.
+  char *ctype;
+} upb_type_info;
+
+// A static array of info about all of the field types, indexed by type number.
+extern upb_type_info upb_types[];
+
+// The number of a field, eg. "optional string foo = 3".
+typedef int32_t upb_field_number_t;
+
+// Label (optional, repeated, required) as defined in a .proto file.  The
+// values of this are defined by google.protobuf.FieldDescriptorProto.Label
+// (from descriptor.proto).
+typedef uint8_t  upb_label_t;
+
+// A scalar (non-string) wire value.  Used only for parsing unknown fields.
+typedef union {
+  uint64_t varint;
+  uint64_t _64bit;
+  uint32_t _32bit;
+} upb_wire_value;
+
+/* Polymorphic values of .proto types *****************************************/
+
+struct _upb_string;
+typedef struct _upb_string upb_string;
+
+typedef uint32_t upb_strlen_t;
+
+// A single .proto value.  The owner must have an out-of-band way of knowing
+// the type, so that it knows which union member to use.
+typedef union {
+  double _double;
+  float _float;
+  int32_t int32;
+  int64_t int64;
+  uint32_t uint32;
+  uint64_t uint64;
+  bool _bool;
+} upb_value;
+
+// A pointer to a .proto value.  The owner must have an out-of-band way of
+// knowing the type, so it knows which union member to use.
+typedef union {
+  double *_double;
+  float *_float;
+  int32_t *int32;
+  int64_t *int64;
+  uint8_t *uint8;
+  uint32_t *uint32;
+  uint64_t *uint64;
+  bool *_bool;
+} upb_valueptr;
+
+INLINE upb_valueptr upb_value_addrof(upb_value *val) {
+  upb_valueptr ptr = {&val->_double};
+  return ptr;
+}
+
+// Status codes used as a return value.  Codes >0 are not fatal and can be
+// resumed.
+enum upb_status_code {
+  UPB_STATUS_OK = 0,
+
+  // A read or write from a streaming src/sink could not be completed right now.
+  UPB_STATUS_TRYAGAIN = 1,
+
+  // A value had an incorrect wire type and will be skipped.
+  UPB_STATUS_BADWIRETYPE = 2,
+
+  // An unrecoverable error occurred.
+  UPB_STATUS_ERROR = -1,
+
+  // A varint went for 10 bytes without terminating.
+  UPB_ERROR_UNTERMINATED_VARINT = -2,
+
+  // The max nesting level (UPB_MAX_NESTING) was exceeded.
+  UPB_ERROR_MAX_NESTING_EXCEEDED = -3
+};
+
+typedef struct {
+  enum upb_status_code code;
+  upb_string *str;
+} upb_status;
+
+#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL}
+#define UPB_ERRORMSG_MAXLEN 256
+
+INLINE bool upb_ok(upb_status *status) {
+  return status->code == UPB_STATUS_OK;
+}
+
+void upb_reset(upb_status *status);
+void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg,
+                ...);
+void upb_copyerr(upb_status *to, upb_status *from);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_H_ */
diff --git a/core/upb_atomic.h b/core/upb_atomic.h
new file mode 100644
index 0000000..01fc8a2
--- /dev/null
+++ b/core/upb_atomic.h
@@ -0,0 +1,185 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ * Only a very small part of upb is thread-safe.  Notably, individual
+ * messages, arrays, and strings are *not* thread safe for mutating.
+ * However, we do make message *metadata* such as upb_msgdef and
+ * upb_context thread-safe, and their ownership is tracked via atomic
+ * refcounting.  This header implements the small number of atomic
+ * primitives required to support this.  The primitives we implement
+ * are:
+ *
+ * - a reader/writer lock (wrappers around platform-provided mutexes).
+ * - an atomic refcount.
+ */
+
+#ifndef UPB_ATOMIC_H_
+#define UPB_ATOMIC_H_
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* inline if possible, emit standalone code if required. */
+#ifndef INLINE
+#define INLINE static inline
+#endif
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* Non-thread-safe implementations. ******************************************/
+
+typedef struct {
+  int v;
+} upb_atomic_refcount_t;
+
+INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
+  a->v = val;
+}
+
+INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
+  return a->v++ == 0;
+}
+
+INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
+  return --a->v == 0;
+}
+
+INLINE int upb_atomic_read(upb_atomic_refcount_t *a) {
+  return a->v;
+}
+
+INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) {
+  a->v += val;
+  return a->v == 0;
+}
+
+INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) {
+  int ret = a->v;
+  a->v += val;
+  return ret;
+}
+
+#endif
+
+/* Atomic refcount ************************************************************/
+
+#ifdef UPB_THREAD_UNSAFE
+
+/* Already defined above. */
+
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
+
+/* GCC includes atomic primitives. */
+
+typedef struct {
+  volatile int v;
+} upb_atomic_refcount_t;
+
+INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
+  a->v = val;
+  __sync_synchronize();   /* Ensure the initialized value is visible. */
+}
+
+INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
+  return __sync_fetch_and_add(&a->v, 1) == 0;
+}
+
+INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) {
+  return __sync_add_and_fetch(&a->v, n) == 0;
+}
+
+INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
+  return __sync_sub_and_fetch(&a->v, 1) == 0;
+}
+
+INLINE int upb_atomic_read(upb_atomic_refcount_t *a) {
+  return __sync_fetch_and_add(&a->v, 0);  /* Add 0: atomic load of count. */
+}
+
+#elif defined(WIN32)
+
+/* Windows defines atomic increment/decrement. */
+#include <Windows.h>
+
+typedef struct {
+  volatile LONG val;
+} upb_atomic_refcount_t;
+
+INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
+  InterlockedExchange(&a->val, val);
+}
+
+INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
+  return InterlockedIncrement(&a->val) == 1;
+}
+
+INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
+  return InterlockedDecrement(&a->val) == 0;
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU.  \
+       Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+/* Reader/Writer lock. ********************************************************/
+
+#ifdef UPB_THREAD_UNSAFE
+
+typedef struct {
+} upb_rwlock_t;
+
+INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
+INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
+
+#elif defined(UPB_USE_PTHREADS)
+
+#include <pthread.h>
+
+typedef struct {
+  pthread_rwlock_t lock;
+} upb_rwlock_t;
+
+INLINE void upb_rwlock_init(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_init(&l->lock, NULL);
+}
+
+INLINE void upb_rwlock_destroy(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_destroy(&l->lock);
+}
+
+INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_rdlock(&l->lock);
+}
+
+INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_wrlock(&l->lock);
+}
+
+INLINE void upb_rwlock_unlock(upb_rwlock_t *l) {
+  /* TODO: check return value. */
+  pthread_rwlock_unlock(&l->lock);
+}
+
+#else
+#error Reader/writer lock is not defined for your platform/CPU.  \
+       Implement it or compile with UPB_THREAD_UNSAFE.
+#endif
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_ATOMIC_H_ */
diff --git a/core/upb_def.c b/core/upb_def.c
new file mode 100644
index 0000000..bfab738
--- /dev/null
+++ b/core/upb_def.c
@@ -0,0 +1,1022 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include <stdlib.h>
+#include "descriptor_const.h"
+#include "descriptor.h"
+#include "upb_def.h"
+
+#define CHECKSRC(x) if(!(x)) goto src_err
+#define CHECK(x) if(!(x)) goto err
+
+// A little dynamic array for storing a growing list of upb_defs.
+typedef struct {
+  upb_def **defs;  // Heap-allocated array of "size" pointer slots.
+  uint32_t len;    // Number of slots currently in use.
+  uint32_t size;   // Capacity of "defs", counted in elements (not bytes).
+} upb_deflist;
+
+static void upb_deflist_init(upb_deflist *l) {
+  l->size = 8;
+  l->defs = malloc(l->size * sizeof(*l->defs));  // Scale elements to bytes.
+  l->len = 0;
+}
+
+static void upb_deflist_uninit(upb_deflist *l) {
+  for(uint32_t i = 0; i < l->len; i++)
+    if(l->defs[i]) upb_def_unref(l->defs[i]);  // NULL slots are skipped.
+  free(l->defs);
+}
+
+static void upb_deflist_push(upb_deflist *l, upb_def *d) {
+  if(l->len == l->size) {  // Full: double the capacity before appending.
+    l->size *= 2;
+    l->defs = realloc(l->defs, l->size * sizeof(*l->defs));
+  }
+  l->defs[l->len++] = d;  // No new ref is taken; uninit unrefs it.
+}
+
+/* Joins strings together, for example:
+ *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
+ *   join("", "Baz") -> "Baz"
+ * Caller owns a ref on the returned string. */
+static upb_string *upb_join(upb_string *base, upb_string *name) {
+  upb_string *joined = upb_strdup(base);
+  upb_strlen_t len = upb_string_len(joined);
+  if(len > 0) {
+    upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR;
+  }
+  upb_strcat(joined, name);
+  return joined;
+}
+
+// Qualify the defname for all defs starting with offset "start" with "str".
+static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
+  for(uint32_t i = start; i < l->len; i++) {
+    upb_def *def = l->defs[i];
+    upb_string *name = def->fqname;
+    def->fqname = upb_join(str, name);
+    upb_string_unref(name);
+  }
+}
+
+/* upb_def ********************************************************************/
+
+// Defs are reference counted, but can have cycles when types are
+// self-recursive or mutually recursive, so we need to be capable of collecting
+// the cycles.  In our situation defs are immutable (so cycles cannot be
+// created or destroyed post-initialization).  We need to be thread-safe but
+// want to avoid locks if at all possible and rely only on atomic operations.
+//
+// Our scheme is as follows.  First we give each def a flag indicating whether
+// it is part of a cycle or not.  Because defs are immutable, this flag will
+// never change.  For acyclic defs, we can use a naive algorithm and avoid the
+// overhead of dealing with cycles.  Most defs will be acyclic, and most cycles
+// will be very short.
+//
+// For defs that participate in cycles we keep two reference counts.  One
+// tracks references that come from outside the cycle (we call these external
+// references), and is incremented and decremented like a regular refcount.
+// The other is a cycle refcount, and works as follows.  Every cycle is
+// considered distinct, even if two cycles share members.  For example, this
+// graph has two distinct cycles:
+//
+//   A-->B-->C
+//   ^   |   |
+//   +---+---+
+//
+// The cycles in this graph are AB and ABC.  When A's external refcount
+// transitions from 0->1, we say that A takes "cycle references" on both
+// cycles.  Taking a cycle reference means incrementing the cycle refcount of
+// all defs in the cycle.  Since A and B are common to both cycles, A and B's
+// cycle refcounts will be incremented by two, and C's will be incremented by
+// one.  Likewise, when A's external refcount transitions from 1->0, we
+// decrement A and B's cycle refcounts by two and C's by one.  We collect a
+// cyclic type when its cycle refcount drops to zero.  A precondition for this
+// is that the external refcount has dropped to zero also.
+//
+// This algorithm is relatively cheap, since it only requires extra work when
+// the external refcount on a cyclic type transitions from 0->1 or 1->0.
+
+static void upb_msgdef_free(upb_msgdef *m);
+static void upb_enumdef_free(upb_enumdef *e);
+static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
+
+static void upb_def_free(upb_def *def)
+{
+  switch(def->type) {
+    case UPB_DEF_MSG:
+      upb_msgdef_free(upb_downcast_msgdef(def));
+      break;
+    case UPB_DEF_ENUM:
+      upb_enumdef_free(upb_downcast_enumdef(def));
+      break;
+    case UPB_DEF_SVC:
+      assert(false);  /* Unimplemented. */
+      break;
+    case UPB_DEF_UNRESOLVED:
+      upb_unresolveddef_free(upb_downcast_unresolveddef(def));
+      break;
+    default:
+      assert(false);
+  }
+}
+
+// Depth-first search for all cycles that include cycle_base.  Returns the
+// number of paths from def that lead to cycle_base, which is equivalent to the
+// number of cycles def is in that include cycle_base.
+//
+// open_defs tracks the set of nodes that are currently being visited in the
+// search so we can stop the search if we detect a cycle that does not involve
+// cycle_base.  We can't color the nodes as we go by writing to a member of the
+// def, because another thread could be performing the search concurrently.
+static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base,
+                                  upb_msgdef **open_defs, int num_open_defs,
+                                  bool ref) {
+  bool found = false;
+  for(int i = 0; i < num_open_defs; i++) {
+    if(open_defs[i] == m) {
+      // We encountered a cycle that did not involve cycle_base.
+      found = true;
+      break;
+    }
+  }
+
+  if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) {
+    return 0;
+  } else if(m == cycle_base) {
+    return 1;
+  } else {
+    int path_count = 0;
+    if(cycle_base == NULL) {
+      cycle_base = m;
+    } else {
+      open_defs[num_open_defs++] = m;
+    }
+    for(int i = 0; i < m->num_fields; i++) {
+      upb_fielddef *f = &m->fields[i];
+      upb_def *def = f->def;
+      if(upb_issubmsg(f) && def->is_cyclic) {
+        upb_msgdef *sub_m = upb_downcast_msgdef(def);
+        path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs,
+                                         num_open_defs, ref);
+      }
+    }
+    if(ref) {
+      upb_atomic_add(&m->cycle_refcount, path_count);
+    } else {
+      if(upb_atomic_add(&m->cycle_refcount, -path_count))
+        upb_def_free(UPB_UPCAST(m));
+    }
+    return path_count;
+  }
+}
+
+void _upb_def_reftozero(upb_def *def) {
+  if(def->is_cyclic) {
+    upb_msgdef *m = upb_downcast_msgdef(def);
+    upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
+    upb_cycle_ref_or_unref(m, NULL, open_defs, 0, false);
+  } else {
+    upb_def_free(def);
+  }
+}
+
+void _upb_def_cyclic_ref(upb_def *def) {
+  upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
+  upb_cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, open_defs, 0, true);
+}
+
+static void upb_def_init(upb_def *def, upb_def_type type) {
+  def->type = type;
+  def->is_cyclic = 0;  // We detect this later, after resolving refs.
+  def->search_depth = 0;
+  def->fqname = NULL;
+  upb_atomic_refcount_init(&def->refcount, 1);
+}
+
+static void upb_def_uninit(upb_def *def) {
+  upb_string_unref(def->fqname);
+}
+
+
+/* upb_unresolveddef **********************************************************/
+
+// Unresolved defs are used as temporary placeholders for a def whose name has
+// not been resolved yet.  During the name resolution step, all unresolved defs
+// are replaced with pointers to the actual def being referenced.
+typedef struct _upb_unresolveddef {
+  upb_def base;
+
+  // The target type name.  This may or may not be fully qualified.
+  upb_string *name;
+} upb_unresolveddef;
+
+// Is passed a ref on the string.
+static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) {
+  upb_unresolveddef *def = malloc(sizeof(*def));
+  upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
+  def->name = str;
+  return def;
+}
+
+static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
+  upb_string_unref(def->name);  // Drop the ref passed to the constructor.
+  upb_def_uninit(&def->base);
+  free(def);
+}
+
+/* upb_enumdef ****************************************************************/
+
+typedef struct {
+  upb_strtable_entry e;
+  uint32_t value;
+} ntoi_ent;
+
+typedef struct {
+  upb_inttable_entry e;
+  upb_string *string;
+} iton_ent;
+
+static void upb_enumdef_free(upb_enumdef *e) {
+  upb_strtable_free(&e->ntoi);
+  upb_inttable_free(&e->iton);
+  upb_def_uninit(&e->base);
+  free(e);
+}
+
+static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status)
+{  // Reads one enum value (name + number) from src and adds it to e's tables.
+  int32_t number = 0;
+  bool has_number = false;  // Explicit flag, so that -1 is a usable value.
+  upb_string *name = NULL;
+  upb_fielddef *f;
+  while((f = upb_src_getdef(src)) != NULL) {
+    switch(f->number) {
+      case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM:
+        CHECKSRC(upb_src_getint32(src, &number));
+        has_number = true;
+        break;
+      case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM:
+        name = upb_string_tryrecycle(name);
+        CHECKSRC(upb_src_getstr(src, name));
+        break;
+      default:
+        CHECKSRC(upb_src_skipval(src));
+        break;
+    }
+  }
+  if(name == NULL || !has_number) {
+    upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number.");
+    goto err;
+  }
+  ntoi_ent ntoi_ent = {{name, 0}, number};
+  iton_ent iton_ent = {{number, 0}, name};
+  upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
+  upb_inttable_insert(&e->iton, &iton_ent.e);
+  // "name" is not unref'd: our ref passes to the iton entry (strtables can ref
+  // their keys, but the inttable doesn't know that its value is a string).
+  return true;
+
+src_err:  // Stream failure: report the source's own status to the caller.
+  upb_copyerr(status, upb_src_status(src));
+err:
+  upb_string_unref(name);
+  return false;
+}
+
+static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status)
+{
+  upb_enumdef *e = malloc(sizeof(*e));
+  upb_def_init(&e->base, UPB_DEF_ENUM);
+  upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent));
+  upb_inttable_init(&e->iton, 0, sizeof(iton_ent));
+  upb_fielddef *f;
+  while((f = upb_src_getdef(src)) != NULL) {
+    switch(f->number) {
+      case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM:
+        CHECK(upb_addenum_val(src, e, status));
+        break;
+      default:
+        upb_src_skipval(src);
+        break;
+    }
+  }
+  upb_deflist_push(defs, UPB_UPCAST(e));
+  return true;
+
+err:
+  upb_enumdef_free(e);
+  return false;
+}
+
+static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) {
+  iter->state = ent;
+  iter->name = ent->e.key;
+  iter->val = ent->value;
+}
+
+void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) {
+  // We could iterate over either table here; the choice is arbitrary.
+  ntoi_ent *ent = upb_strtable_begin(&e->ntoi);
+  iter->e = e;
+  fill_iter(iter, ent);
+}
+
+void upb_enum_next(upb_enum_iter *iter) {
+  ntoi_ent *ent = iter->state;
+  assert(ent);
+  ent = upb_strtable_next(&iter->e->ntoi, &ent->e);
+  iter->state = ent;
+  if(ent) fill_iter(iter, ent);
+}
+
+// Reports whether "iter" has moved past the last name/value pair.
+bool upb_enum_done(upb_enum_iter *iter) {
+  return !iter->state;
+}
+
+
+/* upb_fielddef ***************************************************************/
+
+// Releases the memory of a heap-allocated fielddef.  Only the struct itself is
+// freed; the name and def it points to are not released here (see
+// upb_fielddef_uninit()).
+static void upb_fielddef_free(upb_fielddef *f) {
+  free(f);
+}
+
+// Drops the references held by a fielddef: its name, and its def when the
+// field's type carries one and the field owns a ref on it.  Does not free "f".
+static void upb_fielddef_uninit(upb_fielddef *f) {
+  upb_string_unref(f->name);
+  if(!upb_hasdef(f)) return;
+  if(!f->owned) return;
+  upb_def_unref(f->def);
+}
+
+// Processes a google.protobuf.FieldDescriptorProto read from "src" and
+// registers the resulting fielddef in m's by-number and by-name tables.
+// Returns false with "status" set if reading from "src" fails; the
+// partially-built fielddef and everything it owns is released in that case.
+static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status)
+{
+  upb_fielddef *f = malloc(sizeof(*f));
+  // malloc() does not zero memory.  "name" is handed to
+  // upb_string_tryrecycle() below and released on the error path, so it must
+  // start out as NULL rather than as an indeterminate pointer.
+  f->name = NULL;
+  f->def = NULL;
+  f->owned = false;
+  upb_fielddef *parsed_f;
+  int32_t tmp;
+  while((parsed_f = upb_src_getdef(src))) {
+    switch(parsed_f->number) {
+      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM:
+        CHECKSRC(upb_src_getint32(src, &tmp));
+        f->type = tmp;
+        break;
+      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM:
+        CHECKSRC(upb_src_getint32(src, &tmp));
+        f->label = tmp;
+        break;
+      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM:
+        CHECKSRC(upb_src_getint32(src, &tmp));
+        f->number = tmp;
+        break;
+      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM:
+        f->name = upb_string_tryrecycle(f->name);
+        CHECKSRC(upb_src_getstr(src, f->name));
+        break;
+      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: {
+        upb_string *str = upb_string_new();
+        CHECKSRC(upb_src_getstr(src, str));
+        // A repeated type_name replaces the earlier placeholder def.
+        if(f->def) upb_def_unref(f->def);
+        f->def = UPB_UPCAST(upb_unresolveddef_new(str));
+        f->owned = true;
+        break;
+      }
+    }
+  }
+  CHECKSRC(upb_src_eof(src));
+  // TODO: verify that all required fields were present.
+  assert((f->def != NULL) == upb_hasdef(f));
+
+  // Field was successfully read, add it as a field of the msgdef.
+  upb_itof_ent itof_ent = {{f->number, 0}, f};
+  upb_ntof_ent ntof_ent = {{f->name, 0}, f};
+  upb_inttable_insert(&m->itof, &itof_ent.e);
+  upb_strtable_insert(&m->ntof, &ntof_ent.e);
+  return true;
+
+src_err:
+  upb_copyerr(status, upb_src_status(src));
+  // Release what the half-built field already owns.  upb_fielddef_uninit() is
+  // not used here because it consults f->type, which may not have been read.
+  if(f->name) upb_string_unref(f->name);
+  if(f->def) upb_def_unref(f->def);
+  upb_fielddef_free(f);
+  return false;
+}
+
+
+/* upb_msgdef *****************************************************************/
+
+// Processes a google.protobuf.DescriptorProto, adding defs to "defs."
+//
+// The message itself and every nested message/enum it declares are pushed onto
+// "defs"; nested defs are qualified with this message's name.  Returns false
+// with "status" set on failure, in which case the partially-built msgdef is
+// freed.
+static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status)
+{
+  upb_msgdef *m = malloc(sizeof(*m));
+  upb_def_init(&m->base, UPB_DEF_MSG);
+  upb_atomic_refcount_init(&m->cycle_refcount, 0);
+  upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
+  upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+  // upb_msgdef_free() walks fields[0..num_fields) and frees "fields"; start
+  // from an empty array so the error path never reads indeterminate memory.
+  m->num_fields = 0;
+  m->fields = NULL;
+  // Defs pushed from here on are nested inside this message.
+  int32_t start_count = defs->len;
+
+  upb_fielddef *f;
+  while((f = upb_src_getdef(src)) != NULL) {
+    switch(f->number) {
+      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM:
+        m->base.fqname = upb_string_tryrecycle(m->base.fqname);
+        CHECKSRC(upb_src_getstr(src, m->base.fqname));
+        break;
+      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM:
+        CHECKSRC(upb_src_startmsg(src));
+        CHECK(upb_addfield(src, m, status));
+        CHECKSRC(upb_src_endmsg(src));
+        break;
+      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM:
+        CHECKSRC(upb_src_startmsg(src));
+        CHECK(upb_addmsg(src, defs, status));
+        CHECKSRC(upb_src_endmsg(src));
+        break;
+      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
+        CHECKSRC(upb_src_startmsg(src));
+        CHECK(upb_addenum(src, defs, status));
+        CHECKSRC(upb_src_endmsg(src));
+        break;
+      default:
+        // TODO: extensions.
+        CHECKSRC(upb_src_skipval(src));
+    }
+  }
+  // If the loop ended for any reason other than end-of-message, the source
+  // holds the error; CHECKSRC copies it into "status" (as upb_addfield does).
+  CHECKSRC(upb_src_eof(src));
+  if(!m->base.fqname) {
+    upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name.");
+    goto err;
+  }
+  upb_deflist_qualify(defs, m->base.fqname, start_count);
+  upb_deflist_push(defs, UPB_UPCAST(m));
+  return true;
+
+src_err:
+  upb_copyerr(status, upb_src_status(src));
+err:
+  upb_msgdef_free(m);
+  return false;
+}
+
+// Destroys a msgdef: releases what each field in m->fields holds, frees the
+// field array and both lookup tables, uninitializes the base def, and finally
+// frees the msgdef itself.
+static void upb_msgdef_free(upb_msgdef *m)
+{
+  upb_field_count_t idx = 0;
+  while(idx < m->num_fields) {
+    upb_fielddef_uninit(&m->fields[idx]);
+    idx++;
+  }
+  free(m->fields);
+  upb_strtable_free(&m->ntof);
+  upb_inttable_free(&m->itof);
+  upb_def_uninit(&m->base);
+  free(m);
+}
+
+// Points field "f" at "def", releasing the ref on the def it previously owned
+// (if any) and taking a new ref on "def".  "m" is currently unused.
+static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
+  (void)m;
+  if(f->owned) {
+    upb_def *previous = f->def;
+    upb_def_unref(previous);
+  }
+  f->def = def;
+  // The edge starts out owned; it is made unowned later if it turns out to be
+  // part of a cycle.
+  f->owned = true;
+  upb_def_ref(def);
+}
+
+
+/* symtab internal  ***********************************************************/
+
+// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs".
+//
+// Every message and enum declared in the file is pushed onto "defs" and then
+// qualified with the file's package.  Returns false with "status" set on
+// failure.
+static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status)
+{
+  upb_string *package = NULL;
+  // Defs pushed from here on belong to this file.
+  int32_t start_count = defs->len;
+  upb_fielddef *f;
+  while((f = upb_src_getdef(src)) != NULL) {
+    switch(f->number) {
+      // Symbols are qualified by the file's "package", not by its "name"
+      // (which is the path of the .proto file).
+      case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM:
+        package = upb_string_tryrecycle(package);
+        CHECKSRC(upb_src_getstr(src, package));
+        break;
+      case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM:
+        CHECKSRC(upb_src_startmsg(src));
+        CHECK(upb_addmsg(src, defs, status));
+        CHECKSRC(upb_src_endmsg(src));
+        break;
+      case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
+        CHECKSRC(upb_src_startmsg(src));
+        CHECK(upb_addenum(src, defs, status));
+        CHECKSRC(upb_src_endmsg(src));
+        break;
+      default:
+        // TODO: services and extensions.
+        CHECKSRC(upb_src_skipval(src));
+    }
+  }
+  // If the loop ended for any reason other than end-of-message, the source
+  // holds the error; CHECKSRC copies it into "status".
+  CHECKSRC(upb_src_eof(src));
+  // NOTE(review): "package" is still NULL for files that declare no package;
+  // confirm upb_deflist_qualify() accepts that.
+  upb_deflist_qualify(defs, package, start_count);
+  upb_string_unref(package);
+  return true;
+
+src_err:
+  upb_copyerr(status, upb_src_status(src));
+err:
+  upb_string_unref(package);
+  return false;
+}
+
+/* Scans data[0..len) backwards for "c" and returns the offset of its last
+ * occurrence at an index greater than zero.  Index 0 is never compared, so 0
+ * is returned both for a match at the very start and for no match at all;
+ * len == 0 yields -1. */
+static int my_memrchr(char *data, char c, size_t len)
+{
+  int pos;
+  for(pos = len-1; pos > 0; pos--) {
+    if(data[pos] == c) break;
+  }
+  return pos;
+}
+
+// Entry type for the symbol tables used in this file: maps a fully-qualified
+// name (the strtable key) to its def.
+typedef struct {
+  upb_strtable_entry e;
+  upb_def *def;
+} upb_symtab_ent;
+
+// Given a symbol and the base symbol inside which it is defined, find the
+// symbol's definition in t.
+//
+// A symbol starting with UPB_SYMBOL_SEPARATOR is absolute and is looked up
+// once, without the leading separator.  Any other symbol is tried as
+// "<base>.<sym>", then with trailing components of "base" removed one at a
+// time.  Returns the matching table entry, or NULL if nothing matches, "sym"
+// is empty, or the combined length reaches UPB_SYMBOL_MAXLEN.
+static upb_symtab_ent *upb_resolve(upb_strtable *t,
+                                   upb_string *base, upb_string *sym)
+{
+  if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN ||
+     upb_string_len(sym) == 0) return NULL;
+
+  if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) {
+    // Symbols starting with '.' are absolute, so we do a single lookup.
+    // Slice to omit the leading '.'
+    upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1);
+    upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
+    upb_string_unref(sym_str);
+    return e;
+  } else {
+    // Remove components from base until we find an entry or run out.
+    upb_string *sym_str = upb_string_new();
+    int baselen = upb_string_len(base);
+    upb_symtab_ent *e;
+    while(1) {
+      // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym
+      upb_strlen_t len = baselen + upb_string_len(sym) + 1;
+      char *buf = upb_string_getrwbuf(sym_str, len);
+      memcpy(buf, upb_string_getrobuf(base), baselen);
+      buf[baselen] = UPB_SYMBOL_SEPARATOR;
+      memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym));
+
+      e = upb_strtable_lookup(t, sym_str);
+      // Stop on a hit, or once the empty base has been tried (no more scopes).
+      if(e || baselen == 0) break;
+
+      baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen);
+    }
+    // The scratch key is only needed for the lookups; release it on every
+    // path, as the absolute branch above does.
+    upb_string_unref(sym_str);
+    return e;
+  }
+}
+
+// Performs a pass over the type graph to find all cycles that include m.
+//
+// Returns true if "m" was reached again while it is the base of the search,
+// or if any submessage edge below "m" leads back to that base.  Every field
+// edge found to be on such a cycle is made unowned and its ref released.
+// Note that a false return means "no cycle through the base", not failure;
+// errors (excessive depth or cycle length) are reported through "status".
+static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
+{
+  if(depth > UPB_MAX_TYPE_DEPTH) {
+    // We have found a non-cyclic path from the base of the type tree that
+    // exceeds the maximum allowed depth.  There are many situations in upb
+    // where we recurse over the type tree (like for example, right now) and an
+    // absurdly deep tree could cause us to stack overflow on systems with very
+    // limited stacks.
+    upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at "
+               "depth %d in the type graph, which exceeds the maximum type "
+               "depth of %d.", UPB_STRARG(UPB_UPCAST(m)->fqname), depth,
+               UPB_MAX_TYPE_DEPTH);
+    return false;
+  } else if(UPB_UPCAST(m)->search_depth == 1) {
+    // Cycle!
+    int cycle_len = depth - 1;
+    if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) {
+      upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved "
+                 "in a cycle of length %d, which exceeds the maximum type "
+                 "cycle length of %d.", UPB_STRARG(UPB_UPCAST(m)->fqname),
+                 cycle_len, UPB_MAX_TYPE_CYCLE_LEN);
+    }
+    return true;
+  } else if(UPB_UPCAST(m)->search_depth > 0) {
+    // This was a cycle, but did not originate from the base of our search tree.
+    // We'll find it when we call find_cycles() on this node directly.
+    return false;
+  } else {
+    // Mark this node as being on the current search path (base gets depth 1).
+    UPB_UPCAST(m)->search_depth = ++depth;
+    bool cycle_found = false;
+    for(upb_field_count_t i = 0; i < m->num_fields; i++) {
+      upb_fielddef *f = &m->fields[i];
+      if(!upb_issubmsg(f)) continue;
+      upb_def *sub_def = f->def;
+      upb_msgdef *sub_m = upb_downcast_msgdef(sub_def);
+      if(upb_symtab_findcycles(sub_m, depth, status)) {
+        cycle_found = true;
+        UPB_UPCAST(m)->is_cyclic = true;
+        if(f->owned) {
+          // Edges that are part of a cycle must not hold an owned ref.
+          upb_atomic_unref(&sub_def->refcount);
+          f->owned = false;
+        }
+      }
+    }
+    // Leaving this node: take it off the search path again.
+    UPB_UPCAST(m)->search_depth = 0;
+    return cycle_found;
+  }
+}
+
+// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
+// resolves all of the unresolved refs for the defs in tmptab.
+//
+// Returns false with "status" set if a symbol cannot be found or names a def
+// of the wrong kind.  After resolution, type cycles among the pending message
+// defs are detected and their refs adjusted.
+bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
+                     upb_status *status)
+{
+  upb_symtab_ent *e;
+  for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
+    upb_msgdef *m = upb_dyncast_msgdef(e->def);
+    if(!m) continue;
+    // Type names are resolved relative to the message in which they appear.
+    upb_string *base = e->e.key;
+
+    for(upb_field_count_t i = 0; i < m->num_fields; i++) {
+      upb_fielddef *f = &m->fields[i];
+      if(!upb_hasdef(f)) continue;  // No resolving necessary.
+      upb_string *name = upb_downcast_unresolveddef(f->def)->name;
+
+      // Resolve from either the tmptab (pending adds) or symtab (existing
+      // defs).  If both exist, prefer the pending add, because it will be
+      // overwriting the existing def.
+      upb_symtab_ent *found;
+      if(!(found = upb_resolve(tmptab, base, name)) &&
+         !(found = upb_resolve(symtab, base, name))) {
+        upb_seterr(status, UPB_STATUS_ERROR,
+                   "could not resolve symbol '" UPB_STRFMT "'"
+                   " in context '" UPB_STRFMT "'",
+                   UPB_STRARG(name), UPB_STRARG(base));
+        return false;
+      }
+
+      // Check the type of the found def.  The value compared against is a def
+      // type, so the local is declared as one.
+      upb_def_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
+      if(found->def->type != expected) {
+        upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type");
+        return false;
+      }
+      upb_msgdef_resolve(m, f, found->def);
+    }
+  }
+
+  // Deal with type cycles.
+  for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
+    upb_msgdef *m = upb_dyncast_msgdef(e->def);
+    if(!m) continue;
+    // The findcycles() call will decrement the external refcount of the defs
+    // reached through edges that are part of a cycle.  Its return value says
+    // whether a cycle through "m" was found, not whether it succeeded, so a
+    // false result (the common, acyclic case) must not abort resolution.
+    (void)upb_symtab_findcycles(m, 0, status);
+    upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
+    upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true);
+  }
+
+  return true;
+}
+
+// Given a list of defs, a list of extensions (in the future), and a flag
+// indicating whether the new defs can overwrite existing defs in the symtab,
+// attempts to add the given defs to the symtab.  The whole operation either
+// succeeds or fails.  Ownership of "defs" and "exts" is taken.
+//
+// The symtab's write lock is held from entry until just before the temporary
+// table is torn down.  Returns false with "status" set on a redefinition or
+// when reference resolution fails.
+bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef,
+                         upb_status *status)
+{
+  upb_rwlock_wrlock(&s->lock);
+
+  // Build a table of the defs we mean to add, for duplicate detection and name
+  // resolution.
+  upb_strtable tmptab;
+  upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent));
+  for (uint32_t i = 0; i < defs->len; i++) {
+    upb_def *def = defs->defs[i];
+    upb_symtab_ent e = {{def->fqname, 0}, def};
+
+    // Redefinition is never allowed within a single FileDescriptorSet.
+    // Additionally, we only allow overwriting of an existing definition if
+    // allow_redef is set.
+    if (upb_strtable_lookup(&tmptab, def->fqname) ||
+        (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) {
+      upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT,
+                 UPB_STRARG(def->fqname));
+      goto err;
+    }
+
+    // Pass ownership from the deflist to the strtable.
+    upb_strtable_insert(&tmptab, &e.e);
+    // Clear the slot so the deflist no longer refers to the moved def.
+    defs->defs[i] = NULL;
+  }
+
+  // TODO: process the list of extensions by modifying entries from
+  // tmptab in-place (copying them from the symtab first if necessary).
+
+  CHECK(upb_resolverefs(&tmptab, &s->symtab, status));
+
+  // The defs in tmptab have been vetted, and can be added to the symtab
+  // without causing errors.  Now add all tmptab defs to the symtab,
+  // overwriting (and releasing a ref on) any existing defs with the same
+  // names.  Ownership for tmptab defs passes from the tmptab to the symtab.
+  upb_symtab_ent *tmptab_e;
+  for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e;
+      tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) {
+    upb_symtab_ent *symtab_e =
+        upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
+    if(symtab_e) {
+      upb_def_unref(symtab_e->def);
+      symtab_e->def = tmptab_e->def;
+    } else {
+      upb_strtable_insert(&s->symtab, &tmptab_e->e);
+    }
+  }
+
+  upb_rwlock_unlock(&s->lock);
+  // Only the table itself is freed; its defs now belong to the symtab.
+  upb_strtable_free(&tmptab);
+  return true;
+
+err:
+  // We need to free all defs from "tmptab."
+  upb_rwlock_unlock(&s->lock);
+  for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
+      e = upb_strtable_next(&tmptab, &e->e))
+    upb_def_unref(e->def);
+  upb_strtable_free(&tmptab);
+  return false;
+}
+
+
+/* upb_symtab *****************************************************************/
+
+// Allocates an empty symtab.  The caller owns the single initial ref.
+upb_symtab *upb_symtab_new()
+{
+  upb_symtab *s = malloc(sizeof(*s));
+  upb_atomic_refcount_init(&s->refcount, 1);
+  upb_rwlock_init(&s->lock);
+  s->fds_msgdef = NULL;
+  upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
+  // _upb_symtab_free() tears down psymtab as well, so it must be a valid
+  // (empty) table from the start.
+  upb_strtable_init(&s->psymtab, 16, sizeof(upb_symtab_ent));
+  return s;
+}
+
+// Releases one ref on every def stored in "t", then frees the table itself.
+static void upb_free_symtab(upb_strtable *t)
+{
+  upb_symtab_ent *ent = upb_strtable_begin(t);
+  while(ent) {
+    upb_def_unref(ent->def);
+    ent = upb_strtable_next(t, &ent->e);
+  }
+  upb_strtable_free(t);
+}
+
+// Destroys a symtab: releases the defs in both the public and the private
+// symbol table, destroys the lock, and frees the symtab.  Invoked from
+// upb_symtab_unref() once the refcount drops to zero.
+void _upb_symtab_free(upb_symtab *s)
+{
+  upb_free_symtab(&s->symtab);
+  upb_free_symtab(&s->psymtab);
+  upb_rwlock_destroy(&s->lock);
+  free(s);
+}
+
+// Returns a malloc()ed array of all defs in the symtab whose type matches
+// "type" (or all defs, for UPB_DEF_ANY) and stores its length in *count.  The
+// caller owns the array and one ref on each def in it.
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type)
+{
+  upb_rwlock_rdlock(&s->lock);
+  int total = upb_strtable_count(&s->symtab);
+  // We may only use part of this, depending on how many symbols are of the
+  // correct type.
+  upb_def **defs = malloc(sizeof(*defs) * total);
+  upb_symtab_ent *e = upb_strtable_begin(&s->symtab);
+  int i = 0;
+  for(; e; e = upb_strtable_next(&s->symtab, &e->e)) {
+    upb_def *def = e->def;
+    assert(def);
+    if(type == UPB_DEF_ANY || def->type == type) {
+      // Take the caller's ref while the lock still guarantees the symtab holds
+      // its own ref on this def (upb_symtab_lookup() does the same).
+      upb_def_ref(def);
+      defs[i++] = def;
+    }
+  }
+  upb_rwlock_unlock(&s->lock);
+  *count = i;
+  return defs;
+}
+
+// Looks up "sym" by its exact name under the read lock.  On a hit the caller
+// receives a new ref on the returned def; otherwise NULL is returned.
+upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym)
+{
+  upb_def *found = NULL;
+  upb_rwlock_rdlock(&s->lock);
+  upb_symtab_ent *ent = upb_strtable_lookup(&s->symtab, sym);
+  if(ent != NULL) {
+    found = ent->def;
+    upb_def_ref(found);
+  }
+  upb_rwlock_unlock(&s->lock);
+  return found;
+}
+
+
+// Resolves "symbol" relative to "base" (see upb_resolve()) under the read
+// lock.  On a hit the caller receives a new ref on the returned def;
+// otherwise NULL is returned.
+upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) {
+  upb_def *found = NULL;
+  upb_rwlock_rdlock(&s->lock);
+  upb_symtab_ent *ent = upb_resolve(&s->symtab, base, symbol);
+  if(ent != NULL) {
+    found = ent->def;
+    upb_def_ref(found);
+  }
+  upb_rwlock_unlock(&s->lock);
+  return found;
+}
+
+// Reads a google.protobuf.FileDescriptorSet from "src", collects the defs of
+// every file in it, and adds them to "s" (redefinition not allowed).  Errors
+// are reported through "status"; the temporary def list is released on every
+// path.
+void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status)
+{
+  upb_deflist defs;
+  upb_deflist_init(&defs);
+  upb_fielddef *f;
+  for(f = upb_src_getdef(src); f != NULL; f = upb_src_getdef(src)) {
+    if(f->number == GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM) {
+      CHECKSRC(upb_src_startmsg(src));
+      CHECK(upb_addfd(src, &defs, status));
+      CHECKSRC(upb_src_endmsg(src));
+    } else {
+      CHECKSRC(upb_src_skipval(src));
+    }
+  }
+  CHECKSRC(upb_src_eof(src));
+  CHECK(upb_symtab_add_defs(s, &defs, false, status));
+  goto cleanup;
+
+src_err:
+  upb_copyerr(status, upb_src_status(src));
+err:
+cleanup:
+  upb_deflist_uninit(&defs);
+}
+
+
+/* upb_baredecoder ************************************************************/
+
+// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer
+// binary format.  It is only used for bootstrapping.  It can parse without
+// having a upb_msgdef, which is why it is useful for bootstrapping the first
+// msgdef.  On the downside, it does not support:
+//
+// * having its input span multiple upb_strings.
+// * reading any field of the returned upb_fielddef's except f->number.
+// * keeping a pointer to the upb_fielddef* and reading it later (the same
+//   upb_fielddef is reused over and over).
+// * detecting errors in the input (we trust that our input is known-good).
+//
+// It also does not support any of the following protobuf features:
+// * packed fields.
+// * groups.
+// * zig-zag-encoded types like sint32 and sint64.
+//
+// If descriptor.proto ever changed to use any of these features, this decoder
+// would need to be extended to support them.
+
+// State of the bootstrapping decoder described above.
+typedef struct {
+  // Base upb_src; this is what upb_baredecoder_src() hands out.
+  upb_src src;
+  // Buffer being parsed; upb_baredecoder_new() takes a ref on it.
+  upb_string *input;
+  // Current read position within "input".
+  upb_strlen_t offset;
+  // Reused for every getdef() call; only its "number" member is filled in.
+  upb_fielddef field;
+  // Wire type taken from the most recently read key.
+  upb_wire_type_t wire_type;
+  // Length prefix read for the most recent delimited value.
+  upb_strlen_t delimited_len;
+  // End offsets of the submessages currently being read; "top" points at the
+  // next free slot.
+  upb_strlen_t stack[UPB_MAX_NESTING], *top;
+  // Scratch string that getval() points at delimited data.
+  upb_string *str;
+} upb_baredecoder;
+
+// Decodes one varint at the current offset: 7 payload bits per byte, least
+// significant group first, continuing while the high bit is set.  Advances
+// the offset past the bytes consumed.  No bounds or length checks are made.
+static uint64_t upb_baredecoder_readv64(upb_baredecoder *d)
+{
+  const uint8_t *base = (uint8_t*)upb_string_getrobuf(d->input) + d->offset;
+  const uint8_t *cursor = base;
+  uint64_t result = 0;
+  int shift = 0;
+  uint8_t byte;
+  do {
+    byte = *cursor++;
+    result |= ((uint64_t)(byte & 0x7F)) << shift;
+    shift += 7;
+  } while(byte & 0x80);
+  d->offset += cursor - base;
+  return result;
+}
+
+// Decodes one varint and keeps only its low 32 bits.
+static uint32_t upb_baredecoder_readv32(upb_baredecoder *d)
+{
+  uint64_t wide = upb_baredecoder_readv64(d);
+  return (uint32_t)wide; // Truncate.
+}
+
+// Copies the 8 bytes at the current offset into a uint64_t (host byte order,
+// no conversion) and advances the offset past them.
+static uint64_t upb_baredecoder_readf64(upb_baredecoder *d)
+{
+  uint64_t fixed;
+  memcpy(&fixed, upb_string_getrobuf(d->input) + d->offset, sizeof(fixed));
+  d->offset += sizeof(fixed);
+  return fixed;
+}
+
+// Copies the 4 bytes at the current offset into a uint32_t (host byte order,
+// no conversion) and advances the offset past them.
+static uint32_t upb_baredecoder_readf32(upb_baredecoder *d)
+{
+  uint32_t fixed;
+  memcpy(&fixed, upb_string_getrobuf(d->input) + d->offset, sizeof(fixed));
+  d->offset += sizeof(fixed);
+  return fixed;
+}
+
+// Reads the key of the next field in the current (sub)message and returns the
+// decoder's reusable fielddef with only "number" filled in.  For delimited
+// values the length prefix is consumed as well.  Returns NULL and sets the
+// eof flag once the end of the current (sub)message has been reached.
+static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d)
+{
+  // Detect end-of-submessage.  upb_baredecoder_startmsg() stores the end
+  // offset and then increments "top", so the innermost open submessage's end
+  // is at top[-1]; "top" itself is the next free (unwritten) slot.  With
+  // nothing pushed we are at the top level, which ends where the input ends.
+  upb_strlen_t end = (d->top == d->stack) ? upb_string_len(d->input)
+                                          : d->top[-1];
+  if(d->offset >= end) {
+    d->src.eof = true;
+    return NULL;
+  }
+
+  uint32_t key;
+  key = upb_baredecoder_readv32(d);
+  // Low three bits of the key are the wire type, the rest the field number.
+  d->wire_type = key & 0x7;
+  d->field.number = key >> 3;
+  if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) {
+    // For delimited wire values we parse the length now, since we need it in
+    // all cases.
+    d->delimited_len = upb_baredecoder_readv32(d);
+  }
+  return &d->field;
+}
+
+// Consumes the value of the field whose key was last returned by getdef().
+// Numeric wire types are decoded into "val".  For delimited values the
+// decoder's scratch string is pointed at the bytes inside the input and the
+// read position is moved past them.  Always returns true; malformed input is
+// not detected.
+static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val)
+{
+  switch(d->wire_type) {
+    case UPB_WIRE_TYPE_DELIMITED:
+      d->str = upb_string_tryrecycle(d->str);
+      upb_string_substr(d->str, d->input, d->offset, d->delimited_len);
+      // The payload has been consumed; without this the next getdef() (for
+      // example after skipval()) would parse the payload bytes as field keys.
+      d->offset += d->delimited_len;
+      break;
+    case UPB_WIRE_TYPE_VARINT:
+      *val.uint64 = upb_baredecoder_readv64(d);
+      break;
+    case UPB_WIRE_TYPE_32BIT_VARINT:
+      *val.uint32 = upb_baredecoder_readv32(d);
+      break;
+    case UPB_WIRE_TYPE_64BIT:
+      *val.uint64 = upb_baredecoder_readf64(d);
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      *val.uint32 = upb_baredecoder_readf32(d);
+      break;
+    default:
+      // Groups and unknown wire types are not supported by this decoder.
+      assert(false);
+  }
+  return true;
+}
+
+// Skips the current value by decoding it into a throwaway upb_value via
+// upb_baredecoder_getval().
+static bool upb_baredecoder_skipval(upb_baredecoder *d)
+{
+  upb_value val;
+  return upb_baredecoder_getval(d, upb_value_addrof(&val));
+}
+
+// Enters the delimited submessage whose key and length were just read: pushes
+// the offset at which it ends onto the stack, then advances "top".
+// NOTE(review): there is no check against UPB_MAX_NESTING here.
+static bool upb_baredecoder_startmsg(upb_baredecoder *d)
+{
+  *(d->top++) = d->offset + d->delimited_len;
+  return true;
+}
+
+// Leaves the innermost submessage: pops its end offset off the stack and
+// moves the read position there, which also skips any fields left unread.
+static bool upb_baredecoder_endmsg(upb_baredecoder *d)
+{
+  d->offset = *(--d->top);
+  return true;
+}
+
+// upb_src vtable for the bare decoder.  The casts adapt the functions above,
+// which take a upb_baredecoder*, to the vtable's function-pointer types.
+static upb_src_vtable upb_baredecoder_src_vtbl = {
+  (upb_src_getdef_fptr)&upb_baredecoder_getdef,
+  (upb_src_getval_fptr)&upb_baredecoder_getval,
+  (upb_src_skipval_fptr)&upb_baredecoder_skipval,
+  (upb_src_startmsg_fptr)&upb_baredecoder_startmsg,
+  (upb_src_endmsg_fptr)&upb_baredecoder_endmsg,
+};
+
+// Creates a bare decoder that reads from the start of "str".  The decoder
+// takes its own ref on "str"; release it with upb_baredecoder_free().
+static upb_baredecoder *upb_baredecoder_new(upb_string *str)
+{
+  upb_baredecoder *d = malloc(sizeof(*d));
+  d->input = upb_string_getref(str);
+  d->str = upb_string_new();
+  // malloc() does not zero memory: parsing must start at the beginning of the
+  // input, and no length prefix has been read yet.
+  d->offset = 0;
+  d->delimited_len = 0;
+  d->top = &d->stack[0];
+  upb_src_init(&d->src, &upb_baredecoder_src_vtbl);
+  return d;
+}
+
+// Destroys a decoder created by upb_baredecoder_new(): drops its refs on the
+// input and scratch strings and frees the decoder.
+static void upb_baredecoder_free(upb_baredecoder *d)
+{
+  upb_string_unref(d->input);
+  upb_string_unref(d->str);
+  free(d);
+}
+
+// Returns the upb_src interface embedded in the decoder.
+static upb_src *upb_baredecoder_src(upb_baredecoder *d)
+{
+  return &d->src;
+}
+
+// Builds a fresh symtab and loads into it the defs parsed from the embedded
+// serialized descriptor (descriptor_pb / descriptor_pb_len) using the bare
+// decoder.  The caller owns the returned symtab.
+upb_symtab *upb_get_descriptor_symtab()
+{
+  // TODO: implement sharing of symtabs, so that successive calls to this
+  // function will return the same symtab.
+  upb_symtab *symtab = upb_symtab_new();
+  // TODO: allow upb_strings to be static or on the stack.
+  upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len);
+  upb_baredecoder *decoder = upb_baredecoder_new(descriptor);
+  // NOTE(review): "status" has no initializer, and upb_symtab_addfds() only
+  // writes to it on its error paths, so the assert below may read an
+  // indeterminate value -- confirm and initialize.
+  upb_status status;
+  upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status);
+  assert(upb_ok(&status));
+  upb_baredecoder_free(decoder);
+  upb_string_unref(descriptor);
+  return symtab;
+}
diff --git a/core/upb_def.h b/core/upb_def.h
new file mode 100644
index 0000000..c297e83
--- /dev/null
+++ b/core/upb_def.h
@@ -0,0 +1,302 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ * Provides definitions of .proto constructs:
+ * - upb_msgdef: describes a "message" construct.
+ * - upb_fielddef: describes a message field.
+ * - upb_enumdef: describes an enum.
+ * (TODO: definitions of extensions and services).
+ *
+ * Defs are obtained from a upb_symtab object.  A upb_symtab is empty when
+ * constructed, and definitions can be added by supplying serialized
+ * descriptors.
+ *
+ * Defs are immutable and reference-counted.  Symbol tables reference any defs
+ * that are the "current" definitions.  If an extension is loaded that adds a
+ * field to an existing message, a new msgdef is constructed that includes the
+ * new field and the old msgdef is unref'd.  The old msgdef will still be ref'd
+ * by messages (if any) that were constructed with that msgdef.
+ *
+ * This file contains routines for creating and manipulating the definitions
+ * themselves.  To create and manipulate actual messages, see upb_msg.h.
+ */
+
+#ifndef UPB_DEF_H_
+#define UPB_DEF_H_
+
+#include "upb_atomic.h"
+#include "upb_stream.h"
+#include "upb_table.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_def: base class for defs  **********************************************/
+
+// All the different kinds of defs we support.  These correspond 1:1 with
+// declarations in a .proto file.
+typedef enum {
+  UPB_DEF_MSG = 0,
+  UPB_DEF_ENUM,
+  UPB_DEF_SVC,
+  UPB_DEF_EXT,
+  // Internal-only, placeholder for a def that hasn't been resolved yet.
+  UPB_DEF_UNRESOLVED,
+
+  // For specifying that defs of any type are requested from getdefs.
+  UPB_DEF_ANY = -1
+} upb_def_type;
+
+// This typedef is more space-efficient than declaring an enum var directly.
+typedef int8_t upb_def_type_t;
+
+// Common header shared by every kind of def; concrete defs embed it as their
+// first member ("base").
+typedef struct {
+  upb_string *fqname;  // Fully qualified.
+  upb_atomic_refcount_t refcount;
+  upb_def_type_t type;
+
+  // The is_cyclic flag could go in upb_msgdef instead of here, because only
+  // messages can be involved in cycles.  However, putting it here is free
+  // from a space perspective because structure alignment will otherwise leave
+  // three bytes empty after type.  It also makes ref and unref more
+  // efficient, because we don't have to downcast to msgdef before checking the
+  // is_cyclic flag.
+  bool is_cyclic;
+  uint16_t search_depth;  // Used during initialization dfs.
+} upb_def;
+
+// These must not be called directly!
+void _upb_def_cyclic_ref(upb_def *def);
+void _upb_def_reftozero(upb_def *def);
+
+// Call to ref/deref a def.
+// upb_def_ref() additionally calls _upb_def_cyclic_ref() when
+// upb_atomic_ref() returns true and the def is flagged as cyclic;
+// upb_def_unref() calls _upb_def_reftozero() when upb_atomic_unref() returns
+// true.
+INLINE void upb_def_ref(upb_def *def) {
+  if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def);
+}
+INLINE void upb_def_unref(upb_def *def) {
+  if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
+}
+
+/* upb_fielddef ***************************************************************/
+
+// A upb_fielddef describes a single field in a message.  It isn't a full def
+// in the sense that it derives from upb_def.  It cannot stand on its own; it
+// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
+// It is also reference-counted.
+typedef struct _upb_fielddef {
+  upb_atomic_refcount_t refcount;
+  // Field name and number as declared in the .proto file.
+  upb_string *name;
+  upb_field_number_t number;
+  // Field type and label (e.g. repeated); see the predicates defined below.
+  upb_field_type_t type;
+  upb_label_t label;
+  upb_value default_value;
+
+  // For the case of an enum or a submessage, points to the def for that type.
+  upb_def *def;
+
+  // True if we own a ref on "def" (above).  This is true unless this edge is
+  // part of a cycle.
+  bool owned;
+
+  // These are set only when this fielddef is part of a msgdef.
+  uint32_t byte_offset;           // Where in a upb_msg to find the data.
+  upb_field_count_t field_index;  // Indicates set bit.
+} upb_fielddef;
+
+// A variety of tests about the type of a field.
+// True if the field's type is a submessage type.
+INLINE bool upb_issubmsg(upb_fielddef *f) {
+  return upb_issubmsgtype(f->type);
+}
+// True if the field's type is a string type.
+INLINE bool upb_isstring(upb_fielddef *f) {
+  return upb_isstringtype(f->type);
+}
+// True if the field is declared "repeated".
+INLINE bool upb_isarray(upb_fielddef *f) {
+  return f->label == UPB_LABEL(REPEATED);
+}
+// Does the type of this field imply that it should contain an associated def?
+INLINE bool upb_hasdef(upb_fielddef *f) {
+  return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
+}
+
+// True if the field is an array, a string or a submessage.
+INLINE bool upb_field_ismm(upb_fielddef *f) {
+  return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
+}
+
+// Like upb_field_ismm() but ignores the label: true for strings and
+// submessages only.
+INLINE bool upb_elem_ismm(upb_fielddef *f) {
+  return upb_isstring(f) || upb_issubmsg(f);
+}
+
+/* upb_msgdef *****************************************************************/
+
+// Structure that describes a single .proto message type.
+typedef struct _upb_msgdef {
+  // Common def header; UPB_UPCAST() yields a pointer to it.
+  upb_def base;
+  upb_atomic_refcount_t cycle_refcount;
+  size_t size;
+  // Number of entries in "fields".
+  upb_field_count_t num_fields;
+  uint32_t set_flags_bytes;
+  uint32_t num_required_fields;  // Required fields have the lowest set bytemasks.
+  upb_fielddef *fields;   // We have exclusive ownership of these.
+
+  // Tables for looking up fields by number and name.
+  upb_inttable itof;  // int to field
+  upb_strtable ntof;  // name to field
+} upb_msgdef;
+
+// Hash table entries for looking up fields by name or number.
+// upb_itof_ent is keyed by field number, upb_ntof_ent by field name; both
+// carry a pointer to the fielddef.
+typedef struct {
+  upb_inttable_entry e;
+  upb_fielddef *f;
+} upb_itof_ent;
+typedef struct {
+  upb_strtable_entry e;
+  upb_fielddef *f;
+} upb_ntof_ent;
+
+// Looks up a field by name or number.  While these are written to be as fast
+// as possible, it will still be faster to cache the results of this lookup if
+// possible.  These return NULL if no such field is found.
+// Returns the field of "m" with number "num", or NULL if there is none.
+INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) {
+  upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_fastlookup(
+      &m->itof, num, sizeof(upb_itof_ent));
+  if(ent == NULL) return NULL;
+  return ent->f;
+}
+
+// Returns the field of "m" named "name", or NULL if there is none.
+INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) {
+  upb_ntof_ent *ent = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
+  if(ent == NULL) return NULL;
+  return ent->f;
+}
+
+/* upb_enumdef ****************************************************************/
+
+// Describes a single .proto enum: a def header plus lookup tables in both
+// directions.
+typedef struct _upb_enumdef {
+  upb_def base;
+  upb_strtable ntoi;  // name to integer
+  upb_inttable iton;  // integer to name
+} upb_enumdef;
+
+typedef int32_t upb_enumval_t;
+
+// Lookups from name to integer and vice-versa.
+bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
+upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
+
+// Iteration over name/value pairs.  The order is undefined.
+//   upb_enum_iter i;
+//   for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
+//     // ...
+//   }
+typedef struct {
+  upb_enumdef *e;    // The enum being iterated over.
+  void *state;   // Internal iteration state.
+  upb_string *name;  // Name of the current pair.
+  upb_enumval_t val; // Numeric value of the current pair.
+} upb_enum_iter;
+void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e);
+void upb_enum_next(upb_enum_iter *iter);
+bool upb_enum_done(upb_enum_iter *iter);
+
+/* upb_symtab *****************************************************************/
+
+// A SymbolTable is where upb_defs live.  It is empty when first constructed.
+// Clients add definitions to the symtab by supplying unserialized or
+// serialized descriptors (as defined in descriptor.proto).
+typedef struct {
+  // Managed through upb_symtab_ref() / upb_symtab_unref().
+  upb_atomic_refcount_t refcount;
+  upb_rwlock_t lock;       // Protects all members except the refcount.
+  upb_msgdef *fds_msgdef;  // In psymtab, ptr here for convenience.
+
+  // Our symbol tables; we own refs to the defs therein.
+  upb_strtable symtab;     // The main symbol table.
+  upb_strtable psymtab;    // Private symbols, for internal use.
+} upb_symtab;
+
+// Initializes a upb_symtab.  Contexts are not freed explicitly, but unref'd
+// when the caller is done with them.
+upb_symtab *upb_symtab_new(void);
+void _upb_symtab_free(upb_symtab *s);  // Must not be called directly!
+
+// Take or release a ref on a symtab.  The symtab is destroyed with
+// _upb_symtab_free() when upb_atomic_unref() reports that the last ref is
+// gone.
+INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
+INLINE void upb_symtab_unref(upb_symtab *s) {
+  if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s);
+}
+
+// Resolves the given symbol using the rules described in descriptor.proto,
+// namely:
+//
+//    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
+//    scoping rules are used to find the type (i.e. first the nested types
+//    within this message are searched, then within the parent, on up to the
+//    root namespace).
+//
+// If a def is found, the caller owns one ref on the returned def.  Otherwise
+// returns NULL.
+upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
+
+// Find an entry in the symbol table with this exact name.  If a def is found,
+// the caller owns one ref on the returned def.  Otherwise returns NULL.
+upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
+
+// Gets an array of pointers to all currently active defs in this symtab.  The
+// caller owns the returned array (which is of length *count) as well as a ref
+// to each symbol inside.  If type is UPB_DEF_ANY then defs of all types are
+// returned, otherwise only defs of the required type are returned.
+upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type);
+
+// "desc" is a upb_src that will yield data from the
+// google.protobuf.FileDescriptorSet message type.  upb_symtab_addfds() adds
+// all the definitions from the given FileDescriptorSet to the symtab.
+// "status" indicates whether the operation was successful or not, and
+// carries the error message (if any).
+//
+// TODO: should this allow redefinition?  Either is possible, but which is
+// more useful?  Maybe it should be an option.
+void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status);
+
+// Returns a symtab that defines google.protobuf.DescriptorProto and all other
+// types that are defined in descriptor.proto.  This allows you to load other
+// proto types.  The caller owns a ref on the returned symtab.
+upb_symtab *upb_get_descriptor_symtab();
+
+
+/* upb_def casts **************************************************************/
+
+// Dynamic casts, for determining if a def is of a particular type at runtime.
+// Each generated upb_dyncast_<kind>() returns the def cast to that kind, or
+// NULL if the def's type tag does not match.
+#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
+  struct _upb_ ## lower;  /* Forward-declare. */ \
+  INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
+    if(def->type != UPB_DEF_ ## upper) return NULL; \
+    return (struct _upb_ ## lower*)def; \
+  }
+UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
+UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
+UPB_DYNAMIC_CAST_DEF(svcdef, SVC);
+UPB_DYNAMIC_CAST_DEF(extdef, EXT);
+UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
+#undef UPB_DYNAMIC_CAST_DEF
+
+// Downcasts, for when someone wants to assert that a def is of a particular
+// type.  These are only checked if we are building debug.
+#define UPB_DOWNCAST_DEF(lower, upper) \
+  struct _upb_ ## lower;  /* Forward-declare. */ \
+  INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
+    assert(def->type == UPB_DEF_ ## upper); \
+    return (struct _upb_ ## lower*)def; \
+  }
+UPB_DOWNCAST_DEF(msgdef, MSG);
+UPB_DOWNCAST_DEF(enumdef, ENUM);
+UPB_DOWNCAST_DEF(svcdef, SVC);
+UPB_DOWNCAST_DEF(extdef, EXT);
+UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
+#undef UPB_DOWNCAST_DEF
+
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_DEF_H_ */
diff --git a/core/upb_stream.h b/core/upb_stream.h
new file mode 100644
index 0000000..e7b4074
--- /dev/null
+++ b/core/upb_stream.h
@@ -0,0 +1,121 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * This file defines four general-purpose streaming interfaces for protobuf
+ * data or bytes:
+ *
+ * - upb_src: pull interface for protobuf data.
+ * - upb_sink: push interface for protobuf data.
+ * - upb_bytesrc: pull interface for bytes.
+ * - upb_bytesink: push interface for bytes.
+ *
+ * These interfaces are used as general-purpose glue in upb.  For example, the
+ * decoder interface works by implementing a upb_src and calling a upb_bytesrc.
+ *
+ * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
+ *
+ */
+
+#ifndef UPB_SRCSINK_H
+#define UPB_SRCSINK_H
+
+#include "upb_stream_vtbl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Forward-declare.  We can't include upb_def.h; it would be circular.
+struct _upb_fielddef;
+
+// Note!  The "eof" flags work like feof() in C; they cannot report end-of-file
+// until a read has failed due to eof.  They cannot preemptively tell you that
+// the next call will fail due to eof.  Since these are the semantics that C
+// and UNIX provide, we're stuck with them if we want to support eg. stdio.
+
+/* upb_src ********************************************************************/
+
+// TODO: decide how to handle unknown fields.
+
+// Retrieves the fielddef for the next field in the stream.  Returns NULL on
+// error or end-of-stream.
+struct _upb_fielddef *upb_src_getdef(upb_src *src);
+
+// Retrieves and stores the next value in "val".  For string types "val" must
+// be a newly-recycled string.  Returns false on error.
+bool upb_src_getval(upb_src *src, upb_valueptr val);
+bool upb_src_getstr(upb_src *src, upb_string *val);
+
+// Like upb_src_getval() but skips the value.
+bool upb_src_skipval(upb_src *src);
+
+// Descends into a submessage.  May only be called after a def has been
+// returned that indicates a submessage.
+bool upb_src_startmsg(upb_src *src);
+
+// Stops reading a submessage.  May be called before the stream is EOF, in
+// which case the rest of the submessage is skipped.
+bool upb_src_endmsg(upb_src *src);
+
+// Returns the current error/eof status for the stream.
+INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; }
+INLINE bool upb_src_eof(upb_src *src) { return src->eof; }
+
+// The following functions are equivalent to upb_src_getval(), but take
+// pointers to specific types.  In debug mode this may check that the type
+// is compatible with the type being read.  This check will *not* be performed
+// in non-debug mode, and if you get the type wrong the behavior is undefined.
+bool upb_src_getbool(upb_src *src, bool *val);
+bool upb_src_getint32(upb_src *src, int32_t *val);
+bool upb_src_getint64(upb_src *src, int64_t *val);
+bool upb_src_getuint32(upb_src *src, uint32_t *val);
+bool upb_src_getuint64(upb_src *src, uint64_t *val);
+bool upb_src_getfloat(upb_src *src, float *val);
+bool upb_src_getdouble(upb_src *src, double *val);
+
+/* upb_sink *******************************************************************/
+
+// Puts the given fielddef into the stream.
+bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def);
+
+// Puts the given value into the stream.
+bool upb_sink_putval(upb_sink *sink, upb_value val);
+
+// Starts a submessage.  (needed?  the def tells us we're starting a submsg.)
+bool upb_sink_startmsg(upb_sink *sink);
+
+// Ends a submessage.
+bool upb_sink_endmsg(upb_sink *sink);
+
+// Returns the current error status for the stream.
+upb_status *upb_sink_status(upb_sink *sink);
+
+/* upb_bytesrc ****************************************************************/
+
+// Returns the next string in the stream.  false is returned on error or eof.
+// The string must be at least "minlen" bytes long unless the stream is eof.
+bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen);
+
+// Appends the next "len" bytes in the stream in-place to "str".  This should
+// be used when the caller needs to build a contiguous string of the existing
+// data in "str" with more data.
+bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len);
+
+// Returns the current error status for the stream.
+INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; }
+INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; }
+
+/* upb_bytesink ***************************************************************/
+
+// Puts the given string.  Returns the number of bytes that were actually
+// consumed, which may be fewer than were in the string, or <0 on error.
+int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str);
+
+// Returns the current error status for the stream.
+upb_status *upb_bytesink_status(upb_bytesink *sink);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/core/upb_stream_vtbl.h b/core/upb_stream_vtbl.h
new file mode 100644
index 0000000..0ec45d2
--- /dev/null
+++ b/core/upb_stream_vtbl.h
@@ -0,0 +1,93 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * vtable declarations for types that are implementing any of the src or sink
+ * interfaces.  Only components that are implementing these interfaces need
+ * to worry about this file.
+ *
+ * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_SRCSINK_VTBL_H_
+#define UPB_SRCSINK_VTBL_H_
+
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct upb_src;
+typedef struct upb_src upb_src;
+struct upb_sink;
+typedef struct upb_sink upb_sink;
+struct upb_bytesrc;
+typedef struct upb_bytesrc upb_bytesrc;
+struct upb_bytesink;
+typedef struct upb_bytesink upb_bytesink;
+
+// Typedefs for function pointers to all of the virtual functions.
+typedef struct _upb_fielddef *(*upb_src_getdef_fptr)(upb_src *src);
+typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val);
+typedef bool (*upb_src_skipval_fptr)(upb_src *src);
+typedef bool (*upb_src_startmsg_fptr)(upb_src *src);
+typedef bool (*upb_src_endmsg_fptr)(upb_src *src);
+
+typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def);
+typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val);
+typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink);
+typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink);
+
+typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src);
+typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str);
+typedef bool (*upb_bytesrc_append_fptr)(
+    upb_bytesrc *src, upb_string *str, upb_strlen_t len);
+
+typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str);
+
+// Vtables for the above interfaces.
+typedef struct {
+  upb_src_getdef_fptr   getdef;
+  upb_src_getval_fptr   getval;
+  upb_src_skipval_fptr  skipval;
+  upb_src_startmsg_fptr startmsg;
+  upb_src_endmsg_fptr   endmsg;
+} upb_src_vtable;
+
+typedef struct {
+  upb_bytesrc_get_fptr     get;
+  upb_bytesrc_append_fptr  append;
+  upb_bytesrc_recycle_fptr recycle;
+} upb_bytesrc_vtable;
+
+// "Base Class" definitions; components that implement these interfaces should
+// contain one of these structures.
+
+struct upb_src {
+  upb_src_vtable *vtbl;
+  upb_status status;
+  bool eof;
+#ifndef NDEBUG
+  int state;  // For debug-mode checking of API usage.
+#endif
+};
+
+struct upb_bytesrc {
+  upb_bytesrc_vtable *vtbl;
+  upb_status status;
+  bool eof;
+};
+
+INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) {
+  s->vtbl = vtbl;
+  s->eof = false;
+#ifndef NDEBUG
+  // TODO: initialize debug-mode checking (s->state); s->status is not set here.
+#endif
+}
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/core/upb_string.c b/core/upb_string.c
new file mode 100644
index 0000000..91ab9ae
--- /dev/null
+++ b/core/upb_string.c
@@ -0,0 +1,47 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include "upb_string.h"
+
+#include <stdlib.h>
+
+#define UPB_STRING_UNFINALIZED -1
+
+static uint32_t upb_round_up_pow2(uint32_t v) {
+  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+  v--;
+  v |= v >> 1;
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  v++;
+  return v;
+}
+
+upb_string *upb_string_new() {
+  upb_string *str = malloc(sizeof(*str));
+  str->ptr = NULL;
+  str->size = 0;
+  str->len = UPB_STRING_UNFINALIZED;
+  upb_atomic_refcount_init(&str->refcount, 1);
+  return str;
+}
+
+void _upb_string_free(upb_string *str) {
+  if(str->ptr) free(str->ptr);
+  free(str);
+}
+
+char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
+  assert(str->len == UPB_STRING_UNFINALIZED);
+  if (str->size < len) {
+    str->size = upb_round_up_pow2(len);
+    str->ptr = realloc(str->ptr, str->size);
+  }
+  str->len = len;
+  return str->ptr;
+}
diff --git a/core/upb_string.h b/core/upb_string.h
new file mode 100644
index 0000000..770dba7
--- /dev/null
+++ b/core/upb_string.h
@@ -0,0 +1,194 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
+ *
+ * This file defines a simple string type.  The overriding goal of upb_string
+ * is to avoid memcpy(), malloc(), and free() wherever possible, while
+ * keeping both CPU and memory overhead low.  Throughout upb there are
+ * situations where one wants to reference all or part of another string
+ * without copying.  upb_string provides APIs for doing this.
+ *
+ * Characteristics of upb_string:
+ * - strings are reference-counted.
+ * - strings are logically immutable.
+ * - if a string has no other referents, it can be "recycled" into a new string
+ *   without having to reallocate the upb_string.
+ * - strings can be substrings of other strings (owning a ref on the source
+ *   string).
+ * - strings can refer to memory that they do not own, in which case we avoid
+ *   copies if possible (the exact strategy for doing this can vary).
+ * - strings are not thread-safe by default, but can be made so by calling a
+ *   function.  This is not the default because it causes extra CPU overhead.
+ */
+
+#ifndef UPB_STRING_H
+#define UPB_STRING_H
+
+#include <assert.h>
+#include <string.h>
+#include "upb_atomic.h"
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// All members of this struct are private, and may only be read/written through
+// the associated functions.  Also, strings may *only* be allocated on the heap.
+struct _upb_string {
+  char *ptr;
+  int32_t len;
+  uint32_t size;
+  upb_atomic_refcount_t refcount;
+  union {
+    // Used if this is a slice of another string.
+    struct _upb_string *src;
+    // Used if this string is referencing external unowned memory.
+    upb_atomic_refcount_t reader_count;
+  } extra;
+};
+
+// Returns a newly-created, empty, non-finalized string.  When the string is no
+// longer needed, it should be unref'd, never freed directly.
+upb_string *upb_string_new();
+
+void _upb_string_free(upb_string *str);
+
+// Releases a ref on the given string, which may free the memory.  "str"
+// can be NULL, in which case this is a no-op.
+INLINE void upb_string_unref(upb_string *str) {
+  if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str);
+}
+
+// Returns a string with the same contents as "str".  The caller owns a ref on
+// the returned string, which may or may not be the same object as "str".
+INLINE upb_string *upb_string_getref(upb_string *str) {
+  // If/when we support stack-allocated strings, this will have to allocate
+  // a new string if the given string is on the stack.
+  upb_atomic_ref(&str->refcount);
+  return str;
+}
+
+// Returns the length of the string.
+INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
+
+// Use to read the bytes of the string.  The caller *must* call
+// upb_string_endread() after the data has been read.  The window between
+// upb_string_getrobuf() and upb_string_endread() should be kept as short as
+// possible, because any pending upb_string_detach() may be blocked until
+// upb_string_endread() is called.  No other functions may be called on the
+// string during this window except upb_string_len().
+INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
+INLINE void upb_string_endread(upb_string *str) { (void)str; }
+
+// Attempts to recycle the string "str" so it may be reused and have different
+// data written to it.  The returned string is either "str" if it could be
+// recycled or a newly created string if "str" has other references.
+//
+// As a special case, passing NULL will allocate a new string.  This is
+// convenient for the pattern:
+//
+//   upb_string *str = NULL;
+//   while (x) {
+//     if (y) {
+//       str = upb_string_tryrecycle(str);
+//       upb_src_getstr(src, str);
+//     }
+//   }
+upb_string *upb_string_tryrecycle(upb_string *str);
+
+// The three options for setting the contents of a string.  These may only be
+// called when a string is first created or recycled; once other functions have
+// been called on the string, these functions are not allowed until the string
+// is recycled.
+
+// Gets a pointer suitable for writing to the string, which is guaranteed to
+// have at least "len" bytes of data available.  The size of the string will
+// become "len".
+char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
+
+// Sets the contents of "str" to be the given substring of "target_str", to
+// which the caller must own a ref.
+void upb_string_substr(upb_string *str, upb_string *target_str,
+                       upb_strlen_t start, upb_strlen_t len);
+
+// Makes the string "str" a reference to the given string data.  The caller
+// guarantees that the given string data will not change or be deleted until
+// a matching call to upb_string_detach().
+void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
+void upb_string_detach(upb_string *str);
+
+// Allows using upb_strings in printf, ie:
+//   upb_strptr str = UPB_STRLIT("Hello, World!\n");
+//   printf("String is: " UPB_STRFMT, UPB_STRARG(str));
+#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
+#define UPB_STRFMT "%.*s"
+
+/* upb_string library functions ***********************************************/
+
+// Named like their <string.h> counterparts, these are all safe against buffer
+// overflow.  These only use the public upb_string interface.
+
+// More efficient than upb_strcmp if all you need is to test equality.
+INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
+  upb_strlen_t len = upb_string_len(s1);
+  if(len != upb_string_len(s2)) {
+    return false;
+  } else {
+    bool ret =
+        memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
+    upb_string_endread(s1);
+    upb_string_endread(s2);
+    return ret;
+  }
+}
+
+// Like strcmp().
+int upb_strcmp(upb_string *s1, upb_string *s2);
+
+// Like upb_strcpy, but copies from a buffer and length.
+INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
+  memcpy(upb_string_getrwbuf(dest, len), src, len);
+}
+
+// Replaces the contents of "dest" with the contents of "src".
+INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
+  upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
+  upb_string_endread(src);
+}
+
+// Like upb_strcpy, but copies from a NULL-terminated string.
+INLINE void upb_strcpyc(upb_string *dest, const char *src) {
+  // This does two passes over src, but that is necessary unless we want to
+  // repeatedly re-allocate dst, which seems worse.
+  upb_strcpylen(dest, src, strlen(src));
+}
+
+// Returns a new string whose contents are a copy of s.
+upb_string *upb_strdup(upb_string *s);
+
+// Like upb_strdup(), but duplicates a given buffer and length.
+INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
+  upb_string *s = upb_string_new();
+  upb_strcpylen(s, src, len);
+  return s;
+}
+
+// Like upb_strdup(), but duplicates a C NULL-terminated string.
+upb_string *upb_strdupc(const char *src);
+
+// Appends 'append' to 's' in-place, resizing s if necessary.
+void upb_strcat(upb_string *s, upb_string *append);
+
+// Returns a new string that is a substring of the given string.
+upb_string *upb_strslice(upb_string *s, int offset, int len);
+
+// Reads an entire file into a newly-allocated string.
+upb_string *upb_strreadfile(const char *filename);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/core/upb_table.c b/core/upb_table.c
new file mode 100644
index 0000000..b91776c
--- /dev/null
+++ b/core/upb_table.c
@@ -0,0 +1,411 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include "upb_table.h"
+#include "upb_string.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const upb_inttable_key_t EMPTYENT = 0;
+static const double MAX_LOAD = 0.85;
+
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+
+/* We use 1-based indexes into the table so that 0 can be "NULL". */
+static upb_inttable_entry *intent(upb_inttable *t, int32_t i) {
+  return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
+}
+static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
+  return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
+}
+
+void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size)
+{
+  t->count = 0;
+  t->entry_size = entry_size;
+  t->size_lg2 = 1;
+  while(size >>= 1) t->size_lg2++;
+  size_t bytes = upb_table_size(t) * t->entry_size;
+  t->mask = upb_table_size(t) - 1;
+  t->entries = malloc(bytes);
+  memset(t->entries, 0, bytes);  /* Both tables consider 0's an empty entry. */
+}
+
+void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize)
+{
+  upb_table_init(&t->t, size, entsize);
+}
+
+void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize)
+{
+  upb_table_init(&t->t, size, entsize);
+}
+
+void upb_table_free(upb_table *t) { free(t->entries); }
+void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); }
+void upb_strtable_free(upb_strtable *t) {
+  // Free refs from the strtable.
+  upb_strtable_entry *e = upb_strtable_begin(t);
+  for(; e; e = upb_strtable_next(t, e)) {
+    upb_string_unref(e->key);
+  }
+  upb_table_free(&t->t);
+}
+
+static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
+{
+  uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
+  return (hash & (upb_strtable_size(t)-1)) + 1;
+}
+
+void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
+{
+  uint32_t bucket = strtable_bucket(t, key);
+  upb_strtable_entry *e;
+  do {
+    e = strent(t, bucket);
+    if(e->key && upb_streql(e->key, key)) return e;
+  } while((bucket = e->next) != UPB_END_OF_CHAIN);
+  return NULL;
+}
+
+static uint32_t empty_intbucket(upb_inttable *table)
+{
+  /* TODO: does it matter that this is biased towards the front of the table? */
+  for(uint32_t i = 1; i <= upb_inttable_size(table); i++) {
+    upb_inttable_entry *e = intent(table, i);
+    if(e->key == EMPTYENT) return i;
+  }
+  assert(false);
+  return 0;
+}
+
+/* The insert routines have a lot more code duplication between int/string
+ * variants than I would like, but there's just a bit too much that varies to
+ * parameterize them. */
+static void intinsert(upb_inttable *t, upb_inttable_entry *e)
+{
+  assert(upb_inttable_lookup(t, e->key) == NULL);
+  t->t.count++;
+  uint32_t bucket = upb_inttable_bucket(t, e->key);
+  upb_inttable_entry *table_e = intent(t, bucket);
+  if(table_e->key != EMPTYENT) {  /* Collision. */
+    if(bucket == upb_inttable_bucket(t, table_e->key)) {
+      /* Existing element is in its main position.  Find an empty slot to
+       * place our new element and append it to this key's chain. */
+      uint32_t empty_bucket = empty_intbucket(t);
+      while (table_e->next != UPB_END_OF_CHAIN)
+        table_e = intent(t, table_e->next);
+      table_e->next = empty_bucket;
+      table_e = intent(t, empty_bucket);
+    } else {
+      /* Existing element is not in its main position.  Move it to an empty
+       * slot and put our element in its main position. */
+      uint32_t empty_bucket = empty_intbucket(t);
+      uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key);
+      memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
+      upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
+      while(1) {
+        assert(evictee_e->key != UPB_EMPTY_ENTRY);
+        assert(evictee_e->next != UPB_END_OF_CHAIN);
+        if(evictee_e->next == bucket) {
+          evictee_e->next = empty_bucket;
+          break;
+        }
+        evictee_e = intent(t, evictee_e->next);
+      }
+      /* table_e remains set to our mainpos. */
+    }
+  }
+  memcpy(table_e, e, t->t.entry_size);
+  table_e->next = UPB_END_OF_CHAIN;
+  assert(upb_inttable_lookup(t, e->key) == table_e);
+}
+
+void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e)
+{
+  assert(e->key != 0);  /* Key 0 marks an empty slot and cannot be stored. */
+  if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) {
+    /* Need to resize.  New table of double the size, add old elements to it. */
+    upb_inttable new_table;
+    upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size);
+    /* Don't pre-seed new_table.t.count: intinsert() bumps it per element. */
+    upb_inttable_entry *old_e;
+    for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e))
+      intinsert(&new_table, old_e);
+    upb_inttable_free(t);
+    *t = new_table;
+  }
+  intinsert(t, e);
+}
+
+static uint32_t empty_strbucket(upb_strtable *table)
+{
+  /* TODO: does it matter that this is biased towards the front of the table? */
+  for(uint32_t i = 1; i <= upb_strtable_size(table); i++) {
+    upb_strtable_entry *e = strent(table, i);
+    if(!e->key) return i;
+  }
+  assert(false);
+  return 0;
+}
+
+static void strinsert(upb_strtable *t, upb_strtable_entry *e)
+{
+  assert(upb_strtable_lookup(t, e->key) == NULL);
+  e->key = upb_string_getref(e->key);
+  t->t.count++;
+  uint32_t bucket = strtable_bucket(t, e->key);
+  upb_strtable_entry *table_e = strent(t, bucket);
+  if(table_e->key) {  /* Collision. */
+    if(bucket == strtable_bucket(t, table_e->key)) {
+      /* Existing element is in its main position.  Find an empty slot to
+       * place our new element and append it to this key's chain. */
+      uint32_t empty_bucket = empty_strbucket(t);
+      while (table_e->next != UPB_END_OF_CHAIN)
+        table_e = strent(t, table_e->next);
+      table_e->next = empty_bucket;
+      table_e = strent(t, empty_bucket);
+    } else {
+      /* Existing element is not in its main position.  Move it to an empty
+       * slot and put our element in its main position. */
+      uint32_t empty_bucket = empty_strbucket(t);
+      uint32_t evictee_bucket = strtable_bucket(t, table_e->key);
+      memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
+      upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
+      while(1) {
+        assert(evictee_e->key != NULL);  /* A NULL key marks an empty slot. */
+        assert(evictee_e->next != UPB_END_OF_CHAIN);
+        if(evictee_e->next == bucket) {
+          evictee_e->next = empty_bucket;
+          break;
+        }
+        evictee_e = strent(t, evictee_e->next);
+      }
+      /* table_e remains set to our mainpos. */
+    }
+  }
+  memcpy(table_e, e, t->t.entry_size);
+  table_e->next = UPB_END_OF_CHAIN;
+  assert(upb_strtable_lookup(t, e->key) == table_e);
+}
+
+void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e)
+{
+  if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
+    /* Need to resize.  New table of double the size, add old elements to it. */
+    upb_strtable new_table;
+    upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size);
+    upb_strtable_entry *old_e;
+    for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e))
+      strinsert(&new_table, old_e);
+    upb_strtable_free(t);
+    *t = new_table;
+  }
+  strinsert(t, e);
+}
+
+void *upb_inttable_begin(upb_inttable *t) {
+  return upb_inttable_next(t, intent(t, 0));
+}
+
+void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) {
+  upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1);
+  do {
+    cur = (void*)((char*)cur + t->t.entry_size);
+    if(cur == end) return NULL;
+  } while(cur->key == UPB_EMPTY_ENTRY);
+  return cur;
+}
+
+void *upb_strtable_begin(upb_strtable *t) {
+  return upb_strtable_next(t, strent(t, 0));
+}
+
+void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) {
+  upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1);
+  do {
+    cur = (void*)((char*)cur + t->t.entry_size);
+    if(cur == end) return NULL;
+  } while(cur->key == NULL);
+  return cur;
+}
+
+#ifdef UPB_UNALIGNED_READS_OK
+//-----------------------------------------------------------------------------
+// MurmurHash2, by Austin Appleby (released as public domain).
+// Reformatted and C99-ified by Joshua Haberman.
+// Note - This code makes a few assumptions about how your machine behaves -
+//   1. We can read a 4-byte value from any address without crashing
+//   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
+// And it has a few limitations -
+//   1. It will not work incrementally.
+//   2. It will not produce the same results on little-endian and big-endian
+//      machines.
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+  const uint32_t m = 0x5bd1e995;
+  const int32_t r = 24;
+
+  // Initialize the hash to a 'random' value
+  uint32_t h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+  const uint8_t * data = (const uint8_t *)key;
+  while(len >= 4) {
+    uint32_t k = *(uint32_t *)data;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+  switch(len) {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0]; h *= m;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+
+#else // !UPB_UNALIGNED_READS_OK
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms.
+// Performance will be lower than MurmurHash2
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
+{
+  const uint32_t m = 0x5bd1e995;
+  const int32_t r = 24;
+  const uint8_t * data = (const uint8_t *)key;
+  uint32_t h = seed ^ len;
+  uint8_t align = (uintptr_t)data & 3;
+
+  if(align && (len >= 4)) {
+    // Pre-load the temp registers
+    uint32_t t = 0, d = 0;
+
+    switch(align) {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    int32_t sl = 8 * (4-align);
+    int32_t sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4) {
+      d = *(uint32_t *)data;
+      t = (t >> sr) | (d << sl);
+
+      uint32_t k = t;
+
+      MIX(h,k,m);
+
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    d = 0;
+
+    if(len >= align) {
+      switch(align) {
+        case 3: d |= data[2] << 16;
+        case 2: d |= data[1] << 8;
+        case 1: d |= data[0];
+      }
+
+      uint32_t k = (t >> sr) | (d << sl);
+      MIX(h,k,m);
+
+      data += align;
+      len -= align;
+
+      //----------
+      // Handle tail bytes
+
+      switch(len) {
+        case 3: h ^= data[2] << 16;
+        case 2: h ^= data[1] << 8;
+        case 1: h ^= data[0]; h *= m;
+      };
+    } else {
+      switch(len) {
+        case 3: d |= data[2] << 16;
+        case 2: d |= data[1] << 8;
+        case 1: d |= data[0];
+        case 0: h ^= (t >> sr) | (d << sl); h *= m;
+      }
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  } else {
+    while(len >= 4) {
+      uint32_t k = *(uint32_t *)data;
+
+      MIX(h,k,m);
+
+      data += 4;
+      len -= 4;
+    }
+
+    //----------
+    // Handle tail bytes
+
+    switch(len) {
+      case 3: h ^= data[2] << 16;
+      case 2: h ^= data[1] << 8;
+      case 1: h ^= data[0]; h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+}
+#undef MIX
+
+#endif // UPB_UNALIGNED_READS_OK
diff --git a/core/upb_table.h b/core/upb_table.h
new file mode 100644
index 0000000..20dae92
--- /dev/null
+++ b/core/upb_table.h
@@ -0,0 +1,133 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ * This file defines very fast int->struct (inttable) and string->struct
+ * (strtable) hash tables.  The struct can be of any size, and it is stored
+ * in the table itself, for cache-friendly performance.
+ *
+ * The table uses internal chaining with Brent's variation (inspired by the
+ * Lua implementation of hash tables).  The hash function for strings is
+ * Austin Appleby's "MurmurHash."
+ */
+
+#ifndef UPB_TABLE_H_
+#define UPB_TABLE_H_
+
+#include <assert.h>
+#include "upb.h"
+#include "upb_string.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Note: the key cannot be zero!  Zero is used by the implementation. */
+typedef uint32_t upb_inttable_key_t;
+
+#define UPB_END_OF_CHAIN (uint32_t)0
+#define UPB_EMPTY_ENTRY (uint32_t)0
+
+typedef struct {
+  upb_inttable_key_t key;
+  uint32_t next;  /* Internal chaining. */
+} upb_inttable_entry;
+
+// TODO: consider storing the hash in the entry.  This would avoid the need to
+// rehash on table resizes, but more importantly could possibly improve lookup
+// performance by letting us compare hashes before comparing lengths or the
+// strings themselves.
+typedef struct {
+  upb_string *key;         // We own a ref.
+  uint32_t next;           // Internal chaining.
+} upb_strtable_entry;
+
+typedef struct {
+  void *entries;
+  uint32_t count;       /* How many elements are currently in the table? */
+  uint16_t entry_size;  /* How big is each entry? */
+  uint8_t size_lg2;     /* The table is 2^size_lg2 in size. */
+  uint32_t mask;
+} upb_table;
+
+typedef struct {
+  upb_table t;
+} upb_strtable;
+
+typedef struct {
+  upb_table t;
+} upb_inttable;
+
+/* Initialize and free a table, respectively.  Specify the initial size
+ * with 'size' (the size will be increased as necessary).  Entry size
+ * specifies how many bytes each entry in the table is. */
+void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size);
+void upb_inttable_free(upb_inttable *table);
+void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size);
+void upb_strtable_free(upb_strtable *table);
+
+INLINE uint32_t upb_table_size(upb_table *t) { return 1U << t->size_lg2; }
+INLINE uint32_t upb_inttable_size(upb_inttable *t) {
+  return upb_table_size(&t->t);
+}
+INLINE uint32_t upb_strtable_size(upb_strtable *t) {
+  return upb_table_size(&t->t);
+}
+
+INLINE uint32_t upb_table_count(upb_table *t) { return t->count; }
+INLINE uint32_t upb_inttable_count(upb_inttable *t) {
+  return upb_table_count(&t->t);
+}
+INLINE uint32_t upb_strtable_count(upb_strtable *t) {
+  return upb_table_count(&t->t);
+}
+
+/* Inserts the given key into the hashtable with the given value.  The key must
+ * not already exist in the hash table.  The data will be copied from e into
+ * the hashtable (the amount of data copied comes from entry_size when the
+ * table was constructed).  Therefore the data at val may be freed once the
+ * call returns. */
+void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e);
+void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e);
+
+INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) {
+  return (k & t->t.mask) + 1;  /* Identity hash for ints. */
+}
+
+/* Looks up key in this table.  Inlined because this is in the critical path of
+ * decoding.  We have the caller specify the entry_size because fixing this as
+ * a literal (instead of reading table->entry_size) gives the compiler more
+ * ability to optimize. */
+INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
+                                     uint32_t entry_size) {
+  assert(key != 0);
+  uint32_t bucket = upb_inttable_bucket(t, key);
+  upb_inttable_entry *e;
+  do {
+    e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size);
+    if(e->key == key) return e;
+  } while((bucket = e->next) != UPB_END_OF_CHAIN);
+  return NULL;  /* Not found. */
+}
+
+INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
+  return upb_inttable_fastlookup(t, key, t->t.entry_size);
+}
+
+void *upb_strtable_lookup(upb_strtable *t, upb_string *key);
+
+/* Provides iteration over the table.  The order in which the entries are
+ * returned is undefined.  Insertions invalidate iterators.  The _next
+ * functions return NULL when the end has been reached. */
+void *upb_inttable_begin(upb_inttable *t);
+void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur);
+
+void *upb_strtable_begin(upb_strtable *t);
+void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_TABLE_H_ */
diff --git a/gen-deps.sh b/gen-deps.sh
index 6c0ced3..2bc82f8 100755
--- a/gen-deps.sh
+++ b/gen-deps.sh
@@ -14,5 +14,5 @@
 set -e
 rm -f deps
 for file in $@; do
-  gcc -MM $file -MT ${file%.*}.o -DUPB_THREAD_UNSAFE -Idescriptor -Isrc -I. >> deps
+  gcc -MM $file -MT ${file%.*}.o -DUPB_THREAD_UNSAFE -Idescriptor -Icore -Istream -I. >> deps
 done
diff --git a/src/upb.c b/src/upb.c
deleted file mode 100644
index a98512d..0000000
--- a/src/upb.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include <string.h>
-
-#include "upb.h"
-#include "upb_string.h"
-
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, allows_delimited) \
-    {alignof(ctype), sizeof(ctype), wire_type, \
-     (1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \
-     #ctype},
-
-upb_type_info upb_types[] = {
-  {0, 0, 0, 0, ""}, // There is no type 0.
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       double,    1)    // DOUBLE
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       float,     1)    // FLOAT
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   1)    // INT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint64_t,  1)    // UINT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   1)    // INT32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       uint64_t,  1)    // FIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       uint32_t,  1)    // FIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      bool,      1)    // BOOL
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     1)    // STRING
-  TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*,     0)    // GROUP
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     1)    // MESSAGE
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     1)    // BYTES
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  1)    // UINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  1)    // ENUM
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       int32_t,   1)    // SFIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       int64_t,   1)    // SFIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   1)    // SINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   1)    // SINT64
-};
-
-void upb_seterr(upb_status *status, enum upb_status_code code,
-                const char *msg, ...)
-{
-  if(upb_ok(status)) {  // The first error is the most interesting.
-    status->str = upb_string_new();
-    char *str = upb_string_getrwbuf(status->str, UPB_ERRORMSG_MAXLEN);
-    status->code = code;
-    va_list args;
-    va_start(args, msg);
-    vsnprintf(str, UPB_ERRORMSG_MAXLEN, msg, args);
-    va_end(args);
-  }
-}
-
-void upb_copyerr(upb_status *to, upb_status *from)
-{
-  to->code = from->code;
-  to->str = upb_string_getref(from->str);
-}
-
-void upb_reset(upb_status *status) {
-  status->code = UPB_STATUS_OK;
-  upb_string_unref(status->str);
-  status->str = NULL;
-}
diff --git a/src/upb.h b/src/upb.h
deleted file mode 100644
index 230e638..0000000
--- a/src/upb.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * This file contains shared definitions that are widely used across upb.
- */
-
-#ifndef UPB_H_
-#define UPB_H_
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>  // only for size_t.
-#include "descriptor_const.h"
-#include "upb_atomic.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// inline if possible, emit standalone code if required.
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
-#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
-#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
-
-// The maximum that any submessages can be nested.  Matches proto2's limit.
-#define UPB_MAX_NESTING 64
-
-// The maximum number of fields that any one .proto type can have.  Note that
-// this is very different than the max field number.  It is hard to imagine a
-// scenario where more than 32k fields makes sense.
-#define UPB_MAX_FIELDS (1<<15)
-typedef int16_t upb_field_count_t;
-
-// Nested type names are separated by periods.
-#define UPB_SYMBOL_SEPARATOR '.'
-
-// This limit is for the longest fully-qualified symbol, eg. foo.bar.MsgType
-#define UPB_SYMBOL_MAXLEN 128
-
-// The longest chain that mutually-recursive types are allowed to form.  For
-// example, this is a type cycle of length 2:
-//   message A {
-//     B b = 1;
-//   }
-//   message B {
-//     A a = 1;
-//   }
-#define UPB_MAX_TYPE_CYCLE_LEN 16
-
-// The maximum depth that the type graph can have.  Note that this setting does
-// not automatically constrain UPB_MAX_NESTING, because type cycles allow for
-// unlimited nesting if we do not limit it.
-#define UPB_MAX_TYPE_DEPTH 64
-
-// The biggest possible single value is a 10-byte varint.
-#define UPB_MAX_ENCODED_SIZE 10
-
-
-/* Fundamental types and type constants. **************************************/
-
-// A list of types as they are encoded on-the-wire.
-enum upb_wire_type {
-  UPB_WIRE_TYPE_VARINT      = 0,
-  UPB_WIRE_TYPE_64BIT       = 1,
-  UPB_WIRE_TYPE_DELIMITED   = 2,
-  UPB_WIRE_TYPE_START_GROUP = 3,
-  UPB_WIRE_TYPE_END_GROUP   = 4,
-  UPB_WIRE_TYPE_32BIT       = 5,
-
-  // This isn't a real wire type, but we use this constant to describe varints
-  // that are expected to be a maximum of 32 bits.
-  UPB_WIRE_TYPE_32BIT_VARINT = 8
-};
-
-typedef uint8_t upb_wire_type_t;
-
-// Value type as defined in a .proto file.  eg. string, int32, etc.  The
-// integers that represent this are defined by descriptor.proto.  Note that
-// descriptor.proto reserves "0" for errors, and we use it to represent
-// exceptional circumstances.
-typedef uint8_t upb_field_type_t;
-
-// For referencing the type constants tersely.
-#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
-#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type
-
-INLINE bool upb_issubmsgtype(upb_field_type_t type) {
-  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
-}
-
-INLINE bool upb_isstringtype(upb_field_type_t type) {
-  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
-}
-
-// Info for a given field type.
-typedef struct {
-  uint8_t align;
-  uint8_t size;
-  upb_wire_type_t native_wire_type;
-  uint8_t allowed_wire_types;  // For packable fields, also allows delimited.
-  char *ctype;
-} upb_type_info;
-
-// A static array of info about all of the field types, indexed by type number.
-extern upb_type_info upb_types[];
-
-// The number of a field, eg. "optional string foo = 3".
-typedef int32_t upb_field_number_t;
-
-// Label (optional, repeated, required) as defined in a .proto file.  The
-// values of this are defined by google.protobuf.FieldDescriptorProto.Label
-// (from descriptor.proto).
-typedef uint8_t  upb_label_t;
-
-// A scalar (non-string) wire value.  Used only for parsing unknown fields.
-typedef union {
-  uint64_t varint;
-  uint64_t _64bit;
-  uint32_t _32bit;
-} upb_wire_value;
-
-/* Polymorphic values of .proto types *****************************************/
-
-struct _upb_string;
-typedef struct _upb_string upb_string;
-
-typedef uint32_t upb_strlen_t;
-
-// A single .proto value.  The owner must have an out-of-band way of knowing
-// the type, so that it knows which union member to use.
-typedef union {
-  double _double;
-  float _float;
-  int32_t int32;
-  int64_t int64;
-  uint32_t uint32;
-  uint64_t uint64;
-  bool _bool;
-} upb_value;
-
-// A pointer to a .proto value.  The owner must have an out-of-band way of
-// knowing the type, so it knows which union member to use.
-typedef union {
-  double *_double;
-  float *_float;
-  int32_t *int32;
-  int64_t *int64;
-  uint8_t *uint8;
-  uint32_t *uint32;
-  uint64_t *uint64;
-  bool *_bool;
-} upb_valueptr;
-
-INLINE upb_valueptr upb_value_addrof(upb_value *val) {
-  upb_valueptr ptr = {&val->_double};
-  return ptr;
-}
-
-// Status codes used as a return value.  Codes >0 are not fatal and can be
-// resumed.
-enum upb_status_code {
-  UPB_STATUS_OK = 0,
-
-  // A read or write from a streaming src/sink could not be completed right now.
-  UPB_STATUS_TRYAGAIN = 1,
-
-  // A value had an incorrect wire type and will be skipped.
-  UPB_STATUS_BADWIRETYPE = 2,
-
-  // An unrecoverable error occurred.
-  UPB_STATUS_ERROR = -1,
-
-  // A varint went for 10 bytes without terminating.
-  UPB_ERROR_UNTERMINATED_VARINT = -2,
-
-  // The max nesting level (UPB_MAX_NESTING) was exceeded.
-  UPB_ERROR_MAX_NESTING_EXCEEDED = -3
-};
-
-typedef struct {
-  enum upb_status_code code;
-  upb_string *str;
-} upb_status;
-
-#define UPB_STATUS_INIT {UPB_STATUS_OK, NULL}
-#define UPB_ERRORMSG_MAXLEN 256
-
-INLINE bool upb_ok(upb_status *status) {
-  return status->code == UPB_STATUS_OK;
-}
-
-void upb_reset(upb_status *status);
-void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg,
-                ...);
-void upb_copyerr(upb_status *to, upb_status *from);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_H_ */
diff --git a/src/upb_atomic.h b/src/upb_atomic.h
deleted file mode 100644
index 01fc8a2..0000000
--- a/src/upb_atomic.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * Only a very small part of upb is thread-safe.  Notably, individual
- * messages, arrays, and strings are *not* thread safe for mutating.
- * However, we do make message *metadata* such as upb_msgdef and
- * upb_context thread-safe, and their ownership is tracked via atomic
- * refcounting.  This header implements the small number of atomic
- * primitives required to support this.  The primitives we implement
- * are:
- *
- * - a reader/writer lock (wrappers around platform-provided mutexes).
- * - an atomic refcount.
- */
-
-#ifndef UPB_ATOMIC_H_
-#define UPB_ATOMIC_H_
-
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* inline if possible, emit standalone code if required. */
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Non-thread-safe implementations. ******************************************/
-
-typedef struct {
-  int v;
-} upb_atomic_refcount_t;
-
-INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
-  a->v = val;
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
-  return a->v++ == 0;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
-  return --a->v == 0;
-}
-
-INLINE int upb_atomic_read(upb_atomic_refcount_t *a) {
-  return a->v;
-}
-
-INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) {
-  a->v += val;
-  return a->v == 0;
-}
-
-INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) {
-  int ret = a->v;
-  a->v += val;
-  return ret;
-}
-
-#endif
-
-/* Atomic refcount ************************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Already defined above. */
-
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
-
-/* GCC includes atomic primitives. */
-
-typedef struct {
-  volatile int v;
-} upb_atomic_refcount_t;
-
-INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
-  a->v = val;
-  __sync_synchronize();   /* Ensure the initialized value is visible. */
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
-  return __sync_fetch_and_add(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) {
-  return __sync_add_and_fetch(&a->v, n) == 0;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
-  return __sync_sub_and_fetch(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) {
-  return __sync_fetch_and_add(&a->v, 0);
-}
-
-#elif defined(WIN32)
-
-/* Windows defines atomic increment/decrement. */
-#include <Windows.h>
-
-typedef struct {
-  volatile LONG val;
-} upb_atomic_refcount_t;
-
-INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) {
-  InterlockedExchange(&a->val, val);
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
-  return InterlockedIncrement(&a->val) == 1;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
-  return InterlockedDecrement(&a->val) == 0;
-}
-
-#else
-#error Atomic primitives not defined for your platform/CPU.  \
-       Implement them or compile with UPB_THREAD_UNSAFE.
-#endif
-
-/* Reader/Writer lock. ********************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-typedef struct {
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
-
-#elif defined(UPB_USE_PTHREADS)
-
-#include <pthread.h>
-
-typedef struct {
-  pthread_rwlock_t lock;
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_init(&l->lock, NULL);
-}
-
-INLINE void upb_rwlock_destroy(upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_destroy(&l->lock);
-}
-
-INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_rdlock(&l->lock);
-}
-
-INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_wrlock(&l->lock);
-}
-
-INLINE void upb_rwlock_unlock(upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_unlock(&l->lock);
-}
-
-#else
-#error Reader/writer lock is not defined for your platform/CPU.  \
-       Implement it or compile with UPB_THREAD_UNSAFE.
-#endif
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_ATOMIC_H_ */
diff --git a/src/upb_byteio.h b/src/upb_byteio.h
deleted file mode 100644
index 69a28b3..0000000
--- a/src/upb_byteio.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * This file contains upb_bytesrc and upb_bytesink implementations for common
- * interfaces like strings, UNIX fds, and FILE*.
- *
- * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
- */
-
-#ifndef UPB_BYTEIO_H
-#define UPB_BYTEIO_H
-
-#include "upb_srcsink.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* upb_stringsrc **************************************************************/
-
-struct upb_stringsrc;
-typedef struct upb_stringsrc upb_stringsrc;
-
-// Create/free a stringsrc.
-upb_stringsrc *upb_stringsrc_new();
-void upb_stringsrc_free(upb_stringsrc *s);
-
-// Resets the stringsrc to a state where it will vend the given string.  The
-// stringsrc will take a reference on the string, so the caller need not ensure
-// that it outlives the stringsrc.  A stringsrc can be reset multiple times.
-void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
-
-// Returns the upb_bytesrc* for this stringsrc.  Invalidated by reset above.
-upb_bytesrc *upb_stringsrc_bytesrc();
-
-
-/* upb_fdsrc ******************************************************************/
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif
diff --git a/src/upb_decoder.c b/src/upb_decoder.c
deleted file mode 100644
index e3fdc49..0000000
--- a/src/upb_decoder.c
+++ /dev/null
@@ -1,577 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#include "upb_decoder.h"
-
-#include <inttypes.h>
-#include <stddef.h>
-#include <stdlib.h>
-
-#define UPB_GROUP_END_OFFSET UINT32_MAX
-
-// Returns true if the give wire type and field type combination is valid,
-// taking into account both packed and non-packed encodings.
-static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
-  return (1 << wt) & upb_types[ft].allowed_wire_types;
-}
-
-// Performs zig-zag decoding, which is used by sint32 and sint64.
-static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
-static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
-
-
-/* upb_decoder ****************************************************************/
-
-// The decoder keeps a stack with one entry per level of recursion.
-// upb_decoder_frame is one frame of that stack.
-typedef struct {
-  upb_msgdef *msgdef;
-  upb_fielddef *field;
-  upb_strlen_t end_offset;  // For groups, -1.
-} upb_decoder_frame;
-
-struct upb_decoder {
-  upb_src src;  // upb_decoder is a upb_src.
-
-  upb_msgdef *toplevel_msgdef;
-  upb_bytesrc *bytesrc;
-
-  // The buffer of input data.  NULL is equivalent to the empty string.
-  upb_string *buf;
-
-  // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain.
-  uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE];
-
-  // The number of bytes we have yet to consume from "buf" or tmpbuf.  This is
-  // always >= 0 unless we were just reset or are eof.
-  int32_t buf_bytesleft;
-
-  // The offset within "buf" from where we are currently reading.  This can be
-  // <0 if we are reading some residual bytes from the previous buffer, which
-  // are stored in tmpbuf and combined with bytes from "buf".
-  int32_t buf_offset;
-
-  // The overall stream offset of the beginning of "buf".
-  uint32_t buf_stream_offset;
-
-  // Fielddef for the key we just read.
-  upb_fielddef *field;
-
-  // Wire type of the key we just read.
-  upb_wire_type_t wire_type;
-
-  // Delimited length of the string field we are reading.
-  upb_strlen_t delimited_len;
-
-  upb_strlen_t packed_end_offset;
-
-  // We keep a stack of messages we have recursed into.
-  upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING];
-};
-
-
-/* upb_decoder buffering. *****************************************************/
-
-static upb_strlen_t upb_decoder_offset(upb_decoder *d)
-{
-  return d->buf_stream_offset - d->buf_offset;
-}
-
-static bool upb_decoder_nextbuf(upb_decoder *d)
-{
-  assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE);
-
-  // Copy residual bytes to temporary buffer.
-  if(d->buf_bytesleft > 0) {
-    memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset,
-           d->buf_bytesleft);
-  }
-
-  // Recycle old buffer.
-  if(d->buf) {
-    d->buf = upb_string_tryrecycle(d->buf);
-    d->buf_offset -= upb_string_len(d->buf);
-    d->buf_stream_offset += upb_string_len(d->buf);
-  }
-
-  // Pull next buffer.
-  if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) {
-    d->buf_bytesleft += upb_string_len(d->buf);
-    return true;
-  } else {
-    // Error or EOF.
-    if(!upb_bytesrc_eof(d->bytesrc)) {
-      // Error from bytesrc.
-      upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
-      return false;
-    } else if(d->buf_bytesleft == 0) {
-      // EOF from bytesrc and we don't have any residual bytes left.
-      d->src.eof = true;
-      return false;
-    } else {
-      // No more data left from the bytesrc, but we still have residual bytes.
-      return true;
-    }
-  }
-}
-
-static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes)
-{
-  if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) {
-    // GCC is currently complaining about use of an uninitialized value if we
-    // don't set this now.  I think this is incorrect, but leaving this in
-    // to suppress the warning for now.
-    *bytes = 0;
-    if(!upb_decoder_nextbuf(d)) return NULL;
-  }
-
-  assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE);
-
-  if(d->buf_offset >= 0) {
-    // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE
-    // contiguous bytes, so we can read directly out of it.
-    *bytes = d->buf_bytesleft;
-    return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
-  } else {
-    // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we
-    // have so far.
-    upb_strlen_t len = -d->buf_offset;
-    if(d->buf) {
-      upb_strlen_t to_copy =
-          UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf));
-      memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy);
-      len += to_copy;
-    }
-    // Pad the buffer out to UPB_MAX_ENCODED_SIZE.
-    memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len);
-    *bytes = len;
-    return d->tmpbuf;
-  }
-}
-
-// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE
-// bytes long.  This buffer contains the next bytes in the stream (even if
-// those bytes span multiple buffers).  *bytes is set to the number of actual
-// stream bytes that are available in the returned buffer.  If
-// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes.
-//
-// After the data has been read, upb_decoder_consume() should be called to
-// indicate how many bytes were consumed.
-static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes)
-{
-  if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) {
-    // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE
-    // contiguous bytes, so we can read directly out of it.
-    *bytes = d->buf_bytesleft;
-    return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
-  } else {
-    return upb_decoder_getbuf_full(d, bytes);
-  }
-}
-
-static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes)
-{
-  assert(bytes <= UPB_MAX_ENCODED_SIZE);
-  d->buf_offset += bytes;
-  d->buf_bytesleft -= bytes;
-  if(d->buf_offset < 0) {
-    // We still have residual bytes we have not consumed.
-    memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset);
-  }
-  assert(d->buf_bytesleft >= 0);
-  return true;
-}
-
-static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes)
-{
-  d->buf_offset += bytes;
-  d->buf_bytesleft -= bytes;
-  while(d->buf_bytesleft < 0) {
-    if(!upb_decoder_nextbuf(d)) return false;
-  }
-  return true;
-}
-
-
-/* Functions to read wire values. *********************************************/
-
-// Parses remining bytes of a 64-bit varint that has already had its first byte
-// parsed.
-INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high)
-{
-  upb_strlen_t bytes_available;
-  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
-  const uint8_t *start = buf;
-  if(!buf) return false;
-
-  *high = 0;
-  uint32_t b;
-  b = *(buf++); *low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
-  b = *(buf++); *low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
-  b = *(buf++); *low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
-  b = *(buf++); *low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
-  b = *(buf++); *low  |= (b & 0x7f) << 28;
-                *high  = (b & 0x7f) >>  3; if(!(b & 0x80)) goto done;
-  b = *(buf++); *high |= (b & 0x7f) <<  4; if(!(b & 0x80)) goto done;
-  b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done;
-  b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done;
-  b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done;
-
-  if(bytes_available >= 10) {
-    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated "
-               "after 10 bytes, stream offset: %u", upb_decoder_offset(d));
-  } else {
-    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle "
-               "of a varint, stream offset: %u", upb_decoder_offset(d));
-  }
-  return false;
-
-done:
-  return upb_decoder_consume(d, buf - start);
-}
-
-// Gets a varint -- called when we only need 32 bits of it.  Note that a 32-bit
-// varint is not a true wire type.
-static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val)
-{
-  uint32_t high;
-  if(!upb_decoder_readv64(d, val, &high)) return false;
-
-  // We expect the high bits to be zero, except that signed 32-bit values are
-  // first sign-extended to be wire-compatible with 64 bits, in which case we
-  // expect the high bits to be all one.
-  //
-  // We could perform a slightly more sophisticated check by having the caller
-  // indicate whether a signed or unsigned value is being read.  We could check
-  // that the high bits are all zeros for unsigned, and properly sign-extended
-  // for signed.
-  if(high != 0 && ~high != 0) {
-    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but "
-               "the high bits contained data we should not truncate: "
-               "%ux, stream offset: %u", high, upb_decoder_offset(d));
-    return false;
-  }
-  return true;
-}
-
-// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).  Caller
-// promises that 4 bytes are available at buf.
-static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val)
-{
-  upb_strlen_t bytes_available;
-  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
-  if(!buf) return false;
-  if(bytes_available < 4) {
-    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
-               "Stream ended in the middle of a 32-bit value");
-    return false;
-  }
-  memcpy(val, buf, 4);
-  // TODO: byte swap if big-endian.
-  return upb_decoder_consume(d, 4);
-}
-
-// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).  Caller
-// promises that 8 bytes are available at buf.
-static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val)
-{
-  upb_strlen_t bytes_available;
-  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
-  if(!buf) return false;
-  if(bytes_available < 8) {
-    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
-               "Stream ended in the middle of a 64-bit value");
-    return false;
-  }
-  memcpy(val, buf, 8);
-  // TODO: byte swap if big-endian.
-  return upb_decoder_consume(d, 8);
-}
-
-// Returns the length of a varint (wire type: UPB_WIRE_TYPE_VARINT), allowing
-// it to be easily skipped.  Caller promises that 10 bytes are available at
-// "buf".  The function will return a maximum of 11 bytes before quitting.
-static uint8_t upb_decoder_skipv64(upb_decoder *d)
-{
-  uint32_t bytes_available;
-  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
-  if(!buf) return false;
-  uint8_t i;
-  for(i = 0; i < 10 && buf[i] & 0x80; i++)
-    ;  // empty loop body.
-  if(i > 10) {
-    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint.");
-    return false;
-  }
-  return upb_decoder_consume(d, i);
-}
-
-
-/* upb_src implementation for upb_decoder. ************************************/
-
-bool upb_decoder_skipval(upb_decoder *d);
-
-upb_fielddef *upb_decoder_getdef(upb_decoder *d)
-{
-  // Detect end-of-submessage.
-  if(upb_decoder_offset(d) >= d->top->end_offset) {
-    d->src.eof = true;
-    return NULL;
-  }
-
-  // Handles the packed field case.
-  if(d->field) return d->field;
-
-  uint32_t key = 0;
-again:
-  if(!upb_decoder_readv32(d, &key)) return NULL;
-  upb_wire_type_t wire_type = key & 0x7;
-  int32_t field_number = key >> 3;
-
-  if(wire_type == UPB_WIRE_TYPE_DELIMITED) {
-    // For delimited wire values we parse the length now, since we need it in
-    // all cases.
-    if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL;
-  } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) {
-    if(d->top->end_offset == UPB_GROUP_END_OFFSET) {
-      d->src.eof = true;
-    } else {
-      upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current "
-                 "message is not a group, byte offset: %zd",
-                 upb_decoder_offset(d));
-    }
-    return NULL;
-  }
-
-  // Look up field by tag number.
-  upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number);
-
-  if (!f) {
-    // Unknown field.  If/when the upb_src interface supports reporting
-    // unknown fields we will implement that here.
-    upb_decoder_skipval(d);
-    goto again;
-  } else if (!upb_check_type(wire_type, f->type)) {
-    // This is a recoverable error condition.  We skip the value but also
-    // return NULL and report the error.
-    upb_decoder_skipval(d);
-    // TODO: better error message.
-    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n");
-    return NULL;
-  }
-  d->field = f;
-  d->wire_type = wire_type;
-  return f;
-}
-
-bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
-{
-  switch(upb_types[d->field->type].native_wire_type) {
-    case UPB_WIRE_TYPE_VARINT: {
-      uint32_t low, high;
-      if(!upb_decoder_readv64(d, &low, &high)) return false;
-      uint64_t u64 = ((uint64_t)high << 32) | low;
-      if(d->field->type == UPB_TYPE(SINT64))
-        *val.int64 = upb_zzdec_64(u64);
-      else
-        *val.uint64 = u64;
-      break;
-    }
-    case UPB_WIRE_TYPE_32BIT_VARINT: {
-      uint32_t u32;
-      if(!upb_decoder_readv32(d, &u32)) return false;
-      if(d->field->type == UPB_TYPE(SINT32))
-        *val.int32 = upb_zzdec_32(u32);
-      else
-        *val.uint32 = u32;
-      break;
-    }
-    case UPB_WIRE_TYPE_64BIT:
-      if(!upb_decoder_readf64(d, val.uint64)) return false;
-      break;
-    case UPB_WIRE_TYPE_32BIT:
-      if(!upb_decoder_readf32(d, val.uint32)) return false;
-      break;
-    default:
-      upb_seterr(&d->src.status, UPB_STATUS_ERROR,
-                 "Attempted to call getval on a group.");
-      return false;
-  }
-  // For a packed field where we have not reached the end, we leave the field
-  // in the decoder so we will return it again without parsing a key.
-  if(d->wire_type != UPB_WIRE_TYPE_DELIMITED ||
-     upb_decoder_offset(d) >= d->packed_end_offset) {
-    d->field = NULL;
-  }
-  return true;
-}
-
-bool upb_decoder_getstr(upb_decoder *d, upb_string *str) {
-  // A string, bytes, or a length-delimited submessage.  The latter isn't
-  // technically a string, but can be gotten as one to perform lazy parsing.
-  const int32_t total_len = d->delimited_len;
-  if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) {
-    // The entire string is inside our current buffer, so we can just
-    // return a substring of the buffer without copying.
-    upb_string_substr(str, d->buf,
-                      upb_string_len(d->buf) - d->buf_bytesleft,
-                      total_len);
-    upb_decoder_skipbytes(d, total_len);
-  } else {
-    // The string spans buffers, so we must copy from the residual buffer
-    // (if any bytes are there), then the buffer, and finally from the bytesrc.
-    uint8_t *ptr = (uint8_t*)upb_string_getrwbuf(
-        str, UPB_MIN(total_len, d->buf_bytesleft));
-    int32_t len = 0;
-    if(d->buf_offset < 0) {
-      // Residual bytes we need to copy from tmpbuf.
-      memcpy(ptr, d->tmpbuf, -d->buf_offset);
-      len += -d->buf_offset;
-    }
-    if(d->buf) {
-      // Bytes from the buffer.
-      memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset,
-             upb_string_len(str) - len);
-    }
-    upb_decoder_skipbytes(d, upb_string_len(str));
-    if(len < total_len) {
-      // Bytes from the bytesrc.
-      if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) {
-        upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
-        return false;
-      }
-      // Have to advance this since the buffering layer of the decoder will
-      // never see these bytes.
-      d->buf_stream_offset += total_len - len;
-    }
-  }
-  d->field = NULL;
-  return true;
-}
-
-static bool upb_decoder_skipgroup(upb_decoder *d);
-
-bool upb_decoder_startmsg(upb_decoder *d) {
-  d->top->field = d->field;
-  if(++d->top >= d->limit) {
-    upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED,
-               "Nesting exceeded maximum (%d levels)\n",
-               UPB_MAX_NESTING);
-    return false;
-  }
-  upb_decoder_frame *frame = d->top;
-  frame->msgdef = upb_downcast_msgdef(d->field->def);
-  if(d->field->type == UPB_TYPE(GROUP)) {
-    frame->end_offset = UPB_GROUP_END_OFFSET;
-  } else {
-    frame->end_offset = upb_decoder_offset(d) + d->delimited_len;
-  }
-  return true;
-}
-
-bool upb_decoder_endmsg(upb_decoder *d) {
-  if(d->top > d->stack) {
-    --d->top;
-    if(!d->src.eof) {
-      if(d->top->field->type == UPB_TYPE(GROUP))
-        upb_decoder_skipgroup(d);
-      else
-        upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d));
-    }
-    d->src.eof = false;
-    return true;
-  } else {
-    return false;
-  }
-}
-
-bool upb_decoder_skipval(upb_decoder *d) {
-  upb_strlen_t bytes_to_skip;
-  switch(d->wire_type) {
-    case UPB_WIRE_TYPE_VARINT: {
-      return upb_decoder_skipv64(d);
-    }
-    case UPB_WIRE_TYPE_START_GROUP:
-      if(!upb_decoder_startmsg(d)) return false;
-      if(!upb_decoder_skipgroup(d)) return false;
-      if(!upb_decoder_endmsg(d)) return false;
-      return true;
-    default:
-      // Including UPB_WIRE_TYPE_END_GROUP.
-      assert(false);
-      upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group");
-      return false;
-    case UPB_WIRE_TYPE_64BIT:
-      bytes_to_skip = 8;
-      break;
-    case UPB_WIRE_TYPE_32BIT:
-      bytes_to_skip = 4;
-      break;
-    case UPB_WIRE_TYPE_DELIMITED:
-      // Works for both string/bytes *and* submessages.
-      bytes_to_skip = d->delimited_len;
-      break;
-  }
-  return upb_decoder_skipbytes(d, bytes_to_skip);
-}
-
-static bool upb_decoder_skipgroup(upb_decoder *d)
-{
-  // This will be mututally recursive with upb_decoder_skipval() if the group
-  // has sub-groups.  If we wanted to handle EAGAIN in the future, this
-  // approach would not work; we would need to track the group depth
-  // explicitly.
-  while(upb_decoder_getdef(d)) {
-    if(!upb_decoder_skipval(d)) return false;
-  }
-  // If we are at the end of the group like we want to be, then
-  // upb_decoder_getdef() returned NULL because of eof, not error.
-  if(!&d->src.eof) return false;
-  return true;
-}
-
-upb_src_vtable upb_decoder_src_vtbl = {
-  (upb_src_getdef_fptr)&upb_decoder_getdef,
-  (upb_src_getval_fptr)&upb_decoder_getval,
-  (upb_src_skipval_fptr)&upb_decoder_skipval,
-  (upb_src_startmsg_fptr)&upb_decoder_startmsg,
-  (upb_src_endmsg_fptr)&upb_decoder_endmsg,
-};
-
-
-/* upb_decoder construction/destruction. **************************************/
-
-upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
-{
-  upb_decoder *d = malloc(sizeof(*d));
-  d->toplevel_msgdef = msgdef;
-  d->limit = &d->stack[UPB_MAX_NESTING];
-  d->buf = NULL;
-  upb_src_init(&d->src, &upb_decoder_src_vtbl);
-  return d;
-}
-
-void upb_decoder_free(upb_decoder *d)
-{
-  upb_string_unref(d->buf);
-  free(d);
-}
-
-void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
-{
-  upb_string_unref(d->buf);
-  d->top = d->stack;
-  d->top->msgdef = d->toplevel_msgdef;
-  // The top-level message is not delimited (we can keep receiving data for it
-  // indefinitely), so we set the end offset as high as possible, but not equal
-  // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET.
-  d->top->end_offset = UINT32_MAX - 1;
-  d->bytesrc = bytesrc;
-  d->buf = NULL;
-  d->buf_bytesleft = 0;
-  d->buf_stream_offset = 0;
-  d->buf_offset = 0;
-}
diff --git a/src/upb_decoder.h b/src/upb_decoder.h
deleted file mode 100644
index dde61fc..0000000
--- a/src/upb_decoder.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * upb_decoder implements a high performance, streaming decoder for protobuf
- * data that works by implementing upb_src and getting its data from a
- * upb_bytesrc.
- *
- * The decoder does not currently support non-blocking I/O, in the sense that
- * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
- * decoder when data becomes available again.  Support for this could be added,
- * but it would add complexity and perhaps cost efficiency also.
- *
- * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
- */
-
-#ifndef UPB_DECODER_H_
-#define UPB_DECODER_H_
-
-#include <stdbool.h>
-#include <stdint.h>
-#include "upb_def.h"
-#include "upb_stream.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* upb_decoder *****************************************************************/
-
-// A upb_decoder decodes the binary protocol buffer format.  The decoded data
-// is read through the upb_src interface (see upb_decoder_getsrc() below).
-struct upb_decoder;
-typedef struct upb_decoder upb_decoder;
-
-// Allocates and frees a upb_decoder, respectively.
-upb_decoder *upb_decoder_new(upb_msgdef *md);
-void upb_decoder_free(upb_decoder *d);
-
-// Resets the internal state of an already-allocated decoder.  This puts it in a
-// state where it has not seen any data, and expects the next data to be from
-// the beginning of a new protobuf.  Decoders must be reset before they can be
-// used.  A decoder can be reset multiple times.
-void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc);
-
-// Returns a upb_src pointer by which the decoder can be used.  The returned
-// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free().
-upb_src *upb_decoder_getsrc(upb_decoder *d);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_DECODER_H_ */
diff --git a/src/upb_def.c b/src/upb_def.c
deleted file mode 100644
index bfab738..0000000
--- a/src/upb_def.c
+++ /dev/null
@@ -1,1022 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#include <stdlib.h>
-#include "descriptor_const.h"
-#include "descriptor.h"
-#include "upb_def.h"
-
-// Bail-out helpers.  CHECKSRC jumps to the enclosing function's "src_err"
-// label and CHECK to its "err" label when the expression is false.  They are
-// wrapped in do/while(0) so each expands to exactly one statement and cannot
-// capture a following "else".
-#define CHECKSRC(x) do { if(!(x)) goto src_err; } while(0)
-#define CHECK(x) do { if(!(x)) goto err; } while(0)
-
-// A little dynamic array for storing a growing list of upb_defs.
-typedef struct {
-  upb_def **defs;  // Array of def pointers; entries may be NULL.
-  uint32_t len;    // Number of entries in use.
-  uint32_t size;   // Capacity, counted in entries (not bytes).
-} upb_deflist;
-
-// Initializes an empty list with room for 8 entries.
-static void upb_deflist_init(upb_deflist *l) {
-  l->size = 8;
-  // Allocate "size" pointers, not "size" bytes.
-  l->defs = malloc(l->size * sizeof(*l->defs));
-  l->len = 0;
-}
-
-// Unrefs every non-NULL def still in the list and frees the array.
-static void upb_deflist_uninit(upb_deflist *l) {
-  for(uint32_t i = 0; i < l->len; i++)
-    if(l->defs[i]) upb_def_unref(l->defs[i]);
-  free(l->defs);
-}
-
-// Appends "d" to the list, doubling the capacity when the list is full.
-static void upb_deflist_push(upb_deflist *l, upb_def *d) {
-  if(l->len == l->size) {
-    l->size *= 2;
-    // Scale by the element size; a bare "size" would shrink the array.
-    l->defs = realloc(l->defs, l->size * sizeof(*l->defs));
-  }
-  l->defs[l->len++] = d;
-}
-
-/* Builds a qualified name from a scope and a name, for example:
- *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
- *   join("", "Baz") -> "Baz"
- * The separator is only inserted when the scope is non-empty.
- * Caller owns a ref on the returned string. */
-static upb_string *upb_join(upb_string *base, upb_string *name) {
-  upb_string *result = upb_strdup(base);
-  upb_strlen_t baselen = upb_string_len(result);
-  if(baselen > 0) {
-    // Grow by one byte and put the separator right after the scope.
-    char *buf = upb_string_getrwbuf(result, baselen + 1);
-    buf[baselen] = UPB_SYMBOL_SEPARATOR;
-  }
-  upb_strcat(result, name);
-  return result;
-}
-
-// Prefixes the name of every def from index "start" onwards with "str",
-// replacing each def's fqname with the joined string.
-static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
-  uint32_t idx = start;
-  while(idx < l->len) {
-    upb_def *d = l->defs[idx++];
-    upb_string *unqualified = d->fqname;
-    d->fqname = upb_join(str, unqualified);
-    // The def now holds the joined string; drop the old name.
-    upb_string_unref(unqualified);
-  }
-}
-
-/* upb_def ********************************************************************/
-
-// Defs are reference counted, but can have cycles when types are
-// self-recursive or mutually recursive, so we need to be capable of collecting
-// the cycles.  In our situation defs are immutable (so cycles cannot be
-// created or destroyed post-initialization).  We need to be thread-safe but
-// want to avoid locks if at all possible and rely only on atomic operations.
-//
-// Our scheme is as follows.  First we give each def a flag indicating whether
-// it is part of a cycle or not.  Because defs are immutable, this flag will
-// never change.  For acyclic defs, we can use a naive algorithm and avoid the
-// overhead of dealing with cycles.  Most defs will be acyclic, and most cycles
-// will be very short.
-//
-// For defs that participate in cycles we keep two reference counts.  One
-// tracks references that come from outside the cycle (we call these external
-// references), and is incremented and decremented like a regular refcount.
-// The other is a cycle refcount, and works as follows.  Every cycle is
-// considered distinct, even if two cycles share members.  For example, this
-// graph has two distinct cycles:
-//
-//   A-->B-->C
-//   ^   |   |
-//   +---+---+
-//
-// The cycles in this graph are AB and ABC.  When A's external refcount
-// transitions from 0->1, we say that A takes "cycle references" on both
-// cycles.  Taking a cycle reference means incrementing the cycle refcount of
-// all defs in the cycle.  Since A and B are common to both cycles, A and B's
-// cycle refcounts will be incremented by two, and C's will be incremented by
-// one.  Likewise, when A's external refcount transitions from 1->0, we
-// decrement A and B's cycle refcounts by two and C's by one.  We collect a
-// cyclic type when its cycle refcount drops to zero.  A precondition for this
-// is that the external refcount has dropped to zero also.
-//
-// This algorithm is relatively cheap, since it only requires extra work when
-// the external refcount on a cyclic type transitions from 0->1 or 1->0.
-
-static void upb_msgdef_free(upb_msgdef *m);
-static void upb_enumdef_free(upb_enumdef *e);
-static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
-
-// Frees "def" by dispatching on its type to the matching type-specific free
-// function.  Service defs and unknown types trip an assertion.
-static void upb_def_free(upb_def *def)
-{
-  switch(def->type) {
-    case UPB_DEF_MSG:
-      upb_msgdef_free(upb_downcast_msgdef(def));
-      break;
-    case UPB_DEF_ENUM:
-      upb_enumdef_free(upb_downcast_enumdef(def));
-      break;
-    case UPB_DEF_SVC:
-      assert(false);  /* Unimplemented. */
-      break;
-    case UPB_DEF_UNRESOLVED:
-      upb_unresolveddef_free(upb_downcast_unresolveddef(def));
-      break;
-    default:
-      assert(false);
-  }
-}
-
-// Depth-first search for all cycles that include cycle_base.  Returns the
-// number of paths from def that lead to cycle_base, which is equivalent to the
-// number of cycles def is in that include cycle_base.
-//
-// open_defs tracks the set of nodes that are currently being visited in the
-// search so we can stop the search if we detect a cycle that does not involve
-// cycle_base.  We can't color the nodes as we go by writing to a member of the
-// def, because another thread could be performing the search concurrently.
-//
-// When "ref" is true the path count is added to each visited def's cycle
-// refcount; otherwise it is subtracted and the def may be freed.  Passing
-// cycle_base == NULL makes "m" itself the base of the search.
-static int upb_cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base,
-                                  upb_msgdef **open_defs, int num_open_defs,
-                                  bool ref) {
-  bool found = false;
-  for(int i = 0; i < num_open_defs; i++) {
-    if(open_defs[i] == m) {
-      // We encountered a cycle that did not involve cycle_base.
-      found = true;
-      break;
-    }
-  }
-
-  // Also stop when the open list is full, which bounds the recursion depth.
-  if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) {
-    return 0;
-  } else if(m == cycle_base) {
-    // Reached the base again: exactly one path closes here.
-    return 1;
-  } else {
-    int path_count = 0;
-    if(cycle_base == NULL) {
-      // Top-level call: the search is rooted at m.
-      cycle_base = m;
-    } else {
-      open_defs[num_open_defs++] = m;
-    }
-    for(int i = 0; i < m->num_fields; i++) {
-      upb_fielddef *f = &m->fields[i];
-      upb_def *def = f->def;
-      if(upb_issubmsg(f) && def->is_cyclic) {
-        upb_msgdef *sub_m = upb_downcast_msgdef(def);
-        path_count += upb_cycle_ref_or_unref(sub_m, cycle_base, open_defs,
-                                         num_open_defs, ref);
-      }
-    }
-    if(ref) {
-      upb_atomic_add(&m->cycle_refcount, path_count);
-    } else {
-      // NOTE(review): presumably upb_atomic_add() returns true when the count
-      // reaches zero -- confirm against upb_atomic.h.
-      if(upb_atomic_add(&m->cycle_refcount, -path_count))
-        upb_def_free(UPB_UPCAST(m));
-    }
-    return path_count;
-  }
-}
-
-// Handles a def whose refcount has reached zero.  Acyclic defs are freed
-// directly; cyclic defs release their cycle references instead.
-void _upb_def_reftozero(upb_def *def) {
-  if(!def->is_cyclic) {
-    upb_def_free(def);
-    return;
-  }
-  upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
-  upb_msgdef *msg = upb_downcast_msgdef(def);
-  upb_cycle_ref_or_unref(msg, NULL, open_defs, 0, false);
-}
-
-// Takes cycle references on every cycle that includes "def".
-void _upb_def_cyclic_ref(upb_def *def) {
-  upb_msgdef *msg = upb_downcast_msgdef(def);
-  upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
-  upb_cycle_ref_or_unref(msg, NULL, open_defs, 0, true);
-}
-
-// Sets up the fields common to all defs: no name yet, refcount of 1, and not
-// marked cyclic.
-static void upb_def_init(upb_def *def, upb_def_type type) {
-  upb_atomic_refcount_init(&def->refcount, 1);
-  def->fqname = NULL;
-  def->type = type;
-  def->search_depth = 0;
-  def->is_cyclic = 0;  // We detect this later, after resolving refs.
-}
-
-// Releases the resources held by the common part of a def, namely the ref on
-// its fully-qualified name.
-static void upb_def_uninit(upb_def *def) {
-  upb_string_unref(def->fqname);
-}
-
-
-/* upb_unresolveddef **********************************************************/
-
-// Unresolved defs are used as temporary placeholders for a def whose name has
-// not been resolved yet.  During the name resolution step, all unresolved defs
-// are replaced with pointers to the actual def being referenced.
-typedef struct _upb_unresolveddef {
-  upb_def base;  // Common def header; must come first so upcasts work.
-
-  // The target type name.  This may or may not be fully qualified.
-  upb_string *name;
-} upb_unresolveddef;
-
-// Creates a placeholder def for the type named "str".  The caller's ref on
-// "str" is handed over to the new def.
-static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) {
-  upb_unresolveddef *placeholder = malloc(sizeof(*placeholder));
-  upb_def_init(&placeholder->base, UPB_DEF_UNRESOLVED);
-  placeholder->name = str;
-  return placeholder;
-}
-
-// Frees a placeholder def, including the ref on the target type name that
-// upb_unresolveddef_new() was handed.
-static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
-  // The def owns a ref on "name"; without this unref the string leaks.
-  upb_string_unref(def->name);
-  upb_def_uninit(&def->base);
-  free(def);
-}
-
-
-/* upb_enumdef ****************************************************************/
-
-// Entry of an enumdef's name -> number table.
-typedef struct {
-  upb_strtable_entry e;
-  uint32_t value;
-} ntoi_ent;
-
-// Entry of an enumdef's number -> name table.
-typedef struct {
-  upb_inttable_entry e;
-  upb_string *string;
-} iton_ent;
-
-// Frees an enumdef together with both of its lookup tables.
-// NOTE(review): upb_addenum_val() passes a string ref to each iton entry;
-// nothing visible here releases those refs -- confirm whether
-// upb_inttable_free() needs help to avoid leaking them.
-static void upb_enumdef_free(upb_enumdef *e) {
-  upb_strtable_free(&e->ntoi);
-  upb_inttable_free(&e->iton);
-  upb_def_uninit(&e->base);
-  free(e);
-}
-
-// Reads one enum value (a name and a number) from "src" and adds it to both
-// lookup tables of "e".  Returns false and fills in "status" if the source
-// fails or if either the name or the number is missing.
-static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status)
-{
-  // NOTE(review): -1 doubles as the "number not seen" marker, so a value whose
-  // number really is -1 is rejected below -- confirm this is intended.
-  int32_t number = -1;
-  upb_string *name = NULL;
-  upb_fielddef *f;
-  while((f = upb_src_getdef(src)) != NULL) {
-    switch(f->number) {
-      case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_FIELDNUM:
-        CHECKSRC(upb_src_getint32(src, &number));
-        break;
-      case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM:
-        name = upb_string_tryrecycle(name);
-        CHECKSRC(upb_src_getstr(src, name));
-        break;
-      default:
-        CHECKSRC(upb_src_skipval(src));
-        break;
-    }
-  }
-
-  if(name == NULL || number == -1) {
-    upb_seterr(status, UPB_STATUS_ERROR, "Enum value missing name or number.");
-    goto err;
-  }
-  ntoi_ent ntoi_ent = {{name, 0}, number};
-  iton_ent iton_ent = {{number, 0}, name};
-  upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
-  upb_inttable_insert(&e->iton, &iton_ent.e);
-  // We don't unref "name" because we pass our ref to the iton entry of the
-  // table.  strtables can ref their keys, but the inttable doesn't know that
-  // the value is a string.
-  return true;
-
-src_err:
-  // The source failed: report its status, then fall through to clean up.
-  upb_copyerr(status, upb_src_status(src));
-err:
-  upb_string_unref(name);
-  return false;
-}
-
-// Reads an enum definition from "src" and pushes the resulting enumdef onto
-// "defs".  Returns false, with "status" filled in, if a value cannot be added
-// or the source fails; the partially built enumdef is freed in that case.
-static bool upb_addenum(upb_src *src, upb_deflist *defs, upb_status *status)
-{
-  upb_enumdef *e = malloc(sizeof(*e));
-  upb_def_init(&e->base, UPB_DEF_ENUM);
-  upb_strtable_init(&e->ntoi, 0, sizeof(ntoi_ent));
-  upb_inttable_init(&e->iton, 0, sizeof(iton_ent));
-  upb_fielddef *f;
-  while((f = upb_src_getdef(src)) != NULL) {
-    switch(f->number) {
-      case GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_FIELDNUM:
-        CHECK(upb_addenum_val(src, e, status));
-        break;
-      default:
-        // A failed skip must abort the parse, as it does in the other
-        // descriptor readers in this file.
-        CHECKSRC(upb_src_skipval(src));
-        break;
-    }
-  }
-  upb_deflist_push(defs, UPB_UPCAST(e));
-  return true;
-
-src_err:
-  upb_copyerr(status, upb_src_status(src));
-err:
-  upb_enumdef_free(e);
-  return false;
-}
-
-// Points the iterator at "ent" and copies out the entry's name and value.
-static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) {
-  iter->name = ent->e.key;
-  iter->val = ent->value;
-  iter->state = ent;
-}
-
-// Starts iterating over the values of "e".  If the enum has no values the
-// iterator starts out finished (upb_enum_done() returns true).
-void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) {
-  // We could iterate over either table here; the choice is arbitrary.
-  ntoi_ent *ent = upb_strtable_begin(&e->ntoi);
-  iter->e = e;
-  iter->state = ent;
-  // An empty table yields NULL, which fill_iter() would dereference.
-  if(ent) fill_iter(iter, ent);
-}
-
-// Advances the iterator to the next value.  Must not be called once the
-// iterator is done.
-void upb_enum_next(upb_enum_iter *iter) {
-  ntoi_ent *cur = iter->state;
-  assert(cur);
-  ntoi_ent *next = upb_strtable_next(&iter->e->ntoi, &cur->e);
-  iter->state = next;
-  if(next != NULL) fill_iter(iter, next);
-}
-
-// Returns true once the iterator has moved past the last value.
-bool upb_enum_done(upb_enum_iter *iter) {
-  return !iter->state;
-}
-
-
-/* upb_fielddef ***************************************************************/
-
-// Frees the fielddef's own storage.  It does not release the field's name or
-// def; upb_fielddef_uninit() does that.
-static void upb_fielddef_free(upb_fielddef *f) {
-  free(f);
-}
-
-// Releases what a fielddef refers to: its name, and its def if the field
-// has one and owns the ref.
-static void upb_fielddef_uninit(upb_fielddef *f) {
-  upb_string_unref(f->name);
-  if(!upb_hasdef(f)) return;
-  if(f->owned) upb_def_unref(f->def);
-}
-
-// Reads one field definition from "src" and registers it in the by-number and
-// by-name tables of "m".  Returns false and copies the source's status into
-// "status" if the source fails; the partially built field is released.
-static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status)
-{
-  upb_fielddef *f = malloc(sizeof(*f));
-  f->def = NULL;
-  f->owned = false;
-  // Must be initialized: upb_string_tryrecycle() below reads the old value.
-  f->name = NULL;
-  upb_fielddef *parsed_f;
-  int32_t tmp;
-  while((parsed_f = upb_src_getdef(src))) {
-    switch(parsed_f->number) {
-      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIELDNUM:
-        CHECKSRC(upb_src_getint32(src, &tmp));
-        f->type = tmp;
-        break;
-      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_FIELDNUM:
-        CHECKSRC(upb_src_getint32(src, &tmp));
-        f->label = tmp;
-        break;
-      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_FIELDNUM:
-        CHECKSRC(upb_src_getint32(src, &tmp));
-        f->number = tmp;
-        break;
-      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM:
-        f->name = upb_string_tryrecycle(f->name);
-        CHECKSRC(upb_src_getstr(src, f->name));
-        break;
-      case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: {
-        upb_string *str = upb_string_new();
-        if(!upb_src_getstr(src, str)) {
-          // Nothing owns "str" yet, so release it before bailing out.
-          upb_string_unref(str);
-          goto src_err;
-        }
-        if(f->def) upb_def_unref(f->def);
-        f->def = UPB_UPCAST(upb_unresolveddef_new(str));
-        f->owned = true;
-        break;
-      }
-      default:
-        // Values we don't use must still be consumed, as the other
-        // descriptor readers in this file do.
-        CHECKSRC(upb_src_skipval(src));
-        break;
-    }
-  }
-  CHECKSRC(upb_src_eof(src));
-  // TODO: verify that all required fields were present.
-  assert((f->def != NULL) == upb_hasdef(f));
-
-  // Field was successfully read, add it as a field of the msgdef.
-  upb_itof_ent itof_ent = {{f->number, 0}, f};
-  upb_ntof_ent ntof_ent = {{f->name, 0}, f};
-  upb_inttable_insert(&m->itof, &itof_ent.e);
-  upb_strtable_insert(&m->ntof, &ntof_ent.e);
-  return true;
-
-src_err:
-  upb_copyerr(status, upb_src_status(src));
-  // Release what the field picked up so far.  f->def is only ever set to a
-  // placeholder that this function created and owns.
-  upb_string_unref(f->name);
-  if(f->def) upb_def_unref(f->def);
-  upb_fielddef_free(f);
-  return false;
-}
-
-
-/* upb_msgdef *****************************************************************/
-
-// Processes a google.protobuf.DescriptorProto, adding defs to "defs."
-// Nested messages and enums are pushed first and then qualified with this
-// message's name; the message itself is pushed last.  Returns false, with
-// "status" filled in, on a source error or if the message has no name.
-static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status)
-{
-  upb_msgdef *m = malloc(sizeof(*m));
-  upb_def_init(&m->base, UPB_DEF_MSG);
-  upb_atomic_refcount_init(&m->cycle_refcount, 0);
-  upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
-  upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
-  // upb_msgdef_free() walks m->fields[0..num_fields) and frees the array, so
-  // both must be well-defined before any error path can run.
-  m->fields = NULL;
-  m->num_fields = 0;
-  int32_t start_count = defs->len;
-
-  upb_fielddef *f;
-  while((f = upb_src_getdef(src)) != NULL) {
-    switch(f->number) {
-      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM:
-        m->base.fqname = upb_string_tryrecycle(m->base.fqname);
-        CHECKSRC(upb_src_getstr(src, m->base.fqname));
-        break;
-      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM:
-        CHECKSRC(upb_src_startmsg(src));
-        CHECK(upb_addfield(src, m, status));
-        CHECKSRC(upb_src_endmsg(src));
-        break;
-      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_FIELDNUM:
-        CHECKSRC(upb_src_startmsg(src));
-        CHECK(upb_addmsg(src, defs, status));
-        CHECKSRC(upb_src_endmsg(src));
-        break;
-      case GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
-        CHECKSRC(upb_src_startmsg(src));
-        CHECK(upb_addenum(src, defs, status));
-        CHECKSRC(upb_src_endmsg(src));
-        break;
-      default:
-        // TODO: extensions.
-        CHECKSRC(upb_src_skipval(src));
-    }
-  }
-  CHECK(upb_src_eof(src));
-  if(!m->base.fqname) {
-    upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name.");
-    goto err;
-  }
-  // Everything pushed since start_count is nested inside this message.
-  upb_deflist_qualify(defs, m->base.fqname, start_count);
-  upb_deflist_push(defs, UPB_UPCAST(m));
-  return true;
-
-src_err:
-  upb_copyerr(status, upb_src_status(src));
-err:
-  upb_msgdef_free(m);
-  return false;
-}
-
-// Frees a msgdef: uninitializes each field, then releases the field array,
-// both lookup tables, the common def state, and the msgdef itself.
-static void upb_msgdef_free(upb_msgdef *m)
-{
-  upb_field_count_t idx = 0;
-  while(idx < m->num_fields) {
-    upb_fielddef_uninit(&m->fields[idx]);
-    idx++;
-  }
-  free(m->fields);
-  upb_inttable_free(&m->itof);
-  upb_strtable_free(&m->ntof);
-  upb_def_uninit(&m->base);
-  free(m);
-}
-
-// Points field "f" at the resolved "def", dropping the ref on whatever def it
-// owned before and taking a new ref on "def".  "m" is currently unused.
-static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
-  (void)m;
-  if(f->owned) upb_def_unref(f->def);
-  f->def = def;
-  // We will later make the ref unowned if it is a part of a cycle.
-  f->owned = true;
-  upb_def_ref(def);
-}
-
-
-/* symtab internal  ***********************************************************/
-
-// Processes a google.protobuf.FileDescriptorProto, adding the defs to "defs".
-// Every def added for this file is qualified with the file's package, if it
-// has one.  Returns false, with "status" filled in, on error.
-static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status)
-{
-  upb_string *package = NULL;
-  int32_t start_count = defs->len;
-  upb_fielddef *f;
-  while((f = upb_src_getdef(src)) != NULL) {
-    switch(f->number) {
-      // Symbols are scoped by the file's package.  The "name" field holds the
-      // .proto file name and must not be used as a qualifier.
-      case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_FIELDNUM:
-        package = upb_string_tryrecycle(package);
-        CHECKSRC(upb_src_getstr(src, package));
-        break;
-      case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM:
-        CHECKSRC(upb_src_startmsg(src));
-        CHECK(upb_addmsg(src, defs, status));
-        CHECKSRC(upb_src_endmsg(src));
-        break;
-      case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_FIELDNUM:
-        CHECKSRC(upb_src_startmsg(src));
-        CHECK(upb_addenum(src, defs, status));
-        CHECKSRC(upb_src_endmsg(src));
-        break;
-      default:
-        // TODO: services and extensions.
-        CHECKSRC(upb_src_skipval(src));
-    }
-  }
-  CHECK(upb_src_eof(src));
-  // A file without a package leaves its defs unqualified.  Skipping the call
-  // also avoids handing a NULL base string to upb_join().
-  if(package) upb_deflist_qualify(defs, package, start_count);
-  upb_string_unref(package);
-  return true;
-
-src_err:
-  upb_copyerr(status, upb_src_status(src));
-err:
-  upb_string_unref(package);
-  return false;
-}
-
-/* Scans data[len-1] down to data[1] for "c" and returns the index of the
- * last match.  Returns 0 if there is none (index 0 is never examined) and
- * -1 when len is 0. */
-static int my_memrchr(char *data, char c, size_t len)
-{
-  int pos;
-  for(pos = len-1; pos > 0; --pos) {
-    if(data[pos] == c) break;
-  }
-  return pos;
-}
-
-// Symbol table entry: maps a string key (the strtable entry "e") to a def.
-typedef struct {
-  upb_strtable_entry e;
-  upb_def *def;
-} upb_symtab_ent;
-
-// Given a symbol and the base symbol inside which it is defined, find the
-// symbol's definition in t.  Returns NULL if the symbol is empty, the combined
-// name is too long, or no matching entry exists.
-static upb_symtab_ent *upb_resolve(upb_strtable *t,
-                                   upb_string *base, upb_string *sym)
-{
-  if(upb_string_len(base) + upb_string_len(sym) + 1 >= UPB_SYMBOL_MAXLEN ||
-     upb_string_len(sym) == 0) return NULL;
-
-  if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) {
-    // Symbols starting with '.' are absolute, so we do a single lookup.
-    // Slice to omit the leading '.'
-    upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1);
-    upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
-    upb_string_unref(sym_str);
-    return e;
-  } else {
-    // Remove components from base until we find an entry or run out.
-    upb_string *sym_str = upb_string_new();
-    int baselen = upb_string_len(base);
-    upb_symtab_ent *e;
-    while(1) {
-      // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym
-      upb_strlen_t len = baselen + upb_string_len(sym) + 1;
-      char *buf = upb_string_getrwbuf(sym_str, len);
-      memcpy(buf, upb_string_getrobuf(base), baselen);
-      buf[baselen] = UPB_SYMBOL_SEPARATOR;
-      memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym));
-
-      e = upb_strtable_lookup(t, sym_str);
-      // Stop on a hit, or when there are no more scopes to try (e is NULL).
-      if(e || baselen == 0) break;
-
-      baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen);
-    }
-    // Release the scratch string on every exit path; it used to leak.
-    upb_string_unref(sym_str);
-    return e;
-  }
-}
-
-// Performs a pass over the type graph to find all cycles that include m.
-// Returns true if a cycle back to the base of the search was found through
-// m.  Defs on such a cycle are marked is_cyclic and the owned refs along it
-// are released.  Errors are reported through "status".
-static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
-{
-  if(depth > UPB_MAX_TYPE_DEPTH) {
-    // We have found a non-cyclic path from the base of the type tree that
-    // exceeds the maximum allowed depth.  There are many situations in upb
-    // where we recurse over the type tree (like for example, right now) and an
-    // absurdly deep tree could cause us to stack overflow on systems with very
-    // limited stacks.
-    // UPB_STRFMT arguments must be wrapped in UPB_STRARG(), as they are in
-    // the other upb_seterr() calls in this file.
-    upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at "
-               "depth %d in the type graph, which exceeds the maximum type "
-               "depth of %d.", UPB_STRARG(UPB_UPCAST(m)->fqname), depth,
-               UPB_MAX_TYPE_DEPTH);
-    return false;
-  } else if(UPB_UPCAST(m)->search_depth == 1) {
-    // Cycle!
-    int cycle_len = depth - 1;
-    if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) {
-      upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved "
-                 "in a cycle of length %d, which exceeds the maximum type "
-                 "cycle length of %d.", UPB_STRARG(UPB_UPCAST(m)->fqname),
-                 cycle_len, UPB_MAX_TYPE_CYCLE_LEN);
-    }
-    return true;
-  } else if(UPB_UPCAST(m)->search_depth > 0) {
-    // This was a cycle, but did not originate from the base of our search tree.
-    // We'll find it when we call find_cycles() on this node directly.
-    return false;
-  } else {
-    // Mark m as being on the current search path.
-    UPB_UPCAST(m)->search_depth = ++depth;
-    bool cycle_found = false;
-    for(upb_field_count_t i = 0; i < m->num_fields; i++) {
-      upb_fielddef *f = &m->fields[i];
-      if(!upb_issubmsg(f)) continue;
-      upb_def *sub_def = f->def;
-      upb_msgdef *sub_m = upb_downcast_msgdef(sub_def);
-      if(upb_symtab_findcycles(sub_m, depth, status)) {
-        cycle_found = true;
-        UPB_UPCAST(m)->is_cyclic = true;
-        // Refs along a cycle are tracked by the cycle refcount instead.
-        if(f->owned) {
-          upb_atomic_unref(&sub_def->refcount);
-          f->owned = false;
-        }
-      }
-    }
-    UPB_UPCAST(m)->search_depth = 0;
-    return cycle_found;
-  }
-}
-
-// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
-// resolves all of the unresolved refs for the defs in tmptab, then detects
-// type cycles among them.  Returns false, with "status" filled in, if a symbol
-// cannot be resolved, resolves to the wrong kind of def, or cycle detection
-// reports an error.  "status" must be in the OK state on entry.
-bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
-                     upb_status *status)
-{
-  upb_symtab_ent *e;
-  for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
-    upb_msgdef *m = upb_dyncast_msgdef(e->def);
-    if(!m) continue;
-    // Type names are resolved relative to the message in which they appear.
-    upb_string *base = e->e.key;
-
-    for(upb_field_count_t i = 0; i < m->num_fields; i++) {
-      upb_fielddef *f = &m->fields[i];
-      if(!upb_hasdef(f)) continue;  // No resolving necessary.
-      upb_string *name = upb_downcast_unresolveddef(f->def)->name;
-
-      // Resolve from either the tmptab (pending adds) or symtab (existing
-      // defs).  If both exist, prefer the pending add, because it will be
-      // overwriting the existing def.
-      upb_symtab_ent *found;
-      if(!(found = upb_resolve(tmptab, base, name)) &&
-         !(found = upb_resolve(symtab, base, name))) {
-        upb_seterr(status, UPB_STATUS_ERROR,
-                   "could not resolve symbol '" UPB_STRFMT "'"
-                   " in context '" UPB_STRFMT "'",
-                   UPB_STRARG(name), UPB_STRARG(base));
-        return false;
-      }
-
-      // Check the type of the found def.
-      upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
-      if(found->def->type != expected) {
-        upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type");
-        return false;
-      }
-      upb_msgdef_resolve(m, f, found->def);
-    }
-  }
-
-  // Deal with type cycles.
-  for(e = upb_strtable_begin(tmptab); e; e = upb_strtable_next(tmptab, &e->e)) {
-    upb_msgdef *m = upb_dyncast_msgdef(e->def);
-    if(!m) continue;
-    // The findcycles() call will decrement the external refcount of the
-    // defs that are found to be part of a cycle.  Its return value says
-    // whether m is in a cycle, not whether it succeeded, so an acyclic
-    // message (return value false) is not a failure.  Failures are detected
-    // through "status" instead.
-    upb_symtab_findcycles(m, 0, status);
-    if(!upb_ok(status)) return false;
-    upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
-    upb_cycle_ref_or_unref(m, NULL, open_defs, 0, true);
-  }
-
-  return true;
-}
-
-// Given a list of defs, a list of extensions (in the future), and a flag
-// indicating whether the new defs can overwrite existing defs in the symtab,
-// attempts to add the given defs to the symtab.  The whole operation either
-// succeeds or fails.  Ownership of "defs" and "exts" is taken.
-//
-// The symtab's write lock is held for the duration of the update.  Returns
-// false, with "status" filled in, on a redefinition or an unresolvable ref.
-bool upb_symtab_add_defs(upb_symtab *s, upb_deflist *defs, bool allow_redef,
-                         upb_status *status)
-{
-  upb_rwlock_wrlock(&s->lock);
-
-  // Build a table of the defs we mean to add, for duplicate detection and name
-  // resolution.
-  upb_strtable tmptab;
-  upb_strtable_init(&tmptab, defs->len, sizeof(upb_symtab_ent));
-  for (uint32_t i = 0; i < defs->len; i++) {
-    upb_def *def = defs->defs[i];
-    upb_symtab_ent e = {{def->fqname, 0}, def};
-
-    // Redefinition is never allowed within a single FileDescriptorSet.
-    // Additionally, we only allow overwriting of an existing definition if
-    // allow_redef is set.
-    if (upb_strtable_lookup(&tmptab, def->fqname) ||
-        (!allow_redef && upb_strtable_lookup(&s->symtab, def->fqname))) {
-      upb_seterr(status, UPB_STATUS_ERROR, "Redefinition of symbol " UPB_STRFMT,
-                 UPB_STRARG(def->fqname));
-      goto err;
-    }
-
-    // Pass ownership from the deflist to the strtable.
-    upb_strtable_insert(&tmptab, &e.e);
-    // NULL entries are skipped when the deflist is uninitialized.
-    defs->defs[i] = NULL;
-  }
-
-  // TODO: process the list of extensions by modifying entries from
-  // tmptab in-place (copying them from the symtab first if necessary).
-
-  CHECK(upb_resolverefs(&tmptab, &s->symtab, status));
-
-  // The defs in tmptab have been vetted, and can be added to the symtab
-  // without causing errors.  Now add all tmptab defs to the symtab,
-  // overwriting (and releasing a ref on) any existing defs with the same
-  // names.  Ownership for tmptab defs passes from the tmptab to the symtab.
-  upb_symtab_ent *tmptab_e;
-  for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e;
-      tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) {
-    upb_symtab_ent *symtab_e =
-        upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
-    if(symtab_e) {
-      upb_def_unref(symtab_e->def);
-      symtab_e->def = tmptab_e->def;
-    } else {
-      upb_strtable_insert(&s->symtab, &tmptab_e->e);
-    }
-  }
-
-  upb_rwlock_unlock(&s->lock);
-  upb_strtable_free(&tmptab);
-  return true;
-
-err:
-  // We need to free all defs from "tmptab."
-  upb_rwlock_unlock(&s->lock);
-  for(upb_symtab_ent *e = upb_strtable_begin(&tmptab); e;
-      e = upb_strtable_next(&tmptab, &e->e))
-    upb_def_unref(e->def);
-  upb_strtable_free(&tmptab);
-  return false;
-}
-
-
-/* upb_symtab *****************************************************************/
-
-// Allocates an empty symbol table with a refcount of 1.
-upb_symtab *upb_symtab_new()
-{
-  upb_symtab *s = malloc(sizeof(*s));
-  upb_atomic_refcount_init(&s->refcount, 1);
-  upb_rwlock_init(&s->lock);
-  upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
-  // _upb_symtab_free() walks and frees psymtab too, so it has to be a valid
-  // (empty) table from the start.
-  upb_strtable_init(&s->psymtab, 16, sizeof(upb_symtab_ent));
-  return s;
-}
-
-// Drops the ref held on every def stored in "t", then frees the table.
-static void upb_free_symtab(upb_strtable *t)
-{
-  upb_symtab_ent *ent = upb_strtable_begin(t);
-  while(ent) {
-    upb_def_unref(ent->def);
-    ent = upb_strtable_next(t, &ent->e);
-  }
-  upb_strtable_free(t);
-}
-
-// Destroys a symbol table: releases the defs in both of its tables, destroys
-// its lock and frees the symtab itself.
-void _upb_symtab_free(upb_symtab *s)
-{
-  upb_free_symtab(&s->symtab);
-  upb_free_symtab(&s->psymtab);
-  upb_rwlock_destroy(&s->lock);
-  free(s);
-}
-
-// Returns a newly allocated array of the defs in "s" whose type matches
-// "type" (or all of them for UPB_DEF_ANY), storing the number of entries in
-// "*count".  A ref is taken on each returned def; the caller owns those refs
-// and the array.
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type)
-{
-  upb_rwlock_rdlock(&s->lock);
-  int total = upb_strtable_count(&s->symtab);
-  // We may only use part of this, depending on how many symbols are of the
-  // correct type.
-  upb_def **defs = malloc(sizeof(*defs) * total);
-  upb_symtab_ent *e = upb_strtable_begin(&s->symtab);
-  int i = 0;
-  for(; e; e = upb_strtable_next(&s->symtab, &e->e)) {
-    upb_def *def = e->def;
-    assert(def);
-    if(type == UPB_DEF_ANY || def->type == type) {
-      // Take the ref while the lock is still held, as upb_symtab_lookup()
-      // does, so a concurrent writer cannot release the def first.
-      upb_def_ref(def);
-      defs[i++] = def;
-    }
-  }
-  upb_rwlock_unlock(&s->lock);
-  *count = i;
-  return defs;
-}
-
-// Looks up "sym" by exact name.  Returns the def with a new ref taken on it,
-// or NULL if there is no such symbol.
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym)
-{
-  upb_def *result = NULL;
-  upb_rwlock_rdlock(&s->lock);
-  upb_symtab_ent *ent = upb_strtable_lookup(&s->symtab, sym);
-  if(ent != NULL) {
-    result = ent->def;
-    upb_def_ref(result);
-  }
-  upb_rwlock_unlock(&s->lock);
-  return result;
-}
-
-
-// Resolves "symbol" relative to the scope "base".  Returns the def with a
-// new ref taken on it, or NULL if the symbol cannot be resolved.
-upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) {
-  upb_def *result = NULL;
-  upb_rwlock_rdlock(&s->lock);
-  upb_symtab_ent *ent = upb_resolve(&s->symtab, base, symbol);
-  if(ent != NULL) {
-    result = ent->def;
-    upb_def_ref(result);
-  }
-  upb_rwlock_unlock(&s->lock);
-  return result;
-}
-
-// Reads a FileDescriptorSet from "src" and adds every def it describes to
-// "s".  Redefining an existing symbol is not allowed (allow_redef is false).
-// Failures are reported through "status"; nothing is returned.
-void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status)
-{
-  upb_deflist defs;
-  upb_deflist_init(&defs);
-  upb_fielddef *f;
-  while((f = upb_src_getdef(src)) != NULL) {
-    switch(f->number) {
-      case GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM:
-        CHECKSRC(upb_src_startmsg(src));
-        CHECK(upb_addfd(src, &defs, status));
-        CHECKSRC(upb_src_endmsg(src));
-        break;
-      default:
-        CHECKSRC(upb_src_skipval(src));
-    }
-  }
-  CHECKSRC(upb_src_eof(src));
-  CHECK(upb_symtab_add_defs(s, &defs, false, status));
-  // Defs handed over to the symtab were NULLed out of the list, so this only
-  // frees the array itself.
-  upb_deflist_uninit(&defs);
-  return;
-
-src_err:
-  upb_copyerr(status, upb_src_status(src));
-err:
-  // Unrefs whatever defs are still in the list.
-  upb_deflist_uninit(&defs);
-}
-
-
-/* upb_baredecoder ************************************************************/
-
-// upb_baredecoder is a upb_src that can parse a subset of the protocol buffer
-// binary format.  It is only used for bootstrapping.  It can parse without
-// having a upb_msgdef, which is why it is useful for bootstrapping the first
-// msgdef.  On the downside, it does not support:
-//
-// * having its input span multiple upb_strings.
-// * reading any field of the returned upb_fielddef's except f->number.
-// * keeping a pointer to the upb_fielddef* and reading it later (the same
-//   upb_fielddef is reused over and over).
-// * detecting errors in the input (we trust that our input is known-good).
-//
-// It also does not support any of the following protobuf features:
-// * packed fields.
-// * groups.
-// * zig-zag-encoded types like sint32 and sint64.
-//
-// If descriptor.proto ever changed to use any of these features, this decoder
-// would need to be extended to support them.
-
-typedef struct {
-  upb_src src;                 // Interface through which the decoder is used.
-  upb_string *input;           // The serialized data being parsed.
-  upb_strlen_t offset;         // Current read position within "input".
-  upb_fielddef field;          // Reused for every field; only "number" is set.
-  upb_wire_type_t wire_type;   // Wire type of the most recently read key.
-  upb_strlen_t delimited_len;  // Length of the current delimited value.
-  upb_strlen_t stack[UPB_MAX_NESTING], *top;  // End offsets of open messages.
-  upb_string *str;             // Scratch string for delimited values.
-} upb_baredecoder;
-
-// Reads a varint at the current offset and advances the offset past it.
-// Each byte contributes its low 7 bits, least significant group first; the
-// last byte is the one with the high bit clear.  No bounds checking is done.
-static uint64_t upb_baredecoder_readv64(upb_baredecoder *d)
-{
-  const uint8_t *start = (uint8_t*)upb_string_getrobuf(d->input) + d->offset;
-  const uint8_t *p = start;
-  uint64_t result = 0;
-  int shift = 0;
-  uint8_t byte;
-  do {
-    byte = *p++;
-    result |= ((uint64_t)(byte & 0x7F)) << shift;
-    shift += 7;
-  } while(byte & 0x80);
-  d->offset += p - start;
-  return result;
-}
-
-// Reads a varint at the current offset and returns its low 32 bits.  The
-// whole varint is consumed, even if it encodes more than 32 bits.
-static uint32_t upb_baredecoder_readv32(upb_baredecoder *d)
-{
-  return (uint32_t)upb_baredecoder_readv64(d); // Truncate.
-}
-
-// Copies 8 raw bytes at the current offset into a uint64_t and advances the
-// offset past them.  No byte-order conversion is performed.
-static uint64_t upb_baredecoder_readf64(upb_baredecoder *d)
-{
-  uint64_t result;
-  const char *from = upb_string_getrobuf(d->input) + d->offset;
-  memcpy(&result, from, 8);
-  d->offset += 8;
-  return result;
-}
-
-// Copies 4 raw bytes at the current offset into a uint32_t and advances the
-// offset past them.  No byte-order conversion is performed.
-static uint32_t upb_baredecoder_readf32(upb_baredecoder *d)
-{
-  uint32_t result;
-  const char *from = upb_string_getrobuf(d->input) + d->offset;
-  memcpy(&result, from, 4);
-  d->offset += 4;
-  return result;
-}
-
-// Reads the next field key.  Returns the decoder's single reusable fielddef
-// with only "number" filled in, or NULL (with src.eof set) once the offset
-// reaches the end offset at the top of the stack.
-static upb_fielddef *upb_baredecoder_getdef(upb_baredecoder *d)
-{
-  // Detect end-of-submessage.
-  if(d->offset >= *d->top) {
-    d->src.eof = true;
-    return NULL;
-  }
-
-  // The key carries the wire type in its low 3 bits and the field number in
-  // the remaining bits.
-  uint32_t key = upb_baredecoder_readv32(d);
-  d->wire_type = key & 0x7;
-  d->field.number = key >> 3;
-  if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) {
-    // For delimited wire values we parse the length now, since we need it in
-    // all cases.
-    d->delimited_len = upb_baredecoder_readv32(d);
-  }
-  return &d->field;
-}
-
-// Reads the value of the field most recently returned by
-// upb_baredecoder_getdef() and advances past it.  Numeric values are stored
-// through "val"; a delimited value is made available in d->str.  Always
-// returns true (the input is trusted).
-static bool upb_baredecoder_getval(upb_baredecoder *d, upb_valueptr val)
-{
-  if(d->wire_type == UPB_WIRE_TYPE_DELIMITED) {
-    d->str = upb_string_tryrecycle(d->str);
-    upb_string_substr(d->str, d->input, d->offset, d->delimited_len);
-    // Consume the payload.  Without this the next getdef() would parse the
-    // payload bytes as a field key.
-    d->offset += d->delimited_len;
-  } else {
-    switch(d->wire_type) {
-      case UPB_WIRE_TYPE_VARINT:
-        *val.uint64 = upb_baredecoder_readv64(d);
-        break;
-      case UPB_WIRE_TYPE_32BIT_VARINT:
-        *val.uint32 = upb_baredecoder_readv32(d);
-        break;
-      case UPB_WIRE_TYPE_64BIT:
-        *val.uint64 = upb_baredecoder_readf64(d);
-        break;
-      case UPB_WIRE_TYPE_32BIT:
-        *val.uint32 = upb_baredecoder_readf32(d);
-        break;
-      default:
-        assert(false);
-    }
-  }
-  return true;
-}
-
-// Skips the current value by reading it into a throwaway upb_value.
-static bool upb_baredecoder_skipval(upb_baredecoder *d)
-{
-  upb_value val;
-  return upb_baredecoder_getval(d, upb_value_addrof(&val));
-}
-
-// Enters the delimited submessage whose key was just read, pushing its end
-// offset onto the stack.  "top" always points at the end offset of the
-// innermost open message, which is what upb_baredecoder_getdef() compares
-// against, so the push increments first and then stores.
-static bool upb_baredecoder_startmsg(upb_baredecoder *d)
-{
-  *(++d->top) = d->offset + d->delimited_len;
-  return true;
-}
-
-// Leaves the current submessage: jumps to its end offset and pops it, so
-// that "top" points at the enclosing message's end offset again.
-static bool upb_baredecoder_endmsg(upb_baredecoder *d)
-{
-  d->offset = *(d->top--);
-  return true;
-}
-
-// Function table that exposes the bare decoder through the upb_src
-// interface; installed by upb_baredecoder_new().
-static upb_src_vtable upb_baredecoder_src_vtbl = {
-  (upb_src_getdef_fptr)&upb_baredecoder_getdef,
-  (upb_src_getval_fptr)&upb_baredecoder_getval,
-  (upb_src_skipval_fptr)&upb_baredecoder_skipval,
-  (upb_src_startmsg_fptr)&upb_baredecoder_startmsg,
-  (upb_src_endmsg_fptr)&upb_baredecoder_endmsg,
-};
-
-// Creates a bare decoder that parses "str" from its beginning.  The decoder
-// takes its own ref on "str".
-static upb_baredecoder *upb_baredecoder_new(upb_string *str)
-{
-  upb_baredecoder *d = malloc(sizeof(*d));
-  d->input = upb_string_getref(str);
-  d->str = upb_string_new();
-  // Parsing starts at the first byte of the input.
-  d->offset = 0;
-  d->top = &d->stack[0];
-  // The base slot is the end offset of the top-level message: the end of the
-  // whole input.
-  *d->top = upb_string_len(d->input);
-  upb_src_init(&d->src, &upb_baredecoder_src_vtbl);
-  return d;
-}
-
-// Releases the decoder's refs on its input and scratch strings, then frees
-// the decoder.
-static void upb_baredecoder_free(upb_baredecoder *d)
-{
-  upb_string_unref(d->input);
-  upb_string_unref(d->str);
-  free(d);
-}
-
-// Returns the upb_src through which the bare decoder is read.  The pointer
-// refers to a member of "d".
-static upb_src *upb_baredecoder_src(upb_baredecoder *d)
-{
-  return &d->src;
-}
-
-// Builds a new symtab from the serialized descriptor in descriptor_pb /
-// descriptor_pb_len, parsing it with the bare decoder.  The caller owns the
-// returned symtab.
-upb_symtab *upb_get_descriptor_symtab()
-{
-  // TODO: implement sharing of symtabs, so that successive calls to this
-  // function will return the same symtab.
-  upb_symtab *symtab = upb_symtab_new();
-  // TODO: allow upb_strings to be static or on the stack.
-  upb_string *descriptor = upb_strduplen(descriptor_pb, descriptor_pb_len);
-  upb_baredecoder *decoder = upb_baredecoder_new(descriptor);
-  // NOTE(review): "status" is never initialized here, yet upb_ok() reads it
-  // below even when nothing has written to it -- confirm how a upb_status is
-  // meant to be initialized and do so before the call.
-  upb_status status;
-  upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status);
-  assert(upb_ok(&status));
-  upb_baredecoder_free(decoder);
-  upb_string_unref(descriptor);
-  return symtab;
-}
diff --git a/src/upb_def.h b/src/upb_def.h
deleted file mode 100644
index c297e83..0000000
--- a/src/upb_def.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * Provides definitions of .proto constructs:
- * - upb_msgdef: describes a "message" construct.
- * - upb_fielddef: describes a message field.
- * - upb_enumdef: describes an enum.
- * (TODO: definitions of extensions and services).
- *
- * Defs are obtained from a upb_symtab object.  A upb_symtab is empty when
- * constructed, and definitions can be added by supplying serialized
- * descriptors.
- *
- * Defs are immutable and reference-counted.  Symbol tables reference any defs
- * that are the "current" definitions.  If an extension is loaded that adds a
- * field to an existing message, a new msgdef is constructed that includes the
- * new field and the old msgdef is unref'd.  The old msgdef will still be ref'd
- * by messages (if any) that were constructed with that msgdef.
- *
- * This file contains routines for creating and manipulating the definitions
- * themselves.  To create and manipulate actual messages, see upb_msg.h.
- */
-
-#ifndef UPB_DEF_H_
-#define UPB_DEF_H_
-
-#include "upb_atomic.h"
-#include "upb_stream.h"
-#include "upb_table.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* upb_def: base class for defs  **********************************************/
-
-// All the different kind of defs we support.  These correspond 1:1 with
-// declarations in a .proto file.
-typedef enum {
-  UPB_DEF_MSG = 0,
-  UPB_DEF_ENUM,
-  UPB_DEF_SVC,
-  UPB_DEF_EXT,
-  // Internal-only, placeholder for a def that hasn't be resolved yet.
-  UPB_DEF_UNRESOLVED,
-
-  // For specifying that defs of any type are requsted from getdefs.
-  UPB_DEF_ANY = -1
-} upb_def_type;
-
-// This typedef is more space-efficient than declaring an enum var directly.
-typedef int8_t upb_def_type_t;
-
-typedef struct {
-  upb_string *fqname;  // Fully qualified.
-  upb_atomic_refcount_t refcount;
-  upb_def_type_t type;
-
-  // The is_cyclic flag could go in upb_msgdef instead of here, because only
-  // messages can be involved in cycles.  However, putting them here is free
-  // from a space perspective because structure alignment will otherwise leave
-  // three bytes empty after type.  It is also makes ref and unref more
-  // efficient, because we don't have to downcast to msgdef before checking the
-  // is_cyclic flag.
-  bool is_cyclic;
-  uint16_t search_depth;  // Used during initialization dfs.
-} upb_def;
-
-// These must not be called directly!
-void _upb_def_cyclic_ref(upb_def *def);
-void _upb_def_reftozero(upb_def *def);
-
-// Call to ref/deref a def.
-INLINE void upb_def_ref(upb_def *def) {
-  if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def);
-}
-INLINE void upb_def_unref(upb_def *def) {
-  if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
-}
-
-/* upb_fielddef ***************************************************************/
-
-// A upb_fielddef describes a single field in a message.  It isn't a full def
-// in the sense that it derives from upb_def.  It cannot stand on its own; it
-// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
-// It is also reference-counted.
-typedef struct _upb_fielddef {
-  upb_atomic_refcount_t refcount;
-  upb_string *name;
-  upb_field_number_t number;
-  upb_field_type_t type;
-  upb_label_t label;
-  upb_value default_value;
-
-  // For the case of an enum or a submessage, points to the def for that type.
-  upb_def *def;
-
-  // True if we own a ref on "def" (above).  This is true unless this edge is
-  // part of a cycle.
-  bool owned;
-
-  // These are set only when this fielddef is part of a msgdef.
-  uint32_t byte_offset;           // Where in a upb_msg to find the data.
-  upb_field_count_t field_index;  // Indicates set bit.
-} upb_fielddef;
-
-// A variety of tests about the type of a field.
-INLINE bool upb_issubmsg(upb_fielddef *f) {
-  return upb_issubmsgtype(f->type);
-}
-INLINE bool upb_isstring(upb_fielddef *f) {
-  return upb_isstringtype(f->type);
-}
-INLINE bool upb_isarray(upb_fielddef *f) {
-  return f->label == UPB_LABEL(REPEATED);
-}
-// Does the type of this field imply that it should contain an associated def?
-INLINE bool upb_hasdef(upb_fielddef *f) {
-  return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
-}
-
-INLINE bool upb_field_ismm(upb_fielddef *f) {
-  return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
-}
-
-INLINE bool upb_elem_ismm(upb_fielddef *f) {
-  return upb_isstring(f) || upb_issubmsg(f);
-}
-
-/* upb_msgdef *****************************************************************/
-
-// Structure that describes a single .proto message type.
-typedef struct _upb_msgdef {
-  upb_def base;
-  upb_atomic_refcount_t cycle_refcount;
-  size_t size;
-  upb_field_count_t num_fields;
-  uint32_t set_flags_bytes;
-  uint32_t num_required_fields;  // Required fields have the lowest set bytemasks.
-  upb_fielddef *fields;   // We have exclusive ownership of these.
-
-  // Tables for looking up fields by number and name.
-  upb_inttable itof;  // int to field
-  upb_strtable ntof;  // name to field
-} upb_msgdef;
-
-// Hash table entries for looking up fields by name or number.
-typedef struct {
-  upb_inttable_entry e;
-  upb_fielddef *f;
-} upb_itof_ent;
-typedef struct {
-  upb_strtable_entry e;
-  upb_fielddef *f;
-} upb_ntof_ent;
-
-// Looks up a field by name or number.  While these are written to be as fast
-// as possible, it will still be faster to cache the results of this lookup if
-// possible.  These return NULL if no such field is found.
-INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) {
-  upb_itof_ent *e =
-      (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e));
-  return e ? e->f : NULL;
-}
-
-INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) {
-  upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
-  return e ? e->f : NULL;
-}
-
-/* upb_enumdef ****************************************************************/
-
-typedef struct _upb_enumdef {
-  upb_def base;
-  upb_strtable ntoi;
-  upb_inttable iton;
-} upb_enumdef;
-
-typedef int32_t upb_enumval_t;
-
-// Lookups from name to integer and vice-versa.
-bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
-upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
-
-// Iteration over name/value pairs.  The order is undefined.
-//   upb_enum_iter i;
-//   for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
-//     // ...
-//   }
-typedef struct {
-  upb_enumdef *e;
-  void *state;   // Internal iteration state.
-  upb_string *name;
-  upb_enumval_t val;
-} upb_enum_iter;
-void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e);
-void upb_enum_next(upb_enum_iter *iter);
-bool upb_enum_done(upb_enum_iter *iter);
-
-/* upb_symtab *****************************************************************/
-
-// A SymbolTable is where upb_defs live.  It is empty when first constructed.
-// Clients add definitions to the symtab by supplying unserialized or
-// serialized descriptors (as defined in descriptor.proto).
-typedef struct {
-  upb_atomic_refcount_t refcount;
-  upb_rwlock_t lock;       // Protects all members except the refcount.
-  upb_msgdef *fds_msgdef;  // In psymtab, ptr here for convenience.
-
-  // Our symbol tables; we own refs to the defs therein.
-  upb_strtable symtab;     // The main symbol table.
-  upb_strtable psymtab;    // Private symbols, for internal use.
-} upb_symtab;
-
-// Initializes a upb_symtab.  Contexts are not freed explicitly, but unref'd
-// when the caller is done with them.
-upb_symtab *upb_symtab_new(void);
-void _upb_symtab_free(upb_symtab *s);  // Must not be called directly!
-
-INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
-INLINE void upb_symtab_unref(upb_symtab *s) {
-  if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s);
-}
-
-// Resolves the given symbol using the rules described in descriptor.proto,
-// namely:
-//
-//    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
-//    scoping rules are used to find the type (i.e. first the nested types
-//    within this message are searched, then within the parent, on up to the
-//    root namespace).
-//
-// If a def is found, the caller owns one ref on the returned def.  Otherwise
-// returns NULL.
-upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
-
-// Find an entry in the symbol table with this exact name.  If a def is found,
-// the caller owns one ref on the returned def.  Otherwise returns NULL.
-upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
-
-// Gets an array of pointers to all currently active defs in this symtab.  The
-// caller owns the returned array (which is of length *count) as well as a ref
-// to each symbol inside.  If type is UPB_DEF_ANY then defs of all types are
-// returned, otherwise only defs of the required type are returned.
-upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type);
-
-// "fds" is a upb_src that will yield data from the
-// google.protobuf.FileDescriptorSet message type.  upb_symtab_addfds() adds
-// all the definitions from the given FileDescriptorSet and adds them to the
-// symtab.  status indicates whether the operation was successful or not, and
-// the error message (if any).
-//
-// TODO: should this allow redefinition?  Either is possible, but which is
-// more useful?  Maybe it should be an option.
-void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status);
-
-// Returns a symtab that defines google.protobuf.DescriptorProto and all other
-// types that are defined in descriptor.proto.  This allows you to load other
-// proto types.  The caller owns a ref on the returned symtab.
-upb_symtab *upb_get_descriptor_symtab();
-
-
-/* upb_def casts **************************************************************/
-
-// Dynamic casts, for determining if a def is of a particular type at runtime.
-#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
-  struct _upb_ ## lower;  /* Forward-declare. */ \
-  INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
-    if(def->type != UPB_DEF_ ## upper) return NULL; \
-    return (struct _upb_ ## lower*)def; \
-  }
-UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
-UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
-UPB_DYNAMIC_CAST_DEF(svcdef, SVC);
-UPB_DYNAMIC_CAST_DEF(extdef, EXT);
-UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
-#undef UPB_DYNAMIC_CAST_DEF
-
-// Downcasts, for when some wants to assert that a def is of a particular type.
-// These are only checked if we are building debug.
-#define UPB_DOWNCAST_DEF(lower, upper) \
-  struct _upb_ ## lower;  /* Forward-declare. */ \
-  INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
-    assert(def->type == UPB_DEF_ ## upper); \
-    return (struct _upb_ ## lower*)def; \
-  }
-UPB_DOWNCAST_DEF(msgdef, MSG);
-UPB_DOWNCAST_DEF(enumdef, ENUM);
-UPB_DOWNCAST_DEF(svcdef, SVC);
-UPB_DOWNCAST_DEF(extdef, EXT);
-UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
-#undef UPB_DOWNCAST_DEF
-
-#define UPB_UPCAST(ptr) (&(ptr)->base)
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_DEF_H_ */
diff --git a/src/upb_encoder.c b/src/upb_encoder.c
deleted file mode 100644
index 304a423..0000000
--- a/src/upb_encoder.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#include "upb_encoder.h"
-
-#include <stdlib.h>
-#include "descriptor.h"
-
-/* Functions for calculating sizes of wire values. ****************************/
-
-static size_t upb_v_uint64_t_size(uint64_t val) {
-#ifdef __GNUC__
-  int high_bit = 63 - __builtin_clzll(val);  // 0-based, undef if val == 0.
-#else
-  int high_bit = 0;
-  uint64_t tmp = val;
-  while(tmp >>= 1) high_bit++;
-#endif
-  return val == 0 ? 1 : high_bit / 7 + 1;
-}
-
-static size_t upb_v_int32_t_size(int32_t val) {
-  // v_uint32's are sign-extended to maintain wire compatibility with int64s.
-  return upb_v_uint64_t_size((int64_t)val);
-}
-static size_t upb_v_uint32_t_size(uint32_t val) {
-  return upb_v_uint64_t_size(val);
-}
-static size_t upb_f_uint64_t_size(uint64_t val) {
-  (void)val;  // Length is independent of value.
-  return sizeof(uint64_t);
-}
-static size_t upb_f_uint32_t_size(uint32_t val) {
-  (void)val;  // Length is independent of value.
-  return sizeof(uint32_t);
-}
-
-
-/* Functions to write wire values. ********************************************/
-
-// Since we know in advance the longest that the value could be, we always make
-// sure that our buffer is long enough.  This saves us from having to perform
-// bounds checks.
-
-// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT).
-static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
-{
-  do {
-    uint8_t byte = val & 0x7f;
-    val >>= 7;
-    if(val) byte |= 0x80;
-    *buf++ = byte;
-  } while(val);
-  return buf;
-}
-
-// Puts an unsigned 32-bit varint, verbatim.  Never uses the high 64 bits.
-static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
-{
-  return upb_put_v_uint64_t(buf, val);
-}
-
-// Puts a signed 32-bit varint, first sign-extending to 64-bits.  We do this to
-// maintain wire-compatibility with 64-bit signed integers.
-static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
-{
-  return upb_put_v_uint64_t(buf, (int64_t)val);
-}
-
-static void upb_put32(uint8_t *buf, uint32_t val) {
-  buf[0] = val & 0xff;
-  buf[1] = (val >> 8) & 0xff;
-  buf[2] = (val >> 16) & 0xff;
-  buf[3] = (val >> 24);
-}
-
-// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
-static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
-{
-  uint8_t *uint32_end = buf + sizeof(uint32_t);
-#if UPB_UNALIGNED_READS_OK
-  *(uint32_t*)buf = val;
-#else
-  upb_put32(buf, val);
-#endif
-  return uint32_end;
-}
-
-// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
-static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
-{
-  uint8_t *uint64_end = buf + sizeof(uint64_t);
-#if UPB_UNALIGNED_READS_OK
-  *(uint64_t*)buf = val;
-#else
-  upb_put32(buf, (uint32_t)val);
-  upb_put32(buf, (uint32_t)(val >> 32));
-#endif
-  return uint64_end;
-}
-
-/* Functions to write and calculate sizes for .proto values. ******************/
-
-// Performs zig-zag encoding, which is used by sint32 and sint64.
-static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
-static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
-
-/* Use macros to define a set of two functions for each .proto type:
- *
- *  // Converts and writes a .proto value into buf.  "end" indicates the end
- *  // of the current available buffer (if the buffer does not contain enough
- *  // space UPB_STATUS_NEED_MORE_DATA is returned).  On success, *outbuf will
- *  // point one past the data that was written.
- *  uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
- *
- *  // Returns the number of bytes required to encode val.
- *  size_t upb_get_INT32_size(int32_t val);
- *
- *  // Given a .proto value s (source) convert it to a wire value.
- *  uint32_t upb_vtowv_INT32(int32_t s);
- */
-
-#define VTOWV(type, wire_t, val_t) \
-  static wire_t upb_vtowv_ ## type(val_t s)
-
-#define PUT(type, v_or_f, wire_t, val_t, member_name) \
-  static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \
-    wire_t tmp = upb_vtowv_ ## type(val); \
-    return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \
-  }
-
-#define T(type, v_or_f, wire_t, val_t, member_name) \
-  static size_t upb_get_ ## type ## _size(val_t val) { \
-    return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \
-  } \
-  VTOWV(type, wire_t, val_t);  /* prototype for PUT below */ \
-  PUT(type, v_or_f, wire_t, val_t, member_name) \
-  VTOWV(type, wire_t, val_t)
-
-T(INT32,    v,  int32_t, int32_t,  int32)   { return (uint32_t)s;     }
-T(INT64,    v, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
-T(UINT32,   v, uint32_t, uint32_t, uint32)  { return s;               }
-T(UINT64,   v, uint64_t, uint64_t, uint64)  { return s;               }
-T(SINT32,   v, uint32_t, int32_t,  int32)   { return upb_zzenc_32(s); }
-T(SINT64,   v, uint64_t, int64_t,  int64)   { return upb_zzenc_64(s); }
-T(FIXED32,  f, uint32_t, uint32_t, uint32)  { return s;               }
-T(FIXED64,  f, uint64_t, uint64_t, uint64)  { return s;               }
-T(SFIXED32, f, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
-T(SFIXED64, f, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
-T(BOOL,     v, uint32_t, bool,     _bool)   { return (uint32_t)s;     }
-T(ENUM,     v, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
-T(DOUBLE,   f, uint64_t, double,   _double) {
-  upb_value v;
-  v._double = s;
-  return v.uint64;
-}
-T(FLOAT,    f, uint32_t, float,    _float)  {
-  upb_value v;
-  v._float = s;
-  return v.uint32;
-}
-#undef VTOWV
-#undef PUT
-#undef T
-
-static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
-{
-#define CASE(t, member_name) \
-  case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
-  switch(ft) {
-    CASE(DOUBLE,   _double)
-    CASE(FLOAT,    _float)
-    CASE(INT32,    int32)
-    CASE(INT64,    int64)
-    CASE(UINT32,   uint32)
-    CASE(UINT64,   uint64)
-    CASE(SINT32,   int32)
-    CASE(SINT64,   int64)
-    CASE(FIXED32,  uint32)
-    CASE(FIXED64,  uint64)
-    CASE(SFIXED32, int32)
-    CASE(SFIXED64, int64)
-    CASE(BOOL,     _bool)
-    CASE(ENUM,     int32)
-    default: assert(false); return buf;
-  }
-#undef CASE
-}
-
-static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
-{
-#define CASE(t, member_name) \
-  case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
-  switch(ft) {
-    CASE(DOUBLE,   _double)
-    CASE(FLOAT,    _float)
-    CASE(INT32,    int32)
-    CASE(INT64,    int64)
-    CASE(UINT32,   uint32)
-    CASE(UINT64,   uint64)
-    CASE(SINT32,   int32)
-    CASE(SINT64,   int64)
-    CASE(FIXED32,  uint32)
-    CASE(FIXED64,  uint64)
-    CASE(SFIXED32, int32)
-    CASE(SFIXED64, int64)
-    CASE(BOOL,     _bool)
-    CASE(ENUM,     int32)
-    default: assert(false); return 0;
-  }
-#undef CASE
-}
-
-static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
-                             upb_wire_type_t wt)
-{
-  return upb_put_UINT32(buf, wt | (num << 3));
-}
-
-static uint32_t _upb_get_tag_size(upb_field_number_t num)
-{
-  return upb_get_UINT32_size(num << 3);
-}
-
-
-/* upb_sizebuilder ************************************************************/
-
-struct upb_sizebuilder {
-  // Accumulating size for the current level.
-  uint32_t size;
-
-  // Stack of sizes for our current nesting.
-  uint32_t stack[UPB_MAX_NESTING], *top;
-
-  // Vector of sizes.
-  uint32_t *sizes;
-  int sizes_len;
-  int sizes_size;
-
-  upb_status status;
-};
-
-// upb_sink callbacks.
-static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
-                                                upb_value val,
-                                                upb_status *status)
-{
-  (void)status;
-  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
-  uint32_t size = 0;
-  size += _upb_get_tag_size(f->number);
-  size += _upb_get_value_size(f->type, val);
-  sb->size += size;
-  return UPB_SINK_CONTINUE;
-}
-
-static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
-                                              upb_strptr str,
-                                              int32_t start, uint32_t end,
-                                              upb_status *status)
-{
-  (void)status;
-  (void)str;   // String data itself is not used.
-  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
-  if(start >= 0) {
-    uint32_t size = 0;
-    size += _upb_get_tag_size(f->number);
-    size += upb_get_UINT32_size(end - start);
-    sb->size += size;
-  }
-  return UPB_SINK_CONTINUE;
-}
-
-static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f,
-                                                upb_status *status)
-{
-  (void)status;
-  (void)f;  // Unused (we calculate tag size and delimiter in endcb).
-  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
-  if(f->type == UPB_TYPE(MESSAGE)) {
-    *sb->top = sb->size;
-    sb->top++;
-    sb->size = 0;
-  } else {
-    assert(f->type == UPB_TYPE(GROUP));
-    sb->size += _upb_get_tag_size(f->number);
-  }
-  return UPB_SINK_CONTINUE;
-}
-
-static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f,
-                                              upb_status *status)
-{
-  (void)status;
-  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
-  if(f->type == UPB_TYPE(MESSAGE)) {
-    sb->top--;
-    if(sb->sizes_len == sb->sizes_size) {
-      sb->sizes_size *= 2;
-      sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes));
-    }
-    uint32_t child_size = sb->size;
-    uint32_t parent_size = *sb->top;
-    sb->sizes[sb->sizes_len++] = child_size;
-    // The size according to the parent includes the tag size and delimiter of
-    // the submessage.
-    parent_size += upb_get_UINT32_size(child_size);
-    parent_size += _upb_get_tag_size(f->number);
-    // Include size accumulated in parent before child began.
-    sb->size = child_size + parent_size;
-  } else {
-    assert(f->type == UPB_TYPE(GROUP));
-    // As an optimization, we could just add this number twice in startcb, to
-    // avoid having to recalculate it.
-    sb->size += _upb_get_tag_size(f->number);
-  }
-  return UPB_SINK_CONTINUE;
-}
-
-upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
-  _upb_sizebuilder_valuecb,
-  _upb_sizebuilder_strcb,
-  _upb_sizebuilder_startcb,
-  _upb_sizebuilder_endcb
-};
-
-
-/* upb_sink callbacks *********************************************************/
-
-struct upb_encoder {
-  upb_sink base;
-  //upb_bytesink *bytesink;
-  uint32_t *sizes;
-  int size_offset;
-};
-
-
-// Within one callback we may need to encode up to two separate values.
-#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2)
-
-static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf,
-                                             size_t len, upb_status *status)
-{
-  // TODO: conjure a upb_strptr that points to buf.
-  //upb_strptr ptr;
-  (void)s;
-  (void)buf;
-  (void)status;
-  size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr);
-  if(written < len) {
-    // TODO: mark to skip "written" bytes next time.
-    return UPB_SINK_STOP;
-  } else {
-    return UPB_SINK_CONTINUE;
-  }
-}
-
-static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f,
-                                            upb_value val, upb_status *status)
-{
-  upb_encoder *s = (upb_encoder*)sink;
-  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
-  upb_wire_type_t wt = upb_types[f->type].expected_wire_type;
-  // TODO: handle packed encoding.
-  ptr = _upb_put_tag(ptr, f->number, wt);
-  ptr = upb_encode_value(ptr, f->type, val);
-  return _upb_encoder_push_buf(s, buf, ptr - buf, status);
-}
-
-static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f,
-                                          upb_strptr str,
-                                          int32_t start, uint32_t end,
-                                          upb_status *status)
-{
-  upb_encoder *s = (upb_encoder*)sink;
-  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
-  if(start >= 0) {
-    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
-    ptr = upb_put_UINT32(ptr, end - start);
-  }
-  // TODO: properly handle partially consumed strings and partially supplied
-  // strings.
-  _upb_encoder_push_buf(s, buf, ptr - buf, status);
-  return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status);
-}
-
-static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f,
-                                            upb_status *status)
-{
-  upb_encoder *s = (upb_encoder*)sink;
-  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
-  if(f->type == UPB_TYPE(GROUP)) {
-    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP);
-  } else {
-    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
-    ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]);
-  }
-  return _upb_encoder_push_buf(s, buf, ptr - buf, status);
-}
-
-static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f,
-                                          upb_status *status)
-{
-  upb_encoder *s = (upb_encoder*)sink;
-  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
-  if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE;
-  ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP);
-  return _upb_encoder_push_buf(s, buf, ptr - buf, status);
-}
-
-upb_sink_callbacks _upb_encoder_sink_vtbl = {
-  _upb_encoder_valuecb,
-  _upb_encoder_strcb,
-  _upb_encoder_startcb,
-  _upb_encoder_endcb
-};
-
diff --git a/src/upb_encoder.h b/src/upb_encoder.h
deleted file mode 100644
index e879b0b..0000000
--- a/src/upb_encoder.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Implements a upb_sink that writes protobuf data to the binary wire format.
- *
- * For messages that have any submessages, the encoder needs a buffer
- * containing the submessage sizes, so they can be properly written at the
- * front of each message.  Note that groups do *not* have this requirement.
- *
- * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
- */
-
-#ifndef UPB_ENCODER_H_
-#define UPB_ENCODER_H_
-
-#include "upb.h"
-#include "upb_srcsink.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* upb_encoder ****************************************************************/
-
-// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
-// buffer binary wire format.
-struct upb_encoder;
-typedef struct upb_encoder upb_encoder;
-
-upb_encoder *upb_encoder_new(upb_msgdef *md);
-void upb_encoder_free(upb_encoder *e);
-
-// Resets the given upb_encoder such that is is ready to begin encoding,
-// outputting data to "bytesink" (which must live until the encoder is
-// reset or destroyed).
-void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
-
-// Returns the upb_sink to which data can be written.  The sink is invalidated
-// when the encoder is reset or destroyed.  Note that if the client wants to
-// encode any length-delimited submessages it must first call
-// upb_encoder_buildsizes() below.
-upb_sink *upb_encoder_sink(upb_encoder *e);
-
-// Call prior to pushing any data with embedded submessages.  "src" must yield
-// exactly the same data as what will next be encoded, but in reverse order.
-// The encoder iterates over this data in order to determine the sizes of the
-// submessages.  If any errors are returned by the upb_src, the status will
-// be saved in *status.  If the client is sure that the upb_src will not throw
-// any errors, "status" may be NULL.
-void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_ENCODER_H_ */
diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c
deleted file mode 100644
index 5db04f6..0000000
--- a/src/upb_inlinedefs.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * This file, if compiled, will contain standalone (non-inlined) versions of
- * all inline functions defined in header files.  We don't generally use this
- * file since we use "static inline" for inline functions (which will put a
- * standalone version of the function in any .o file that needs it, but
- * compiling this file and dumping the object file will let us inspect how
- * inline functions are compiled, so we keep it around.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#define INLINE
-#include "upb.h"
-#include "upb_data.h"
-#include "upb_def.h"
-#include "upb_parse.h"
-#include "upb_table.h"
-#include "upb_text.h"
diff --git a/src/upb_stream.h b/src/upb_stream.h
deleted file mode 100644
index e7b4074..0000000
--- a/src/upb_stream.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * This file defines four general-purpose streaming interfaces for protobuf
- * data or bytes:
- *
- * - upb_src: pull interface for protobuf data.
- * - upb_sink: push interface for protobuf data.
- * - upb_bytesrc: pull interface for bytes.
- * - upb_bytesink: push interface for bytes.
- *
- * These interfaces are used as general-purpose glue in upb.  For example, the
- * decoder interface works by implementing a upb_src and calling a upb_bytesrc.
- *
- * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
- *
- */
-
-#ifndef UPB_SRCSINK_H
-#define UPB_SRCSINK_H
-
-#include "upb_stream_vtbl.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Forward-declare.  We can't include upb_def.h; it would be circular.
-struct _upb_fielddef;
-
-// Note!  The "eof" flags work like feof() in C; they cannot report end-of-file
-// until a read has failed due to eof.  They cannot preemptively tell you that
-// the next call will fail due to eof.  Since these are the semantics that C
-// and UNIX provide, we're stuck with them if we want to support eg. stdio.
-
-/* upb_src ********************************************************************/
-
-// TODO: decide how to handle unknown fields.
-
-// Retrieves the fielddef for the next field in the stream.  Returns NULL on
-// error or end-of-stream.
-struct _upb_fielddef *upb_src_getdef(upb_src *src);
-
-// Retrieves and stores the next value in "val".  For string types "val" must
-// be a newly-recycled string.  Returns false on error.
-bool upb_src_getval(upb_src *src, upb_valueptr val);
-bool upb_src_getstr(upb_src *src, upb_string *val);
-
-// Like upb_src_getval() but skips the value.
-bool upb_src_skipval(upb_src *src);
-
-// Descends into a submessage.  May only be called after a def has been
-// returned that indicates a submessage.
-bool upb_src_startmsg(upb_src *src);
-
-// Stops reading a submessage.  May be called before the stream is EOF, in
-// which case the rest of the submessage is skipped.
-bool upb_src_endmsg(upb_src *src);
-
-// Returns the current error/eof status for the stream.
-INLINE upb_status *upb_src_status(upb_src *src) { return &src->status; }
-INLINE bool upb_src_eof(upb_src *src) { return src->eof; }
-
-// The following functions are equivalent to upb_src_getval(), but take
-// pointers to specific types.  In debug mode this may check that the type
-// is compatible with the type being read.  This check will *not* be performed
-// in non-debug mode, and if you get the type wrong the behavior is undefined.
-bool upb_src_getbool(upb_src *src, bool *val);
-bool upb_src_getint32(upb_src *src, int32_t *val);
-bool upb_src_getint64(upb_src *src, int64_t *val);
-bool upb_src_getuint32(upb_src *src, uint32_t *val);
-bool upb_src_getuint64(upb_src *src, uint64_t *val);
-bool upb_src_getfloat(upb_src *src, float *val);
-bool upb_src_getdouble(upb_src *src, double *val);
-
-/* upb_sink *******************************************************************/
-
-// Puts the given fielddef into the stream.
-bool upb_sink_putdef(upb_sink *sink, struct _upb_fielddef *def);
-
-// Puts the given value into the stream.
-bool upb_sink_putval(upb_sink *sink, upb_value val);
-
-// Starts a submessage.  (needed?  the def tells us we're starting a submsg.)
-bool upb_sink_startmsg(upb_sink *sink);
-
-// Ends a submessage.
-bool upb_sink_endmsg(upb_sink *sink);
-
-// Returns the current error status for the stream.
-upb_status *upb_sink_status(upb_sink *sink);
-
-/* upb_bytesrc ****************************************************************/
-
-// Returns the next string in the stream.  false is returned on error or eof.
-// The string must be at least "minlen" bytes long unless the stream is eof.
-bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen);
-
-// Appends the next "len" bytes in the stream in-place to "str".  This should
-// be used when the caller needs to build a contiguous string of the existing
-// data in "str" with more data.
-bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len);
-
-// Returns the current error status for the stream.
-INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; }
-INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; }
-
-/* upb_bytesink ***************************************************************/
-
-// Puts the given string.  Returns the number of bytes that were actually,
-// consumed, which may be fewer than were in the string, or <0 on error.
-int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str);
-
-// Returns the current error status for the stream.
-upb_status *upb_bytesink_status(upb_bytesink *sink);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif
diff --git a/src/upb_stream_vtbl.h b/src/upb_stream_vtbl.h
deleted file mode 100644
index 0ec45d2..0000000
--- a/src/upb_stream_vtbl.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * vtable declarations for types that are implementing any of the src or sink
- * interfaces.  Only components that are implementing these interfaces need
- * to worry about this file.
- *
- * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
- */
-
-#ifndef UPB_SRCSINK_VTBL_H_
-#define UPB_SRCSINK_VTBL_H_
-
-#include "upb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct upb_src;
-typedef struct upb_src upb_src;
-struct upb_sink;
-typedef struct upb_sink upb_sink;
-struct upb_bytesrc;
-typedef struct upb_bytesrc upb_bytesrc;
-struct upb_bytesink;
-typedef struct upb_bytesink upb_bytesink;
-
-// Typedefs for function pointers to all of the virtual functions.
-typedef struct _upb_fielddef (*upb_src_getdef_fptr)(upb_src *src);
-typedef bool (*upb_src_getval_fptr)(upb_src *src, upb_valueptr val);
-typedef bool (*upb_src_skipval_fptr)(upb_src *src);
-typedef bool (*upb_src_startmsg_fptr)(upb_src *src);
-typedef bool (*upb_src_endmsg_fptr)(upb_src *src);
-
-typedef bool (*upb_sink_putdef_fptr)(upb_sink *sink, struct _upb_fielddef *def);
-typedef bool (*upb_sink_putval_fptr)(upb_sink *sink, upb_value val);
-typedef bool (*upb_sink_startmsg_fptr)(upb_sink *sink);
-typedef bool (*upb_sink_endmsg_fptr)(upb_sink *sink);
-
-typedef upb_string *(*upb_bytesrc_get_fptr)(upb_bytesrc *src);
-typedef void (*upb_bytesrc_recycle_fptr)(upb_bytesrc *src, upb_string *str);
-typedef bool (*upb_bytesrc_append_fptr)(
-    upb_bytesrc *src, upb_string *str, upb_strlen_t len);
-
-typedef int32_t (*upb_bytesink_put_fptr)(upb_bytesink *sink, upb_string *str);
-
-// Vtables for the above interfaces.
-typedef struct {
-  upb_src_getdef_fptr   getdef;
-  upb_src_getval_fptr   getval;
-  upb_src_skipval_fptr  skipval;
-  upb_src_startmsg_fptr startmsg;
-  upb_src_endmsg_fptr   endmsg;
-} upb_src_vtable;
-
-typedef struct {
-  upb_bytesrc_get_fptr     get;
-  upb_bytesrc_append_fptr  append;
-  upb_bytesrc_recycle_fptr recycle;
-} upb_bytesrc_vtable;
-
-// "Base Class" definitions; components that implement these interfaces should
-// contain one of these structures.
-
-struct upb_src {
-  upb_src_vtable *vtbl;
-  upb_status status;
-  bool eof;
-#ifndef NDEBUG
-  int state;  // For debug-mode checking of API usage.
-#endif
-};
-
-struct upb_bytesrc {
-  upb_bytesrc_vtable *vtbl;
-  upb_status status;
-  bool eof;
-};
-
-INLINE void upb_src_init(upb_src *s, upb_src_vtable *vtbl) {
-  s->vtbl = vtbl;
-  s->eof = false;
-#ifndef DEBUG
-  // TODO: initialize debug-mode checking.
-#endif
-}
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif
diff --git a/src/upb_string.c b/src/upb_string.c
deleted file mode 100644
index 91ab9ae..0000000
--- a/src/upb_string.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
- */
-
-#include "upb_string.h"
-
-#include <stdlib.h>
-
-#define UPB_STRING_UNFINALIZED -1
-
-static uint32_t upb_round_up_pow2(uint32_t v) {
-  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
-  v--;
-  v |= v >> 1;
-  v |= v >> 2;
-  v |= v >> 4;
-  v |= v >> 8;
-  v |= v >> 16;
-  v++;
-  return v;
-}
-
-upb_string *upb_string_new() {
-  upb_string *str = malloc(sizeof(*str));
-  str->ptr = NULL;
-  str->size = 0;
-  str->len = UPB_STRING_UNFINALIZED;
-  upb_atomic_refcount_init(&str->refcount, 1);
-  return str;
-}
-
-void _upb_string_free(upb_string *str) {
-  if(str->ptr) free(str->ptr);
-  free(str);
-}
-
-char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
-  assert(str->len == UPB_STRING_UNFINALIZED);
-  if (str->size < len) {
-    str->size = upb_round_up_pow2(len);
-    str->ptr = realloc(str->ptr, str->size);
-  }
-  str->len = len;
-  return str->ptr;
-}
diff --git a/src/upb_string.h b/src/upb_string.h
deleted file mode 100644
index 770dba7..0000000
--- a/src/upb_string.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
- *
- * This file defines a simple string type.  The overriding goal of upb_string
- * is to avoid memcpy(), malloc(), and free() wheverever possible, while
- * keeping both CPU and memory overhead low.  Throughout upb there are
- * situations where one wants to reference all or part of another string
- * without copying.  upb_string provides APIs for doing this.
- *
- * Characteristics of upb_string:
- * - strings are reference-counted.
- * - strings are logically immutable.
- * - if a string has no other referents, it can be "recycled" into a new string
- *   without having to reallocate the upb_string.
- * - strings can be substrings of other strings (owning a ref on the source
- *   string).
- * - strings can refer to memory that they do not own, in which case we avoid
- *   copies if possible (the exact strategy for doing this can vary).
- * - strings are not thread-safe by default, but can be made so by calling a
- *   function.  This is not the default because it causes extra CPU overhead.
- */
-
-#ifndef UPB_STRING_H
-#define UPB_STRING_H
-
-#include <assert.h>
-#include <string.h>
-#include "upb_atomic.h"
-#include "upb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// All members of this struct are private, and may only be read/written through
-// the associated functions.  Also, strings may *only* be allocated on the heap.
-struct _upb_string {
-  char *ptr;
-  int32_t len;
-  uint32_t size;
-  upb_atomic_refcount_t refcount;
-  union {
-    // Used if this is a slice of another string.
-    struct _upb_string *src;
-    // Used if this string is referencing external unowned memory.
-    upb_atomic_refcount_t reader_count;
-  } extra;
-};
-
-// Returns a newly-created, empty, non-finalized string.  When the string is no
-// longer needed, it should be unref'd, never freed directly.
-upb_string *upb_string_new();
-
-void _upb_string_free(upb_string *str);
-
-// Releases a ref on the given string, which may free the memory.  "str"
-// can be NULL, in which case this is a no-op.
-INLINE void upb_string_unref(upb_string *str) {
-  if (str && upb_atomic_unref(&str->refcount)) _upb_string_free(str);
-}
-
-// Returns a string with the same contents as "str".  The caller owns a ref on
-// the returned string, which may or may not be the same object as "str.
-INLINE upb_string *upb_string_getref(upb_string *str) {
-  // If/when we support stack-allocated strings, this will have to allocate
-  // a new string if the given string is on the stack.
-  upb_atomic_ref(&str->refcount);
-  return str;
-}
-
-// Returns the length of the string.
-INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
-
-// Use to read the bytes of the string.  The caller *must* call
-// upb_string_endread() after the data has been read.  The window between
-// upb_string_getrobuf() and upb_string_endread() should be kept as short as
-// possible, because any pending upb_string_detach() may be blocked until
-// upb_string_endread is called().  No other functions may be called on the
-// string during this window except upb_string_len().
-INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
-INLINE void upb_string_endread(upb_string *str) { (void)str; }
-
-// Attempts to recycle the string "str" so it may be reused and have different
-// data written to it.  The returned string is either "str" if it could be
-// recycled or a newly created string if "str" has other references.
-//
-// As a special case, passing NULL will allocate a new string.  This is
-// convenient for the pattern:
-//
-//   upb_string *str = NULL;
-//   while (x) {
-//     if (y) {
-//       str = upb_string_tryrecycle(str);
-//       upb_src_getstr(str);
-//     }
-//   }
-upb_string *upb_string_tryrecycle(upb_string *str);
-
-// The three options for setting the contents of a string.  These may only be
-// called when a string is first created or recycled; once other functions have
-// been called on the string, these functions are not allowed until the string
-// is recycled.
-
-// Gets a pointer suitable for writing to the string, which is guaranteed to
-// have at least "len" bytes of data available.  The size of the string will
-// become "len".
-char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
-
-// Sets the contents of "str" to be the given substring of "target_str", to
-// which the caller must own a ref.
-void upb_string_substr(upb_string *str, upb_string *target_str,
-                       upb_strlen_t start, upb_strlen_t len);
-
-// Makes the string "str" a reference to the given string data.  The caller
-// guarantees that the given string data will not change or be deleted until
-// a matching call to upb_string_detach().
-void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
-void upb_string_detach(upb_string *str);
-
-// Allows using upb_strings in printf, ie:
-//   upb_strptr str = UPB_STRLIT("Hello, World!\n");
-//   printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */
-#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
-#define UPB_STRFMT "%.*s"
-
-/* upb_string library functions ***********************************************/
-
-// Named like their <string.h> counterparts, these are all safe against buffer
-// overflow.  These only use the public upb_string interface.
-
-// More efficient than upb_strcmp if all you need is to test equality.
-INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
-  upb_strlen_t len = upb_string_len(s1);
-  if(len != upb_string_len(s2)) {
-    return false;
-  } else {
-    bool ret =
-        memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
-    upb_string_endread(s1);
-    upb_string_endread(s2);
-    return ret;
-  }
-}
-
-// Like strcmp().
-int upb_strcmp(upb_string *s1, upb_string *s2);
-
-// Like upb_strcpy, but copies from a buffer and length.
-INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
-  memcpy(upb_string_getrwbuf(dest, len), src, len);
-}
-
-// Replaces the contents of "dest" with the contents of "src".
-INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
-  upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
-  upb_string_endread(src);
-}
-
-// Like upb_strcpy, but copies from a NULL-terminated string.
-INLINE void upb_strcpyc(upb_string *dest, const char *src) {
-  // This does two passes over src, but that is necessary unless we want to
-  // repeatedly re-allocate dst, which seems worse.
-  upb_strcpylen(dest, src, strlen(src));
-}
-
-// Returns a new string whose contents are a copy of s.
-upb_string *upb_strdup(upb_string *s);
-
-// Like upb_strdup(), but duplicates a given buffer and length.
-INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
-  upb_string *s = upb_string_new();
-  upb_strcpylen(s, src, len);
-  return s;
-}
-
-// Like upb_strdup(), but duplicates a C NULL-terminated string.
-upb_string *upb_strdupc(const char *src);
-
-// Appends 'append' to 's' in-place, resizing s if necessary.
-void upb_strcat(upb_string *s, upb_string *append);
-
-// Returns a new string that is a substring of the given string.
-upb_string *upb_strslice(upb_string *s, int offset, int len);
-
-// Reads an entire file into a newly-allocated string.
-upb_string *upb_strreadfile(const char *filename);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif
diff --git a/src/upb_table.c b/src/upb_table.c
deleted file mode 100644
index b91776c..0000000
--- a/src/upb_table.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#include "upb_table.h"
-#include "upb_string.h"
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-static const upb_inttable_key_t EMPTYENT = 0;
-static const double MAX_LOAD = 0.85;
-
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
-
-/* We use 1-based indexes into the table so that 0 can be "NULL". */
-static upb_inttable_entry *intent(upb_inttable *t, int32_t i) {
-  return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
-}
-static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
-  return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
-}
-
-void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size)
-{
-  t->count = 0;
-  t->entry_size = entry_size;
-  t->size_lg2 = 1;
-  while(size >>= 1) t->size_lg2++;
-  size_t bytes = upb_table_size(t) * t->entry_size;
-  t->mask = upb_table_size(t) - 1;
-  t->entries = malloc(bytes);
-  memset(t->entries, 0, bytes);  /* Both tables consider 0's an empty entry. */
-}
-
-void upb_inttable_init(upb_inttable *t, uint32_t size, uint16_t entsize)
-{
-  upb_table_init(&t->t, size, entsize);
-}
-
-void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize)
-{
-  upb_table_init(&t->t, size, entsize);
-}
-
-void upb_table_free(upb_table *t) { free(t->entries); }
-void upb_inttable_free(upb_inttable *t) { upb_table_free(&t->t); }
-void upb_strtable_free(upb_strtable *t) {
-  // Free refs from the strtable.
-  upb_strtable_entry *e = upb_strtable_begin(t);
-  for(; e; e = upb_strtable_next(t, e)) {
-    upb_string_unref(e->key);
-  }
-  upb_table_free(&t->t);
-}
-
-static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
-{
-  uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
-  return (hash & (upb_strtable_size(t)-1)) + 1;
-}
-
-void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
-{
-  uint32_t bucket = strtable_bucket(t, key);
-  upb_strtable_entry *e;
-  do {
-    e = strent(t, bucket);
-    if(e->key && upb_streql(e->key, key)) return e;
-  } while((bucket = e->next) != UPB_END_OF_CHAIN);
-  return NULL;
-}
-
-static uint32_t empty_intbucket(upb_inttable *table)
-{
-  /* TODO: does it matter that this is biased towards the front of the table? */
-  for(uint32_t i = 1; i <= upb_inttable_size(table); i++) {
-    upb_inttable_entry *e = intent(table, i);
-    if(e->key == EMPTYENT) return i;
-  }
-  assert(false);
-  return 0;
-}
-
-/* The insert routines have a lot more code duplication between int/string
- * variants than I would like, but there's just a bit too much that varies to
- * parameterize them. */
-static void intinsert(upb_inttable *t, upb_inttable_entry *e)
-{
-  assert(upb_inttable_lookup(t, e->key) == NULL);
-  t->t.count++;
-  uint32_t bucket = upb_inttable_bucket(t, e->key);
-  upb_inttable_entry *table_e = intent(t, bucket);
-  if(table_e->key != EMPTYENT) {  /* Collision. */
-    if(bucket == upb_inttable_bucket(t, table_e->key)) {
-      /* Existing element is in its main posisiton.  Find an empty slot to
-       * place our new element and append it to this key's chain. */
-      uint32_t empty_bucket = empty_intbucket(t);
-      while (table_e->next != UPB_END_OF_CHAIN)
-        table_e = intent(t, table_e->next);
-      table_e->next = empty_bucket;
-      table_e = intent(t, empty_bucket);
-    } else {
-      /* Existing element is not in its main position.  Move it to an empty
-       * slot and put our element in its main position. */
-      uint32_t empty_bucket = empty_intbucket(t);
-      uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key);
-      memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
-      upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
-      while(1) {
-        assert(evictee_e->key != UPB_EMPTY_ENTRY);
-        assert(evictee_e->next != UPB_END_OF_CHAIN);
-        if(evictee_e->next == bucket) {
-          evictee_e->next = empty_bucket;
-          break;
-        }
-        evictee_e = intent(t, evictee_e->next);
-      }
-      /* table_e remains set to our mainpos. */
-    }
-  }
-  memcpy(table_e, e, t->t.entry_size);
-  table_e->next = UPB_END_OF_CHAIN;
-  assert(upb_inttable_lookup(t, e->key) == table_e);
-}
-
-void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e)
-{
-  assert(e->key != 0);
-  if((double)(t->t.count + 1) / upb_inttable_size(t) > MAX_LOAD) {
-    /* Need to resize.  New table of double the size, add old elements to it. */
-    upb_inttable new_table;
-    upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size);
-    new_table.t.count = t->t.count;
-    upb_inttable_entry *old_e;
-    for(old_e = upb_inttable_begin(t); old_e; old_e = upb_inttable_next(t, old_e))
-      intinsert(&new_table, old_e);
-    upb_inttable_free(t);
-    *t = new_table;
-  }
-  intinsert(t, e);
-}
-
-static uint32_t empty_strbucket(upb_strtable *table)
-{
-  /* TODO: does it matter that this is biased towards the front of the table? */
-  for(uint32_t i = 1; i <= upb_strtable_size(table); i++) {
-    upb_strtable_entry *e = strent(table, i);
-    if(!e->key) return i;
-  }
-  assert(false);
-  return 0;
-}
-
-static void strinsert(upb_strtable *t, upb_strtable_entry *e)
-{
-  assert(upb_strtable_lookup(t, e->key) == NULL);
-  e->key = upb_string_getref(e->key);
-  t->t.count++;
-  uint32_t bucket = strtable_bucket(t, e->key);
-  upb_strtable_entry *table_e = strent(t, bucket);
-  if(table_e->key) {  /* Collision. */
-    if(bucket == strtable_bucket(t, table_e->key)) {
-      /* Existing element is in its main posisiton.  Find an empty slot to
-       * place our new element and append it to this key's chain. */
-      uint32_t empty_bucket = empty_strbucket(t);
-      while (table_e->next != UPB_END_OF_CHAIN)
-        table_e = strent(t, table_e->next);
-      table_e->next = empty_bucket;
-      table_e = strent(t, empty_bucket);
-    } else {
-      /* Existing element is not in its main position.  Move it to an empty
-       * slot and put our element in its main position. */
-      uint32_t empty_bucket = empty_strbucket(t);
-      uint32_t evictee_bucket = strtable_bucket(t, table_e->key);
-      memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
-      upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
-      while(1) {
-        assert(!upb_string_isnull(evictee_e->key));
-        assert(evictee_e->next != UPB_END_OF_CHAIN);
-        if(evictee_e->next == bucket) {
-          evictee_e->next = empty_bucket;
-          break;
-        }
-        evictee_e = strent(t, evictee_e->next);
-      }
-      /* table_e remains set to our mainpos. */
-    }
-  }
-  memcpy(table_e, e, t->t.entry_size);
-  table_e->next = UPB_END_OF_CHAIN;
-  assert(upb_strtable_lookup(t, e->key) == table_e);
-}
-
-void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e)
-{
-  if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
-    /* Need to resize.  New table of double the size, add old elements to it. */
-    upb_strtable new_table;
-    upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size);
-    upb_strtable_entry *old_e;
-    for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e))
-      strinsert(&new_table, old_e);
-    upb_strtable_free(t);
-    *t = new_table;
-  }
-  strinsert(t, e);
-}
-
-void *upb_inttable_begin(upb_inttable *t) {
-  return upb_inttable_next(t, intent(t, 0));
-}
-
-void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) {
-  upb_inttable_entry *end = intent(t, upb_inttable_size(t)+1);
-  do {
-    cur = (void*)((char*)cur + t->t.entry_size);
-    if(cur == end) return NULL;
-  } while(cur->key == UPB_EMPTY_ENTRY);
-  return cur;
-}
-
-void *upb_strtable_begin(upb_strtable *t) {
-  return upb_strtable_next(t, strent(t, 0));
-}
-
-void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) {
-  upb_strtable_entry *end = strent(t, upb_strtable_size(t)+1);
-  do {
-    cur = (void*)((char*)cur + t->t.entry_size);
-    if(cur == end) return NULL;
-  } while(cur->key == NULL);
-  return cur;
-}
-
-#ifdef UPB_UNALIGNED_READS_OK
-//-----------------------------------------------------------------------------
-// MurmurHash2, by Austin Appleby (released as public domain).
-// Reformatted and C99-ified by Joshua Haberman.
-// Note - This code makes a few assumptions about how your machine behaves -
-//   1. We can read a 4-byte value from any address without crashing
-//   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
-// And it has a few limitations -
-//   1. It will not work incrementally.
-//   2. It will not produce the same results on little-endian and big-endian
-//      machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
-{
-  // 'm' and 'r' are mixing constants generated offline.
-  // They're not really 'magic', they just happen to work well.
-  const uint32_t m = 0x5bd1e995;
-  const int32_t r = 24;
-
-  // Initialize the hash to a 'random' value
-  uint32_t h = seed ^ len;
-
-  // Mix 4 bytes at a time into the hash
-  const uint8_t * data = (const uint8_t *)key;
-  while(len >= 4) {
-    uint32_t k = *(uint32_t *)data;
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h *= m;
-    h ^= k;
-
-    data += 4;
-    len -= 4;
-  }
-
-  // Handle the last few bytes of the input array
-  switch(len) {
-    case 3: h ^= data[2] << 16;
-    case 2: h ^= data[1] << 8;
-    case 1: h ^= data[0]; h *= m;
-  };
-
-  // Do a few final mixes of the hash to ensure the last few
-  // bytes are well-incorporated.
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
-}
-
-#else // !UPB_UNALIGNED_READS_OK
-
-//-----------------------------------------------------------------------------
-// MurmurHashAligned2, by Austin Appleby
-// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
-// on certain platforms.
-// Performance will be lower than MurmurHash2
-
-#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
-{
-  const uint32_t m = 0x5bd1e995;
-  const int32_t r = 24;
-  const uint8_t * data = (const uint8_t *)key;
-  uint32_t h = seed ^ len;
-  uint8_t align = (uintptr_t)data & 3;
-
-  if(align && (len >= 4)) {
-    // Pre-load the temp registers
-    uint32_t t = 0, d = 0;
-
-    switch(align) {
-      case 1: t |= data[2] << 16;
-      case 2: t |= data[1] << 8;
-      case 3: t |= data[0];
-    }
-
-    t <<= (8 * align);
-
-    data += 4-align;
-    len -= 4-align;
-
-    int32_t sl = 8 * (4-align);
-    int32_t sr = 8 * align;
-
-    // Mix
-
-    while(len >= 4) {
-      d = *(uint32_t *)data;
-      t = (t >> sr) | (d << sl);
-
-      uint32_t k = t;
-
-      MIX(h,k,m);
-
-      t = d;
-
-      data += 4;
-      len -= 4;
-    }
-
-    // Handle leftover data in temp registers
-
-    d = 0;
-
-    if(len >= align) {
-      switch(align) {
-        case 3: d |= data[2] << 16;
-        case 2: d |= data[1] << 8;
-        case 1: d |= data[0];
-      }
-
-      uint32_t k = (t >> sr) | (d << sl);
-      MIX(h,k,m);
-
-      data += align;
-      len -= align;
-
-      //----------
-      // Handle tail bytes
-
-      switch(len) {
-        case 3: h ^= data[2] << 16;
-        case 2: h ^= data[1] << 8;
-        case 1: h ^= data[0]; h *= m;
-      };
-    } else {
-      switch(len) {
-        case 3: d |= data[2] << 16;
-        case 2: d |= data[1] << 8;
-        case 1: d |= data[0];
-        case 0: h ^= (t >> sr) | (d << sl); h *= m;
-      }
-    }
-
-    h ^= h >> 13;
-    h *= m;
-    h ^= h >> 15;
-
-    return h;
-  } else {
-    while(len >= 4) {
-      uint32_t k = *(uint32_t *)data;
-
-      MIX(h,k,m);
-
-      data += 4;
-      len -= 4;
-    }
-
-    //----------
-    // Handle tail bytes
-
-    switch(len) {
-      case 3: h ^= data[2] << 16;
-      case 2: h ^= data[1] << 8;
-      case 1: h ^= data[0]; h *= m;
-    };
-
-    h ^= h >> 13;
-    h *= m;
-    h ^= h >> 15;
-
-    return h;
-  }
-}
-#undef MIX
-
-#endif // UPB_UNALIGNED_READS_OK
diff --git a/src/upb_table.h b/src/upb_table.h
deleted file mode 100644
index 20dae92..0000000
--- a/src/upb_table.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * This file defines very fast int->struct (inttable) and string->struct
- * (strtable) hash tables.  The struct can be of any size, and it is stored
- * in the table itself, for cache-friendly performance.
- *
- * The table uses internal chaining with Brent's variation (inspired by the
- * Lua implementation of hash tables).  The hash function for strings is
- * Austin Appleby's "MurmurHash."
- */
-
-#ifndef UPB_TABLE_H_
-#define UPB_TABLE_H_
-
-#include <assert.h>
-#include "upb.h"
-#include "upb_string.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Note: the key cannot be zero!  Zero is used by the implementation. */
-typedef uint32_t upb_inttable_key_t;
-
-#define UPB_END_OF_CHAIN (uint32_t)0
-#define UPB_EMPTY_ENTRY (uint32_t)0
-
-typedef struct {
-  upb_inttable_key_t key;
-  uint32_t next;  /* Internal chaining. */
-} upb_inttable_entry;
-
-// TODO: consider storing the hash in the entry.  This would avoid the need to
-// rehash on table resizes, but more importantly could possibly improve lookup
-// performance by letting us compare hashes before comparing lengths or the
-// strings themselves.
-typedef struct {
-  upb_string *key;         // We own a ref.
-  uint32_t next;           // Internal chaining.
-} upb_strtable_entry;
-
-typedef struct {
-  void *entries;
-  uint32_t count;       /* How many elements are currently in the table? */
-  uint16_t entry_size;  /* How big is each entry? */
-  uint8_t size_lg2;     /* The table is 2^size_lg2 in size. */
-  uint32_t mask;
-} upb_table;
-
-typedef struct {
-  upb_table t;
-} upb_strtable;
-
-typedef struct {
-  upb_table t;
-} upb_inttable;
-
-/* Initialize and free a table, respectively.  Specify the initial size
- * with 'size' (the size will be increased as necessary).  Entry size
- * specifies how many bytes each entry in the table is. */
-void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t entry_size);
-void upb_inttable_free(upb_inttable *table);
-void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size);
-void upb_strtable_free(upb_strtable *table);
-
-INLINE uint32_t upb_table_size(upb_table *t) { return 1 << t->size_lg2; }
-INLINE uint32_t upb_inttable_size(upb_inttable *t) {
-  return upb_table_size(&t->t);
-}
-INLINE uint32_t upb_strtable_size(upb_strtable *t) {
-  return upb_table_size(&t->t);
-}
-
-INLINE uint32_t upb_table_count(upb_table *t) { return t->count; }
-INLINE uint32_t upb_inttable_count(upb_inttable *t) {
-  return upb_table_count(&t->t);
-}
-INLINE uint32_t upb_strtable_count(upb_strtable *t) {
-  return upb_table_count(&t->t);
-}
-
-/* Inserts the given key into the hashtable with the given value.  The key must
- * not already exist in the hash table.  The data will be copied from e into
- * the hashtable (the amount of data copied comes from entry_size when the
- * table was constructed).  Therefore the data at val may be freed once the
- * call returns. */
-void upb_inttable_insert(upb_inttable *t, upb_inttable_entry *e);
-void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e);
-
-INLINE uint32_t upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) {
-  return (k & t->t.mask) + 1;  /* Identity hash for ints. */
-}
-
-/* Looks up key in this table.  Inlined because this is in the critical path of
- * decoding.  We have the caller specify the entry_size because fixing this as
- * a literal (instead of reading table->entry_size) gives the compiler more
- * ability to optimize. */
-INLINE void *upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
-                                     uint32_t entry_size) {
-  assert(key != 0);
-  uint32_t bucket = upb_inttable_bucket(t, key);
-  upb_inttable_entry *e;
-  do {
-    e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket-1, entry_size);
-    if(e->key == key) return e;
-  } while((bucket = e->next) != UPB_END_OF_CHAIN);
-  return NULL;  /* Not found. */
-}
-
-INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
-  return upb_inttable_fastlookup(t, key, t->t.entry_size);
-}
-
-void *upb_strtable_lookup(upb_strtable *t, upb_string *key);
-
-/* Provides iteration over the table.  The order in which the entries are
- * returned is undefined.  Insertions invalidate iterators.  The _next
- * functions return NULL when the end has been reached. */
-void *upb_inttable_begin(upb_inttable *t);
-void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur);
-
-void *upb_strtable_begin(upb_strtable *t);
-void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_TABLE_H_ */
diff --git a/src/upb_text.c b/src/upb_text.c
deleted file mode 100644
index 8662269..0000000
--- a/src/upb_text.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#include <inttypes.h>
-#include "descriptor.h"
-#include "upb_text.h"
-#include "upb_data.h"
-
-void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file)
-{
-#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break;
-  switch(type) {
-    case UPB_TYPE(DOUBLE):
-      CASE("%0.f", _double);
-    case UPB_TYPE(FLOAT):
-      CASE("%0.f", _float)
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SFIXED64):
-    case UPB_TYPE(SINT64):
-      CASE("%" PRId64, int64)
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64):
-      CASE("%" PRIu64, uint64)
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(SFIXED32):
-    case UPB_TYPE(SINT32):
-      CASE("%" PRId32, int32)
-    case UPB_TYPE(UINT32):
-    case UPB_TYPE(FIXED32):
-    case UPB_TYPE(ENUM):
-      CASE("%" PRIu32, uint32);
-    case UPB_TYPE(BOOL):
-      CASE("%hhu", _bool);
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES):
-      /* TODO: escaping. */
-      fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break;
-  }
-}
-
-static void print_indent(upb_text_printer *p, FILE *stream)
-{
-  if(!p->single_line)
-    for(int i = 0; i < p->indent_depth; i++)
-      fprintf(stream, "  ");
-}
-
-void upb_text_printfield(upb_text_printer *p, upb_strptr name,
-                         upb_field_type_t valtype, upb_value val,
-                         FILE *stream)
-{
-  print_indent(p, stream);
-  fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name));
-  upb_text_printval(valtype, val, stream);
-  if(p->single_line)
-    fputc(' ', stream);
-  else
-    fputc('\n', stream);
-}
-
-void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream)
-{
-  print_indent(p, stream);
-  fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type));
-  if(!p->single_line) fputc('\n', stream);
-  p->indent_depth++;
-}
-
-void upb_text_pop(upb_text_printer *p, FILE *stream)
-{
-  p->indent_depth--;
-  print_indent(p, stream);
-  fprintf(stream, "}\n");
-}
-
-static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
-                     FILE *stream);
-
-static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md,
-                     FILE *stream)
-{
-  for(upb_field_count_t i = 0; i < md->num_fields; i++) {
-    upb_fielddef *f = &md->fields[i];
-    if(!upb_msg_has(msg, f)) continue;
-    upb_value v = upb_msg_get(msg, f);
-    if(upb_isarray(f)) {
-      upb_arrayptr arr = v.arr;
-      for(uint32_t j = 0; j < upb_array_len(arr); j++) {
-        upb_value elem = upb_array_get(arr, f, j);
-        printval(printer, elem, f, stream);
-      }
-    } else {
-      printval(printer, v, f, stream);
-    }
-  }
-}
-
-static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
-                     FILE *stream)
-{
-  if(upb_issubmsg(f)) {
-    upb_text_push(printer, f->name, stream);
-    printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream);
-    upb_text_pop(printer, stream);
-  } else {
-    upb_text_printfield(printer, f->name, f->type, v, stream);
-  }
-}
-
-
-void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line,
-                   FILE *stream)
-{
-  upb_text_printer printer;
-  upb_text_printer_init(&printer, single_line);
-  printmsg(&printer, msg, md, stream);
-}
-
diff --git a/src/upb_text.h b/src/upb_text.h
deleted file mode 100644
index d89c9d6..0000000
--- a/src/upb_text.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#ifndef UPB_TEXT_H_
-#define UPB_TEXT_H_
-
-#include "upb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
-  int indent_depth;
-  bool single_line;
-} upb_text_printer;
-
-INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) {
-  p->indent_depth = 0;
-  p->single_line = single_line;
-}
-void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file);
-void upb_text_printfield(upb_text_printer *p, upb_strptr name,
-                         upb_field_type_t valtype, upb_value val, FILE *stream);
-void upb_text_push(upb_text_printer *p, upb_strptr submsg_type,
-                   FILE *stream);
-void upb_text_pop(upb_text_printer *p, FILE *stream);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_TEXT_H_ */
diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h
new file mode 100644
index 0000000..69a28b3
--- /dev/null
+++ b/stream/upb_byteio.h
@@ -0,0 +1,43 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * This file contains upb_bytesrc and upb_bytesink implementations for common
+ * interfaces like strings, UNIX fds, and FILE*.
+ *
+ * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_BYTEIO_H
+#define UPB_BYTEIO_H
+
+#include "upb_srcsink.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_stringsrc **************************************************************/
+
+struct upb_stringsrc;
+typedef struct upb_stringsrc upb_stringsrc;
+
+// Create/free a stringsrc.
+upb_stringsrc *upb_stringsrc_new();
+void upb_stringsrc_free(upb_stringsrc *s);
+
+// Resets the stringsrc to a state where it will vend the given string.  The
+// stringsrc will take a reference on the string, so the caller need not ensure
+// that it outlives the stringsrc.  A stringsrc can be reset multiple times.
+void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
+
+// Returns the upb_bytesrc* for this stringsrc.  Invalidated by reset above.
+upb_bytesrc *upb_stringsrc_bytesrc();
+
+
+/* upb_fdsrc ******************************************************************/
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c
new file mode 100644
index 0000000..e3fdc49
--- /dev/null
+++ b/stream/upb_decoder.c
@@ -0,0 +1,577 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include "upb_decoder.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#define UPB_GROUP_END_OFFSET UINT32_MAX
+
+// Returns true if the given wire type and field type combination is valid,
+// taking into account both packed and non-packed encodings.
+static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
+  return (1 << wt) & upb_types[ft].allowed_wire_types;
+}
+
+// Performs zig-zag decoding, which is used by sint32 and sint64.
+static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
+static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
+
+
+/* upb_decoder ****************************************************************/
+
+// The decoder keeps a stack with one entry per level of recursion.
+// upb_decoder_frame is one frame of that stack.
+typedef struct {
+  upb_msgdef *msgdef;
+  upb_fielddef *field;
+  upb_strlen_t end_offset;  // For groups, -1.
+} upb_decoder_frame;
+
+struct upb_decoder {
+  upb_src src;  // upb_decoder is a upb_src.
+
+  upb_msgdef *toplevel_msgdef;
+  upb_bytesrc *bytesrc;
+
+  // The buffer of input data.  NULL is equivalent to the empty string.
+  upb_string *buf;
+
+  // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain.
+  uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE];
+
+  // The number of bytes we have yet to consume from "buf" or tmpbuf.  This is
+  // always >= 0 unless we were just reset or are eof.
+  int32_t buf_bytesleft;
+
+  // The offset within "buf" from where we are currently reading.  This can be
+  // <0 if we are reading some residual bytes from the previous buffer, which
+  // are stored in tmpbuf and combined with bytes from "buf".
+  int32_t buf_offset;
+
+  // The overall stream offset of the beginning of "buf".
+  uint32_t buf_stream_offset;
+
+  // Fielddef for the key we just read.
+  upb_fielddef *field;
+
+  // Wire type of the key we just read.
+  upb_wire_type_t wire_type;
+
+  // Delimited length of the string field we are reading.
+  upb_strlen_t delimited_len;
+
+  upb_strlen_t packed_end_offset;
+
+  // We keep a stack of messages we have recursed into.
+  upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING];
+};
+
+
+/* upb_decoder buffering. *****************************************************/
+
+static upb_strlen_t upb_decoder_offset(upb_decoder *d)
+{  // Stream offset of the read position: start of "buf" plus offset into it.
+  return d->buf_stream_offset + d->buf_offset;
+}
+
+static bool upb_decoder_nextbuf(upb_decoder *d)
+{
+  assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE);
+
+  // Copy residual bytes to temporary buffer.
+  if(d->buf_bytesleft > 0) {
+    memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset,
+           d->buf_bytesleft);
+  }
+
+  // Recycle old buffer.
+  if(d->buf) {
+    d->buf = upb_string_tryrecycle(d->buf);
+    d->buf_offset -= upb_string_len(d->buf);
+    d->buf_stream_offset += upb_string_len(d->buf);
+  }
+
+  // Pull next buffer.
+  if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) {
+    d->buf_bytesleft += upb_string_len(d->buf);
+    return true;
+  } else {
+    // Error or EOF.
+    if(!upb_bytesrc_eof(d->bytesrc)) {
+      // Error from bytesrc.
+      upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
+      return false;
+    } else if(d->buf_bytesleft == 0) {
+      // EOF from bytesrc and we don't have any residual bytes left.
+      d->src.eof = true;
+      return false;
+    } else {
+      // No more data left from the bytesrc, but we still have residual bytes.
+      return true;
+    }
+  }
+}
+
+static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes)
+{
+  if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) {
+    // GCC is currently complaining about use of an uninitialized value if we
+    // don't set this now.  I think this is incorrect, but leaving this in
+    // to suppress the warning for now.
+    *bytes = 0;
+    if(!upb_decoder_nextbuf(d)) return NULL;
+  }
+
+  assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE);
+
+  if(d->buf_offset >= 0) {
+    // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE
+    // contiguous bytes, so we can read directly out of it.
+    *bytes = d->buf_bytesleft;
+    return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
+  } else {
+    // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we
+    // have so far.
+    upb_strlen_t len = -d->buf_offset;
+    if(d->buf) {
+      upb_strlen_t to_copy =
+          UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf));
+      memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy);
+      len += to_copy;
+    }
+    // Pad the buffer out to UPB_MAX_ENCODED_SIZE.
+    memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len);
+    *bytes = len;
+    return d->tmpbuf;
+  }
+}
+
+// Returns a pointer to a buffer of data that is at least UPB_MAX_ENCODED_SIZE
+// bytes long.  This buffer contains the next bytes in the stream (even if
+// those bytes span multiple buffers).  *bytes is set to the number of actual
+// stream bytes that are available in the returned buffer.  If
+// *bytes < UPB_MAX_ENCODED_SIZE, the buffer is padded with 0x80 bytes.
+//
+// After the data has been read, upb_decoder_consume() should be called to
+// indicate how many bytes were consumed.
+static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes)
+{
+  if(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0) {
+    // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE
+    // contiguous bytes, so we can read directly out of it.
+    *bytes = d->buf_bytesleft;
+    return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
+  } else {
+    return upb_decoder_getbuf_full(d, bytes);
+  }
+}
+
+static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes)
+{
+  assert(bytes <= UPB_MAX_ENCODED_SIZE);
+  d->buf_offset += bytes;
+  d->buf_bytesleft -= bytes;
+  if(d->buf_offset < 0) {
+    // We still have residual bytes we have not consumed.
+    memmove(d->tmpbuf, d->tmpbuf + bytes, -d->buf_offset);
+  }
+  assert(d->buf_bytesleft >= 0);
+  return true;
+}
+
+static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes)
+{
+  d->buf_offset += bytes;
+  d->buf_bytesleft -= bytes;
+  while(d->buf_bytesleft < 0) {
+    if(!upb_decoder_nextbuf(d)) return false;
+  }
+  return true;
+}
+
+
+/* Functions to read wire values. *********************************************/
+
+// Parses remaining bytes of a 64-bit varint that has already had its first byte
+// parsed.
+INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high)
+{
+  upb_strlen_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  const uint8_t *start = buf;
+  if(!buf) return false;
+
+  *high = 0;
+  uint32_t b;
+  b = *(buf++); *low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) << 28;
+                *high  = (b & 0x7f) >>  3; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) <<  4; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done;
+
+  if(bytes_available >= 10) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated "
+               "after 10 bytes, stream offset: %u", upb_decoder_offset(d));
+  } else {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle "
+               "of a varint, stream offset: %u", upb_decoder_offset(d));
+  }
+  return false;
+
+done:
+  return upb_decoder_consume(d, buf - start);
+}
+
+// Gets a varint -- called when we only need 32 bits of it.  Note that a 32-bit
+// varint is not a true wire type.
+static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val)
+{
+  uint32_t high;
+  if(!upb_decoder_readv64(d, val, &high)) return false;
+
+  // We expect the high bits to be zero, except that signed 32-bit values are
+  // first sign-extended to be wire-compatible with 64 bits, in which case we
+  // expect the high bits to be all one.
+  //
+  // We could perform a slightly more sophisticated check by having the caller
+  // indicate whether a signed or unsigned value is being read.  We could check
+  // that the high bits are all zeros for unsigned, and properly sign-extended
+  // for signed.
+  if(high != 0 && ~high != 0) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but "
+               "the high bits contained data we should not truncate: "
+               "%ux, stream offset: %u", high, upb_decoder_offset(d));
+    return false;
+  }
+  return true;
+}
+
+// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).  Caller
+// promises that 4 bytes are available at buf.
+static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val)
+{
+  upb_strlen_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  if(bytes_available < 4) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+               "Stream ended in the middle of a 32-bit value");
+    return false;
+  }
+  memcpy(val, buf, 4);
+  // TODO: byte swap if big-endian.
+  return upb_decoder_consume(d, 4);
+}
+
+// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).  Caller
+// promises that 8 bytes are available at buf.
+static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val)
+{
+  upb_strlen_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  if(bytes_available < 8) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+               "Stream ended in the middle of a 64-bit value");
+    return false;
+  }
+  memcpy(val, buf, 8);
+  // TODO: byte swap if big-endian.
+  return upb_decoder_consume(d, 8);
+}
+
+// Skips over one varint (wire type: UPB_WIRE_TYPE_VARINT) in the input.
+// Returns nonzero on success; on failure returns zero and sets the status.
+// A varint may occupy at most 10 bytes, including its terminating byte.
+static uint8_t upb_decoder_skipv64(upb_decoder *d)
+{
+  uint32_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  uint8_t i;
+  for(i = 0; i < 10 && buf[i] & 0x80; i++)
+    ;  // Count the leading bytes that have the continuation bit set.
+  if(i >= 10) {  // No terminating byte within the 10-byte limit.
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint.");
+    return false;
+  }
+  return upb_decoder_consume(d, i + 1);  // +1 for the terminating byte.
+}
+
+
+/* upb_src implementation for upb_decoder. ************************************/
+
+bool upb_decoder_skipval(upb_decoder *d);
+
+upb_fielddef *upb_decoder_getdef(upb_decoder *d)
+{  // Reads the next key; returns its fielddef, or NULL on eof or error.
+  // Detect end-of-submessage.
+  if(upb_decoder_offset(d) >= d->top->end_offset) {
+    d->src.eof = true;
+    return NULL;
+  }
+
+  // Handles the packed field case.
+  if(d->field) return d->field;
+
+  uint32_t key = 0;
+again:
+  if(!upb_decoder_readv32(d, &key)) return NULL;
+  upb_wire_type_t wire_type = key & 0x7;
+  int32_t field_number = key >> 3;
+  d->wire_type = wire_type;  // upb_decoder_skipval() below dispatches on this.
+
+  if(wire_type == UPB_WIRE_TYPE_DELIMITED) {
+    // For delimited wire values we parse the length now, since we need it in
+    // all cases.
+    if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL;
+  } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) {
+    if(d->top->end_offset == UPB_GROUP_END_OFFSET) {
+      d->src.eof = true;
+    } else {
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current "
+                 "message is not a group, byte offset: %u",
+                 upb_decoder_offset(d));
+    }
+    return NULL;
+  }
+
+  // Look up field by tag number.
+  upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number);
+
+  if (!f) {
+    // Unknown field.  If/when the upb_src interface supports reporting
+    // unknown fields we will implement that here.
+    upb_decoder_skipval(d);
+    goto again;
+  } else if (!upb_check_type(wire_type, f->type)) {
+    // This is a recoverable error condition.  We skip the value but also
+    // return NULL and report the error.
+    upb_decoder_skipval(d);
+    // TODO: better error message.
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n");
+    return NULL;
+  }
+  d->field = f;
+  return f;
+}
+
+bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
+{
+  switch(upb_types[d->field->type].native_wire_type) {
+    case UPB_WIRE_TYPE_VARINT: {
+      uint32_t low, high;
+      if(!upb_decoder_readv64(d, &low, &high)) return false;
+      uint64_t u64 = ((uint64_t)high << 32) | low;
+      if(d->field->type == UPB_TYPE(SINT64))
+        *val.int64 = upb_zzdec_64(u64);
+      else
+        *val.uint64 = u64;
+      break;
+    }
+    case UPB_WIRE_TYPE_32BIT_VARINT: {
+      uint32_t u32;
+      if(!upb_decoder_readv32(d, &u32)) return false;
+      if(d->field->type == UPB_TYPE(SINT32))
+        *val.int32 = upb_zzdec_32(u32);
+      else
+        *val.uint32 = u32;
+      break;
+    }
+    case UPB_WIRE_TYPE_64BIT:
+      if(!upb_decoder_readf64(d, val.uint64)) return false;
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      if(!upb_decoder_readf32(d, val.uint32)) return false;
+      break;
+    default:
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+                 "Attempted to call getval on a group.");
+      return false;
+  }
+  // For a packed field where we have not reached the end, we leave the field
+  // in the decoder so we will return it again without parsing a key.
+  if(d->wire_type != UPB_WIRE_TYPE_DELIMITED ||
+     upb_decoder_offset(d) >= d->packed_end_offset) {
+    d->field = NULL;
+  }
+  return true;
+}
+
+bool upb_decoder_getstr(upb_decoder *d, upb_string *str) {
+  // A string, bytes, or a length-delimited submessage.  The latter isn't
+  // technically a string, but can be gotten as one to perform lazy parsing.
+  const int32_t total_len = d->delimited_len;
+  if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) {
+    // The entire string is inside our current buffer, so we can just
+    // return a substring of the buffer without copying.
+    upb_string_substr(str, d->buf,
+                      upb_string_len(d->buf) - d->buf_bytesleft,
+                      total_len);
+    upb_decoder_skipbytes(d, total_len);
+  } else {
+    // The string spans buffers, so we must copy from the residual buffer
+    // (if any bytes are there), then the buffer, and finally from the bytesrc.
+    uint8_t *ptr = (uint8_t*)upb_string_getrwbuf(
+        str, UPB_MIN(total_len, d->buf_bytesleft));
+    int32_t len = 0;
+    if(d->buf_offset < 0) {
+      // Residual bytes we need to copy from tmpbuf.
+      memcpy(ptr, d->tmpbuf, -d->buf_offset);
+      len += -d->buf_offset;
+    }
+    if(d->buf) {
+      // Bytes from the buffer.
+      memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset,
+             upb_string_len(str) - len);
+    }
+    upb_decoder_skipbytes(d, upb_string_len(str));
+    if(len < total_len) {
+      // Bytes from the bytesrc.
+      if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) {
+        upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
+        return false;
+      }
+      // Have to advance this since the buffering layer of the decoder will
+      // never see these bytes.
+      d->buf_stream_offset += total_len - len;
+    }
+  }
+  d->field = NULL;
+  return true;
+}
+
+static bool upb_decoder_skipgroup(upb_decoder *d);
+
+bool upb_decoder_startmsg(upb_decoder *d) {
+  d->top->field = d->field;
+  if(++d->top >= d->limit) {
+    upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED,
+               "Nesting exceeded maximum (%d levels)\n",
+               UPB_MAX_NESTING);
+    return false;
+  }
+  upb_decoder_frame *frame = d->top;
+  frame->msgdef = upb_downcast_msgdef(d->field->def);
+  if(d->field->type == UPB_TYPE(GROUP)) {
+    frame->end_offset = UPB_GROUP_END_OFFSET;
+  } else {
+    frame->end_offset = upb_decoder_offset(d) + d->delimited_len;
+  }
+  return true;
+}
+
+bool upb_decoder_endmsg(upb_decoder *d) {
+  if(d->top > d->stack) {
+    --d->top;
+    if(!d->src.eof) {
+      if(d->top->field->type == UPB_TYPE(GROUP))
+        upb_decoder_skipgroup(d);
+      else
+        upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d));
+    }
+    d->src.eof = false;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool upb_decoder_skipval(upb_decoder *d) {
+  upb_strlen_t bytes_to_skip;
+  switch(d->wire_type) {
+    case UPB_WIRE_TYPE_VARINT: {
+      return upb_decoder_skipv64(d);
+    }
+    case UPB_WIRE_TYPE_START_GROUP:
+      if(!upb_decoder_startmsg(d)) return false;
+      if(!upb_decoder_skipgroup(d)) return false;
+      if(!upb_decoder_endmsg(d)) return false;
+      return true;
+    default:
+      // Including UPB_WIRE_TYPE_END_GROUP.
+      assert(false);
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group");
+      return false;
+    case UPB_WIRE_TYPE_64BIT:
+      bytes_to_skip = 8;
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      bytes_to_skip = 4;
+      break;
+    case UPB_WIRE_TYPE_DELIMITED:
+      // Works for both string/bytes *and* submessages.
+      bytes_to_skip = d->delimited_len;
+      break;
+  }
+  return upb_decoder_skipbytes(d, bytes_to_skip);
+}
+
+static bool upb_decoder_skipgroup(upb_decoder *d)
+{
+  // This will be mutually recursive with upb_decoder_skipval() if the group
+  // has sub-groups.  If we wanted to handle EAGAIN in the future, this
+  // approach would not work; we would need to track the group depth
+  // explicitly.
+  while(upb_decoder_getdef(d)) {
+    if(!upb_decoder_skipval(d)) return false;
+  }
+  // If we are at the end of the group like we want to be, then
+  // upb_decoder_getdef() returned NULL because of eof, not error.
+  if(!d->src.eof) return false;
+  return true;
+}
+
+upb_src_vtable upb_decoder_src_vtbl = {
+  (upb_src_getdef_fptr)&upb_decoder_getdef,
+  (upb_src_getval_fptr)&upb_decoder_getval,
+  (upb_src_skipval_fptr)&upb_decoder_skipval,
+  (upb_src_startmsg_fptr)&upb_decoder_startmsg,
+  (upb_src_endmsg_fptr)&upb_decoder_endmsg,
+};
+
+
+/* upb_decoder construction/destruction. **************************************/
+
+upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
+{
+  upb_decoder *d = malloc(sizeof(*d));
+  d->toplevel_msgdef = msgdef;
+  d->limit = &d->stack[UPB_MAX_NESTING];
+  d->buf = NULL;
+  upb_src_init(&d->src, &upb_decoder_src_vtbl);
+  return d;
+}
+
+void upb_decoder_free(upb_decoder *d)
+{
+  upb_string_unref(d->buf);
+  free(d);
+}
+
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
+{
+  upb_string_unref(d->buf);
+  d->top = d->stack;
+  d->top->msgdef = d->toplevel_msgdef;
+  // The top-level message is not delimited (we can keep receiving data for it
+  // indefinitely), so we set the end offset as high as possible, but not equal
+  // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET.
+  d->top->end_offset = UINT32_MAX - 1;
+  d->bytesrc = bytesrc;
+  d->buf = NULL;
+  d->buf_bytesleft = 0;
+  d->buf_stream_offset = 0;
+  d->buf_offset = 0;
+}
diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h
new file mode 100644
index 0000000..dde61fc
--- /dev/null
+++ b/stream/upb_decoder.h
@@ -0,0 +1,53 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * upb_decoder implements a high performance, streaming decoder for protobuf
+ * data that works by implementing upb_src and getting its data from a
+ * upb_bytesrc.
+ *
+ * The decoder does not currently support non-blocking I/O, in the sense that
+ * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
+ * decoder when data becomes available again.  Support for this could be added,
+ * but it would add complexity and perhaps cost efficiency also.
+ *
+ * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_DECODER_H_
+#define UPB_DECODER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb_def.h"
+#include "upb_stream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_decoder *****************************************************************/
+
+// A upb_decoder decodes the binary protocol buffer format, writing the data it
+// decodes to a upb_sink.
+struct upb_decoder;
+typedef struct upb_decoder upb_decoder;
+
+// Allocates and frees a upb_decoder, respectively.
+upb_decoder *upb_decoder_new(upb_msgdef *md);
+void upb_decoder_free(upb_decoder *d);
+
+// Resets the internal state of an already-allocated decoder.  This puts it in a
+// state where it has not seen any data, and expects the next data to be from
+// the beginning of a new protobuf.  Parsers must be reset before they can be
+// used.  A decoder can be reset multiple times.
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc);
+
+// Returns a upb_src pointer by which the decoder can be used.  The returned
+// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free().
+upb_src *upb_decoder_getsrc(upb_decoder *d);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_DECODER_H_ */
diff --git a/stream/upb_encoder.c b/stream/upb_encoder.c
new file mode 100644
index 0000000..304a423
--- /dev/null
+++ b/stream/upb_encoder.c
@@ -0,0 +1,420 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include "upb_encoder.h"
+
+#include <stdlib.h>
+#include "descriptor.h"
+
+/* Functions for calculating sizes of wire values. ****************************/
+
+static size_t upb_v_uint64_t_size(uint64_t val) {
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based, undef if val == 0.
+#else
+  int high_bit = 0;
+  uint64_t tmp = val;
+  while(tmp >>= 1) high_bit++;
+#endif
+  return val == 0 ? 1 : high_bit / 7 + 1;
+}
+
+static size_t upb_v_int32_t_size(int32_t val) {
+  // v_int32's are sign-extended to maintain wire compatibility with int64s.
+  return upb_v_uint64_t_size((int64_t)val);
+}
+static size_t upb_v_uint32_t_size(uint32_t val) {
+  return upb_v_uint64_t_size(val);
+}
+static size_t upb_f_uint64_t_size(uint64_t val) {
+  (void)val;  // Length is independent of value.
+  return sizeof(uint64_t);
+}
+static size_t upb_f_uint32_t_size(uint32_t val) {
+  (void)val;  // Length is independent of value.
+  return sizeof(uint32_t);
+}
+
+
+/* Functions to write wire values. ********************************************/
+
+// Since we know in advance the longest that the value could be, we always make
+// sure that our buffer is long enough.  This saves us from having to perform
+// bounds checks.
+
+// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT).
+static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
+{
+  do {
+    uint8_t byte = val & 0x7f;
+    val >>= 7;
+    if(val) byte |= 0x80;
+    *buf++ = byte;
+  } while(val);
+  return buf;
+}
+
+// Puts an unsigned 32-bit varint, verbatim.  Never uses the high 32 bits.
+static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
+{
+  return upb_put_v_uint64_t(buf, val);
+}
+
+// Puts a signed 32-bit varint, first sign-extending to 64-bits.  We do this to
+// maintain wire-compatibility with 64-bit signed integers.
+static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
+{
+  return upb_put_v_uint64_t(buf, (int64_t)val);
+}
+
+static void upb_put32(uint8_t *buf, uint32_t val) {
+  buf[0] = val & 0xff;
+  buf[1] = (val >> 8) & 0xff;
+  buf[2] = (val >> 16) & 0xff;
+  buf[3] = (val >> 24);
+}
+
+// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
+static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
+{
+  uint8_t *uint32_end = buf + sizeof(uint32_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint32_t*)buf = val;
+#else
+  upb_put32(buf, val);
+#endif
+  return uint32_end;
+}
+
+// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
+static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
+{
+  uint8_t *uint64_end = buf + sizeof(uint64_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint64_t*)buf = val;
+#else
+  upb_put32(buf, (uint32_t)val);  // Low 32 bits go in bytes 0-3.
+  upb_put32(buf + sizeof(uint32_t), (uint32_t)(val >> 32));  // High: 4-7.
+#endif
+  return uint64_end;
+}
+
+/* Functions to write and calculate sizes for .proto values. ******************/
+
+// Performs zig-zag encoding, which is used by sint32 and sint64.
+static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
+static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
+
+/* Use macros to define a set of two functions for each .proto type:
+ *
+ *  // Converts and writes a .proto value into buf.  "end" indicates the end
+ *  // of the current available buffer (if the buffer does not contain enough
+ *  // space UPB_STATUS_NEED_MORE_DATA is returned).  On success, *outbuf will
+ *  // point one past the data that was written.
+ *  uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
+ *
+ *  // Returns the number of bytes required to encode val.
+ *  size_t upb_get_INT32_size(int32_t val);
+ *
+ *  // Given a .proto value s (source) convert it to a wire value.
+ *  uint32_t upb_vtowv_INT32(int32_t s);
+ */
+
+#define VTOWV(type, wire_t, val_t) \
+  static wire_t upb_vtowv_ ## type(val_t s)
+
+#define PUT(type, v_or_f, wire_t, val_t, member_name) \
+  static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \
+    wire_t tmp = upb_vtowv_ ## type(val); \
+    return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \
+  }
+
+#define T(type, v_or_f, wire_t, val_t, member_name) \
+  static size_t upb_get_ ## type ## _size(val_t val) { \
+    return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \
+  } \
+  VTOWV(type, wire_t, val_t);  /* prototype for PUT below */ \
+  PUT(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t)
+
+T(INT32,    v,  int32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(INT64,    v, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(UINT32,   v, uint32_t, uint32_t, uint32)  { return s;               }
+T(UINT64,   v, uint64_t, uint64_t, uint64)  { return s;               }
+T(SINT32,   v, uint32_t, int32_t,  int32)   { return upb_zzenc_32(s); }
+T(SINT64,   v, uint64_t, int64_t,  int64)   { return upb_zzenc_64(s); }
+T(FIXED32,  f, uint32_t, uint32_t, uint32)  { return s;               }
+T(FIXED64,  f, uint64_t, uint64_t, uint64)  { return s;               }
+T(SFIXED32, f, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(SFIXED64, f, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(BOOL,     v, uint32_t, bool,     _bool)   { return (uint32_t)s;     }
+T(ENUM,     v, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(DOUBLE,   f, uint64_t, double,   _double) {
+  upb_value v;
+  v._double = s;
+  return v.uint64;
+}
+T(FLOAT,    f, uint32_t, float,    _float)  {
+  upb_value v;
+  v._float = s;
+  return v.uint32;
+}
+#undef VTOWV
+#undef PUT
+#undef T
+
+static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
+{
+#define CASE(t, member_name) \
+  case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    default: assert(false); return buf;
+  }
+#undef CASE
+}
+
+static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
+{
+#define CASE(t, member_name) \
+  case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    default: assert(false); return 0;
+  }
+#undef CASE
+}
+
+static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
+                             upb_wire_type_t wt)
+{
+  return upb_put_UINT32(buf, wt | (num << 3));
+}
+
+static uint32_t _upb_get_tag_size(upb_field_number_t num)
+{
+  return upb_get_UINT32_size(num << 3);
+}
+
+
+/* upb_sizebuilder ************************************************************/
+
+struct upb_sizebuilder {
+  // Accumulating size for the current level.
+  uint32_t size;
+
+  // Stack of sizes for our current nesting.
+  uint32_t stack[UPB_MAX_NESTING], *top;
+
+  // Vector of sizes.
+  uint32_t *sizes;
+  int sizes_len;
+  int sizes_size;
+
+  upb_status status;
+};
+
+// upb_sink callbacks.
+static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                                upb_value val,
+                                                upb_status *status)
+{
+  // A scalar field contributes its tag plus its encoded value to the
+  // running total for the current nesting level.
+  upb_sizebuilder *builder = (upb_sizebuilder*)sink;
+  (void)status;
+  builder->size += _upb_get_tag_size(f->number) +
+                   _upb_get_value_size(f->type, val);
+  return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_strptr str,
+                                              int32_t start, uint32_t end,
+                                              upb_status *status)
+{
+  (void)status;
+  (void)str;   // Only the length matters for sizing, not the bytes.
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(start >= 0) {
+    // Count the tag, the length prefix AND the payload bytes themselves.
+    // NOTE(review): chunks with start < 0 add nothing -- confirm semantics.
+    uint32_t len = end - start;
+    sb->size += _upb_get_tag_size(f->number) + upb_get_UINT32_size(len) + len;
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f,
+                                                upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type == UPB_TYPE(MESSAGE)) {
+    // Save the parent's running total and start the child from zero; the
+    // child's tag and length delimiter are added in endcb.
+    assert(sb->top < sb->stack + UPB_MAX_NESTING);  // Guard the fixed stack.
+    *sb->top++ = sb->size;
+    sb->size = 0;
+  } else {
+    assert(f->type == UPB_TYPE(GROUP));
+    sb->size += _upb_get_tag_size(f->number);
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_status *status)
+{
+  (void)status;  // NOTE(review): allocation failure is not recorded here.
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type == UPB_TYPE(MESSAGE)) {
+    if(sb->sizes_len == sb->sizes_size) {
+      // Grow geometrically; a zero capacity must not stay zero when doubled.
+      int cap = sb->sizes_size > 0 ? sb->sizes_size * 2 : 16;
+      uint32_t *grown = realloc(sb->sizes, cap * sizeof(*sb->sizes));
+      if(!grown) return UPB_SINK_STOP;  // Old vector and state stay intact.
+      sb->sizes = grown;
+      sb->sizes_size = cap;
+    }
+    sb->top--;
+    uint32_t child_size = sb->size;
+    sb->sizes[sb->sizes_len++] = child_size;
+    // The parent's total is what it had accumulated before the child began,
+    // plus the child's tag, length delimiter and payload.
+    sb->size = *sb->top + _upb_get_tag_size(f->number) +
+               upb_get_UINT32_size(child_size) + child_size;
+  } else {
+    assert(f->type == UPB_TYPE(GROUP));
+    // The END_GROUP tag; startcb already accounted for START_GROUP.
+    sb->size += _upb_get_tag_size(f->number);
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
+  _upb_sizebuilder_valuecb,   // Sizes a scalar field.
+  _upb_sizebuilder_strcb,     // Sizes a string/bytes field.
+  _upb_sizebuilder_startcb,   // Opens a submessage or group.
+  _upb_sizebuilder_endcb      // Closes a submessage or group.
+};
+
+
+/* upb_sink callbacks *********************************************************/
+
+struct upb_encoder {
+  upb_sink base;     // Callbacks cast their upb_sink* back to upb_encoder*.
+  //upb_bytesink *bytesink;  (output is not wired up yet)
+  uint32_t *sizes;   // Submessage sizes, read by startcb for length prefixes.
+  int size_offset;   // Pre-decremented index into "sizes" (read back to front).
+};
+
+
+// One callback encodes at most two values: a tag, then a value or a length.
+#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2)
+
+static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf,
+                                             size_t len, upb_status *status)
+{
+  // STUB: no bytes are emitted yet. TODO: conjure a upb_strptr that points to buf.
+  //upb_strptr ptr;
+  (void)s;       // All three are unused until the bytesink call is wired up.
+  (void)buf;
+  (void)status;
+  size_t written = 5;// Placeholder for: upb_bytesink_onbytes(s->bytesink, ptr);
+  if(written < len) {
+    // Short write (currently any len > 5). TODO: mark to skip "written" bytes next time.
+    return UPB_SINK_STOP;
+  } else {
+    return UPB_SINK_CONTINUE;
+  }
+}
+
+static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                            upb_value val, upb_status *status)
+{
+  // Serialize "tag, value" into a scratch buffer and hand it to the output.
+  // TODO: handle packed encoding.
+  uint8_t buf[UPB_ENCODER_BUFSIZE];
+  uint8_t *end = _upb_put_tag(buf, f->number,
+                              upb_types[f->type].expected_wire_type);
+  end = upb_encode_value(end, f->type, val);
+  return _upb_encoder_push_buf((upb_encoder*)sink, buf, end - buf, status);
+}
+
+static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_strptr str,
+                                          int32_t start, uint32_t end,
+                                          upb_status *status)
+{
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  if(start >= 0) {
+    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
+    ptr = upb_put_UINT32(ptr, end - start);
+  }
+  // TODO: properly handle partially consumed and partially supplied strings.
+  upb_sink_status hdr = _upb_encoder_push_buf(s, buf, ptr - buf, status);
+  if(hdr != UPB_SINK_CONTINUE) return hdr;  // No payload after a refused header.
+  return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status);
+}
+
+static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f,
+                                            upb_status *status)
+{
+  upb_encoder *enc = (upb_encoder*)sink;
+  uint8_t out[UPB_ENCODER_BUFSIZE], *end;
+  if(f->type != UPB_TYPE(GROUP)) {  // Delimited: tag, then precomputed size.
+    end = _upb_put_tag(out, f->number, UPB_WIRE_TYPE_DELIMITED);
+    end = upb_put_UINT32(end, enc->sizes[--enc->size_offset]);
+  } else {
+    end = _upb_put_tag(out, f->number, UPB_WIRE_TYPE_START_GROUP);
+  }
+  return _upb_encoder_push_buf(enc, out, end - out, status);
+}
+
+static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_status *status)
+{
+  // Only groups have an end marker; other submessages emit nothing here.
+  if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE;
+  uint8_t out[UPB_ENCODER_BUFSIZE];
+  uint8_t *end = _upb_put_tag(out, f->number, UPB_WIRE_TYPE_END_GROUP);
+  return _upb_encoder_push_buf((upb_encoder*)sink, out, end - out, status);
+}
+
+upb_sink_callbacks _upb_encoder_sink_vtbl = {
+  _upb_encoder_valuecb,   // Emits tag + scalar value.
+  _upb_encoder_strcb,     // Emits tag + length, then the string bytes.
+  _upb_encoder_startcb,   // Emits START_GROUP tag, or tag + submessage size.
+  _upb_encoder_endcb      // Emits END_GROUP tag (groups only).
+};
+
diff --git a/stream/upb_encoder.h b/stream/upb_encoder.h
new file mode 100644
index 0000000..e879b0b
--- /dev/null
+++ b/stream/upb_encoder.h
@@ -0,0 +1,56 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Implements a upb_sink that writes protobuf data to the binary wire format.
+ *
+ * For messages that have any submessages, the encoder needs a buffer
+ * containing the submessage sizes, so they can be properly written at the
+ * front of each message.  Note that groups do *not* have this requirement.
+ *
+ * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_ENCODER_H_
+#define UPB_ENCODER_H_
+
+#include "upb.h"
+#include "upb_srcsink.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_encoder ****************************************************************/
+
+// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
+// buffer binary wire format.
+struct upb_encoder;
+typedef struct upb_encoder upb_encoder;
+
+upb_encoder *upb_encoder_new(upb_msgdef *md);
+void upb_encoder_free(upb_encoder *e);
+
+// Resets the given upb_encoder such that it is ready to begin encoding,
+// outputting data to "bytesink" (which must live until the encoder is
+// reset or destroyed).
+void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
+
+// Returns the upb_sink to which data can be written.  The sink is invalidated
+// when the encoder is reset or destroyed.  Note that if the client wants to
+// encode any length-delimited submessages it must first call
+// upb_encoder_buildsizes() below.
+upb_sink *upb_encoder_sink(upb_encoder *e);
+
+// Call prior to pushing any data with embedded submessages.  "src" must yield
+// exactly the same data as what will next be encoded, but in reverse order.
+// The encoder iterates over this data in order to determine the sizes of the
+// submessages.  If any errors are returned by the upb_src, the status will
+// be saved in *status.  If the client is sure that the upb_src will not throw
+// any errors, "status" may be NULL.
+void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_ENCODER_H_ */
diff --git a/stream/upb_text.c b/stream/upb_text.c
new file mode 100644
index 0000000..8662269
--- /dev/null
+++ b/stream/upb_text.c
@@ -0,0 +1,121 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include <inttypes.h>
+#include "descriptor.h"
+#include "upb_text.h"
+#include "upb_data.h"
+
+// Prints "val", interpreted according to "type", to "file" in text format.
+// Types with no case below (e.g. submessages and groups) print nothing.
+void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file)
+{
+#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break;
+  switch(type) {
+    // 17 and 9 significant digits are enough to round-trip an IEEE double
+    // and float respectively ("%0.f" discarded the fractional part).
+    case UPB_TYPE(DOUBLE):
+      CASE("%.17g", _double)
+    case UPB_TYPE(FLOAT):
+      CASE("%.9g", _float)
+    case UPB_TYPE(INT64): case UPB_TYPE(SFIXED64): case UPB_TYPE(SINT64):
+      CASE("%" PRId64, int64)
+    case UPB_TYPE(UINT64): case UPB_TYPE(FIXED64):
+      CASE("%" PRIu64, uint64)
+    // The encoder handles ENUM as int32, so print it signed as well.
+    case UPB_TYPE(INT32): case UPB_TYPE(SFIXED32): case UPB_TYPE(SINT32):
+    case UPB_TYPE(ENUM):
+      CASE("%" PRId32, int32)
+    case UPB_TYPE(UINT32): case UPB_TYPE(FIXED32):
+      CASE("%" PRIu32, uint32)
+    case UPB_TYPE(BOOL):
+      CASE("%hhu", _bool)
+    case UPB_TYPE(STRING): case UPB_TYPE(BYTES):
+      /* TODO: escaping. */
+      fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break;
+    default: break;
+  }
+#undef CASE
+}
+
+static void print_indent(upb_text_printer *p, FILE *stream)
+{
+  // Two spaces per nesting level; single-line output is never indented.
+  if(p->single_line) return;
+  for(int left = p->indent_depth; left > 0; left--) fputs("  ", stream);
+}
+
+void upb_text_printfield(upb_text_printer *p, upb_strptr name,
+                         upb_field_type_t valtype, upb_value val,
+                         FILE *stream)
+{
+  // Writes one "name:value" entry at the current indentation, followed by
+  // a space in single-line mode or a newline otherwise.
+  const int terminator = p->single_line ? ' ' : '\n';
+  print_indent(p, stream);
+  fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name));
+  upb_text_printval(valtype, val, stream);
+  fputc(terminator, stream);
+}
+
+void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream)
+{
+  print_indent(p, stream);  // Header is printed at the enclosing level.
+  fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type));
+  ++p->indent_depth;        // Everything until the matching pop nests deeper.
+  if(!p->single_line) fputc('\n', stream);
+}
+
+void upb_text_pop(upb_text_printer *p, FILE *stream)
+{
+  p->indent_depth--;  // Leave the block opened by upb_text_push().
+  print_indent(p, stream);
+  fputs(p->single_line ? "} " : "}\n", stream);  // No newline in single-line mode.
+}
+
+static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
+                     FILE *stream);
+
+static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md,
+                     FILE *stream)
+{
+  // Walk the message's fields in definition order, skipping any that are
+  // not set on "msg".
+  for(upb_field_count_t n = 0; n < md->num_fields; n++) {
+    upb_fielddef *field = &md->fields[n];
+    if(!upb_msg_has(msg, field)) continue;
+    upb_value fieldval = upb_msg_get(msg, field);
+    if(!upb_isarray(field)) {
+      printval(printer, fieldval, field, stream);
+      continue;
+    }
+    // Repeated field: print each element as its own entry.
+    for(uint32_t k = 0; k < upb_array_len(fieldval.arr); k++)
+      printval(printer, upb_array_get(fieldval.arr, field, k), field, stream);
+  }
+}
+
+static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
+                     FILE *stream)
+{
+  if(!upb_issubmsg(f)) {  // Non-submessage: a single "name:value" entry.
+    upb_text_printfield(printer, f->name, f->type, v, stream);
+    return;
+  }
+  upb_text_push(printer, f->name, stream);  // Submessage: nested block.
+  printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream);
+  upb_text_pop(printer, stream);
+}
+
+
+void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line,
+                   FILE *stream)
+{
+  upb_text_printer top;  // Printer state for the outermost message.
+  upb_text_printer_init(&top, single_line);
+  printmsg(&top, msg, md, stream);
+}
+
diff --git a/stream/upb_text.h b/stream/upb_text.h
new file mode 100644
index 0000000..d89c9d6
--- /dev/null
+++ b/stream/upb_text.h
@@ -0,0 +1,36 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_TEXT_H_
+#define UPB_TEXT_H_
+
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  int indent_depth;   // Current nesting level; bumped by push, dropped by pop.
+  bool single_line;   // If true, no indentation and no per-entry newlines.
+} upb_text_printer;
+
+INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) {
+  p->single_line = single_line;
+  p->indent_depth = 0;  // Printing starts at the outermost level.
+}
+void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file);
+void upb_text_printfield(upb_text_printer *p, upb_strptr name,
+                         upb_field_type_t valtype, upb_value val, FILE *stream);
+void upb_text_push(upb_text_printer *p, upb_strptr submsg_type,
+                   FILE *stream);
+void upb_text_pop(upb_text_printer *p, FILE *stream);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_TEXT_H_ */
-- 
cgit v1.2.3