49 files changed, 4584 insertions, 3633 deletions
diff --git a/Makefile b/Makefile
index 6aef581..a12e7ef 100644
--- a/Makefile
+++ b/Makefile
@@ -83,11 +83,15 @@ deps: Makefile $(ALLSRC)
 CORE= \
   upb/upb.c \
   upb/handlers.c \
-  upb/descriptor.c \
+  upb/descriptor/reader.c \
   upb/table.c \
+  upb/refcount.c \
   upb/def.c \
   upb/msg.c \
   upb/bytestream.c \
+  bindings/cpp/upb/proto2_bridge.cc \
+
+# TODO: the proto2 bridge should be built as a separate library.
 
 # Library for the protocol buffer format (both text and binary).
 PB= \
@@ -122,8 +126,9 @@ LIBUPB_PIC=upb/libupb_pic.a
 lib: $(LIBUPB)
 
 
-OBJ=$(patsubst %.c,%.o,$(SRC))
-PICOBJ=$(patsubst %.c,%.lo,$(SRC))
+# Map every source in SRC (.c or .cc) to exactly one object file name.
+OBJ=$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(SRC)))
+PICOBJ=$(patsubst %.cc,%.lo,$(patsubst %.c,%.lo,$(SRC)))
 
 ifdef USE_JIT
 upb/pb/decoder.o upb/pb/decoder.lo: upb/pb/decoder_x64.h
@@ -139,10 +144,18 @@ $(LIBUPB_PIC): $(PICOBJ)
 	$(E) CC $<
 	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
 
+%.o : %.cc
+	$(E) CXX $<
+	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
+
 %.lo : %.c
 	$(E) 'CC -fPIC' $<
 	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC
 
+# PIC objects for C++ sources, mirroring the "%.lo : %.c" rule above.
+%.lo : %.cc
+	$(E) 'CXX -fPIC' $<
+	$(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC
 # Override the optimization level for def.o, because it is not in the
 # critical path but gets very large when -O3 is used.
 upb/def.o: upb/def.c
@@ -197,47 +210,39 @@ tests/test.proto.pb: tests/test.proto
 SIMPLE_TESTS= \
   tests/test_def \
   tests/test_varint \
-  tests/tests \
-
-# Too many tests in this binary to run Valgrind (it takes minutes).
-SLOW_TESTS= \
-  tests/test_decoder \
 
 SIMPLE_CXX_TESTS= \
   tests/test_table \
   tests/test_cpp \
+  tests/test_decoder \
 
 VARIADIC_TESTS= \
   tests/t.test_vs_proto2.googlemessage1 \
   tests/t.test_vs_proto2.googlemessage2 \
 
-TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) $(SLOW_TESTS)
-tests: $(TESTS)
+TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS)
+
+
+tests: $(TESTS) $(INTERACTIVE_TESTS)
 $(TESTS): $(LIBUPB)
-tests/tests: tests/test.proto.pb
+tests/test_def: tests/test.proto.pb
 
 $(SIMPLE_TESTS): % : %.c
 	$(E) CC $<
 	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB)
 
-VALGRIND=valgrind --leak-check=full --error-exitcode=1 
+VALGRIND=valgrind --leak-check=full --error-exitcode=1
 test: tests
 	@echo Running all tests under valgrind.
 	@set -e  # Abort on error.
 	@for test in $(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS); do \
 	  if [ -x ./$$test ] ; then \
-	    echo !!! $(VALGRIND) ./$$test tests/test.proto.pb; \
+	    echo !!! $(VALGRIND) ./$$test; \
 	    $(VALGRIND) ./$$test tests/test.proto.pb || exit 1; \
 	  fi \
 	done;
-	@for test in "$(SLOW_TESTS)"; do \
-	  if [ -x ./$$test ] ; then \
-	    echo !!! ./$$test; \
-	    ./$$test || exit 1; \
-	  fi \
-	done;
-	@$(VALGRIND) tests/t.test_vs_proto2.googlemessage1 benchmarks/google_messages.proto.pb benchmarks/google_message1.dat
-	@$(VALGRIND) tests/t.test_vs_proto2.googlemessage2 benchmarks/google_messages.proto.pb benchmarks/google_message2.dat
+	@$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage1 benchmarks/google_message1.dat || exit 1;
+	@$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage2 benchmarks/google_message2.dat || exit 1;
 	@echo "All tests passed!"
 
 tests/t.test_vs_proto2.googlemessage1 \
@@ -273,15 +278,11 @@ tests/tests: upb/libupb.a
 # Benchmarks
 UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \
                benchmarks/b.parsestream_googlemessage2.upb_table \
-               benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
-               benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \
 
 ifdef USE_JIT
 UPB_BENCHMARKS += \
                benchmarks/b.parsestream_googlemessage1.upb_jit \
                benchmarks/b.parsestream_googlemessage2.upb_jit \
-               benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \
-               benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval \
                benchmarks/b.parsetoproto2_googlemessage1.upb_jit \
                benchmarks/b.parsetoproto2_googlemessage2.upb_jit
 endif
@@ -318,21 +319,21 @@ benchmarks/google_messages.pb.cc: benchmarks/google_messages.proto
 # want to make these command-line parameters -- it makes it more annoying to
 # debug or profile them.
 
-benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
-benchmarks/b.parsetostruct_googlemessage2.upb_table_byval: \
+benchmarks/b.parsetostruct_googlemessage1.upb_table \
+benchmarks/b.parsetostruct_googlemessage2.upb_table: \
     benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byval, nojit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table_byval $< \
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, nojit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message1.dat\" \
-	  -DBYREF=false -DJIT=false $(LIBUPB)
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byref, nojit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table_byval $< \
+	  -DJIT=false $(LIBUPB)
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, nojit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message2.dat\" \
-	  -DBYREF=false -DJIT=false $(LIBUPB)
+	  -DJIT=false $(LIBUPB)
 
 benchmarks/b.parsestream_googlemessage1.upb_table \
 benchmarks/b.parsestream_googlemessage2.upb_table: \
@@ -351,21 +352,21 @@ benchmarks/b.parsestream_googlemessage2.upb_table: \
 	  $(LIBUPB)
 
 ifdef USE_JIT
-benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \
-benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval: \
+benchmarks/b.parsetostruct_googlemessage1.upb_jit \
+benchmarks/b.parsetostruct_googlemessage2.upb_jit: \
     benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byref, jit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval $< \
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, jit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message1.dat\" -DJIT=true \
-	  -DBYREF=true -DJIT=true $(LIBUPB)
-	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byval, jit)'
-	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval $< \
+	  -DJIT=true $(LIBUPB)
+	$(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, jit)'
+	$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit $< \
 	  -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
 	  -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
 	  -DMESSAGE_FILE=\"google_message2.dat\" -DJIT=true \
-	  -DBYREF=false -DJIT=true $(LIBUPB)
+	  -DJIT=true $(LIBUPB)
 
 benchmarks/b.parsestream_googlemessage1.upb_jit \
 benchmarks/b.parsestream_googlemessage2.upb_jit: \
diff --git a/benchmarks/google_messages.proto b/benchmarks/google_messages.proto
index b43e94b..b367954 100644
--- a/benchmarks/google_messages.proto
+++ b/benchmarks/google_messages.proto
@@ -3,6 +3,11 @@ package benchmarks;
 
 option optimize_for = SPEED;
 
+enum Foo {
+  FOO_VALUE = 1;
+  FOO_VALUE2 = 2;
+}
+
 message SpeedMessage1 {
   required string field1 = 1;
   optional string field9 = 9;
@@ -45,6 +50,7 @@ message SpeedMessage1 {
   optional int32 field128 = 128 [default=0];
   optional string field129 = 129 [default="xxxxxxxxxxxxxxxxxxxxx"];
   optional int32 field131 = 131 [default=0];
+  optional Foo field132 = 132 [default=FOO_VALUE];
 }
 
 message SpeedMessage1SubMessage {
diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c
index 0316a86..e9164d0 100644
--- a/benchmarks/parsestream.upb.c
+++ b/benchmarks/parsestream.upb.c
@@ -39,7 +39,7 @@ static bool initialize()
     return false;
   }
 
-  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
+  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME, &def));
   if(!def) {
     fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
     return false;
@@ -68,7 +68,7 @@ static bool initialize()
 static void cleanup()
 {
   free(input_str);
-  upb_def_unref(UPB_UPCAST(def));
+  upb_def_unref(UPB_UPCAST(def), &def);
   upb_decoder_uninit(&decoder);
   upb_decoderplan_unref(plan);
   upb_stringsrc_uninit(&stringsrc);
diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc
index 988faad..5023b0e 100644
--- a/benchmarks/parsetoproto2.upb.cc
+++ b/benchmarks/parsetoproto2.upb.cc
@@ -1,320 +1,61 @@
-// This file is a crime against software engineering.  It breaks the
-// encapsulation of proto2 in numerous ways, violates the C++ standard
-// in others, and generally deserves to have comtempt and scorn heaped
-// upon it.
-//
-// Its purpose is to get an accurate benchmark for how fast upb can
-// parse into proto2 data structures.  To add proper support for this
-// functionality, proto2 would need to expose actual support for the
-// operations we are trying to perform here.
+// Tests speed of upb parsing into proto2 generated classes.
 
 #define __STDC_LIMIT_MACROS 1
 #include "main.c"
 
 #include <stdint.h>
-#include "upb/bytestream.h"
-#include "upb/def.h"
-#include "upb/msg.h"
-#include "upb/pb/decoder.h"
+#include "upb/bytestream.hpp"
+#include "upb/def.hpp"
+#include "upb/msg.hpp"
+#include "upb/pb/decoder.hpp"
 #include "upb/pb/glue.h"
-
-// Need to violate the encapsulation of GeneratedMessageReflection -- see below.
-#define private public
+#include "upb/proto2_bridge.hpp"
 #include MESSAGE_HFILE
-#include <google/protobuf/descriptor.h>
-#undef private
 
-static size_t len;
+const char *str;
+size_t len;
 MESSAGE_CIDENT msg[NUM_MESSAGES];
 MESSAGE_CIDENT msg2;
-static upb_stringsrc strsrc;
-static upb_decoder d;
-static const upb_msgdef *def;
-static upb_decoderplan *p;
-char *str;
-
-#define PROTO2_APPEND(type, ctype) \
-  upb_flow_t proto2_append_ ## type(void *_r, upb_value fval, upb_value val) { \
-    (void)fval; \
-    typedef google::protobuf::RepeatedField<ctype> R; \
-    R *r = (R*)_r; \
-    r->Add(upb_value_get ## type(val)); \
-    return UPB_CONTINUE; \
-  }
-
-PROTO2_APPEND(double, double)
-PROTO2_APPEND(float, float)
-PROTO2_APPEND(uint64, uint64_t)
-PROTO2_APPEND(int64, int64_t)
-PROTO2_APPEND(int32, int32_t)
-PROTO2_APPEND(uint32, uint32_t)
-PROTO2_APPEND(bool, bool)
-
-upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) {
-  assert(m != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1);
-  if (*str == f->default_ptr) *str = new std::string;
-  const upb_byteregion *reg = upb_value_getbyteregion(val);
-  size_t len;
-  (*str)->assign(
-      upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len),
-      upb_byteregion_len(reg));
-  // XXX: only supports contiguous strings atm.
-  assert(len == upb_byteregion_len(reg));
-  return UPB_CONTINUE;
-}
-
-upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) {
-  assert(_r != NULL);
-  typedef google::protobuf::RepeatedPtrField<std::string> R;
-  (void)fval;
-  R *r = (R*)_r;
-  const upb_byteregion *reg = upb_value_getbyteregion(val);
-  size_t len;
-  r->Add()->assign(
-      upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len),
-      upb_byteregion_len(reg));
-  // XXX: only supports contiguous strings atm.
-  assert(len == upb_byteregion_len(reg));
-  return UPB_CONTINUE;
-}
-
-upb_sflow_t proto2_startseq(void *m, upb_value fval) {
-  assert(m != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  return UPB_CONTINUE_WITH(UPB_INDEX(m, f->offset, 1));
-}
-
-upb_sflow_t proto2_startsubmsg(void *m, upb_value fval) {
-  assert(m != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  google::protobuf::Message *prototype = (google::protobuf::Message*)f->prototype;
-  void **subm = (void**)UPB_INDEX(m, f->offset, 1);
-  if (*subm == NULL || *subm == f->default_ptr)
-    *subm = prototype->New();
-  assert(*subm != NULL);
-  return UPB_CONTINUE_WITH(*subm);
-}
-
-class UpbRepeatedPtrField : public google::protobuf::internal::RepeatedPtrFieldBase {
- public:
-  class TypeHandler {
-   public:
-    typedef void Type;
-    // AddAllocated() calls this, but only if other objects are sitting
-    // around waiting for reuse, which we will not do.
-    static void Delete(Type*) { assert(false); }
-  };
-  void *Add(google::protobuf::Message *m) {
-    void *submsg = RepeatedPtrFieldBase::AddFromCleared<TypeHandler>();
-    if (!submsg) {
-      submsg = m->New();
-      RepeatedPtrFieldBase::AddAllocated<TypeHandler>(submsg);
-    }
-    return submsg;
-  }
-};
-
-upb_sflow_t proto2_startsubmsg_r(void *_r, upb_value fval) {
-  assert(_r != NULL);
-  // Compared to the other writers, this implementation is particularly sketchy.
-  // The object we are modifying is a RepeatedPtrField<SubType>*, but we can't
-  // properly declare that templated pointer because we don't have access to
-  // that type at compile-time (and wouldn't want to create a separate callback
-  // for each type anyway).  Instead we access the pointer as a
-  // RepeatedPtrFieldBase, which is indeed a superclass of RepeatedPtrField.
-  // But we can't properly declare a TypeHandler for the submessage's type,
-  // for the same reason that we can't create a RepeatedPtrField<SubType>*.
-  // Instead we treat it as a void*, and create the submessage using
-  // google::protobuf::Message::New() if we need to.
-  class TypeHandler {
-   public:
-    typedef void Type;
-  };
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  UpbRepeatedPtrField *r = (UpbRepeatedPtrField*)_r;
-  void *submsg = r->Add((google::protobuf::Message*)f->prototype);
-  assert(submsg != NULL);
-  return UPB_CONTINUE_WITH(submsg);
-}
-
-#define PROTO2MSG(type, size) { static upb_accessor_vtbl vtbl = { \
-    &proto2_startsubmsg, \
-    &upb_stdmsg_set ## type, \
-    &proto2_startseq, \
-    &proto2_startsubmsg_r, \
-    &proto2_append_ ## type, \
-    NULL, NULL, NULL, NULL, NULL, NULL}; \
-  return &vtbl; }
-
-static upb_accessor_vtbl *proto2_accessor(upb_fielddef *f) {
-  switch (f->type) {
-    case UPB_TYPE(DOUBLE): PROTO2MSG(double, 8)
-    case UPB_TYPE(FLOAT): PROTO2MSG(float, 4)
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64): PROTO2MSG(uint64, 8)
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SFIXED64):
-    case UPB_TYPE(SINT64): PROTO2MSG(int64, 8)
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(SINT32):
-    case UPB_TYPE(ENUM):
-    case UPB_TYPE(SFIXED32): PROTO2MSG(int32, 4)
-    case UPB_TYPE(UINT32):
-    case UPB_TYPE(FIXED32): PROTO2MSG(uint32, 4)
-    case UPB_TYPE(BOOL): PROTO2MSG(bool, 1)
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES):
-    case UPB_TYPE(GROUP):
-    case UPB_TYPE(MESSAGE): {
-        static upb_accessor_vtbl vtbl = {
-        &proto2_startsubmsg,
-        &proto2_setstr,
-        &proto2_startseq,
-        &proto2_startsubmsg_r,
-        &proto2_append_str,
-        NULL, NULL, NULL, NULL, NULL, NULL};
-        return &vtbl;
-    }
-  }
-  return NULL;
-}
-
-static void layout_msgdef_from_proto2(upb_msgdef *upb_md,
-                                      const google::protobuf::Message *m,
-                                      const google::protobuf::Descriptor *proto2_d) {
-  // Hack: we break the encapsulation of GeneratedMessageReflection to get at
-  // the offsets we need.  If/when we do this for real, we will need
-  // GeneratedMessageReflection to expose those offsets publicly.
-  const google::protobuf::internal::GeneratedMessageReflection *r =
-      (google::protobuf::internal::GeneratedMessageReflection*)m->GetReflection();
-  for (int i = 0; i < proto2_d->field_count(); i++) {
-    const google::protobuf::FieldDescriptor *proto2_f = proto2_d->field(i);
-    upb_fielddef *upb_f = upb_msgdef_itof(upb_md, proto2_f->number());
-    assert(upb_f);
-
-    // Encapsulation violation BEGIN
-    uint32_t data_offset = r->offsets_[proto2_f->index()];
-    uint32_t hasbit = (r->has_bits_offset_ * 8) + proto2_f->index();
-    // Encapsulation violation END
-
-    if (upb_isseq(upb_f)) {
-      // proto2 does not store hasbits for repeated fields.
-      upb_f->hasbit = -1;
-    } else {
-      upb_f->hasbit = hasbit;
-    }
-    upb_f->offset = data_offset;
-    upb_fielddef_setaccessor(upb_f, proto2_accessor(upb_f));
-
-    if (upb_isstring(upb_f) && !upb_isseq(upb_f)) {
-      upb_f->default_ptr = &r->GetStringReference(*m, proto2_f, NULL);
-    } else if (upb_issubmsg(upb_f)) {
-      // XXX: skip leading "."
-      const google::protobuf::Descriptor *subm_descriptor =
-          google::protobuf::DescriptorPool::generated_pool()->
-              FindMessageTypeByName(upb_fielddef_typename(upb_f) + 1);
-      assert(subm_descriptor);
-      upb_f->prototype = google::protobuf::MessageFactory::generated_factory()->GetPrototype(subm_descriptor);
-      if (!upb_isseq(upb_f))
-        upb_f->default_ptr = &r->GetMessage(*m, proto2_f);
-    }
-  }
-}
+upb::StringSource strsrc;
+upb::Decoder d;
+const upb::MessageDef *def;
+upb::DecoderPlan* plan;
 
 static bool initialize()
 {
-  // Initialize upb state, decode descriptor.
-  upb_status status = UPB_STATUS_INIT;
-  upb_symtab *s = upb_symtab_new();
-
-  char *data = upb_readfile(MESSAGE_DESCRIPTOR_FILE, &len);
-  if (!data) {
-    fprintf(stderr, "Couldn't read file: " MESSAGE_DESCRIPTOR_FILE);
-    return false;
-  }
-  int n;
-  upb_def **defs = upb_load_defs_from_descriptor(data, len, &n, &status);
-  free(data);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error reading descriptor: %s\n",
-            upb_status_getstr(&status));
-    return false;
-  }
-
-  // Setup offsets and accessors to properly write into a proto2 generated
-  // class.
-  for (int i = 0; i < n; i++) {
-    upb_def *def = defs[i];
-    upb_msgdef *upb_md = upb_dyncast_msgdef(def);
-    if (!upb_md) continue;
-    const google::protobuf::Descriptor *proto2_md =
-        google::protobuf::DescriptorPool::generated_pool()->
-            FindMessageTypeByName(upb_def_fqname(def));
-    if (!proto2_md) abort();
-    const google::protobuf::Message *proto2_m =
-        google::protobuf::MessageFactory::generated_factory()->GetPrototype(proto2_md);
-    layout_msgdef_from_proto2(upb_md, proto2_m, proto2_md);
-  }
-
-  upb_symtab_add(s, defs, n, &status);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error reading adding to symtab: %s\n",
-            upb_status_getstr(&status));
-    return false;
-  }
-  for(int i = 0; i < n; i++) upb_def_unref(defs[i]);
-  free(defs);
-
-  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
-  if(!def) {
-    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
-    return false;
-  }
-  upb_symtab_unref(s);
-
   // Read the message data itself.
   str = upb_readfile(MESSAGE_FILE, &len);
   if(str == NULL) {
     fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
     return false;
   }
-  upb_status_uninit(&status);
+
+  def = upb::proto2_bridge::NewFinalMessageDef(msg2, &def);
 
   msg2.ParseFromArray(str, len);
 
-  upb_stringsrc_init(&strsrc);
-  upb_handlers *h = upb_handlers_new();
-  upb_accessors_reghandlers(h, def);
-  p = upb_decoderplan_new(h, JIT);
-  upb_decoder_init(&d);
-  upb_decoder_resetplan(&d, p, 0);
-  upb_handlers_unref(h);
+  upb::Handlers* h = upb::Handlers::New();
+  upb::RegisterWriteHandlers(h, def);
+  plan = upb::DecoderPlan::New(h, JIT);
+  d.ResetPlan(plan, 0);
+  h->Unref();
 
   return true;
 }
 
 static void cleanup() {
-  upb_stringsrc_uninit(&strsrc);
-  upb_decoder_uninit(&d);
-  upb_def_unref(UPB_UPCAST(def));
-  upb_decoderplan_unref(p);
-  free(str);
+  def->Unref(&def);
+  plan->Unref();
 }
 
-static size_t run(int i)
-{
-  (void)i;
-  upb_status status = UPB_STATUS_INIT;
+static size_t run(int i) {
   msg[i % NUM_MESSAGES].Clear();
-  upb_stringsrc_reset(&strsrc, str, len);
-  upb_decoder_resetinput(
-      &d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]);
-  if (upb_decoder_decode(&d) != UPB_OK) goto err;
+  strsrc.Reset(str, len);
+  d.ResetInput(strsrc.AllBytes(), &msg[i % NUM_MESSAGES]);
+  if (d.Decode() != UPB_OK) goto err;
   return len;
 
 err:
-  fprintf(stderr, "Decode error: %s", upb_status_getstr(&status));
+  fprintf(stderr, "Decode error: %s", d.status().GetString());
   return 0;
 }
diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c
deleted file mode 100644
index 9487577..0000000
--- a/benchmarks/parsetostruct.upb.c
+++ /dev/null
@@ -1,85 +0,0 @@
-
-#include "main.c"
-
-#include "upb/bytestream.h"
-#include "upb/def.h"
-#include "upb/msg.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-
-static const upb_msgdef *def;
-static size_t len;
-static void *msg[NUM_MESSAGES];
-static upb_stringsrc strsrc;
-static upb_decoder d;
-static upb_decoderplan *p;
-char *str;
-
-static bool initialize()
-{
-  // Initialize upb state, decode descriptor.
-  upb_status status = UPB_STATUS_INIT;
-  upb_symtab *s = upb_symtab_new();
-  upb_load_descriptor_file_into_symtab(s, MESSAGE_DESCRIPTOR_FILE, &status);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error reading descriptor: %s\n",
-            upb_status_getstr(&status));
-    return false;
-  }
-
-  def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME));
-  if(!def) {
-    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
-    return false;
-  }
-  upb_symtab_unref(s);
-
-  // Read the message data itself.
-  str = upb_readfile(MESSAGE_FILE, &len);
-  if(str == NULL) {
-    fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
-    return false;
-  }
-  upb_status_uninit(&status);
-  for (int i = 0; i < NUM_MESSAGES; i++)
-    msg[i] = upb_stdmsg_new(def);
-
-  upb_stringsrc_init(&strsrc);
-  upb_handlers *h = upb_handlers_new();
-  upb_accessors_reghandlers(h, def);
-  p = upb_decoderplan_new(h, JIT);
-  upb_decoder_init(&d);
-  upb_handlers_unref(h);
-  upb_decoder_resetplan(&d, p, 0);
-
-  if (!BYREF) {
-    // TODO: use byref/byval accessors.
-  }
-  return true;
-}
-
-static void cleanup()
-{
-  for (int i = 0; i < NUM_MESSAGES; i++)
-    upb_stdmsg_free(msg[i], def);
-  upb_def_unref(UPB_UPCAST(def));
-  upb_stringsrc_uninit(&strsrc);
-  upb_decoder_uninit(&d);
-  upb_decoderplan_unref(p);
-  free(str);
-}
-
-static size_t run(int i)
-{
-  upb_status status = UPB_STATUS_INIT;
-  i %= NUM_MESSAGES;
-  upb_msg_clear(msg[i], def);
-  upb_stringsrc_reset(&strsrc, str, len);
-  upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg[i]);
-  if (upb_decoder_decode(&d) != UPB_OK) goto err;
-  return len;
-
-err:
-  fprintf(stderr, "Decode error: %s", upb_status_getstr(&status));
-  return 0;
-}
diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp
index 968d542..81134b9 100644
--- a/bindings/cpp/upb/bytestream.hpp
+++ b/bindings/cpp/upb/bytestream.hpp
@@ -68,6 +68,7 @@
 
 #include "upb/bytestream.h"
 #include "upb/upb.hpp"
+#include <string>
 
 namespace upb {
 
@@ -204,6 +205,18 @@ class ByteRegion : public upb_byteregion {
     return upb_byteregion_strdup(this);
   }
 
+  template <typename T> void AssignToString(T* str) {
+    uint64_t ofs = start_ofs();
+    str->clear();
+    str->reserve(Length());
+    while (ofs < end_ofs()) {
+      size_t len;
+      const char *ptr = GetPtr(ofs, &len);
+      str->append(ptr, len);
+      ofs += len;
+    }
+  }
+
   // TODO: add if/when there is a demonstrated need.
   //
   // // Pins this byteregion's bytes in memory, allowing it to outlive its
@@ -220,12 +233,24 @@ class ByteRegion : public upb_byteregion {
 class StringSource : public upb_stringsrc {
  public:
   StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
+  template <typename T> explicit StringSource(const T& str) {
+    upb_stringsrc_init(this);
+    Reset(str);
+  }
+  StringSource(const char *data, size_t len) {
+    upb_stringsrc_init(this);
+    Reset(data, len);
+  }
   ~StringSource() { upb_stringsrc_uninit(this); }
 
   void Reset(const char* data, size_t len) {
     upb_stringsrc_reset(this, data, len);
   }
 
+  template <typename T> void Reset(const T& str) {
+    Reset(str.c_str(), str.size());
+  }
+
   ByteRegion* AllBytes() {
     return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
   }
@@ -233,6 +258,14 @@ class StringSource : public upb_stringsrc {
   upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
 };
 
+template <> inline ByteRegion* GetValue<ByteRegion*>(Value v) {
+  return static_cast<ByteRegion*>(upb_value_getbyteregion(v));
+}
+
+template <> inline Value MakeValue<ByteRegion*>(ByteRegion* v) {
+  return upb_value_byteregion(v);
+}
+
 }  // namespace upb
 
 #endif
diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp
index 030ba40..6998648 100644
--- a/bindings/cpp/upb/def.hpp
+++ b/bindings/cpp/upb/def.hpp
@@ -1,7 +1,7 @@
 //
 // upb - a minimalist implementation of protocol buffers.
 //
-// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
 // Author: Josh Haberman <jhaberman@gmail.com>
 //
 // The set of upb::*Def classes and upb::SymbolTable allow for defining and
@@ -15,21 +15,20 @@
 //    not be used for any purpose except to set its properties (it can't be
 //    used to parse anything, create any messages in memory, etc).
 //
-// 2. FINALIZED: after being added to a symtab (which links the defs together)
-//    the defs become finalized (thread-safe and immutable).  Programs may only
-//    access defs through a CONST POINTER during this stage -- upb_symtab will
-//    help you out with this requirement by only vending const pointers, but
-//    you need to make sure not to use any non-const pointers you still have
-//    sitting around.  In practice this means that you may not call any setters
-//    on the defs (or functions that themselves call the setters).  If you want
-//    to modify an existing immutable def, copy it with upb_*_dup(), modify the
-//    copy, and add the modified def to the symtab (replacing the existing
-//    def).
+// 2. FINALIZED: the Def::Finalize() operation finalizes a set of defs,
+//    which makes them thread-safe and immutable.  Finalized defs may only be
+//    accessed through a CONST POINTER.  If you want to modify an existing
+//    immutable def, copy it with Dup() and modify and finalize the copy.
 //
-// You can test for which stage of life a def is in by calling
-// upb::Def::IsMutable().  This is particularly useful for dynamic language
-// bindings, which must properly guarantee that the dynamic language cannot
-// break the rules laid out above.
+// The refcounting of defs works properly no matter what state the def is in.
+// Once the def is finalized it is guaranteed that any def reachable from a
+// live def is also live (so a ref on the base of a message tree keeps the
+// whole tree alive).
+//
+// You can test for which stage of life a def is in by calling IsMutable().
+// This is particularly useful for dynamic language bindings, which must
+// properly guarantee that the dynamic language cannot break the rules laid out
+// above.
 //
 // It would be possible to make the defs thread-safe during stage 1 by using
 // mutexes internally and changing any methods returning pointers to return
@@ -48,63 +47,213 @@
 
 namespace upb {
 
+class Def;
 class MessageDef;
 
+typedef upb_fieldtype_t FieldType;
+typedef upb_label_t Label;
+
 class FieldDef : public upb_fielddef {
  public:
-  static FieldDef* Cast(upb_fielddef *f) { return (FieldDef*)f; }
-  static const FieldDef* Cast(const upb_fielddef *f) { return (FieldDef*)f; }
+  static FieldDef* Cast(upb_fielddef *f) { return static_cast<FieldDef*>(f); }
+  static const FieldDef* Cast(const upb_fielddef *f) {
+    return static_cast<const FieldDef*>(f);
+  }
+
+  static FieldDef* New(void *owner) { return Cast(upb_fielddef_new(owner)); }
+  FieldDef* Dup(void *owner) const {
+    return Cast(upb_fielddef_dup(this, owner));
+  }
+  void Ref(void *owner) { upb_fielddef_ref(this, owner); }
+  void Unref(void *owner) { upb_fielddef_unref(this, owner); }
 
-  static FieldDef* New() { return Cast(upb_fielddef_new()); }
-  FieldDef* Dup() { return Cast(upb_fielddef_dup(this)); }
+  bool IsMutable() const { return upb_fielddef_ismutable(this); }
+  bool IsFinalized() const { return upb_fielddef_isfinalized(this); }
+  bool IsString() const { return upb_isstring(this); }
+  bool IsSequence() const { return upb_isseq(this); }
+  bool IsSubmessage() const { return upb_issubmsg(this); }
 
-  // Read accessors -- may be called at any time.
-  uint8_t type() const { return upb_fielddef_type(this); }
-  uint8_t label() const { return upb_fielddef_label(this); }
+  // Simple accessors. /////////////////////////////////////////////////////////
+
+  FieldType type() const { return upb_fielddef_type(this); }
+  Label label() const { return upb_fielddef_label(this); }
   int32_t number() const { return upb_fielddef_number(this); }
   std::string name() const { return std::string(upb_fielddef_name(this)); }
   Value default_() const { return upb_fielddef_default(this); }
   Value bound_value() const { return upb_fielddef_fval(this); }
+  uint16_t offset() const { return upb_fielddef_offset(this); }
+  int16_t hasbit() const { return upb_fielddef_hasbit(this); }
+
+  bool set_type(FieldType type) { return upb_fielddef_settype(this, type); }
+  bool set_label(Label label) { return upb_fielddef_setlabel(this, label); }
+  void set_offset(uint16_t offset) { upb_fielddef_setoffset(this, offset); }
+  void set_hasbit(int16_t hasbit) { upb_fielddef_sethasbit(this, hasbit); }
+  void set_fval(Value fval) { upb_fielddef_setfval(this, fval); }
+  void set_accessor(struct _upb_accessor_vtbl* vtbl) {
+    upb_fielddef_setaccessor(this, vtbl);
+  }
+  MessageDef* message();
+  const MessageDef* message() const;
 
-  MessageDef* message() { return (MessageDef*)upb_fielddef_msgdef(this); }
-  const MessageDef* message() const { return (MessageDef*)upb_fielddef_msgdef(this); }
-
-  // Will be added once upb::Def is defined:
-  // Def* subdef() { return upb_fielddef_subdef(this); }
-  // const Def* subdef() { return upb_fielddef_subdef(this); }
-
-  // Returns true if this FieldDef is finalized
-  bool IsFinalized() const { return upb_fielddef_finalized(this); }
   struct _upb_accessor_vtbl *accessor() const {
     return upb_fielddef_accessor(this);
   }
-  std::string type_name() const {
-    return std::string(upb_fielddef_typename(this));
+
+  // "Number" and "name" must be set before the fielddef is added to a msgdef.
+  // For the moment we do not allow these to be set once the fielddef is added
+  // to a msgdef -- this could be relaxed in the future.
+  bool set_number(int32_t number) {
+    return upb_fielddef_setnumber(this, number);
+  }
+  bool set_name(const char *name) { return upb_fielddef_setname(this, name); }
+  bool set_name(const std::string& name) { return set_name(name.c_str()); }
+
+  // Default value. ////////////////////////////////////////////////////////////
+
+  // Returns the default value for this fielddef, which may either be something
+  // the client set explicitly or the "default default" (0 for numbers, empty
+  // for strings).  The field's type indicates the type of the returned value,
+  // except for enum fields that are still mutable.
+  //
+  // For enums the default can be set either numerically or symbolically -- the
+  // upb_fielddef_default_is_symbolic() function below will indicate which it
+  // is.  For string defaults, the value will be a upb_byteregion which is
+  // invalidated by any other non-const call on this object.  Once the fielddef
+  // is finalized, symbolic enum defaults are resolved, so finalized enum
+  // fielddefs always have a default of type int32.
+  Value defaultval() { return upb_fielddef_default(this); }
+
+  // Sets default value for the field.  For numeric types, use
+  // upb_fielddef_setdefault(), and "value" must match the type of the field.
+  // For string/bytes types, use upb_fielddef_setdefaultstr().  Enum types may
+  // use either, since the default may be set either numerically or
+  // symbolically.
+  //
+  // NOTE: May only be called for fields whose type has already been set.
+  // Also, will be reset to default if the field's type is set again.
+  void set_default(Value value) { upb_fielddef_setdefault(this, value); }
+  void set_default(const char *str) { upb_fielddef_setdefaultcstr(this, str); }
+  void set_default(const char *str, size_t len) {
+    upb_fielddef_setdefaultstr(this, str, len);
+  }
+  void set_default(const std::string& str) {
+    upb_fielddef_setdefaultstr(this, str.c_str(), str.size());
+  }
+
+  // The results of this function are only meaningful for mutable enum fields,
+  // which can have a default specified either as an integer or as a string.
+  // If this returns true, the default returned from upb_fielddef_default() is
+  // a string, otherwise it is an integer.
+  bool DefaultIsSymbolic() { return upb_fielddef_default_is_symbolic(this); }
+
+  // Subdef. ///////////////////////////////////////////////////////////////////
+
+  // Submessage and enum fields must reference a "subdef", which is the
+  // MessageDef or EnumDef that defines their type.  Note that when the
+  // FieldDef is mutable it may not have a subdef *yet*, but this still returns
+  // true to indicate that the field's type requires a subdef.
+  bool HasSubDef() { return upb_hassubdef(this); }
+
+  // Before a FieldDef is finalized, its subdef may be set either directly
+  // (with a Def*) or symbolically.  Symbolic refs must be resolved by the
+  // client before the containing msgdef can be finalized.
+  //
+  // Both methods require that HasSubDef() (so the type must be set prior to
+  // calling these methods).  Returns false if this is not the case, or if the
+  // given subdef is not of the correct type.  The subtype is reset if the
+  // field's type is changed.
+  bool set_subdef(Def* def);
+  bool set_subtype_name(const char *name) {
+    return upb_fielddef_setsubtypename(this, name);
+  }
+  bool set_subtype_name(const std::string& str) {
+    return set_subtype_name(str.c_str());
   }
 
-  // Write accessors -- may not be called once the FieldDef is finalized.
+  // Returns the enum or submessage def or symbolic name for this field, if
+  // any.  May only be called for fields where HasSubDef() is true.  Returns
+  // NULL if the subdef has not been set or if you ask for a subtype name when
+  // the subtype is currently set symbolically (or vice-versa).
+  //
+  // Caller does *not* own a ref on the returned def or string.
+  // subtype_name() is non-const because only mutable defs can have the
+  // subtype name set symbolically (symbolic references must be resolved before
+  // the MessageDef can be finalized).
+  const Def* subdef() const;
+  const char *subtype_name() { return upb_fielddef_subtypename(this); }
 
  private:
-  FieldDef();
-  ~FieldDef();
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldDef);
+};
+
+class Def : public upb_def {
+ public:
+  // Converting from C types to C++ wrapper types.
+  static Def* Cast(upb_def *def) { return static_cast<Def*>(def); }
+  static const Def* Cast(const upb_def *def) {
+    return static_cast<const Def*>(def);
+  }
+
+  void Ref(void *owner) const { upb_def_ref(this, owner); }
+  void Unref(void *owner) const { upb_def_unref(this, owner); }
+
+  void set_full_name(const char *name) { upb_def_setfullname(this, name); }
+  void set_full_name(const std::string& name) {
+    upb_def_setfullname(this, name.c_str());
+  }
+
+  const char *full_name() const { return upb_def_fullname(this); }
+
+  // Finalizes the given list of defs (as well as the fielddefs for the given
+  // msgdefs).  All defs reachable from any def in this list must either be
+  // already finalized or elsewhere in the list.  Any symbolic references to
+  // enums or submessages must already have been resolved.  Returns true on
+  // success, otherwise false is returned and status contains details.  In the
+  // error case the input defs are unmodified.  See the comment at the top of
+  // this file for the semantics of finalized defs.
+  //
+  // n is currently limited to 64k defs, if more are required break them into
+  // batches of 64k (or we could raise this limit, at the cost of a bigger
+  // upb_def structure or complexity in upb_def_finalize()).
+  static bool Finalize(Def*const* defs, int n, Status* status) {
+    return upb_finalize(reinterpret_cast<upb_def*const*>(defs), n, status);
+  }
+  static bool Finalize(const std::vector<Def*>& defs, Status* status) {
+    return Finalize(defs.empty() ? NULL : &defs[0], defs.size(), status);
+  }
 };
 
 class MessageDef : public upb_msgdef {
  public:
   // Converting from C types to C++ wrapper types.
-  static MessageDef* Cast(upb_msgdef *md) { return (MessageDef*)md; }
+  static MessageDef* Cast(upb_msgdef *md) {
+    return static_cast<MessageDef*>(md);
+  }
   static const MessageDef* Cast(const upb_msgdef *md) {
-    return (MessageDef*)md;
+    return static_cast<const MessageDef*>(md);
+  }
+  static MessageDef* DynamicCast(Def* def) {
+    return Cast(upb_dyncast_msgdef(def));
+  }
+  static const MessageDef* DynamicCast(const Def* def) {
+    return Cast(upb_dyncast_msgdef_const(def));
   }
 
-  static MessageDef* New() { return Cast(upb_msgdef_new()); }
-  MessageDef* Dup() { return Cast(upb_msgdef_dup(this)); }
+  Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
+  const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
+
+  static MessageDef* New(void *owner) { return Cast(upb_msgdef_new(owner)); }
+  MessageDef* Dup(void *owner) const {
+    return Cast(upb_msgdef_dup(this, owner));
+  }
 
-  void Ref() const { upb_msgdef_ref(this); }
-  void Unref() const { upb_msgdef_unref(this); }
+  void Ref(void *owner) const { upb_msgdef_ref(this, owner); }
+  void Unref(void *owner) const { upb_msgdef_unref(this, owner); }
 
   // Read accessors -- may be called at any time.
 
+  const char *full_name() const { return AsDef()->full_name(); }
+
   // The total size of in-memory messages created with this MessageDef.
   uint16_t instance_size() const { return upb_msgdef_size(this); }
 
@@ -116,25 +265,32 @@ class MessageDef : public upb_msgdef {
 
   // Write accessors.  May only be called before the msgdef is in a symtab.
 
+  void set_full_name(const char *name) { AsDef()->set_full_name(name); }
+  void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
+
   void set_instance_size(uint16_t size) { upb_msgdef_setsize(this, size); }
   void set_hasbit_bytes(uint16_t size) { upb_msgdef_setsize(this, size); }
   bool SetExtensionRange(uint32_t start, uint32_t end) {
     return upb_msgdef_setextrange(this, start, end);
   }
 
-  // Adds a set of fields (upb_fielddef objects) to a msgdef.  Caller retains
-  // its ref on the fielddef.  May only be done before the msgdef is in a
-  // symtab (requires upb_def_ismutable(m) for the msgdef).  The fielddef's
-  // name and number must be set, and the message may not already contain any
-  // field with this name or number, and this fielddef may not be part of
-  // another message, otherwise false is returned and no action is performed.
-  bool AddFields(FieldDef*const * f, int n) {
-    return upb_msgdef_addfields(this, (upb_fielddef**)f, n);
+  // Adds a set of fields (FieldDef objects) to a MessageDef.  Caller passes a
+  // ref on the FieldDef to the MessageDef in both success and failure cases.
+  // May only be done before the MessageDef is in a SymbolTable (requires
+  // m->IsMutable() for the MessageDef).  The FieldDef's name and number must
+  // be set, and the message may not already contain any field with this name
+  // or number, and this FieldDef may not be part of another message, otherwise
+  // false is returned and the MessageDef is unchanged.
+  bool AddField(FieldDef* f, void *owner) { return AddFields(&f, 1, owner); }
+  bool AddFields(FieldDef*const * f, int n, void *owner) {
+    return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner);
   }
-  bool AddFields(const std::vector<FieldDef*>& fields) {
-    return AddFields(&fields[0], fields.size());
+  bool AddFields(const std::vector<FieldDef*>& fields, void *owner) {
+    return AddFields(fields.empty() ? NULL : &fields[0], fields.size(), owner);
   }
 
+  int field_count() const { return upb_msgdef_numfields(this); }
+
   // Lookup fields by name or number, returning NULL if no such field exists.
   FieldDef* FindFieldByName(const char *name) {
     return FieldDef::Cast(upb_msgdef_ntof(this, name));
@@ -156,19 +312,89 @@ class MessageDef : public upb_msgdef {
     return FindFieldByNumber(num);
   }
 
-  // TODO: iteration over fields.
+  class Iterator : public upb_msg_iter {
+   public:
+    explicit Iterator(MessageDef* md) { upb_msg_begin(this, md); }
+    Iterator() {}
+
+    FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
+    bool Done() { return upb_msg_done(this); }
+    void Next() { return upb_msg_next(this); }
+  };
+
+  class ConstIterator : public upb_msg_iter {
+   public:
+    explicit ConstIterator(const MessageDef* md) { upb_msg_begin(this, md); }
+    ConstIterator() {}
+
+    const FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); }
+    bool Done() { return upb_msg_done(this); }
+    void Next() { return upb_msg_next(this); }
+  };
 
  private:
-  MessageDef();
-  ~MessageDef();
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageDef);
+};
+
+class EnumDef : public upb_enumdef {
+ public:
+  // Converting from C types to C++ wrapper types.
+  static EnumDef* Cast(upb_enumdef *e) { return static_cast<EnumDef*>(e); }
+  static const EnumDef* Cast(const upb_enumdef *e) {
+    return static_cast<const EnumDef*>(e);
+  }
+
+  static EnumDef* New(void *owner) { return Cast(upb_enumdef_new(owner)); }
+
+  void Ref(void *owner) { upb_enumdef_ref(this, owner); }
+  void Unref(void *owner) { upb_enumdef_unref(this, owner); }
+  EnumDef* Dup(void *owner) const { return Cast(upb_enumdef_dup(this, owner)); }
+
+  Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); }
+  const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); }
+
+  int32_t default_value() const { return upb_enumdef_default(this); }
+
+  // May only be set if IsMutable().
+  void set_full_name(const char *name) { AsDef()->set_full_name(name); }
+  void set_full_name(const std::string& name) { AsDef()->set_full_name(name); }
+  void set_default_value(int32_t val) {
+    return upb_enumdef_setdefault(this, val);
+  }
+
+  // Adds a value to the enumdef.  Requires that no existing val has this
+  // name or number (returns false and does not add if there is).  May only
+  // be called if IsMutable().
+  bool AddValue(const char *name, int32_t num) {
+    return upb_enumdef_addval(this, name, num);
+  }
+  bool AddValue(const std::string& name, int32_t num) {
+    return upb_enumdef_addval(this, name.c_str(), num);
+  }
+
+  // Lookups from name to integer and vice-versa.
+  bool LookupName(const char *name, int32_t* num) const {
+    return upb_enumdef_ntoi(this, name, num);
+  }
+
+  // Lookup from integer to name, returns a NULL-terminated string which
+  // the caller does not own, or NULL if not found.
+  const char *LookupNumber(int32_t num) const {
+    return upb_enumdef_iton(this, num);
+  }
+
+ private:
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(EnumDef);
 };
 
 class SymbolTable : public upb_symtab {
  public:
   // Converting from C types to C++ wrapper types.
-  static SymbolTable* Cast(upb_symtab *s) { return (SymbolTable*)s; }
+  static SymbolTable* Cast(upb_symtab *s) {
+    return static_cast<SymbolTable*>(s);
+  }
   static const SymbolTable* Cast(const upb_symtab *s) {
-    return (SymbolTable*)s;
+    return static_cast<const SymbolTable*>(s);
   }
 
   static SymbolTable* New() { return Cast(upb_symtab_new()); }
@@ -176,17 +402,50 @@ class SymbolTable : public upb_symtab {
   void Ref() const { upb_symtab_unref(this); }
   void Unref() const { upb_symtab_unref(this); }
 
+  // Adds the given defs to the symtab, resolving all symbols.  Only one def
+  // per name may be in the list, but defs can replace existing defs in the
+  // symtab.  The entire operation either succeeds or fails.  If the operation
+  // fails, the symtab is unchanged, false is returned, and status indicates
+  // the error.  The caller passes a ref on the defs in all cases.
+  bool Add(Def *const *defs, int n, void *owner, Status* status) {
+    return upb_symtab_add(this, (upb_def*const*)defs, n, owner, status);
+  }
+  bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
+    return Add(defs.empty() ? NULL : &defs[0], defs.size(), owner, status);
+  }
+
   // If the given name refers to a message in this symbol table, returns a new
   // ref to that MessageDef object, otherwise returns NULL.
-  const MessageDef* LookupMessage(const char *name) const {
-    return MessageDef::Cast(upb_symtab_lookupmsg(this, name));
+  const MessageDef* LookupMessage(const char *name, void *owner) const {
+    return MessageDef::Cast(upb_symtab_lookupmsg(this, name, owner));
   }
 
  private:
-  SymbolTable();
-  ~SymbolTable();
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(SymbolTable);
 };
 
+template <> inline const FieldDef* GetValue<const FieldDef*>(Value v) {
+  return static_cast<const FieldDef*>(upb_value_getfielddef(v));
+}
+
+template <> inline Value MakeValue<FieldDef*>(FieldDef* v) {
+  return upb_value_fielddef(v);
+}
+
+inline MessageDef* FieldDef::message() {
+  return MessageDef::Cast(upb_fielddef_msgdef(this));
+}
+inline const MessageDef* FieldDef::message() const {
+  return MessageDef::Cast(upb_fielddef_msgdef(this));
+}
+
+inline const Def* FieldDef::subdef() const {
+  return Def::Cast(upb_fielddef_subdef(this));
+}
+inline bool FieldDef::set_subdef(Def* def) {
+  return upb_fielddef_setsubdef(this, def);
+}
+
 }  // namespace upb
 
 #endif
diff --git a/bindings/cpp/upb/handlers.cc b/bindings/cpp/upb/handlers.cc
new file mode 100644
index 0000000..c96a74e
--- /dev/null
+++ b/bindings/cpp/upb/handlers.cc
@@ -0,0 +1,39 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include "handlers.hpp"
+
+#include "def.hpp"
+
+namespace upb {
+
+namespace {
+
+void MessageCallbackWrapper(
+    void* closure, upb_mhandlers* mh, const upb_msgdef* m) {
+  Handlers::MessageRegistrationVisitor* visitor =
+      static_cast<Handlers::MessageRegistrationVisitor*>(closure);
+  visitor->OnMessage(static_cast<MessageHandlers*>(mh),
+                     static_cast<const MessageDef*>(m));
+}
+
+void FieldCallbackWrapper(
+    void* closure, upb_fhandlers* fh, const upb_fielddef* f) {
+  Handlers::MessageRegistrationVisitor* visitor =
+      static_cast<Handlers::MessageRegistrationVisitor*>(closure);
+  visitor->OnField(static_cast<FieldHandlers*>(fh),
+                   static_cast<const FieldDef*>(f));
+}
+}  // namespace
+
+MessageHandlers* Handlers::RegisterMessageDef(
+    const MessageDef& m, Handlers::MessageRegistrationVisitor* visitor) {
+  // The wrappers cast the closure back to a visitor*, so pass it directly.
+  return static_cast<MessageHandlers*>(upb_handlers_regmsgdef(
+      this, &m, &MessageCallbackWrapper, &FieldCallbackWrapper, visitor));
+}
+
+}  // namespace upb
diff --git a/bindings/cpp/upb/handlers.hpp b/bindings/cpp/upb/handlers.hpp
index d356a33..a366c3d 100644
--- a/bindings/cpp/upb/handlers.hpp
+++ b/bindings/cpp/upb/handlers.hpp
@@ -15,11 +15,16 @@
 
 #include "upb/handlers.h"
 
+#include "upb/upb.hpp"
+
 namespace upb {
 
 typedef upb_fieldtype_t FieldType;
 typedef upb_flow_t Flow;
+typedef upb_sflow_t SubFlow;
 class MessageHandlers;
+class MessageDef;
+class FieldDef;
 
 class FieldHandlers : public upb_fhandlers {
  public:
@@ -68,12 +73,11 @@ class FieldHandlers : public upb_fhandlers {
   MessageHandlers* GetSubMessageHandlers() const;
   // If set to >=0, the given hasbit will be set after the value callback is
   // called (offset relative to the current closure).
-  int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); }
-  void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); }
+  int32_t GetHasbit() const { return upb_fhandlers_gethasbit(this); }
+  void SetHasbit(int32_t bit) { upb_fhandlers_sethasbit(this, bit); }
 
  private:
-  FieldHandlers();  // Only created by upb::Handlers.
-  ~FieldHandlers(); // Only destroyed by refcounting.
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldHandlers);
 };
 
 class MessageHandlers : public upb_mhandlers {
@@ -81,6 +85,13 @@ class MessageHandlers : public upb_mhandlers {
   typedef upb_startmsg_handler StartMessageHandler;
   typedef upb_endmsg_handler EndMessageHandler;
 
+  static MessageHandlers* Cast(upb_mhandlers* mh) {
+    return static_cast<MessageHandlers*>(mh);
+  }
+  static const MessageHandlers* Cast(const upb_mhandlers* mh) {
+    return static_cast<const MessageHandlers*>(mh);
+  }
+
   // The MessageHandlers will live at least as long as the upb::Handlers to
   // which it belongs, but can be Ref'd/Unref'd to make it live longer (which
   // will prolong the life of the underlying upb::Handlers also).
@@ -89,7 +100,7 @@ class MessageHandlers : public upb_mhandlers {
 
   // Functions to set this message's handlers.
   // These return "this" so they can be conveniently chained, eg.
-  //   handlers->NewMessage()
+  //   handlers->NewMessageHandlers()
   //       ->SetStartMessageHandler(&StartMessage)
   //       ->SetEndMessageHandler(&EndMessage);
   MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) {
@@ -111,13 +122,13 @@ class MessageHandlers : public upb_mhandlers {
   FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name,
                                                FieldType type, bool repeated,
                                                MessageHandlers* subm) {
+    (void)name;
     return static_cast<FieldHandlers*>(
         upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm));
   }
 
  private:
-  MessageHandlers();  // Only created by upb::Handlers.
-  ~MessageHandlers(); // Only destroyed by refcounting.
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageHandlers);
 };
 
 class Handlers : public upb_handlers {
@@ -134,17 +145,29 @@ class Handlers : public upb_handlers {
     return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this));
   }
 
+  // Convenience function for registering handlers for all messages and fields
+  // in a MessageDef and all its children.  For every registered message,
+  // OnMessage will be called on the visitor with newly-created MessageHandlers
+  // and MessageDef.  Likewise, OnField will be called with newly-created
+  // FieldHandlers and FieldDef for each field.
+  class MessageRegistrationVisitor {
+   public:
+    virtual ~MessageRegistrationVisitor() {}
+    virtual void OnMessage(MessageHandlers* mh, const MessageDef* m) = 0;
+    virtual void OnField(FieldHandlers* fh, const FieldDef* f) = 0;
+  };
+  MessageHandlers* RegisterMessageDef(const MessageDef& m,
+                                      MessageRegistrationVisitor* visitor);
+
  private:
-  Handlers();  // Only created by Handlers::New().
-  ~Handlers(); // Only destroyed by refcounting.
+  UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(Handlers);
 };
 
-
-MessageHandlers* FieldHandlers::GetMessageHandlers() const {
+inline MessageHandlers* FieldHandlers::GetMessageHandlers() const {
   return static_cast<MessageHandlers*>(upb_fhandlers_getmsg(this));
 }
 
-MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
+inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
   return static_cast<MessageHandlers*>(upb_fhandlers_getsubmsg(this));
 }
 
diff --git a/bindings/cpp/upb/msg.hpp b/bindings/cpp/upb/msg.hpp
new file mode 100644
index 0000000..c7cf1f2
--- /dev/null
+++ b/bindings/cpp/upb/msg.hpp
@@ -0,0 +1,62 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+// Routines for reading and writing message data to an in-memory structure,
+// similar to a C struct.
+//
+// upb does not define one single message object that everyone must use.
+// Rather it defines an abstract interface for reading and writing members
+// of a message object, and all of the parsers and serializers use this
+// abstract interface.  This allows upb's parsers and serializers to be used
+// regardless of what memory management scheme or synchronization model the
+// application is using.
+//
+// A standard set of accessors is provided for doing simple reads and writes at
+// a known offset into the message.  These accessors should be used when
+// possible, because they are specially optimized -- for example, the JIT can
+// recognize them and emit specialized code instead of having to call the
+// function at all.  The application can substitute its own accessors when the
+// standard accessors are not suitable.
+
+#ifndef UPB_MSG_HPP
+#define UPB_MSG_HPP
+
+#include "upb/msg.h"
+#include "upb/handlers.hpp"
+
+namespace upb {
+
+typedef upb_accessor_vtbl AccessorVTable;
+
+// Registers handlers for writing into a message of the given type using
+// whatever accessors it has defined.
+inline MessageHandlers* RegisterWriteHandlers(upb::Handlers* handlers,
+                                              const upb::MessageDef* md) {
+  return MessageHandlers::Cast(
+      upb_accessors_reghandlers(handlers, md));
+}
+
+template <typename T> static FieldHandlers::ValueHandler* GetValueHandler();
+
+// A handy templated function that will retrieve a value handler for a given
+// C++ type.
+#define GET_VALUE_HANDLER(type, ctype) \
+    template <> \
+    FieldHandlers::ValueHandler* GetValueHandler<ctype>() { \
+      return &upb_stdmsg_set ## type; \
+    }
+
+GET_VALUE_HANDLER(double, double);
+GET_VALUE_HANDLER(float, float);
+GET_VALUE_HANDLER(uint64, uint64_t);
+GET_VALUE_HANDLER(uint32, uint32_t);
+GET_VALUE_HANDLER(int64, int64_t);
+GET_VALUE_HANDLER(int32, int32_t);
+GET_VALUE_HANDLER(bool, bool);
+#undef GET_VALUE_HANDLER
+
+}  // namespace upb
+
+#endif
diff --git a/bindings/cpp/upb/pb/glue.hpp b/bindings/cpp/upb/pb/glue.hpp
index be072a7..d43baeb 100644
--- a/bindings/cpp/upb/pb/glue.hpp
+++ b/bindings/cpp/upb/pb/glue.hpp
@@ -13,11 +13,23 @@
 
 namespace upb {
 
+// All routines that load descriptors expect the descriptor to be a
+// FileDescriptorSet.
 bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
                                   Status* status) {
   return upb_load_descriptor_file_into_symtab(s, fname, status);
 }
 
+inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
+                                     size_t len, Status* status) {
+  return upb_load_descriptor_into_symtab(s, str, len, status);
+}
+
+template <typename T>
+bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
+  return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status);
+}
+
 }  // namespace upb
 
 #endif
diff --git a/bindings/cpp/upb/proto2_bridge.cc b/bindings/cpp/upb/proto2_bridge.cc
new file mode 100644
index 0000000..6119295
--- /dev/null
+++ b/bindings/cpp/upb/proto2_bridge.cc
@@ -0,0 +1,892 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include <string>
+#include <typeinfo>
+#include "upb/bytestream.hpp"
+#include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/msg.hpp"
+#include "upb/proto2_bridge.hpp"
+
+namespace {
+
+static void* GetFieldPointer(void *message, const upb::FieldDef* f) {
+  return &static_cast<char*>(message)[f->offset()];  // Byte-wise offset.
+}
+
+}  // namespace
+
+#ifdef UPB_GOOGLE3
+
+// TODO(haberman): friend upb so that this isn't required.
+#define protected public
+#include "net/proto2/public/repeated_field.h"
+#undef protected
+
+#define private public
+#include "net/proto/proto2_reflection.h"
+#undef private
+
+#include "net/proto2/proto/descriptor.pb.h"
+#include "net/proto2/public/descriptor.h"
+#include "net/proto2/public/generated_message_reflection.h"
+#include "net/proto2/public/lazy_field.h"
+#include "net/proto2/public/message.h"
+#include "net/proto2/public/string_piece_field_support.h"
+#include "net/proto/internal_layout.h"
+#include "strings/cord.h"
+using ::proto2::Descriptor;
+using ::proto2::EnumDescriptor;
+using ::proto2::EnumValueDescriptor;
+using ::proto2::FieldDescriptor;
+using ::proto2::FieldOptions;
+using ::proto2::FileDescriptor;
+using ::proto2::internal::GeneratedMessageReflection;
+using ::proto2::internal::RepeatedPtrFieldBase;
+using ::proto2::internal::StringPieceField;
+using ::proto2::Message;
+using ::proto2::MessageFactory;
+using ::proto2::Reflection;
+using ::proto2::RepeatedField;
+using ::proto2::RepeatedPtrField;
+
+namespace upb {
+
+static const Message* GetPrototypeForField(const Message& m,
+                                           const FieldDescriptor* f);
+
+namespace proto2_bridge_google3 { class FieldAccessor; }
+
+using ::upb::proto2_bridge_google3::FieldAccessor;
+
+namespace proto2_bridge_google3 {
+
+static void AssignToCord(const ByteRegion* r, Cord* cord) {
+  // TODO(haberman): ref source data if source is a cord.
+  cord->Clear();
+  // Copy the region into the cord one GetPtr() buffer at a time.
+  for (uint64_t ofs = r->start_ofs(); ofs < r->end_ofs(); ) {
+    size_t len;
+    const char *buf = r->GetPtr(ofs, &len);
+    cord->Append(StringPiece(buf, len));
+    ofs += len;
+  }
+}
+
+#else
+
+// TODO(haberman): friend upb so that this isn't required.
+#define protected public
+#include "google/protobuf/repeated_field.h"
+#undef protected
+
+#define private public
+#include "google/protobuf/generated_message_reflection.h"
+#undef private
+
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "google/protobuf/message.h"
+using ::google::protobuf::Descriptor;
+using ::google::protobuf::EnumDescriptor;
+using ::google::protobuf::EnumValueDescriptor;
+using ::google::protobuf::FieldDescriptor;
+using ::google::protobuf::FieldOptions;
+using ::google::protobuf::FileDescriptor;
+using ::google::protobuf::internal::GeneratedMessageReflection;
+using ::google::protobuf::internal::RepeatedPtrFieldBase;
+using ::google::protobuf::Message;
+using ::google::protobuf::MessageFactory;
+using ::google::protobuf::Reflection;
+using ::google::protobuf::RepeatedField;
+using ::google::protobuf::RepeatedPtrField;
+
+namespace upb {
+static const Message* GetPrototypeForField(const Message& m,
+                                           const FieldDescriptor* f);
+
+namespace proto2_bridge_opensource { class FieldAccessor; }
+
+using ::upb::proto2_bridge_opensource::FieldAccessor;
+
+namespace proto2_bridge_opensource {
+
+#endif  // ifdef UPB_GOOGLE3
+
+// Have to define this manually since older versions of proto2 didn't define
+// an enum value for STRING.
+#define UPB_CTYPE_STRING 0
+
+// The code in this class depends on the internal representation of the proto2
+// generated classes, which is an internal implementation detail of proto2 and
+// is not a public interface.  As a result, this class's implementation may
+// need to be changed if/when proto2 changes its internal representation.  It
+// is intended that this class is the only code that depends on these internal,
+// non-public interfaces.
+//
+// This class only works with messages that use GeneratedMessageReflection.
+// Other reflection classes will need other accessor implementations.
+class FieldAccessor {
+ public:
+  // Returns true if we were able to set an accessor and any other properties
+  // of the FieldDef that are necessary to read/write this field to a
+  // proto2::Message.
+  static bool TrySet(const FieldDescriptor* proto2_f,
+                     const upb::MessageDef* md,
+                     upb::FieldDef* upb_f) {
+    const Message* prototype = static_cast<const Message*>(md->prototype);
+    const Reflection* base_r = prototype->GetReflection();
+    const GeneratedMessageReflection* r =
+        dynamic_cast<const GeneratedMessageReflection*>(base_r);
+    // Old versions of the open-source protobuf release erroneously default to
+    // Cord even though that has never been supported in the open-source
+    // release.
+    int32_t ctype = proto2_f->options().has_ctype() ?
+        proto2_f->options().ctype() : UPB_CTYPE_STRING;
+    if (!r) return false;
+    // Extensions not supported yet.
+    if (proto2_f->is_extension()) return false;
+
+    upb_f->set_accessor(GetForFieldDescriptor(proto2_f, ctype));
+    upb_f->set_hasbit(GetHasbit(proto2_f, r));
+    upb_f->set_offset(GetOffset(proto2_f, r));
+    if (upb_f->IsSubmessage()) {
+      upb_f->set_subtype_name(proto2_f->message_type()->full_name());
+      upb_f->prototype = GetPrototypeForField(*prototype, proto2_f);
+    }
+
+    if (upb_f->IsString() && !upb_f->IsSequence() &&
+        ctype == UPB_CTYPE_STRING) {
+      upb_f->prototype = &r->GetStringReference(*prototype, proto2_f, NULL);
+    }
+    return true;
+  }
+
+  static MessageFactory* GetMessageFactory(const Message& m) {
+    const GeneratedMessageReflection* r =
+        dynamic_cast<const GeneratedMessageReflection*>(m.GetReflection());
+    return r ? r->message_factory_ : NULL;
+  }
+
+ private:
+  // Returns the hasbit index to record for "f", or -1 if it has none.
+  static int64_t GetHasbit(const FieldDescriptor* f,
+                           const GeneratedMessageReflection* r) {
+    // proto2 does not store hasbits for repeated fields.
+    if (f->is_repeated()) return -1;
+    // has_bits_offset_ is scaled by 8 (presumably bytes -> bits) and the
+    // field's index within the message is added to it.
+    return (r->has_bits_offset_ * 8) + f->index();
+  }
+
+  static uint16_t GetOffset(const FieldDescriptor* f,
+                            const GeneratedMessageReflection* r) {
+    return r->offsets_[f->index()];
+  }
+
+  static AccessorVTable *GetForFieldDescriptor(const FieldDescriptor* f,
+                                               int32_t ctype) {
+    switch (f->cpp_type()) {
+      case FieldDescriptor::CPPTYPE_ENUM:
+        // Should handlers validate enum membership to match proto2?
+      case FieldDescriptor::CPPTYPE_INT32: return Get<int32_t>();
+      case FieldDescriptor::CPPTYPE_INT64: return Get<int64_t>();
+      case FieldDescriptor::CPPTYPE_UINT32: return Get<uint32_t>();
+      case FieldDescriptor::CPPTYPE_UINT64: return Get<uint64_t>();
+      case FieldDescriptor::CPPTYPE_DOUBLE: return Get<double>();
+      case FieldDescriptor::CPPTYPE_FLOAT: return Get<float>();
+      case FieldDescriptor::CPPTYPE_BOOL: return Get<bool>();
+      case FieldDescriptor::CPPTYPE_STRING:
+        switch (ctype) {
+#ifdef UPB_GOOGLE3
+          case FieldOptions::STRING:
+            return GetForString<string>();
+          case FieldOptions::CORD:
+            return GetForCord();
+          case FieldOptions::STRING_PIECE:
+            return GetForStringPiece();
+#else
+          case UPB_CTYPE_STRING:
+            return GetForString<std::string>();
+#endif
+          default: return NULL;
+        }
+      case FieldDescriptor::CPPTYPE_MESSAGE:
+#ifdef UPB_GOOGLE3
+        if (f->options().lazy()) {
+          return NULL;  // Not yet implemented.
+        } else {
+          return GetForMessage();
+        }
+#else
+        return GetForMessage();
+#endif
+      default: return NULL;
+    }
+  }
+
+  // PushOffset handler (used for StartSequence and others)  ///////////////////
+
+  static SubFlow PushOffset(void *m, Value fval) {
+    const FieldDef *f = GetValue<const FieldDef*>(fval);
+    return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
+  }
+
+  // Primitive Value (numeric, enum, bool) /////////////////////////////////////
+
+  template <typename T> static AccessorVTable *Get() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      GetValueHandler<T>(),
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &Append<T>,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  template <typename T>
+  static Flow Append(void *_r, Value fval, Value val) {
+    (void)fval;
+    RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
+    r->Add(GetValue<T>(val));
+    return UPB_CONTINUE;
+  }
+
+  // String ////////////////////////////////////////////////////////////////////
+
+  template <typename T> static AccessorVTable *GetForString() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetString<T>,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendString<T>,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  // This needs to be templated because google3 string is not std::string.
+  template <typename T> static Flow SetString(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    T **str = static_cast<T**>(GetFieldPointer(m, f));
+    // If it points to the default instance, we must create a new instance.
+    if (*str == f->prototype) *str = new T();
+    GetValue<ByteRegion*>(val)->AssignToString(*str);
+    return UPB_CONTINUE;
+  }
+
+  template <typename T>
+  static Flow AppendString(void *_r, Value fval, Value val) {
+    (void)fval;
+    RepeatedPtrField<T>* r = static_cast<RepeatedPtrField<T>*>(_r);
+    GetValue<ByteRegion*>(val)->AssignToString(r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // SubMessage ////////////////////////////////////////////////////////////////
+
+  static AccessorVTable *GetForMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &StartSubMessage,
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static SubFlow StartSubMessage(void *m, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    void **subm = static_cast<void**>(GetFieldPointer(m, f));
+    if (*subm == NULL || *subm == f->prototype) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      *subm = prototype->New();
+    }
+    return UPB_CONTINUE_WITH(*subm);
+  }
+
+  class RepeatedMessageTypeHandler {
+   public:
+    typedef void Type;
+    // AddAllocated() calls this, but only if other objects are sitting
+    // around waiting for reuse, which we will not do.
+    static void Delete(Type* t) {
+      (void)t;
+      assert(false);
+    }
+  };
+
+  // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+  // its base class RepeatedPtrFieldBase*.
+  static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    RepeatedPtrFieldBase *r = static_cast<RepeatedPtrFieldBase*>(_r);
+    void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
+    if (!submsg) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      submsg = prototype->New();
+      r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+    }
+    return UPB_CONTINUE_WITH(submsg);
+  }
+
+  // TODO(haberman): handle Extensions, Unknown Fields.
+
+#ifdef UPB_GOOGLE3
+  // Handlers for types/features only included in internal proto2 release:
+  // Cord, StringPiece, LazyField, and MessageSet.
+  // TODO(haberman): LazyField, MessageSet.
+
+  // Cord //////////////////////////////////////////////////////////////////////
+
+  static AccessorVTable *GetForCord() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetCord,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendCord,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static Flow SetCord(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Cord* field = static_cast<Cord*>(GetFieldPointer(m, f));
+    AssignToCord(GetValue<ByteRegion*>(val), field);
+    return UPB_CONTINUE;
+  }
+
+  static Flow AppendCord(void *_r, Value /* fval */, Value val) {
+    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
+    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // StringPiece ///////////////////////////////////////////////////////////////
+
+  static AccessorVTable *GetForStringPiece() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetStringPiece,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendStringPiece,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static void AssignToStringPieceField(const ByteRegion* r,
+                                       proto2::internal::StringPieceField* f) {
+    // TODO(haberman): alias if possible and enabled on the input stream.
+    // TODO(haberman): add a method to StringPieceField that lets us avoid
+    // this copy/malloc/free.
+    char *data = new char[r->Length()];
+    r->Copy(r->start_ofs(), r->Length(), data);
+    f->CopyFrom(StringPiece(data, r->Length()));
+    delete[] data;
+  }
+
+  static Flow SetStringPiece(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    StringPieceField* field =
+        static_cast<StringPieceField*>(GetFieldPointer(m, f));
+    AssignToStringPieceField(GetValue<ByteRegion*>(val), field);
+    return UPB_CONTINUE;
+  }
+
+  static Flow AppendStringPiece(void* _r, Value /* fval */, Value val) {
+    RepeatedPtrField<StringPieceField>* r =
+        static_cast<RepeatedPtrField<StringPieceField>*>(_r);
+    AssignToStringPieceField(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+#endif  // UPB_GOOGLE3
+};
+
+#ifdef UPB_GOOGLE3
+
+// Proto1 accessor -- only needed inside Google.
+class Proto1FieldAccessor {
+ public:
+  // Returns true if we were able to set an accessor and any other properties
+  // of the FieldDef that are necessary to read/write this field to a
+  // proto2::Message.
+  static bool TrySet(const FieldDescriptor* proto2_f,
+                     const upb::MessageDef* md,
+                     upb::FieldDef* upb_f) {
+    const Message* m = static_cast<const Message*>(md->prototype);
+    const proto2::Reflection* base_r = m->GetReflection();
+    const _pi::Proto2Reflection* r =
+        dynamic_cast<const _pi::Proto2Reflection*>(base_r);
+    if (!r) return false;
+    // Extensions not supported yet.
+    if (proto2_f->is_extension()) return false;
+
+    const _pi::Field* f = r->GetFieldLayout(proto2_f);
+
+    if (f->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) {
+      // Override the BYTES type that proto2 descriptors have for weak fields.
+      upb_f->set_type(UPB_TYPE(MESSAGE));
+    }
+
+    if (upb_f->IsSubmessage()) {
+      const Message* prototype = upb::GetPrototypeForField(*m, proto2_f);
+      upb_f->set_subtype_name(prototype->GetDescriptor()->full_name());
+      upb_f->prototype = prototype;
+    }
+
+    upb_f->set_accessor(GetForCrep(f->crep));
+    upb_f->set_hasbit(GetHasbit(proto2_f, r));
+    upb_f->set_offset(GetOffset(proto2_f, r));
+    return true;
+  }
+
+ private:
+  // Returns the hasbit index to record for "f", or -1 if it has none.
+  static int16_t GetHasbit(const FieldDescriptor* f,
+                           const _pi::Proto2Reflection* r) {
+    // proto1 does not store hasbits for repeated fields.
+    if (f->is_repeated()) return -1;
+    // The layout's has_bit_offset is scaled by 8 (presumably bytes -> bits)
+    // and the field's has_index is added to it.
+    return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index;
+  }
+
+  static uint16_t GetOffset(const FieldDescriptor* f,
+                            const _pi::Proto2Reflection* r) {
+    return r->GetFieldLayout(f)->offset;
+  }
+
+  static AccessorVTable *GetForCrep(int crep) {
+#define PRIMITIVE(name, type_name) \
+    case _pi::CREP_REQUIRED_ ## name: \
+    case _pi::CREP_OPTIONAL_ ## name: \
+    case _pi::CREP_REPEATED_ ## name: return Get<type_name>();
+
+    switch (crep) {
+      PRIMITIVE(DOUBLE,   double);
+      PRIMITIVE(FLOAT,    float);
+      PRIMITIVE(INT64,    int64_t);
+      PRIMITIVE(UINT64,   uint64_t);
+      PRIMITIVE(INT32,    int32_t);
+      PRIMITIVE(FIXED64,  uint64_t);
+      PRIMITIVE(FIXED32,  uint32_t);
+      PRIMITIVE(BOOL,     bool);
+      case _pi::CREP_REQUIRED_STRING:
+      case _pi::CREP_OPTIONAL_STRING:
+      case _pi::CREP_REPEATED_STRING: return GetForString();
+      case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: return GetForOutOfLineString();
+      case _pi::CREP_REQUIRED_CORD:
+      case _pi::CREP_OPTIONAL_CORD:
+      case _pi::CREP_REPEATED_CORD: return GetForCord();
+      case _pi::CREP_REQUIRED_GROUP:
+      case _pi::CREP_REQUIRED_FOREIGN:
+      case _pi::CREP_REQUIRED_FOREIGN_PROTO2: return GetForRequiredMessage();
+      case _pi::CREP_OPTIONAL_GROUP:
+      case _pi::CREP_REPEATED_GROUP:
+      case _pi::CREP_OPTIONAL_FOREIGN:
+      case _pi::CREP_REPEATED_FOREIGN:
+      case _pi::CREP_OPTIONAL_FOREIGN_PROTO2:
+      case _pi::CREP_REPEATED_FOREIGN_PROTO2: return GetForMessage();
+      case _pi::CREP_OPTIONAL_FOREIGN_WEAK: return GetForWeakMessage();
+      default: assert(false); return NULL;
+    }
+#undef PRIMITIVE
+  }
+
+  // PushOffset handler (used for StartSequence and others)  ///////////////////
+
+  // We can find a RepeatedField* or a RepeatedPtrField* at f->offset().
+  static SubFlow PushOffset(void *m, Value fval) {
+    const FieldDef *f = GetValue<const FieldDef*>(fval);
+    return UPB_CONTINUE_WITH(GetFieldPointer(m, f));
+  }
+
+  // Primitive Value (numeric, enum, bool) /////////////////////////////////////
+
+  template <typename T> static AccessorVTable *Get() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      GetValueHandler<T>(),
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &Append<T>,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  template <typename T>
+  static Flow Append(void *_r, Value fval, Value val) {
+    (void)fval;
+    // Proto1's ProtoArray class derives from RepeatedField.
+    RepeatedField<T>* r = static_cast<RepeatedField<T>*>(_r);
+    r->Add(GetValue<T>(val));
+    return UPB_CONTINUE;
+  }
+
+  // String ////////////////////////////////////////////////////////////////////
+
+  static AccessorVTable *GetForString() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetString,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendString,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static Flow SetString(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    string *str = static_cast<string*>(GetFieldPointer(m, f));
+    GetValue<ByteRegion*>(val)->AssignToString(str);
+    return UPB_CONTINUE;
+  }
+
+  static Flow AppendString(void *_r, Value fval, Value val) {
+    (void)fval;
+    RepeatedPtrField<string>* r = static_cast<RepeatedPtrField<string>*>(_r);
+    GetValue<ByteRegion*>(val)->AssignToString(r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // Out-of-line string ////////////////////////////////////////////////////////
+
+  static AccessorVTable *GetForOutOfLineString() {
+    static upb_accessor_vtbl vtbl = {
+      NULL, &SetOutOfLineString,
+      // This type is only used for non-repeated string fields.
+      NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static Flow SetOutOfLineString(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    string **str = static_cast<string**>(GetFieldPointer(m, f));
+    if (*str == &::ProtocolMessage::___empty_internal_proto_string_)
+      *str = new string();
+    GetValue<ByteRegion*>(val)->AssignToString(*str);
+    return UPB_CONTINUE;
+  }
+
+  // Cord //////////////////////////////////////////////////////////////////////
+
+  static AccessorVTable *GetForCord() {
+    static upb_accessor_vtbl vtbl = {
+      NULL,  // StartSubMessage handler
+      &SetCord,
+      &PushOffset,  // StartSequence handler
+      NULL,  // StartRepeatedSubMessage handler
+      &AppendCord,
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static Flow SetCord(void *m, Value fval, Value val) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Cord* field = static_cast<Cord*>(GetFieldPointer(m, f));
+    AssignToCord(GetValue<ByteRegion*>(val), field);
+    return UPB_CONTINUE;
+  }
+
+  static Flow AppendCord(void *_r, Value /* fval */, Value val) {
+    RepeatedField<Cord>* r = static_cast<RepeatedField<Cord>*>(_r);
+    AssignToCord(GetValue<ByteRegion*>(val), r->Add());
+    return UPB_CONTINUE;
+  }
+
+  // SubMessage ////////////////////////////////////////////////////////////////
+
+  static AccessorVTable *GetForRequiredMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &PushOffset,  // StartSubMessage handler
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static AccessorVTable *GetForWeakMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &StartWeakSubMessage,  // StartSubMessage handler
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static AccessorVTable *GetForMessage() {
+    static upb_accessor_vtbl vtbl = {
+      &StartSubMessage,
+      NULL,  // Value handler
+      &PushOffset,  // StartSequence handler
+      &StartRepeatedSubMessage,
+      NULL,  // Repeated value handler
+      NULL, NULL, NULL, NULL, NULL, NULL};
+    return &vtbl;
+  }
+
+  static SubFlow StartSubMessage(void *m, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
+    if (*subm == f->prototype) *subm = (*subm)->New();
+    return UPB_CONTINUE_WITH(*subm);
+  }
+
+  static SubFlow StartWeakSubMessage(void *m, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    Message **subm = static_cast<Message**>(GetFieldPointer(m, f));
+    if (*subm == NULL) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      *subm = prototype->New();
+    }
+    return UPB_CONTINUE_WITH(*subm);
+  }
+
+  class RepeatedMessageTypeHandler {
+   public:
+    typedef void Type;
+    // AddAllocated() calls this, but only if other objects are sitting
+    // around waiting for reuse, which we will not do.
+    static void Delete(Type* t) {
+      (void)t;
+      assert(false);
+    }
+  };
+
+  // Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
+  // its base class RepeatedPtrFieldBase*.
+  static SubFlow StartRepeatedSubMessage(void* _r, Value fval) {
+    const FieldDef* f = GetValue<const FieldDef*>(fval);
+    RepeatedPtrFieldBase *r = static_cast<RepeatedPtrFieldBase*>(_r);
+    void *submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
+    if (!submsg) {
+      const Message* prototype = static_cast<const Message*>(f->prototype);
+      submsg = prototype->New();
+      r->AddAllocated<RepeatedMessageTypeHandler>(submsg);
+    }
+    return UPB_CONTINUE_WITH(submsg);
+  }
+};
+
+#endif
+
+}  // namespace proto2_bridge_{google3,opensource}
+
+static const Message* GetPrototypeForMessage(const Message& m) {
+  // A proto2 generated message or DynamicMessage reports its own factory.
+  // A proto1 message does not; since proto1 has no dynamic message, its
+  // prototype must come from the generated factory.
+  MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
+  if (!factory) factory = MessageFactory::generated_factory();
+
+  const Message* ret = factory->GetPrototype(m.GetDescriptor());
+  // If NULL after falling back to the generated factory, then it wasn't a
+  // proto1 message either and we can't handle it.
+  assert(ret);
+  // The prototype must share the reflection object of the message it was
+  // looked up for.
+  assert(ret->GetReflection() == m.GetReflection());
+  return ret;
+}
+
+static const Message* GetPrototypeForField(const Message& m,
+                                           const FieldDescriptor* f) {
+#ifdef UPB_GOOGLE3
+  if (f->type() == FieldDescriptor::TYPE_BYTES) {
+    // Proto1 weak field: the proto2 descriptor says their type is BYTES.
+    const _pi::Proto2Reflection* r =
+        dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection());
+    assert(r);
+    const _pi::Field* field = r->GetFieldLayout(f);
+    assert(field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK);
+    return GetPrototypeForMessage(
+        *static_cast<const Message*>(field->weak_layout()->default_instance));
+  } else if (dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection())) {
+    // Proto1 message; since proto1 has no dynamic message, it must be from
+    // the generated factory.
+    const Message* ret =
+        MessageFactory::generated_factory()->GetPrototype(f->message_type());
+    assert(ret);
+    return ret;
+  }
+#endif
+  assert(f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE);
+  // We assume that all submessages (and extensions) will be constructed using
+  // the same MessageFactory as this message.  This doesn't cover the case of
+  // CodedInputStream::SetExtensionRegistry().
+  MessageFactory* factory = FieldAccessor::GetMessageFactory(m);
+  assert(factory);  // If neither proto1 nor proto2 we can't handle it.
+  const Message* ret = factory->GetPrototype(f->message_type());
+  assert(ret);
+  return ret;
+}
+
+namespace proto2_bridge {
+
+upb::FieldDef* AddFieldDef(const FieldDescriptor* f, upb::MessageDef* md) {
+  upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f);
+  upb_f->set_number(f->number());
+  upb_f->set_name(f->name());
+  upb_f->set_label(static_cast<upb::Label>(f->label()));
+  upb_f->set_type(static_cast<upb::FieldType>(f->type()));
+
+  if (!FieldAccessor::TrySet(f, md, upb_f)
+#ifdef UPB_GOOGLE3
+      && !proto2_bridge_google3::Proto1FieldAccessor::TrySet(f, md, upb_f)
+#endif
+     ) {
+    // Unsupported reflection class.
+    assert(false);
+  }
+
+  if (upb_f->type() == UPB_TYPE(ENUM)) {
+    // We set the enum default symbolically.
+    upb_f->set_default(f->default_value_enum()->name());
+    upb_f->set_subtype_name(f->enum_type()->full_name());
+  } else {
+    // Set field default for primitive types.  Need to switch on the upb type
+    // rather than the proto2 type, because upb_f->type() may have been changed
+    // from BYTES to MESSAGE for a weak field.
+    switch (upb_types[upb_f->type()].inmemory_type) {
+      case UPB_CTYPE_INT32:
+        upb_f->set_default(MakeValue(f->default_value_int32()));
+        break;
+      case UPB_CTYPE_INT64:
+        upb_f->set_default(
+            MakeValue(static_cast<int64_t>(f->default_value_int64())));
+        break;
+      case UPB_CTYPE_UINT32:
+        upb_f->set_default(MakeValue(f->default_value_uint32()));
+        break;
+      case UPB_CTYPE_UINT64:
+        upb_f->set_default(
+            MakeValue(static_cast<uint64_t>(f->default_value_uint64())));
+        break;
+      case UPB_CTYPE_DOUBLE:
+        upb_f->set_default(MakeValue(f->default_value_double()));
+        break;
+      case UPB_CTYPE_FLOAT:
+        upb_f->set_default(MakeValue(f->default_value_float()));
+        break;
+      case UPB_CTYPE_BOOL:
+        upb_f->set_default(MakeValue(f->default_value_bool()));
+        break;
+      case UPB_CTYPE_BYTEREGION:
+        upb_f->set_default(f->default_value_string());
+        break;
+    }
+  }
+  return md->AddField(upb_f, &upb_f) ? upb_f : NULL;
+}
+
+upb::MessageDef *NewEmptyMessageDef(const Message& m, void *owner) {
+  upb::MessageDef *md = upb::MessageDef::New(owner);
+  md->set_full_name(m.GetDescriptor()->full_name());
+  md->prototype = GetPrototypeForMessage(m);
+  return md;
+}
+
+upb::EnumDef* NewEnumDef(const EnumDescriptor* desc, void *owner) {
+  upb::EnumDef* e = upb::EnumDef::New(owner);
+  e->set_full_name(desc->full_name());
+  for (int i = 0; i < desc->value_count(); i++) {
+    const EnumValueDescriptor* val = desc->value(i);
+    bool success = e->AddValue(val->name(), val->number());
+    assert(success);
+    (void)success;
+  }
+  return e;
+}
+
+void AddAllFields(upb::MessageDef* md) {
+  const Descriptor* d =
+      static_cast<const Message*>(md->prototype)->GetDescriptor();
+  for (int i = 0; i < d->field_count(); i++) {
+    const FieldDescriptor* field = d->field(i);
+#ifdef UPB_GOOGLE3
+    // Skip lazy fields for now since we can't properly handle them.
+    if (field->options().lazy()) continue;
+#endif
+    if (field->is_extension()) continue;  // Extensions not supported yet.
+    AddFieldDef(field, md);
+  }
+}
+
+upb::MessageDef *NewFullMessageDef(const Message& m, void *owner) {
+  upb::MessageDef* md = NewEmptyMessageDef(m, owner);
+  AddAllFields(md);
+  // TODO(haberman): add unknown field handler and extensions.
+  return md;
+}
+
+typedef std::map<std::string, upb::Def*> SymbolMap;
+
+static upb::MessageDef* NewFinalMessageDefHelper(const Message& m, void *owner,
+                                                 SymbolMap* symbols) {
+  upb::MessageDef* md = NewFullMessageDef(m, owner);
+  // Must do this before processing submessages to prevent infinite recursion.
+  (*symbols)[std::string(md->full_name())] = md->AsDef();
+
+  for (upb::MessageDef::Iterator i(md); !i.Done(); i.Next()) {
+    upb::FieldDef* f = i.field();
+    if (!f->HasSubDef()) continue;
+    SymbolMap::iterator iter = symbols->find(f->subtype_name());
+    upb::Def* subdef;
+    if (iter != symbols->end()) {
+      subdef = iter->second;
+    } else {
+      const FieldDescriptor* proto2_f =
+          m.GetDescriptor()->FindFieldByNumber(f->number());
+      if (f->type() == UPB_TYPE(ENUM)) {
+        subdef = NewEnumDef(proto2_f->enum_type(), owner)->AsDef();
+        (*symbols)[std::string(subdef->full_name())] = subdef;
+      } else {
+        assert(f->IsSubmessage());
+        const Message* prototype = GetPrototypeForField(m, proto2_f);
+        subdef = NewFinalMessageDefHelper(*prototype, owner, symbols)->AsDef();
+      }
+    }
+    f->set_subdef(subdef);
+  }
+  return md;
+}
+
+const upb::MessageDef* NewFinalMessageDef(const Message& m, void *owner) {
+  SymbolMap symbols;
+  upb::MessageDef* ret = NewFinalMessageDefHelper(m, owner, &symbols);
+
+  // Gather every def recorded in the symbol map and finalize them together.
+  std::vector<upb::Def*> defs;
+  for (SymbolMap::iterator it = symbols.begin(); it != symbols.end(); ++it) {
+    defs.push_back(it->second);
+  }
+  Status status;
+  bool success = Def::Finalize(defs, &status);
+  assert(success);
+  (void)success;
+
+  // Unref all defs except the top-level one that we are returning.
+  upb::Def* top = ret->AsDef();
+  for (size_t i = 0; i < defs.size(); i++) {
+    if (defs[i] != top) defs[i]->Unref(owner);
+  }
+
+  return ret;
+}
+
+}  // namespace proto2_bridge
+}  // namespace upb
diff --git a/bindings/cpp/upb/proto2_bridge.hpp b/bindings/cpp/upb/proto2_bridge.hpp
new file mode 100644
index 0000000..ace08ce
--- /dev/null
+++ b/bindings/cpp/upb/proto2_bridge.hpp
@@ -0,0 +1,170 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// A bridge between upb and proto2, allows populating proto2 generated
+// classes using upb's parser, translating between descriptors and defs, etc.
+//
+// This is designed to be able to be compiled against either the open-source
+// version of protocol buffers or the Google-internal proto2.  The two are
+// the same in most ways, but live in different namespaces (proto2 vs
+// google::protobuf) and have a few other more minor differences.
+//
+// The bridge gives you a lot of control over which fields will be written to
+// the message (fields that are not written will just be skipped), and whether
+// unknown fields are written to the UnknownFieldSet.  This can save a lot of
+// work if the client only cares about some subset of the fields.
+//
+// Example usage:
+//
+//   // Build a def that will have all fields and parse just like proto2 would.
+//   const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto());
+//
+//   // JIT the parser; should only be done once ahead-of-time.
+//   upb::Handlers* handlers = upb::NewHandlersForMessage(md);
+//   upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers);
+//   handlers->Unref();
+//
+//   // The actual parsing.
+//   MyProto proto;
+//   upb::Decoder decoder;
+//   upb::StringSource source(buf, len);
+//   decoder.ResetPlan(plan, 0);
+//   decoder.ResetInput(source.AllBytes(), &proto);
+//   CHECK(decoder.Decode() == UPB_OK) << decoder.status();
+//
+// To parse only one field and skip all others:
+//
+//   const upb::MessageDef* md =
+//       upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype());
+//   upb::proto2_bridge::AddFieldDef(
+//       MyProto::descriptor()->FindFieldByName("my_field"), md);
+//   upb::Finalize(md);
+//
+//   // Now continue with "JIT the parser" from above.
+//
+// Note that there is currently no support for
+// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate
+// DescriptorPool and MessageFactory for extensions.  Since this is a property
+// of the input in proto2, it's difficult to build a plan ahead-of-time that
+// can properly support this.  If it's an important use case, the caller should
+// probably build a upb plan explicitly.
+
+#ifndef UPB_PROTO2_BRIDGE
+#define UPB_PROTO2_BRIDGE
+
+#include <vector>
+
+namespace google {
+namespace protobuf {
+class Descriptor;
+class EnumDescriptor;
+class FieldDescriptor;
+class FileDescriptor;
+class Message;
+}  // namespace protobuf
+}  // namespace google
+
+namespace proto2 {
+class Descriptor;
+class EnumDescriptor;
+class FieldDescriptor;
+class FileDescriptor;
+class Message;
+}  // namespace proto2
+
+namespace upb {
+
+class Def;
+class EnumDef;  // Needed by the NewEnumDef() declarations.
+class FieldDef;
+class MessageDef;
+
+namespace proto2_bridge {
+
+// Unfinalized defs ////////////////////////////////////////////////////////////
+
+// Creation of UNFINALIZED defs.  All of these functions return defs that are
+// still mutable and have not been finalized.  They must be finalized before
+// using them to parse anything.  This is useful if you want more control over
+// the process of constructing defs, eg. to add the specific set of fields you
+// care about.
+
+// Creates a new upb::MessageDef that corresponds to the type in the given
+// prototype message.  The MessageDef will not have any fields added to it.
+upb::MessageDef *NewEmptyMessageDef(const proto2::Message& m, void *owner);
+upb::MessageDef *NewEmptyMessageDef(const google::protobuf::Message& desc,
+                                    void *owner);
+
+// Adds a new upb::FieldDef to the given MessageDef corresponding to the given
+// FieldDescriptor.  The FieldDef will be given an accessor and offset so that
+// it can be used to read and write data into the proto2::Message classes.
+// The given MessageDef must have been constructed with NewEmptyMessageDef()
+// and f->containing_type() must correspond to the message that was used.
+//
+// Any submessage, group, or enum fields will be given symbolic references to
+// the subtype, which must be resolved before the MessageDef can be finalized.
+//
+// On success, returns the FieldDef that was added (caller does not own a ref).
+// If an existing field had the same name or number, returns NULL.
+upb::FieldDef* AddFieldDef(const proto2::FieldDescriptor* f,
+                           upb::MessageDef* md);
+upb::FieldDef* AddFieldDef(const google::protobuf::FieldDescriptor* f,
+                           upb::MessageDef* md);
+
+// Given a MessageDef that was constructed with NewEmptyMessageDef(), adds
+// FieldDefs for all fields defined in the original message, but not for any
+// extensions or unknown fields.  The given MessageDef must not have any fields
+// that have the same name or number as any of the fields we are adding (the
+// easiest way to guarantee this is to start with an empty MessageDef).
+//
+// TODO(review): returns void, so failures are not signalled via a return value.
+void AddAllFields(upb::MessageDef* md);
+
+// TODO(haberman): Add:
+// // Adds a handler that will store unknown fields in the UnknownFieldSet.
+// void AddUnknownFieldHandler(upb::MessageDef* md);
+
+// Returns a new upb::MessageDef that contains handlers for all fields, unknown
+// fields, and any extensions in the descriptor's pool.  The resulting
+// def/handlers should be equivalent to the generated code constructed by the
+// protobuf compiler (or the code in DynamicMessage) for the given type.
+// The subdefs for message/enum fields (if any) will be referenced symbolically,
+// and will need to be resolved before being finalized.
+//
+// TODO(haberman): Add missing support (LazyField, MessageSet, and extensions).
+//
+// TODO(haberman): possibly add a similar function that lets you supply a
+// separate DescriptorPool and MessageFactory for extensions, to support
+// proto2's io::CodedInputStream::SetExtensionRegistry().
+upb::MessageDef* NewFullMessageDef(const proto2::Message& m, void *owner);
+upb::MessageDef* NewFullMessageDef(const google::protobuf::Message& m,
+                                   void *owner);
+
+// Returns a new upb::EnumDef that corresponds to the given EnumDescriptor.
+// Caller owns a ref on the returned EnumDef.
+upb::EnumDef* NewEnumDef(const proto2::EnumDescriptor* desc, void *owner);
+upb::EnumDef* NewEnumDef(const google::protobuf::EnumDescriptor* desc,
+                         void *owner);
+
+// Finalized defs //////////////////////////////////////////////////////////////
+
+// These functions return FINALIZED defs, meaning that they are immutable and
+// ready for use.  Since they are immutable you cannot make any further changes
+// to eg. the set of fields, but these functions are more convenient if you
+// simply want to parse a message exactly how the built-in proto2 parser would.
+
+// Creates and returns a finalized MessageDef for the given message and its
+// entire type tree that will include all fields and unknown handlers (ie. it
+// will parse just like proto2 would).
+const upb::MessageDef* NewFinalMessageDef(const proto2::Message& m,
+                                          void *owner);
+const upb::MessageDef* NewFinalMessageDef(const google::protobuf::Message& m,
+                                          void *owner);
+
+}  // namespace proto2_bridge
+}  // namespace upb
+
+#endif
diff --git a/bindings/cpp/upb/upb.hpp b/bindings/cpp/upb/upb.hpp
index 226859c..48c2708 100644
--- a/bindings/cpp/upb/upb.hpp
+++ b/bindings/cpp/upb/upb.hpp
@@ -10,6 +10,16 @@
 #include "upb/upb.h"
 #include <iostream>
 
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11)
+#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
+  class_name() = delete; \
+  ~class_name() = delete;
+#else
+#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \
+  class_name(); \
+  ~class_name();
+#endif
+
 namespace upb {
 
 typedef upb_success_t Success;
@@ -31,11 +41,35 @@ class Status : public upb_status {
   void Clear() { upb_status_clear(this); }
 };
 
-class Value : public upb_value {
- public:
-  Value(const upb_value& val) { *this = val; }
-  Value() {}
-};
+typedef upb_value Value;
+
+template <typename T> T GetValue(Value v);
+template <typename T> Value MakeValue(T v);
+
+#define UPB_VALUE_ACCESSORS(type, ctype) \
+  template <> inline ctype GetValue<ctype>(Value v) { \
+    return upb_value_get ## type(v); \
+  } \
+  template <> inline Value MakeValue<ctype>(ctype v) { \
+    return upb_value_ ## type(v); \
+  }
+// Each use defines two function specializations, so it takes no trailing ';'.
+UPB_VALUE_ACCESSORS(double, double)
+UPB_VALUE_ACCESSORS(float,  float)
+UPB_VALUE_ACCESSORS(int32,  int32_t)
+UPB_VALUE_ACCESSORS(int64,  int64_t)
+UPB_VALUE_ACCESSORS(uint32, uint32_t)
+UPB_VALUE_ACCESSORS(uint64, uint64_t)
+UPB_VALUE_ACCESSORS(bool,   bool)
+
+#undef UPB_VALUE_ACCESSORS
+
+template <typename T> inline T* GetPtrValue(Value v) {
+  return static_cast<T*>(upb_value_getptr(v));
+}
+template <typename T> inline Value MakePtrValue(T* v) {
+  return upb_value_ptr(static_cast<void*>(v));
+}
 
 INLINE std::ostream& operator<<(std::ostream& out, const Status& status) {
   out << status.GetString();
diff --git a/bindings/lua/upb.c b/bindings/lua/upb.c
index 56c5be9..4cce4b6 100644
--- a/bindings/lua/upb.c
+++ b/bindings/lua/upb.c
@@ -37,11 +37,15 @@ static uint32_t lupb_touint32(lua_State *L, int narg, const char *name) {
   return n;
 }
 
-static void lupb_pushstring(lua_State *L, const upb_byteregion *r) {
-  // TODO: could avoid a copy in the case that the string is contiguous.
-  char *str = upb_byteregion_strdup(r);
-  lua_pushlstring(L, str, upb_byteregion_len(r));
-  free(str);
+static void lupb_pushstring(lua_State *L, const upb_strref *ref) {
+  if (ref->ptr) {
+    lua_pushlstring(L, ref->ptr, ref->len);
+  } else {
+    // Lua requires a contiguous string; must copy+allocate.
+    char *str = upb_strref_dup(ref);
+    lua_pushlstring(L, str, ref->len);
+    free(str);
+  }
 }
 
 static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
@@ -73,7 +77,7 @@ static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
 
 // Returns a scalar value (ie. not a submessage) as a upb_value.
 static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f,
-                               upb_byteregion *ref) {
+                               upb_strref *ref) {
   assert(!upb_issubmsg(f));
   upb_value val;
   if (upb_fielddef_type(f) == UPB_TYPE(BOOL)) {
@@ -135,7 +139,7 @@ static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f,
 }
 
 static void lupb_typecheck(lua_State *L, int narg, upb_fielddef *f) {
-  upb_byteregion ref;
+  upb_strref ref;
   lupb_getvalue(L, narg, f, &ref);
 }
 
@@ -298,8 +302,8 @@ static void lupb_fielddef_set(lua_State *L, upb_fielddef *f,
   } else if (streql(field, "default_value")) {
     if (!upb_fielddef_type(f))
       luaL_error(L, "Must set type before setting default_value");
-    upb_byteregion region;
-    upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &region));
+    upb_strref ref;
+    upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &ref));
   } else {
     luaL_error(L, "Cannot set fielddef member '%s'", field);
   }
@@ -778,7 +782,7 @@ static upb_flow_t lupb_msg_string(void *m, upb_value fval, upb_value val,
   lua_State *L = *(lua_State**)m;
   int offset = array ? lua_rawlen(L, -1) : f->offset;
   if (!lua_checkstack(L, 1)) luaL_error(L, "stack full");
-  lupb_pushstring(L, upb_value_getbyteregion(val));
+  lupb_pushstring(L, upb_value_getstrref(val));
   lua_rawseti(L, -2, offset);
   return UPB_CONTINUE;
 }
diff --git a/bindings/python/upb.c b/bindings/python/upb.c
index 8f36f70..497074b 100644
--- a/bindings/python/upb.c
+++ b/bindings/python/upb.c
@@ -612,9 +612,8 @@ static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval
 static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value val) {
   PyObject **str = PyUpb_Accessor_GetPtr(m, fval);
   if (*str) { Py_DECREF(*str); }
-  upb_byteregion *r = upb_value_getbyteregion(val);
-  *str = PyString_FromStringAndSize(NULL, upb_byteregion_len(r));
-  upb_byteregion_copyall(r, PyString_AsString(*str));
+  *str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
+  upb_strref_read(upb_value_getstrref(val), PyString_AsString(*str));
   upb_stdmsg_sethas(m, fval);
   return UPB_CONTINUE;
 }
@@ -622,9 +621,8 @@ static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value v
 static upb_flow_t PyUpb_Message_AppendStringValue(void *a, upb_value fval, upb_value val) {
   (void)fval;
   PyObject **elem = upb_stdarray_append(a, sizeof(void*));
-  upb_byteregion *r = upb_value_getbyteregion(val);
-  *elem = PyString_FromStringAndSize(NULL, upb_byteregion_len(r));
-  upb_byteregion_copyall(r, PyString_AsString(*elem));
+  *elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
+  upb_strref_read(upb_value_getstrref(val), PyString_AsString(*elem));
   return UPB_CONTINUE;
 }
 
diff --git a/tests/test.proto b/tests/test.proto
index f3dde24..e634ed2 100644
--- a/tests/test.proto
+++ b/tests/test.proto
@@ -1,14 +1,10 @@
 
 // A series of messages with various kinds of cycles in them.
-//      +-+---+    +---+
-//      V |   |    |   |
-// A -> B-+-> C -> D<--+
-// ^          |    |
-// +----------+----+
-//
-// This tests the following cases:
-//  - B and C are together in multiple cycles
-//  - B and D are cycles to themselves.
+//      +-+---+    +---+    +---+
+//      V |   |    V   |    V   |
+// A -> B-+-> C -> D---+--->E---+
+// ^          |`---|--------^
+// +----------+----+        F
 
 message A {
   optional B b = 1;
@@ -23,11 +19,21 @@ message C {
   optional A a = 1;
   optional B b = 2;
   optional D d = 3;
+  optional E e = 4;
 }
 
 message D {
   optional A a = 1;
   optional D d = 2;
+  optional E e = 3;
+}
+
+message E {
+  optional E e = 1;
+}
+
+message F {
+  optional E e = 1;
 }
 
 // A proto with a bunch of simple primitives.
diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc
index 5182217..4d70e85 100644
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@@ -15,6 +15,7 @@
 #include "upb/upb.hpp"
 #include "upb/pb/decoder.hpp"
 #include "upb/pb/glue.hpp"
+#include "upb_test.h"
 
 static void TestSymbolTable(const char *descriptor_file) {
   upb::SymbolTable *s = upb::SymbolTable::New();
@@ -23,20 +24,20 @@ static void TestSymbolTable(const char *descriptor_file) {
     std::cerr << "Couldn't load descriptor: " << status;
     exit(1);
   }
-  const upb::MessageDef *md = s->LookupMessage("A");
-  assert(md);
+  const upb::MessageDef *md = s->LookupMessage("A", &md);
+  ASSERT(md);
 
   s->Unref();
-  md->Unref();
+  md->Unref(&md);
 }
 
 static void TestByteStream() {
   upb::StringSource stringsrc;
   stringsrc.Reset("testing", 7);
   upb::ByteRegion* byteregion = stringsrc.AllBytes();
-  assert(byteregion->FetchAll() == UPB_BYTE_OK);
+  ASSERT(byteregion->FetchAll() == UPB_BYTE_OK);
   char* str = byteregion->StrDup();
-  assert(strcmp(str, "testing") == 0);
+  ASSERT(strcmp(str, "testing") == 0);
   free(str);
 }
 
diff --git a/tests/test_decoder.c b/tests/test_decoder.cc
index 14d0e2d..13403bb 100644
--- a/tests/test_decoder.c
+++ b/tests/test_decoder.cc
@@ -21,6 +21,10 @@
  *   of submsg/sequences, etc.
  */
 
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS  // For PRIu32, etc.
+#endif
+
 #include <inttypes.h>
 #include <stdarg.h>
 #include <stdint.h>
@@ -32,95 +36,133 @@
 #include "upb/upb.h"
 #include "upb_test.h"
 
+// Copied from decoder.c, since this is not a public interface.
 typedef struct {
-  char *buf;
-  size_t len;
-} buffer;
+  uint8_t native_wire_type;
+  bool is_numeric;
+} upb_decoder_typeinfo;
+
+static const upb_decoder_typeinfo upb_decoder_types[] = {
+  {UPB_WIRE_TYPE_END_GROUP,   false},  // ENDGROUP
+  {UPB_WIRE_TYPE_64BIT,       true},   // DOUBLE
+  {UPB_WIRE_TYPE_32BIT,       true},   // FLOAT
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT32
+  {UPB_WIRE_TYPE_64BIT,       true},   // FIXED64
+  {UPB_WIRE_TYPE_32BIT,       true},   // FIXED32
+  {UPB_WIRE_TYPE_VARINT,      true},   // BOOL
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // STRING
+  {UPB_WIRE_TYPE_START_GROUP, false},  // GROUP
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // MESSAGE
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // BYTES
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // ENUM
+  {UPB_WIRE_TYPE_32BIT,       true},   // SFIXED32
+  {UPB_WIRE_TYPE_64BIT,       true},   // SFIXED64
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT64
+};
+
+
+class buffer {
+ public:
+  buffer(const void *data, size_t len) : len_(0) { append(data, len); }
+  explicit buffer(const char *data) : len_(0) { append(data); }
+  explicit buffer(size_t len) : len_(len) { memset(buf_, 0, len); }
+  buffer(const buffer& buf) : len_(0) { append(buf); }
+  buffer() : len_(0) {}
+
+  void append(const void *data, size_t len) {
+    ASSERT_NOCOUNT(len + len_ < sizeof(buf_));  // Leaves room for the NUL.
+    memcpy(buf_ + len_, data, len);
+    len_ += len;
+    buf_[len_] = '\0';  // A char NUL; NULL is a pointer constant.
+  }
 
-// Mem is initialized to NULL.
-buffer *buffer_new(size_t len) {
-  buffer *buf = malloc(sizeof(*buf));
-  buf->buf = malloc(len);
-  buf->len = len;
-  memset(buf->buf, 0, buf->len);
-  return buf;
-}
+  void append(const buffer& buf) {
+    append(buf.buf_, buf.len_);
+  }
 
-buffer *buffer_new2(const void *data, size_t len) {
-  buffer *buf = buffer_new(len);
-  memcpy(buf->buf, data, len);
-  return buf;
-}
+  void append(const char *str) {
+    append(str, strlen(str));
+  }
 
-buffer *buffer_new3(const char *data) {
-  return buffer_new2(data, strlen(data));
-}
+  void vappendf(const char *fmt, va_list args) {
+    size_t avail = sizeof(buf_) - len_;
+    size_t size = vsnprintf(buf_ + len_, avail, fmt, args);
+    ASSERT_NOCOUNT(avail > size);
+    len_ += size;
+  }
 
-buffer *buffer_dup(buffer *buf) { return buffer_new2(buf->buf, buf->len); }
+  void appendf(const char *fmt, ...) {
+    va_list args;
+    va_start(args, fmt);
+    vappendf(fmt, args);
+    va_end(args);
+  }
 
-void buffer_free(buffer *buf) {
-  free(buf->buf);
-  free(buf);
-}
+  void assign(const buffer& buf) {
+    clear();
+    append(buf);
+  }
 
-void buffer_appendf(buffer *buf, const char *fmt, ...) {
-  va_list args;
-  va_start(args, fmt);
-  size_t size = buf->len;
-  buf->len += upb_vrprintf(&buf->buf, &size, buf->len, fmt, args);
-  va_end(args);
-}
+  bool eql(const buffer& other) const {
+    return len_ == other.len_ && memcmp(buf_, other.buf_, len_) == 0;
+  }
 
-void buffer_cat(buffer *buf, buffer *buf2) {
-  size_t newlen = buf->len + buf2->len;
-  buf->buf = realloc(buf->buf, newlen);
-  memcpy(buf->buf + buf->len, buf2->buf, buf2->len);
-  buf->len = newlen;
-  buffer_free(buf2);
-}
+  void clear() { len_ = 0; buf_[0] = '\0'; }  // buf() now reads as "".
+  size_t len() const { return len_; }
+  const char *buf() const { return buf_; }
 
-bool buffer_eql(buffer *buf, buffer *buf2) {
-  return buf->len == buf2->len && memcmp(buf->buf, buf2->buf, buf->len) == 0;
-}
+ private:
+  // Has to be big enough for the largest string used in the test.
+  char buf_[32768];
+  size_t len_;
+};
 
 
 /* Routines for building arbitrary protos *************************************/
 
-buffer *cat(buffer *arg1, ...) {
-  va_list ap;
-  buffer *arg;
-  va_start(ap, arg1);
-  while ((arg = va_arg(ap, buffer*)) != NULL) {
-    buffer_cat(arg1, arg);
-  }
-  va_end(ap);
-  return arg1;
+const buffer empty;
+
+buffer cat(const buffer& a, const buffer& b,
+           const buffer& c = empty,
+           const buffer& d = empty,
+           const buffer& e = empty) {
+  buffer ret;
+  ret.append(a);
+  ret.append(b);
+  ret.append(c);
+  ret.append(d);
+  ret.append(e);
+  return ret;
 }
 
-buffer *varint(uint64_t x) {
-  buffer *buf = buffer_new(UPB_PB_VARINT_MAX_LEN + 1);
-  buf->len = upb_vencode64(x, buf->buf);
-  return buf;
+buffer varint(uint64_t x) {
+  char buf[UPB_PB_VARINT_MAX_LEN];
+  size_t len = upb_vencode64(x, buf);
+  return buffer(buf, len);
 }
 
 // TODO: proper byte-swapping for big-endian machines.
-buffer *fixed32(void *data) { return buffer_new2(data, 4); }
-buffer *fixed64(void *data) { return buffer_new2(data, 8); }
-
-buffer *delim(buffer *buf) { return cat( varint(buf->len), buf, NULL ); }
-buffer *uint32(uint32_t u32) { return fixed32(&u32); }
-buffer *uint64(uint64_t u64) { return fixed64(&u64); }
-buffer *flt(float f) { return fixed32(&f); }
-buffer *dbl(double d) { return fixed64(&d); }
-buffer *zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
-buffer *zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
-
-buffer *tag(uint32_t fieldnum, char wire_type) {
+buffer fixed32(void *data) { return buffer(data, 4); }
+buffer fixed64(void *data) { return buffer(data, 8); }
+
+buffer delim(const buffer& buf) { return cat(varint(buf.len()), buf); }
+buffer uint32(uint32_t u32) { return fixed32(&u32); }
+buffer uint64(uint64_t u64) { return fixed64(&u64); }
+buffer flt(float f) { return fixed32(&f); }
+buffer dbl(double d) { return fixed64(&d); }
+buffer zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
+buffer zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
+
+buffer tag(uint32_t fieldnum, char wire_type) {
   return varint((fieldnum << 3) | wire_type);
 }
 
-buffer *submsg(uint32_t fn, buffer *buf) {
-  return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf), NULL );
+buffer submsg(uint32_t fn, const buffer& buf) {
+  return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
 }
 
 
@@ -128,11 +170,26 @@ buffer *submsg(uint32_t fn, buffer *buf) {
 
 // The handlers simply append to a string indicating what handlers were called.
 // This string is similar to protobuf text format but fields are referred to by
-// number instead of name and sequences are explicitly delimited.
+// number instead of name and sequences are explicitly delimited.  We indent
+// using the closure depth to test that the stack of closures is properly
+// handled.
+
+int closures[UPB_MAX_NESTING];
+buffer output;
+
+// Appends one two-space indent to buf for each nesting level in depth.
+void indentbuf(buffer *buf, int depth) {
+  for (int left = depth; left > 0; --left) buf->append("  ", 2);
+}
+
+// Indents the global output buffer.  The closure is read as a pointer to an
+// int, and that int is the number of indent levels to emit.
+void indent(void *depth) { indentbuf(&output, *static_cast<int*>(depth)); }
 
 #define VALUE_HANDLER(member, fmt) \
   upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \
-    buffer_appendf(closure, "%" PRIu32 ":%" fmt "; ",                         \
+    indent(closure);                                                          \
+    output.appendf("%" PRIu32 ":%" fmt "\n",                                  \
                    upb_value_getuint32(fval), upb_value_get ## member(val));  \
     return UPB_CONTINUE;                                                      \
   }
@@ -145,7 +202,8 @@ VALUE_HANDLER(float, "g")
 VALUE_HANDLER(double, "g")
 
 upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
-  buffer_appendf(closure, "%" PRIu32 ":%s; ",
+  indent(closure);
+  output.appendf("%" PRIu32 ":%s\n",
                  upb_value_getuint32(fval),
                  upb_value_getbool(val) ? "true" : "false");
   return UPB_CONTINUE;
@@ -153,34 +211,49 @@ upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
 
 upb_flow_t value_string(void *closure, upb_value fval, upb_value val) {
   // Note: won't work with strings that contain NULL.
+  indent(closure);
   char *str = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), str);
+  output.appendf("%" PRIu32 ":%s\n", upb_value_getuint32(fval), str);
   free(str);
   return UPB_CONTINUE;
 }
 
 upb_sflow_t startsubmsg(void *closure, upb_value fval) {
-  buffer_appendf(closure, "%" PRIu32 ":{ ", upb_value_getuint32(fval));
-  return UPB_CONTINUE_WITH(closure);
+  indent(closure);
+  output.appendf("%" PRIu32 ":{\n", upb_value_getuint32(fval));
+  return UPB_CONTINUE_WITH(((int*)closure) + 1);
 }
 
 upb_flow_t endsubmsg(void *closure, upb_value fval) {
-  (void)fval;
-  buffer_appendf(closure, "} ");
+  indent(closure);
+  output.append("}\n");
   return UPB_CONTINUE;
 }
 
 upb_sflow_t startseq(void *closure, upb_value fval) {
-  buffer_appendf(closure, "%" PRIu32 ":[ ", upb_value_getuint32(fval));
-  return UPB_CONTINUE_WITH(closure);
+  indent(closure);
+  output.appendf("%" PRIu32 ":[\n", upb_value_getuint32(fval));
+  return UPB_CONTINUE_WITH(((int*)closure) + 1);
 }
 
 upb_flow_t endseq(void *closure, upb_value fval) {
-  (void)fval;
-  buffer_appendf(closure, "] ");
+  indent(closure);
+  output.append("]\n");
   return UPB_CONTINUE;
 }
 
+upb_flow_t startmsg(void *closure) {
+  indent(closure);
+  output.append("<\n");
+  return UPB_CONTINUE;
+}
+
+void endmsg(void *closure, upb_status *status) {
+  (void)status;
+  indent(closure);
+  output.append(">\n");
+}
+
 void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated,
            upb_value_handler *handler) {
   upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated);
@@ -221,6 +294,9 @@ void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type,
 }
 
 void reghandlers(upb_mhandlers *m) {
+  upb_mhandlers_setstartmsg(m, &startmsg);
+  upb_mhandlers_setendmsg(m, &endmsg);
+
   // Register handlers for each type.
   reg(m, UPB_TYPE(DOUBLE),   &value_double);
   reg(m, UPB_TYPE(FLOAT),    &value_float);
@@ -267,7 +343,7 @@ size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) {
 }
 
 upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
-  upb_seamsrc *src = _src;
+  upb_seamsrc *src = (upb_seamsrc*)_src;
   assert(ofs < src->len);
   if (ofs == src->len) {
     upb_status_seteof(&src->bytesrc.status);
@@ -279,7 +355,7 @@ upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
 
 void upb_seamsrc_copy(const void *_src, uint64_t ofs,
                       size_t len, char *dst) {
-  const upb_seamsrc *src = _src;
+  const upb_seamsrc *src = (const upb_seamsrc*)_src;
   assert(ofs + len <= src->len);
   memcpy(dst, src->str + ofs, len);
 }
@@ -290,7 +366,7 @@ void upb_seamsrc_discard(void *src, uint64_t ofs) {
 }
 
 const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
-  const upb_seamsrc *src = _s;
+  const upb_seamsrc *src = (const upb_seamsrc*)_s;
   *len = upb_seamsrc_avail(src, ofs);
   return src->str + ofs;
 }
@@ -314,7 +390,7 @@ void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) {
 }
 
 void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) {
-  ASSERT(seam1 <= seam2);
+  assert(seam1 <= seam2);
   s->seam1 = seam1;
   s->seam2 = seam2;
   s->byteregion.discard = 0;
@@ -337,83 +413,68 @@ upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) {
 /* Running of test cases ******************************************************/
 
 upb_decoderplan *plan;
-
-void run_decoder(buffer *proto, buffer *expected_output) {
+#define LINE(x) x "\n"
+void run_decoder(const buffer& proto, const buffer* expected_output) {
   upb_seamsrc src;
-  upb_seamsrc_init(&src, proto->buf, proto->len);
+  upb_seamsrc_init(&src, proto.buf(), proto.len());
   upb_decoder d;
   upb_decoder_init(&d);
   upb_decoder_resetplan(&d, plan, 0);
-  for (size_t i = 0; i < proto->len; i++) {
-    for (size_t j = i; j < proto->len; j++) {
+  for (size_t i = 0; i < proto.len(); i++) {
+    for (size_t j = i; j < UPB_MIN(proto.len(), i + 5); j++) {
       upb_seamsrc_resetseams(&src, i, j);
       upb_byteregion *input = upb_seamsrc_allbytes(&src);
-      buffer *output = buffer_new(0);
-      upb_decoder_resetinput(&d, input, output);
+      output.clear();
+      upb_decoder_resetinput(&d, input, &closures[0]);
       upb_success_t success = UPB_SUSPENDED;
       while (success == UPB_SUSPENDED)
         success = upb_decoder_decode(&d);
       ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK));
       if (expected_output) {
-        ASSERT(success == UPB_OK);
+        ASSERT_STATUS(success == UPB_OK, upb_decoder_status(&d));
         // The input should be fully consumed.
         ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input));
         ASSERT(upb_byteregion_discardofs(input) ==
                upb_byteregion_endofs(input));
-        if (!buffer_eql(output, expected_output)) {
+        if (!output.eql(*expected_output)) {
           fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
-                  output->buf, expected_output->buf);
+                  output.buf(), expected_output->buf());
         }
-        ASSERT(strcmp(output->buf, expected_output->buf) == 0);
+        ASSERT(output.eql(*expected_output));
       } else {
         ASSERT(success == UPB_ERROR);
       }
-      buffer_free(output);
     }
   }
-  upb_seamsrc_uninit(&src);
   upb_decoder_uninit(&d);
-  buffer_free(proto);
-}
-
-void assert_successful_parse_at_eof(buffer *proto, const char *expected_fmt,
-                                    va_list args) {
-  buffer *expected_text = buffer_new(0);
-  size_t size = expected_text->len;
-  expected_text->len += upb_vrprintf(&expected_text->buf, &size,
-                                     expected_text->len, expected_fmt, args);
-  run_decoder(proto, expected_text);
-  buffer_free(expected_text);
+  upb_seamsrc_uninit(&src);
 }
 
-void assert_does_not_parse_at_eof(buffer *proto) {
-  run_decoder(proto, NULL);
-}
+const static buffer thirty_byte_nop = buffer(cat(
+    tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer(30)) ));
 
-void assert_successful_parse(buffer *proto, const char *expected_fmt, ...) {
-  // The JIT is only used for data >=20 bytes from end-of-buffer, so
-  // repeat once with no-op padding data at the end of buffer.
-  va_list args, args2;
+void assert_successful_parse(const buffer& proto,
+                             const char *expected_fmt, ...) {
+  buffer expected_text;
+  va_list args;
   va_start(args, expected_fmt);
-  va_copy(args2, args);
-  assert_successful_parse_at_eof(buffer_dup(proto), expected_fmt, args);
-  assert_successful_parse_at_eof(
-      cat( proto,
-           tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer_new(30)),
-           NULL ),
-      expected_fmt, args2);
+  expected_text.vappendf(expected_fmt, args);
   va_end(args);
-  va_end(args2);
+  // The JIT is only used for data >=20 bytes from end-of-buffer, so
+  // repeat once with no-op padding data at the end of buffer.
+  run_decoder(proto, &expected_text);
+  run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
 }
 
-void assert_does_not_parse(buffer *proto) {
+void assert_does_not_parse_at_eof(const buffer& proto) {
+  run_decoder(proto, NULL);
+}
+
+void assert_does_not_parse(const buffer& proto) {
   // The JIT is only used for data >=20 bytes from end-of-buffer, so
   // repeat once with no-op padding data at the end of buffer.
-  assert_does_not_parse_at_eof(buffer_dup(proto));
-  assert_does_not_parse_at_eof(
-      cat( proto,
-           tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim( buffer_new(30)),
-           NULL ));
+  assert_does_not_parse_at_eof(proto);
+  assert_does_not_parse_at_eof(cat( proto, thirty_byte_nop ));
 }
 
 
@@ -421,19 +482,19 @@ void assert_does_not_parse(buffer *proto) {
 
 void test_premature_eof_for_type(upb_fieldtype_t type) {
   // Incomplete values for each wire type.
-  static const char *incompletes[] = {
-    "\x80",    // UPB_WIRE_TYPE_VARINT
-    "abcdefg", // UPB_WIRE_TYPE_64BIT
-    "\x80",    // UPB_WIRE_TYPE_DELIMITED (partial length)
-    NULL,      // UPB_WIRE_TYPE_START_GROUP (no value required)
-    NULL,      // UPB_WIRE_TYPE_END_GROUP (no value required)
-    "abc"      // UPB_WIRE_TYPE_32BIT
+  static const buffer incompletes[6] = {
+    buffer("\x80"),     // UPB_WIRE_TYPE_VARINT
+    buffer("abcdefg"),  // UPB_WIRE_TYPE_64BIT
+    buffer("\x80"),     // UPB_WIRE_TYPE_DELIMITED (partial length)
+    buffer(),           // UPB_WIRE_TYPE_START_GROUP (no value required)
+    buffer(),           // UPB_WIRE_TYPE_END_GROUP (no value required)
+    buffer("abc")       // UPB_WIRE_TYPE_32BIT
   };
 
   uint32_t fieldnum = type;
   uint32_t rep_fieldnum = rep_fn(type);
-  int wire_type = upb_types[type].native_wire_type;
-  const char *incomplete = incompletes[wire_type];
+  int wire_type = upb_decoder_types[type].native_wire_type;
+  const buffer& incomplete = incompletes[wire_type];
 
   // EOF before a known non-repeated value.
   assert_does_not_parse_at_eof(tag(fieldnum, wire_type));
@@ -446,108 +507,128 @@ void test_premature_eof_for_type(upb_fieldtype_t type) {
 
   // EOF inside a known non-repeated value.
   assert_does_not_parse_at_eof(
-      cat( tag(fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+      cat( tag(fieldnum, wire_type), incomplete ));
 
   // EOF inside a known repeated value.
   assert_does_not_parse_at_eof(
-      cat( tag(rep_fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+      cat( tag(rep_fieldnum, wire_type), incomplete ));
 
   // EOF inside an unknown value.
   assert_does_not_parse_at_eof(
-      cat( tag(UNKNOWN_FIELD, wire_type), buffer_new3(incomplete), NULL ));
+      cat( tag(UNKNOWN_FIELD, wire_type), incomplete ));
 
   if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
     // EOF in the middle of delimited data for known non-repeated value.
     assert_does_not_parse_at_eof(
-        cat( tag(fieldnum, wire_type), varint(1), NULL ));
+        cat( tag(fieldnum, wire_type), varint(1) ));
 
     // EOF in the middle of delimited data for known repeated value.
     assert_does_not_parse_at_eof(
-        cat( tag(rep_fieldnum, wire_type), varint(1), NULL ));
+        cat( tag(rep_fieldnum, wire_type), varint(1) ));
 
     // EOF in the middle of delimited data for unknown value.
     assert_does_not_parse_at_eof(
-        cat( tag(UNKNOWN_FIELD, wire_type), varint(1), NULL ));
+        cat( tag(UNKNOWN_FIELD, wire_type), varint(1) ));
 
     if (type == UPB_TYPE(MESSAGE)) {
       // Submessage ends in the middle of a value.
-      buffer *incomplete_submsg =
+      buffer incomplete_submsg =
           cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT),
-                buffer_new3(incompletes[UPB_WIRE_TYPE_VARINT]), NULL );
+                incompletes[UPB_WIRE_TYPE_VARINT] );
       assert_does_not_parse(
           cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
-               varint(incomplete_submsg->len),
-               incomplete_submsg, NULL ));
+               varint(incomplete_submsg.len()),
+               incomplete_submsg ));
     }
   } else {
     // Packed region ends in the middle of a value.
     assert_does_not_parse(
         cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
-             varint(strlen(incomplete)),
-             buffer_new3(incomplete), NULL ));
+             varint(incomplete.len()),
+             incomplete ));
 
     // EOF in the middle of packed region.
     assert_does_not_parse_at_eof(
-        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1), NULL ));
+        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1) ));
   }
 }
 
 // "33" and "66" are just two random values that all numeric types can
 // represent.
 void test_valid_data_for_type(upb_fieldtype_t type,
-                              buffer *enc33, buffer *enc66) {
+                              const buffer& enc33, const buffer& enc66) {
   uint32_t fieldnum = type;
   uint32_t rep_fieldnum = rep_fn(type);
-  int wire_type = upb_types[type].native_wire_type;
+  int wire_type = upb_decoder_types[type].native_wire_type;
 
   // Non-repeated
   assert_successful_parse(
-      cat( tag(fieldnum, wire_type), buffer_dup(enc33),
-           tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:33; %u:66; ", fieldnum, fieldnum);
+      cat( tag(fieldnum, wire_type), enc33,
+           tag(fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:33")
+      LINE("%u:66")
+      LINE(">"), fieldnum, fieldnum);
 
   // Non-packed repeated.
   assert_successful_parse(
-      cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
-           tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+      cat( tag(rep_fieldnum, wire_type), enc33,
+           tag(rep_fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
 
   // Packed repeated.
   assert_successful_parse(
       cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
-           delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
-      "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
-
-  buffer_free(enc33);
-  buffer_free(enc66);
+           delim(cat( enc33, enc66 )) ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
 }
 
 void test_valid_data_for_signed_type(upb_fieldtype_t type,
-                                     buffer *enc33, buffer *enc66) {
+                                     const buffer& enc33, const buffer& enc66) {
   uint32_t fieldnum = type;
   uint32_t rep_fieldnum = rep_fn(type);
-  int wire_type = upb_types[type].native_wire_type;
+  int wire_type = upb_decoder_types[type].native_wire_type;
 
   // Non-repeated
   assert_successful_parse(
-      cat( tag(fieldnum, wire_type), buffer_dup(enc33),
-           tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:33; %u:-66; ", fieldnum, fieldnum);
+      cat( tag(fieldnum, wire_type), enc33,
+           tag(fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:33")
+      LINE("%u:-66")
+      LINE(">"), fieldnum, fieldnum);
 
   // Non-packed repeated.
   assert_successful_parse(
-      cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
-           tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
-      "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+      cat( tag(rep_fieldnum, wire_type), enc33,
+           tag(rep_fieldnum, wire_type), enc66 ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:-66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
 
   // Packed repeated.
   assert_successful_parse(
       cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
-           delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
-      "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
-
-  buffer_free(enc33);
-  buffer_free(enc66);
+           delim(cat( enc33, enc66 )) ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("  %u:-66")
+      LINE("]")
+      LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
 }
 
 // Test that invalid protobufs are properly detected (without crashing) and
@@ -571,7 +652,7 @@ void test_invalid() {
   test_premature_eof_for_type(UPB_TYPE(SINT64));
 
   // EOF inside a tag's varint.
-  assert_does_not_parse_at_eof( buffer_new3("\x80") );
+  assert_does_not_parse_at_eof( buffer("\x80") );
 
   // EOF inside a known group.
   assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );
@@ -584,33 +665,19 @@ void test_invalid() {
 
   // Field number is 0.
   assert_does_not_parse(
-      cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0), NULL ));
+      cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) ));
 
   // Field number is too large.
   assert_does_not_parse(
       cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
-           varint(0), NULL ));
+           varint(0) ));
 
   // Test exceeding the resource limit of stack depth.
-  buffer *buf = buffer_new3("");
+  buffer buf;
   for (int i = 0; i < UPB_MAX_NESTING; i++) {
-    buf = submsg(UPB_TYPE(MESSAGE), buf);
+    buf.assign(submsg(UPB_TYPE(MESSAGE), buf));
   }
   assert_does_not_parse(buf);
-
-  // Staying within the stack limit should work properly.
-  buf = buffer_new3("");
-  buffer *textbuf = buffer_new3("");
-  int total = UPB_MAX_NESTING - 1;
-  for (int i = 0; i < total; i++) {
-    buf = submsg(UPB_TYPE(MESSAGE), buf);
-    buffer_appendf(textbuf, "%u:{ ", UPB_TYPE(MESSAGE));
-  }
-  for (int i = 0; i < total; i++) {
-    buffer_appendf(textbuf, "} ");
-  }
-  assert_successful_parse(buf, "%s", textbuf->buf);
-  buffer_free(textbuf);
 }
 
 void test_valid() {
@@ -629,16 +696,80 @@ void test_valid() {
   test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66));
   test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66));
 
+  // Test implicit startseq/endseq.
+  uint32_t repfl_fn = rep_fn(UPB_TYPE(FLOAT));
+  uint32_t repdb_fn = rep_fn(UPB_TYPE(DOUBLE));
+  assert_successful_parse(
+      cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33),
+           tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:33")
+      LINE("]")
+      LINE("%u:[")
+      LINE("  %u:66")
+      LINE("]")
+      LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn);
+
   // Submessage tests.
   uint32_t msg_fn = UPB_TYPE(MESSAGE);
   assert_successful_parse(
-      submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer_new3("")))),
-      "%u:{ %u:{ %u:{ } } } ", msg_fn, msg_fn, msg_fn);
+      submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer()))),
+      LINE("<")
+      LINE("%u:{")
+      LINE("  <")
+      LINE("  %u:{")
+      LINE("    <")
+      LINE("    %u:{")
+      LINE("      <")
+      LINE("      >")
+      LINE("    }")
+      LINE("    >")
+      LINE("  }")
+      LINE("  >")
+      LINE("}")
+      LINE(">"), msg_fn, msg_fn, msg_fn);
 
   uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE));
   assert_successful_parse(
-      submsg(repm_fn, submsg(repm_fn, buffer_new3(""))),
-      "%u:[ %u:{ %u:[ %u:{ } ] } ] ", repm_fn, repm_fn, repm_fn, repm_fn);
+      submsg(repm_fn, submsg(repm_fn, buffer())),
+      LINE("<")
+      LINE("%u:[")
+      LINE("  %u:{")
+      LINE("    <")
+      LINE("    %u:[")
+      LINE("      %u:{")
+      LINE("        <")
+      LINE("        >")
+      LINE("      }")
+      LINE("    ]")
+      LINE("    >")
+      LINE("  }")
+      LINE("]")
+      LINE(">"), repm_fn, repm_fn, repm_fn, repm_fn);
+
+  // Staying within the stack limit should work properly.
+  buffer buf;
+  buffer textbuf;
+  int total = UPB_MAX_NESTING - 1;
+  for (int i = 0; i < total; i++) {
+    buf.assign(submsg(UPB_TYPE(MESSAGE), buf));
+    indentbuf(&textbuf, i);
+    textbuf.append("<\n");
+    indentbuf(&textbuf, i);
+    textbuf.appendf("%u:{\n", UPB_TYPE(MESSAGE));
+  }
+  indentbuf(&textbuf, total);
+  textbuf.append("<\n");
+  indentbuf(&textbuf, total);
+  textbuf.append(">\n");
+  for (int i = 0; i < total; i++) {
+    indentbuf(&textbuf, total - i - 1);
+    textbuf.append("}\n");
+    indentbuf(&textbuf, total - i - 1);
+    textbuf.append(">\n");
+  }
+  assert_successful_parse(buf, "%s", textbuf.buf());
 }
 
 void run_tests() {
@@ -647,10 +778,17 @@ void run_tests() {
 }
 
 int main() {
+  for (int i = 0; i < UPB_MAX_NESTING; i++) {
+    closures[i] = i;
+  }
   // Construct decoder plan.
   upb_handlers *h = upb_handlers_new();
   reghandlers(upb_handlers_newmhandlers(h));
 
+  // Create an empty set of message handlers to make sure that the decoder can
+  // handle empty messages.
+  upb_handlers_newmhandlers(h);
+
   // Test without JIT.
   plan = upb_decoderplan_new(h, false);
   run_tests();
@@ -658,6 +796,11 @@ int main() {
 
   // Test JIT.
   plan = upb_decoderplan_new(h, true);
+#ifdef UPB_USE_JIT_X64
+  ASSERT(upb_decoderplan_hasjitcode(plan));
+#else
+  ASSERT(!upb_decoderplan_hasjitcode(plan));
+#endif
   run_tests();
   upb_decoderplan_unref(plan);
 
diff --git a/tests/test_def.c b/tests/test_def.c
index 3ca3064..698532e 100644
--- a/tests/test_def.c
+++ b/tests/test_def.c
@@ -1,19 +1,174 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ *
+ * Test of defs and symtab.  There should be far more tests of edge conditions
+ * (like attempts to link defs that don't have required properties set).
+ */
 
-#undef NDEBUG  /* ensure tests always assert. */
 #include "upb/def.h"
+#include "upb/pb/glue.h"
+#include "upb_test.h"
 #include <stdlib.h>
+#include <string.h>
 
-int main() {
-  upb_symtab *s = upb_symtab_new();
+const char *descriptor_file;
 
-  // Will be empty atm since we haven't added anything to the symtab.
+static void test_empty_symtab() {
+  upb_symtab *s = upb_symtab_new();
   int count;
-  const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY);
-  for (int i = 0; i < count; i++) {
-    upb_def_unref(defs[i]);
-  }
+  const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY, NULL);
+  ASSERT(count == 0);
   free(defs);
+  upb_symtab_unref(s);
+}
 
+static upb_symtab *load_test_proto() {
+  upb_symtab *s = upb_symtab_new();
+  ASSERT(s);
+  upb_status status = UPB_STATUS_INIT;
+  if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) {
+    fprintf(stderr, "Error loading descriptor file: %s\n",
+            upb_status_getstr(&status));
+    exit(1);
+  }
+  upb_status_uninit(&status);
+  return s;
+}
+
+static void test_cycles() {
+  upb_symtab *s = load_test_proto();
+
+  // Test cycle detection by making a cyclic def's main refcount go to zero
+  // and then be incremented to one again.
+  const upb_def *def = upb_symtab_lookup(s, "A", &def);
+  ASSERT(def);
+  ASSERT(upb_def_isfinalized(def));
   upb_symtab_unref(s);
+
+  // Message A has only one subfield: "optional B b = 1".
+  const upb_msgdef *m = upb_downcast_msgdef_const(def);
+  upb_fielddef *f = upb_msgdef_itof(m, 1);
+  ASSERT(f);
+  ASSERT(upb_hassubdef(f));
+  const upb_def *def2 = upb_fielddef_subdef(f);
+  ASSERT(upb_downcast_msgdef_const(def2));
+  ASSERT(strcmp(upb_def_fullname(def2), "B") == 0);
+
+  upb_def_ref(def2, &def2);
+  upb_def_unref(def, &def);
+  upb_def_unref(def2, &def2);
+}
+
+static void test_fielddef_unref() {
+  upb_symtab *s = load_test_proto();
+  const upb_msgdef *md = upb_symtab_lookupmsg(s, "A", &md);
+  upb_fielddef *f = upb_msgdef_itof(md, 1);
+  upb_fielddef_ref(f, &f);
+
+  // Unref symtab and msgdef; now fielddef is the only thing keeping the msgdef
+  // alive.
+  upb_symtab_unref(s);
+  upb_msgdef_unref(md, &md);
+  // Check that md is still alive.
+  ASSERT(strcmp(upb_def_fullname(UPB_UPCAST(md)), "A") == 0);
+
+  // Check that unref of fielddef frees the whole remaining graph.
+  upb_fielddef_unref(f, &f);
+}
+
+static void test_fielddef_accessors() {
+  upb_fielddef *f1 = upb_fielddef_new(&f1);
+  upb_fielddef *f2 = upb_fielddef_new(&f2);
+
+  ASSERT(upb_fielddef_ismutable(f1));
+  upb_fielddef_setname(f1, "f1");
+  upb_fielddef_setnumber(f1, 1937);
+  upb_fielddef_settype(f1, UPB_TYPE(FIXED64));
+  upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED));
+  ASSERT(upb_fielddef_number(f1) == 1937);
+
+  ASSERT(upb_fielddef_ismutable(f2));
+  upb_fielddef_setname(f2, "f2");
+  upb_fielddef_setnumber(f2, 1572);
+  upb_fielddef_settype(f2, UPB_TYPE(BYTES));
+  upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED));
+  ASSERT(upb_fielddef_number(f2) == 1572);
+
+  upb_fielddef_unref(f1, &f1);
+  upb_fielddef_unref(f2, &f2);
+}
+
+static upb_fielddef *newfield(
+    const char *name, int32_t num, uint8_t type, uint8_t label,
+    const char *type_name, void *owner) {
+  upb_fielddef *f = upb_fielddef_new(owner);
+  upb_fielddef_setname(f, name);
+  upb_fielddef_setnumber(f, num);
+  upb_fielddef_settype(f, type);
+  upb_fielddef_setlabel(f, label);
+  upb_fielddef_setsubtypename(f, type_name);
+  return f;
+}
+
+static upb_msgdef *upb_msgdef_newnamed(const char *name, void *owner) {
+  upb_msgdef *m = upb_msgdef_new(owner);
+  upb_def_setfullname(UPB_UPCAST(m), name);
+  return m;
+}
+
+INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) {
+  upb_enumdef *e = upb_enumdef_new(owner);
+  upb_def_setfullname(UPB_UPCAST(e), name);
+  return e;
+}
+
+void test_replacement() {
+  // Re-adding MyEnum must replace MyMessage (which refers to it through
+  // "field1") but must leave the unrelated MyMessage2 untouched.
+  upb_symtab *s = upb_symtab_new();
+  upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s);
+  upb_msgdef_addfield(m, newfield(
+      "field1", 1, UPB_TYPE(ENUM), UPB_LABEL(OPTIONAL), ".MyEnum", &s), &s);
+  upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s);
+  upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s);
+
+  upb_def *newdefs[] = {UPB_UPCAST(m), UPB_UPCAST(m2), UPB_UPCAST(e)};
+  upb_status status = UPB_STATUS_INIT;
+  ASSERT_STATUS(upb_symtab_add(s, newdefs, 3, &s, &status), &status);
+
+  // Try adding a new definition of MyEnum; MyMessage should get replaced with
+  // a new version.
+  upb_enumdef *e2 = upb_enumdef_newnamed("MyEnum", &s);
+  upb_def *newdefs2[] = {UPB_UPCAST(e2)};
+  ASSERT_STATUS(upb_symtab_add(s, newdefs2, 1, &s, &status), &status);
+
+  const upb_msgdef *m3 = upb_symtab_lookupmsg(s, "MyMessage", &m3);
+  ASSERT(m3);
+  // Must be different because it points to MyEnum which was replaced.
+  ASSERT(m3 != m);
+  upb_msgdef_unref(m3, &m3);
+
+  m3 = upb_symtab_lookupmsg(s, "MyMessage2", &m3);
+  // Should be the same because it was not replaced, nor were any defs that
+  // are reachable from it.
+  ASSERT(m3 == m2);
+  upb_msgdef_unref(m3, &m3);
+  // Release the status object, mirroring what load_test_proto() does.
+  upb_status_uninit(&status);
+  upb_symtab_unref(s);
+}
+
+int main(int argc, char *argv[]) {
+  if (argc < 2) {
+    fprintf(stderr, "Usage: test_def <test.proto.pb>\n");
+    return 1;
+  }
+  descriptor_file = argv[1];
+  test_empty_symtab();
+  test_cycles();
+  test_fielddef_accessors();
+  test_fielddef_unref();
+  test_replacement();
   return 0;
 }
diff --git a/tests/test_table.cc b/tests/test_table.cc
index 47e083f..2538e35 100644
--- a/tests/test_table.cc
+++ b/tests/test_table.cc
@@ -1,8 +1,11 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc.  See LICENSE for details.
+ *
+ * Tests for upb_table.
+ */
 
-#undef NDEBUG  /* ensure tests always assert. */
-#include "upb/table.h"
-#include "test_util.h"
-#include <assert.h>
 #include <string.h>
 #include <sys/resource.h>
 #include <ext/hash_map>
@@ -11,55 +14,45 @@
 #include <set>
 #include <string>
 #include <vector>
+#include "tests/test_util.h"
+#include "tests/upb_test.h"
+#include "upb/table.h"
 
 bool benchmark = false;
 #define CPU_TIME_PER_TEST 0.5
 
 using std::vector;
 
-typedef struct {
-  uint32_t value;  /* key*2 */
-} inttable_entry;
-
-typedef struct {
-  int32_t value;  /* ASCII Value of first letter */
-} strtable_entry;
-
-double get_usertime()
-{
+double get_usertime() {
   struct rusage usage;
   getrusage(RUSAGE_SELF, &usage);
   return usage.ru_utime.tv_sec + (usage.ru_utime.tv_usec/1000000.0);
 }
 
 /* num_entries must be a power of 2. */
-void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert)
-{
+void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert) {
   /* Initialize structures. */
   upb_strtable table;
   std::map<std::string, int32_t> m;
-  upb_strtable_init(&table, 0, sizeof(strtable_entry));
+  upb_strtable_init(&table);
   std::set<std::string> all;
   for(size_t i = 0; i < num_to_insert; i++) {
     const std::string& key = keys[i];
     all.insert(key);
-    strtable_entry e;
-    e.value = key[0];
-    upb_strtable_insert(&table, key.c_str(), &e);
+    upb_strtable_insert(&table, key.c_str(), upb_value_int32(key[0]));
     m[key] = key[0];
   }
 
   /* Test correctness. */
   for(uint32_t i = 0; i < keys.size(); i++) {
     const std::string& key = keys[i];
-    strtable_entry *e =
-        (strtable_entry*)upb_strtable_lookup(&table, key.c_str());
+    const upb_value *v = upb_strtable_lookup(&table, key.c_str());
     if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
-      assert(e);
-      assert(e->value == key[0]);
-      assert(m[key] == key[0]);
+      ASSERT(v);
+      ASSERT(upb_value_getint32(*v) == key[0]);
+      ASSERT(m[key] == key[0]);
     } else {
-      assert(e == NULL);
+      ASSERT(v == NULL);
     }
   }
 
@@ -69,66 +62,83 @@ void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert)
     const char *key = upb_strtable_iter_key(&iter);
     std::string tmp(key, strlen(key));
     std::set<std::string>::iterator i = all.find(tmp);
-    assert(i != all.end());
+    ASSERT(i != all.end());
     all.erase(i);
   }
-  assert(all.empty());
+  ASSERT(all.empty());
 
-  upb_strtable_free(&table);
+  upb_strtable_uninit(&table);
 }
 
 /* num_entries must be a power of 2. */
-void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
-{
+void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
   /* Initialize structures. */
   upb_inttable table;
   uint32_t largest_key = 0;
   std::map<uint32_t, uint32_t> m;
   __gnu_cxx::hash_map<uint32_t, uint32_t> hm;
-  upb_inttable_init(&table, num_entries, sizeof(inttable_entry));
+  upb_inttable_init(&table);
   for(size_t i = 0; i < num_entries; i++) {
     int32_t key = keys[i];
     largest_key = UPB_MAX((int32_t)largest_key, key);
-    inttable_entry e;
-    e.value = (key*2) << 1;
-    upb_inttable_insert(&table, key, &e);
+    upb_inttable_insert(&table, key, upb_value_uint32(key * 2));
     m[key] = key*2;
     hm[key] = key*2;
   }
 
   /* Test correctness. */
   for(uint32_t i = 0; i <= largest_key; i++) {
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(
-        &table, i);
+    const upb_value *v = upb_inttable_lookup(&table, i);
+    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
+      ASSERT(v);
+      ASSERT(upb_value_getuint32(*v) == i*2);
+      ASSERT(m[i] == i*2);
+      ASSERT(hm[i] == i*2);
+    } else {
+      ASSERT(v == NULL);
+    }
+  }
+
+  for(uint16_t i = 0; i < num_entries; i += 2) {
+    upb_value val;
+    bool ret = upb_inttable_remove(&table, keys[i], &val);
+    ASSERT(ret == (m.erase(keys[i]) == 1));
+    if (ret) ASSERT(upb_value_getuint32(val) == keys[i] * 2);
+    hm.erase(keys[i]);
+    m.erase(keys[i]);
+  }
+
+  ASSERT(upb_inttable_count(&table) == hm.size());
+
+  /* Test correctness. */
+  for(uint32_t i = 0; i <= largest_key; i++) {
+    const upb_value *v = upb_inttable_lookup(&table, i);
     if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
-      assert(e);
-      //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value);
-      assert(((e->value) >> 1) == i*2);
-      assert(m[i] == i*2);
-      assert(hm[i] == i*2);
+      ASSERT(v);
+      ASSERT(upb_value_getuint32(*v) == i*2);
+      ASSERT(m[i] == i*2);
+      ASSERT(hm[i] == i*2);
     } else {
-      assert(e == NULL);
+      ASSERT(v == NULL);
     }
   }
 
   // Compact and test correctness again.
   upb_inttable_compact(&table);
   for(uint32_t i = 0; i <= largest_key; i++) {
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(
-        &table, i);
+    const upb_value *v = upb_inttable_lookup(&table, i);
     if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
-      assert(e);
-      //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value);
-      assert(((e->value) >> 1) == i*2);
-      assert(m[i] == i*2);
-      assert(hm[i] == i*2);
+      ASSERT(v);
+      ASSERT(upb_value_getuint32(*v) == i*2);
+      ASSERT(m[i] == i*2);
+      ASSERT(hm[i] == i*2);
     } else {
-      assert(e == NULL);
+      ASSERT(v == NULL);
     }
   }
 
   if(!benchmark) {
-    upb_inttable_free(&table);
+    upb_inttable_uninit(&table);
     return;
   }
 
@@ -141,7 +151,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
   }
   for(uint16_t i = num_entries - 1; i >= 1; i--) {
     uint16_t rand_i = (random() / (double)RAND_MAX) * i;
-    assert(rand_i <= i);
+    ASSERT(rand_i <= i);
     uint16_t tmp = rand_order[rand_i];
     rand_order[rand_i] = rand_order[i];
     rand_order[i] = tmp;
@@ -162,8 +172,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
   for(i = 0; true; i++) {
     MAYBE_BREAK;
     int32_t key = keys[i & mask];
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key);
-    x += (uintptr_t)e;
+    const upb_value *v = upb_inttable_lookup32(&table, key);
+    x += (uintptr_t)v;
   }
   double total = get_usertime() - before;
   printf("%s/s\n", eng(i/total, 3, false));
@@ -174,8 +184,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
   for(i = 0; true; i++) {
     MAYBE_BREAK;
     int32_t key = keys[rand_order[i & mask]];
-    inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key);
-    x += (uintptr_t)e;
+    const upb_value *v = upb_inttable_lookup32(&table, key);
+    x += (uintptr_t)v;
   }
   total = get_usertime() - before;
   printf("%s/s\n", eng(i/total, 3, false));
@@ -223,20 +233,18 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc)
   }
   total = get_usertime() - before;
   printf("%s/s\n\n", eng(i/total, 3, false));
-  upb_inttable_free(&table);
+  upb_inttable_uninit(&table);
   delete rand_order;
 }
 
-int32_t *get_contiguous_keys(int32_t num)
-{
+int32_t *get_contiguous_keys(int32_t num) {
   int32_t *buf = new int32_t[num];
   for(int32_t i = 0; i < num; i++)
-    buf[i] = i+1;
+    buf[i] = i;
   return buf;
 }
 
-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
   for (int i = 1; i < argc; i++) {
     if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
   }
diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc
index 53b2498..020dca5 100644
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@@ -1,7 +1,7 @@
 /*
  * upb - a minimalist implementation of protocol buffers.
  *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
  *
  * A test that verifies that our results are identical to proto2 for a
  * given proto type and input protobuf.
@@ -9,230 +9,87 @@
 
 #define __STDC_LIMIT_MACROS  // So we get UINT32_MAX
 #include <assert.h>
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/wire_format_lite.h>
 #include <inttypes.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <google/protobuf/descriptor.h>
-#include <google/protobuf/wire_format_lite.h>
 #include "benchmarks/google_messages.pb.h"
-#include "upb/def.h"
-#include "upb/msg.h"
+#include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/msg.hpp"
+#include "upb/pb/decoder.hpp"
 #include "upb/pb/glue.h"
 #include "upb/pb/varint.h"
+#include "upb/proto2_bridge.hpp"
 #include "upb_test.h"
 
-size_t string_size;
-
-void compare(const google::protobuf::Message& proto2_msg,
-             void *upb_msg, const upb_msgdef *upb_md);
-
-void compare_arrays(const google::protobuf::Reflection *r,
-                    const google::protobuf::Message& proto2_msg,
-                    const google::protobuf::FieldDescriptor *proto2_f,
-                    void *upb_msg, upb_fielddef *upb_f)
-{
-  ASSERT(upb_msg_has(upb_msg, upb_f));
-  ASSERT(upb_isseq(upb_f));
-  const void *arr = upb_value_getptr(upb_msg_getseq(upb_msg, upb_f));
-  const void *iter = upb_seq_begin(arr, upb_f);
-  for(int i = 0;
-      i < r->FieldSize(proto2_msg, proto2_f);
-      i++, iter = upb_seq_next(arr, iter, upb_f)) {
-    ASSERT(!upb_seq_done(iter));
-    upb_value v = upb_seq_get(iter, upb_f);
-    switch(upb_f->type) {
-      default:
-        ASSERT(false);
-      case UPB_TYPE(DOUBLE):
-        ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == upb_value_getdouble(v));
-        break;
-      case UPB_TYPE(FLOAT):
-        ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == upb_value_getfloat(v));
-        break;
-      case UPB_TYPE(INT64):
-      case UPB_TYPE(SINT64):
-      case UPB_TYPE(SFIXED64):
-        ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == upb_value_getint64(v));
-        break;
-      case UPB_TYPE(UINT64):
-      case UPB_TYPE(FIXED64):
-        ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == upb_value_getuint64(v));
-        break;
-      case UPB_TYPE(SFIXED32):
-      case UPB_TYPE(SINT32):
-      case UPB_TYPE(INT32):
-      case UPB_TYPE(ENUM):
-        ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == upb_value_getint32(v));
-        break;
-      case UPB_TYPE(FIXED32):
-      case UPB_TYPE(UINT32):
-        ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == upb_value_getuint32(v));
-        break;
-      case UPB_TYPE(BOOL):
-        ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == upb_value_getbool(v));
-        break;
-      case UPB_TYPE(STRING):
-      case UPB_TYPE(BYTES): {
-        std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
-        upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
-        std::string str2(upbstr->ptr, upbstr->len);
-        string_size += upbstr->len;
-        ASSERT(str == str2);
-        break;
-      }
-      case UPB_TYPE(GROUP):
-      case UPB_TYPE(MESSAGE):
-        ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL);
-        compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i),
-                upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
-    }
-  }
-  ASSERT(upb_seq_done(iter));
-}
-
-void compare_values(const google::protobuf::Reflection *r,
-                    const google::protobuf::Message& proto2_msg,
-                    const google::protobuf::FieldDescriptor *proto2_f,
-                    void *upb_msg, upb_fielddef *upb_f)
-{
-  upb_value v = upb_msg_get(upb_msg, upb_f);
-  switch(upb_f->type) {
-    default:
-      ASSERT(false);
-    case UPB_TYPE(DOUBLE):
-      ASSERT(r->GetDouble(proto2_msg, proto2_f) == upb_value_getdouble(v));
-      break;
-    case UPB_TYPE(FLOAT):
-      ASSERT(r->GetFloat(proto2_msg, proto2_f) == upb_value_getfloat(v));
-      break;
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SINT64):
-    case UPB_TYPE(SFIXED64):
-      ASSERT(r->GetInt64(proto2_msg, proto2_f) == upb_value_getint64(v));
-      break;
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64):
-      ASSERT(r->GetUInt64(proto2_msg, proto2_f) == upb_value_getuint64(v));
-      break;
-    case UPB_TYPE(SFIXED32):
-    case UPB_TYPE(SINT32):
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(ENUM):
-      ASSERT(r->GetInt32(proto2_msg, proto2_f) == upb_value_getint32(v));
-      break;
-    case UPB_TYPE(FIXED32):
-    case UPB_TYPE(UINT32):
-      ASSERT(r->GetUInt32(proto2_msg, proto2_f) == upb_value_getuint32(v));
-      break;
-    case UPB_TYPE(BOOL):
-      ASSERT(r->GetBool(proto2_msg, proto2_f) == upb_value_getbool(v));
-      break;
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES): {
-      std::string str = r->GetString(proto2_msg, proto2_f);
-      upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
-      std::string str2(upbstr->ptr, upbstr->len);
-      string_size += upbstr->len;
-      ASSERT(str == str2);
-      break;
-    }
-    case UPB_TYPE(GROUP):
-    case UPB_TYPE(MESSAGE):
-      // XXX: getstr
-      compare(r->GetMessage(proto2_msg, proto2_f),
-              upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
-  }
-}
-
-void compare(const google::protobuf::Message& proto2_msg,
-             void *upb_msg, const upb_msgdef *upb_md)
-{
-  const google::protobuf::Reflection *r = proto2_msg.GetReflection();
-  const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor();
-
-  ASSERT(d->field_count() == upb_msgdef_numfields(upb_md));
-  upb_msg_iter i;
-  for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) {
-    upb_fielddef *upb_f = upb_msg_iter_field(i);
+void compare_metadata(const google::protobuf::Descriptor* d,
+                      const upb::MessageDef *upb_md) {
+  ASSERT(d->field_count() == upb_md->field_count());
+  for (upb::MessageDef::ConstIterator i(upb_md); !i.Done(); i.Next()) {
+    const upb::FieldDef* upb_f = i.field();
+    ASSERT(upb_f);
     const google::protobuf::FieldDescriptor *proto2_f =
-        d->FindFieldByNumber(upb_f->number);
-    // Make sure the definitions are equal.
+        d->FindFieldByNumber(upb_f->number());
-    ASSERT(upb_f);
     ASSERT(proto2_f);
-    ASSERT(upb_f->number == proto2_f->number());
-    ASSERT(std::string(upb_f->name) == proto2_f->name());
-    ASSERT(upb_f->type == proto2_f->type());
-    ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated());
-
-    if(!upb_msg_has(upb_msg, upb_f)) {
-      if(upb_isseq(upb_f))
-        ASSERT(r->FieldSize(proto2_msg, proto2_f) == 0);
-      else
-        ASSERT(r->HasField(proto2_msg, proto2_f) == false);
-    } else {
-      if(upb_isseq(upb_f)) {
-        compare_arrays(r, proto2_msg, proto2_f, upb_msg, upb_f);
-      } else {
-        ASSERT(r->HasField(proto2_msg, proto2_f) == true);
-        compare_values(r, proto2_msg, proto2_f, upb_msg, upb_f);
-      }
-    }
+    ASSERT(upb_f->number() == proto2_f->number());
+    ASSERT(std::string(upb_f->name()) == proto2_f->name());
+    ASSERT(upb_f->type() == static_cast<upb::FieldType>(proto2_f->type()));
+    ASSERT(upb_f->IsSequence() == proto2_f->is_repeated());
   }
 }
 
-void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
-                       void *upb_msg, const upb_msgdef *upb_md,
-                       const char *str, size_t len, bool allow_jit)
-{
+void parse_and_compare(MESSAGE_CIDENT *msg1, MESSAGE_CIDENT *msg2,
+                       const upb::MessageDef *upb_md,
+                       const char *str, size_t len, bool allow_jit) {
   // Parse to both proto2 and upb.
-  ASSERT(proto2_msg->ParseFromArray(str, len));
-  upb_status status = UPB_STATUS_INIT;
-  upb_msg_clear(upb_msg, upb_md);
-  upb_strtomsg(str, len, upb_msg, upb_md, allow_jit, &status);
-  if (!upb_ok(&status)) {
-    fprintf(stderr, "Error parsing protobuf: %s", upb_status_getstr(&status));
-    exit(1);
-  }
-  string_size = 0;
-  compare(*proto2_msg, upb_msg, upb_md);
-  printf("Total size: %zd, string size: %zd (%0.2f%%)\n", len,
-         string_size, (double)string_size / len * 100);
-  upb_status_uninit(&status);
+  ASSERT(msg1->ParseFromArray(str, len));
+
+  upb::Handlers* handlers = upb::Handlers::New();
+  upb::RegisterWriteHandlers(handlers, upb_md);
+  upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers, allow_jit);
+  upb::StringSource src(str, len);
+  upb::Decoder decoder;
+  decoder.ResetPlan(plan, 0);
+  decoder.ResetInput(src.AllBytes(), msg2);
+  msg2->Clear();
+  ASSERT(decoder.Decode() == UPB_OK);
+  plan->Unref();
+  handlers->Unref();
+
+  // Would like to just compare the message objects themselves, but
+  // unfortunately MessageDifferencer is not part of the open-source release of
+  // proto2, so we compare their serialized strings, which we expect will be
+  // equivalent.
+  std::string str1;
+  std::string str2;
+  msg1->SerializeToString(&str1);
+  msg2->SerializeToString(&str2);
+  ASSERT(str1 == str2);
+  ASSERT(std::string(str, len) == str2);
 }
 
-int main(int argc, char *argv[])
-{
-  if (argc < 3) {
-    fprintf(stderr, "Usage: test_vs_proto2 <descriptor file> <message file>\n");
-    return 1;
+void test_zig_zag() {
+  for (uint64_t num = 5; num <= UINT64_MAX / 3 * 2; num += num / 2) {
+    ASSERT(upb_zzenc_64(num) ==
+           google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
+    if (num < UINT32_MAX) {
+      ASSERT(upb_zzenc_32(num) ==
+             google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
+    }
   }
-  const char *descriptor_file = argv[1];
-  const char *message_file = argv[2];
 
-  // Initialize upb state, parse descriptor.
-  upb_status status = UPB_STATUS_INIT;
-  upb_symtab *symtab = upb_symtab_new();
-  size_t fds_len;
-  const char *fds = upb_readfile(descriptor_file, &fds_len);
-  if(fds == NULL) {
-    fprintf(stderr, "Couldn't read %s.\n", descriptor_file);
-    return 1;
-  }
-  upb_load_descriptor_into_symtab(symtab, fds, fds_len, &status);
-  if(!upb_ok(&status)) {
-    fprintf(stderr, "Error importing %s: %s", descriptor_file,
-            upb_status_getstr(&status));
-    return 1;
-  }
-  free((void*)fds);
+}
 
-  const upb_def *def = upb_symtab_lookup(symtab, MESSAGE_NAME);
-  const upb_msgdef *msgdef;
-  if(!def || !(msgdef = upb_dyncast_msgdef_const(def))) {
-    fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
+int main(int argc, char *argv[])
+{
+  if (argc < 2) {
+    fprintf(stderr, "Usage: test_vs_proto2 <message file>\n");
     return 1;
   }
+  const char *message_file = argv[1];
 
   // Read the message data itself.
   size_t len;
@@ -242,32 +99,25 @@ int main(int argc, char *argv[])
     return 1;
   }
 
+  MESSAGE_CIDENT msg1;
+  MESSAGE_CIDENT msg2;
+
+  const upb::MessageDef* m = upb::proto2_bridge::NewFinalMessageDef(msg1, &m);
+
+  compare_metadata(msg1.GetDescriptor(), m);
+
   // Run twice to test proper object reuse.
-  MESSAGE_CIDENT proto2_msg;
-  void *upb_msg = upb_stdmsg_new(msgdef);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
-  parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
+  parse_and_compare(&msg1, &msg2, m, str, len, true);
+  parse_and_compare(&msg1, &msg2, m, str, len, false);
+  parse_and_compare(&msg1, &msg2, m, str, len, true);
+  parse_and_compare(&msg1, &msg2, m, str, len, false);
   printf("All tests passed, %d assertions.\n", num_assertions);
 
-  upb_stdmsg_free(upb_msg, msgdef);
-  upb_def_unref(UPB_UPCAST(msgdef));
+  m->Unref(&m);
   free((void*)str);
-  upb_symtab_unref(symtab);
-  upb_status_uninit(&status);
 
-  // Test Zig-Zag encoding/decoding.
-  for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
-    ASSERT(upb_zzenc_64(num) ==
-           google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
-    if (num < UINT32_MAX) {
-      ASSERT(upb_zzenc_32(num) ==
-             google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
-    }
-  }
+  test_zig_zag();
 
   google::protobuf::ShutdownProtobufLibrary();
-
   return 0;
 }
diff --git a/tests/tests.c b/tests/tests.c
deleted file mode 100644
index 12ff4bb..0000000
--- a/tests/tests.c
+++ /dev/null
@@ -1,121 +0,0 @@
-
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "upb/def.h"
-#include "upb/handlers.h"
-#include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-#include "upb_test.h"
-
-const char *descriptor_file;
-
-static upb_symtab *load_test_proto() {
-  upb_symtab *s = upb_symtab_new();
-  ASSERT(s);
-  upb_status status = UPB_STATUS_INIT;
-  if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) {
-    fprintf(stderr, "Error loading descriptor file: %s\n",
-            upb_status_getstr(&status));
-    exit(1);
-  }
-  upb_status_uninit(&status);
-  return s;
-}
-
-static upb_flow_t upb_test_onvalue(void *c, upb_value fval, upb_value val) {
-  (void)c;
-  (void)fval;
-  (void)val;
-  return UPB_CONTINUE;
-}
-
-static void test_upb_jit() {
-  upb_symtab *s = load_test_proto();
-  const upb_def *def = upb_symtab_lookup(s, "SimplePrimitives");
-  ASSERT(def);
-
-  upb_handlers *h = upb_handlers_new();
-  upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
-  upb_handlers_reghandlerset(h, upb_downcast_msgdef_const(def), &hset);
-  upb_decoderplan *p = upb_decoderplan_new(h, true);
-#ifdef UPB_USE_JIT_X64
-  ASSERT(upb_decoderplan_hasjitcode(p));
-#else
-  ASSERT(!upb_decoderplan_hasjitcode(p));
-#endif
-  upb_decoderplan_unref(p);
-  upb_symtab_unref(s);
-  upb_def_unref(def);
-  upb_handlers_unref(h);
-}
-
-static void test_upb_symtab() {
-  upb_symtab *s = load_test_proto();
-
-  // Test cycle detection by making a cyclic def's main refcount go to zero
-  // and then be incremented to one again.
-  const upb_def *def = upb_symtab_lookup(s, "A");
-  ASSERT(def);
-  upb_symtab_unref(s);
-  const upb_msgdef *m = upb_downcast_msgdef_const(def);
-  upb_msg_iter i = upb_msg_begin(m);
-  ASSERT(!upb_msg_done(i));
-  upb_fielddef *f = upb_msg_iter_field(i);
-  ASSERT(upb_hassubdef(f));
-  upb_def *def2 = f->def;
-
-  i = upb_msg_next(m, i);
-  ASSERT(upb_msg_done(i));  // "A" should only have one field.
-
-  ASSERT(upb_downcast_msgdef(def2));
-  upb_def_ref(def2);
-  upb_def_unref(def);
-  upb_def_unref(def2);
-}
-
-static void test_upb_two_fielddefs() {
-  upb_fielddef *f1 = upb_fielddef_new();
-  upb_fielddef *f2 = upb_fielddef_new();
-
-  ASSERT(upb_fielddef_ismutable(f1));
-  upb_fielddef_setname(f1, "");
-  upb_fielddef_setnumber(f1, 1937);
-  upb_fielddef_settype(f1, UPB_TYPE(FIXED64));
-  upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED));
-  upb_fielddef_settypename(f1, "");
-  ASSERT(upb_fielddef_number(f1) == 1937);
-
-  ASSERT(upb_fielddef_ismutable(f2));
-  upb_fielddef_setname(f2, "");
-  upb_fielddef_setnumber(f2, 1572);
-  upb_fielddef_settype(f2, UPB_TYPE(BYTES));
-  upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED));
-  upb_fielddef_settypename(f2, "");
-  ASSERT(upb_fielddef_number(f2) == 1572);
-
-  upb_fielddef_unref(f1);
-  upb_fielddef_unref(f2);
-}
-
-int main(int argc, char *argv[])
-{
-  if (argc < 2) {
-    fprintf(stderr, "Usage: test_cpp <descriptor file>\n");
-    return 1;
-  }
-  descriptor_file = argv[1];
-#define TEST(func) do { \
-  int assertions_before = num_assertions; \
-  printf("Running " #func "..."); fflush(stdout); \
-  func(); \
-  printf("ok (%d assertions).\n", num_assertions - assertions_before); \
-  } while (0)
-
-  TEST(test_upb_symtab);
-  TEST(test_upb_jit);
-  TEST(test_upb_two_fielddefs);
-  printf("All tests passed (%d assertions).\n", num_assertions);
-  return 0;
-}
diff --git a/tests/upb_test.h b/tests/upb_test.h
index 2bd340e..652977b 100644
--- a/tests/upb_test.h
+++ b/tests/upb_test.h
@@ -7,6 +7,7 @@
 #ifndef UPB_TEST_H_
 #define UPB_TEST_H_
 
+#include <stdio.h>
 #include <stdlib.h>
 
 #ifdef __cplusplus
@@ -18,9 +19,28 @@ int num_assertions = 0;
   ++num_assertions; \
   if (!(expr)) { \
     fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+    fprintf(stderr, "expr: %s\n", #expr); \
     abort(); \
   } \
-} while(0)
+} while (0)
+
+#define ASSERT_NOCOUNT(expr) do { \
+  if (!(expr)) { \
+    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+    fprintf(stderr, "expr: %s\n", #expr); \
+    abort(); \
+  } \
+} while (0)
+
+#define ASSERT_STATUS(expr, status) do { \
+  ++num_assertions; \
+  if (!(expr)) { \
+    fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
+    fprintf(stderr, "expr: %s\n", #expr); \
+    fprintf(stderr, "failed status: %s\n", upb_status_getstr(status)); \
+    abort(); \
+  } \
+} while (0)
 
 #ifdef __cplusplus
 }  /* extern "C" */
diff --git a/tools/upbc.c b/tools/upbc.c
index a5d8897..4b25f3e 100644
--- a/tools/upbc.c
+++ b/tools/upbc.c
@@ -55,7 +55,7 @@ static void write_const_h(const upb_def *defs[], int num_entries,
   for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
     if(defs[i]->type != UPB_DEF_ENUM) continue;
     const upb_enumdef *enumdef = upb_downcast_enumdef_const(defs[i]);
-    char *enum_name = strdup(upb_def_fqname(UPB_UPCAST(enumdef)));
+    char *enum_name = strdup(upb_def_fullname(UPB_UPCAST(enumdef)));
     char *enum_val_prefix = strdup(enum_name);
     to_cident(enum_name);
     to_preproc(enum_val_prefix);
@@ -63,11 +63,12 @@ static void write_const_h(const upb_def *defs[], int num_entries,
     fprintf(stream, "typedef enum %s {\n", enum_name);
     bool first = true;
     /* Foreach enum value. */
-    for (upb_enum_iter iter = upb_enum_begin(enumdef);
-         !upb_enum_done(iter);
-         iter = upb_enum_next(enumdef, iter)) {
-      char *value_name = strdup(upb_enum_iter_name(iter));
-      uint32_t value = upb_enum_iter_number(iter);
+    upb_enum_iter iter;
+    for (upb_enum_begin(&iter, enumdef);
+         !upb_enum_done(&iter);
+         upb_enum_next(&iter)) {
+      char *value_name = strdup(upb_enum_iter_name(&iter));
+      uint32_t value = upb_enum_iter_number(&iter);
       to_preproc(value_name);
       /* "  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13," */
       if (!first) fputs(",\n", stream);
@@ -85,20 +86,20 @@ static void write_const_h(const upb_def *defs[], int num_entries,
   for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
     const upb_msgdef *m = upb_dyncast_msgdef_const(defs[i]);
     if(!m) continue;
-    char *msg_name = strdup(upb_def_fqname(UPB_UPCAST(m)));
+    char *msg_name = strdup(upb_def_fullname(UPB_UPCAST(m)));
     char *msg_val_prefix = strdup(msg_name);
     to_preproc(msg_val_prefix);
     upb_msg_iter i;
-    for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-      upb_fielddef *f = upb_msg_iter_field(i);
-      char *preproc_field_name = strdup(f->name);
+    for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+      upb_fielddef *f = upb_msg_iter_field(&i);
+      char *preproc_field_name = strdup(upb_fielddef_name(f));
       to_preproc(preproc_field_name);
       fprintf(stream, "#define %s_%s__FIELDNUM %d\n",
               msg_val_prefix, preproc_field_name, upb_fielddef_number(f));
       fprintf(stream, "#define %s_%s__FIELDNAME \"%s\"\n",
-              msg_val_prefix, preproc_field_name, f->name);
+              msg_val_prefix, preproc_field_name, upb_fielddef_name(f));
       fprintf(stream, "#define %s_%s__FIELDTYPE %d\n\n",
-              msg_val_prefix, preproc_field_name, f->type);
+              msg_val_prefix, preproc_field_name, upb_fielddef_type(f));
       free(preproc_field_name);
     }
     free(msg_val_prefix);
@@ -123,13 +124,13 @@ const char usage[] =
   "                     of using the input file as a basename.\n"
 ;
 
-void usage_err(char *err) {
+void usage_err(const char *err) {
   fprintf(stderr, "upbc: %s\n\n", err);
   fputs(usage, stderr);
   exit(1);
 }
 
-void error(char *err, ...) {
+void error(const char *err, ...) {
   va_list args;
   va_start(args, err);
   fprintf(stderr, "upbc: ");
@@ -175,8 +176,8 @@ int main(int argc, char *argv[]) {
   upb_status_uninit(&status);
 
   /* Emit output files. */
-  const int maxsize = 256;
-  char h_const_filename[maxsize];
+  char h_const_filename[256];
+  const int maxsize = sizeof(h_const_filename);
   if(snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize)
     error("File base too long.\n");
 
@@ -184,9 +185,9 @@ int main(int argc, char *argv[]) {
   if(!h_const_file) error("Failed to open _const.h output file\n");
 
   int symcount;
-  const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY);
+  const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY, &defs);
   write_const_h(defs, symcount, h_const_filename, h_const_file);
-  for (int i = 0; i < symcount; i++) upb_def_unref(defs[i]);
+  for (int i = 0; i < symcount; i++) upb_def_unref(defs[i], &defs);
   free(defs);
   free(descriptor);
   upb_symtab_unref(s);
diff --git a/upb/atomic.h b/upb/atomic.h
deleted file mode 100644
index 2478fe4..0000000
--- a/upb/atomic.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Google Inc.  See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * Only a very small part of upb is thread-safe.  Notably, individual
- * messages, arrays, and strings are *not* thread safe for mutating.
- * However, we do make message *metadata* such as upb_msgdef and
- * upb_symtab thread-safe, and their ownership is tracked via atomic
- * refcounting.  This header implements the small number of atomic
- * primitives required to support this.  The primitives we implement
- * are:
- *
- * - a reader/writer lock (wrappers around platform-provided mutexes).
- * - an atomic refcount.
- *
- * TODO: This needs some revisiting/refinement, see:
- *       http://code.google.com/p/upb/issues/detail?id=8
- */
-
-#ifndef UPB_ATOMIC_H_
-#define UPB_ATOMIC_H_
-
-#include <stdbool.h>
-#include <assert.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* inline if possible, emit standalone code if required. */
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-// Until this stuff is actually working, make thread-unsafe the default.
-#define UPB_THREAD_UNSAFE
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Non-thread-safe implementations. ******************************************/
-
-typedef struct {
-  int v;
-} upb_atomic_t;
-
-#define UPB_ATOMIC_INIT(x) {x}
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) { a->v = val; }
-INLINE bool upb_atomic_ref(upb_atomic_t *a) { return a->v++ == 0; }
-INLINE bool upb_atomic_unref(upb_atomic_t *a) { assert(a->v > 0); return --a->v == 0; }
-INLINE int upb_atomic_read(upb_atomic_t *a) { return a->v; }
-INLINE bool upb_atomic_add(upb_atomic_t *a, int val) {
-  a->v += val;
-  return a->v == 0;
-}
-
-#endif
-
-/* Atomic refcount ************************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-/* Already defined above. */
-
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
-
-/* GCC includes atomic primitives. */
-
-typedef struct {
-  volatile int v;
-} upb_atomic_t;
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
-  a->v = val;
-  __sync_synchronize();   /* Ensure the initialized value is visible. */
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_t *a) {
-  return __sync_fetch_and_add(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_add(upb_atomic_t *a, int n) {
-  return __sync_add_and_fetch(&a->v, n) == 0;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_t *a) {
-  return __sync_sub_and_fetch(&a->v, 1) == 0;
-}
-
-INLINE bool upb_atomic_read(upb_atomic_t *a) {
-  return __sync_fetch_and_add(&a->v, 0);
-}
-
-#elif defined(WIN32)
-
-/* Windows defines atomic increment/decrement. */
-#include <Windows.h>
-
-typedef struct {
-  volatile LONG val;
-} upb_atomic_t;
-
-INLINE void upb_atomic_init(upb_atomic_t *a, int val) {
-  InterlockedExchange(&a->val, val);
-}
-
-INLINE bool upb_atomic_ref(upb_atomic_t *a) {
-  return InterlockedIncrement(&a->val) == 1;
-}
-
-INLINE bool upb_atomic_unref(upb_atomic_t *a) {
-  return InterlockedDecrement(&a->val) == 0;
-}
-
-#else
-#error Atomic primitives not defined for your platform/CPU.  \
-       Implement them or compile with UPB_THREAD_UNSAFE.
-#endif
-
-INLINE bool upb_atomic_only(upb_atomic_t *a) {
-  return upb_atomic_read(a) == 1;
-}
-
-/* Reader/Writer lock. ********************************************************/
-
-#ifdef UPB_THREAD_UNSAFE
-
-typedef struct {
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) { (void)l; }
-INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) { (void)l; }
-
-#elif defined(UPB_USE_PTHREADS)
-
-#include <pthread.h>
-
-typedef struct {
-  pthread_rwlock_t lock;
-} upb_rwlock_t;
-
-INLINE void upb_rwlock_init(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_init(&l->lock, NULL);
-}
-
-INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_destroy(&l->lock);
-}
-
-INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_rdlock(&l->lock);
-}
-
-INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_wrlock(&l->lock);
-}
-
-INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) {
-  /* TODO: check return value. */
-  pthread_rwlock_unlock(&l->lock);
-}
-
-#else
-#error Reader/writer lock is not defined for your platform/CPU.  \
-       Implement it or compile with UPB_THREAD_UNSAFE.
-#endif
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_ATOMIC_H_ */
diff --git a/upb/bytestream.c b/upb/bytestream.c
index 812e552..8feb678 100644
--- a/upb/bytestream.c
+++ b/upb/bytestream.c
@@ -32,8 +32,6 @@ upb_byteregion *upb_byteregion_newl(const void *str, size_t len) {
   memcpy(ptr, str, len);
   ptr[len] = '\0';
   upb_stringsrc_reset(src, ptr, len);
-  upb_byteregion_fetch(upb_stringsrc_allbytes(src));
-  assert(len == upb_byteregion_available(upb_stringsrc_allbytes(src), 0));
   return upb_stringsrc_allbytes(src);
 }
 
@@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {
 
 static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
   upb_stdio_buf **reuse = NULL;  // XXX
-  uint32_t num_reused = 0, num_inuse = 0;
+  int num_reused = 0, num_inuse = 0;
 
   // Could sweep only a subset of bufs if this was a hotspot.
-  for (uint32_t i = 0; i < s->nbuf; i++) {
+  for (int i = 0; i < s->nbuf; i++) {
     upb_stdio_buf *buf = s->bufs[i];
     if (buf->refcount > 0) {
       s->bufs[num_inuse++] = buf;
@@ -243,10 +241,9 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
 
 upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
   upb_stringsrc *src = _src;
-  assert(ofs <= src->len);
+  assert(ofs < src->len);
   if (ofs == src->len) {
     upb_status_seteof(&src->bytesrc.status);
-    *read = 0;
     return UPB_BYTE_EOF;
   }
   *read = src->len - ofs;
diff --git a/upb/bytestream.h b/upb/bytestream.h
index fe049d2..3217ee1 100644
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@@ -372,8 +372,7 @@ INLINE int upb_bytesink_putc(upb_bytesink *sink, char ch) {
 }
 
 INLINE int upb_bytesink_putrepeated(upb_bytesink *sink, char ch, int len) {
-  int i;
-  for (i = 0; i < len; i++)
+  for (int i = 0; i < len; i++)
     if (upb_bytesink_write(sink, &ch, 1) < 0)
       return -1;
   return len;
@@ -436,7 +435,8 @@ typedef struct {
   FILE *file;
   bool should_close;
   upb_stdio_buf **bufs;
-  uint32_t nbuf, szbuf;
+  int nbuf;
+  uint32_t szbuf;
 } upb_stdio;
 
 void upb_stdio_init(upb_stdio *stdio);
diff --git a/upb/def.c b/upb/def.c
index 5ac3498..5a5b0f4 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -1,7 +1,7 @@
 /*
  * upb - a minimalist implementation of protocol buffers.
  *
- * Copyright (c) 2008-2009 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2008-2012 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  */
 
@@ -11,168 +11,283 @@
 #include "upb/bytestream.h"
 #include "upb/def.h"
 
-#define alignof(t) offsetof(struct { char c; t x; }, x)
+// isalpha() etc. from <ctype.h> are locale-dependent, which we don't want.
+static bool upb_isbetween(char c, char low, char high) {
+  return c >= low && c <= high;
+}
 
-void upb_deflist_init(upb_deflist *l) {
-  l->size = 8;
-  l->defs = malloc(l->size * sizeof(void*));
-  l->len = 0;
+static bool upb_isletter(char c) {
+  return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_';
 }
 
-void upb_deflist_uninit(upb_deflist *l) {
-  for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]);
-  free(l->defs);
+static bool upb_isalphanum(char c) {
+  return upb_isletter(c) || upb_isbetween(c, '0', '9');
 }
 
-void upb_deflist_push(upb_deflist *l, upb_def *d) {
-  if(l->len == l->size) {
-    l->size *= 2;
-    l->defs = realloc(l->defs, l->size * sizeof(void*));
+static bool upb_isident(const char *str, size_t len, bool full) {
+  bool start = true;
+  for (size_t i = 0; i < len; i++) {
+    char c = str[i];
+    if (c == '.') {
+      if (start || !full) return false;
+      start = true;
+    } else if (start) {
+      if (!upb_isletter(c)) return false;
+      start = false;
+    } else {
+      if (!upb_isalphanum(c)) return false;
+    }
   }
-  l->defs[l->len++] = d;
+  return !start;
 }
 
 
 /* upb_def ********************************************************************/
 
 static void upb_msgdef_free(upb_msgdef *m);
+static void upb_fielddef_free(upb_fielddef *f);
 static void upb_enumdef_free(upb_enumdef *e);
-static void upb_unresolveddef_free(struct _upb_unresolveddef *u);
 
-bool upb_def_ismutable(const upb_def *def) { return def->symtab == NULL; }
+bool upb_def_ismutable(const upb_def *def) { return !def->is_finalized; }
+bool upb_def_isfinalized(const upb_def *def) { return def->is_finalized; }
 
-bool upb_def_setfqname(upb_def *def, const char *fqname) {
+bool upb_def_setfullname(upb_def *def, const char *fullname) {
   assert(upb_def_ismutable(def));
-  free(def->fqname);
-  def->fqname = strdup(fqname);
-  return true;  // TODO: check for acceptable characters.
-}
-
-static void upb_def_free(upb_def *def) {
-  switch (def->type) {
-    case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break;
-    case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break;
-    case UPB_DEF_UNRESOLVED:
-        upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break;
-    default:
-      assert(false);
-  }
+  if (!upb_isident(fullname, strlen(fullname), true)) return false;
+  free(def->fullname);
+  def->fullname = strdup(fullname);
+  return true;
 }
 
-upb_def *upb_def_dup(const upb_def *def) {
+upb_def *upb_def_dup(const upb_def *def, void *o) {
   switch (def->type) {
     case UPB_DEF_MSG:
-      return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def)));
+      return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o));
+    case UPB_DEF_FIELD:
+      return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o));
     case UPB_DEF_ENUM:
-      return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def)));
+      return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o));
     default: assert(false); return NULL;
   }
 }
 
-// Prior to being in a symtab, the def's refcount controls the lifetime of the
-// def itself.  If the refcount falls to zero, the def is deleted.  Once the
-// def belongs to a symtab, the def is owned by the symtab and its refcount
-// determines whether the def owns a ref on the symtab or not.
-void upb_def_ref(const upb_def *_def) {
-  upb_def *def = (upb_def*)_def;  // Need to modify refcount.
-  if (upb_atomic_ref(&def->refcount) && def->symtab)
-    upb_symtab_ref(def->symtab);
-}
-
-static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) {
-  assert(upb_atomic_read(&d->refcount) > 0);
-  d->symtab = s;
-  upb_symtab_ref(s);
-  upb_msgdef *m = upb_dyncast_msgdef(d);
-  if (m) upb_inttable_compact(&m->itof);
+void upb_def_ref(const upb_def *_def, void *owner) {
+  upb_def *def = (upb_def*)_def;
+  upb_refcount_ref(&def->refcount, owner);
 }
 
-void upb_def_unref(const upb_def *_def) {
-  upb_def *def = (upb_def*)_def;  // Need to modify refcount.
+void upb_def_unref(const upb_def *_def, void *owner) {
+  upb_def *def = (upb_def*)_def;
   if (!def) return;
-  if (upb_atomic_unref(&def->refcount)) {
-    if (def->symtab) {
-      upb_symtab_unref(def->symtab);
-      // Def might be deleted now.
-    } else {
-      upb_def_free(def);
+  if (!upb_refcount_unref(&def->refcount, owner)) return;
+  upb_def *base = def;
+  // Free all defs in the SCC.
+  do {
+    upb_def *next = (upb_def*)def->refcount.next;
+    switch (def->type) {
+      case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break;
+      case UPB_DEF_FIELD: upb_fielddef_free(upb_downcast_fielddef(def)); break;
+      case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break;
+      default:
+        assert(false);
     }
-  }
+    def = next;
+  } while(def != base);
 }
 
-static void upb_def_init(upb_def *def, upb_deftype_t type) {
+static bool upb_def_init(upb_def *def, upb_deftype_t type, void *owner) {
   def->type = type;
-  def->fqname = NULL;
-  def->symtab = NULL;
-  upb_atomic_init(&def->refcount, 1);
+  def->is_finalized = false;
+  def->fullname = NULL;
+  return upb_refcount_init(&def->refcount, owner);
 }
 
 static void upb_def_uninit(upb_def *def) {
-  free(def->fqname);
+  upb_refcount_uninit(&def->refcount);
+  free(def->fullname);
 }
 
+void upb_def_donateref(const upb_def *_def, void *from, void *to) {
+  upb_def *def = (upb_def*)_def;
+  upb_refcount_donateref(&def->refcount, from, to);
+}
 
-/* upb_unresolveddef **********************************************************/
-
-// Unresolved defs are used as temporary placeholders for a def whose name has
-// not been resolved yet.  During the name resolution step, all unresolved defs
-// are replaced with pointers to the actual def being referenced.
-typedef struct _upb_unresolveddef {
-  upb_def base;
-} upb_unresolveddef;
+static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) {
+  upb_def *def = (upb_def*)refcount;
+  switch (def->type) {
+    case UPB_DEF_MSG: {
+      upb_msgdef *m = upb_downcast_msgdef(def);
+      upb_msg_iter i;
+      for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+        upb_fielddef *f = upb_msg_iter_field(&i);
+        upb_refcount_visit(refcount, &f->base.refcount, closure);
+      }
+      break;
+    }
+    case UPB_DEF_FIELD: {
+      upb_fielddef *f = upb_downcast_fielddef(def);
+      assert(f->msgdef);
+      upb_refcount_visit(refcount, &f->msgdef->base.refcount, closure);
+      upb_def *subdef = f->sub.def;
+      if (subdef)
+        upb_refcount_visit(refcount, &subdef->refcount, closure);
+      break;
+    }
+    case UPB_DEF_ENUM:
+    case UPB_DEF_SERVICE:
+    case UPB_DEF_ANY:
+      break;
+  }
+}
 
-// Is passed a ref on the string.
-static upb_unresolveddef *upb_unresolveddef_new(const char *str) {
-  upb_unresolveddef *def = malloc(sizeof(*def));
-  upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
-  def->base.fqname = strdup(str);
-  return def;
+// Checks that |f| can be finalized (named, numbered, subdef usable); sets |s|.
+static bool upb_validate_field(const upb_fielddef *f, upb_status *s) {
+  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
+    upb_status_seterrliteral(s, "fielddef must have name and number set");
+    return false;
+  }
+  if (!upb_hassubdef(f)) return true;
+  if (f->subdef_is_symbolic) {
+    upb_status_seterrf(s,
+        "field %s has not been resolved", upb_fielddef_name(f));
+    return false;
+  } else if (upb_fielddef_subdef(f) == NULL) {
+    upb_status_seterrf(s,
+        "field %s is missing required subdef", upb_fielddef_name(f));
+    return false;
+  } else if (!upb_def_isfinalized(upb_fielddef_subdef(f))) {
+    upb_status_seterrf(s,
+        "field %s subtype is not being finalized", upb_fielddef_name(f));
+    return false;
+  }
+  return true;
+}
 
-static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
-  upb_def_uninit(&def->base);
-  free(def);
+// Finalizes |defs| together; on failure sets |s| and undoes only its own marks.
+bool upb_finalize(upb_def *const*defs, int n, upb_status *s) {
+  if (n >= UINT16_MAX - 1) {
+    upb_status_seterrliteral(s, "too many defs (max is 64k at a time)");
+    return false;
+  }
+  int nmarked = 0;  // defs[0..nmarked) were marked by this call (see err:).
+
+  // First perform validation, in two passes so we can check that we have a
+  // transitive closure without needing to search.
+  for (; nmarked < n; nmarked++) {
+    upb_def *def = defs[nmarked];
+    if (upb_def_isfinalized(def)) {
+      // Could relax this requirement if it's annoying.
+      upb_status_seterrliteral(s, "def is already finalized");
+      goto err;
+    } else if (def->type == UPB_DEF_FIELD) {
+      upb_status_seterrliteral(s, "standalone fielddefs can not be finalized");
+      goto err;
+    } else {
+      // Set now to detect transitive closure in the second pass.
+      def->is_finalized = true;
+    }
+  }
+
+  for (int i = 0; i < n; i++) {
+    upb_msgdef *m = upb_dyncast_msgdef(defs[i]);
+    if (!m) continue;
+    upb_inttable_compact(&m->itof);
+    upb_msg_iter j;
+    for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
+      upb_fielddef *f = upb_msg_iter_field(&j);
+      assert(f->msgdef == m);
+      if (!upb_validate_field(f, s)) goto err;
+    }
+  }
+
+  // Validation all passed, now find strongly-connected components so that
+  // our refcounting works with cycles.
+  upb_refcount_findscc((upb_refcount**)defs, n, &upb_def_getsuccessors);
+
+  // Now that ref cycles have been removed it is safe to have each fielddef
+  // take a ref on its subdef (if any), but only if it's a member of another
+  // SCC.
+  for (int i = 0; i < n; i++) {
+    upb_msgdef *m = upb_dyncast_msgdef(defs[i]);
+    if (!m) continue;
+    upb_msg_iter j;
+    for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
+      upb_fielddef *f = upb_msg_iter_field(&j);
+      f->base.is_finalized = true;
+      // Release the ref taken in upb_msgdef_addfields().
+      upb_fielddef_unref(f, m);
+      if (!upb_hassubdef(f)) continue;
+      assert(upb_fielddef_subdef(f));
+      if (!upb_refcount_merged(&f->base.refcount, &f->sub.def->refcount)) {
+        // Subdef is part of a different strongly-connected component.
+        upb_def_ref(f->sub.def, &f->sub.def);
+        f->subdef_is_owned = true;
+      }
+    }
+  }
+
+  return true;
+
+err:
+  for (int i = 0; i < nmarked; i++) defs[i]->is_finalized = false;
+  return false;
+}
 
 
 /* upb_enumdef ****************************************************************/
 
-upb_enumdef *upb_enumdef_new() {
+upb_enumdef *upb_enumdef_new(void *owner) {
   upb_enumdef *e = malloc(sizeof(*e));
-  upb_def_init(&e->base, UPB_DEF_ENUM);
-  upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent));
-  upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent));
+  if (!e) return NULL;
+  if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) { free(e); return NULL; }
+  if (!upb_strtable_init(&e->ntoi)) goto err;
+  if (!upb_inttable_init(&e->iton)) { upb_strtable_uninit(&e->ntoi); goto err; }
+  e->defaultval = 0;  // Read when an enum field's default is resolved.
   return e;
+
+err:
+  upb_def_uninit(&e->base);  // Undo the successful upb_def_init() above.
+  free(e);
+  return NULL;
 }
 
 static void upb_enumdef_free(upb_enumdef *e) {
-  upb_enum_iter i;
-  for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
-    // Frees the ref taken when the string was parsed.
-    free(upb_enum_iter_name(i));
-  }
-  upb_strtable_free(&e->ntoi);
-  upb_inttable_free(&e->iton);
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, &e->iton);
+  for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    // To clean up the strdup() from upb_enumdef_addval().
+    free(upb_value_getptr(upb_inttable_iter_value(&i)));
+  }
+  upb_strtable_uninit(&e->ntoi);
+  upb_inttable_uninit(&e->iton);
   upb_def_uninit(&e->base);
   free(e);
 }
 
-upb_enumdef *upb_enumdef_dup(const upb_enumdef *e) {
-  upb_enumdef *new_e = upb_enumdef_new();
+upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner) {
+  upb_enumdef *new_e = upb_enumdef_new(owner);
+  if (!new_e) return NULL;
+  new_e->defaultval = e->defaultval;  // The default is part of the copy too.
   upb_enum_iter i;
-  for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
-    assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i),
-                                     upb_enum_iter_number(i)));
+  for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
+    if (upb_enumdef_addval(new_e, upb_enum_iter_name(&i),
+                           upb_enum_iter_number(&i))) continue;
+    // addval failed: release the partially built copy.
+    upb_enumdef_unref(new_e, owner);
+    return NULL;
   }
   return new_e;
 }
 
-bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) {
-  if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL))
+bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num) {
+  if (!upb_isident(name, strlen(name), false)) return false;
+  if (upb_enumdef_ntoi(e, name, NULL))
+    return false;
+  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num)))
+    return false;
+  if (!upb_inttable_lookup(&e->iton, num) &&
+      !upb_inttable_insert(&e->iton, num, upb_value_ptr(strdup(name))))
     return false;
-  upb_iton_ent ent = {0, strdup(name)};
-  upb_strtable_insert(&e->ntoi, name, &num);
-  upb_inttable_insert(&e->iton, num, &ent);
   return true;
 }
 
@@ -181,42 +296,70 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) {
   e->defaultval = val;
 }
 
-upb_enum_iter upb_enum_begin(const upb_enumdef *e) {
-  // We could iterate over either table here; the choice is arbitrary.
-  return upb_inttable_begin(&e->iton);
+void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
+  // We iterate over the ntoi table, to account for duplicate numbers.
+  upb_strtable_begin(i, &e->ntoi);
 }
 
-upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter) {
-  return upb_inttable_next(&e->iton, iter);
-}
+void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
+bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
 
-const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
-  upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
-  return e ? e->str : NULL;
-}
-
-bool upb_enumdef_ntoil(upb_enumdef *def, const char *name, size_t len, int32_t *num) {
-  upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len);
-  if (!e) return false;
-  if (num) *num = e->value;
+bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) {
+  const upb_value *v = upb_strtable_lookup(&def->ntoi, name);
+  if (!v) return false;
+  if (num) *num = upb_value_getint32(*v);
   return true;
 }
 
-bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num) {
-  return upb_enumdef_ntoil(e, name, strlen(name), num);
+const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
+  const upb_value *v = upb_inttable_lookup32(&def->iton, num);
+  return v ? upb_value_getptr(*v) : NULL;
 }
 
 
 /* upb_fielddef ***************************************************************/
 
+#define alignof(t) offsetof(struct { char c; t x; }, x)
+#define TYPE_INFO(ctype, inmemory_type) \
+    {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type}
+
+const upb_typeinfo upb_types[UPB_NUM_TYPES] = {
+  // END_GROUP is not real, but used to signify the pseudo-field that
+  // ends a group from within the group.
+  TYPE_INFO(void*,     PTR),        // ENDGROUP
+  TYPE_INFO(double,    DOUBLE),     // DOUBLE
+  TYPE_INFO(float,     FLOAT),      // FLOAT
+  TYPE_INFO(int64_t,   INT64),      // INT64
+  TYPE_INFO(uint64_t,  UINT64),     // UINT64
+  TYPE_INFO(int32_t,   INT32),      // INT32
+  TYPE_INFO(uint64_t,  UINT64),     // FIXED64
+  TYPE_INFO(uint32_t,  UINT32),     // FIXED32
+  TYPE_INFO(bool,      BOOL),       // BOOL
+  TYPE_INFO(void*,     BYTEREGION), // STRING
+  TYPE_INFO(void*,     PTR),        // GROUP
+  TYPE_INFO(void*,     PTR),        // MESSAGE
+  TYPE_INFO(void*,     BYTEREGION), // BYTES
+  TYPE_INFO(uint32_t,  UINT32),     // UINT32
+  TYPE_INFO(uint32_t,  INT32),      // ENUM
+  TYPE_INFO(int32_t,   INT32),      // SFIXED32
+  TYPE_INFO(int64_t,   INT64),      // SFIXED64
+  TYPE_INFO(int32_t,   INT32),      // SINT32
+  TYPE_INFO(int64_t,   INT64),      // SINT64
+};
+
 static void upb_fielddef_init_default(upb_fielddef *f);
 
-upb_fielddef *upb_fielddef_new() {
+upb_fielddef *upb_fielddef_new(void *owner) {
   upb_fielddef *f = malloc(sizeof(*f));
+  if (!f) return NULL;
+  if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) {
+    free(f);
+    return NULL;
+  }
   f->msgdef = NULL;
-  f->def = NULL;
-  upb_atomic_init(&f->refcount, 1);
-  f->finalized = false;
+  f->sub.def = NULL;
+  f->subdef_is_symbolic = false;
+  f->subdef_is_owned = false;
   f->label = UPB_LABEL(OPTIONAL);
   f->hasbit = -1;
   f->offset = 0;
@@ -226,14 +369,68 @@ upb_fielddef *upb_fielddef_new() {
   // These are initialized to be invalid; the user must set them explicitly.
   // Could relax this later if it's convenient and non-confusing to have a
   // defaults for them.
-  f->name = NULL;
-  f->type = 0;
+  f->type = UPB_TYPE_NONE;
   f->number = 0;
 
   upb_fielddef_init_default(f);
   return f;
 }
 
+static void upb_fielddef_uninit_default(upb_fielddef *f) {
+  if (f->default_is_string)
+    upb_byteregion_free(upb_value_getbyteregion(f->defaultval));
+}
+
+static void upb_fielddef_free(upb_fielddef *f) {
+  if (f->subdef_is_owned) upb_def_unref(f->sub.def, &f->sub.def);
+  if (f->subdef_is_symbolic) free(f->sub.name);  // Owned strdup() copy.
+  upb_fielddef_uninit_default(f);
+  upb_def_uninit(UPB_UPCAST(f));
+  free(f);
+}
+
+// Returns a copy of |f| created with upb_fielddef_new(owner), or NULL if
+// allocation fails.  A resolved subdef is copied as a symbolic name.
+upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner) {
+  upb_fielddef *newf = upb_fielddef_new(owner);
+  if (!newf) return NULL;
+  upb_fielddef_settype(newf, upb_fielddef_type(f));
+  upb_fielddef_setlabel(newf, upb_fielddef_label(f));
+  upb_fielddef_setnumber(newf, upb_fielddef_number(f));
+  upb_fielddef_setname(newf, upb_fielddef_name(f));
+  upb_fielddef_sethasbit(newf, upb_fielddef_hasbit(f));
+  upb_fielddef_setoffset(newf, upb_fielddef_offset(f));
+  upb_fielddef_setaccessor(newf, upb_fielddef_accessor(f));
+  upb_fielddef_setfval(newf, upb_fielddef_fval(f));
+  if (f->default_is_string) {
+    upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f));
+    size_t len;
+    const char *ptr = upb_byteregion_getptr(r, 0, &len);
+    assert(len == upb_byteregion_len(r));
+    upb_fielddef_setdefaultstr(newf, ptr, len);
+  } else if (!upb_issubmsg(f)) {  // setdefault() asserts on submsg fields.
+    upb_fielddef_setdefault(newf, upb_fielddef_default(f));
+  }
+
+  // A still-symbolic name is copied verbatim; a resolved subdef is referenced
+  // by its full name with a leading '.'.  Either source might be NULL.
+  const char *srcname = f->subdef_is_symbolic ? f->sub.name :
+      (f->sub.def ? upb_def_fullname(f->sub.def) : NULL);
+  if (srcname) {
+    char *newname = malloc(strlen(srcname) + 2);
+    if (!newname) {
+      upb_fielddef_unref(newf, owner);
+      return NULL;
+    }
+    strcpy(newname, f->subdef_is_symbolic ? "" : ".");
+    strcat(newname, srcname);
+    upb_fielddef_setsubtypename(newf, newname);
+    free(newname);
+  }
+
+  return newf;
+}
+
 static void upb_fielddef_init_default(upb_fielddef *f) {
   f->default_is_string = false;
   switch (upb_fielddef_type(f)) {
@@ -253,105 +450,62 @@ static void upb_fielddef_init_default(upb_fielddef *f) {
     case UPB_TYPE(BOOL): upb_value_setbool(&f->defaultval, false); break;
     case UPB_TYPE(STRING):
     case UPB_TYPE(BYTES):
-      f->default_is_string = true;
-      upb_value_setbyteregion(&f->defaultval, upb_byteregion_new(""));
-      break;
+        upb_value_setbyteregion(&f->defaultval, upb_byteregion_new(""));
+        f->default_is_string = true;
+        break;
     case UPB_TYPE(GROUP):
     case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break;
+    case UPB_TYPE_ENDGROUP: assert(false);
+    case UPB_TYPE_NONE: break;
   }
 }
 
-static void upb_fielddef_uninit_default(upb_fielddef *f) {
-  if (f->default_is_string) {
-    upb_byteregion_free(upb_value_getbyteregion(f->defaultval));
-  }
-}
-
-static void upb_fielddef_free(upb_fielddef *f) {
-  upb_fielddef_uninit_default(f);
-  if (f->def) {
-    // We own a ref on the subdef iff we are not part of a msgdef.
-    if (f->msgdef == NULL) {
-      if (f->def) upb_downcast_unresolveddef(f->def);  // assert() check.
-      upb_def_unref(f->def);
-    }
-  }
-  free(f->name);
-  free(f);
-}
-
-void upb_fielddef_ref(upb_fielddef *f) {
-  // TODO.
-  (void)f;
-}
-
-void upb_fielddef_unref(upb_fielddef *f) {
-  // TODO.
-  (void)f;
-  if (!f) return;
-  if (upb_atomic_unref(&f->refcount)) {
-    if (f->msgdef) {
-      upb_msgdef_unref(f->msgdef);
-      // fielddef might be deleted now.
-    } else {
-      upb_fielddef_free(f);
-    }
+const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
+  if (upb_hassubdef(f) && upb_fielddef_isfinalized(f)) {
+    assert(f->sub.def);
+    return f->sub.def;
+  } else {
+    return f->subdef_is_symbolic ? NULL : f->sub.def;
   }
 }
 
-upb_fielddef *upb_fielddef_dup(upb_fielddef *f) {
-  upb_fielddef *newf = upb_fielddef_new();
-  newf->msgdef = f->msgdef;
-  newf->type = f->type;
-  newf->label = f->label;
-  newf->number = f->number;
-  newf->name = f->name;
-  upb_fielddef_settypename(newf, f->def->fqname);
-  return f;
+upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
+  return (upb_def*)upb_fielddef_subdef(f);
 }
 
-bool upb_fielddef_ismutable(const upb_fielddef *f) {
-  return !f->msgdef || upb_def_ismutable(UPB_UPCAST(f->msgdef));
+const char *upb_fielddef_subtypename(upb_fielddef *f) {
+  assert(upb_fielddef_ismutable(f));
+  return f->subdef_is_symbolic ? f->sub.name : NULL;
 }
 
-upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
-  if (upb_hassubdef(f) && !upb_fielddef_ismutable(f))
-    return f->def;
-  else
-    return NULL;
-}
-
-static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
-  assert(upb_dyncast_unresolveddef(f->def));
-  upb_def_unref(f->def);
-  f->def = def;
-  if (f->type == UPB_TYPE(ENUM) && f->default_is_string) {
-    // Resolve the enum's default from a string to an integer.
-    upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
-    assert(bytes);  // Points to either a real default or the empty string.
-    upb_enumdef *e = upb_downcast_enumdef(f->def);
-    int32_t val = 0;
-    // Could do a sanity check that the default value does not have embedded
-    // NULLs.
-    if (upb_byteregion_len(bytes) == 0) {
-      upb_value_setint32(&f->defaultval, e->defaultval);
-    } else {
-      size_t len;
-      // ptr is guaranteed to be NULL-terminated because the byteregion was
-      // created with upb_byteregion_newl().
-      const char *ptr = upb_byteregion_getptr(bytes, 0, &len);
-      assert(len == upb_byteregion_len(bytes));  // Should all be in one chunk.
-      bool success = upb_enumdef_ntoi(e, ptr, &val);
-      if (!success) {
-        upb_status_seterrf(
-            s, "Default enum value (%s) is not a member of the enum", ptr);
-        return false;
-      }
-      upb_value_setint32(&f->defaultval, val);
+// Could expose this to clients if a client wants to call it independently
+// of upb_resolve() for whatever reason.
+static bool upb_fielddef_resolvedefault(upb_fielddef *f, upb_status *s) {
+  if (!f->default_is_string) return true;
+  // Resolve the enum's default from a string to an integer.
+  upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
+  assert(bytes);  // Points to either a real default or the empty string.
+  upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef_mutable(f));
+  int32_t val = 0;
+  if (upb_byteregion_len(bytes) == 0) {
+    upb_value_setint32(&f->defaultval, e->defaultval);
+  } else {
+    size_t len;
+    // ptr is guaranteed to be NULL-terminated because the byteregion was
+    // created with upb_byteregion_newl().
+    const char *ptr = upb_byteregion_getptr(
+        bytes, upb_byteregion_startofs(bytes), &len);
+    assert(len == upb_byteregion_len(bytes));  // Should all be in one chunk.
+    bool success = upb_enumdef_ntoi(e, ptr, &val);
+    if (!success) {
+      upb_status_seterrf(
+          s, "Default enum value (%s) is not a member of the enum", ptr);
+      return false;
     }
-    f->default_is_string = false;
-    upb_byteregion_free(bytes);
+    upb_value_setint32(&f->defaultval, val);
   }
+  f->default_is_string = false;
+  upb_byteregion_free(bytes);
   return true;
 }
 
@@ -361,42 +515,50 @@ bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
   return true;
 }
 
-bool upb_fielddef_setname(upb_fielddef *f, const char *name) {
-  assert(f->msgdef == NULL);
-  free(f->name);
-  f->name = strdup(name);
-  return true;
-}
-
-bool upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
-  assert(!f->finalized);
+bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
+  assert(upb_fielddef_ismutable(f));
   upb_fielddef_uninit_default(f);
   f->type = type;
   upb_fielddef_init_default(f);
   return true;
 }
 
-bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) {
-  assert(!f->finalized);
+bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
+  assert(upb_fielddef_ismutable(f));
   f->label = label;
   return true;
 }
 
 void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) {
-  assert(!f->finalized);
-  assert(!upb_isstring(f));
+  assert(upb_fielddef_ismutable(f));
+  assert(!upb_isstring(f) && !upb_issubmsg(f));
+  if (f->default_is_string) {
+    upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
+    assert(bytes);
+    upb_byteregion_free(bytes);
+  }
   f->defaultval = value;
+  f->default_is_string = false;
 }
 
-void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) {
+bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) {
   assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM));
+  // Validate before freeing the old default so a rejected name changes nothing.
+  if (f->type == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false;
   if (f->default_is_string) {
     upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval);
     assert(bytes);
     upb_byteregion_free(bytes);
-  }
-  upb_value_setbyteregion(&f->defaultval, upb_byteregion_newl(str, len));
+  } else {
+    assert(f->type == UPB_TYPE(ENUM));
+  }
+  upb_byteregion *r = upb_byteregion_newl(str, len);
+  upb_value_setbyteregion(&f->defaultval, r);
+  upb_bytesuccess_t ret = upb_byteregion_fetch(r); (void)ret;  // assert-only.
+  assert(ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK));
+  assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r));
   f->default_is_string = true;
+  return true;
 }
 
 void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) {
@@ -404,82 +566,106 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) {
 }
 
 void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) {
-  assert(!f->finalized);
-  // TODO: string ownership?
+  assert(upb_fielddef_ismutable(f));
+  // TODO: we need an ownership/freeing mechanism for dynamically-allocated
+  // fvals.  One possibility is to let the user supply a free() function
+  // and call it when the fval is no longer referenced.  Would have to
+  // ensure that no common use cases need cycles.
+  //
+  // For now the fval has no ownership; the caller must simply guarantee
+  // somehow that it outlives any handlers/plan.
   f->fval = fval;
 }
 
-void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) {
-  assert(!f->finalized);
-  f->accessor = vtbl;
+void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit) {
+  assert(upb_fielddef_ismutable(f));
+  f->hasbit = hasbit;
 }
 
-bool upb_fielddef_settypename(upb_fielddef *f, const char *name) {
-  upb_def_unref(f->def);
-  f->def = UPB_UPCAST(upb_unresolveddef_new(name));
-  return true;
+void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset) {
+  assert(upb_fielddef_ismutable(f));
+  f->offset = offset;
 }
 
-// Returns an ordering of fields based on:
-// 1. value size (small to large).
-// 2. field number.
-static int upb_fielddef_cmpval(const void *_f1, const void *_f2) {
-  upb_fielddef *f1 = *(void**)_f1;
-  upb_fielddef *f2 = *(void**)_f2;
-  size_t size1 = upb_types[f1->type].size;
-  size_t size2 = upb_types[f2->type].size;
-  if (size1 != size2) return size1 - size2;
-  // Otherwise return in number order.
-  return f1->number - f2->number;
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *tbl) {
+  assert(upb_fielddef_ismutable(f));
+  f->accessor = tbl;
 }
 
-// Returns an ordering of all fields based on:
-// 1. required/optional (required fields first).
-// 2. field number
-static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) {
-  upb_fielddef *f1 = *(void**)_f1;
-  upb_fielddef *f2 = *(void**)_f2;
-  size_t req1 = f1->label == UPB_LABEL(REQUIRED);
-  size_t req2 = f2->label == UPB_LABEL(REQUIRED);
-  if (req1 != req2) return req1 - req2;
-  // Otherwise return in number order.
-  return f1->number - f2->number;
+static bool upb_subtype_typecheck(upb_fielddef *f, const upb_def *subdef) {
+  if (f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP))
+    return upb_dyncast_msgdef_const(subdef) != NULL;
+  else if (f->type == UPB_TYPE(ENUM))
+    return upb_dyncast_enumdef_const(subdef) != NULL;
+  else {
+    assert(false);
+    return false;
+  }
+}
+
+bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef) {
+  assert(upb_fielddef_ismutable(f));
+  assert(upb_hassubdef(f));
+  assert(subdef);
+  if (!upb_subtype_typecheck(f, subdef)) return false;
+  if (f->subdef_is_symbolic) free(f->sub.name);
+  f->sub.def = subdef;
+  f->subdef_is_symbolic = false;
+  return true;
+}
+
+bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) {
+  assert(upb_fielddef_ismutable(f));
+  assert(upb_hassubdef(f));
+  if (f->subdef_is_symbolic) free(f->sub.name);
+  f->sub.name = strdup(name);
+  f->subdef_is_symbolic = true;
+  return true;
 }
 
 
 /* upb_msgdef *****************************************************************/
 
-upb_msgdef *upb_msgdef_new() {
+upb_msgdef *upb_msgdef_new(void *owner) {
   upb_msgdef *m = malloc(sizeof(*m));
-  upb_def_init(&m->base, UPB_DEF_MSG);
-  upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
-  upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
+  if (!m) return NULL;
+  if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) { free(m); return NULL; }
+  if (!upb_inttable_init(&m->itof)) goto err2;
+  if (!upb_strtable_init(&m->ntof)) goto err1;
   m->size = 0;
   m->hasbit_bytes = 0;
   m->extstart = 0;
   m->extend = 0;
   return m;
+err1:
+  upb_inttable_uninit(&m->itof);
+err2:
+  upb_def_uninit(&m->base);  // Undo the successful upb_def_init() above.
+  free(m);
+  return NULL;
 }
 
 static void upb_msgdef_free(upb_msgdef *m) {
-  upb_msg_iter i;
-  for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
-    upb_fielddef_free(upb_msg_iter_field(i));
-  upb_strtable_free(&m->ntof);
-  upb_inttable_free(&m->itof);
+  upb_strtable_uninit(&m->ntof);
+  upb_inttable_uninit(&m->itof);
   upb_def_uninit(&m->base);
   free(m);
 }
 
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m) {
-  upb_msgdef *newm = upb_msgdef_new();
-  newm->size = m->size;
-  newm->hasbit_bytes = m->hasbit_bytes;
-  newm->extstart = m->extstart;
-  newm->extend = m->extend;
+upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner) {
+  upb_msgdef *newm = upb_msgdef_new(owner);
+  if (!newm) return NULL;
+  upb_msgdef_setsize(newm, upb_msgdef_size(m));
+  upb_msgdef_sethasbit_bytes(newm, upb_msgdef_hasbit_bytes(m));
+  upb_msgdef_setextrange(newm, upb_msgdef_extstart(m), upb_msgdef_extend(m));
+  upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m)));
   upb_msg_iter i;
-  for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-    upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i)));
+  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
+    if (f && upb_msgdef_addfield(newm, f, &f)) continue;  // Ref donated.
+    if (f) upb_fielddef_unref(f, &f);  // Not added: drop the ref &f still owns.
+    upb_msgdef_unref(newm, owner);
+    return NULL;
   }
   return newm;
 }
@@ -506,160 +692,69 @@ bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end) {
   return true;
 }
 
-bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n) {
+bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n,
+                          void *ref_donor) {
   // Check constraints for all fields before performing any action.
   for (int i = 0; i < n; i++) {
     upb_fielddef *f = fields[i];
-    assert(upb_atomic_read(&f->refcount) > 0);
-    if (f->name == NULL || f->number == 0 ||
-        upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name))
+    if (f->msgdef != NULL ||
+        upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0 ||
+        upb_msgdef_itof(m, upb_fielddef_number(f)) ||
+        upb_msgdef_ntof(m, upb_fielddef_name(f)))
       return false;
   }
 
   // Constraint checks ok, perform the action.
   for (int i = 0; i < n; i++) {
     upb_fielddef *f = fields[i];
-    upb_msgdef_ref(m);
-    assert(f->msgdef == NULL);
     f->msgdef = m;
-    upb_itof_ent itof_ent = {0, f};
-    upb_inttable_insert(&m->itof, f->number, &itof_ent);
-    upb_strtable_insert(&m->ntof, f->name, &f);
+    upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
+    upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
+    upb_fielddef_ref(f, m);
+    if (ref_donor) upb_fielddef_unref(f, ref_donor);
   }
   return true;
 }
 
-static int upb_div_round_up(int numerator, int denominator) {
-  /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
-  return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
-}
-
-void upb_msgdef_layout(upb_msgdef *m) {
-  // Create an ordering over the fields, but only include fields with accessors.
-  upb_fielddef **sorted_fields =
-      malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m));
-  int n = 0;
-  upb_msg_iter i;
-  for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-    upb_fielddef *f = upb_msg_iter_field(i);
-    if (f->accessor) sorted_fields[n++] = f;
-  }
-
-  m->hasbit_bytes = upb_div_round_up(n, 8);
-  m->size = m->hasbit_bytes;  // + header_size?
-
-  // Assign hasbits.
-  qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit);
-  for (int i = 0; i < n; i++) {
-    upb_fielddef *f = sorted_fields[i];
-    f->hasbit = i;
-  }
-
-  // Assign value offsets.
-  qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval);
-  size_t max_align = 0;
-  for (int i = 0; i < n; i++) {
-    upb_fielddef *f = sorted_fields[i];
-    const upb_type_info *type_info = &upb_types[f->type];
-    size_t size = type_info->size;
-    size_t align = type_info->align;
-    if (upb_isseq(f)) {
-      size = sizeof(void*);
-      align = alignof(void*);
-    }
-
-    // General alignment rules are: each member must be at an address that is a
-    // multiple of that type's alignment.  Also, the size of the structure as a
-    // whole must be a multiple of the greatest alignment of any member.
-    f->offset = upb_align_up(m->size, align);
-    m->size = f->offset + size;
-    max_align = UPB_MAX(max_align, align);
-  }
-  if (max_align > 0) m->size = upb_align_up(m->size, max_align);
-
-  free(sorted_fields);
-}
-
-upb_msg_iter upb_msg_begin(const upb_msgdef *m) {
-  return upb_inttable_begin(&m->itof);
+void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
+  upb_inttable_begin(iter, &m->itof);
 }
 
-upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter) {
-  return upb_inttable_next(&m->itof, iter);
-}
+void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); }
 
 
 /* upb_symtab *****************************************************************/
 
-typedef struct {
-  upb_def *def;
-} upb_symtab_ent;
-
-// Given a symbol and the base symbol inside which it is defined, find the
-// symbol's definition in t.
-static upb_symtab_ent *upb_resolve(const upb_strtable *t,
-                                   const char *base, const char *sym) {
-  if(strlen(sym) == 0) return NULL;
-  if(sym[0] == UPB_SYMBOL_SEPARATOR) {
-    // Symbols starting with '.' are absolute, so we do a single lookup.
-    // Slice to omit the leading '.'
-    return upb_strtable_lookup(t, sym + 1);
-  } else {
-    // Remove components from base until we find an entry or run out.
-    // TODO: This branch is totally broken, but currently not used.
-    (void)base;
-    assert(false);
-    return NULL;
-  }
-}
-
-static void _upb_symtab_free(upb_strtable *t) {
-  upb_strtable_iter i;
-  upb_strtable_begin(&i, t);
-  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
-    assert(upb_atomic_read(&e->def->refcount) == 0);
-    upb_def_free(e->def);
-  }
-  upb_strtable_free(t);
-}
-
 static void upb_symtab_free(upb_symtab *s) {
-  _upb_symtab_free(&s->symtab);
-  for (uint32_t i = 0; i < s->olddefs.len; i++) {
-    upb_def *d = s->olddefs.defs[i];
-    assert(upb_atomic_read(&d->refcount) == 0);
-    upb_def_free(d);
-  }
-  upb_rwlock_destroy(&s->lock);
-  upb_deflist_uninit(&s->olddefs);
+  upb_strtable_iter i;
+  upb_strtable_begin(&i, &s->symtab);
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i))
+    upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s);
+  upb_strtable_uninit(&s->symtab);
   free(s);
 }
 
 void upb_symtab_ref(const upb_symtab *_s) {
   upb_symtab *s = (upb_symtab*)_s;
-  upb_atomic_ref(&s->refcount);
+  s->refcount++;
 }
 
 void upb_symtab_unref(const upb_symtab *_s) {
   upb_symtab *s = (upb_symtab*)_s;
-  if(s && upb_atomic_unref(&s->refcount)) {
+  if(s && --s->refcount == 0) {
     upb_symtab_free(s);
   }
 }
 
 upb_symtab *upb_symtab_new() {
   upb_symtab *s = malloc(sizeof(*s));
-  upb_atomic_init(&s->refcount, 1);
-  upb_rwlock_init(&s->lock);
-  upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent));
-  upb_deflist_init(&s->olddefs);
+  s->refcount = 1;
+  upb_strtable_init(&s->symtab);
   return s;
 }
 
 const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count,
-                                   upb_deftype_t type) {
-  upb_rwlock_rdlock(&s->lock);
+                                   upb_deftype_t type, void *owner) {
   int total = upb_strtable_count(&s->symtab);
   // We may only use part of this, depending on how many symbols are of the
   // correct type.
@@ -668,177 +763,252 @@ const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count,
   upb_strtable_begin(&iter, &s->symtab);
   int i = 0;
   for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
-    const upb_symtab_ent *e = upb_strtable_iter_value(&iter);
-    upb_def *def = e->def;
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
     assert(def);
     if(type == UPB_DEF_ANY || def->type == type)
       defs[i++] = def;
   }
-  upb_rwlock_unlock(&s->lock);
   *count = i;
-  for(i = 0; i < *count; i++) upb_def_ref(defs[i]);
+  if (owner)
+    for(i = 0; i < *count; i++) upb_def_ref(defs[i], owner);
   return defs;
 }
 
-const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
-  upb_rwlock_rdlock(&s->lock);
-  upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
-  upb_def *ret = NULL;
-  if(e) {
-    ret = e->def;
-    upb_def_ref(ret);
-  }
-  upb_rwlock_unlock(&s->lock);
+const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym,
+                                 void *owner) {
+  const upb_value *v = upb_strtable_lookup(&s->symtab, sym);
+  upb_def *ret = v ? upb_value_getptr(*v) : NULL;
+  if (ret) upb_def_ref(ret, owner);
   return ret;
 }
 
-const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
-  upb_rwlock_rdlock(&s->lock);
-  upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
+const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym,
+                                       void *owner) {
+  const upb_value *v = upb_strtable_lookup(&s->symtab, sym);
+  upb_def *def = v ? upb_value_getptr(*v) : NULL;
   upb_msgdef *ret = NULL;
-  if(e && e->def->type == UPB_DEF_MSG) {
-    ret = upb_downcast_msgdef(e->def);
-    upb_def_ref(UPB_UPCAST(ret));
+  if(def && def->type == UPB_DEF_MSG) {
+    ret = upb_downcast_msgdef(def);
+    upb_def_ref(def, owner);
   }
-  upb_rwlock_unlock(&s->lock);
   return ret;
 }
 
+// Given a symbol and the base symbol inside which it is defined, find the
+// symbol's definition in t.
+static upb_def *upb_resolvename(const upb_strtable *t,
+                                const char *base, const char *sym) {
+  if(strlen(sym) == 0) return NULL;
+  if(sym[0] == UPB_SYMBOL_SEPARATOR) {
+    // Symbols starting with '.' are absolute, so we do a single lookup.
+    // Slice to omit the leading '.'
+    const upb_value *v = upb_strtable_lookup(t, sym + 1);
+    return v ? upb_value_getptr(*v) : NULL;
+  } else {
+    // Remove components from base until we find an entry or run out.
+    // TODO: This branch is totally broken, but currently not used.
+    (void)base;
+    assert(false);
+    return NULL;
+  }
+}
+
 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
-                                  const char *sym) {
-  upb_rwlock_rdlock(&s->lock);
-  upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym);
-  upb_def *ret = NULL;
-  if(e) {
-    ret = e->def;
-    upb_def_ref(ret);
-  }
-  upb_rwlock_unlock(&s->lock);
+                                  const char *sym, void *owner) {
+  upb_def *ret = upb_resolvename(&s->symtab, base, sym);
+  if (ret) upb_def_ref(ret, owner);
   return ret;
 }
 
-bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
-                    upb_strtable *addtab) {
-  // This linear search makes the DFS O(n^2) in the length of the paths.
-  // Could make this O(n) with a hash table, but n is small.
-  for (int i = 0; i < n; i++) {
-    if (def == open_defs[i]) return false;
-  }
-
-  bool needcopy = false;
-  upb_msgdef *m = upb_dyncast_msgdef(def);
-  if (m) {
-    upb_msg_iter i;
-    open_defs[n++] = def;
-    for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-      upb_fielddef *f = upb_msg_iter_field(i);
-      if (!upb_hassubdef(f)) continue;
-      needcopy |= upb_symtab_dfs(f->def, open_defs, n, addtab);
+// Adds dups of any existing def that can reach a def with the same name as one
+// of "defs."  This is to provide a consistent output graph as documented in
+// the header file.  We use a modified depth-first traversal that traverses
+// each SCC (which we already computed) as if it were a single node.  This
+// allows us to traverse the possibly-cyclic graph as if it were a DAG and to
+// easily dup the correct set of nodes with O(n) time.
+//
+// Returns true if defs that can reach "def" need to be duplicated into deftab.
+static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab,
+                            void *new_owner, upb_inttable *seen,
+                            upb_status *s) {
+  // Memoize results of this function for efficiency (since we're traversing a
+  // DAG this is not needed to limit the depth of the search).
+  upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def);
+  if (v) return upb_value_getbool(*v);
+
+  // Visit submessages for all messages in the SCC.
+  bool need_dup = false;
+  const upb_def *base = def;
+  do {
+    assert(upb_def_isfinalized(def));
+    if (def->type == UPB_DEF_FIELD) continue;
+    upb_value *v = upb_strtable_lookup(deftab, upb_def_fullname(def));
+    if (v) {
+      upb_def *add_def = upb_value_getptr(*v);
+      if (add_def->refcount.next && add_def->refcount.next != &def->refcount) {
+        upb_status_seterrf(s, "conflicting existing defs for name: '%s'",
+                           upb_def_fullname(def));
+        return false;
+      }
+      need_dup = true;
+    }
+    const upb_msgdef *m = upb_dyncast_msgdef_const(def);
+    if (m) {
+      upb_msg_iter i;
+      for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+        upb_fielddef *f = upb_msg_iter_field(&i);
+        if (!upb_hassubdef(f)) continue;
+        // |= to avoid short-circuit; we need its side-effects.
+        need_dup |= upb_resolve_dfs(
+            upb_fielddef_subdef_mutable(f), deftab, new_owner, seen, s);
+        if (!upb_ok(s)) return false;
+      }
     }
+  } while ((def = (upb_def*)def->refcount.next) != base);
+
+  if (need_dup) {
+    // Dup any defs that don't already have entries in deftab.
+    def = base;
+    do {
+      if (def->type == UPB_DEF_FIELD) continue;
+      const char *name = upb_def_fullname(def);
+      if (upb_strtable_lookup(deftab, name) == NULL) {
+        upb_def *newdef = upb_def_dup(def, new_owner);
+        if (!newdef) goto oom;
+        // We temporarily use this field to track who we were dup'd from.
+        newdef->refcount.next = (upb_refcount*)def;
+        if (!upb_strtable_insert(deftab, name, upb_value_ptr(newdef)))
+          goto oom;
+      }
+    } while ((def = (upb_def*)def->refcount.next) != base);
   }
 
-  bool replacing = (upb_strtable_lookup(addtab, m->base.fqname) != NULL);
-  if (needcopy && !replacing) {
-    upb_symtab_ent e = {upb_def_dup(def)};
-    upb_strtable_insert(addtab, def->fqname, &e);
-    replacing = true;
-  }
-  return replacing;
-}
+  upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup));
+  return need_dup;
 
-bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status) {
-  upb_rwlock_wrlock(&s->lock);
+oom:
+  upb_status_seterrliteral(s, "out of memory");
+  return false;
+}
 
-  // Add all defs to a table for resolution.
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+                    upb_status *status) {
+  upb_def **add_defs = NULL;
   upb_strtable addtab;
-  upb_strtable_init(&addtab, n, sizeof(upb_symtab_ent));
+  if (!upb_strtable_init(&addtab)) {
+    upb_status_seterrliteral(status, "out of memory");
+    return false;
+  }
+
+  // Add new defs to table.
   for (int i = 0; i < n; i++) {
     upb_def *def = defs[i];
-    if (upb_strtable_lookup(&addtab, def->fqname)) {
-      upb_status_seterrf(status, "Conflicting defs named '%s'", def->fqname);
-      upb_strtable_free(&addtab);
-      return false;
+    assert(upb_def_ismutable(def));
+    const char *fullname = upb_def_fullname(def);
+    if (!fullname) {
+      upb_status_seterrliteral(
+          status, "Anonymous defs cannot be added to a symtab");
+      goto err;
     }
-    upb_strtable_insert(&addtab, def->fqname, &def);
+    if (upb_strtable_lookup(&addtab, fullname) != NULL) {
+      upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
+      goto err;
+    }
+    if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
+      goto oom_err;
+    // We temporarily use this field to indicate that we came from the user's
+    // list rather than being dup'd.
+    def->refcount.next = NULL;
   }
 
-  // All existing defs that can reach defs that are being replaced must
-  // themselves be replaced with versions that will point to the new defs.
-  // Do a DFS -- any path that finds a new def must replace all ancestors.
-  upb_strtable *symtab = &s->symtab;
+  // Add dups of any existing def that can reach a def with the same name as
+  // one of "defs."  On failure "seen" is freed here; "err" won't free it.
+  upb_inttable seen;
+  if (!upb_inttable_init(&seen)) goto oom_err;
   upb_strtable_iter i;
-  upb_strtable_begin(&i, symtab);
-  for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-    upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
-    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
-    upb_symtab_dfs(e->def, open_defs, 0, &addtab);
+  upb_strtable_begin(&i, &s->symtab);
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+    upb_resolve_dfs(def, &addtab, ref_donor, &seen, status);
+    if (!upb_ok(status)) { upb_inttable_uninit(&seen); goto err; }
   }
+  upb_inttable_uninit(&seen);
 
-  // Resolve all refs.
+  // Now using the table, resolve symbolic references.
   upb_strtable_begin(&i, &addtab);
-  for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-    const upb_symtab_ent *e = upb_strtable_iter_value(&i);
-    upb_msgdef *m = upb_dyncast_msgdef(e->def);
-    if(!m) continue;
+  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+    upb_msgdef *m = upb_dyncast_msgdef(def);
+    if (!m) continue;
     // Type names are resolved relative to the message in which they appear.
-    const char *base = m->base.fqname;
+    const char *base = upb_def_fullname(UPB_UPCAST(m));
 
     upb_msg_iter j;
-    for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) {
-      upb_fielddef *f = upb_msg_iter_field(j);
-      if (f->type == 0) {
-        upb_status_seterrf(status, "Field type was not set.");
-        return false;
-      }
-
-      if (!upb_hassubdef(f)) continue;  // No resolving necessary.
-      upb_downcast_unresolveddef(f->def);  // Type check.
-      const char *name = f->def->fqname;
-
-      // Resolve from either the addtab (pending adds) or symtab (existing
-      // defs).  If both exist, prefer the pending add, because it will be
-      // overwriting the existing def.
-      upb_symtab_ent *found;
-      if(!(found = upb_resolve(&addtab, base, name)) &&
-         !(found = upb_resolve(symtab, base, name))) {
-        upb_status_seterrf(status, "could not resolve symbol '%s' "
-                                   "in context '%s'", name, base);
-        return false;
+    for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
+      upb_fielddef *f = upb_msg_iter_field(&j);
+      const char *name = upb_fielddef_subtypename(f);
+      if (name) {
+        upb_def *subdef = upb_resolvename(&addtab, base, name);
+        if (subdef == NULL) {
+          upb_status_seterrf(
+              status, "couldn't resolve name '%s' in message '%s'", name, base);
+          goto err;
+        } else if (!upb_fielddef_setsubdef(f, subdef)) {
+          upb_status_seterrf(
+              status, "def '%s' had the wrong type for field '%s'",
+              upb_def_fullname(subdef), upb_fielddef_name(f));
+          goto err;
+        }
       }
 
-      // Check the type of the found def.
-      upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
-      if(found->def->type != expected) {
-        upb_status_seterrliteral(status, "Unexpected type");
-        return false;
-      }
-      if (!upb_fielddef_resolve(f, found->def, status)) return false;
+      if (upb_fielddef_type(f) == UPB_TYPE(ENUM) && upb_fielddef_subdef(f) &&
+          !upb_fielddef_resolvedefault(f, status))
+        goto err;
     }
   }
 
-  // The defs in the transaction have been vetted, and can be moved to the
-  // symtab without causing errors.
+  // We need an array of the defs in addtab, for passing to upb_finalize.
+  add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
+  if (add_defs == NULL) goto oom_err;
   upb_strtable_begin(&i, &addtab);
-  for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-    const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i);
-    upb_def_movetosymtab(tmptab_e->def, s);
-    upb_symtab_ent *symtab_e =
-        upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
-    if(symtab_e) {
-      upb_deflist_push(&s->olddefs, symtab_e->def);
-      symtab_e->def = tmptab_e->def;
+  for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i))
+    add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i));
+
+  // Restore the next pointer that we stole.
+  for (int i = 0; i < n; i++)
+    add_defs[i]->refcount.next = &add_defs[i]->refcount;
+
+  if (!upb_finalize(add_defs, n, status)) goto err;
+  upb_strtable_uninit(&addtab);
+
+  for (int i = 0; i < n; i++) {
+    upb_def *def = add_defs[i];
+    const char *name = upb_def_fullname(def);
+    upb_def_donateref(def, ref_donor, s);
+    upb_value *v = upb_strtable_lookup(&s->symtab, name);
+    if(v) {
+      upb_def_unref(upb_value_getptr(*v), s);
+      upb_value_setptr(v, def);
     } else {
-      upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e);
+      upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
     }
   }
-
-  upb_strtable_free(&addtab);
-  upb_rwlock_unlock(&s->lock);
-  upb_symtab_gc(s);
+  free(add_defs);
   return true;
-}
 
-void upb_symtab_gc(upb_symtab *s) {
-  (void)s;
-  // TODO.
+oom_err:
+  upb_status_seterrliteral(status, "out of memory");
+err: {
+    // Unref the defs we dup'd: they are still owned by ref_donor, not s.  User
+    // defs have refcount.next == NULL (NOTE(review): not once restored above).
+    upb_strtable_iter i;
+    upb_strtable_begin(&i, &addtab);
+    for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+      upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
+      if (def->refcount.next) upb_def_unref(def, ref_donor);
+    }
+  }
+  upb_strtable_uninit(&addtab);
+  free(add_defs);
+  return false;
 }
diff --git a/upb/def.h b/upb/def.h
index 462655a..452b809 100644
--- a/upb/def.h
+++ b/upb/def.h
@@ -1,17 +1,17 @@
 /*
  * upb - a minimalist implementation of protocol buffers.
  *
- * Copyright (c) 2009-2011 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2009-2012 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  *
- * Provides a mechanism for creating and linking proto definitions.
- * These form the protobuf schema, and are used extensively throughout upb:
+ * Defs are upb's internal representation of the constructs that can appear
+ * in a .proto file:
+ *
  * - upb_msgdef: describes a "message" construct.
  * - upb_fielddef: describes a message field.
  * - upb_enumdef: describes an enum.
  * (TODO: definitions of services).
  *
- *
  * Defs go through two distinct phases of life:
  *
  * 1. MUTABLE: when first created, the properties of the def can be set freely
@@ -20,16 +20,15 @@
  *    not be used for any purpose except to set its properties (it can't be
  *    used to parse anything, create any messages in memory, etc).
  *
- * 2. FINALIZED: after being added to a symtab (which links the defs together)
- *    the defs become finalized (thread-safe and immutable).  Programs may only
- *    access defs through a CONST POINTER during this stage -- upb_symtab will
- *    help you out with this requirement by only vending const pointers, but
- *    you need to make sure not to use any non-const pointers you still have
- *    sitting around.  In practice this means that you may not call any setters
- *    on the defs (or functions that themselves call the setters).  If you want
- *    to modify an existing immutable def, copy it with upb_*_dup(), modify the
- *    copy, and add the modified def to the symtab (replacing the existing
- *    def).
+ * 2. FINALIZED: the upb_finalize() operation finalizes a set of defs,
+ *    which makes them thread-safe and immutable.  Finalized defs may only be
+ *    accessed through a CONST POINTER.  If you want to modify an existing
+ *    immutable def, copy it with upb_*_dup() and modify and finalize the copy.
+ *
+ * The refcounting of defs works properly no matter what state the def is in.
+ * Once the def is finalized it is guaranteed that any def reachable from a
+ * live def is also live (so a ref on the base of a message tree keeps the
+ * whole tree alive).
  *
  * You can test for which stage of life a def is in by calling
  * upb_def_ismutable().  This is particularly useful for dynamic language
@@ -46,181 +45,306 @@
 #ifndef UPB_DEF_H_
 #define UPB_DEF_H_
 
-#include "upb/atomic.h"
+#include "upb/refcount.h"
 #include "upb/table.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct _upb_symtab;
-typedef struct _upb_symtab upb_symtab;
+/* upb_def: base class for defs  **********************************************/
 
 // All the different kind of defs we support.  These correspond 1:1 with
 // declarations in a .proto file.
 typedef enum {
-  UPB_DEF_MSG = 1,
+  UPB_DEF_MSG,
+  UPB_DEF_FIELD,
   UPB_DEF_ENUM,
   UPB_DEF_SERVICE,          // Not yet implemented.
 
   UPB_DEF_ANY = -1,         // Wildcard for upb_symtab_get*()
-  UPB_DEF_UNRESOLVED = 99,  // Internal-only.
 } upb_deftype_t;
 
-
-/* upb_def: base class for defs  **********************************************/
-
-typedef struct {
-  char *fqname;     // Fully qualified.
-  upb_symtab *symtab;     // Def is mutable iff symtab == NULL.
-  upb_atomic_t refcount;  // Owns a ref on symtab iff (symtab && refcount > 0).
+typedef struct _upb_def {
+  upb_refcount refcount;
+  char *fullname;
   upb_deftype_t type;
+  bool is_finalized;
 } upb_def;
 
+#define UPB_UPCAST(ptr) (&(ptr)->base)
+
 // Call to ref/unref a def.  Can be used at any time, but is not thread-safe
-// until the def is in a symtab.  While a def is in a symtab, everything
-// reachable from that def (the symtab and all defs in the symtab) are
-// guaranteed to be alive.
-void upb_def_ref(const upb_def *def);
-void upb_def_unref(const upb_def *def);
-upb_def *upb_def_dup(const upb_def *def);
-
-// A def is mutable until it has been added to a symtab.
+// until the def is finalized.  While a def is finalized, everything reachable
+// from that def is guaranteed to be alive.
+void upb_def_ref(const upb_def *def, void *owner);
+void upb_def_unref(const upb_def *def, void *owner);
+void upb_def_donateref(const upb_def *def, void *from, void *to);
+upb_def *upb_def_dup(const upb_def *def, void *owner);
+
+// A def is mutable until it has been finalized.
 bool upb_def_ismutable(const upb_def *def);
-INLINE const char *upb_def_fqname(const upb_def *def) { return def->fqname; }
-bool upb_def_setfqname(upb_def *def, const char *fqname);  // Only if mutable.
+bool upb_def_isfinalized(const upb_def *def);
 
-#define UPB_UPCAST(ptr) (&(ptr)->base)
+// "fullname" is the def's fully-qualified name (eg. foo.bar.Message).
+INLINE const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
+
+// The def must be mutable.  Caller retains ownership of fullname.  Defs are
+// not required to have a name; if a def has no name when it is finalized, it
+// will remain an anonymous def.
+bool upb_def_setfullname(upb_def *def, const char *fullname);
+
+// Finalizes the given defs; this validates all constraints and marks the defs
+// as finalized (read-only).  This will also cause fielddefs to take refs on
+// their subdefs so that any reachable def will be kept alive (but this is
+// done in a way that correctly handles circular references).
+//
+// Returns true on success.  On failure false is returned and "status"
+// describes the error.  In both cases the caller retains ownership of the
+// "defs" array itself.
+//
+// Symbolic references to sub-types or enum defaults must have already been
+// resolved.  "defs" must contain the transitive closure of any mutable defs
+// reachable from any def in the list.  In other words, there may not be a
+// mutable def which is reachable from one of "defs" that does not appear
+// elsewhere in "defs."  "defs" may not contain fielddefs, but any fielddefs
+// reachable from the given msgdefs will be finalized.
+//
+// n is currently limited to 64k defs, if more are required break them into
+// batches of 64k (or we could raise this limit, at the cost of a bigger
+// upb_def structure or complexity in upb_finalize()).
+bool upb_finalize(upb_def *const*defs, int n, upb_status *status);
 
 
 /* upb_fielddef ***************************************************************/
 
-// A upb_fielddef describes a single field in a message.  It isn't a full def
-// in the sense that it derives from upb_def.  It cannot stand on its own; it
-// must be part of a upb_msgdef.  It is also reference-counted.
+// We choose these to match descriptor.proto.  Clients may use UPB_TYPE() and
+// UPB_LABEL() instead of referencing these directly.
+typedef enum {
+  UPB_TYPE_NONE     = -1,  // Internal-only, may be removed.
+  UPB_TYPE_ENDGROUP = 0,   // Internal-only, may be removed.
+  UPB_TYPE_DOUBLE   = 1,
+  UPB_TYPE_FLOAT    = 2,
+  UPB_TYPE_INT64    = 3,
+  UPB_TYPE_UINT64   = 4,
+  UPB_TYPE_INT32    = 5,
+  UPB_TYPE_FIXED64  = 6,
+  UPB_TYPE_FIXED32  = 7,
+  UPB_TYPE_BOOL     = 8,
+  UPB_TYPE_STRING   = 9,
+  UPB_TYPE_GROUP    = 10,
+  UPB_TYPE_MESSAGE  = 11,
+  UPB_TYPE_BYTES    = 12,
+  UPB_TYPE_UINT32   = 13,
+  UPB_TYPE_ENUM     = 14,
+  UPB_TYPE_SFIXED32 = 15,
+  UPB_TYPE_SFIXED64 = 16,
+  UPB_TYPE_SINT32   = 17,
+  UPB_TYPE_SINT64   = 18,
+} upb_fieldtype_t;
+
+#define UPB_NUM_TYPES 19
+
+typedef enum {
+  UPB_LABEL_OPTIONAL = 1,
+  UPB_LABEL_REQUIRED = 2,
+  UPB_LABEL_REPEATED = 3,
+} upb_label_t;
+
+// These macros are provided for legacy reasons.
+#define UPB_TYPE(type) UPB_TYPE_ ## type
+#define UPB_LABEL(type) UPB_LABEL_ ## type
+
+// Info for a given field type.
+typedef struct {
+  uint8_t align;
+  uint8_t size;
+  uint8_t inmemory_type;    // For example, INT32, SINT32, and SFIXED32 -> INT32
+} upb_typeinfo;
+
+extern const upb_typeinfo upb_types[UPB_NUM_TYPES];
+
+// A upb_fielddef describes a single field in a message.  It is most often
+// found as a part of a upb_msgdef, but can also stand alone to represent
+// an extension.
 typedef struct _upb_fielddef {
+  upb_def base;
   struct _upb_msgdef *msgdef;
-  upb_def *def;  // if upb_hasdef(f)
-  upb_atomic_t refcount;
-  bool finalized;
-
-  // The following fields may be modified until the def is finalized.
-  uint8_t type;          // Use UPB_TYPE() constants.
-  uint8_t label;         // Use UPB_LABEL() constants.
+  union {
+    char *name;    // If subdef_is_symbolic.
+    upb_def *def;  // If !subdef_is_symbolic.
+  } sub;  // The msgdef or enumdef for this field, if upb_hassubdef(f).
+  bool subdef_is_symbolic;
+  bool default_is_string;
+  bool subdef_is_owned;
+  upb_fieldtype_t type;
+  upb_label_t label;
   int16_t hasbit;
   uint16_t offset;
-  bool default_is_string;
-  bool active;
   int32_t number;
-  char *name;
-  upb_value defaultval;  // Only meaningful for non-repeated scalars and strings.
+  upb_value defaultval;  // Only for non-repeated scalars and strings.
   upb_value fval;
   struct _upb_accessor_vtbl *accessor;
-  const void *default_ptr;
   const void *prototype;
 } upb_fielddef;
 
-upb_fielddef *upb_fielddef_new(void);
-void upb_fielddef_ref(upb_fielddef *f);
-void upb_fielddef_unref(upb_fielddef *f);
-upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
+// Returns NULL if memory allocation failed.
+upb_fielddef *upb_fielddef_new(void *owner);
+
+INLINE void upb_fielddef_ref(upb_fielddef *f, void *owner) {
+  upb_def_ref(UPB_UPCAST(f), owner);
+}
+INLINE void upb_fielddef_unref(upb_fielddef *f, void *owner) {
+  upb_def_unref(UPB_UPCAST(f), owner);
+}
+
+// Duplicates the given field, returning NULL if memory allocation failed.
+// When a fielddef is duplicated, the subdef (if any) is made symbolic if it
+// wasn't already.  If the subdef is set but has no name (which is possible
+// since msgdefs are not required to have a name) the new fielddef's subdef
+// will be unset.
+upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner);
+
+INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) {
+  return upb_def_ismutable(UPB_UPCAST(f));
+}
+INLINE bool upb_fielddef_isfinalized(const upb_fielddef *f) {
+  return !upb_fielddef_ismutable(f);
+}
 
-// A fielddef is mutable until its msgdef has been added to a symtab.
-bool upb_fielddef_ismutable(const upb_fielddef *f);
+// Simple accessors. ///////////////////////////////////////////////////////////
 
-// Read accessors.  May be called any time.
-INLINE uint8_t upb_fielddef_type(const upb_fielddef *f) { return f->type; }
-INLINE uint8_t upb_fielddef_label(const upb_fielddef *f) { return f->label; }
+INLINE upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
+  return f->type;
+}
+INLINE upb_label_t upb_fielddef_label(const upb_fielddef *f) {
+  return f->label;
+}
 INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; }
-INLINE char *upb_fielddef_name(const upb_fielddef *f) { return f->name; }
+INLINE uint16_t upb_fielddef_offset(const upb_fielddef *f) { return f->offset; }
+INLINE int16_t upb_fielddef_hasbit(const upb_fielddef *f) { return f->hasbit; }
+INLINE const char *upb_fielddef_name(const upb_fielddef *f) {
+  return upb_def_fullname(UPB_UPCAST(f));
+}
 INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; }
-INLINE bool upb_fielddef_finalized(const upb_fielddef *f) { return f->finalized; }
 INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) {
   return f->msgdef;
 }
 INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) {
   return f->accessor;
 }
-INLINE const char *upb_fielddef_typename(const upb_fielddef *f) {
-  return f->def ? f->def->fqname : NULL;
-}
 
-// Returns the default value for this fielddef, which may either be something
-// the client set explicitly or the "default default" (0 for numbers, empty for
-// strings).  The field's type indicates the type of the returned value, except
-// for enums.   For enums the default can be set either numerically or
-// symbolically -- the upb_fielddef_default_is_symbolic() function below will
-// indicate which it is.  For string defaults, the value will be a upb_strref
-// which is invalidated by any other call on this object.
-INLINE upb_value upb_fielddef_default(const upb_fielddef *f) {
-  return f->defaultval;
-}
+bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type);
+bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label);
+void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit);
+void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset);
+// TODO(haberman): need a way of keeping the fval alive even if some handlers
+// outlast the fielddef.
+void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
+void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
 
-// The results of this function are only meaningful for enum fields, which can
-// have a default specified either as an integer or as a string.  If this
-// returns true, the default returned from upb_fielddef_default() is a string,
-// otherwise it is an integer.
-INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) {
-  return f->default_is_string;
+// "Number" and "fullname" must be set before the fielddef is added to a msgdef.
+// For the moment we do not allow these to be set once the fielddef is added to
+// a msgdef -- this could be relaxed in the future.
+bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
+INLINE bool upb_fielddef_setname(upb_fielddef *f, const char *name) {
+  return upb_def_setfullname(UPB_UPCAST(f), name);
 }
 
-// The enum or submessage def for this field, if any.  Only meaningful for
-// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true).
-// Since defs are not linked together until they are in a symtab, this
-// will return NULL until the msgdef is in a symtab.
-upb_def *upb_fielddef_subdef(const upb_fielddef *f);
+// Field type tests. ///////////////////////////////////////////////////////////
 
-// Write accessors.  "Number" and "name" must be set before the fielddef is
-// added to a msgdef.  For the moment we do not allow these to be set once
-// the fielddef is added to a msgdef -- this could be relaxed in the future.
-bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
-bool upb_fielddef_setname(upb_fielddef *f, const char *name);
+INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
+  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
+}
+INLINE bool upb_isstringtype(upb_fieldtype_t type) {
+  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
+}
+INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
+  return !upb_issubmsgtype(type) && !upb_isstringtype(type);
+}
+INLINE bool upb_issubmsg(const upb_fielddef *f) {
+  return upb_issubmsgtype(f->type);
+}
+INLINE bool upb_isstring(const upb_fielddef *f) {
+  return upb_isstringtype(f->type);
+}
+INLINE bool upb_isseq(const upb_fielddef *f) {
+  return f->label == UPB_LABEL(REPEATED);
+}
 
-// These writers may be called at any time prior to being put in a symtab.
-bool upb_fielddef_settype(upb_fielddef *f, uint8_t type);
-bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
-void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
-void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
+// Default value. //////////////////////////////////////////////////////////////
 
-// The name of the message or enum this field is referring to.  Must be found
-// at name resolution time (when upb_symtab_add() is called).
+// Returns the default value for this fielddef, which may either be something
+// the client set explicitly or the "default default" (0 for numbers, empty for
+// strings).  The field's type indicates the type of the returned value, except
+// for enum fields that are still mutable.
 //
-// NOTE: May only be called for fields whose type has already been set to
-// be a submessage, group, or enum!  Also, will be reset to empty if the
-// field's type is set again.
-bool upb_fielddef_settypename(upb_fielddef *f, const char *name);
-
-// The default value for the field.  For numeric types, use
+// For enums the default can be set either numerically or symbolically -- the
+// upb_fielddef_default_is_symbolic() function below will indicate which it is.
+// For string defaults, the value will be a upb_byteregion which is invalidated
+// by any other non-const call on this object.  Once the fielddef is finalized,
+// symbolic enum defaults are resolved, so finalized enum fielddefs always have
+// a default of type int32.
+INLINE upb_value upb_fielddef_default(const upb_fielddef *f) {
+  return f->defaultval;
+}
+// Sets the default value for the field.  For numeric types, use
 // upb_fielddef_setdefault(), and "value" must match the type of the field.
-// For string/bytes types, use upb_fielddef_setdefaultstr().
-// Enum types may use either, since the default may be set either numerically
-// or symbolically.
+// For string/bytes types, use upb_fielddef_setdefaultstr().  Enum types may
+// use either, since the default may be set either numerically or symbolically.
 //
 // NOTE: May only be called for fields whose type has already been set.
 // Also, will be reset to default if the field's type is set again.
 void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
-void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len);
+bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len);
 void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str);
 
-// A variety of tests about the type of a field.
-INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
-  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
-}
-INLINE bool upb_isstringtype(upb_fieldtype_t type) {
-  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
-}
-INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
-  return !upb_issubmsgtype(type) && !upb_isstringtype(type);
+// The results of this function are only meaningful for mutable enum fields,
+// which can have a default specified either as an integer or as a string.  If
+// this returns true, the default returned from upb_fielddef_default() is a
+// string, otherwise it is an integer.
+INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) {
+  assert(f->type == UPB_TYPE(ENUM));
+  return f->default_is_string;
 }
-INLINE bool upb_issubmsg(const upb_fielddef *f) { return upb_issubmsgtype(f->type); }
-INLINE bool upb_isstring(const upb_fielddef *f) { return upb_isstringtype(f->type); }
-INLINE bool upb_isseq(const upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }
 
-// Does the type of this field imply that it should contain an associated def?
+// Subdef. /////////////////////////////////////////////////////////////////////
+
+// Submessage and enum fields must reference a "subdef", which is the
+// upb_msgdef or upb_enumdef that defines their type.  Note that when the
+// fielddef is mutable it may not have a subdef *yet*, but this function still
+// returns true to indicate that the field's type requires a subdef.
 INLINE bool upb_hassubdef(const upb_fielddef *f) {
   return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
 }
 
+// Before a fielddef is finalized, its subdef may be set either directly (with
+// a upb_def*) or symbolically.  Symbolic refs must be resolved before the
+// containing msgdef can be finalized (see upb_resolve() above).  The client is
+// responsible for making sure that "subdef" lives until this fielddef is
+// finalized or deleted.
+//
+// Both methods require that upb_hassubdef(f) (so the type must be set prior
+// to calling these methods).  Returns false if this is not the case, or if
+// the given subdef is not of the correct type.  The subtype is reset if the
+// field's type is changed.
+bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef);
+bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name);
+
+// Returns the enum or submessage def or symbolic name for this field, if any.
+// Requires that upb_hassubdef(f).  Returns NULL if the subdef has not been set
+// or if you ask for a subtype name when the subtype is currently set
+// symbolically (or vice-versa).  To access the subtype's name for a linked
+// fielddef, use upb_def_fullname(upb_fielddef_subdef(f)).
+//
+// Caller does *not* own a ref on the returned def or string.
+// upb_fielddef_subtypename() is non-const because finalized defs will never
+// have a symbolic reference (they must be resolved before the msgdef can be
+// finalized).
+upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f);
+const upb_def *upb_fielddef_subdef(const upb_fielddef *f);
+const char *upb_fielddef_subtypename(upb_fielddef *f);
+
 
 /* upb_msgdef *****************************************************************/
 
@@ -232,31 +356,31 @@ typedef struct _upb_msgdef {
   upb_inttable itof;  // int to field
   upb_strtable ntof;  // name to field
 
-  // The following fields may be modified until finalized.
+  // The following fields may be modified while mutable.
   uint16_t size;
   uint8_t hasbit_bytes;
   // The range of tag numbers used to store extensions.
   uint32_t extstart, extend;
+  // Used for proto2 integration.
+  const void *prototype;
 } upb_msgdef;
 
-// Hash table entries for looking up fields by name or number.
-typedef struct {
-  bool junk;
-  upb_fielddef *f;
-} upb_itof_ent;
-typedef struct {
-  upb_fielddef *f;
-} upb_ntof_ent;
+// Returns NULL if memory allocation failed.
+upb_msgdef *upb_msgdef_new(void *owner);
 
-upb_msgdef *upb_msgdef_new(void);
-INLINE void upb_msgdef_unref(const upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }
-INLINE void upb_msgdef_ref(const upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }
+INLINE void upb_msgdef_unref(const upb_msgdef *md, void *owner) {
+  upb_def_unref(UPB_UPCAST(md), owner);
+}
+INLINE void upb_msgdef_ref(const upb_msgdef *md, void *owner) {
+  upb_def_ref(UPB_UPCAST(md), owner);
+}
 
 // Returns a new msgdef that is a copy of the given msgdef (and a copy of all
 // the fields) but with any references to submessages broken and replaced with
-// just the name of the submessage.  This can be put back into another symtab
-// and the names will be re-resolved in the new context.
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m);
+// just the name of the submessage.  Returns NULL if memory allocation failed.
+// This can be put back into another symtab and the names will be re-resolved
+// in the new context.
+upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner);
 
 // Read accessors.  May be called at any time.
 INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; }
@@ -271,38 +395,35 @@ void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
 void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
 bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end);
 
-// Adds a set of fields (upb_fielddef objects) to a msgdef.  Caller retains its
-// ref on the fielddef.  May only be done before the msgdef is in a symtab
-// (requires upb_def_ismutable(m) for the msgdef).  The fielddef's name and
-// number must be set, and the message may not already contain any field with
-// this name or number, and this fielddef may not be part of another message,
-// otherwise false is returned and no action is performed.
-bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *f, int n);
-INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
-  return upb_msgdef_addfields(m, &f, 1);
-}
-
-// Sets the layout of all fields according to default rules:
-// 1. Hasbits for required fields come first, then optional fields.
-// 2. Values are laid out in a way that respects alignment rules.
-// 3. The order is chosen to minimize memory usage.
-// This should only be called once all fielddefs have been added.
-// TODO: will likely want the ability to exclude strings/submessages/arrays.
-// TODO: will likely want the ability to define a header size.
-void upb_msgdef_layout(upb_msgdef *m);
+// Adds a set of fields (upb_fielddef objects) to a msgdef.  Requires that the
+// msgdef and all the fielddefs are mutable.  The fielddef's name and number
+// must be set, and the message may not already contain any field with this
+// name or number, and this fielddef may not be part of another message.  In
+// error cases false is returned and the msgdef is unchanged.
+//
+// On success, the msgdef takes a ref on the fielddef so the caller needn't
+// worry about continuing to keep it alive (however the reverse is not true;
+// refs on the fielddef will *not* keep the msgdef alive).  If ref_donor is
+// non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef,
+// otherwise caller retains its reference(s) on the defs in f.
+bool upb_msgdef_addfields(
+    upb_msgdef *m, upb_fielddef *const *f, int n, void *ref_donor);
+INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f,
+                                void *ref_donor) {
+  return upb_msgdef_addfields(m, &f, 1, ref_donor);
+}
 
 // Looks up a field by name or number.  While these are written to be as fast
 // as possible, it will still be faster to cache the results of this lookup if
 // possible.  These return NULL if no such field is found.
 INLINE upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
-  upb_itof_ent *e = (upb_itof_ent*)
-      upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
-  return e ? e->f : NULL;
+  const upb_value *val = upb_inttable_lookup32(&m->itof, i);
+  return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL;
 }
 
 INLINE upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) {
-  upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
-  return e ? e->f : NULL;
+  const upb_value *val = upb_strtable_lookup(&m->ntof, name);
+  return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL;
 }
 
 INLINE int upb_msgdef_numfields(const upb_msgdef *m) {
@@ -313,20 +434,19 @@ INLINE int upb_msgdef_numfields(const upb_msgdef *m) {
 // TODO: the iteration should be in field order.
 // Iterators are invalidated when a field is added or removed.
 //   upb_msg_iter i;
-//   for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-//     upb_fielddef *f = upb_msg_iter_field(i);
+//   for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+//     upb_fielddef *f = upb_msg_iter_field(&i);
 //     // ...
 //   }
 typedef upb_inttable_iter upb_msg_iter;
 
-upb_msg_iter upb_msg_begin(const upb_msgdef *m);
-upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter);
-INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }
+void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m);
+void upb_msg_next(upb_msg_iter *iter);
+INLINE bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); }
 
 // Iterator accessor.
-INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
-  upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
-  return ent->f;
+INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) {
+  return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
 }
 
 
@@ -339,84 +459,75 @@ typedef struct _upb_enumdef {
   int32_t defaultval;
 } upb_enumdef;
 
-typedef struct {
-  uint32_t value;
-} upb_ntoi_ent;
-
-typedef struct {
-  bool junk;
-  char *str;
-} upb_iton_ent;
-
-upb_enumdef *upb_enumdef_new(void);
-INLINE void upb_enumdef_ref(const upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }
-INLINE void upb_enumdef_unref(const upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }
-upb_enumdef *upb_enumdef_dup(const upb_enumdef *e);
+// Returns NULL if memory allocation failed.
+upb_enumdef *upb_enumdef_new(void *owner);
+INLINE void upb_enumdef_ref(const upb_enumdef *e, void *owner) {
+  upb_def_ref(&e->base, owner);
+}
+INLINE void upb_enumdef_unref(const upb_enumdef *e, void *owner) {
+  upb_def_unref(&e->base, owner);
+}
+upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner);
 
-INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
+INLINE int32_t upb_enumdef_default(const upb_enumdef *e) {
+  return e->defaultval;
+}
 
 // May only be set if upb_def_ismutable(e).
 void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
 
-// Adds a value to the enumdef.  Requires that no existing val has this
-// name or number (returns false and does not add if there is).  May only
-// be called before the enumdef is in a symtab.
-bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
+// Returns the number of values currently defined in the enum.  Note that
+// multiple names can refer to the same number, so this may be greater than the
+// total number of unique numbers.
+INLINE int upb_enumdef_numvals(const upb_enumdef *e) {
+  return upb_strtable_count(&e->ntoi);
+}
+
+// Adds a value to the enumdef.  Requires that no existing val has this name,
+// but duplicate numbers are allowed.  May only be called if the enumdef is
+// mutable.  Returns false if the name is already in use, or if "name" is not a
+// valid label, or on memory allocation failure (we may want to distinguish
+// these failure cases in the future).
+bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num);
 
-// Lookups from name to integer and vice-versa.
-bool upb_enumdef_ntoil(upb_enumdef *e, const char *name, size_t len, int32_t *num);
-bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num);
-// Caller does not own the returned string.
-const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
+// Looks up the integer value for the given name, returning true if found.
+bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num);
+
+// Finds the name corresponding to the given number, or NULL if none was found.
+// If more than one name corresponds to this number, returns the first one that
+// was added.
+const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num);
 
 // Iteration over name/value pairs.  The order is undefined.
 // Adding an enum val invalidates any iterators.
 //   upb_enum_iter i;
-//   for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
+//   for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
 //     // ...
 //   }
-typedef upb_inttable_iter upb_enum_iter;
+typedef upb_strtable_iter upb_enum_iter;
 
-upb_enum_iter upb_enum_begin(const upb_enumdef *e);
-upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter);
-INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
+void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e);
+void upb_enum_next(upb_enum_iter *iter);
+bool upb_enum_done(upb_enum_iter *iter);
 
 // Iterator accessors.
-INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
-  upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
-  return e->str;
+INLINE const char *upb_enum_iter_name(upb_enum_iter *iter) {
+  return upb_strtable_iter_key(iter);
 }
-INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
-  return upb_inttable_iter_key(iter);
+INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) {
+  return upb_value_getint32(upb_strtable_iter_value(iter));
 }
 
 
-/* upb_deflist ****************************************************************/
-
-// upb_deflist is an internal-only dynamic array for storing a growing list of
-// upb_defs.
-typedef struct {
-  upb_def **defs;
-  uint32_t len;
-  uint32_t size;
-} upb_deflist;
-
-void upb_deflist_init(upb_deflist *l);
-void upb_deflist_uninit(upb_deflist *l);
-void upb_deflist_push(upb_deflist *l, upb_def *d);
-
-
 /* upb_symtab *****************************************************************/
 
-// A symtab (symbol table) is where upb_defs live.  It is empty when first
-// constructed.  Clients add definitions to the symtab (or replace existing
-// definitions) by calling upb_symtab_add().
-struct _upb_symtab {
-  upb_atomic_t refcount;
-  upb_rwlock_t lock;       // Protects all members except the refcount.
-  upb_strtable symtab;     // The symbol table.
-  upb_deflist olddefs;
-};
+// A symtab (symbol table) stores a name->def map of upb_defs.  Clients could
+// always create such tables themselves, but upb_symtab has logic for resolving
+// symbolic references, which is nontrivial.
+typedef struct {
+  uint32_t refcount;
+  upb_strtable symtab;
+} upb_symtab;
 
 upb_symtab *upb_symtab_new(void);
 void upb_symtab_ref(const upb_symtab *s);
@@ -430,33 +541,47 @@ void upb_symtab_unref(const upb_symtab *s);
 //    within this message are searched, then within the parent, on up to the
 //    root namespace).
 //
-// If a def is found, the caller owns one ref on the returned def.  Otherwise
-// returns NULL.
+// If a def is found, the caller owns one ref on the returned def, owned by
+// owner.  Otherwise returns NULL.
 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
-                                  const char *sym);
+                                  const char *sym, void *owner);
 
-// Find an entry in the symbol table with this exact name.  If a def is found,
-// the caller owns one ref on the returned def.  Otherwise returns NULL.
-const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym);
-const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym);
+// Finds an entry in the symbol table with this exact name.  If a def is found,
+// the caller owns one ref on the returned def, owned by owner.  Otherwise
+// returns NULL.
+const upb_def *upb_symtab_lookup(
+    const upb_symtab *s, const char *sym, void *owner);
+const upb_msgdef *upb_symtab_lookupmsg(
+    const upb_symtab *s, const char *sym, void *owner);
 
 // Gets an array of pointers to all currently active defs in this symtab.  The
 // caller owns the returned array (which is of length *count) as well as a ref
-// to each symbol inside.  If type is UPB_DEF_ANY then defs of all types are
-// returned, otherwise only defs of the required type are returned.
-const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *n, upb_deftype_t type);
-
-// Adds the given defs to the symtab, resolving all symbols.  Only one def per
-// name may be in the list, but defs can replace existing defs in the symtab.
+// to each symbol inside (owned by owner).  If type is UPB_DEF_ANY then defs of
+// all types are returned, otherwise only defs of the required type are
+// returned.
+const upb_def **upb_symtab_getdefs(
+    const upb_symtab *s, int *n, upb_deftype_t type, void *owner);
+
+// Adds the given defs to the symtab, resolving all symbols (including enum
+// default values) and finalizing the defs.  Only one def per name may be in
+// the list, but defs can replace existing defs in the symtab.  All defs must
+// have a name -- anonymous defs are not allowed.  Anonymous defs can still be
+// finalized by calling upb_def_finalize() directly.
+//
+// Any existing defs that can reach defs that are being replaced will
+// themselves be replaced also, so that the resulting set of defs is fully
+// consistent.
+//
+// The logic implemented in this method is a convenience; ultimately it calls
+// some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
+// upb_finalize(), any of which the client could call themselves.  However,
+// since the logic for doing so is nontrivial, we provide it here.
+//
 // The entire operation either succeeds or fails.  If the operation fails, the
 // symtab is unchanged, false is returned, and status indicates the error.  The
-// caller retains its ref on all defs in all cases.
-bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status);
-
-// Frees defs that are no longer active in the symtab and are no longer
-// reachable.  Such defs are not freed when they are replaced in the symtab
-// if they are still reachable from defs that are still referenced.
-void upb_symtab_gc(upb_symtab *s);
+// caller passes a ref on all defs to the symtab (even if the operation fails).
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+                    upb_status *status);
 
 
 /* upb_def casts **************************************************************/
@@ -483,9 +608,9 @@ void upb_symtab_gc(upb_symtab *s);
     return (const struct _upb_ ## lower*)def; \
   }
 UPB_DEF_CASTS(msgdef, MSG);
+UPB_DEF_CASTS(fielddef, FIELD);
 UPB_DEF_CASTS(enumdef, ENUM);
 UPB_DEF_CASTS(svcdef, SERVICE);
-UPB_DEF_CASTS(unresolveddef, UNRESOLVED);
 #undef UPB_DEF_CASTS
 
 #ifdef __cplusplus
diff --git a/upb/descriptor_const.h b/upb/descriptor/descriptor_const.h
index 20058e4..52ca803 100644
--- a/upb/descriptor_const.h
+++ b/upb/descriptor/descriptor_const.h
@@ -9,79 +9,47 @@ extern "C" {
 
 /* Enums. */
 
-typedef enum google_protobuf_FieldOptions_CType {
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2
-} google_protobuf_FieldOptions_CType;
-
 typedef enum google_protobuf_FieldDescriptorProto_Type {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1,
   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3,
   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4,
   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17
 } google_protobuf_FieldDescriptorProto_Type;
 
 typedef enum google_protobuf_FieldDescriptorProto_Label {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1,
   GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1
 } google_protobuf_FieldDescriptorProto_Label;
 
+typedef enum google_protobuf_FieldOptions_CType {
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2
+} google_protobuf_FieldOptions_CType;
+
 typedef enum google_protobuf_FileOptions_OptimizeMode {
-  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
   GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2,
+  GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1,
   GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3
 } google_protobuf_FileOptions_OptimizeMode;
 
 /* Constants for field names and numbers. */
 
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path"
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5
@@ -106,6 +74,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value"
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3
 
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
+
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value"
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1
@@ -114,10 +86,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value"
 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12
 
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9
-
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9
@@ -138,14 +106,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
 
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
-#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
@@ -154,6 +114,14 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info"
 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11
 
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name"
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9
@@ -170,53 +138,13 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
 
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
 
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
-
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
-#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file"
+#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11
 
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1
 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location"
@@ -230,6 +158,18 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end"
 #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5
 
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5
+
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1
 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype"
 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14
@@ -254,18 +194,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package"
 #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9
 
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
-
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
-#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
-
 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16
 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services"
 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8
@@ -286,17 +214,69 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
 
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9
 
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14
 
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
-#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files"
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11
+
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
 
 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
@@ -322,6 +302,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type"
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14
 
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
+#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name"
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9
@@ -330,18 +314,34 @@ typedef enum google_protobuf_FileOptions_OptimizeMode {
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value"
 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9
 
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options"
-#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11
-
 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
 
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8
+
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
+#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
+
 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999
 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option"
 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11
 
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9
+
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension"
+#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8
+
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
diff --git a/upb/descriptor.c b/upb/descriptor/reader.c
index 0c589f2..8177560 100644
--- a/upb/descriptor.c
+++ b/upb/descriptor/reader.c
@@ -8,13 +8,14 @@
 #include <stdlib.h>
 #include <errno.h>
 #include "upb/def.h"
-#include "upb/descriptor.h"
+#include "upb/descriptor/descriptor_const.h"
+#include "upb/descriptor/reader.h"
 
 // Returns a newly allocated string that joins input strings together, for example:
 //   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 //   join("", "Baz") -> "Baz"
 // Caller owns a ref on the returned string. */
-static char *upb_join(char *base, char *name) {
+static char *upb_join(const char *base, const char *name) {
   if (!base || strlen(base) == 0) {
     return strdup(name);
   } else {
@@ -27,6 +28,36 @@ static char *upb_join(char *base, char *name) {
   }
 }
 
+void upb_deflist_init(upb_deflist *l) {  // Sets l up as an empty list.
+  l->size = 8;  // Initial capacity, in entries.
+  l->defs = malloc(l->size * sizeof(void*));  // NOTE(review): result not checked for NULL.
+  l->len = 0;
+  l->owned = true;  // While set, upb_deflist_uninit() unrefs every stored def.
+}
+
+void upb_deflist_uninit(upb_deflist *l) {  // Releases the list's storage.
+  if (l->owned)  // Refs are only dropped if they were not donated away.
+    for(size_t i = 0; i < l->len; i++)
+      upb_def_unref(l->defs[i], &l->defs);  // &l->defs is passed as the owner.
+  free(l->defs);  // The array itself is freed whether or not the refs were owned.
+}
+
+void upb_deflist_push(upb_deflist *l, upb_def *d) {  // Appends d to the list.
+  if(l->len == l->size) {  // Array is full: double its capacity first.
+    l->size *= 2;
+    l->defs = realloc(l->defs, l->size * sizeof(void*));  // NOTE(review): realloc failure is not handled.
+  }
+  l->defs[l->len++] = d;  // Stores the pointer only; no ref is taken here.
+}
+
+void upb_deflist_donaterefs(upb_deflist *l, void *owner) {  // Donates each def's ref to owner.
+  assert(l->owned);  // Must not be called once the refs have been donated.
+  for (size_t i = 0; i < l->len; i++)
+    upb_def_donateref(l->defs[i], &l->defs, owner);  // From owner &l->defs to the given owner.
+  l->owned = false;  // upb_deflist_uninit() will now only free the array.
+}
+
+
 /* upb_descreader  ************************************************************/
 
 static upb_def *upb_deflist_last(upb_deflist *l) {
@@ -37,8 +68,8 @@ static upb_def *upb_deflist_last(upb_deflist *l) {
 static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
   for(uint32_t i = start; i < l->len; i++) {
     upb_def *def = l->defs[i];
-    char *name = def->fqname;
-    def->fqname = upb_join(str, name);
+    char *name = upb_join(str, upb_def_fullname(def));
+    upb_def_setfullname(def, name);
     free(name);
   }
 }
@@ -66,9 +97,9 @@ void upb_descreader_uninit(upb_descreader *r) {
   }
 }
 
-upb_def **upb_descreader_getdefs(upb_descreader *r, int *n) {
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
   *n = r->defs.len;
-  r->defs.len = 0;
+  upb_deflist_donaterefs(&r->defs, owner);
   return r->defs.defs;
 }
 
@@ -204,7 +235,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
     return;
   }
   upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
-  if (upb_inttable_count(&e->iton) == 0) {
+  if (upb_enumdef_numvals(e) == 0) {
     // The default value of an enum (in the absence of an explicit default) is
     // its first listed value.
     upb_enumdef_setdefault(e, r->number);
@@ -236,18 +267,18 @@ static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
 // google.protobuf.EnumDescriptorProto.
 static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
   upb_descreader *r = _r;
-  upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new()));
+  upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs)));
   return UPB_CONTINUE;
 }
 
 static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
   upb_descreader *r = _r;
   upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
-  if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) {
+  if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) {
     upb_status_seterrliteral(status, "Enum had no name.");
     return;
   }
-  if (upb_inttable_count(&e->iton) == 0) {
+  if (upb_enumdef_numvals(e) == 0) {
     upb_status_seterrliteral(status, "Enum had no values.");
     return;
   }
@@ -258,9 +289,9 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
                                                        upb_value val) {
   (void)fval;
   upb_descreader *r = _r;
-  upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
-  free(e->base.fqname);
-  e->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  upb_def_setfullname(upb_descreader_last(r), fullname);
+  free(fullname);
   return UPB_CONTINUE;
 }
 
@@ -284,7 +315,7 @@ static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h)
 
 static upb_flow_t upb_fielddef_startmsg(void *_r) {
   upb_descreader *r = _r;
-  r->f = upb_fielddef_new();
+  r->f = upb_fielddef_new(&r->defs);
   free(r->default_string);
   r->default_string = NULL;
   return UPB_CONTINUE;
@@ -370,13 +401,12 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) {
   upb_descreader *r = _r;
   upb_fielddef *f = r->f;
   // TODO: verify that all required fields were present.
-  assert(f->number != -1 && f->name != NULL);
-  assert((f->def != NULL) == upb_hassubdef(f));
+  assert(f->number != -1 && upb_fielddef_name(f) != NULL);
+  assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f));
 
   // Field was successfully read, add it as a field of the msgdef.
   upb_msgdef *m = upb_descreader_top(r);
-  upb_msgdef_addfield(m, f);
-  upb_fielddef_unref(f);
+  upb_msgdef_addfield(m, f, &r->defs);
   r->f = NULL;
 
   if (r->default_string) {
@@ -435,7 +465,7 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
   (void)fval;
   upb_descreader *r = _r;
   char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  upb_fielddef_settypename(r->f, name);
+  upb_fielddef_setsubtypename(r->f, name);
   free(name);
   return UPB_CONTINUE;
 }
@@ -479,7 +509,7 @@ static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
 // google.protobuf.DescriptorProto.
 static upb_flow_t upb_msgdef_startmsg(void *_r) {
   upb_descreader *r = _r;
-  upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new()));
+  upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs)));
   upb_descreader_startcontainer(r);
   return UPB_CONTINUE;
 }
@@ -487,7 +517,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_r) {
 static void upb_msgdef_endmsg(void *_r, upb_status *status) {
   upb_descreader *r = _r;
   upb_msgdef *m = upb_descreader_top(r);
-  if(!m->base.fqname) {
+  if(!upb_def_fullname(UPB_UPCAST(m))) {
     upb_status_seterrliteral(status, "Encountered message with no name.");
     return;
   }
@@ -497,11 +527,10 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
 static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
   (void)fval;
   upb_descreader *r = _r;
-  assert(val.type == UPB_TYPE(STRING));
   upb_msgdef *m = upb_descreader_top(r);
-  free(m->base.fqname);
-  m->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val));
-  upb_descreader_setscopename(r, strdup(m->base.fqname));
+  char *name = upb_byteregion_strdup(upb_value_getbyteregion(val));
+  upb_def_setfullname(UPB_UPCAST(m), name);
+  upb_descreader_setscopename(r, name);  // Passes ownership of name.
   return UPB_CONTINUE;
 }
 
@@ -530,4 +559,3 @@ static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
 }
 #undef FNUM
 #undef FTYPE
-
diff --git a/upb/descriptor.h b/upb/descriptor/reader.h
index 21099b3..0e1bfa0 100644
--- a/upb/descriptor.h
+++ b/upb/descriptor/reader.h
@@ -4,9 +4,9 @@
  * Copyright (c) 2011 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  *
- * Routines for building defs by parsing descriptors in descriptor.proto format.
- * This only needs to use the public API of upb_symtab.  Later we may also
- * add routines for dumping a symtab to a descriptor.
+ * upb_descreader provides a set of sink handlers that will build defs from a
+ * data source that uses the descriptor.proto schema (like a protobuf binary
+ * descriptor).
  */
 
 #ifndef UPB_DESCRIPTOR_H
@@ -18,6 +18,20 @@
 extern "C" {
 #endif
 
+/* upb_deflist ****************************************************************/
+
+// upb_deflist is an internal-only dynamic array for storing a growing list of
+// upb_defs.
+typedef struct {
+  upb_def **defs;  // Heap-allocated array of def pointers.
+  size_t len;      // Number of entries currently stored.
+  size_t size;     // Allocated capacity of defs, in entries.
+  bool owned;      // True until the refs are donated via upb_deflist_donaterefs().
+} upb_deflist;
+
+void upb_deflist_init(upb_deflist *l);
+void upb_deflist_uninit(upb_deflist *l);
+void upb_deflist_push(upb_deflist *l, upb_def *d);
 
 /* upb_descreader  ************************************************************/
 
@@ -56,11 +70,11 @@ void upb_descreader_uninit(upb_descreader *r);
 upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
 
 // Gets the array of defs that have been parsed and removes them from the
-// descreader.  Ownership of the defs is passed to the caller, but the
-// ownership of the returned array is retained and is invalidated by any other
-// call into the descreader.  The defs will not have been resolved, and are
-// ready to be added to a symtab.
-upb_def **upb_descreader_getdefs(upb_descreader *r, int *n);
+// descreader.  Ownership of the defs is passed to the caller (using the given
+// owner), but the ownership of the returned array is retained and is
+// invalidated by any other call into the descreader.  The defs will not have
+// been resolved, and are ready to be added to a symtab.
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);
 
 #ifdef __cplusplus
 }  /* extern "C" */
diff --git a/upb/handlers.c b/upb/handlers.c
index 1ccaf8d..ea5a054 100644
--- a/upb/handlers.c
+++ b/upb/handlers.c
@@ -13,7 +13,7 @@
 
 static upb_mhandlers *upb_mhandlers_new() {
   upb_mhandlers *m = malloc(sizeof(*m));
-  upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent));
+  upb_inttable_init(&m->fieldtab);
   m->startmsg = NULL;
   m->endmsg = NULL;
   m->is_group = false;
@@ -26,20 +26,19 @@ static upb_mhandlers *upb_mhandlers_new() {
 static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
                                                   upb_fieldtype_t type,
                                                   bool repeated) {
-  upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n);
+  const upb_value *v = upb_inttable_lookup(&m->fieldtab, n);
   // TODO: design/refine the API for changing the set of fields or modifying
   // existing handlers.
-  if (e) return NULL;
-  upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0),
+  if (v) return NULL;
+  upb_fhandlers new_f = {type, repeated, 0,
       n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
 #ifdef UPB_USE_JIT_X64
       0, 0, 0,
 #endif
-      NULL};
+  };
   upb_fhandlers *ptr = malloc(sizeof(*ptr));
   memcpy(ptr, &new_f, sizeof(upb_fhandlers));
-  upb_itofhandlers_ent ent = {false, ptr};
-  upb_inttable_insert(&m->fieldtab, n, &ent);
+  upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr));
   return ptr;
 }
 
@@ -64,12 +63,17 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
   return f;
 }
 
+upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) {  // Looks up n in m->fieldtab.
+  const upb_value *v = upb_inttable_lookup(&m->fieldtab, n);
+  return v ? upb_value_getptr(*v) : NULL;  // NULL when the lookup found no entry for n.
+}
+
 
 /* upb_handlers ***************************************************************/
 
 upb_handlers *upb_handlers_new() {
   upb_handlers *h = malloc(sizeof(*h));
-  upb_atomic_init(&h->refcount, 1);
+  h->refcount = 1;
   h->msgs_len = 0;
   h->msgs_size = 4;
   h->msgs = malloc(h->msgs_size * sizeof(*h->msgs));
@@ -77,19 +81,18 @@ upb_handlers *upb_handlers_new() {
   return h;
 }
 
-void upb_handlers_ref(upb_handlers *h) { upb_atomic_ref(&h->refcount); }
+void upb_handlers_ref(upb_handlers *h) { h->refcount++; }
 
 void upb_handlers_unref(upb_handlers *h) {
-  if (upb_atomic_unref(&h->refcount)) {
+  if (--h->refcount == 0) {
     for (int i = 0; i < h->msgs_len; i++) {
       upb_mhandlers *mh = h->msgs[i];
-      for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab);
-          !upb_inttable_done(j);
-          j = upb_inttable_next(&mh->fieldtab, j)) {
-        upb_itofhandlers_ent *e = upb_inttable_iter_value(j);
-        free(e->f);
+      upb_inttable_iter j;
+      upb_inttable_begin(&j, &mh->fieldtab);
+      for(; !upb_inttable_done(&j); upb_inttable_next(&j)) {
+        free(upb_value_getptr(upb_inttable_iter_value(&j)));
       }
-      upb_inttable_free(&mh->fieldtab);
+      upb_inttable_uninit(&mh->fieldtab);
 #ifdef UPB_USE_JIT_X64
       free(mh->tablearray);
 #endif
@@ -110,31 +113,28 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
   return mh;
 }
 
-typedef struct {
-  upb_mhandlers *mh;
-} upb_mtab_ent;
-
 static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m,
                                      upb_onmsgreg *msgreg_cb,
                                      upb_onfieldreg *fieldreg_cb,
                                      void *closure, upb_strtable *mtab) {
   upb_mhandlers *mh = upb_handlers_newmhandlers(h);
-  upb_mtab_ent e = {mh};
-  upb_strtable_insert(mtab, m->base.fqname, &e);
+  upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh));
   if (msgreg_cb) msgreg_cb(closure, mh, m);
   upb_msg_iter i;
-  for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
-    upb_fielddef *f = upb_msg_iter_field(i);
+  for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
     upb_fhandlers *fh;
     if (upb_issubmsg(f)) {
       upb_mhandlers *sub_mh;
-      upb_mtab_ent *subm_ent;
+      const upb_value *subm_ent;
       // The table lookup is necessary to break the DFS for type cycles.
-      if ((subm_ent = upb_strtable_lookup(mtab, f->def->fqname)) != NULL) {
-        sub_mh = subm_ent->mh;
+      const char *subname = upb_def_fullname(upb_fielddef_subdef(f));
+      if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) {
+        sub_mh = upb_value_getptr(*subm_ent);
       } else {
-        sub_mh = upb_regmsg_dfs(h, upb_downcast_msgdef(f->def), msgreg_cb,
-                                fieldreg_cb, closure, mtab);
+        sub_mh = upb_regmsg_dfs(
+            h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)),
+            msgreg_cb, fieldreg_cb, closure, mtab);
       }
       fh = upb_mhandlers_newfhandlers_subm(
           mh, f->number, f->type, upb_isseq(f), sub_mh);
@@ -151,10 +151,10 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
                                       upb_onfieldreg *fieldreg_cb,
                                       void *closure) {
   upb_strtable mtab;
-  upb_strtable_init(&mtab, 8, sizeof(upb_mtab_ent));
+  upb_strtable_init(&mtab);
   upb_mhandlers *ret =
       upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab);
-  upb_strtable_free(&mtab);
+  upb_strtable_uninit(&mtab);
   return ret;
 }
 
@@ -212,6 +212,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
 
   upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
   if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
+  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
   if (sflow.flow != UPB_CONTINUE) {
     _upb_dispatcher_abortjmp(d);
   }
@@ -247,6 +248,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
 
   upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
   if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
+  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
   if (sflow.flow != UPB_CONTINUE) {
     _upb_dispatcher_abortjmp(d);
   }
diff --git a/upb/handlers.h b/upb/handlers.h
index 9ed02c1..9083a2e 100644
--- a/upb/handlers.h
+++ b/upb/handlers.h
@@ -9,6 +9,10 @@
  * for each message and/or field as the data is being parsed or iterated over,
  * without having to know the source format that we are parsing from.  This
  * decouples the parsing logic from the processing logic.
+ *
+ * TODO: should we allow handlers to longjmp()?  Would be necessary to e.g. let
+ * a Lua handler "yield" from the current coroutine.  I *think* everything
+ * would "just work" with our current decoder.
  */
 
 #ifndef UPB_HANDLERS_H
@@ -141,9 +145,9 @@ struct _upb_mhandlers;
 typedef struct _upb_fieldent {
   upb_fieldtype_t type;
   bool repeated;
-  upb_atomic_t refcount;
+  uint32_t refcount;
   uint32_t number;
-  int32_t valuehasbit;
+  int32_t hasbit;
   struct _upb_mhandlers *msg;
   struct _upb_mhandlers *submsg;  // Set iff upb_issubmsgtype(type) == true.
   upb_value fval;
@@ -157,14 +161,8 @@ typedef struct _upb_fieldent {
   uint32_t jit_pclabel_notypecheck;
   uint32_t jit_submsg_done_pclabel;
 #endif
-  void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
 } upb_fhandlers;
 
-typedef struct {
-  bool junk;  // Stolen by table impl; see table.h for details.
-  upb_fhandlers *f;
-} upb_itofhandlers_ent;
-
 // fhandlers are created as part of a upb_handlers instance, but can be ref'd
 // and unref'd to prolong the life of the handlers.
 void upb_fhandlers_ref(upb_fhandlers *m);
@@ -174,6 +172,8 @@ void upb_fhandlers_unref(upb_fhandlers *m);
 #define UPB_FHANDLERS_ACCESSORS(name, type) \
   INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \
   INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; }
+// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts
+// the handlers.
 UPB_FHANDLERS_ACCESSORS(fval, upb_value)
 UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*)
 UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*)
@@ -182,11 +182,13 @@ UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*)
 UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*)
 UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*)
 UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*)
-// If set to >= 0, the hasbit will automatically be set after the corresponding
-// callback is called (when a JIT is enabled, this can be significantly more
-// efficient than setting the hasbit yourself inside the callback).  Could add
-// this for seq and submsg also, but doesn't look like a win at the moment.
-UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)
+// If set to >= 0, the hasbit will automatically be set when the corresponding
+// field is parsed (when a JIT is enabled, this can be significantly more
+// efficient than setting the hasbit yourself inside the callback).  For values
+// it is undefined whether the hasbit is set before or after the callback is
+// called.  For seq and submsg, the hasbit is set *after* the start handler is
+// called, but before any of the handlers for the submsg or sequence.
+UPB_FHANDLERS_ACCESSORS(hasbit, int32_t)
 
 
 /* upb_mhandlers **************************************************************/
@@ -195,7 +197,7 @@ UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)
 // message in the graph of messages.
 
 typedef struct _upb_mhandlers {
-  upb_atomic_t refcount;
+  uint32_t refcount;
   upb_startmsg_handler *startmsg;
   upb_endmsg_handler *endmsg;
   upb_inttable fieldtab;  // Maps field number -> upb_fhandlers.
@@ -203,6 +205,7 @@ typedef struct _upb_mhandlers {
 #ifdef UPB_USE_JIT_X64
   // Used inside the JIT to track labels (jmp targets) in the generated code.
   uint32_t jit_startmsg_pclabel;  // Starting a parse of this (sub-)message.
+  uint32_t jit_afterstartmsg_pclabel;  // After calling the startmsg handler.
   uint32_t jit_endofbuf_pclabel;  // ptr hitend, but delim_end or jit_end?
   uint32_t jit_endofmsg_pclabel;  // Done parsing this (sub-)message.
   uint32_t jit_dyndispatch_pclabel;  // Dispatch by table lookup.
@@ -240,11 +243,14 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
 UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*);
 UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*);
 
+// Returns fhandlers for the given field, or NULL if none.
+upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n);
+
 
 /* upb_handlers ***************************************************************/
 
 struct _upb_handlers {
-  upb_atomic_t refcount;
+  uint32_t refcount;
   upb_mhandlers **msgs;  // Array of msgdefs, [0]=toplevel.
   int msgs_len, msgs_size;
   bool should_jit;
@@ -267,8 +273,10 @@ upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index);
 // with "fieldreg_cb"
 //
 // See upb_handlers_reghandlerset() below for an example.
-typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, const upb_msgdef *m);
-typedef void upb_onfieldreg(void *closure, upb_fhandlers *mh, const upb_fielddef *m);
+typedef void upb_onmsgreg(
+    void *closure, upb_mhandlers *mh, const upb_msgdef *m);
+typedef void upb_onfieldreg(
+    void *closure, upb_fhandlers *fh, const upb_fielddef *f);
 upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
                                       upb_onmsgreg *msgreg_cb,
                                       upb_onfieldreg *fieldreg_cb,
@@ -305,8 +313,8 @@ INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) {
   upb_value_setfielddef(&val, f);
   upb_fhandlers_setfval(fh, val);
 }
-INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgdef *m,
-                                                 upb_handlerset *hs) {
+INLINE upb_mhandlers *upb_handlers_reghandlerset(
+    upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) {
   return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs);
 }
 
@@ -373,7 +381,7 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
                                upb_value val) {
   upb_flow_t flow = UPB_CONTINUE;
   if (f->value) flow = f->value(d->top->closure, f->fval, val);
-  _upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
+  _upb_dispatcher_sethas(d->top->closure, f->hasbit);
   if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
 }
 void upb_dispatch_startmsg(upb_dispatcher *d);
@@ -381,7 +389,8 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
 upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
                                                upb_fhandlers *f);
 upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
-upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
+                                            upb_fhandlers *f);
 upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);
 
 #ifdef __cplusplus
diff --git a/upb/msg.c b/upb/msg.c
index 77521e5..c671b7b 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -4,101 +4,12 @@
  * Copyright (c) 2010 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  *
- * Data structure for storing a message of protobuf data.
  */
 
 #include "upb/upb.h"
 #include "upb/msg.h"
 
-void upb_msg_clear(void *msg, const upb_msgdef *md) {
-  assert(msg != NULL);
-  memset(msg, 0, md->hasbit_bytes);
-  // TODO: set primitive fields to defaults?
-}
-
-void *upb_stdarray_append(upb_stdarray *a, size_t type_size) {
-  assert(a != NULL);
-  assert(a->len <= a->size);
-  if (a->len == a->size) {
-    size_t old_size = a->size;
-    a->size = old_size == 0 ? 8 : (old_size * 2);
-    a->ptr = realloc(a->ptr, a->size * type_size);
-    memset(&a->ptr[old_size * type_size], 0, (a->size - old_size) * type_size);
-  }
-  return &a->ptr[a->len++ * type_size];
-}
-
-#if 0
-static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
-                                   upb_dispatcher *d);
-
-static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f,
-                                  upb_dispatcher *d, upb_fhandlers *hf) {
-  if (upb_issubmsg(f)) {
-    upb_msg *msg = upb_value_getmsg(val);
-    upb_dispatch_startsubmsg(d, hf);
-    upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d);
-    upb_dispatch_endsubmsg(d);
-  } else {
-    upb_dispatch_value(d, hf, val);
-  }
-  return UPB_CONTINUE;
-}
-
-static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
-                                   upb_dispatcher *d) {
-  upb_msg_iter i;
-  for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
-    upb_fielddef *f = upb_msg_iter_field(i);
-    if (!upb_msg_has(msg, f)) continue;
-    upb_fhandlers *hf = upb_dispatcher_lookup(d, f->number);
-    if (!hf) continue;
-    upb_value val = upb_msg_get(msg, f);
-    if (upb_isarray(f)) {
-      upb_array *arr = upb_value_getarr(val);
-      for (uint32_t j = 0; j < upb_array_len(arr); ++j) {
-        upb_msg_pushval(upb_array_get(arr, f, j), f, d, hf);
-      }
-    } else {
-      upb_msg_pushval(val, f, d, hf);
-    }
-  }
-  return UPB_CONTINUE;
-}
-
-void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
-                         void *closure, upb_status *status) {
-  upb_dispatcher d;
-  upb_dispatcher_init(&d, h, NULL, NULL, NULL);
-  upb_dispatcher_reset(&d, closure);
-
-  upb_dispatch_startmsg(&d);
-  upb_msg_dispatch(msg, md, &d);
-  upb_dispatch_endmsg(&d, status);
-
-  upb_dispatcher_uninit(&d);
-}
-#endif
-
-/* Standard writers. **********************************************************/
-
-void upb_stdmsg_sethas(void *_m, upb_value fval) {
-  assert(_m != NULL);
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  if (f->hasbit >= 0)
-    m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8));
-}
-
-bool upb_stdmsg_has(const void *_m, upb_value fval) {
-  assert(_m != NULL);
-  const char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  return f->hasbit < 0 ||
-      (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8)));
-}
-
-#define UPB_ACCESSORS(type, ctype)                                            \
+#define UPB_ACCESSOR(type, ctype)                                             \
   upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval,                \
                                      upb_value val) {                         \
     assert(_m != NULL);                                                       \
@@ -108,230 +19,17 @@ bool upb_stdmsg_has(const void *_m, upb_value fval) {
     *(ctype*)&m[f->offset] = upb_value_get ## type(val);                      \
     return UPB_CONTINUE;                                                      \
   }                                                                           \
-                                                                              \
-  upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval,           \
-                                          upb_value val) {                    \
-    (void)_fval;                                                              \
-    assert(a != NULL);                                                        \
-    ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype));          \
-    *p = upb_value_get ## type(val);                                          \
-    return UPB_CONTINUE;                                                      \
-  }                                                                           \
-                                                                              \
-  upb_value upb_stdmsg_get ## type(const void *_m, upb_value fval) {          \
-    assert(_m != NULL);                                                       \
-    const uint8_t *m = _m;                                                    \
-    const upb_fielddef *f = upb_value_getfielddef(fval);                      \
-    upb_value ret;                                                            \
-    upb_value_set ## type(&ret, *(ctype*)&m[f->offset]);                      \
-    return ret;                                                               \
-  }                                                                           \
-  upb_value upb_stdmsg_seqget ## type(const void *i) {                        \
-    assert(i != NULL);                                                        \
-    upb_value val;                                                            \
-    upb_value_set ## type(&val, *(ctype*)i);                                  \
-    return val;                                                               \
-  }
 
-UPB_ACCESSORS(double, double)
-UPB_ACCESSORS(float, float)
-UPB_ACCESSORS(int32, int32_t)
-UPB_ACCESSORS(int64, int64_t)
-UPB_ACCESSORS(uint32, uint32_t)
-UPB_ACCESSORS(uint64, uint64_t)
-UPB_ACCESSORS(bool, bool)
-UPB_ACCESSORS(ptr, void*)
+UPB_ACCESSOR(double, double)
+UPB_ACCESSOR(float, float)
+UPB_ACCESSOR(int32, int32_t)
+UPB_ACCESSOR(int64, int64_t)
+UPB_ACCESSOR(uint32, uint32_t)
+UPB_ACCESSOR(uint64, uint64_t)
+UPB_ACCESSOR(bool, bool)
+UPB_ACCESSOR(ptr, void*)
 #undef UPB_ACCESSORS
 
-static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
-  upb_stdarray **dstp = _dst;
-  upb_stdarray *dst = *dstp;
-  if (!dst) {
-    dst = malloc(sizeof(*dst));
-    dst->size = 0;
-    dst->ptr = NULL;
-    *dstp = dst;
-  }
-  dst->len = 0;
-  const upb_byteregion *bytes = upb_value_getbyteregion(src);
-  uint32_t len = upb_byteregion_len(bytes);
-  if (len > dst->size) {
-    dst->size = len;
-    dst->ptr = realloc(dst->ptr, dst->size);
-  }
-  dst->len = len;
-  upb_byteregion_copyall(bytes, dst->ptr);
-}
-
-upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
-  assert(_m != NULL);
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  // Hasbit automatically set by the handlers.
-  _upb_stdmsg_setstr(&m[f->offset], val);
-  return UPB_CONTINUE;
-}
-
-upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
-  assert(a != NULL);
-  (void)fval;
-  _upb_stdmsg_setstr(upb_stdarray_append((upb_stdarray*)a, sizeof(void*)), val);
-  return UPB_CONTINUE;
-}
-
-upb_value upb_stdmsg_getstr(const void *m, upb_value fval) {
-  assert(m != NULL);
-  return upb_stdmsg_getptr(m, fval);
-}
-
-upb_value upb_stdmsg_seqgetstr(const void *i) {
-  assert(i != NULL);
-  return upb_stdmsg_seqgetptr(i);
-}
-
-void *upb_stdmsg_new(const upb_msgdef *md) {
-  void *m = malloc(md->size);
-  memset(m, 0, md->size);
-  upb_msg_clear(m, md);
-  return m;
-}
-
-void upb_stdseq_free(void *s, upb_fielddef *f) {
-  upb_stdarray *a = s;
-  if (upb_issubmsg(f) || upb_isstring(f)) {
-    void **p = (void**)a->ptr;
-    for (uint32_t i = 0; i < a->size; i++) {
-      if (upb_issubmsg(f)) {
-        upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
-      } else {
-        upb_stdarray *str = p[i];
-        free(str->ptr);
-        free(str);
-      }
-    }
-  }
-  free(a->ptr);
-  free(a);
-}
-
-void upb_stdmsg_free(void *m, const upb_msgdef *md) {
-  if (m == NULL) return;
-  upb_msg_iter i;
-  for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
-    upb_fielddef *f = upb_msg_iter_field(i);
-    if (!upb_isseq(f) && !upb_issubmsg(f) && !upb_isstring(f)) continue;
-    void *subp = upb_value_getptr(upb_stdmsg_getptr(m, f->fval));
-    if (subp == NULL) continue;
-    if (upb_isseq(f)) {
-      upb_stdseq_free(subp, f);
-    } else if (upb_issubmsg(f)) {
-      upb_stdmsg_free(subp, upb_downcast_msgdef(f->def));
-    } else {
-      upb_stdarray *str = subp;
-      free(str->ptr);
-      free(str);
-    }
-  }
-  free(m);
-}
-
-upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) {
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  upb_stdarray **arr = (void*)&m[f->offset];
-  if (!upb_stdmsg_has(_m, fval)) {
-    if (!*arr) {
-      *arr = malloc(sizeof(**arr));
-      (*arr)->size = 0;
-      (*arr)->ptr = NULL;
-    }
-    (*arr)->len = 0;
-    upb_stdmsg_sethas(m, fval);
-  }
-  return UPB_CONTINUE_WITH(*arr);
-}
-
-void upb_stdmsg_recycle(void **m, const upb_msgdef *md) {
-  if (*m)
-    upb_msg_clear(*m, md);
-  else
-    *m = upb_stdmsg_new(md);
-}
-
-upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) {
-  assert(_m != NULL);
-  char *m = _m;
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  void **subm = (void*)&m[f->offset];
-  if (!upb_stdmsg_has(m, fval)) {
-    upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
-    upb_stdmsg_sethas(m, fval);
-  }
-  return UPB_CONTINUE_WITH(*subm);
-}
-
-upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) {
-  assert(a != NULL);
-  const upb_fielddef *f = upb_value_getfielddef(fval);
-  void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
-  upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
-  return UPB_CONTINUE_WITH(*subm);
-}
-
-const void *upb_stdmsg_seqbegin(const void *_a) {
-  const upb_stdarray *a = _a;
-  return a->len > 0 ? a->ptr : NULL;
-}
-
-#define NEXTFUNC(size) \
-  const void *upb_stdmsg_ ## size ## byte_seqnext(const void *_a, const void *iter) {\
-    const upb_stdarray *a = _a;                                          \
-    const void *next = (char*)iter + size;                               \
-    return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL;  \
-  }
-
-NEXTFUNC(8)
-NEXTFUNC(4)
-NEXTFUNC(1)
-
-#define STDMSG(type, size) { static upb_accessor_vtbl vtbl = { \
-    &upb_stdmsg_startsubmsg, \
-    &upb_stdmsg_set ## type, \
-    &upb_stdmsg_startseq, \
-    &upb_stdmsg_startsubmsg_r, \
-    &upb_stdmsg_set ## type ## _r, \
-    &upb_stdmsg_has, \
-    &upb_stdmsg_getptr, \
-    &upb_stdmsg_get ## type, \
-    &upb_stdmsg_seqbegin, \
-    &upb_stdmsg_ ## size ## byte_seqnext, \
-    &upb_stdmsg_seqget ## type}; \
-  return &vtbl; }
-
-upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) {
-  switch (f->type) {
-    case UPB_TYPE(DOUBLE): STDMSG(double, 8)
-    case UPB_TYPE(FLOAT): STDMSG(float, 4)
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64): STDMSG(uint64, 8)
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SFIXED64):
-    case UPB_TYPE(SINT64): STDMSG(int64, 8)
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(SINT32):
-    case UPB_TYPE(ENUM):
-    case UPB_TYPE(SFIXED32): STDMSG(int32, 4)
-    case UPB_TYPE(UINT32):
-    case UPB_TYPE(FIXED32): STDMSG(uint32, 4)
-    case UPB_TYPE(BOOL): STDMSG(bool, 1)
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES):
-    case UPB_TYPE(GROUP):
-    case UPB_TYPE(MESSAGE): STDMSG(str, 8)  // TODO: 32-bit
-  }
-  return NULL;
-}
-
 static void upb_accessors_onfreg(void *c, upb_fhandlers *fh,
                                  const upb_fielddef *f) {
   (void)c;
@@ -344,7 +42,7 @@ static void upb_accessors_onfreg(void *c, upb_fhandlers *fh,
     } else {
       upb_fhandlers_setvalue(fh, f->accessor->set);
       upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg);
-      upb_fhandlers_setvaluehasbit(fh, f->hasbit);
+      upb_fhandlers_sethasbit(fh, f->hasbit);
     }
   }
 }
diff --git a/upb/msg.h b/upb/msg.h
index 67903d0..7aaaf2a 100644
--- a/upb/msg.h
+++ b/upb/msg.h
@@ -68,34 +68,18 @@ typedef struct _upb_accessor_vtbl {
   upb_seqget_handler     *seqget;
 } upb_accessor_vtbl;
 
-// Registers handlers for writing into a message of the given type.
+// Registers handlers for writing into a message of the given type using
+// whatever accessors it has defined.
 upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m);
 
-// Returns an stdmsg accessor for the given fielddef.
-upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
-
-
-/* upb_msg/upb_seq ************************************************************/
-
-// upb_msg and upb_seq allow for generic access to a message through its
-// accessor vtable.  Note that these do *not* allow you to create, destroy, or
-// take references on the objects -- these operations are specifically outside
-// the scope of what the accessors define.
-
-// Clears all hasbits.
-// TODO: Add a separate function for setting primitive values back to their
-// defaults (but not strings, submessages, or arrays).
-void upb_msg_clear(void *msg, const upb_msgdef *md);
-
 INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) {
   ((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8));
 }
 
-// Could add a method that recursively clears submessages, strings, and
-// arrays if desired.  This could be a win if you wanted to merge without
-// needing hasbits, because during parsing you would never clear submessages
-// or arrays.  Also this could be desired to provide proto2 operations on
-// generated messages.
+/* upb_msg/upb_seq ************************************************************/
+
+// These accessor functions are simply convenience methods for reading or
+// writing to a message through its accessors.
 
 INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) {
   return f->accessor && f->accessor->has(m, f->fval);
@@ -148,65 +132,11 @@ INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md,
   return true;
 }
 
-
-/* upb_msgvisitor *************************************************************/
-
-// A upb_msgvisitor reads data from an in-memory structure using its accessors,
-// pushing the results to a given set of upb_handlers.
-// TODO: not yet implemented.
-
-typedef struct {
-  upb_fhandlers *fh;
-  upb_fielddef *f;
-  uint16_t msgindex;  // Only when upb_issubmsg(f).
-} upb_msgvisitor_field;
-
-typedef struct {
-  upb_msgvisitor_field *fields;
-  int fields_len;
-} upb_msgvisitor_msg;
-
-typedef struct {
-  uint16_t msgindex;
-  uint16_t fieldindex;
-  uint32_t arrayindex;  // UINT32_MAX if not an array frame.
-} upb_msgvisitor_frame;
-
-typedef struct {
-  upb_msgvisitor_msg *messages;
-  int messages_len;
-  upb_dispatcher dispatcher;
-} upb_msgvisitor;
-
-// Initializes a msgvisitor that will push data from messages of the given
-// msgdef to the given set of handlers.
-void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
-void upb_msgvisitor_uninit(upb_msgvisitor *v);
-
-void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
-void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
-
-
-/* Standard writers. **********************************************************/
-
-// Allocates a new stdmsg.
-void *upb_stdmsg_new(const upb_msgdef *md);
-
-// Recursively frees any strings or submessages that the message refers to.
-void upb_stdmsg_free(void *m, const upb_msgdef *md);
-
-void upb_stdmsg_sethas(void *_m, upb_value fval);
-
-// "hasbit" must be <= UPB_MAX_FIELDS.  If it is <0, this field has no hasbit.
-upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
-upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
-                                   uint16_t subm_size, uint8_t subm_setbytes);
-
 // Value writers for every in-memory type: write the data to a known offset
-// from the closure "c" and set the hasbit (if any).
-// TODO: can we get away with having only one for int64, uint64, double, etc?
-// The main thing in the way atm is that the upb_value is strongly typed.
-// in debug mode.
+// from the closure "c".
+//
+// TODO(haberman): instead of having standard writer functions, should we have
+// a bool in the accessor that says "write raw value to the field's offset"?
 upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
@@ -216,94 +146,6 @@ upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
 upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val);
 
-// Value writers for repeated fields: the closure points to a standard array
-// struct, appends the value to the end of the array, resizing with realloc()
-// if necessary.
-typedef struct {
-  char *ptr;
-  uint32_t len;   // Number of elements present.
-  uint32_t size;  // Number of elements allocated.
-} upb_stdarray;
-
-void *upb_stdarray_append(upb_stdarray *a, size_t type_size);
-
-upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setptr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for C strings (NULL-terminated): we can find a char* at a known
-// offset from the closure "c".  Calls realloc() on the pointer to allocate
-// the memory (TODO: investigate whether checking malloc_usable_size() would
-// be cheaper than realloc()).  Also sets the hasbit, if any.
-//
-// Since the string is NULL terminated and does not store an explicit length,
-// these are not suitable for binary data that can contain NULLs.
-upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for length-delimited strings: we explicitly store the length, so
-// the data can contain NULLs.  Stores the data using upb_stdarray
-// which is located at a known offset from the closure "c" (note that it
-// is included inline rather than pointed to).  Also sets the hasbit, if any.
-upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
-upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
-
-// Writers for startseq and startmsg which allocate (or reuse, if possible)
-// a sub data structure (upb_stdarray or a submessage, respectively),
-// setting the hasbit.  If the hasbit is already set, the existing data
-// structure is used verbatim.  If the hasbit is not already set, the pointer
-// is checked for NULL.  If it is NULL, a new substructure is allocated,
-// cleared, and used.  If it is not NULL, the existing substructure is
-// cleared and reused.
-//
-// If there is no hasbit, we always behave as if the hasbit was not set,
-// so any existing data for this array or submessage is cleared.  In most
-// cases this will be fine since each array or non-repeated submessage should
-// occur at most once in the stream.  But if the client is using "concatenation
-// as merging", it will want to make sure hasbits are allocated so merges can
-// happen appropriately.
-//
-// If there was a demand for the behavior that absence of a hasbit acts as if
-// the bit was always set, we could provide that also.  But Clear() would need
-// to act recursively, which is less efficient since it requires an extra pass
-// over the tree.
-upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
-upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
-upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
-
-
-/* Standard readers. **********************************************************/
-
-bool upb_stdmsg_has(const void *c, upb_value fval);
-const void *upb_stdmsg_seqbegin(const void *c);
-
-upb_value upb_stdmsg_getint64(const void *c, upb_value fval);
-upb_value upb_stdmsg_getint32(const void *c, upb_value fval);
-upb_value upb_stdmsg_getuint64(const void *c, upb_value fval);
-upb_value upb_stdmsg_getuint32(const void *c, upb_value fval);
-upb_value upb_stdmsg_getdouble(const void *c, upb_value fval);
-upb_value upb_stdmsg_getfloat(const void *c, upb_value fval);
-upb_value upb_stdmsg_getbool(const void *c, upb_value fval);
-upb_value upb_stdmsg_getptr(const void *c, upb_value fval);
-
-const void *upb_stdmsg_8byte_seqnext(const void *c, const void *iter);
-const void *upb_stdmsg_4byte_seqnext(const void *c, const void *iter);
-const void *upb_stdmsg_1byte_seqnext(const void *c, const void *iter);
-
-upb_value upb_stdmsg_seqgetint64(const void *c);
-upb_value upb_stdmsg_seqgetint32(const void *c);
-upb_value upb_stdmsg_seqgetuint64(const void *c);
-upb_value upb_stdmsg_seqgetuint32(const void *c);
-upb_value upb_stdmsg_seqgetdouble(const void *c);
-upb_value upb_stdmsg_seqgetfloat(const void *c);
-upb_value upb_stdmsg_seqgetbool(const void *c);
-upb_value upb_stdmsg_seqgetptr(const void *c);
-
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 06125dd..b0e2392 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -13,6 +13,33 @@
 #include "upb/pb/decoder.h"
 #include "upb/pb/varint.h"
 
+typedef struct {
+  uint8_t native_wire_type;  // Wire type that is accepted as-is for this type.
+  bool is_numeric;           // Also accepted in packed (delimited) form.
+} upb_decoder_typeinfo;
+
+static const upb_decoder_typeinfo upb_decoder_types[] = {  // Index: f->type.
+  {UPB_WIRE_TYPE_END_GROUP,   false},  // ENDGROUP
+  {UPB_WIRE_TYPE_64BIT,       true},   // DOUBLE
+  {UPB_WIRE_TYPE_32BIT,       true},   // FLOAT
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT64
+  {UPB_WIRE_TYPE_VARINT,      true},   // INT32
+  {UPB_WIRE_TYPE_64BIT,       true},   // FIXED64
+  {UPB_WIRE_TYPE_32BIT,       true},   // FIXED32
+  {UPB_WIRE_TYPE_VARINT,      true},   // BOOL
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // STRING
+  {UPB_WIRE_TYPE_START_GROUP, false},  // GROUP
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // MESSAGE
+  {UPB_WIRE_TYPE_DELIMITED,   false},  // BYTES
+  {UPB_WIRE_TYPE_VARINT,      true},   // UINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // ENUM
+  {UPB_WIRE_TYPE_32BIT,       true},   // SFIXED32
+  {UPB_WIRE_TYPE_64BIT,       true},   // SFIXED64
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT32
+  {UPB_WIRE_TYPE_VARINT,      true},   // SINT64
+};
+
 /* upb_decoderplan ************************************************************/
 
 #ifdef UPB_USE_JIT_X64
@@ -32,37 +59,6 @@
 #include "upb/pb/decoder_x64.h"
 #endif
 
-typedef struct {
-  upb_fhandlers base;
-  void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
-#ifdef UPB_USE_JIT_X64
-  uint32_t jit_pclabel;
-  uint32_t jit_pclabel_notypecheck;
-#endif
-} upb_dplanfield;
-
-typedef struct {
-  upb_mhandlers base;
-#ifdef UPB_USE_JIT_X64
-  uint32_t jit_startmsg_pclabel;
-  uint32_t jit_endofbuf_pclabel;
-  uint32_t jit_endofmsg_pclabel;
-  uint32_t jit_dyndispatch_pclabel;
-  uint32_t jit_unknownfield_pclabel;
-  int32_t jit_parent_field_done_pclabel;
-  uint32_t max_field_number;
-  // Currently keyed on field number.  Could also try keying it
-  // on encoded or decoded tag, or on encoded field number.
-  void **tablearray;
-#endif
-} upb_dplanmsg;
-
-static void *upb_decoderplan_fptrs[];
-
-void upb_decoderplan_initfhandlers(upb_fhandlers *f) {
-  f->decode = upb_decoderplan_fptrs[f->type];
-}
-
 upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
   upb_decoderplan *p = malloc(sizeof(*p));
   p->handlers = h;
@@ -72,17 +68,6 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
   p->jit_code = NULL;
   if (allowjit) upb_decoderplan_makejit(p);
 #endif
-  // Set function pointers for each field's decode function.
-  for (int i = 0; i < h->msgs_len; i++) {
-    upb_mhandlers *m = h->msgs[i];
-    for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
-        !upb_inttable_done(i);
-        i = upb_inttable_next(&m->fieldtab, i)) {
-      upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
-      upb_fhandlers *f = e->f;
-      upb_decoderplan_initfhandlers(f);
-    }
-  }
   return p;
 }
 
@@ -396,14 +381,6 @@ static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
   upb_push_msg(d, f, upb_decoder_offset(d) + len);
 }
 
-#define F(type) &upb_decode_ ## type
-static void *upb_decoderplan_fptrs[] = {
-    &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
-    F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
-    F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
-    F(SFIXED64), F(SINT32), F(SINT64)};
-#undef F
-
 
 /* The main decoding loop *****************************************************/
 
@@ -431,16 +408,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
     if (!upb_trydecode_varint32(d, &tag)) return NULL;
     uint8_t wire_type = tag & 0x7;
     uint32_t fieldnum = tag >> 3;
-    upb_itofhandlers_ent *e = upb_inttable_fastlookup(
-        d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent));
-    upb_fhandlers *f = e ? e->f : NULL;
+    const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum);
+    upb_fhandlers *f = val ? upb_value_getptr(*val) : NULL;
+    bool is_packed = false;
 
     if (f) {
       // Wire type check.
-      if (wire_type == upb_types[f->type].native_wire_type ||
-          (wire_type == UPB_WIRE_TYPE_DELIMITED &&
-           upb_types[f->type].is_numeric)) {
+      if (wire_type == upb_decoder_types[f->type].native_wire_type) {
         // Wire type is ok.
+      } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED &&
+                 upb_decoder_types[f->type].is_numeric)) {
+        // Wire type is ok (and packed).
+        is_packed = true;
       } else {
         f = NULL;
       }
@@ -453,19 +432,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
     if (fr->is_sequence && fr->f != f) {
       upb_dispatch_endseq(&d->dispatcher);
       upb_decoder_setmsgend(d);
+      fr = d->dispatcher.top;
     }
-    if (f && f->repeated && (!fr->is_sequence || fr->f != f)) {
-      uint64_t old_end = d->dispatcher.top->end_ofs;
-      upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
-      if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
-          upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) {
-        // Non-packed field -- this tag pertains to only a single message.
-        fr->end_ofs = old_end;
-      } else {
+    if (f && f->repeated && !fr->is_sequence) {
+      upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f);
+      if (is_packed) {
         // Packed primitive field.
         uint32_t len = upb_decode_varint32(d);
-        fr->end_ofs = upb_decoder_offset(d) + len;
-        fr->is_packed = true;
+        fr2->end_ofs = upb_decoder_offset(d) + len;
+        fr2->is_packed = true;
+      } else {
+        // Non-packed field -- this tag pertains to only a single message.
+        fr2->end_ofs = fr->end_ofs;
       }
       upb_decoder_setmsgend(d);
     }
@@ -513,13 +491,37 @@ upb_success_t upb_decoder_decode(upb_decoder *d) {
     if (!d->top_is_packed) f = upb_decode_tag(d);
     if (!f) {
       // Sucessful EOF.  We may need to dispatch a top-level implicit frame.
-      if (d->dispatcher.top == d->dispatcher.stack + 1) {
-        assert(d->dispatcher.top->is_sequence);
+      if (d->dispatcher.top->is_sequence) {
+        assert(d->dispatcher.top == d->dispatcher.stack + 1);
         upb_dispatch_endseq(&d->dispatcher);
       }
+      assert(d->dispatcher.top == d->dispatcher.stack);
+      upb_dispatch_endmsg(&d->dispatcher, &d->status);
       return UPB_OK;
     }
-    f->decode(d, f);
+
+    switch (f->type) {
+      case UPB_TYPE_ENDGROUP:  upb_endgroup(d, f);        break;
+      case UPB_TYPE(DOUBLE):   upb_decode_DOUBLE(d, f);   break;
+      case UPB_TYPE(FLOAT):    upb_decode_FLOAT(d, f);    break;
+      case UPB_TYPE(INT64):    upb_decode_INT64(d, f);    break;
+      case UPB_TYPE(UINT64):   upb_decode_UINT64(d, f);   break;
+      case UPB_TYPE(INT32):    upb_decode_INT32(d, f);    break;
+      case UPB_TYPE(FIXED64):  upb_decode_FIXED64(d, f);  break;
+      case UPB_TYPE(FIXED32):  upb_decode_FIXED32(d, f);  break;
+      case UPB_TYPE(BOOL):     upb_decode_BOOL(d, f);     break;
+      case UPB_TYPE(STRING):
+      case UPB_TYPE(BYTES):    upb_decode_STRING(d, f);   break;
+      case UPB_TYPE(GROUP):    upb_decode_GROUP(d, f);    break;
+      case UPB_TYPE(MESSAGE):  upb_decode_MESSAGE(d, f);  break;
+      case UPB_TYPE(UINT32):   upb_decode_UINT32(d, f);   break;
+      case UPB_TYPE(ENUM):     upb_decode_ENUM(d, f);     break;
+      case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break;
+      case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break;
+      case UPB_TYPE(SINT32):   upb_decode_SINT32(d, f);   break;
+      case UPB_TYPE(SINT64):   upb_decode_SINT64(d, f);   break;
+      case UPB_TYPE_NONE: assert(false); break;
+    }
     upb_decoder_checkpoint(d);
   }
 }
@@ -542,7 +544,6 @@ void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
 void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
                             void *closure) {
   assert(d->plan);
-  assert(upb_byteregion_discardofs(input) == upb_byteregion_startofs(input));
   upb_dispatcher_frame *f =
       upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
   upb_status_clear(&d->status);
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index fa984ef..f58e403 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -9,8 +9,8 @@
 |// parsing the specific message and calling specific handlers.
 |//
 |// Since the JIT can call other functions (the JIT'ted code is not a leaf
-|// function) we must respect alignment rules.  On OS X, this means aligning
-|// the stack to 16 bytes.
+|// function) we must respect alignment rules.  All x86-64 systems require
+|// 16-byte stack alignment.
 
 #include <sys/mman.h>
 #include "dynasm/dasm_x86.h"
@@ -103,7 +103,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) {
 // Has to be a separate function, otherwise GCC will complain about
 // expressions like (&foo != NULL) because they will never evaluate
 // to false.
-static void upb_assert_notnull(void *addr) { assert(addr != NULL); }
+static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; }
 
 |.arch x64
 |.actionlist upb_jit_actionlist
@@ -401,45 +401,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
   }
 }
 
-#if 0
-// These appear not to speed things up, but keeping around for
-// further experimentation.
-static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
-                                         upb_fhandlers *f) {
-  |  mov   eax, STDARRAY:ARG1_64->len
-  |  cmp   eax, STDARRAY:ARG1_64->size
-  |  jne   >2
-  // If array is full, fall back to actual function.
-  |  loadfval f
-  |  callp  f->value
-  |  jmp   >3
-  |2:
-  |  mov   rcx, STDARRAY:ARG1_64->ptr
-  |  mov   esi, eax
-  |  add   eax, 1
-
-  switch (size) {
-    case 8:
-      |  mov   [rcx + rsi * 8], ARG3_64
-      break;
-
-    case 4:
-      |  mov   [rcx + rsi * 4], ARG3_32
-      break;
-
-    case 1:
-      |  mov   [rcx + rsi * 4], ARG3_8
-      break;
-  }
-
-  |  mov   STDARRAY:ARG1_64->len, eax
-  |3:
-}
-#endif
-
 static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
                                        upb_fhandlers *f) {
-  // Call callbacks.
+  // Call callbacks.  Specializing the append accessors didn't yield a speed
+  // increase in benchmarks.
   if (upb_issubmsgtype(f->type)) {
     if (f->type == UPB_TYPE(MESSAGE)) {
       |   mov   rsi, PTR
@@ -457,7 +422,10 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
       |  mov   ARG1_64, CLOSURE
       |  loadfval f
       |  callp f->startsubmsg
+      |  sethas CLOSURE, f->hasbit
       |  mov  CLOSURE, rdx
+    } else {
+      |  sethas CLOSURE, f->hasbit
     }
     |  mov   qword FRAME->closure, CLOSURE
     // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
@@ -465,6 +433,7 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
 
     const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
     |  call  =>sub_m->jit_startmsg_pclabel;
+    |  popframe upb_fhandlers_getmsg(f)
 
     // Call endsubmsg handler (if any).
     if (f->endsubmsg) {
@@ -473,7 +442,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
       |  loadfval  f
       |  callp f->endsubmsg
     }
-    |   popframe upb_fhandlers_getmsg(f)
     // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
     |  mov   DECODER->ptr, PTR
   } else {
@@ -494,21 +462,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
     } else if (f->value == &upb_stdmsg_setbool) {
       const upb_fielddef *fd = upb_value_getfielddef(f->fval);
       |  mov   [ARG1_64 + fd->offset], ARG3_8
-#if 0
-    // These appear not to speed things up, but keeping around for
-    // further experimentation.
-    } else if (f->value == &upb_stdmsg_setint64_r ||
-        f->value == &upb_stdmsg_setuint64_r ||
-        f->value == &upb_stdmsg_setptr_r ||
-        f->value == &upb_stdmsg_setdouble_r) {
-      upb_decoderplan_jit_doappend(plan, 8, f);
-    } else if (f->value == &upb_stdmsg_setint32_r ||
-               f->value == &upb_stdmsg_setuint32_r ||
-               f->value == &upb_stdmsg_setfloat_r) {
-      upb_decoderplan_jit_doappend(plan, 4, f);
-    } else if (f->value == &upb_stdmsg_setbool_r) {
-      upb_decoderplan_jit_doappend(plan, 1, f);
-#endif
     } else if (f->value) {
       // Load closure and fval into arg registers.
       ||#ifndef NDEBUG
@@ -520,16 +473,26 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
       |  loadfval f
       |  callp  f->value
     }
-    |  sethas CLOSURE, f->valuehasbit
+    |  sethas CLOSURE, f->hasbit
     // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
     |  mov   DECODER->ptr, PTR
   }
 }
 
+static uint64_t upb_get_encoded_tag(upb_fhandlers *f) {
+  uint32_t tag = (f->number << 3) | upb_decoder_types[f->type].native_wire_type;
+  uint64_t encoded_tag = upb_vencode32(tag);
+  // No tag should be greater than 5 bytes.
+  assert(encoded_tag <= 0xffffffffff);
+  return encoded_tag;
+}
+
 // PTR should point to the beginning of the tag.
-static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
-                                      uint64_t next_tag, upb_mhandlers *m,
+static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m,
                                       upb_fhandlers *f, upb_fhandlers *next_f) {
+  uint64_t tag = upb_get_encoded_tag(f);
+  uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0;
+
   // PC-label for the dispatch table.
   // We check the wire type (which must be loaded in edx) because the
   // table is keyed on field number, not type.
@@ -541,10 +504,13 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
     |  mov   rsi, FRAME->end_ofs
     |  pushframe  f, rsi, true
     if (f->startseq) {
-      |  mov   ARG1_64, CLOSURE
+      |  mov    ARG1_64, CLOSURE
       |  loadfval f
-      |  callp f->startseq
-      |  mov   CLOSURE, rdx
+      |  callp  f->startseq
+      |  sethas CLOSURE, f->hasbit
+      |  mov    CLOSURE, rdx
+    } else {
+      |  sethas CLOSURE, f->hasbit
     }
     |  mov   qword FRAME->closure, CLOSURE
   }
@@ -590,6 +556,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
 }
 
 static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
+  |=>m->jit_afterstartmsg_pclabel:
+  // There was a call to get here, so we need to align the stack.
+  |  sub  rsp, 8
+  |  jmp  >1
+
   |=>m->jit_startmsg_pclabel:
   // There was a call to get here, so we need to align the stack.
   |  sub  rsp, 8
@@ -602,6 +573,7 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
     // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
   }
 
+  |1:
   |  setmsgend  m
   |  check_eob   m
   |  mov    ecx, dword [PTR]
@@ -616,30 +588,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
   int num_keys = upb_inttable_count(&m->fieldtab);
   uint32_t *keys = malloc(num_keys * sizeof(*keys));
   int idx = 0;
-  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
-      !upb_inttable_done(i);
-      i = upb_inttable_next(&m->fieldtab, i)) {
-    keys[idx++] = upb_inttable_iter_key(i);
+  upb_inttable_iter i;
+  upb_inttable_begin(&i, &m->fieldtab);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    keys[idx++] = upb_inttable_iter_key(&i);
   }
   qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
 
-  upb_fhandlers *last_f = NULL;
-  uint64_t last_encoded_tag = 0;
   for(int i = 0; i < num_keys; i++) {
-    uint32_t fieldnum = keys[i];
-    upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
-    upb_fhandlers *f = e->f;
-    assert(f->number == fieldnum);
-    uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
-    uint64_t encoded_tag = upb_vencode32(tag);
-    // No tag should be greater than 5 bytes.
-    assert(encoded_tag <= 0xffffffffff);
-    if (last_f) upb_decoderplan_jit_field(
-        plan, last_encoded_tag, encoded_tag, m, last_f, f);
-    last_encoded_tag = encoded_tag;
-    last_f = f;
+    upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]);
+    upb_fhandlers *next_f =
+        (i + 1 < num_keys) ? upb_mhandlers_lookup(m, keys[i + 1]) : NULL;
+    upb_decoderplan_jit_field(plan, m, f, next_f);
   }
-  upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);
 
   free(keys);
 
@@ -733,18 +694,19 @@ static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
 static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
                                               uint32_t *pclabel_count) {
   m->jit_startmsg_pclabel = (*pclabel_count)++;
+  m->jit_afterstartmsg_pclabel = (*pclabel_count)++;
   m->jit_endofbuf_pclabel = (*pclabel_count)++;
   m->jit_endofmsg_pclabel = (*pclabel_count)++;
   m->jit_dyndispatch_pclabel = (*pclabel_count)++;
   m->jit_unknownfield_pclabel = (*pclabel_count)++;
   m->max_field_number = 0;
   upb_inttable_iter i;
-  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
-      i = upb_inttable_next(&m->fieldtab, i)) {
-    uint32_t key = upb_inttable_iter_key(i);
+  upb_inttable_begin(&i, &m->fieldtab);
+  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    uint32_t key = upb_inttable_iter_key(&i);
     m->max_field_number = UPB_MAX(m->max_field_number, key);
-    upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
-    upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
+    upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i));
+    upb_decoderplan_jit_assignfieldlabs(f, pclabel_count);
   }
   // TODO: support large field numbers by either using a hash table or
   // generating code for a binary search.  For now large field numbers
@@ -784,11 +746,12 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) {
   // Create dispatch tables.
   for (int i = 0; i < h->msgs_len; i++) {
     upb_mhandlers *m = h->msgs[i];
+    // We jump to after the startmsg handler since it is called before entering
+    // the JIT (either by upb_decoder or by a previous call to the JIT).
     m->jit_func =
-        plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
+        plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel);
     for (uint32_t j = 0; j <= m->max_field_number; j++) {
-      upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
-      upb_fhandlers *f = e ? e->f : NULL;
+      upb_fhandlers *f = upb_mhandlers_lookup(m, j);
       if (f) {
         m->tablearray[j] =
             plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 4949fe3..40b901d 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -1,84 +1,17 @@
 /*
  * upb - a minimalist implementation of protocol buffers.
  *
- * Copyright (c) 2010 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2010-2012 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  */
 
 #include "upb/bytestream.h"
-#include "upb/descriptor.h"
-#include "upb/msg.h"
+#include "upb/descriptor/reader.h"
 #include "upb/pb/decoder.h"
 #include "upb/pb/glue.h"
-#include "upb/pb/textprinter.h"
-
-bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
-                  bool allow_jit, upb_status *status) {
-  upb_stringsrc strsrc;
-  upb_stringsrc_init(&strsrc);
-  upb_stringsrc_reset(&strsrc, str, len);
-
-  upb_decoder d;
-  upb_handlers *h = upb_handlers_new();
-  upb_accessors_reghandlers(h, md);
-  upb_decoderplan *p = upb_decoderplan_new(h, allow_jit);
-  upb_decoder_init(&d);
-  upb_handlers_unref(h);
-  upb_decoder_resetplan(&d, p, 0);
-  upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg);
-  upb_success_t ret = upb_decoder_decode(&d);
-  // stringsrc and the handlers registered by upb_accessors_reghandlers()
-  // should not suspend.
-  assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d)));
-  if (status) upb_status_copy(status, upb_decoder_status(&d));
-
-  upb_stringsrc_uninit(&strsrc);
-  upb_decoder_uninit(&d);
-  upb_decoderplan_unref(p);
-  return ret == UPB_OK;
-}
-
-void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
-  void *msg = upb_stdmsg_new(md);
-  size_t len;
-  char *data = upb_readfile(fname, &len);
-  if (!data) goto err;
-  upb_strtomsg(data, len, msg, md, false, s);
-  if (!upb_ok(s)) goto err;
-  return msg;
-
-err:
-  upb_stdmsg_free(msg, md);
-  return NULL;
-}
-
-#if 0
-void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
-                   bool single_line) {
-  upb_stringsink strsink;
-  upb_stringsink_init(&strsink);
-  upb_stringsink_reset(&strsink, str);
-
-  upb_textprinter *p = upb_textprinter_new();
-  upb_handlers *h = upb_handlers_new();
-  upb_textprinter_reghandlers(h, md);
-  upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line);
-
-  upb_status status = UPB_STATUS_INIT;
-  upb_msg_runhandlers(msg, md, h, p, &status);
-  // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be
-  // capable of returning an error.
-  assert(upb_ok(&status));
-  upb_status_uninit(&status);
-
-  upb_stringsink_uninit(&strsink);
-  upb_textprinter_free(p);
-  upb_handlers_unref(h);
-}
-#endif
 
 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
-                                        upb_status *status) {
+                                        void *owner, upb_status *status) {
   upb_stringsrc strsrc;
   upb_stringsrc_init(&strsrc);
   upb_stringsrc_reset(&strsrc, str, len);
@@ -104,35 +37,20 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
     upb_descreader_uninit(&r);
     return NULL;
   }
-  upb_def **defs = upb_descreader_getdefs(&r, n);
+  upb_def **defs = upb_descreader_getdefs(&r, owner, n);
   upb_def **defscopy = malloc(sizeof(upb_def*) * (*n));
   memcpy(defscopy, defs, sizeof(upb_def*) * (*n));
   upb_descreader_uninit(&r);
 
-  // Set default accessors and layouts on all messages.
-  for(int i = 0; i < *n; i++) {
-    upb_def *def = defscopy[i];
-    upb_msgdef *md = upb_dyncast_msgdef(def);
-    if (!md) continue;
-    // For field in msgdef:
-    upb_msg_iter i;
-    for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
-      upb_fielddef *f = upb_msg_iter_field(i);
-      upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
-    }
-    upb_msgdef_layout(md);
-  }
-
   return defscopy;
 }
 
 bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
                                      upb_status *status) {
   int n;
-  upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, status);
+  upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
   if (!defs) return false;
-  bool success = upb_symtab_add(s, defs, n, status);
-  for(int i = 0; i < n; i++) upb_def_unref(defs[i]);
+  bool success = upb_symtab_add(s, defs, n, &defs, status);
   free(defs);
   return success;
 }
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
index ff8c85e..6179d8d 100644
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@@ -1,7 +1,7 @@
 /*
  * upb - a minimalist implementation of protocol buffers.
  *
- * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2011-2012 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  *
  * upb's core components like upb_decoder and upb_msg are carefully designed to
@@ -34,25 +34,12 @@
 extern "C" {
 #endif
 
-// Decodes the given string, which must be in protobuf binary format, to the
-// given upb_msg with msgdef "md", storing the status of the operation in "s".
-bool upb_strtomsg(const char *str, size_t len, void *msg,
-                  const upb_msgdef *md, bool allow_jit, upb_status *s);
-
-// Parses the given file into a new message of the given type.  Caller owns
-// the returned message (or NULL if an error occurred).
-void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s);
-
-//void upb_msgtotext(struct _upb_string *str, void *msg,
-//                   struct _upb_msgdef *md, bool single_line);
-
-
 // Loads all defs from the given protobuf binary descriptor, setting default
 // accessors and a default layout on all messages.  The caller owns the
 // returned array of defs, which will be of length *n.  On error NULL is
 // returned and status is set (if non-NULL).
 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
-                                        upb_status *status);
+                                        void *owner, upb_status *status);
 
 // Like the previous but also adds the loaded defs to the given symtab.
 bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str,
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
index 3f68f90..0d9c967 100644
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@@ -96,7 +96,7 @@ err:
     const upb_fielddef *f = upb_value_getfielddef(fval);                     \
     uint64_t start_ofs = upb_bytesink_getoffset(p->sink);                    \
     CHECK(upb_textprinter_indent(p));                                        \
-    CHECK(upb_bytesink_writestr(p->sink, f->name));                          \
+    CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f)));             \
     CHECK(upb_bytesink_writestr(p->sink, ": "));                             \
     CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val)));  \
     CHECK(upb_textprinter_endfield(p));                                      \
@@ -124,7 +124,8 @@ static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval,
   upb_textprinter *p = _p;
   uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
   const upb_fielddef *f = upb_value_getfielddef(fval);
-  upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
+  const upb_enumdef *enum_def =
+      upb_downcast_enumdef_const(upb_fielddef_subdef(f));
   const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
   if (label) {
     CHECK(upb_bytesink_writestr(p->sink, label));
@@ -157,7 +158,7 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
   uint64_t start_ofs = upb_bytesink_getoffset(p->sink);
   const upb_fielddef *f = upb_value_getfielddef(fval);
   CHECK(upb_textprinter_indent(p));
-  CHECK(upb_bytesink_printf(p->sink, "%s {", f->name));
+  CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f)));
   if (!p->single_line)
     CHECK(upb_bytesink_putc(p->sink, '\n'));
   p->indent_depth++;
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
index 815a7a1..c0e0134 100644
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@@ -19,6 +19,16 @@
 extern "C" {
 #endif
 
+// A list of types as they are encoded on-the-wire.
+typedef enum {
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5,
+} upb_wiretype_t;
+
 // The maximum number of bytes that it takes to encode a 64-bit varint.
 // Note that with a better encoding this could be 9 (TODO: write up a
 // wiki document about this).
diff --git a/upb/refcount.c b/upb/refcount.c
new file mode 100644
index 0000000..a15547a
--- /dev/null
+++ b/upb/refcount.c
@@ -0,0 +1,224 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2012 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <stdlib.h>
+#include <limits.h>
+#include "upb/refcount.h"
+
+// TODO(haberman): require client to define these if ref debugging is on.
+#ifndef UPB_LOCK
+#define UPB_LOCK
+#endif
+
+#ifndef UPB_UNLOCK
+#define UPB_UNLOCK
+#endif
+
+/* arch-specific atomic primitives  *******************************************/
+
+#ifdef UPB_THREAD_UNSAFE  //////////////////////////////////////////////////////
+
+INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; }
+INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; }
+
+#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 ///////////////////
+
+INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
+INLINE bool upb_atomic_dec(uint32_t *a) {
+  return __sync_sub_and_fetch(a, 1) == 0;
+}
+
+#elif defined(WIN32) ///////////////////////////////////////////////////////////
+
+#include <Windows.h>
+// The counter is a uint32_t; LONG is likewise 32 bits wide on Windows.
+INLINE void upb_atomic_inc(uint32_t *a) { InterlockedIncrement((LONG*)a); }
+INLINE bool upb_atomic_dec(uint32_t *a) {
+  return InterlockedDecrement((LONG*)a) == 0;
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU.  \
+       Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+// Reserved index values.
+#define UPB_INDEX_UNDEFINED UINT16_MAX
+#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1)
+
+static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) {
+  if (upb_refcount_merged(r, from)) return;  // Already share one count.
+  *r->count += *from->count;  // r's count absorbs the refs held on "from".
+  free(from->count);  // Stale until the loop below repoints the chain.
+  upb_refcount *base = from;  // Remember where the walk started.
+
+  // Set all refcount pointers in the "from" chain to the merged refcount.
+  do { from->count = r->count; } while ((from = from->next) != base);
+
+  // Merge the two circularly linked lists by swapping their next pointers.
+  upb_refcount *tmp = r->next;
+  r->next = base->next;
+  base->next = tmp;
+}
+
+// Tarjan's algorithm, see:
+//   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
+
+typedef struct {
+  int index;
+  upb_refcount **stack;
+  int stack_len;
+  upb_getsuccessors *func;
+} upb_tarjan_state;
+
+static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state);
+
+void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) {
+  upb_tarjan_state *state = _state;  // Closure handed to the callback.
+  if (subobj->index == UPB_INDEX_UNDEFINED) {
+    // subobj has not yet been visited; recurse on it.
+    upb_refcount_dofindscc(subobj, state);
+    obj->lowlink = UPB_MIN(obj->lowlink, subobj->lowlink);
+  } else if (subobj->index != UPB_INDEX_NOT_IN_STACK) {
+    // subobj is on the stack and hence in the current SCC.
+    obj->lowlink = UPB_MIN(obj->lowlink, subobj->index);
+  }
+}
+
+static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) {
+  obj->index = state->index;  // NOTE(review): int narrowed to uint16_t.
+  obj->lowlink = state->index;
+  state->index++;
+  state->stack[state->stack_len++] = obj;  // NOTE(review): no bounds check.
+
+  state->func(obj, state);  // Visit successors.
+
+  if (obj->lowlink == obj->index) {  // obj is the root of its SCC.
+    upb_refcount *scc_obj;  // Pop SCC members until obj itself is reached.
+    while ((scc_obj = state->stack[--state->stack_len]) != obj) {
+      upb_refcount_merge(obj, scc_obj);
+      scc_obj->index = UPB_INDEX_NOT_IN_STACK;
+    }
+    obj->index = UPB_INDEX_NOT_IN_STACK;
+  }
+}
+
+bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) {
+  // TODO(haberman): allocate less memory.  We can't use n as a bound because
+  // it doesn't include fielddefs.  Could either use a dynamically-resizing
+  // array or think of some other way.
+  upb_tarjan_state state = {0, malloc(UINT16_MAX * sizeof(void*)), 0, func};
+  if (state.stack == NULL) return false;
+  for (int i = 0; i < n; i++)
+    if (refs[i]->index == UPB_INDEX_UNDEFINED)
+      upb_refcount_dofindscc(refs[i], &state);
+  free(state.stack);
+  return true;
+}
+
+
+/* upb_refcount  **************************************************************/
+
+bool upb_refcount_init(upb_refcount *r, void *owner) {
+  r->count = malloc(sizeof(uint32_t));
+  if (!r->count) return false;
+  // Initializing this here means upb_refcount_findscc() can only run once for
+  // each refcount; may need to revise this to be more flexible.
+  r->index = UPB_INDEX_UNDEFINED;
+  r->next = r;
+#ifdef UPB_DEBUG_REFS
+  // We don't detect malloc() failures for UPB_DEBUG_REFS.
+  upb_inttable_init(&r->refs);
+  *r->count = 0;
+  upb_refcount_ref(r, owner);
+#else
+  *r->count = 1;
+#endif
+  return true;
+}
+
+void upb_refcount_uninit(upb_refcount *r) {
+  (void)r;
+#ifdef UPB_DEBUG_REFS
+  assert(upb_inttable_count(&r->refs) == 0);
+  upb_inttable_uninit(&r->refs);
+#endif
+}
+
+// Moves an existing ref from owner "from" to owner "to", without changing the
+// overall ref count.
+void upb_refcount_donateref(upb_refcount *r, void *from, void *to) {
+  (void)r; (void)from; (void)to;  // Silence unused-parameter warnings.
+  assert(from != to);
+#ifdef UPB_DEBUG_REFS
+  upb_refcount_ref(r, to);  // Ref first so the count never drops to zero.
+  upb_refcount_unref(r, from);
+#endif
+}
+
+// Thread-safe operations //////////////////////////////////////////////////////
+
+// Ref and unref are thread-safe.
+void upb_refcount_ref(upb_refcount *r, void *owner) {
+  (void)owner;
+  upb_atomic_inc(r->count);
+#ifdef UPB_DEBUG_REFS
+  UPB_LOCK;
+  // Caller must not already own a ref.
+  assert(upb_inttable_lookup(&r->refs, (uintptr_t)owner) == NULL);
+
+  // If a ref is leaked we want to blame the leak on whoever leaked the
+  // ref, not on who originally allocated the refcounted object.  We accomplish
+  // this as follows.  When a ref is taken in DEBUG_REFS mode, we malloc() some
+  // memory and set up pointers like so:
+  //
+  //   upb_refcount
+  //   +----------+  +---------+
+  //   | count    |<-+         |
+  //   +----------+       +----------+
+  //   | table    |---X-->| malloc'd |
+  //   +----------+       | memory   |
+  //                      +----------+
+  //
+  // Since the "malloc'd memory" is allocated inside of "ref" and free'd in
+  // unref, it will cause a leak if not unref'd.  And since the leaked memory
+  // points to the object itself, the object will be considered "indirectly
+  // lost" by tools like Valgrind and not shown unless requested (which is good
+  // because the object's creator may not be responsible for the leak).  But we
+  // have to hide the pointer marked "X" above from Valgrind, otherwise the
+  // malloc'd memory will appear to be indirectly leaked and the object itself
+  // will still be considered the primary leak.  We hide this pointer from
+  // Valgrind (et al.) by doing a bitwise not on it.
+  upb_refcount **target = malloc(sizeof(void*));  // Not checked for NULL.
+  uintptr_t obfuscated = ~(uintptr_t)target;
+  *target = r;
+  upb_inttable_insert(&r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated));
+  UPB_UNLOCK;
+#endif
+}
+
+bool upb_refcount_unref(upb_refcount *r, void *owner) {
+  (void)owner;
+  bool ret = upb_atomic_dec(r->count);  // True when this was the last ref.
+#ifdef UPB_DEBUG_REFS
+  UPB_LOCK;
+  upb_value v;
+  bool success = upb_inttable_remove(&r->refs, (uintptr_t)owner, &v);
+  assert(success);
+  if (success) {
+    // Undo the bitwise not from upb_refcount_ref(), narrowing via uintptr_t.
+    free((void*)(uintptr_t)~upb_value_getuint64(v));
+  }
+  UPB_UNLOCK;
+#endif
+  if (ret) free(r->count);  // Last ref gone: release the counter itself.
+  return ret;
+}
+
+bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) {
+  return r->count == r2->count;
+}
diff --git a/upb/refcount.h b/upb/refcount.h
new file mode 100644
index 0000000..cb2bda9
--- /dev/null
+++ b/upb/refcount.h
@@ -0,0 +1,70 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A thread-safe refcount that can optionally track references for debugging
+ * purposes.  It helps avoid circular references by allowing a
+ * strongly-connected component in the graph to share a refcount.
+ *
+ * This interface is internal to upb.
+ */
+
+#ifndef UPB_REFCOUNT_H_
+#define UPB_REFCOUNT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb/table.h"
+
+#ifndef NDEBUG
+#define UPB_DEBUG_REFS
+#endif
+
+typedef struct _upb_refcount {
+  uint32_t *count;
+  struct _upb_refcount *next;  // Circularly-linked list of this SCC.
+  uint16_t index;    // For SCC algorithm.
+  uint16_t lowlink;  // For SCC algorithm.
+#ifdef UPB_DEBUG_REFS
+  upb_inttable refs;
+#endif
+} upb_refcount;
+
+// NON THREAD SAFE operations //////////////////////////////////////////////////
+
+// Initializes the refcount with a single ref for the given owner.  Returns
+// false if memory could not be allocated.
+bool upb_refcount_init(upb_refcount *r, void *owner);
+
+// Uninitializes the refcount.  May only be called after unref() returns true.
+void upb_refcount_uninit(upb_refcount *r);
+
+// Moves an existing ref from owner "from" to owner "to", without changing the
+// overall ref count.
+void upb_refcount_donateref(upb_refcount *r, void *from, void *to);
+
+// Finds strongly-connected components among some set of objects and merges all
+// refcounts that share a SCC.  The given function will be called when the
+// algorithm needs to visit children of a particular object; the function
+// should call upb_refcount_visit() once for each child obj.
+//
+// Returns false if memory allocation failed.
+typedef void upb_getsuccessors(upb_refcount *obj, void*);
+bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func);
+void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure);
+
+// Thread-safe operations //////////////////////////////////////////////////////
+
+// Increases the ref count, the new ref is owned by "owner" which must not
+// already own a ref.  Circular reference chains are not allowed.
+void upb_refcount_ref(upb_refcount *r, void *owner);
+
+// Release a ref owned by owner, returns true if that was the last ref.
+bool upb_refcount_unref(upb_refcount *r, void *owner);
+
+// Returns true if these two objects share a refcount.
+bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2);
+
+#endif  // UPB_REFCOUNT_H_
diff --git a/upb/table.c b/upb/table.c
index 31c91b1..4e3544e 100644
--- a/upb/table.c
+++ b/upb/table.c
@@ -4,8 +4,10 @@
  * Copyright (c) 2009 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  *
- * There are a few printf's strewn throughout this file, uncommenting them
- * can be useful for debugging.
+ * Implementation is heavily inspired by Lua's ltable.c.
+ *
+ * TODO: for table iteration we use (array - 1) in several places; is this
+ * undefined behavior?  If so find a better solution.
  */
 
 #include "upb/table.h"
@@ -14,6 +16,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#define UPB_MAXARRSIZE 16  // 64k.
+
 static const double MAX_LOAD = 0.85;
 
 // The minimum percentage of an array part that we will allow.  This is a
@@ -21,385 +25,319 @@ static const double MAX_LOAD = 0.85;
 // cache effects).  The lower this is, the more memory we'll use.
 static const double MIN_DENSITY = 0.1;
 
+int upb_log2(uint64_t v) {  // floor(log2(v)), clamped to UPB_MAXARRSIZE.
+  int ret = 0;
+#ifdef __GNUC__
+  if (v) ret = 63 - __builtin_clzll(v);  // clz(0) is undefined; v is 64-bit.
+#else
+  while (v >>= 1) ret++;
+#endif
+  return UPB_MIN(UPB_MAXARRSIZE, ret);
+}
+
+// Wraps str as a table key.  Only the pointer is stored; nothing is copied.
+static upb_tabkey upb_strkey(const char *str) {
+  upb_tabkey key = {.str = (char*)str};  // C99 designated init of the union.
+  return key;
+}
+
 static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key);
+typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2);
 
 /* Base table (shared code) ***************************************************/
 
-static uint32_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
-static size_t upb_table_entrysize(const upb_table *t) { return t->entry_size; }
-static size_t upb_table_valuesize(const upb_table *t) { return t->value_size; }
+static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; }
+
+static bool upb_table_isfull(upb_table *t) {
+  return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
+}
 
-void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) {
+static bool upb_table_init(upb_table *t, uint8_t size_lg2) {
   t->count = 0;
-  t->entry_size = entry_size;
-  t->size_lg2 = 1;
-  while(upb_table_size(t) < size) t->size_lg2++;
-  size_t bytes = upb_table_size(t) * t->entry_size;
+  t->size_lg2 = size_lg2;
+  size_t bytes = upb_table_size(t) * sizeof(upb_tabent);
   t->mask = upb_table_size(t) - 1;
   t->entries = malloc(bytes);
+  if (!t->entries) return false;
+  memset(t->entries, 0, bytes);
+  return true;
 }
 
-void upb_table_free(upb_table *t) { free(t->entries); }
+static void upb_table_uninit(upb_table *t) { free(t->entries); }
 
-/* upb_inttable ***************************************************************/
+static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; }
 
-static upb_inttable_entry *intent(const upb_inttable *t, int32_t i) {
-  //printf("looking up int entry %d, size of entry: %d\n", i, t->t.entry_size);
-  return UPB_INDEX(t->t.entries, i, t->t.entry_size);
+static upb_tabent *upb_table_emptyent(const upb_table *t) {  // Last free slot.
+  upb_tabent *e = t->entries + upb_table_size(t);  // One past the last slot.
+  while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
 }
 
-static uint32_t upb_inttable_hashtablesize(const upb_inttable *t) {
-  return upb_table_size(&t->t);
+// Returns a pointer to the value stored for key, or NULL if key is absent.
+static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key,
+                                   upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  upb_tabent *ent = hash(t, key);
+  if (upb_tabent_isempty(ent)) return NULL;  // Empty main slot: no chain.
+  for (; ent != NULL; ent = ent->next)
+    if (eql(ent->key, key)) return &ent->val;
+  return NULL;
 }
 
-void upb_inttable_sizedinit(upb_inttable *t, uint32_t arrsize, uint32_t hashsize,
-                            uint16_t value_size) {
-  size_t entsize = _upb_inttable_entrysize(value_size);
-  upb_table_init(&t->t, hashsize, entsize);
-  for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
-    upb_inttable_entry *e = intent(t, i);
-    e->hdr.key = 0;
-    e->hdr.next = UPB_END_OF_CHAIN;
-    e->val.has_entry = 0;
+// The given key must not already exist in the table.
+static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val,
+                             upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  assert(upb_table_lookup(t, key, hash, eql) == NULL);
+  t->count++;
+  upb_tabent *mainpos_e = hash(t, key);
+  upb_tabent *our_e = mainpos_e;
+  if (!upb_tabent_isempty(mainpos_e)) {  // Collision.
+    upb_tabent *new_e = upb_table_emptyent(t);
+    upb_tabent *chain = hash(t, mainpos_e->key);  // Head of collider's chain.
+    if (chain == mainpos_e) {
+      // Existing ent is in its main position (it has the same hash as us, and
+      // is the head of our chain).  Insert to new ent and append to this chain.
+      new_e->next = mainpos_e->next;
+      mainpos_e->next = new_e;
+      our_e = new_e;
+    } else {
+      // Existing ent is not in its main position (it is a node in some other
+      // chain).  This implies that no existing ent in the table has our hash.
+      // Evict it (updating its chain) and use its ent for head of our chain.
+      *new_e = *mainpos_e;  // copies next.
+      while (chain->next != mainpos_e) chain = chain->next;
+      chain->next = new_e;
+      our_e = mainpos_e;
+      our_e->next = NULL;
+    }
   }
-  t->t.value_size = value_size;
-  // Always make the array part at least 1 long, so that we know key 0
-  // won't be in the hash part (which lets us speed up that code path).
-  t->array_size = UPB_MAX(1, arrsize);
-  t->array = malloc(upb_table_valuesize(&t->t) * t->array_size);
-  t->array_count = 0;
-  for (uint32_t i = 0; i < t->array_size; i++) {
-    upb_inttable_value *val = UPB_INDEX(t->array, i, upb_table_valuesize(&t->t));
-    val->has_entry = false;
+  our_e->key = key;
+  our_e->val = val;
+  assert(upb_table_lookup(t, key, hash, eql) == &our_e->val);
+}
+
+// Removes key, copying its value to *val when val is non-NULL.  Returns false
+// if the key is absent.  Every vacated slot gets next = NULL: upb_table_insert
+// does not reset next when it fills an empty slot, so a stale link left here
+// would silently splice the new entry into an unrelated chain.
+static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val,
+                             upb_hashfunc_t *hash, upb_eqlfunc_t *eql) {
+  upb_tabent *chain = hash(t, key);
+  if (upb_tabent_isempty(chain)) return false;  // Never eql() an empty slot.
+  if (eql(chain->key, key)) {
+    // Key is the chain head: pull the second node (if any) into the head slot.
+    upb_tabent *dead = chain->next ? chain->next : chain;
+    if (val) *val = chain->val;
+    if (dead != chain) *chain = *dead;
+    dead->key.num = 0;  // Make the slot empty.
+    dead->next = NULL;
+    t->count--;
+    return true;
+  } else {
+    while (chain->next && !eql(chain->next->key, key)) chain = chain->next;
+    upb_tabent *dead = chain->next;
+    if (!dead) return false;
+    if (val) *val = dead->val;
+    chain->next = dead->next;  // Unlink before clearing the slot.
+    dead->key.num = 0;  // Make the slot empty.
+    dead->next = NULL;
+    t->count--;
+    return true;
   }
 }
 
-void upb_inttable_init(upb_inttable *t, uint32_t hashsize, uint16_t value_size) {
-  upb_inttable_sizedinit(t, 0, hashsize, value_size);
+static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) {
+  const upb_tabent *limit = t->entries + upb_table_size(t);
+  for (++e; e != limit; ++e) if (!upb_tabent_isempty(e)) return e;
+  return NULL;  // Ran off the end: no occupied slot after the given one.
 }
 
-void upb_inttable_free(upb_inttable *t) {
-  upb_table_free(&t->t);
-  free(t->array);
+static upb_tabent *upb_table_begin(const upb_table *t) {
+  return upb_table_next(t, t->entries - 1);
 }
 
-static uint32_t empty_intbucket(upb_inttable *table)
-{
-  // TODO: does it matter that this is biased towards the front of the table?
-  for(uint32_t i = 0; i < upb_inttable_hashtablesize(table); i++) {
-    upb_inttable_entry *e = intent(table, i);
-    if(!e->val.has_entry) return i;
-  }
-  assert(false);
-  return 0;
+
+/* upb_strtable ***************************************************************/
+
+// A simple "subclass" of upb_table that only adds a hash function for strings.
+
+static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) {
+  // Could avoid the strlen() by using a hash function that terminates on NULL.
+  return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask);
 }
 
-// The insert routines have a lot more code duplication between int/string
-// variants than I would like, but there's just a bit too much that varies to
-// parameterize them.
-static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
-  assert(upb_inttable_lookup(t, key) == NULL);
-  upb_inttable_value *table_val;
-  if (_upb_inttable_isarrkey(t, key)) {
-    table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t));
-    t->array_count++;
-    //printf("Inserting key %d to Array part! %p\n", key, table_val);
-  } else {
-    t->t.count++;
-    uint32_t bucket = _upb_inttable_bucket(t, key);
-    upb_inttable_entry *table_e = intent(t, bucket);
-    //printf("Hash part!  Inserting into bucket %d?\n", bucket);
-    if(table_e->val.has_entry) {  /* Collision. */
-      //printf("Collision!\n");
-      if(bucket == _upb_inttable_bucket(t, table_e->hdr.key)) {
-        /* Existing element is in its main posisiton.  Find an empty slot to
-         * place our new element and append it to this key's chain. */
-        uint32_t empty_bucket = empty_intbucket(t);
-        while (table_e->hdr.next != UPB_END_OF_CHAIN)
-          table_e = intent(t, table_e->hdr.next);
-        table_e->hdr.next = empty_bucket;
-        table_e = intent(t, empty_bucket);
-      } else {
-        /* Existing element is not in its main position.  Move it to an empty
-         * slot and put our element in its main position. */
-        uint32_t empty_bucket = empty_intbucket(t);
-        uint32_t evictee_bucket = _upb_inttable_bucket(t, table_e->hdr.key);
-        memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
-        upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
-        while(1) {
-          assert(evictee_e->val.has_entry);
-          assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
-          if(evictee_e->hdr.next == bucket) {
-            evictee_e->hdr.next = empty_bucket;
-            break;
-          }
-          evictee_e = intent(t, evictee_e->hdr.next);
-        }
-        /* table_e remains set to our mainpos. */
-      }
-    }
-    //printf("Inserting!  to:%p, copying to: %p\n", table_e, &table_e->val);
-    table_val = &table_e->val;
-    table_e->hdr.key = key;
-    table_e->hdr.next = UPB_END_OF_CHAIN;
-  }
-  memcpy(table_val, val, upb_table_valuesize(&t->t));
-  table_val->has_entry = true;
-  assert(upb_inttable_lookup(t, key) == table_val);
+static bool upb_streql(upb_tabkey k1, upb_tabkey k2) {
+  return strcmp(k1.str, k2.str) == 0;
 }
 
-// Insert all elements from src into dest.  Caller ensures that a resize will
-// not be necessary.
-static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
-  for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i);
-      i = upb_inttable_next(src, i)) {
-    //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst));
-    assert((double)(upb_table_count(&dst->t)) /
-                    upb_inttable_hashtablesize(dst) <= MAX_LOAD);
-    intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i));
-  }
+bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); }
+
+void upb_strtable_uninit(upb_strtable *t) {
+  for (size_t i = 0; i < upb_table_size(&t->t); i++)
+    free(t->t.entries[i].key.str);  // Visits empty (zero-key) slots as well.
+  upb_table_uninit(&t->t);  // Then release the slot array itself.
 }
 
-void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
-  if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) {
-    //printf("RESIZE!\n");
-    // Need to resize.  Allocate new table with double the size of however many
-    // elements we have now, add old elements to it.  We create the new hash
-    // table without an array part, even if the old table had an array part.
-    // If/when the user calls upb_inttable_compact() again, we'll create an
-    // array part then.
-    upb_inttable new_table;
-    //printf("Old table count=%d, size=%d\n", upb_inttable_count(t), upb_inttable_hashtablesize(t));
-    upb_inttable_init(&new_table, upb_inttable_count(t)*2, upb_table_valuesize(&t->t));
-    upb_inttable_insertall(&new_table, t);
-    upb_inttable_free(t);
+bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
+  if (upb_table_isfull(&t->t)) {
+    // Need to resize.  New table of double the size.  Existing entries are
+    // moved across with their already-owned key strings (as the inttable
+    // resize does), so no key is copied again and nothing here can fail
+    // part-way and silently drop an entry.
+    upb_strtable new_table;
+    if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false;
+    upb_tabent *e;
+    for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e))
+      upb_table_insert(&new_table.t, e->key, e->val, &upb_strhash, &upb_streql);
+    upb_table_uninit(&t->t);  // Frees the slot array only; keys were moved.
     *t = new_table;
   }
-  intinsert(t, key, val);
+  if ((k = strdup(k)) == NULL) return false;
+  upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql);
+  return true;
 }
 
-void upb_inttable_compact(upb_inttable *t) {
-  // Find the largest array part we can that satisfies the MIN_DENSITY
-  // definition.  For now we just count down powers of two.
-  uint32_t largest_key = 0;
-  for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
-      i = upb_inttable_next(t, i)) {
-    largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
-  }
-  int lg2_array = 0;
-  while ((1UL << lg2_array) < largest_key) ++lg2_array;
-  ++lg2_array;  // Undo the first iteration.
-  size_t array_size = 0;
-  int array_count = 0;
-  while (lg2_array > 0) {
-    array_size = (1 << --lg2_array);
-    //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));
-    if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) {
-      // Even if 100% of the keys were in the array pary, an array of this
-      // size would not be dense enough.
-      continue;
-    }
-    array_count = 0;
-    for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
-        i = upb_inttable_next(t, i)) {
-      if (upb_inttable_iter_key(i) < array_size)
-        array_count++;
-    }
-    //printf("There would be %d things in that array\n", array_count);
-    if ((double)array_count / array_size >= MIN_DENSITY) break;
-  }
-  upb_inttable new_table;
-  int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
-  //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t));
-  upb_inttable_sizedinit(&new_table, array_size, hash_size,
-                         upb_table_valuesize(&t->t));
-  //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
-  upb_inttable_insertall(&new_table, t);
-  upb_inttable_free(t);
-  *t = new_table;
+upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) {
+  return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql);
 }
 
-upb_inttable_iter upb_inttable_begin(const upb_inttable *t) {
-  upb_inttable_iter iter = {-1, NULL, true};  // -1 will overflow to 0 on the first iteration.
-  return upb_inttable_next(t, iter);
+void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
+  i->t = t;
+  i->e = upb_table_begin(&t->t);
 }
 
-upb_inttable_iter upb_inttable_next(const upb_inttable *t,
-                                    upb_inttable_iter iter) {
-  const size_t hdrsize = sizeof(upb_inttable_header);
-  const size_t entsize = upb_table_entrysize(&t->t);
-  if (iter.array_part) {
-    while (++iter.key < t->array_size) {
-      //printf("considering value %d\n", iter.key);
-      iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size);
-      if (iter.value->has_entry) return iter;
-    }
-    //printf("Done with array part!\n");
-    iter.array_part = false;
-    // Point to the value of the table[-1] entry.
-    iter.value = UPB_INDEX(intent(t, -1), 1, hdrsize);
-  }
-  void *end = intent(t, upb_inttable_hashtablesize(t));
-  // Point to the entry for the value that was previously in iter.
-  upb_inttable_entry *e = UPB_INDEX(iter.value, -1, hdrsize);
-  do {
-    e = UPB_INDEX(e, 1, entsize);
-    //printf("considering value %p (val: %p)\n", e, &e->val);
-    if(e == end) {
-      //printf("No values.\n");
-      iter.value = NULL;
-      return iter;
-    }
-  } while(!e->val.has_entry);
-  //printf("USING VALUE! %p\n", e);
-  iter.key = e->hdr.key;
-  iter.value = &e->val;
-  return iter;
+void upb_strtable_next(upb_strtable_iter *i) {
+  i->e = upb_table_next(&i->t->t, i->e);
 }
 
 
-/* upb_strtable ***************************************************************/
+/* upb_inttable ***************************************************************/
 
-static upb_strtable_entry *strent(const upb_strtable *t, int32_t i) {
-  //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t));
-  assert(i <= (int32_t)upb_table_size(&t->t));
-  return UPB_INDEX(t->t.entries, i, t->t.entry_size);
-}
+// For inttables we use a hybrid structure where small keys are kept in an
+// array and large keys are put in the hash table.
 
-static uint32_t upb_strtable_size(const upb_strtable *t) {
-  return upb_table_size(&t->t);
+static bool upb_inteql(upb_tabkey k1, upb_tabkey k2) {
+  return k1.num == k2.num;
 }
 
-void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
-  t->t.value_size = valuesize;
-  size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8);
-  upb_table_init(&t->t, size, entsize);
-  for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
-    upb_strtable_entry *e = strent(t, i);
-    e->hdr.key = NULL;
-    e->hdr.next = UPB_END_OF_CHAIN;
-  }
+size_t upb_inttable_count(const upb_inttable *t) {
+  return t->t.count + t->array_count;
 }
 
-void upb_strtable_free(upb_strtable *t) {
-  // Free keys from the strtable.
-  upb_strtable_iter i;
-  for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i))
-    free((char*)upb_strtable_iter_key(&i));
-  upb_table_free(&t->t);
+bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) {
+  if (!upb_table_init(&t->t, hsize_lg2)) return false;
+  // Always make the array part at least 1 long, so that we know key 0
+  // won't be in the hash part, which simplifies things.
+  t->array_size = UPB_MAX(1, asize);
+  t->array_count = 0;
+  size_t array_bytes = t->array_size * sizeof(upb_value);
+  t->array = malloc(array_bytes);
+  if (!t->array) {
+    upb_table_uninit(&t->t);
+    return false;
+  }
+  memset(t->array, 0xff, array_bytes);
+  return true;
 }
 
-static uint32_t strtable_bucket(const upb_strtable *t, const char *key) {
-  uint32_t hash = MurmurHash2(key, strlen(key), 0);
-  return (hash & t->t.mask);
+bool upb_inttable_init(upb_inttable *t) {
+  return upb_inttable_sizedinit(t, 0, 4);
 }
 
-void *upb_strtable_lookup(const upb_strtable *t, const char *key) {
-  uint32_t bucket = strtable_bucket(t, key);
-  upb_strtable_entry *e;
-  do {
-    e = strent(t, bucket);
-    if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val;
-  } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN);
-  return NULL;
+void upb_inttable_uninit(upb_inttable *t) {
+  upb_table_uninit(&t->t);
+  free(t->array);
 }
 
-void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len) {
-  // TODO: improve.
-  char *key2 = malloc(len+1);
-  memcpy(key2, key, len);
-  key2[len] = '\0';
-  void *ret = upb_strtable_lookup(t, key2);
-  free(key2);
-  return ret;
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
+  assert(upb_arrhas(val));
+  if (key < t->array_size) {
+    assert(!upb_arrhas(t->array[key]));
+    t->array_count++;
+    t->array[key] = val;
+  } else {
+    if (upb_table_isfull(&t->t)) {
+      // Need to resize the hash part, but we re-use the array part.
+      upb_table new_table;
+      if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false;
+      upb_tabent *e;
+      for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e))
+        upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql);
+      upb_table_uninit(&t->t);
+      t->t = new_table;
+    }
+    upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
+  }
+  return true;
 }
 
-static uint32_t empty_strbucket(upb_strtable *table) {
-  // TODO: does it matter that this is biased towards the front of the table?
-  for(uint32_t i = 0; i < upb_strtable_size(table); i++) {
-    upb_strtable_entry *e = strent(table, i);
-    if(!e->hdr.key) return i;
+upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) {
+  if (key < t->array_size) {
+    upb_value *v = &t->array[key];
+    return upb_arrhas(*v) ? v : NULL;
   }
-  assert(false);
-  return 0;
+  return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql);
 }
 
-static void strinsert(upb_strtable *t, const char *key, const void *val) {
-  assert(upb_strtable_lookup(t, key) == NULL);
-  t->t.count++;
-  uint32_t bucket = strtable_bucket(t, key);
-  upb_strtable_entry *table_e = strent(t, bucket);
-  if(table_e->hdr.key) {  /* Collision. */
-    if(bucket == strtable_bucket(t, table_e->hdr.key)) {
-      /* Existing element is in its main posisiton.  Find an empty slot to
-       * place our new element and append it to this key's chain. */
-      uint32_t empty_bucket = empty_strbucket(t);
-      while (table_e->hdr.next != UPB_END_OF_CHAIN)
-        table_e = strent(t, table_e->hdr.next);
-      table_e->hdr.next = empty_bucket;
-      table_e = strent(t, empty_bucket);
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
+  if (key < t->array_size) {
+    if (upb_arrhas(t->array[key])) {
+      t->array_count--;
+      if (val) *val = t->array[key];
+      t->array[key] = upb_value_uint64(-1);
+      return true;
     } else {
-      /* Existing element is not in its main position.  Move it to an empty
-       * slot and put our element in its main position. */
-      uint32_t empty_bucket = empty_strbucket(t);
-      uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
-      memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
-      upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
-      while(1) {
-        assert(evictee_e->hdr.key);
-        assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
-        if(evictee_e->hdr.next == bucket) {
-          evictee_e->hdr.next = empty_bucket;
-          break;
-        }
-        evictee_e = strent(t, evictee_e->hdr.next);
-      }
-      /* table_e remains set to our mainpos. */
+      return false;
     }
+  } else {
+    return upb_table_remove(
+        &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql);
   }
-  //fprintf(stderr, "val: %p\n", val);
-  //fprintf(stderr, "val size: %d\n", t->t.value_size);
-  memcpy(&table_e->val, val, t->t.value_size);
-  table_e->hdr.key = strdup(key);
-  table_e->hdr.next = UPB_END_OF_CHAIN;
-  //fprintf(stderr, "Looking up, string=%s...\n", key);
-  assert(upb_strtable_lookup(t, key) == &table_e->val);
-  //printf("Yay!\n");
 }
 
-void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
-  if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
-    // Need to resize.  New table of double the size, add old elements to it.
-    //printf("RESIZE!!\n");
-    upb_strtable new_table;
-    upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size);
-    upb_strtable_iter i;
-    upb_strtable_begin(&i, t);
-    for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-      strinsert(&new_table,
-                upb_strtable_iter_key(&i),
-                upb_strtable_iter_value(&i));
-    }
-    upb_strtable_free(t);
-    *t = new_table;
+void upb_inttable_compact(upb_inttable *t) {
+  // Find the largest power of two that satisfies the MIN_DENSITY definition.
+  int counts[UPB_MAXARRSIZE + 1] = {0};
+  upb_inttable_iter i;
+  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
+    counts[upb_log2(upb_inttable_iter_key(&i))]++;
+  int size, count = upb_inttable_count(t);
+  for (size = UPB_MAXARRSIZE; size > 1; size--) {
+    count -= counts[size];
+    if (count >= (1 << size) * MIN_DENSITY) break;
   }
-  strinsert(t, key, val);
+
+  // Insert all elements into new, perfectly-sized table; on OOM keep t as-is.
+  upb_inttable new_table;
+  int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD;
+  size_t asz = (size_t)1 << size;  // size is a log2, not an element count.
+  if (!upb_inttable_sizedinit(&new_table, asz, upb_log2(hashsize) + 1)) return;
+  for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i))
+    upb_inttable_insert(
+        &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i));
+  upb_inttable_uninit(t);
+  *t = new_table;
 }
 
-void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
-  i->e = strent(t, -1);
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
   i->t = t;
-  upb_strtable_next(i);
+  i->arrkey = -1;
+  i->array_part = true;
+  upb_inttable_next(i);
 }
 
-void upb_strtable_next(upb_strtable_iter *i) {
-  upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t));
-  upb_strtable_entry *cur = i->e;
-  do {
-    cur = (void*)((char*)cur + i->t->t.entry_size);
-    if(cur == end) { i->e = NULL; return; }
-  } while(cur->hdr.key == NULL);
-  i->e = cur;
+void upb_inttable_next(upb_inttable_iter *iter) {
+  const upb_inttable *t = iter->t;
+  if (iter->array_part) {
+    for (size_t i = iter->arrkey; ++i < t->array_size; )
+      if (upb_arrhas(t->array[i])) {
+        iter->ptr.val = &t->array[i];
+        iter->arrkey = i;
+        return;
+      }
+    iter->array_part = false;
+    iter->ptr.ent = t->t.entries - 1;
+  }
+  iter->ptr.ent = upb_table_next(&t->t, iter->ptr.ent);
 }
 
 #ifdef UPB_UNALIGNED_READS_OK
@@ -413,8 +351,7 @@ void upb_strtable_next(upb_strtable_iter *i) {
 //   1. It will not work incrementally.
 //   2. It will not produce the same results on little-endian and big-endian
 //      machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
-{
+static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
   // 'm' and 'r' are mixing constants generated offline.
   // They're not really 'magic', they just happen to work well.
   const uint32_t m = 0x5bd1e995;
@@ -465,8 +402,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
 
 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
 
-static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed)
-{
+static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
   const uint32_t m = 0x5bd1e995;
   const int32_t r = 24;
   const uint8_t * data = (const uint8_t *)key;
diff --git a/upb/table.h b/upb/table.h
index 0c0a785..f6bff66 100644
--- a/upb/table.h
+++ b/upb/table.h
@@ -4,13 +4,16 @@
  * Copyright (c) 2009 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  *
- * This file defines very fast int->struct (inttable) and string->struct
- * (strtable) hash tables.  The struct can be of any size, and it is stored
- * in the table itself, for cache-friendly performance.
+ * This file defines very fast int->upb_value (inttable) and string->upb_value
+ * (strtable) hash tables.
  *
- * The table uses internal chaining with Brent's variation (inspired by the
- * Lua implementation of hash tables).  The hash function for strings is
- * Austin Appleby's "MurmurHash."
+ * The table uses chained scatter with Brent's variation (inspired by the Lua
+ * implementation of hash tables).  The hash function for strings is Austin
+ * Appleby's "MurmurHash."
+ *
+ * The inttable uses uintptr_t as its key, which guarantees it can be used to
+ * store pointers or integers of at least 32 bits (upb isn't really useful on
+ * systems where sizeof(void*) < 4).
  *
  * This header is internal to upb; its interface should not be considered
  * public or stable.
@@ -19,52 +22,30 @@
 #ifndef UPB_TABLE_H_
 #define UPB_TABLE_H_
 
-#include <assert.h>
 #include <stddef.h>
+#include <stdint.h>
 #include "upb.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#define UPB_END_OF_CHAIN (uint32_t)-1
-
-typedef struct {
-  bool has_entry:1;
-  // The rest of the bits are the user's.
-} upb_inttable_value;
-
-typedef struct {
-  uint32_t key;
-  uint32_t next;  // Internal chaining.
-} upb_inttable_header;
-
-typedef struct {
-  upb_inttable_header hdr;
-  upb_inttable_value val;
-} upb_inttable_entry;
-
-// TODO: consider storing the hash in the entry.  This would avoid the need to
-// rehash on table resizes, but more importantly could possibly improve lookup
-// performance by letting us compare hashes before comparing lengths or the
-// strings themselves.
-typedef struct {
-  char *key;         // We own, nullz. TODO: store explicit len?
-  uint32_t next;     // Internal chaining.
-} upb_strtable_header;
+typedef union {
+  uintptr_t num;
+  char *str;  // We own, nullz.
+} upb_tabkey;
 
-typedef struct {
-  upb_strtable_header hdr;
-  uint32_t val;      // Val is at least 32 bits.
-} upb_strtable_entry;
+typedef struct _upb_tabent {
+  upb_tabkey key;
+  upb_value val;
+  struct _upb_tabent *next;  // Internal chaining.
+} upb_tabent;
 
 typedef struct {
-  void *entries;        // Hash table.
-  uint32_t count;       // Number of entries in the hash part.
-  uint32_t mask;        // Mask to turn hash value -> bucket.
-  uint16_t entry_size;  // Size of each entry.
-  uint16_t value_size;  // Size of each value.
-  uint8_t size_lg2;     // Size of the hash table part is 2^size_lg2 entries.
+  upb_tabent *entries;   // Hash table.
+  size_t count;          // Number of entries in the hash part.
+  size_t mask;           // Mask to turn hash value -> bucket.
+  uint8_t size_lg2;      // Size of the hash table part is 2^size_lg2 entries.
 } upb_table;
 
 typedef struct {
@@ -72,149 +53,124 @@ typedef struct {
 } upb_strtable;
 
 typedef struct {
-  upb_table t;
-  void *array;           // Array part of the table.
-  uint32_t array_size;   // Array part size.
-  uint32_t array_count;  // Array part number of elements.
+  upb_table t;           // For entries that don't fit in the array part.
+  upb_value *array;      // Array part of the table.
+  size_t array_size;     // Array part size.
+  size_t array_count;    // Array part number of elements.
 } upb_inttable;
 
-// Initialize and free a table, respectively.  Specify the initial size
-// with 'size' (the size will be increased as necessary).  Value size
-// specifies how many bytes each value in the table is.
-//
-// WARNING!  The lowest bit of every entry is reserved by the hash table.
-// It will always be overwritten when you insert, and must not be modified
-// when looked up!
-void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
-void upb_inttable_free(upb_inttable *table);
-void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
-void upb_strtable_free(upb_strtable *table);
-
-// Number of values in the hash table.
-INLINE uint32_t upb_table_count(const upb_table *t) { return t->count; }
-INLINE uint32_t upb_inttable_count(const upb_inttable *t) {
-  return t->array_count + upb_table_count(&t->t);
-}
-INLINE uint32_t upb_strtable_count(const upb_strtable *t) {
-  return upb_table_count(&t->t);
+INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; }
+
+INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) {
+  return t->entries + ((uint32_t)key.num & t->mask);
 }
 
-// Inserts the given key into the hashtable with the given value.  The key must
-// not already exist in the hash table.  The data will be copied from val into
-// the hashtable (the amount of data copied comes from value_size when the
-// table was constructed).  Therefore the data at val may be freed once the
-// call returns.  For string tables, the table takes ownership of the string.
-//
-// WARNING: the lowest bit of val is reserved and will be overwritten!
-void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
-// TODO: may want to allow for more complex keys with custom hash/comparison
-// functions.
-void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
-void upb_inttable_compact(upb_inttable *t);
+INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; }
 
-INLINE uint32_t _upb_inttable_bucket(const upb_inttable *t, uint32_t k) {
-  uint32_t bucket = k & t->t.mask;  // Identity hash for ints.
-  assert(bucket != UPB_END_OF_CHAIN);
-  return bucket;
-}
+// Initialize and uninitialize a table, respectively.  If memory allocation
+// fails, false is returned and the table is left uninitialized.
+bool upb_inttable_init(upb_inttable *table);
+bool upb_strtable_init(upb_strtable *table);
+void upb_inttable_uninit(upb_inttable *table);
+void upb_strtable_uninit(upb_strtable *table);
 
-// Returns true if this key belongs in the array part of the table.
-INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) {
-  return (k < t->array_size);
-}
+// Returns the number of values in the table.
+size_t upb_inttable_count(const upb_inttable *t);
+INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; }
 
-// Looks up key in this table, returning a pointer to the user's inserted data.
-// We have the caller specify the entry_size because fixing this as a literal
-// (instead of reading table->entry_size) gives the compiler more ability to
-// optimize.
+// Inserts the given key into the hashtable with the given value.  The key must
+// not already exist in the hash table.  For string tables, the key must be
+// NUL-terminated, and the table will make an internal copy of the key.
+// Inttables must not insert a value of UINTPTR_MAX.
 //
-// Note: All returned pointers are invalidated by inserts!
-INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
-                                      size_t entry_size, size_t value_size) {
-  upb_inttable_value *arrval =
-      (upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
-  if (_upb_inttable_isarrkey(t, key)) {
-    return (arrval->has_entry) ? arrval : NULL;
+// If a table resize was required but memory allocation failed, false is
+// returned and the table is unchanged.
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
+bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
+
+// Looks up key in this table, returning a pointer to the table's internal copy
+// of the user's inserted data, or NULL if this key is not in the table.  The
+// user is free to modify the given upb_value, which will be reflected in any
+// future lookups of this key.  The returned pointer is invalidated by inserts.
+upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key);
+upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key);
+
+// Removes an item from the table.  Returns true if the removal was successful,
+// and stores the removed item in *val if val is non-NULL.
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
+
+// Optimizes the table for the current set of entries, for both memory use and
+// lookup time.  Clients should call this after all entries have been inserted;
+// inserting more entries is legal, but will likely require a table resize.
+void upb_inttable_compact(upb_inttable *t);
+
+// A special-case inlinable version of the lookup routine for 32-bit integers.
+INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) {
+  if (key < t->array_size) {
+    upb_value *v = &t->array[key];
+    return upb_arrhas(*v) ? v : NULL;
   }
-  uint32_t bucket = _upb_inttable_bucket(t, key);
-  upb_inttable_entry *e =
-      (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
-  while (1) {
-    if (e->hdr.key == key) {
-      return &e->val;
-    }
-    if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
-    e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
+  for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) {
+    if ((uint32_t)e->key.num == key) return &e->val;
+    if (e->next == NULL) return NULL;
   }
 }
 
-INLINE size_t _upb_inttable_entrysize(size_t value_size) {
-  return upb_align_up(sizeof(upb_inttable_header) + value_size, 8);
-}
-
-INLINE void *upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
-                                     uint32_t value_size) {
-  return _upb_inttable_fastlookup(
-      t, key, _upb_inttable_entrysize(value_size), value_size);
-}
-
-INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
-  return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size);
-}
-
-void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len);
-void *upb_strtable_lookup(const upb_strtable *t, const char *key);
-
 
 /* upb_strtable_iter **********************************************************/
 
 // Strtable iteration.  Order is undefined.  Insertions invalidate iterators.
 //   upb_strtable_iter i;
-//   for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
+//   upb_strtable_begin(&i, t);
+//   for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
 //     const char *key = upb_strtable_iter_key(&i);
 //     const myval *val = upb_strtable_iter_value(&i);
 //     // ...
 //   }
 typedef struct {
   const upb_strtable *t;
-  upb_strtable_entry *e;
+  upb_tabent *e;
 } upb_strtable_iter;
 
 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
 void upb_strtable_next(upb_strtable_iter *i);
 INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; }
 INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
-  return i->e->hdr.key;
+  return i->e->key.str;
 }
-INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
-  return &i->e->val;
+INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) {
+  return i->e->val;
 }
 
 
 /* upb_inttable_iter **********************************************************/
 
 // Inttable iteration.  Order is undefined.  Insertions invalidate iterators.
-//   for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
-//       i = upb_inttable_next(t, i)) {
+//   upb_inttable_iter i;
+//   upb_inttable_begin(&i, t);
+//   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
 //     // ...
 //   }
 typedef struct {
-  uint32_t key;
-  upb_inttable_value *value;
+  const upb_inttable *t;
+  union {
+    upb_tabent *ent;  // For hash iteration.
+    upb_value *val;   // For array iteration.
+  } ptr;
+  uintptr_t arrkey;
   bool array_part;
 } upb_inttable_iter;
 
-upb_inttable_iter upb_inttable_begin(const upb_inttable *t);
-upb_inttable_iter upb_inttable_next(const upb_inttable *t,
-                                    upb_inttable_iter iter);
-INLINE bool upb_inttable_done(upb_inttable_iter iter) {
-  return iter.value == NULL;
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
+void upb_inttable_next(upb_inttable_iter *i);
+INLINE bool upb_inttable_done(upb_inttable_iter *i) {
+  return i->ptr.ent == NULL;
 }
-INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
-  return iter.key;
+INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) {
+  return i->array_part ? i->arrkey : i->ptr.ent->key.num;
 }
-INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {
-  return iter.value;
+INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) {
+  return i->array_part ? *i->ptr.val : i->ptr.ent->val;
 }
 
 #ifdef __cplusplus
diff --git a/upb/upb.c b/upb/upb.c
index 3af9b75..c172bd3 100644
--- a/upb/upb.c
+++ b/upb/upb.c
@@ -1,47 +1,17 @@
 /*
  * upb - a minimalist implementation of protocol buffers.
  *
- * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Copyright (c) 2009-2012 Google Inc.  See LICENSE for details.
  * Author: Josh Haberman <jhaberman@gmail.com>
  */
 
 #include <errno.h>
 #include <stdarg.h>
 #include <stddef.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include "upb/descriptor_const.h"
 #include "upb/upb.h"
-#include "upb/bytestream.h"
-
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \
-    {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \
-     #ctype, is_numeric},
-
-const upb_type_info upb_types[] = {
-  // END_GROUP is not real, but used to signify the pseudo-field that
-  // ends a group from within the group.
-  TYPE_INFO(UPB_WIRE_TYPE_END_GROUP,   void*,     MESSAGE, false)   // ENDGROUP
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       double,    DOUBLE,  true)    // DOUBLE
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       float,     FLOAT,   true)    // FLOAT
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64,   true)    // INT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint64_t,  UINT64,  true)    // UINT64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32,   true)    // INT32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       uint64_t,  UINT64,  true)    // FIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       uint32_t,  UINT32,  true)    // FIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      bool,      BOOL,    true)    // BOOL
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING,  false)   // STRING
-  TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*,     MESSAGE, false)   // GROUP
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     MESSAGE, false)   // MESSAGE
-  TYPE_INFO(UPB_WIRE_TYPE_DELIMITED,   void*,     STRING,  false)   // BYTES
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  UINT32,  true)    // UINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      uint32_t,  INT32,   true)    // ENUM
-  TYPE_INFO(UPB_WIRE_TYPE_32BIT,       int32_t,   INT32,   true)    // SFIXED32
-  TYPE_INFO(UPB_WIRE_TYPE_64BIT,       int64_t,   INT64,   true)    // SFIXED64
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int32_t,   INT32,   true)    // SINT32
-  TYPE_INFO(UPB_WIRE_TYPE_VARINT,      int64_t,   INT64,   true)    // SINT64
-};
 
 #ifdef NDEBUG
 upb_value UPB_NO_VALUE = {{0}};
@@ -142,8 +112,9 @@ bool upb_errno_is_wouldblock() {
 bool upb_posix_codetostr(int code, char *buf, size_t len) {
   if (strerror_r(code, buf, len) == -1) {
     if (errno == EINVAL) {
-      int n = snprintf(buf, len, "Invalid POSIX error number %d\n", code);
-      return n >= (int)len;
+      size_t actual_len =
+          snprintf(buf, len, "Invalid POSIX error number %d\n", code);
+      return actual_len >= len;
     } else if (errno == ERANGE) {
       return false;
     }
diff --git a/upb/upb.h b/upb/upb.h
index 01970ca..ef440fb 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -15,9 +15,6 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <string.h>
-#include "descriptor_const.h"
-#include "atomic.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -36,20 +33,6 @@ extern "C" {
 
 #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
 #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
-#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
-
-INLINE void nop_printf(const char *fmt, ...) { (void)fmt; }
-
-#ifdef NDEBUG
-#define DEBUGPRINTF nop_printf
-#else
-#define DEBUGPRINTF printf
-#endif
-
-// Rounds val up to the next multiple of align.
-INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) {
-  return val % align == 0 ? val : val + align - (val % align);
-}
 
 // The maximum that any submessages can be nested.  Matches proto2's limit.
 // At the moment this specifies the size of several statically-sized arrays
@@ -94,73 +77,46 @@ INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) {
 #define UPB_MAX_TYPE_DEPTH 64
 
 
-/* Fundamental types and type constants. **************************************/
-
-// A list of types as they are encoded on-the-wire.
-enum upb_wire_type {
-  UPB_WIRE_TYPE_VARINT      = 0,
-  UPB_WIRE_TYPE_64BIT       = 1,
-  UPB_WIRE_TYPE_DELIMITED   = 2,
-  UPB_WIRE_TYPE_START_GROUP = 3,
-  UPB_WIRE_TYPE_END_GROUP   = 4,
-  UPB_WIRE_TYPE_32BIT       = 5,
-};
-
-// Type of a field as defined in a .proto file.  eg. string, int32, etc.  The
-// integers that represent this are defined by descriptor.proto.  Note that
-// descriptor.proto reserves "0" for errors, and we use it to represent
-// exceptional circumstances.
-typedef uint8_t upb_fieldtype_t;
-
-// For referencing the type constants tersely.
-#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ ## type
-#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_ ## type
-
-// Info for a given field type.
-typedef struct {
-  uint8_t align;
-  uint8_t size;
-  uint8_t native_wire_type;
-  uint8_t inmemory_type;    // For example, INT32, SINT32, and SFIXED32 -> INT32
-  const char *ctype;
-  bool is_numeric;  // Only numeric types can be packed.
-} upb_type_info;
-
-// A static array of info about all of the field types, indexed by type number.
-extern const upb_type_info upb_types[];
-
-
 /* upb_value ******************************************************************/
 
+// Clients should not need to access these enum values; they are used internally
+// to do typechecks of upb_value accesses.
+typedef enum {
+  UPB_CTYPE_INT32 = 1,
+  UPB_CTYPE_INT64 = 2,
+  UPB_CTYPE_UINT32 = 3,
+  UPB_CTYPE_UINT64 = 4,
+  UPB_CTYPE_DOUBLE = 5,
+  UPB_CTYPE_FLOAT = 6,
+  UPB_CTYPE_BOOL = 7,
+  UPB_CTYPE_PTR = 8,
+  UPB_CTYPE_BYTEREGION = 9,
+  UPB_CTYPE_FIELDDEF = 10,
+} upb_ctype_t;
+
 struct _upb_byteregion;
 struct _upb_fielddef;
 
-// Special constants for the upb_value.type field.  These must not conflict
-// with any members of FieldDescriptorProto.Type.
-#define UPB_TYPE_ENDGROUP 0
-#define UPB_VALUETYPE_FIELDDEF 32
-#define UPB_VALUETYPE_PTR 33
-
 // A single .proto value.  The owner must have an out-of-band way of knowing
 // the type, so that it knows which union member to use.
 typedef struct {
   union {
     uint64_t uint64;
-    double _double;
-    float _float;
     int32_t int32;
     int64_t int64;
     uint32_t uint32;
+    double _double;
+    float _float;
     bool _bool;
+    void *_void;
     struct _upb_byteregion *byteregion;
     const struct _upb_fielddef *fielddef;
-    void *_void;
   } val;
 
 #ifndef NDEBUG
   // In debug mode we carry the value type around also so we can check accesses
   // to be sure the right member is being read.
-  char type;
+  upb_ctype_t type;
 #endif
 } upb_value;
 
@@ -185,7 +141,7 @@ typedef struct {
     return val.val.membername; \
   } \
   INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
-    memset(val, 0, sizeof(*val)); \
+    val->val.uint64 = 0; \
     SET_TYPE(val->type, proto_type); \
     val->val.membername = cval; \
   } \
@@ -195,21 +151,23 @@ typedef struct {
     return ret; \
   }
 
-UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE));
-UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT));
-UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32));
-UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
-UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
-UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
-UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
-UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
+UPB_VALUE_ACCESSORS(int32,  int32,   int32_t,  UPB_CTYPE_INT32);
+UPB_VALUE_ACCESSORS(int64,  int64,   int64_t,  UPB_CTYPE_INT64);
+UPB_VALUE_ACCESSORS(uint32, uint32,  uint32_t, UPB_CTYPE_UINT32);
+UPB_VALUE_ACCESSORS(uint64, uint64,  uint64_t, UPB_CTYPE_UINT64);
+UPB_VALUE_ACCESSORS(double, _double, double,   UPB_CTYPE_DOUBLE);
+UPB_VALUE_ACCESSORS(float,  _float,  float,    UPB_CTYPE_FLOAT);
+UPB_VALUE_ACCESSORS(bool,   _bool,   bool,     UPB_CTYPE_BOOL);
+UPB_VALUE_ACCESSORS(ptr,    _void,   void*,    UPB_CTYPE_PTR);
 UPB_VALUE_ACCESSORS(byteregion, byteregion, struct _upb_byteregion*,
-                    UPB_TYPE(STRING));
+                    UPB_CTYPE_BYTEREGION);
 
 // upb_fielddef should never be modified from a callback
 // (ie. when they're getting passed through a upb_value).
 UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*,
-                    UPB_VALUETYPE_FIELDDEF);
+                    UPB_CTYPE_FIELDDEF);
+
+#undef UPB_VALUE_ACCESSORS
 
 extern upb_value UPB_NO_VALUE;
 
@@ -262,7 +220,7 @@ void upb_status_copy(upb_status *to, const upb_status *from);
 
 extern upb_errorspace upb_posix_errorspace;
 void upb_status_fromerrno(upb_status *status);
-bool upb_errno_is_wouldblock(void);
+bool upb_errno_is_wouldblock();
 
 // Like vasprintf (which allocates a string large enough for the result), but
 // uses *buf (which can be NULL) as a starting point and reallocates it only if