From a95ab58e79c50b0927eae2b834d3de20a8effc36 Mon Sep 17 00:00:00 2001
From: Joshua Haberman <joshua@reverberate.org>
Date: Sat, 28 Nov 2009 15:38:29 -0800
Subject: Overhaul defs to derive from a common base.

---
 src/upb_context.c | 175 +++++++++++++++++++++++++-----------------------------
 src/upb_context.h |  96 ++++++++++++------------------
 src/upb_def.c     | 170 +++++++++++++++++++++++++++++++++-------------------
 src/upb_def.h     | 175 +++++++++++++++++++++++++++++++++---------------------
 src/upb_mm.c      |   2 +-
 src/upb_msg.c     |   2 +-
 src/upb_parse.c   |   2 +-
 src/upb_string.h  |   5 ++
 src/upb_table.c   |  20 +++----
 src/upb_table.h   |  12 ++--
 10 files changed, 357 insertions(+), 302 deletions(-)

(limited to 'src')

diff --git a/src/upb_context.c b/src/upb_context.c
index cea82cd..469f879 100644
--- a/src/upb_context.c
+++ b/src/upb_context.c
@@ -11,6 +11,11 @@
 #include "upb_def.h"
 #include "upb_mm.h"
 
+struct upb_symtab_entry {
+  struct upb_strtable_entry e;
+  struct upb_def *def;
+};
+
 /* Search for a character in a string, in reverse. */
 static int my_memrchr(char *data, char c, size_t len)
 {
@@ -21,7 +26,6 @@ static int my_memrchr(char *data, char c, size_t len)
 
 void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
            google_protobuf_FileDescriptorProto *fd, bool sort,
-           struct upb_context *context,
            struct upb_status *status);
 
 struct upb_context *upb_context_new()
@@ -35,7 +39,7 @@ struct upb_context *upb_context_new()
   google_protobuf_FileDescriptorProto *fd =
       upb_file_descriptor_set->file->elements[0]; /* We know there is only 1. */
   struct upb_status status = UPB_STATUS_INIT;
-  addfd(&c->psymtab, &c->symtab, fd, false, c, &status);
+  addfd(&c->psymtab, &c->symtab, fd, false, &status);
   if(!upb_ok(&status)) {
     fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg);
     assert(false);
@@ -44,10 +48,7 @@ struct upb_context *upb_context_new()
   struct upb_string name = UPB_STRLIT("google.protobuf.FileDescriptorSet");
   struct upb_symtab_entry *e = upb_strtable_lookup(&c->psymtab, &name);
   assert(e);
-  c->fds_msgdef = e->ref.msg;
-  c->fds_size = 16;
-  c->fds_len = 0;
-  c->fds = malloc(sizeof(*c->fds));
+  c->fds_msgdef = upb_downcast_msgdef(e->def);
   return c;
 }
 
@@ -56,7 +57,7 @@ static void free_symtab(struct upb_strtable *t)
   struct upb_symtab_entry *e = upb_strtable_begin(t);
   for(; e; e = upb_strtable_next(t, &e->e)) {
     upb_def_unref(e->def);
-    free(e->e.key.ptr);
+    upb_string_unref(e->e.key);
   }
   upb_strtable_free(t);
 }
@@ -64,10 +65,7 @@ static void free_symtab(struct upb_strtable *t)
 static void free_context(struct upb_context *c)
 {
   free_symtab(&c->symtab);
-  for(size_t i = 0; i < c->fds_len; i++)
-    upb_msg_unref((struct upb_msg*)c->fds[i]);
   free_symtab(&c->psymtab);
-  free(c->fds);
 }
 
 void upb_context_unref(struct upb_context *c)
@@ -81,24 +79,30 @@ void upb_context_unref(struct upb_context *c)
   }
 }
 
-bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol,
-                        struct upb_symtab_entry *out_entry)
+struct upb_def **upb_context_getandref_defs(struct upb_context *c, int *count)
 {
-  upb_rwlock_rdlock(&c->lock);
-  struct upb_symtab_entry *e = upb_strtable_lookup(&c->symtab, symbol);
-  if(e) *out_entry = *e;
+  upb_rwlock_wrlock(&c->lock);
+  *count = upb_strtable_count(&c->symtab);
+  struct upb_def **defs = malloc(sizeof(*defs) * (*count));
+  struct upb_symtab_entry *e = upb_strtable_begin(&c->symtab);
+  int i = 0;
+  for(; e; e = upb_strtable_next(&c->symtab, &e->e), i++) {
+    assert(e->def);
+    defs[i] = e->def;
+    upb_def_ref(defs[i]);
+  }
+  assert(*count == i);
   upb_rwlock_unlock(&c->lock);
-  return e != NULL;
+  return defs;
 }
 
-void upb_context_enumerate(struct upb_context *c, upb_context_enumerator_t cb,
-                           void *udata)
+struct upb_def *upb_context_lookup(struct upb_context *c,
+                                   struct upb_string *sym)
 {
   upb_rwlock_rdlock(&c->lock);
-  struct upb_symtab_entry *e = upb_strtable_begin(&c->symtab);
-  for(; e; e = upb_strtable_next(&c->symtab, &e->e))
-    cb(udata, e);
+  struct upb_symtab_entry *e = upb_strtable_lookup(&c->symtab, sym);
   upb_rwlock_unlock(&c->lock);
+  return e ? e->def : NULL;
 }
 
 /* Given a symbol and the base symbol inside which it is defined, find the
@@ -137,43 +141,41 @@ static struct upb_symtab_entry *resolve(struct upb_strtable *t,
 }
 
 /* Tries to resolve a symbol in two different tables. */
-union upb_symbol_ref resolve2(struct upb_strtable *t1, struct upb_strtable *t2,
-                              struct upb_string *base, struct upb_string *sym,
-                                 enum upb_symbol_type expected_type) {
-  union upb_symbol_ref nullref = {.msg = NULL};
+struct upb_def *resolve2(struct upb_strtable *t1, struct upb_strtable *t2,
+                         struct upb_string *base, struct upb_string *sym,
+                         enum upb_def_type expected_type) {
   struct upb_symtab_entry *e = resolve(t1, base, sym);
   if(e == NULL) e = resolve(t2, base, sym);
-
-  if(e && e->type == expected_type) return e->ref;
-  else return nullref;
+  if(e && e->def->type == expected_type) return e->def;
+  return NULL;
 }
 
 
-bool upb_context_resolve(struct upb_context *c, struct upb_string *base,
-                         struct upb_string *symbol,
-                         struct upb_symtab_entry *out_entry) {
+struct upb_def *upb_context_resolve(struct upb_context *c,
+                                    struct upb_string *base,
+                                    struct upb_string *symbol) {
   upb_rwlock_rdlock(&c->lock);
   struct upb_symtab_entry *e = resolve(&c->symtab, base, symbol);
-  if(e) *out_entry = *e;
   upb_rwlock_unlock(&c->lock);
-  return e != NULL;
+  return e ? e->def : NULL;
 }
 
 /* Joins strings together, for example:
  *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
  *   join("", "Baz") -> "Baz"
  * Caller owns the returned string and must free it. */
-static struct upb_string join(struct upb_string *base, struct upb_string *name) {
+static struct upb_string *join(struct upb_string *base, struct upb_string *name) {
   size_t len = base->byte_len + name->byte_len;
   if(base->byte_len > 0) len++;  /* For the separator. */
-  struct upb_string joined = {.byte_len=len, .ptr=malloc(len)};
+  struct upb_string *joined = upb_string_new();
+  upb_string_resize(joined, len);
   if(base->byte_len > 0) {
     /* nested_base = base + '.' +  d->name */
-    memcpy(joined.ptr, base->ptr, base->byte_len);
-    joined.ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR;
-    memcpy(&joined.ptr[base->byte_len+1], name->ptr, name->byte_len);
+    memcpy(joined->ptr, base->ptr, base->byte_len);
+    joined->ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR;
+    memcpy(&joined->ptr[base->byte_len+1], name->ptr, name->byte_len);
   } else {
-    memcpy(joined.ptr, name->ptr, name->byte_len);
+    memcpy(joined->ptr, name->ptr, name->byte_len);
   }
   return joined;
 }
@@ -181,7 +183,6 @@ static struct upb_string join(struct upb_string *base, struct upb_string *name)
 static void insert_enum(struct upb_strtable *t,
                         google_protobuf_EnumDescriptorProto *ed,
                         struct upb_string *base,
-                        struct upb_context *c,
                         struct upb_status *status)
 {
   if(!ed->set_flags.has.name) {
@@ -191,29 +192,24 @@ static void insert_enum(struct upb_strtable *t,
     return;
   }
 
-  /* We own this and must free it on destruct. */
-  struct upb_string fqname = join(base, ed->name);
-
-  if(upb_strtable_lookup(t, &fqname)) {
+  struct upb_string *fqname = join(base, ed->name);
+  if(upb_strtable_lookup(t, fqname)) {
     upb_seterr(status, UPB_STATUS_ERROR,
                "attempted to redefine symbol '" UPB_STRFMT "'",
-               UPB_STRARG(&fqname));
-    free(fqname.ptr);
+               UPB_STRARG(fqname));
+    upb_string_unref(fqname);
     return;
   }
 
   struct upb_symtab_entry e;
-  e.e.key = fqname;
-  e.type = UPB_SYM_ENUM;
-  e.ref._enum = malloc(sizeof(*e.ref._enum));
-  upb_enumdef_init(e.ref._enum, ed, c);
+  e.e.key = fqname;  // Donating our ref to the table.
+  e.def = (struct upb_def*)upb_enumdef_new(ed, fqname);
   upb_strtable_insert(t, &e.e);
 }
 
 static void insert_message(struct upb_strtable *t,
                            google_protobuf_DescriptorProto *d,
                            struct upb_string *base, bool sort,
-                           struct upb_context *c,
                            struct upb_status *status)
 {
   if(!d->set_flags.has.name) {
@@ -224,51 +220,51 @@ static void insert_message(struct upb_strtable *t,
   }
 
   /* We own this and must free it on destruct. */
-  struct upb_string fqname = join(base, d->name);
+  struct upb_string *fqname = join(base, d->name);
 
-  if(upb_strtable_lookup(t, &fqname)) {
+  if(upb_strtable_lookup(t, fqname)) {
     upb_seterr(status, UPB_STATUS_ERROR,
                "attempted to redefine symbol '" UPB_STRFMT "'",
-               UPB_STRARG(&fqname));
-    free(fqname.ptr);
+               UPB_STRARG(fqname));
+    upb_string_unref(fqname);
     return;
   }
 
   struct upb_symtab_entry e;
-  e.e.key = fqname;
-  e.type = UPB_SYM_MESSAGE;
-  e.ref.msg = malloc(sizeof(*e.ref.msg));
-  upb_msgdef_init(e.ref.msg, d, &fqname, sort, c, status);
-  if(!upb_ok(status)) {
-    free(fqname.ptr);
-    return;
+  e.e.key = fqname;  // Donating our ref to the table.
+  struct upb_fielddef *fielddefs = malloc(sizeof(*fielddefs) * d->field->len);
+  for (unsigned int i = 0; i < d->field->len; i++) {
+    google_protobuf_FieldDescriptorProto *fd = d->field->elements[i];
+    upb_fielddef_init(&fielddefs[i], fd);
   }
+  if(sort) upb_fielddef_sort(fielddefs, d->field->len);
+  e.def = (struct upb_def*)upb_msgdef_new(fielddefs, d->field->len, fqname);
   upb_strtable_insert(t, &e.e);
 
   /* Add nested messages and enums. */
   if(d->set_flags.has.nested_type)
     for(unsigned int i = 0; i < d->nested_type->len; i++)
-      insert_message(t, d->nested_type->elements[i], &fqname, sort, c, status);
+      insert_message(t, d->nested_type->elements[i], fqname, sort, status);
 
   if(d->set_flags.has.enum_type)
     for(unsigned int i = 0; i < d->enum_type->len; i++)
-      insert_enum(t, d->enum_type->elements[i], &fqname, c, status);
+      insert_enum(t, d->enum_type->elements[i], fqname, status);
 }
 
 void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
            google_protobuf_FileDescriptorProto *fd, bool sort,
-           struct upb_context *c, struct upb_status *status)
+           struct upb_status *status)
 {
   struct upb_string pkg = {.byte_len=0};
   if(fd->set_flags.has.package) pkg = *fd->package;
 
   if(fd->set_flags.has.message_type)
     for(unsigned int i = 0; i < fd->message_type->len; i++)
-      insert_message(addto, fd->message_type->elements[i], &pkg, sort, c, status);
+      insert_message(addto, fd->message_type->elements[i], &pkg, sort, status);
 
   if(fd->set_flags.has.enum_type)
     for(unsigned int i = 0; i < fd->enum_type->len; i++)
-      insert_enum(addto, fd->enum_type->elements[i], &pkg, c, status);
+      insert_enum(addto, fd->enum_type->elements[i], &pkg, status);
 
   if(!upb_ok(status)) return;
 
@@ -277,33 +273,34 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
   /* Attempt to resolve all references. */
   struct upb_symtab_entry *e;
   for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) {
-    if(upb_strtable_lookup(existingdefs, &e->e.key)) {
+    if(upb_strtable_lookup(existingdefs, e->e.key)) {
       upb_seterr(status, UPB_STATUS_ERROR,
                  "attempted to redefine symbol '" UPB_STRFMT "'",
-                 UPB_STRARG(&e->e.key));
+                 UPB_STRARG(e->e.key));
       return;
     }
-    if(e->type == UPB_SYM_MESSAGE) {
-      struct upb_msgdef *m = e->ref.msg;
+    if(e->def->type == UPB_DEF_MESSAGE) {
+      struct upb_msgdef *m = upb_downcast_msgdef(e->def);
       for(unsigned int i = 0; i < m->num_fields; i++) {
         struct upb_fielddef *f = &m->fields[i];
-        union upb_symbol_ref ref;
-        if(f->type == UPB_TYPENUM(MESSAGE) || f->type == UPB_TYPENUM(GROUP))
-          ref = resolve2(existingdefs, addto, &e->e.key, f->ref.str,
-                         UPB_SYM_MESSAGE);
+        if(!upb_issubmsg(f) && f->type != UPB_TYPENUM(ENUM)) {
+          // No resolving necessary.
+          continue;
+        }
+        struct upb_def *def;
+        struct upb_string *name = upb_downcast_unresolveddef(f->def)->name;
+        if(upb_issubmsg(f))
+          def = resolve2(existingdefs, addto, e->e.key, name, UPB_DEF_MESSAGE);
         else if(f->type == UPB_TYPENUM(ENUM))
-          ref = resolve2(existingdefs, addto, &e->e.key, f->ref.str,
-                         UPB_SYM_ENUM);
-        else
-          continue;  /* No resolving necessary. */
-        if(!ref.msg) {
+          def = resolve2(existingdefs, addto, e->e.key, name, UPB_DEF_ENUM);
+        if(!def) {
           upb_seterr(status, UPB_STATUS_ERROR,
                      "could not resolve symbol '" UPB_STRFMT "'"
                      " in context '" UPB_STRFMT "'",
-                     UPB_STRARG(f->ref.str), UPB_STRARG(&e->e.key));
+                     UPB_STRARG(name), UPB_STRARG(e->e.key));
           return;
         }
-        upb_msgdef_setref(m, f, ref);
+        upb_msgdef_resolve(m, f, def);
       }
     }
   }
@@ -320,7 +317,7 @@ void upb_context_addfds(struct upb_context *c,
     upb_strtable_init(&tmp, 0, sizeof(struct upb_symtab_entry));
     upb_rwlock_rdlock(&c->lock);
     for(uint32_t i = 0; i < fds->file->len; i++) {
-      addfd(&tmp, &c->symtab, fds->file->elements[i], true, c, status);
+      addfd(&tmp, &c->symtab, fds->file->elements[i], true, status);
       if(!upb_ok(status)) {
         free_symtab(&tmp);
         upb_rwlock_unlock(&c->lock);
@@ -349,17 +346,5 @@ void upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str,
   upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len, status);
   if(!upb_ok(status)) return;
   upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds, status);
-  if(!upb_ok(status)) return;
-
-  {
-    /* We own fds now, need to keep a ref so we can free it later. */
-    upb_rwlock_wrlock(&c->lock);
-    if(c->fds_size == c->fds_len) {
-      c->fds_size *= 2;
-      c->fds = realloc(c->fds, c->fds_size);
-    }
-    c->fds[c->fds_len++] = (google_protobuf_FileDescriptorSet*)fds;
-    upb_rwlock_unlock(&c->lock);
-  }
   return;
 }
diff --git a/src/upb_context.h b/src/upb_context.h
index b20f169..177b42e 100644
--- a/src/upb_context.h
+++ b/src/upb_context.h
@@ -24,26 +24,18 @@ extern "C" {
 
 /* Definitions. ***************************************************************/
 
-struct upb_symtab_entry {
-  struct upb_strtable_entry e;
-  struct upb_def *def;  /* We own one ref. */
-};
-
 struct upb_context {
   upb_atomic_refcount_t refcount;
-  upb_rwlock_t lock;
-  struct upb_strtable symtab;     /* The context's symbol table. */
-  struct upb_strtable psymtab;    /* Private symbols, for internal use. */
-  struct upb_msgdef *fds_msgdef;  /* In psymtab, ptr here for convenience. */
-
-  /* A list of the FileDescriptorProtos we own (from having parsed them
-   * ourselves) and must free on destruction. */
-  size_t fds_size, fds_len;
-  struct google_protobuf_FileDescriptorSet **fds;
+  upb_rwlock_t lock;  // Protects all members except the refcount.
+  struct upb_msgdef *fds_msgdef;  // In psymtab, ptr here for convenience.
+
+  // Our symbol tables; we own refs to the defs therein.
+  struct upb_strtable symtab;     // The context's symbol table.
+  struct upb_strtable psymtab;    // Private symbols, for internal use.
 };
 
-/* Initializes a upb_context.  Contexts are not freed explicitly, but unref'd
- * when the caller is done with them. */
+// Initializes a upb_context.  Contexts are not freed explicitly, but unref'd
+// when the caller is done with them.
 struct upb_context *upb_context_new(void);
 INLINE void upb_context_ref(struct upb_context *c) {
   upb_atomic_ref(&c->refcount);
@@ -52,55 +44,41 @@ void upb_context_unref(struct upb_context *c);
 
 /* Looking up symbols. ********************************************************/
 
-/* Resolves the given symbol using the rules described in descriptor.proto,
- * namely:
- *
- *    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
- *    scoping rules are used to find the type (i.e. first the nested types
- *    within this message are searched, then within the parent, on up to the
- *    root namespace).
- *
- * Returns NULL if the symbol has not been defined. */
-bool upb_context_resolve(struct upb_context *c, struct upb_string *base,
-                         struct upb_string *symbol,
-                         struct upb_symtab_entry *out_entry);
-
-/* Find an entry in the symbol table with this exact name.  Returns NULL if no
- * such symbol name exists. */
-bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol,
-                        struct upb_symtab_entry *out_entry);
-
-/* For enumerating over the entries in the symbol table.  The enumerator
- * callback will be called once for every symtab entry.
- *
- * The callback *must not* block or take any significant amount of time, since
- * the upb_context's lock is held while it is being called! */
-typedef void (*upb_context_enumerator_t)(
-    void *udata, struct upb_symtab_entry *entry);
-void upb_context_enumerate(struct upb_context *c, upb_context_enumerator_t,
-                           void *udata);
+// Resolves the given symbol using the rules described in descriptor.proto,
+// namely:
+//
+//    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
+//    scoping rules are used to find the type (i.e. first the nested types
+//    within this message are searched, then within the parent, on up to the
+//    root namespace).
+//
+// Returns NULL if no such symbol has been defined.
+struct upb_def *upb_context_resolve(struct upb_context *c,
+                                    struct upb_string *base,
+                                    struct upb_string *symbol);
+
+// Find an entry in the symbol table with this exact name.  Returns NULL if no
+// such symbol name has been defined.
+struct upb_def *upb_context_lookup(struct upb_context *c,
+                                   struct upb_string *sym);
+
+// Gets an array of pointers to all currently active defs in this context.  The
+// caller owns the returned array (which is of length *count) as well as a ref
+// to each symbol inside.
+struct upb_def **upb_context_getandref_defs(struct upb_context *c, int *count);
 
 /* Adding symbols. ************************************************************/
 
-/* Adds the definitions in the given file descriptor to this context.  All
- * types that are referenced from fd must have previously been defined (or be
- * defined in fd).  fd may not attempt to define any names that are already
- * defined in this context.
- *
- * Caller retains ownership of fd, but the context will contain references to
- * it, so it must outlive the context.
- *
- * upb_context_addfd only returns true or false; it does not give any hint
- * about what happened in the case of failure.  This is because the descriptor
- * is expected to have been validated at the time it was parsed/generated. */
-void upb_context_addfds(struct upb_context *c,
-                        struct google_protobuf_FileDescriptorSet *fds,
-                        struct upb_status *status);
-
+// Adds the definitions in the given file descriptor to this context.  All
+// types that are referenced from fd must have previously been defined (or be
+// defined in fd).  fd may not attempt to define any names that are already
+// defined in this context.  Caller retains ownership of fd.  status indicates
+// whether the operation was successful or not, and the error message (if any).
+struct google_protobuf_FileDescriptorSet;
 void upb_context_addfds(struct upb_context *c,
                         struct google_protobuf_FileDescriptorSet *fds,
                         struct upb_status *status);
-
+// Like the above, but also parses the FileDescriptorSet from fds.
 void upb_context_parsefds(struct upb_context *c, struct upb_string *fds,
                           struct upb_status *status);
 
diff --git a/src/upb_def.c b/src/upb_def.c
index 0d56459..32675f5 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -31,111 +31,157 @@ static int compare_fields(const void *e1, const void *e2) {
   }
 }
 
-void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
+/* Callback for sorting fields. */
+static int compare_fields2(const void *e1, const void *e2) {
+  const struct upb_fielddef *f1 = e1;
+  const struct upb_fielddef *f2 = e2;
+  /* Required fields go before non-required. */
+  bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+  bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+  if(req1 != req2) {
+    return req2 - req1;
+  } else {
+    /* Within required and non-required field lists, list in number order.
+     * TODO: consider ordering by data size to reduce padding. */
+    return f1->number - f2->number;
+  }
+}
+
+void upb_fielddef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
+{
+  qsort(fds, num, sizeof(*fds), compare_fields);
+}
+
+void upb_fielddef_sort(struct upb_fielddef *defs, size_t num)
+{
+  qsort(defs, num, sizeof(*defs), compare_fields2);
+}
+
+void upb_def_init(struct upb_def *def, enum upb_def_type type,
+                  struct upb_string *fqname) {
+  def->type = type;
+  def->fqname = fqname;
+  upb_string_ref(fqname);
+  upb_atomic_refcount_init(&def->refcount, 1);
+}
+
+void upb_def_uninit(struct upb_def *def) {
+  upb_string_unref(def->fqname);
+}
+
+void upb_fielddef_init(struct upb_fielddef *f,
+                       struct google_protobuf_FieldDescriptorProto *fd)
 {
-  qsort(fds, num, sizeof(void*), compare_fields);
+  f->type = fd->type;
+  f->label = fd->label;
+  f->number = fd->number;
+  f->name = upb_strdup(fd->name);
+  f->def = NULL;
+  if(fd->set_flags.has.type_name) {
+    f->def = (struct upb_def*)upb_unresolveddef_new(fd->type_name);
+  }
 }
 
-void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
-                     struct upb_string *fqname, bool sort, struct upb_context *c,
-                     struct upb_status *status)
+void upb_fielddef_uninit(struct upb_fielddef *f)
 {
-  (void)status;  // Nothing that can fail at the moment.
-  int num_fields = d->set_flags.has.field ? d->field->len : 0;
+  upb_string_unref(f->name);
+  if(upb_fielddef_hasdef(f)) upb_def_unref(f->def);
+}
+
+struct upb_fielddef *upb_fielddef_dup(struct upb_fielddef *f)
+{
+  struct upb_fielddef *new_f = malloc(sizeof(*new_f));
+  new_f->type = f->type;
+  new_f->label = f->label;
+  new_f->number = f->number;
+  new_f->name = upb_strdup(f->name);
+  new_f->type = f->type;
+  new_f->def = NULL;
+  if(upb_fielddef_hasdef(f)) {
+    new_f->def = f->def;
+    upb_def_ref(new_f->def);
+  }
+  return new_f;
+}
+
+struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields,
+                                  struct upb_string *fqname)
+{
+  struct upb_msgdef *m = malloc(sizeof(*m));
+  upb_def_init(&m->def, UPB_DEF_MESSAGE, fqname);
   upb_inttable_init(&m->fields_by_num, num_fields,
                     sizeof(struct upb_fieldsbynum_entry));
   upb_strtable_init(&m->fields_by_name, num_fields,
                     sizeof(struct upb_fieldsbyname_entry));
 
-  upb_atomic_refcount_init(&m->refcount, 1);
-  m->fqname = upb_strdup(fqname);
-  m->context = c;
   m->num_fields = num_fields;
   m->set_flags_bytes = div_round_up(m->num_fields, 8);
-  /* These are incremented in the loop. */
+  // These are incremented in the loop.
   m->num_required_fields = 0;
   m->size = m->set_flags_bytes;
-
-  m->fields = malloc(sizeof(*m->fields) * m->num_fields);
-
-  /* Create a sorted list of the fields. */
-  google_protobuf_FieldDescriptorProto **fds =
-      malloc(sizeof(*fds) * m->num_fields);
-  for(unsigned int i = 0; i < m->num_fields; i++) {
-    /* We count on the caller to keep this pointer alive. */
-    fds[i] = d->field->elements[i];
-  }
-  if(sort) upb_msgdef_sortfds(fds, m->num_fields);
+  m->fields = fields;
 
   size_t max_align = 0;
-  for(unsigned int i = 0; i < m->num_fields; i++) {
+  for(int i = 0; i < num_fields; i++) {
     struct upb_fielddef *f = &m->fields[i];
-    google_protobuf_FieldDescriptorProto *fd = fds[i];
-    struct upb_type_info *type_info = &upb_type_info[fd->type];
+    struct upb_type_info *type_info = &upb_type_info[f->type];
 
-    /* General alignment rules are: each member must be at an address that is a
-     * multiple of that type's alignment.  Also, the size of the structure as
-     * a whole must be a multiple of the greatest alignment of any member. */
+    // General alignment rules are: each member must be at an address that is a
+    // multiple of that type's alignment.  Also, the size of the structure as
+    // a whole must be a multiple of the greatest alignment of any member. */
     f->field_index = i;
     f->byte_offset = ALIGN_UP(m->size, type_info->align);
-    f->type = fd->type;
-    f->label = fd->label;
-    f->number = fd->number;
-    f->name = upb_strdup(fd->name);
-    f->ref.str = fd->type_name;
     m->size = f->byte_offset + type_info->size;
     max_align = UPB_MAX(max_align, type_info->align);
-    if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
+    if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
+      // We currently rely on the fact that required fields are always sorted
+      // to occur before non-required fields.
       m->num_required_fields++;
+    }
 
-    /* Insert into the tables.  Note that f->ref will be uninitialized, even in
-     * the tables' copies of *f, which is why we must update them separately
-     * in upb_msg_setref() below. */
-    struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
-    struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
+    // Insert into the tables.  Note that f->ref will be uninitialized, even in
+    // the tables' copies of *f, which is why we must update them separately
+    // in upb_msg_setref() below.
+    struct upb_fieldsbynum_entry nument = {.e = {.key = f->number}, .f = *f};
+    struct upb_fieldsbyname_entry strent = {.e = {.key = upb_strdup(f->name)}, .f = *f};
     upb_inttable_insert(&m->fields_by_num, &nument.e);
     upb_strtable_insert(&m->fields_by_name, &strent.e);
   }
 
-  if(max_align > 0)
-    m->size = ALIGN_UP(m->size, max_align);
-  free(fds);
+  if(max_align > 0) m->size = ALIGN_UP(m->size, max_align);
+  return m;
 }
 
 void _upb_msgdef_free(struct upb_msgdef *m)
 {
+  upb_def_uninit(&m->def);
   upb_inttable_free(&m->fields_by_num);
   upb_strtable_free(&m->fields_by_name);
-  upb_string_unref(m->fqname);
-  for (unsigned int i = 0; i < m->num_fields; i++) {
-    struct upb_fielddef *f = &m->fields[i];
-    upb_string_unref(f->name);
-    if (upb_issubmsg(f) || f->type == UPB_TYPENUM(ENUM))
-      upb_def_unref(f->ref, f->type);
-  }
+  for (unsigned int i = 0; i < m->num_fields; i++)
+    upb_fielddef_uninit(&m->fields[i]);
   free(m->fields);
   free(m);
 }
 
-void upb_msgdef_setref(struct upb_msgdef *m, struct upb_fielddef *f,
-                       union upb_def_ptr ref) {
+void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f,
+                        struct upb_def *def) {
   struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup(
       &m->fields_by_num, f->number, sizeof(struct upb_fieldsbynum_entry));
   struct upb_fieldsbyname_entry *str_e =
       upb_strtable_lookup(&m->fields_by_name, f->name);
   assert(int_e && str_e);
-  f->ref = ref;
-  int_e->f.ref = ref;
-  str_e->f.ref = ref;
-  upb_def_ref(ref, f->type);
+  f->def = def;
+  int_e->f.def = def;
+  str_e->f.def = def;
+  upb_def_ref(def);
 }
 
-void upb_enumdef_init(struct upb_enumdef *e,
-                   struct google_protobuf_EnumDescriptorProto *ed,
-                   struct upb_context *c) {
+struct upb_enumdef *upb_enumdef_new(
+    struct google_protobuf_EnumDescriptorProto *ed, struct upb_string *fqname)
+{
+  struct upb_enumdef *e = malloc(sizeof(*e));
+  upb_def_init(&e->def, UPB_DEF_ENUM, fqname);
   int num_values = ed->set_flags.has.value ? ed->value->len : 0;
-  e->context = c;
-  upb_atomic_refcount_init(&e->refcount, 1);
   upb_strtable_init(&e->nametoint, num_values,
                     sizeof(struct upb_enumdef_ntoi_entry));
   upb_inttable_init(&e->inttoname, num_values,
@@ -143,16 +189,18 @@ void upb_enumdef_init(struct upb_enumdef *e,
 
   for(int i = 0; i < num_values; i++) {
     google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i];
-    struct upb_enumdef_ntoi_entry ntoi_entry = {.e = {.key = *value->name},
+    struct upb_enumdef_ntoi_entry ntoi_entry = {.e = {.key = upb_strdup(value->name)},
                                                 .value = value->number};
     struct upb_enumdef_iton_entry iton_entry = {.e = {.key = value->number},
                                                 .string = value->name};
     upb_strtable_insert(&e->nametoint, &ntoi_entry.e);
     upb_inttable_insert(&e->inttoname, &iton_entry.e);
   }
+  return e;
 }
 
 void _upb_enumdef_free(struct upb_enumdef *e) {
+  upb_def_uninit(&e->def);
   upb_strtable_free(&e->nametoint);
   upb_inttable_free(&e->inttoname);
   free(e);
diff --git a/src/upb_def.h b/src/upb_def.h
index 6a6622a..e58f01f 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -9,6 +9,9 @@
  * - upb_enumdef: describes an enum.
  * (TODO: descriptions of extensions and services).
  *
+ * Defs should be obtained from a upb_context object; the APIs for creating
+ * them directly are internal-only.
+ *
  * Defs are immutable and reference-counted.  Contexts reference any defs
  * that are the currently in their symbol table.  If an extension is loaded
  * that adds a field to an existing message, a new msgdef is constructed that
@@ -44,13 +47,15 @@ enum upb_def_type {
 
 // Common members.
 struct upb_def {
+  struct upb_string *fqname;  // Fully qualified.
   enum upb_def_type type;
   upb_atomic_refcount_t refcount;
-  struct upb_string *fqname;  /* Fully-qualified. */
-}
+};
 
 void upb_def_init(struct upb_def *def, enum upb_def_type type,
                   struct upb_string *fqname);
+void upb_def_uninit(struct upb_def *def);
+INLINE void upb_def_ref(struct upb_def *def) { upb_atomic_ref(&def->refcount); }
 
 /* Field definition. **********************************************************/
 
@@ -72,6 +77,7 @@ struct upb_fielddef {
   struct upb_def *def;
 };
 
+// A variety of tests about the type of a field.
 INLINE bool upb_issubmsg(struct upb_fielddef *f) {
   return upb_issubmsgtype(f->type);
 }
@@ -81,6 +87,10 @@ INLINE bool upb_isstring(struct upb_fielddef *f) {
 INLINE bool upb_isarray(struct upb_fielddef *f) {
   return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
 }
+// Does the type of this field imply that it should contain an associated def?
+INLINE bool upb_fielddef_hasdef(struct upb_fielddef *f) {
+  return upb_issubmsg(f) || f->type == UPB_TYPENUM(ENUM);
+}
 
 INLINE bool upb_field_ismm(struct upb_fielddef *f) {
   return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
@@ -105,18 +115,22 @@ INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_fielddef *f) {
   else return -1;
 }
 
-// Interfaces for constructing/destroying fielddefs.  These are internal-only.
 struct google_protobuf_FieldDescriptorProto;
 
+// Interfaces for constructing/destroying fielddefs.  These are internal-only.
+
 // Initializes a upb_fielddef from a FieldDescriptorProto.  The caller must
 // have previously allocated the upb_fielddef.
-void upb_fielddef_init(struct google_protobuf_FieldDescriptorProto *fd,
-                       struct upb_fielddef *f);
+void upb_fielddef_init(struct upb_fielddef *f,
+                       struct google_protobuf_FieldDescriptorProto *fd);
+struct upb_fielddef *upb_fielddef_dup(struct upb_fielddef *f);
 void upb_fielddef_uninit(struct upb_fielddef *f);
 
 // Sort the given fielddefs in-place, according to what we think is an optimal
 // ordering of fields.  This can change from upb release to upb release.
 void upb_fielddef_sort(struct upb_fielddef *defs, size_t num);
+void upb_fielddef_sortfds(struct google_protobuf_FieldDescriptorProto **fds,
+                          size_t num);
 
 /* Message definition. ********************************************************/
 
@@ -155,22 +169,52 @@ struct upb_fieldsbyname_entry {
 // possible.  These return NULL if no such field is found.
 INLINE struct upb_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m,
                                                uint32_t number) {
-  struct upb_fieldsbynum_entry *e = upb_inttable_fast_lookup(
-      &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
+  struct upb_fieldsbynum_entry *e = (struct upb_fieldsbynum_entry*)
+      upb_inttable_fast_lookup(
+          &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
   return e ? &e->f : NULL;
 }
 
 INLINE struct upb_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
                                                 struct upb_string *name) {
-  struct upb_fieldsbyname_entry *e = upb_strtable_lookup(
-      &m->fields_by_name, name);
+  struct upb_fieldsbyname_entry *e = (struct upb_fieldsbyname_entry*)
+      upb_strtable_lookup(
+          &m->fields_by_name, name);
   return e ? &e->f : NULL;
 }
 
+// Internal-only functions for constructing a msgdef.  Caller retains ownership
+// of d and fqname.  Ownership of fields passes to the msgdef.
+//
+// Note that init does not resolve upb_fielddef.ref; the caller should do that
+// post-initialization by calling upb_msgdef_resolve() below.
+struct upb_msgdef *upb_msgdef_new(struct upb_fielddef *fields, int num_fields,
+                                  struct upb_string *fqname);
+void _upb_msgdef_free(struct upb_msgdef *m);
+INLINE void upb_msgdef_ref(struct upb_msgdef *m) {
+  upb_def_ref(&m->def);
+}
+INLINE void upb_msgdef_unref(struct upb_msgdef *m) {
+  if(upb_atomic_unref(&m->def.refcount)) _upb_msgdef_free(m);
+}
+
+// Clients use this function on a previously initialized upb_msgdef to resolve
+// the "ref" field in the upb_fielddef.  Since messages can refer to each
+// other in mutually-recursive ways, this step must be separated from
+// initialization.
+void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f,
+                        struct upb_def *def);
+
+// Downcasts.  They are checked only if asserts are enabled.
+INLINE struct upb_msgdef *upb_downcast_msgdef(struct upb_def *def) {
+  assert(def->type == UPB_DEF_MESSAGE);
+  return (struct upb_msgdef*)def;
+}
+
 /* Enum defintion. ************************************************************/
 
 struct upb_enumdef {
-  upb_atomic_refcount_t refcount;
+  struct upb_def def;
   struct upb_strtable nametoint;
   struct upb_inttable inttoname;
 };
@@ -185,72 +229,67 @@ struct upb_enumdef_iton_entry {
   struct upb_string *string;
 };
 
-/* Internal functions. ********************************************************/
-
-/* Initializes/frees a upb_msgdef.  Usually this will be called by upb_context,
- * and clients will not have to construct one directly.
- *
- * Caller retains ownership of d and fqname.  Note that init does not resolve
- * upb_fielddef.ref the caller should do that post-initialization by
- * calling upb_msg_ref() below.
- *
- * fqname indicates the fully-qualified name of this message.
- *
- * sort indicates whether or not it is safe to reorder the fields from the order
- * they appear in d.  This should be false if code has been compiled against a
- * header for this type that expects the given order. */
-void upb_msgdef_init(struct upb_msgdef *m,
-                     struct google_protobuf_DescriptorProto *d,
-                     struct upb_string *fqname, bool sort,
-                     struct upb_status *status);
-void _upb_msgdef_free(struct upb_msgdef *m);
-INLINE void upb_msgdef_ref(struct upb_msgdef *m) {
-  upb_atomic_ref(&m->refcount);
+// Internal-only functions for creating/destroying an enumdef.  Caller retains
+// ownership of ed.  The enumdef is initialized with one ref.
+struct upb_enumdef *upb_enumdef_new(
+    struct google_protobuf_EnumDescriptorProto *ed, struct upb_string *fqname);
+void _upb_enumdef_free(struct upb_enumdef *e);
+INLINE void upb_enumdef_ref(struct upb_enumdef *e) { upb_def_ref(&e->def); }
+INLINE void upb_enumdef_unref(struct upb_enumdef *e) {
+  if(upb_atomic_unref(&e->def.refcount)) _upb_enumdef_free(e);
 }
-INLINE void upb_msgdef_unref(struct upb_msgdef *m) {
-  if(upb_atomic_unref(&m->refcount)) _upb_msgdef_free(m);
+INLINE struct upb_enumdef *upb_downcast_enumdef(struct upb_def *def) {
+  assert(def->type == UPB_DEF_ENUM);
+  return (struct upb_enumdef*)def;
 }
 
-/* Clients use this function on a previously initialized upb_msgdef to resolve
- * the "ref" field in the upb_fielddef.  Since messages can refer to each
- * other in mutually-recursive ways, this step must be separated from
- * initialization. */
-void upb_msgdef_setref(struct upb_msgdef *m, struct upb_fielddef *f,
-                       union upb_symbol_ref ref);
-
-/* Initializes and frees an enum, respectively.  Caller retains ownership of
- * ed.  The enumdef is initialized with one ref. */
-void upb_enumdef_init(struct upb_enumdef *e,
-                      struct google_protobuf_EnumDescriptorProto *ed);
-void _upb_enumdef_free(struct upb_enumdef *e);
-INLINE void upb_enumdef_ref(struct upb_enumdef *e) {
-  upb_atomic_ref(&e->refcount);
+/* Unresolved definition. *****************************************************/
+
+// This is a placeholder definition that contains only the name of the type
+// that should eventually be referenced.  Once symbols are resolved, this
+// definition is replaced with a real definition.
+struct upb_unresolveddef {
+  struct upb_def def;
+  struct upb_string *name;  // Not fully-qualified.
+};
+
+INLINE struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *name) {
+  struct upb_unresolveddef *d = (struct upb_unresolveddef*)malloc(sizeof(*d));
+  upb_def_init(&d->def, UPB_DEF_UNRESOLVED, name);
+  d->name = name;
+  upb_string_ref(name);
+  return d;
 }
-INLINE void upb_enumdef_unref(struct upb_enumdef *e) {
-  if(upb_atomic_unref(&e->refcount)) _upb_enumdef_free(e);
+INLINE void _upb_unresolveddef_free(struct upb_unresolveddef *def) {
+  upb_def_uninit(&def->def);
+  upb_string_unref(def->name);
+}
+INLINE struct upb_unresolveddef *upb_downcast_unresolveddef(struct upb_def *def) {
+  assert(def->type == UPB_DEF_UNRESOLVED);
+  return (struct upb_unresolveddef*)def;
 }
 
-INLINE void upb_def_ref(struct upb_def *def) { upb_atomic_ref(&def->refcount); }
 INLINE void upb_def_unref(struct upb_def *def) {
   if(upb_atomic_unref(&def->refcount)) {
-  switch(def->type) {
-    case UPB_DEF_MESSAGE:
-      _upb_msgdef_free((struct upb_msgdef*)def);
-      break;
-    case UPB_DEF_ENUM:
-      _upb_emumdef_free((struct upb_enumdef*)def);
-      break;
-    case UPB_DEF_SERVICE:
-      assert(false);  /* Unimplemented. */
-      break;
-    case UPB_DEF_EXTENSION,
-      _upb_extensiondef_free((struct upb_extensiondef*)def);
-      break;
-    case UPB_DEF_UNRESOLVED
-      upb_string_unref((struct upb_string*)def);
-      break;
-    default:
-      assert(false);
+    switch(def->type) {
+      case UPB_DEF_MESSAGE:
+        _upb_msgdef_free((struct upb_msgdef*)def);
+        break;
+      case UPB_DEF_ENUM:
+        _upb_enumdef_free((struct upb_enumdef*)def);
+        break;
+      case UPB_DEF_SERVICE:
+        assert(false);  /* Unimplemented. */
+        break;
+      case UPB_DEF_EXTENSION:
+        assert(false);  /* Unimplemented. */
+        break;
+      case UPB_DEF_UNRESOLVED:
+        _upb_unresolveddef_free((struct upb_unresolveddef*)def);
+        break;
+      default:
+        assert(false);
+    }
   }
 }
 
diff --git a/src/upb_mm.c b/src/upb_mm.c
index cad1158..60809a5 100644
--- a/src/upb_mm.c
+++ b/src/upb_mm.c
@@ -47,7 +47,7 @@ static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type,
 {
   union upb_mmptr p = {NULL};
   switch(type) {
-    case UPB_MM_MSG_REF: p.msg = upb_msg_new(f->ref.msg);
+    case UPB_MM_MSG_REF: p.msg = upb_msg_new(upb_downcast_msgdef(f->def));
     case UPB_MM_STR_REF: p.str = upb_string_new();
     case UPB_MM_ARR_REF: p.arr = upb_array_new(f);
     default: assert(false); break;
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 5f96980..0106d02 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -100,7 +100,7 @@ static void start_cb(void *udata, struct upb_fielddef *f)
     if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) {
       if(*p.msg)
         upb_msg_unref(*p.msg);
-      *p.msg = upb_msg_new(f->ref.msg);
+      *p.msg = upb_msg_new(upb_downcast_msgdef(f->def));
     }
     upb_msg_clear(*p.msg);
     upb_msg_set(oldmsg, f);
diff --git a/src/upb_parse.c b/src/upb_parse.c
index 3abaedf..eed8ec8 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -387,7 +387,7 @@ static uint8_t *push(struct upb_cbparser *p, uint8_t *start,
   }
   struct upb_cbparser_frame *frame = p->top;
   frame->end_offset = p->completed_offset + submsg_len;
-  frame->msgdef = f->ref.msg;
+  frame->msgdef = upb_downcast_msgdef(f->def);
 
   if(p->start_cb) p->start_cb(p->udata, f);
   return get_msgend(p, start);
diff --git a/src/upb_string.h b/src/upb_string.h
index 505ac5b..c1caddc 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -58,6 +58,11 @@ INLINE void upb_string_unref(struct upb_string *str)
   if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str);
 }
 
+INLINE void upb_string_ref(struct upb_string *str)
+{
+  upb_mmhead_ref(&str->mmhead);
+}
+
 /* Resizes the string to size, reallocating if necessary.  Does not preserve
  * existing data. */
 INLINE void upb_string_resize(struct upb_string *str, uint32_t size)
diff --git a/src/upb_table.c b/src/upb_table.c
index 036d175..2c4c824 100644
--- a/src/upb_table.c
+++ b/src/upb_table.c
@@ -61,7 +61,7 @@ void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key)
   struct upb_strtable_entry *e;
   do {
     e = strent(t, bucket);
-    if(upb_streql(&e->key, key)) return e;
+    if(e->key && upb_streql(e->key, key)) return e;
   } while((bucket = e->next) != UPB_END_OF_CHAIN);
   return NULL;
 }
@@ -141,7 +141,7 @@ static uint32_t empty_strbucket(struct upb_strtable *table)
   /* TODO: does it matter that this is biased towards the front of the table? */
   for(uint32_t i = 1; i <= upb_strtable_size(table); i++) {
     struct upb_strtable_entry *e = strent(table, i);
-    if(e->key.byte_len == 0) return i;
+    if(e->key == NULL) return i;
   }
   assert(false);
   return 0;
@@ -149,12 +149,12 @@ static uint32_t empty_strbucket(struct upb_strtable *table)
 
 static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e)
 {
-  assert(upb_strtable_lookup(t, &e->key) == NULL);
+  assert(upb_strtable_lookup(t, e->key) == NULL);
   t->t.count++;
-  uint32_t bucket = strtable_bucket(t, &e->key);
+  uint32_t bucket = strtable_bucket(t, e->key);
   struct upb_strtable_entry *table_e = strent(t, bucket);
-  if(table_e->key.byte_len != 0) {  /* Collision. */
-    if(bucket == strtable_bucket(t, &table_e->key)) {
+  if(table_e->key != NULL) {  /* Collision. */
+    if(bucket == strtable_bucket(t, table_e->key)) {
       /* Existing element is in its main posisiton.  Find an empty slot to
        * place our new element and append it to this key's chain. */
       uint32_t empty_bucket = empty_strbucket(t);
@@ -166,11 +166,11 @@ static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e)
       /* Existing element is not in its main position.  Move it to an empty
        * slot and put our element in its main position. */
       uint32_t empty_bucket = empty_strbucket(t);
-      uint32_t evictee_bucket = strtable_bucket(t, &table_e->key);
+      uint32_t evictee_bucket = strtable_bucket(t, table_e->key);
       memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
       struct upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
       while(1) {
-        assert(evictee_e->key.byte_len != 0);
+        assert(evictee_e->key != NULL);
         assert(evictee_e->next != UPB_END_OF_CHAIN);
         if(evictee_e->next == bucket) {
           evictee_e->next = empty_bucket;
@@ -183,7 +183,7 @@ static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e)
   }
   memcpy(table_e, e, t->t.entry_size);
   table_e->next = UPB_END_OF_CHAIN;
-  assert(upb_strtable_lookup(t, &e->key) == table_e);
+  assert(upb_strtable_lookup(t, e->key) == table_e);
 }
 
 void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e)
@@ -223,7 +223,7 @@ void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur)
   do {
     cur = (void*)((char*)cur + t->t.entry_size);
     if(cur == end) return NULL;
-  } while(cur->key.byte_len == 0);
+  } while(cur->key == NULL);
   return cur;
 }
 
diff --git a/src/upb_table.h b/src/upb_table.h
index 3855e3e..2202684 100644
--- a/src/upb_table.h
+++ b/src/upb_table.h
@@ -34,13 +34,13 @@ struct upb_inttable_entry {
   uint32_t next;  /* Internal chaining. */
 };
 
-/* TODO: consider storing the hash in the entry.  This would avoid the need to
- * rehash on table resizes, but more importantly could possibly improve lookup
- * performance by letting us compare hashes before comparing lengths or the
- * strings themselves. */
+// TODO: consider storing the hash in the entry.  This would avoid the need to
+// rehash on table resizes, but more importantly could possibly improve lookup
+// performance by letting us compare hashes before comparing lengths or the
+// strings themselves.
 struct upb_strtable_entry {
-  struct upb_string key;
-  uint32_t next;  /* Internal chaining. */
+  struct upb_string *key;  // We own one ref.
+  uint32_t next;           // Internal chaining.
 };
 
 struct upb_table {
-- 
cgit v1.2.3