In the midst of a major refactoring.

author: Joshua Haberman <joshua@reverberate.org> 2009-12-21 10:48:01 -0800
committer: Joshua Haberman <joshua@reverberate.org> 2009-12-21 10:48:01 -0800
commit: c2419764856e5666bfa9e3c1b87de29ec93babe1 (patch)
tree: 7771bf0bbcf9b1103a55b963831385b07705b739
parent: c6cba2af37638cc47ff69aed866669567ef365d9 (diff)
14 files changed, 31 insertions, 1247 deletions
diff --git a/Makefile b/Makefile
index cc30814..f8e75aa 100644
--- a/Makefile
+++ b/Makefile
@@ -46,8 +46,7 @@ clean:
 	cd lang_ext/python && python setup.py clean --all
 
 # The core library (src/libupb.a)
-SRC=src/upb.c src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c \
-    src/upb_def.c src/upb_string.c src/upb_text.c \
+SRC=src/upb.c src/upb_parse.c src/upb_table.c src/upb_def.c src/upb_data.c \
     descriptor/descriptor.c
     #src/upb_serialize.c descriptor/descriptor.c
 STATICOBJ=$(patsubst %.c,%.o,$(SRC))
diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h
index f968015..ba86c81 100644
--- a/descriptor/descriptor.h
+++ b/descriptor/descriptor.h
@@ -3,7 +3,7 @@
 #ifndef DESCRIPTOR_DESCRIPTOR_H
 #define DESCRIPTOR_DESCRIPTOR_H
 
-#include <upb_struct.h>
+#include <upb_data.h>
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/upb_array.h b/src/upb_array.h
deleted file mode 100644
index 70923b6..0000000
--- a/src/upb_array.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * Defines an in-memory, polymorphic array type.  The array does not know its
- * own type -- its owner must know that information out-of-band.
- *
- * upb_arrays are memory-managed in the sense that they contain a pointer
- * ("mem") to memory that is "owned" by the array (which may be NULL if the
- * array owns no memory).  There is a separate pointer ("elements") that points
- * to the the array's currently "effective" memory, which is either equal to
- * mem (if the array's current value is memory we own) or not (if the array is
- * referencing other memory).
- *
- * If the array is referencing other memory, it is up to the array's owner to
- * ensure that the other memory remains valid for as long as the array is
- * referencing it.
- *
- */
-
-#ifndef UPB_ARRAY_H_
-#define UPB_ARRAY_H_
-
-#include <stdlib.h>
-#include "upb_def.h"  /* Because we use upb_fielddef */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct upb_string;
-
-/* Returns a pointer to an array element.  Does not perform a bounds check! */
-INLINE union upb_value_ptr upb_array_getelementptr(struct upb_array *arr,
-                                                   upb_arraylen_t n)
-{
-  union upb_value_ptr ptr;
-  ptr._void = UPB_INDEX(arr->elements._void, n,
-                        upb_type_info[arr->fielddef->type].size);
-  return ptr;
-}
-
-/* Allocation/Deallocation/Resizing. ******************************************/
-
-INLINE struct upb_array *upb_array_new(struct upb_fielddef *f)
-{
-  struct upb_array *arr = (struct upb_array*)malloc(sizeof(*arr));
-  upb_mmhead_init(&arr->mmhead);
-  arr->elements._void = NULL;
-  arr->len = 0;
-  arr->size = 0;
-  arr->fielddef = f;
-  return arr;
-}
-
-INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
-{
-  /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
-  v--;
-  v |= v >> 1;
-  v |= v >> 2;
-  v |= v >> 4;
-  v |= v >> 8;
-  v |= v >> 16;
-  v++;
-  return v;
-}
-
-INLINE union upb_value_ptr upb_array_append(struct upb_array *arr)
-{
-  size_t size = upb_type_info[arr->fielddef->type].size;
-  if(arr->len == arr->size) {
-    arr->size = UPB_MAX(4, upb_round_up_to_pow2(arr->len + 1));
-    arr->elements._void = realloc(arr->elements._void, arr->size * size);
-    memset((char*)arr->elements._void + (arr->len * size), 0,
-           (arr->size - arr->len) * size);
-  }
-  return upb_array_getelementptr(arr, arr->len++);
-}
-
-INLINE void upb_array_truncate(struct upb_array *arr)
-{
-  arr->len = 0;
-}
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif
diff --git a/src/upb_def.c b/src/upb_def.c
index 6bb1d0c..abf2b56 100644
--- a/src/upb_def.c
+++ b/src/upb_def.c
@@ -4,10 +4,10 @@
  * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
  */
 
+#include <stdlib.h>
 #include "descriptor.h"
 #include "upb_def.h"
-#include "upb_mm.h"
-#include "upb_msg.h"
+#include "upb_data.h"
 
 /* Rounds p up to the next multiple of t. */
 #define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
@@ -153,8 +153,7 @@ static void upb_def_init(struct upb_def *def, enum upb_def_type type,
   def->type = type;
   def->is_cyclic = 0;  // We detect this later, after resolving refs.
   def->search_depth = 0;
-  def->fqname = fqname;
-  upb_string_ref(fqname);
+  def->fqname = upb_string_getref(fqname, UPB_REF_FROZEN);
   upb_atomic_refcount_init(&def->refcount, 1);
 }
 
@@ -171,7 +170,7 @@ struct upb_unresolveddef {
 
 static struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *str) {
   struct upb_unresolveddef *def = malloc(sizeof(*def));
-  struct upb_string *name = upb_strdup(str);
+  upb_string *name = upb_string_getref(str, UPB_REF_THREADUNSAFE_READONLY);
   upb_def_init(&def->base, UPB_DEF_UNRESOLVED, name);
   def->name = name;
   return def;
@@ -191,7 +190,7 @@ static void fielddef_init(struct upb_fielddef *f,
   f->type = fd->type;
   f->label = fd->label;
   f->number = fd->number;
-  f->name = upb_strdup(fd->name);
+  f->name = upb_string_getref(fd->name, UPB_REF_FROZEN);
   f->def = NULL;
   f->owned = false;
   assert(fd->set_flags.has.type_name == upb_hasdef(f));
@@ -219,7 +218,7 @@ static void fielddef_uninit(struct upb_fielddef *f)
 static void fielddef_copy(struct upb_fielddef *dst, struct upb_fielddef *src)
 {
   *dst = *src;
-  dst->name = upb_strdup(src->name);
+  dst->name = upb_string_getref(src->name, UPB_REF_FROZEN);
   if(upb_hasdef(src)) {
     upb_def_ref(dst->def);
     dst->owned = true;
@@ -311,7 +310,7 @@ static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields,
 
     // Insert into the tables.
     struct upb_itof_ent itof_ent = {{f->number, 0}, f};
-    struct upb_ntof_ent ntof_ent = {{upb_strdup(f->name), 0}, f};
+    struct upb_ntof_ent ntof_ent = {{f->name, 0}, f};
     upb_inttable_insert(&m->itof, &itof_ent.e);
     upb_strtable_insert(&m->ntof, &ntof_ent.e);
   }
@@ -325,14 +324,6 @@ static void msgdef_free(struct upb_msgdef *m)
   for (upb_field_count_t i = 0; i < m->num_fields; i++)
     fielddef_uninit(&m->fields[i]);
   free(m->fields);
-
-  // Free refs from the strtable.
-  // TODO: once memory management for data is more developed, let the table
-  // handle freeing the refs itself.
-  struct upb_strtable_entry *e = upb_strtable_begin(&m->ntof);
-  for(; e; e = upb_strtable_next(&m->ntof, e)) {
-    upb_string_unref(e->key);
-  }
   upb_strtable_free(&m->ntof);
   upb_inttable_free(&m->itof);
   upb_def_uninit(&m->base);
@@ -372,7 +363,7 @@ static struct upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed,
 
   for(int i = 0; i < num_values; i++) {
     google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i];
-    struct ntoi_ent ntoi_ent = {{upb_strdup(value->name), 0}, value->number};
+    struct ntoi_ent ntoi_ent = {{value->name, 0}, value->number};
     struct iton_ent iton_ent = {{value->number, 0}, value->name};
     upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
     upb_inttable_insert(&e->iton, &iton_ent.e);
@@ -381,14 +372,6 @@ static struct upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed,
 }
 
 static void enumdef_free(struct upb_enumdef *e) {
-  // Free refs from the strtable.
-  // TODO: once memory management for data is more developed, let the table
-  // handle freeing the refs itself.
-  struct upb_strtable_entry *ent = upb_strtable_begin(&e->ntoi);
-  for(; ent; ent = upb_strtable_next(&e->ntoi, ent)) {
-    upb_string_unref(ent->key);
-  }
-
   upb_strtable_free(&e->ntoi);
   upb_inttable_free(&e->iton);
   upb_def_uninit(&e->base);
@@ -473,19 +456,19 @@ static struct symtab_ent *resolve(struct upb_strtable *t,
 /* Joins strings together, for example:
  *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
  *   join("", "Baz") -> "Baz"
- * Caller owns the returned string and must free it. */
+ * Caller owns a ref on the returned string. */
 static struct upb_string *join(struct upb_string *base, struct upb_string *name) {
   size_t len = base->byte_len + name->byte_len;
   if(base->byte_len > 0) len++;  /* For the separator. */
   struct upb_string *joined = upb_string_new();
-  upb_string_resize(joined, len);
+  char *joined_ptr = upb_string_getrwbuf(joined, len);
   if(base->byte_len > 0) {
     /* nested_base = base + '.' +  d->name */
-    memcpy(joined->ptr, base->ptr, base->byte_len);
-    joined->ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR;
-    memcpy(&joined->ptr[base->byte_len+1], name->ptr, name->byte_len);
+    memcpy(joined_ptr, base->ptr, base->byte_len);
+    joined_ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR;
+    memcpy(&joined_ptr[base->byte_len+1], name->ptr, name->byte_len);
   } else {
-    memcpy(joined->ptr, name->ptr, name->byte_len);
+    memcpy(joined_ptr, name->ptr, name->byte_len);
   }
   return joined;
 }
@@ -718,10 +701,8 @@ struct upb_symtab *upb_symtab_new()
 static void free_symtab(struct upb_strtable *t)
 {
   struct symtab_ent *e = upb_strtable_begin(t);
-  for(; e; e = upb_strtable_next(t, &e->e)) {
+  for(; e; e = upb_strtable_next(t, &e->e))
     upb_def_unref(e->def);
-    upb_string_unref(e->e.key);
-  }
   upb_strtable_free(t);
 }
 
@@ -838,10 +819,10 @@ void upb_symtab_addfds(struct upb_symtab *s,
 void upb_symtab_add_desc(struct upb_symtab *s, struct upb_string *desc,
                          struct upb_status *status)
 {
-  struct upb_msg *fds = upb_msg_new(s->fds_msgdef);
-  upb_msg_parsestr(fds, desc->ptr, desc->byte_len, status);
+  upb_msg *fds = upb_msg_new(s->fds_msgdef);
+  upb_msg_parsestr(fds, s->fds_msgdef, desc, status);
   if(!upb_ok(status)) return;
   upb_symtab_addfds(s, (google_protobuf_FileDescriptorSet*)fds, status);
-  upb_msg_unref(fds);
+  upb_msg_unref(fds, s->fds_msgdef);
   return;
 }
diff --git a/src/upb_def.h b/src/upb_def.h
index 0d497a4..87c2be8 100644
--- a/src/upb_def.h
+++ b/src/upb_def.h
@@ -156,21 +156,6 @@ INLINE bool upb_elem_ismm(struct upb_fielddef *f) {
   return upb_isstring(f) || upb_issubmsg(f);
 }
 
-/* Defined iff upb_field_ismm(f). */
-INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_fielddef *f) {
-  if(upb_isarray(f)) return UPB_MM_ARR_REF;
-  else if(upb_isstring(f)) return UPB_MM_STR_REF;
-  else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
-  else return -1;
-}
-
-/* Defined iff upb_elem_ismm(f). */
-INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_fielddef *f) {
-  if(upb_isstring(f)) return UPB_MM_STR_REF;
-  else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
-  else return -1;
-}
-
 // Internal-only interface for the upb compiler.
 // Sorts the given fielddefs in-place, according to what we think is an optimal
 // ordering of fields.  This can change from upb release to upb release.
diff --git a/src/upb_mm.c b/src/upb_mm.c
index 5f3cab0..f831671 100644
--- a/src/upb_mm.c
+++ b/src/upb_mm.c
@@ -17,30 +17,6 @@ static void upb_mm_destroy(union upb_value_ptr p, upb_mm_ptrtype type)
   }
 }
 
-void upb_msg_destroy(struct upb_msg *msg) {
-  for(upb_field_count_t i = 0; i < msg->def->num_fields; i++) {
-    struct upb_fielddef *f = &msg->def->fields[i];
-    if(!upb_msg_isset(msg, f) || !upb_field_ismm(f)) continue;
-    upb_mm_destroy(upb_msg_getptr(msg, f), upb_field_ptrtype(f));
-  }
-  upb_def_unref(UPB_UPCAST(msg->def));
-  free(msg);
-}
-
-void upb_array_destroy(struct upb_array *arr)
-{
-  if(upb_elem_ismm(arr->fielddef)) {
-    upb_arraylen_t i;
-    /* Unref elements. */
-    for(i = 0; i < arr->size; i++) {
-      union upb_value_ptr p = upb_array_getelementptr(arr, i);
-      upb_mm_destroy(p, upb_elem_ptrtype(arr->fielddef));
-    }
-  }
-  if(arr->size != 0) free(arr->elements._void);
-  free(arr);
-}
-
 static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type,
                                      struct upb_fielddef *f)
 {
diff --git a/src/upb_mm.h b/src/upb_mm.h
deleted file mode 100644
index 2e3e082..0000000
--- a/src/upb_mm.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * A parsed protobuf is represented in memory as a tree.  The three kinds of
- * nodes in this tree are messages, arrays, and strings.  This file defines
- * a memory-management scheme for making sure that these nodes are colected
- * at the right times.
- *
- * The basic strategy is reference-counting, but with a twist.  Since any
- * dynamic language that wishes to reference these nodes will need its own,
- * language-specific structure, we provide two different kinds of references:
- *
- * - counted references.  these are references that are tracked with only a
- *   reference count.  They are used for two separate purposes:
- *   1. for references within the tree, from one node to another.
- *   2. for external references into the tree, where the referer does not need
- *      a separate message structure.
- * - listed references.  these are references that have their own separate
- *   data record.  these separate records are kept in a linked list.
- */
-
-#ifndef UPB_MM_H_
-#define UPB_MM_H_
-
-#include "upb.h"
-#include "upb_string.h"
-#include "upb_array.h"
-#include "upb_msg.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Structure definitions. *****************************************************/
-
-typedef int16_t upb_mm_id;
-
-struct upb_msg;
-struct upb_array;
-struct upb_string;
-struct upb_fielddef;
-
-struct upb_mm_ref;
-/* Info about a mm. */
-struct upb_mm {
-  /* fromref is set iff this call is from getfieldref or getelemref. */
-  struct upb_mm_ref *(*newref_cb)(struct upb_mm_ref *fromref,
-                                  union upb_mmptr p, upb_mm_ptrtype type);
-};
-
-struct upb_mm_ref {
-  union upb_mmptr p;
-  /* This is slightly wasteful, because the mm-specific ref will probably also
-   * contain the information about what kind of ref this is, in a different
-   * form. */
-  upb_mm_ptrtype type;
-  struct upb_mm *mm;    /* TODO: There are ways to shrink this. */
-  struct upb_mm_ref *next;  /* Linked list for refs to the same value. */
-};
-
-/* Functions for working with listed references.  *****************************/
-
-/* Create a new top-level message and create a single ref for it. */
-struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm);
-
-/* Given a pointer to an existing msg, array, or string, find a ref for this
- * mm, creating one if necessary.  'created' indicates whether the returned
- * reference was just created. */
-struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type,
-                                 struct upb_mm *mm, bool *created);
-
-/* f must be ismm == true.  The msg field may or may not be set (will be
- * created if it doesn't exist).  If a ref already exists for the given field,
- * returns it, otherwise calls the given callback to create one.  'created'
- * indicates whether a new reference was created. */
-struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref,
-                                      struct upb_fielddef *f,
-                                      bool *refcreated);
-/* Array len must be < i. */
-struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i,
-                                     bool *refcreated);
-
-/* Remove this ref from the list for this msg.
- * If that was the last reference, deletes the msg itself. */
-void upb_mm_release(struct upb_mm_ref *ref);
-
-void upb_mm_msgset(struct upb_mm_ref *msg, struct upb_mm_ref *to,
-                   struct upb_fielddef *f);
-void upb_mm_msgclear(struct upb_mm_ref *from, struct upb_fielddef *f);
-void upb_mm_msgclear_all(struct upb_mm_ref *from);
-
-void upb_mm_arrset(struct upb_mm_ref *from, struct upb_mm_ref *to, uint32_t i);
-
-/* Defined iff upb_field_ismm(f). */
-INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_fielddef *f);
-/* Defined iff upb_elem_ismm(f). */
-INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_fielddef *f);
-
-INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type);
-
-/* These methods are all a bit silly, since all branches of the case compile
- * to the same thing (which the compiler will recognize), but we do it this way
- * for full union correctness. */
-INLINE union upb_mmptr upb_mmptr_read(union upb_value_ptr p, upb_mm_ptrtype t)
-{
-  union upb_mmptr val;
-  switch(t) {
-    case UPB_MM_MSG_REF: val.msg = *p.msg; break;
-    case UPB_MM_STR_REF: val.str = *p.str; break;
-    case UPB_MM_ARR_REF: val.arr = *p.arr; break;
-    default: assert(false); val.msg = *p.msg; break;  /* Shouldn't happen. */
-  }
-  return val;
-}
-
-INLINE void upb_mmptr_write(union upb_value_ptr p, union upb_mmptr val,
-                            upb_mm_ptrtype t)
-{
-  switch(t) {
-    case UPB_MM_MSG_REF: *p.msg = val.msg; break;
-    case UPB_MM_STR_REF: *p.str = val.str; break;
-    case UPB_MM_ARR_REF: *p.arr = val.arr; break;
-    default: assert(false); val.msg = *p.msg; break;  /* Shouldn't happen. */
-  }
-}
-
-void upb_array_destroy(struct upb_array *arr);
-void upb_msg_destroy(struct upb_msg *msg);
-
-INLINE void upb_msg_unref(struct upb_msg *msg) {
-  if(upb_mmhead_unref(&msg->mmhead)) upb_msg_destroy(msg);
-}
-
-INLINE void upb_array_unref(struct upb_array *arr) {
-  if(upb_mmhead_unref(&arr->mmhead)) upb_array_destroy(arr);
-}
-
-INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type)
-{
-  switch(type) {
-    case UPB_MM_MSG_REF: upb_msg_unref(p.msg); break;
-    case UPB_MM_STR_REF: upb_string_unref(p.str); break;
-    case UPB_MM_ARR_REF: upb_array_unref(p.arr);
-  }
-}
-
-static struct upb_mmhead *upb_mmhead_addr(union upb_mmptr p, upb_mm_ptrtype t)
-{
-  switch(t) {
-    case UPB_MM_MSG_REF: return &((*p.msg).mmhead);
-    case UPB_MM_STR_REF: return &((*p.str).mmhead);
-    case UPB_MM_ARR_REF: return &((*p.arr).mmhead);
-    default: assert(false); return &((*p.msg).mmhead);  /* Shouldn't happen. */
-  }
-}
-
-INLINE void upb_mm_ref(union upb_mmptr p, upb_mm_ptrtype type)
-{
-  upb_mmhead_ref(upb_mmhead_addr(p, type));
-}
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_MM_MSG_H_ */
diff --git a/src/upb_msg.c b/src/upb_msg.c
deleted file mode 100644
index dd6b72e..0000000
--- a/src/upb_msg.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#include <inttypes.h>
-#include <stdlib.h>
-#include "upb_msg.h"
-#include "descriptor.h"
-#include "upb_mm.h"
-#include "upb_parse.h"
-#include "upb_serialize.h"
-#include "upb_text.h"
-
-/* Parsing.  ******************************************************************/
-
-struct upb_msgparser_frame {
-  struct upb_msg *msg;
-};
-
-struct upb_msgparser {
-  struct upb_cbparser *s;
-  bool merge;
-  bool byref;
-  struct upb_msgparser_frame stack[UPB_MAX_NESTING], *top;
-};
-
-/* Helper function that returns a pointer to where the next value for field "f"
- * should be stored, taking into account whether f is an array that may need to
- * be allocated or resized. */
-static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
-                                         struct upb_fielddef *f)
-{
-  union upb_value_ptr p = upb_msg_getptr(msg, f);
-  if(upb_isarray(f)) {
-    if(!upb_msg_isset(msg, f)) {
-      if(!*p.arr || !upb_mmhead_only(&((*p.arr)->mmhead))) {
-        if(*p.arr)
-          upb_array_unref(*p.arr);
-        *p.arr = upb_array_new(f);
-      }
-      upb_array_truncate(*p.arr);
-      upb_msg_set(msg, f);
-    }
-    p = upb_array_append(*p.arr);
-  }
-  return p;
-}
-
-/* Callbacks for the stream parser. */
-
-static bool value_cb(void *udata, struct upb_msgdef *msgdef,
-                     struct upb_fielddef *f, union upb_value val)
-{
-  (void)msgdef;
-  struct upb_msgparser *mp = udata;
-  struct upb_msg *msg = mp->top->msg;
-  union upb_value_ptr p = get_value_ptr(msg, f);
-  upb_msg_set(msg, f);
-  upb_value_write(p, val, f->type);
-  return true;
-}
-
-static bool str_cb(void *udata, struct upb_msgdef *msgdef,
-                   struct upb_fielddef *f, uint8_t *str, size_t avail_len,
-                   size_t total_len)
-{
-  (void)msgdef;
-  struct upb_msgparser *mp = udata;
-  struct upb_msg *msg = mp->top->msg;
-  union upb_value_ptr p = get_value_ptr(msg, f);
-  upb_msg_set(msg, f);
-  if(avail_len != total_len) abort();  /* TODO: support streaming. */
-  //bool byref = avail_len == total_len && mp->byref;
-  if(!*p.str || !upb_mmhead_only(&((*p.str)->mmhead))) {
-    if(*p.str)
-      upb_string_unref(*p.str);
-    *p.str = upb_string_new();
-  }
-  //if(byref) {
-  //  upb_strdrop(*p.str);
-  //  (*p.str)->ptr = (char*)str;
-  //  (*p.str)->byte_len = avail_len;
-  //} else {
-    upb_string_resize(*p.str, total_len);
-    memcpy((*p.str)->ptr, str, avail_len);
-    (*p.str)->byte_len = avail_len;
-  //}
-  return true;
-}
-
-static void start_cb(void *udata, struct upb_fielddef *f)
-{
-  struct upb_msgparser *mp = udata;
-  struct upb_msg *oldmsg = mp->top->msg;
-  union upb_value_ptr p = get_value_ptr(oldmsg, f);
-
-  if(upb_isarray(f) || !upb_msg_isset(oldmsg, f)) {
-    if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) {
-      if(*p.msg)
-        upb_msg_unref(*p.msg);
-      *p.msg = upb_msg_new(upb_downcast_msgdef(f->def));
-    }
-    upb_msg_clear(*p.msg);
-    upb_msg_set(oldmsg, f);
-  }
-
-  mp->top++;
-  mp->top->msg = *p.msg;
-}
-
-static void end_cb(void *udata)
-{
-  struct upb_msgparser *mp = udata;
-  mp->top--;
-}
-
-/* Externally-visible functions for the msg parser. */
-
-struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def)
-{
-  struct upb_msgparser *mp = malloc(sizeof(struct upb_msgparser));
-  mp->s = upb_cbparser_new(def, value_cb, str_cb, start_cb, end_cb);
-  return mp;
-}
-
-void upb_msgparser_reset(struct upb_msgparser *s, struct upb_msg *msg, bool byref)
-{
-  upb_cbparser_reset(s->s, s);
-  s->byref = byref;
-  s->top = s->stack;
-  s->top->msg = msg;
-}
-
-void upb_msgparser_free(struct upb_msgparser *s)
-{
-  upb_cbparser_free(s->s);
-  free(s);
-}
-
-void upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len,
-                      struct upb_status *status)
-{
-  struct upb_msgparser *mp = upb_msgparser_new(msg->def);
-  upb_msgparser_reset(mp, msg, false);
-  upb_msg_clear(msg);
-  upb_msgparser_parse(mp, buf, len, status);
-  upb_msgparser_free(mp);
-}
-
-size_t upb_msgparser_parse(struct upb_msgparser *s, void *data, size_t len,
-                           struct upb_status *status)
-{
-  return upb_cbparser_parse(s->s, data, len, status);
-}
-
-/* Serialization.  ************************************************************/
-
-/* We store the message sizes linearly in post-order (size of parent after sizes
- * of children) for a right-to-left traversal of the message tree.  Iterating
- * over this in reverse gives us a pre-order (size of parent before sizes of
- * children) left-to-right traversal, which is what we want for parsing. */
-struct upb_msgsizes {
-  int len;
-  int size;
-  size_t *sizes;
-};
-
-/* Declared below -- this and get_valuesize are mutually recursive. */
-static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m);
-
-/* Returns a size of a value as it will be serialized.  Does *not* include
- * the size of the tag -- that is already accounted for. */
-static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
-                            struct upb_fielddef *f)
-{
-  switch(f->type) {
-    default: assert(false); return 0;  /* Internal corruption. */
-    case UPB_TYPE(MESSAGE): {
-      size_t submsg_size = get_msgsize(sizes, *p.msg);
-      return upb_get_INT32_size(submsg_size) + submsg_size;
-    }
-    case UPB_TYPE(GROUP): {
-      size_t endgrp_tag_size = upb_get_tag_size(f->number);
-      return endgrp_tag_size + get_msgsize(sizes, *p.msg);
-    }
-#define CASE(type, member) \
-    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type: \
-      return upb_get_ ## type ## _size(*p.member);
-    CASE(DOUBLE,   _double)
-    CASE(FLOAT,    _float)
-    CASE(INT32,    int32)
-    CASE(INT64,    int64)
-    CASE(UINT32,   uint32)
-    CASE(UINT64,   uint64)
-    CASE(SINT32,   int32)
-    CASE(SINT64,   int64)
-    CASE(FIXED32,  uint32)
-    CASE(FIXED64,  uint64)
-    CASE(SFIXED32, int32)
-    CASE(SFIXED64, int64)
-    CASE(BOOL,     _bool)
-    CASE(ENUM,     int32)
-#undef CASE
-  }
-}
-
-/* This is mostly just a pure recursive function to calculate the size of a
- * message.  However it also stores the results of each level of the recursion
- * in sizes, because we need all of this intermediate information later. */
-static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m)
-{
-  size_t size = 0;
-  /* We iterate over fields and arrays in reverse order. */
-  for(int32_t i = m->def->num_fields - 1; i >= 0; i--) {
-    struct upb_fielddef *f = &m->def->fields[i];
-    if(!upb_msg_isset(m, f)) continue;
-    union upb_value_ptr p = upb_msg_getptr(m, f);
-    if(upb_isarray(f)) {
-      for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) {
-        union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j);
-        /* TODO: for packed arrays tag size goes outside the loop. */
-        size += upb_get_tag_size(f->number);
-        size += get_valuesize(sizes, elem, f);
-      }
-    } else {
-      size += upb_get_tag_size(f->number);
-      size += get_valuesize(sizes, p, f);
-    }
-  }
-  /* Resize the 'sizes' array if necessary. */
-  assert(sizes->len <= sizes->size);
-  if(sizes->len == sizes->size) {
-    sizes->size *= 2;
-    sizes->sizes = realloc(sizes->sizes, sizes->size * sizeof(size_t));
-  }
-  /* Add our size (already added our children, so post-order). */
-  sizes->sizes[sizes->len++] = size;
-  return size;
-}
-
-void upb_msgsizes_read(struct upb_msgsizes *sizes, struct upb_msg *m)
-{
-  get_msgsize(sizes, m);
-}
-
-/* Initialize/free a upb_msg_sizes for the given message. */
-void upb_msgsizes_init(struct upb_msgsizes *sizes)
-{
-  sizes->len = 0;
-  sizes->size = 0;
-  sizes->sizes = NULL;
-}
-
-void upb_msgsizes_free(struct upb_msgsizes *sizes)
-{
-  free(sizes->sizes);
-}
-
-size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes)
-{
-  return sizes->sizes[sizes->len-1];
-}
-
-struct upb_msg_serialize_state {
-  struct {
-    int field_iter;
-    int elem_iter;
-    struct upb_msgdef *m;
-    void *msg;
-  } stack[UPB_MAX_NESTING], *top, *limit;
-};
-
-void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s)
-{
-  (void)s;
-}
-
-void upb_msg_serialize_free(struct upb_msg_serialize_state *s)
-{
-  (void)s;
-}
-
-void upb_msg_serialize_init(struct upb_msg_serialize_state *s, struct upb_msg *m,
-                            struct upb_msgsizes *sizes)
-{
-  (void)s;
-  (void)m;
-  (void)sizes;
-}
-
-#if 0
-static uint8_t *serialize_tag(uint8_t *buf, uint8_t *end,
-                              struct upb_fielddef *f,
-                              struct upb_status *status)
-{
-  /* TODO: need to have the field number also. */
-  return upb_put_UINT32(buf, end, f->type, status);
-}
-
-/* Serializes the next set of bytes into buf (which has size len).  Returns
- * UPB_STATUS_OK if serialization is complete, or UPB_STATUS_NEED_MORE_DATA
- * if there is more data from the message left to be serialized.
- *
- * The number of bytes written to buf is returned in *read.  This will be
- * equal to len unless we finished serializing. */
-size_t upb_msg_serialize(struct upb_msg_serialize_state *s,
-                         void *_buf, size_t len, struct upb_status *status)
-{
-  uint8_t *buf = _buf;
-  uint8_t *end = buf + len;
-  uint8_t *const start = buf;
-  int i = s->top->field_iter;
-  //int j = s->top->elem_iter;
-  void *msg = s->top->msg;
-  struct upb_msgdef *m = s->top->m;
-
-  while(buf < end) {
-    struct upb_fielddef *f = &m->fields[i];
-    //union upb_value_ptr p = upb_msg_getptr(msg, f);
-    buf = serialize_tag(buf, end, f, status);
-    if(f->type == UPB_TYPE(MESSAGE)) {
-    } else if(f->type == UPB_TYPE(GROUP)) {
-    } else if(upb_isstring(f)) {
-    } else {
-      //upb_serialize_value(buf, end, f->type, p, status);
-    }
-  }
-  return buf - start;
-}
-#endif
-
-
-/* Comparison.  ***************************************************************/
-
-bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
-                   upb_field_type_t type)
-{
-#define CMP(type) return *p1.type == *p2.type;
-  switch(type) {
-    case UPB_TYPE(DOUBLE):
-      CMP(_double)
-    case UPB_TYPE(FLOAT):
-      CMP(_float)
-    case UPB_TYPE(INT64):
-    case UPB_TYPE(SFIXED64):
-    case UPB_TYPE(SINT64):
-      CMP(int64)
-    case UPB_TYPE(UINT64):
-    case UPB_TYPE(FIXED64):
-      CMP(uint64)
-    case UPB_TYPE(INT32):
-    case UPB_TYPE(SFIXED32):
-    case UPB_TYPE(SINT32):
-      CMP(int32)
-    case UPB_TYPE(UINT32):
-    case UPB_TYPE(FIXED32):
-    case UPB_TYPE(ENUM):
-      CMP(uint32);
-    case UPB_TYPE(BOOL):
-      CMP(_bool);
-    case UPB_TYPE(STRING):
-    case UPB_TYPE(BYTES):
-      return upb_streql(*p1.str, *p2.str);
-    default: return false;
-  }
-}
-
-bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2,
-                   struct upb_fielddef *f, bool recursive)
-{
-  if(arr1->len != arr2->len) return false;
-  if(upb_issubmsg(f)) {
-    if(!recursive) return true;
-    for(uint32_t i = 0; i < arr1->len; i++)
-      if(!upb_msg_eql(arr1->elements.msg[i], arr2->elements.msg[i], recursive))
-        return false;
-  } else if(upb_isstring(f)) {
-    for(uint32_t i = 0; i < arr1->len; i++)
-      if(!upb_streql(arr1->elements.str[i], arr2->elements.str[i]))
-        return false;
-  } else {
-    /* For primitive types we can compare the memory directly. */
-    return memcmp(arr1->elements._void, arr2->elements._void,
-                  arr1->len * upb_type_info[f->type].size) == 0;
-  }
-  return true;
-}
-
-bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive)
-{
-  /* Must have the same fields set.  TODO: is this wrong?  Should we also
-   * consider absent defaults equal to explicitly set defaults? */
-  if(msg1->def != msg2->def) return false;
-  struct upb_msgdef *m = msg1->def;
-  if(memcmp(msg1->data, msg2->data, msg1->def->set_flags_bytes) != 0)
-    return false;
-
-  /* Possible optimization: create a mask of the bytes in the messages that
-   * contain only primitive values (not strings, arrays, submessages, or
-   * padding) and memcmp the masked messages. */
-
-  for(upb_field_count_t i = 0; i < m->num_fields; i++) {
-    struct upb_fielddef *f = &m->fields[i];
-    bool msg1set = upb_msg_isset(msg1, f);
-    bool msg2set = upb_msg_isset(msg2, f);
-    if(msg1set != msg2set) return false;
-    if(!msg1set) continue;
-    union upb_value_ptr p1 = upb_msg_getptr(msg1, f);
-    union upb_value_ptr p2 = upb_msg_getptr(msg2, f);
-    if(upb_isarray(f)) {
-      if(!upb_array_eql(*p1.arr, *p2.arr, f, recursive)) return false;
-    } else if(upb_issubmsg(f)) {
-      if(recursive && !upb_msg_eql(*p1.msg, *p2.msg, recursive))
-        return false;
-    } else if(!upb_value_eql(p1, p2, f->type)) {
-      return false;
-    }
-  }
-  return true;
-}
diff --git a/src/upb_msg.h b/src/upb_msg.h
deleted file mode 100644
index adee884..0000000
--- a/src/upb_msg.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * The upb_msg routines provide facilities for creating and manipulating
- * messages according to a upb_msgdef definition.
- *
- * A upb_msg is READ-ONLY, and the upb_msgdef functions in this file provide
- * read-only access.  For a mutable message, or for a message that you can take
- * a reference to to prevents its destruction, see upb_mm_msg.h, which is a
- * layer on top of upb_msg that adds memory management semantics.
- *
- * The in-memory format is very much like a C struct that you can define at
- * run-time, but also supports reflection.  Like C structs it supports
- * offset-based access, as opposed to the much slower name-based lookup.  The
- * format stores both the values themselves and bits describing whether each
- * field is set or not.
- *
- * For a more in-depth description of the in-memory format, see:
- *   http://wiki.github.com/haberman/upb/inmemoryformat
- *
- * Because the C struct emitted by the upb compiler uses exactly the same
- * byte-level format as the reflection interface, you can access the same hunk
- * of memory either way.  The C struct provides maximum performance and static
- * type safety; upb_msg_def provides flexibility.
- *
- * The in-memory format has no interoperability guarantees whatsoever, except
- * that a single version of upb will interoperate with itself.  Don't even
- * think about persisting the in-memory format or sending it anywhere.  That's
- * what serialized protobufs are for!  The in-memory format is just that -- an
- * in-memory representation that allows for fast access.
- */
-
-#ifndef UPB_MSG_H_
-#define UPB_MSG_H_
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stddef.h>
-
-#include "descriptor.h"
-#include "upb.h"
-#include "upb_def.h"
-#include "upb_parse.h"
-#include "upb_table.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Message structure. *********************************************************/
-
-/* Constructs a new msg corresponding to the given msgdef, and having one
- * counted reference. */
-INLINE struct upb_msg *upb_msg_new(struct upb_msgdef *md) {
-  size_t size = md->size + offsetof(struct upb_msg, data);
-  struct upb_msg *msg = (struct upb_msg*)malloc(size);
-  memset(msg, 0, size);
-  upb_mmhead_init(&msg->mmhead);
-  msg->def = md;
-  upb_def_ref(UPB_UPCAST(md));
-  return msg;
-}
-
-/* Field access. **************************************************************/
-
-/* Note that these only provide access to fields that are directly in the msg
- * itself.  For dynamic fields (strings, arrays, and submessages) it will be
- * necessary to dereference the returned values. */
-
-/* Returns a pointer to a specific field in a message. */
-INLINE union upb_value_ptr upb_msg_getptr(struct upb_msg *msg,
-                                          struct upb_fielddef *f) {
-  union upb_value_ptr p;
-  p._void = &msg->data[f->byte_offset];
-  return p;
-}
-
-/* "Set" flag reading and writing.  *******************************************/
-
-/* All upb code and code using upb should guarantee that the set flags are
- * always valid.  It should always be the case that if a flag's field is set
- * for a dynamic field that the pointer is valid.
- *
- * Clients should never set fields on a plain upb_msg, only on a upb_mm_msg. */
-
-/* Returns the byte offset where we store whether this field is set. */
-INLINE size_t upb_isset_offset(uint32_t field_index) {
-  return field_index / 8;
-}
-
-/* Returns the mask within the appropriate byte that selects the set bit. */
-INLINE uint8_t upb_isset_mask(uint32_t field_index) {
-  return 1 << (field_index % 8);
-}
-
-/* Returns true if the given field is set, false otherwise. */
-INLINE void upb_msg_set(struct upb_msg *msg, struct upb_fielddef *f)
-{
-  msg->data[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
-}
-
-/* Clears the set bit for this field in the given message. */
-INLINE void upb_msg_unset(struct upb_msg *msg, struct upb_fielddef *f)
-{
-  msg->data[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
-}
-
-/* Tests whether the given field is set. */
-INLINE bool upb_msg_isset(struct upb_msg *msg, struct upb_fielddef *f)
-{
-  return msg->data[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
-}
-
-/* Returns true if *all* required fields are set, false otherwise. */
-INLINE bool upb_msg_all_required_fields_set(struct upb_msg *msg)
-{
-  int num_fields = msg->def->num_required_fields;
-  int i = 0;
-  while(num_fields > 8) {
-    if(msg->data[i++] != 0xFF) return false;
-    num_fields -= 8;
-  }
-  if(msg->data[i] != (1 << num_fields) - 1) return false;
-  return true;
-}
-
-/* Clears the set bit for all fields. */
-INLINE void upb_msg_clear(struct upb_msg *msg)
-{
-  memset(msg->data, 0, msg->def->set_flags_bytes);
-}
-
-/* Parsing ********************************************************************/
-
-/* TODO: a stream parser. */
-void upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len,
-                      struct upb_status *status);
-
-struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def);
-void upb_msgparser_free(struct upb_msgparser *mp);
-
-void upb_msgparser_reset(struct upb_msgparser *mp, struct upb_msg *m,
-                         bool byref);
-
-size_t upb_msgparser_parse(struct upb_msgparser *mp, void *buf, size_t len,
-                           struct upb_status *status);
-
-/* Serialization  *************************************************************/
-
-/* For messages that contain any submessages, we must do a pre-pass on the
- * message tree to discover the size of all submessages.  This is necessary
- * because when serializing, the message length has to precede the message data
- * itself.
- *
- * We can calculate these sizes once and reuse them as long as the message is
- * known not to have changed. */
-struct upb_msgsizes;
-
-/* Initialize/free a upb_msgsizes for the given message. */
-struct upb_msgsizes *upb_msgsizes_new(void);
-void upb_msgsizes_free(struct upb_msgsizes *sizes);
-
-/* Given a previously initialized sizes, recurse over the message and store its
- * sizes in 'sizes'. */
-void upb_msgsizes_read(struct upb_msgsizes *sizes, struct upb_msg *msg);
-
-/* Returns the total size of the serialized message given in sizes.  Must be
- * preceeded by a call to upb_msgsizes_read. */
-size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes);
-
-struct upb_msg_serialize_state;
-
-/* Initializes the state of serialization.  The provided message must not
- * change between the upb_msgsizes_read() call that was used to construct
- * "sizes" and the parse being fully completed. */
-void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s);
-void upb_msg_serialize_free(struct upb_msg_serialize_state *s);
-void upb_msg_serialize_init(struct upb_msg_serialize_state *s,
-                            struct upb_msg *msg, struct upb_msgsizes *sizes);
-
-/* Serializes the next set of bytes into buf (which has size len).  Returns
- * UPB_STATUS_OK if serialization is complete, or UPB_STATUS_NEED_MORE_DATA
- * if there is more data from the message left to be serialized.
- *
- * The number of bytes written to buf is returned in *written.  This will be
- * equal to len unless we finished serializing. */
-size_t upb_msg_serialize(struct upb_msg_serialize_state *s,
-                         void *buf, size_t len, struct upb_status *status);
-
-void upb_msg_serialize_all(struct upb_msg *msg, struct upb_msgsizes *sizes,
-                           void *buf, struct upb_status *status);
-
-/* Text dump  *****************************************************************/
-
-bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive);
-void upb_msg_print(struct upb_msg *data, bool single_line, FILE *stream);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_MSG_H_ */
diff --git a/src/upb_string.c b/src/upb_string.c
deleted file mode 100644
index 54df4f1..0000000
--- a/src/upb_string.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- */
-
-#include <stdio.h>
-#include "upb_string.h"
-
-struct upb_string *upb_strreadfile(const char *filename) {
-  FILE *f = fopen(filename, "rb");
-  if(!f) return false;
-  if(fseek(f, 0, SEEK_END) != 0) goto error;
-  long size = ftell(f);
-  if(size < 0) goto error;
-  if(fseek(f, 0, SEEK_SET) != 0) goto error;
-  struct upb_string *s = upb_string_new();
-  upb_string_resize(s, size);
-  if(fread(s->ptr, size, 1, f) != 1) goto error;
-  fclose(f);
-  return s;
-
-error:
-  fclose(f);
-  return NULL;
-}
diff --git a/src/upb_string.h b/src/upb_string.h
deleted file mode 100644
index c1caddc..0000000
--- a/src/upb_string.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
-
- * Defines a delimited (as opposed to null-terminated) string type and some
- * library functions for manipulating them.
- *
- * There are two primary reasons upb uses delimited strings.  One is that they
- * can be more efficient for some operations because they do not have to scan
- * the string to find its length.  For example, streql can start by just
- * comparing the lengths (very efficient) and scan the strings themselves only
- * if the lengths are equal.
- *
- * More importantly, using delimited strings makes it possible for strings to
- * reference substrings of other strings.  For example, if I am parsing a
- * protobuf I can create a string that references the original protobuf's
- * string data.  With NULL-termination I would be forced to write a NULL
- * into the middle of the protobuf's data, which is less than ideal and in
- * some cases not practical or possible.
- */
-
-#ifndef UPB_STRING_H_
-#define UPB_STRING_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "upb_struct.h"
-
-/* Allocation/Deallocation/Resizing. ******************************************/
-
-INLINE struct upb_string *upb_string_new(void)
-{
-  struct upb_string *str = (struct upb_string*)malloc(sizeof(*str));
-  upb_mmhead_init(&str->mmhead);
-  str->ptr = NULL;
-  str->byte_len = 0;
-  str->byte_size = 0;
-  return str;
-}
-
-/* For internal use only. */
-INLINE void upb_string_destroy(struct upb_string *str)
-{
-  if(str->byte_size != 0) free(str->ptr);
-  free(str);
-}
-
-INLINE void upb_string_unref(struct upb_string *str)
-{
-  if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str);
-}
-
-INLINE void upb_string_ref(struct upb_string *str)
-{
-  upb_mmhead_ref(&str->mmhead);
-}
-
-/* Resizes the string to size, reallocating if necessary.  Does not preserve
- * existing data. */
-INLINE void upb_string_resize(struct upb_string *str, uint32_t size)
-{
-  if(str->byte_size < size) {
-    /* Need to resize. */
-    str->byte_size = size;
-    void *oldptr = str->byte_size == 0 ? NULL : str->ptr;
-    str->ptr = (char*)realloc(oldptr, str->byte_size);
-  }
-  str->byte_len = size;
-}
-
-/* Library functions. *********************************************************/
-
-INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) {
-  return s1->byte_len == s2->byte_len &&
-         memcmp(s1->ptr, s2->ptr, s1->byte_len) == 0;
-}
-
-INLINE int upb_strcmp(struct upb_string *s1, struct upb_string *s2) {
-  size_t common_length = UPB_MIN(s1->byte_len, s2->byte_len);
-  int common_diff = memcmp(s1->ptr, s2->ptr, common_length);
-  return common_diff == 0 ? (int)s1->byte_len - (int)s2->byte_len : common_diff;
-}
-
-INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) {
-  dest->byte_len = src->byte_len;
-  upb_string_resize(dest, dest->byte_len);
-  memcpy(dest->ptr, src->ptr, src->byte_len);
-}
-
-INLINE struct upb_string *upb_strdup(struct upb_string *s) {
-  struct upb_string *copy = upb_string_new();
-  upb_strcpy(copy, s);
-  return copy;
-}
-
-INLINE struct upb_string *upb_strdupc(const char *s) {
-  struct upb_string *copy = upb_string_new();
-  copy->byte_len = strlen(s);
-  upb_string_resize(copy, copy->byte_len);
-  memcpy(copy->ptr, s, copy->byte_len);
-  return copy;
-}
-
-/* Reads an entire file into a newly-allocated string. */
-struct upb_string *upb_strreadfile(const char *filename);
-
-/* Allows defining upb_strings as literals, ie:
- *   struct upb_string str = UPB_STRLIT("Hello, World!\n");
- * Doesn't work with C++ due to lack of struct initializer syntax.
- */
-#define UPB_STRLIT(strlit) {.ptr=strlit, .byte_len=sizeof(strlit)-1}
-
-/* Allows using upb_strings in printf, ie:
- *   struct upb_string str = UPB_STRLIT("Hello, World!\n");
- *   printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */
-#define UPB_STRARG(str) (str)->byte_len, (str)->ptr
-#define UPB_STRFMT "%.*s"
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif  /* UPB_H_ */
diff --git a/src/upb_struct.h b/src/upb_struct.h
deleted file mode 100644
index 094c207..0000000
--- a/src/upb_struct.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
- *
- * This file defines the in-memory format for messages, arrays, and strings
- * (which are the three dynamically-allocated structures that make up all
- * protobufs). */
-
-#ifndef UPB_STRUCT_H
-#define UPB_STRUCT_H
-
-#include "upb.h"
-
-/* mmhead -- this is a "base class" for strings, arrays, and messages ********/
-
-struct upb_mm_ref;
-struct upb_mmhead {
-  struct upb_mm_ref *refs;  /* Head of linked list. */
-  uint32_t refcount;
-};
-
-INLINE void upb_mmhead_init(struct upb_mmhead *head) {
-  head->refs = NULL;
-  head->refcount = 1;
-}
-
-INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) {
-  return head->refcount == 0 && head->refs == NULL;
-}
-
-INLINE bool upb_mmhead_only(struct upb_mmhead *head) {
-  return head->refcount == 1 && head->refs == NULL;
-}
-
-INLINE bool upb_mmhead_unref(struct upb_mmhead *head) {
-  head->refcount--;
-  return upb_mmhead_norefs(head);
-}
-
-INLINE void upb_mmhead_ref(struct upb_mmhead *head) {
-  head->refcount++;
-}
-
-/* Structures for msg, string, and array. *************************************/
-
-/* These are all self describing. */
-
-struct upb_msgdef;
-struct upb_fielddef;
-
-struct upb_msg {
-  struct upb_mmhead mmhead;
-  struct upb_msgdef *def;
-  uint8_t data[1];
-};
-
-typedef uint32_t upb_arraylen_t;  /* can be at most 2**32 elements long. */
-struct upb_array {
-  struct upb_mmhead mmhead;
-  struct upb_fielddef *fielddef;  /* Defines the type of the array. */
-  union upb_value_ptr elements;
-  upb_arraylen_t len;     /* Number of elements in "elements". */
-  upb_arraylen_t size;    /* Memory we own. */
-};
-
-struct upb_string {
-  struct upb_mmhead mmhead;
-  /* We expect the data to be 8-bit clean (uint8_t), but char* is such an
-   * ingrained convention that we follow it. */
-  char *ptr;
-  uint32_t byte_len;
-  uint32_t byte_size;  /* How many bytes of ptr we own, 0 if we reference. */
-};
-
-/* Type-specific overlays on upb_array. ***************************************/
-
-#define UPB_DEFINE_ARRAY_TYPE(name, type) \
-  struct name ## _array { \
-    struct upb_mmhead mmhead; \
-    struct upb_fielddef *fielddef; \
-    type elements; \
-    upb_arraylen_t len; \
-    upb_arraylen_t size; \
-  };
-
-UPB_DEFINE_ARRAY_TYPE(upb_double, double)
-UPB_DEFINE_ARRAY_TYPE(upb_float,  float)
-UPB_DEFINE_ARRAY_TYPE(upb_int32,  int32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_int64,  int64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_bool,   bool)
-UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
-UPB_DEFINE_ARRAY_TYPE(upb_msg,    void*)
-
-/* Defines an array of a specific message type (an overlay of upb_array). */
-#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
-#define UPB_DEFINE_MSG_ARRAY(msg_type) \
-  UPB_MSG_ARRAY(msg_type) { \
-    struct upb_mmhead mmhead; \
-    struct upb_fielddef *fielddef; \
-    msg_type **elements; \
-    upb_arraylen_t len; \
-    upb_arraylen_t size; \
-    };
-
-/* mmptr -- a pointer which polymorphically points to one of the above. *******/
-
-union upb_mmptr {
-  struct upb_msg *msg;
-  struct upb_array *arr;
-  struct upb_string *str;
-};
-
-enum {
-  UPB_MM_MSG_REF,
-  UPB_MM_STR_REF,
-  UPB_MM_ARR_REF
-};
-typedef uint8_t upb_mm_ptrtype;
-
-#endif
diff --git a/src/upb_table.c b/src/upb_table.c
index 2c4c824..e73c6f4 100644
--- a/src/upb_table.c
+++ b/src/upb_table.c
@@ -47,7 +47,14 @@ void upb_strtable_init(struct upb_strtable *t, uint32_t size, uint16_t entsize)
 
 void upb_table_free(struct upb_table *t) { free(t->entries); }
 void upb_inttable_free(struct upb_inttable *t) { upb_table_free(&t->t); }
-void upb_strtable_free(struct upb_strtable *t) { upb_table_free(&t->t); }
+void upb_strtable_free(struct upb_strtable *t) {
+  // Release the ref each entry holds on its key before freeing the entries.
+  struct upb_strtable_entry *e = upb_strtable_begin(t);
+  for(; e; e = upb_strtable_next(t, e)) {
+    upb_string_unref(e->key);
+  }
+  upb_table_free(&t->t);
+}
 
 static uint32_t strtable_bucket(struct upb_strtable *t, struct upb_string *key)
 {
@@ -150,6 +157,7 @@ static uint32_t empty_strbucket(struct upb_strtable *table)
 static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e)
 {
   assert(upb_strtable_lookup(t, e->key) == NULL);
+  e->key = upb_string_getref(e->key, UPB_REF_FROZEN);
   t->t.count++;
   uint32_t bucket = strtable_bucket(t, e->key);
   struct upb_strtable_entry *table_e = strent(t, bucket);
diff --git a/src/upb_table.h b/src/upb_table.h
index 2202684..8250354 100644
--- a/src/upb_table.h
+++ b/src/upb_table.h
@@ -17,7 +17,7 @@
 
 #include <assert.h>
 #include "upb.h"
-#include "upb_string.h"
+#include "upb_data.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -39,7 +39,7 @@ struct upb_inttable_entry {
 // performance by letting us compare hashes before comparing lengths or the
 // strings themselves.
 struct upb_strtable_entry {
-  struct upb_string *key;  // We own one ref.
+  struct upb_string *key;  // We own a frozen ref.
   uint32_t next;           // Internal chaining.
 };
author	Joshua Haberman <joshua@reverberate.org>	2009-12-21 10:48:01 -0800
committer	Joshua Haberman <joshua@reverberate.org>	2009-12-21 10:48:01 -0800
commit	c2419764856e5666bfa9e3c1b87de29ec93babe1 (patch)
tree	7771bf0bbcf9b1103a55b963831385b07705b739
parent	c6cba2af37638cc47ff69aed866669567ef365d9 (diff)