10 files changed, 452 insertions, 54 deletions
diff --git a/Makefile b/Makefile
index 203bed6..131b3c0 100644
--- a/Makefile
+++ b/Makefile
@@ -55,6 +55,7 @@ clean:
 # The core library (core/libupb.a)
 SRC=core/upb.c stream/upb_decoder.c core/upb_table.c core/upb_def.c core/upb_string.c \
     core/upb_stream.c stream/upb_stdio.c stream/upb_strstream.c stream/upb_textprinter.c \
+    core/upb_msg.c \
     descriptor/descriptor.c
 $(SRC): perf-cppflags
 # Parts of core that are yet to be converted.
@@ -101,14 +102,13 @@ tests/test.proto.pb: tests/test.proto
 TESTS=tests/test_string \
     tests/test_table \
     tests/test_def \
-    tests/test_decoder
-tests: $(TESTS)
-
-OTHER_TESTS=tests/tests \
-    tests/test_table \
+    tests/test_decoder \
     tests/t.test_vs_proto2.googlemessage1 \
     tests/t.test_vs_proto2.googlemessage2 \
     tests/test.proto.pb
+tests: $(TESTS)
+
+OTHER_TESTS=tests/tests \
 $(TESTS): core/libupb.a
 
 VALGRIND=valgrind --leak-check=full --error-exitcode=1 
diff --git a/core/upb.h b/core/upb.h
index b605fd9..7ee0469 100644
--- a/core/upb.h
+++ b/core/upb.h
@@ -80,24 +80,16 @@ enum upb_wire_type {
 
 typedef uint8_t upb_wire_type_t;
 
-// Value type as defined in a .proto file.  eg. string, int32, etc.  The
+// Type of a field as defined in a .proto file.  eg. string, int32, etc.  The
 // integers that represent this are defined by descriptor.proto.  Note that
 // descriptor.proto reserves "0" for errors, and we use it to represent
 // exceptional circumstances.
-typedef uint8_t upb_field_type_t;
+typedef uint8_t upb_fieldtype_t;
 
 // For referencing the type constants tersely.
 #define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
 #define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type
 
-INLINE bool upb_issubmsgtype(upb_field_type_t type) {
-  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
-}
-
-INLINE bool upb_isstringtype(upb_field_type_t type) {
-  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
-}
-
 // Info for a given field type.
 typedef struct {
   uint8_t align;
@@ -129,6 +121,10 @@ typedef union {
 
 struct _upb_string;
 typedef struct _upb_string upb_string;
+struct _upb_array;
+typedef struct _upb_array upb_array;
+struct _upb_msg;
+typedef struct _upb_msg upb_msg;
 
 typedef uint32_t upb_strlen_t;
 
@@ -142,6 +138,11 @@ typedef union {
   uint32_t uint32;
   uint64_t uint64;
   bool _bool;
+  upb_string *str;
+  upb_msg *msg;
+  upb_array *arr;
+  upb_atomic_refcount_t *refcount;
+  void *_void;
 } upb_value;
 
 // A pointer to a .proto value.  The owner must have an out-of-band way of
@@ -155,13 +156,90 @@ typedef union {
   uint32_t *uint32;
   uint64_t *uint64;
   bool *_bool;
+  upb_string **str;
+  upb_msg **msg;
+  upb_array **arr;
+  void *_void;
 } upb_valueptr;
 
+// The type of a upb_value.  This is like a upb_fieldtype_t, but adds the
+// constant UPB_VALUETYPE_ARRAY to represent an array.
+typedef uint8_t upb_valuetype_t;
+#define UPB_VALUETYPE_ARRAY 32
+
 INLINE upb_valueptr upb_value_addrof(upb_value *val) {
   upb_valueptr ptr = {&val->_double};
   return ptr;
 }
 
+// Converts upb_value_ptr -> upb_value by reading from the pointer.  We need to
+// know the value type to perform this operation, because we need to know how
+// much memory to copy.
+INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
+  upb_value val;
+
+#define CASE(t, member_name) \
+  case UPB_TYPE(t): val.member_name = *ptr.member_name; break;
+
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    CASE(STRING,   str)
+    CASE(BYTES,    str)
+    CASE(MESSAGE,  msg)
+    CASE(GROUP,    msg)
+    default: break;
+  }
+  return val;
+
+#undef CASE
+}
+
+// Writes a upb_value to a upb_value_ptr location. We need to know the value
+// type to perform this operation, because we need to know how much memory to
+// copy.
+INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
+                            upb_fieldtype_t ft) {
+#define CASE(t, member_name) \
+  case UPB_TYPE(t): *ptr.member_name = val.member_name; break;
+
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    CASE(STRING,   str)
+    CASE(BYTES,    str)
+    CASE(MESSAGE,  msg)
+    CASE(GROUP,    msg)
+    default: break;
+  }
+
+#undef CASE
+}
+
 // Status codes used as a return value.  Codes >0 are not fatal and can be
 // resumed.
 enum upb_status_code {
diff --git a/core/upb_atomic.h b/core/upb_atomic.h
index 01fc8a2..1cd848b 100644
--- a/core/upb_atomic.h
+++ b/core/upb_atomic.h
@@ -127,6 +127,10 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
        Implement them or compile with UPB_THREAD_UNSAFE.
 #endif
 
+INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) {
+  return upb_atomic_read(a) == 1;
+}
+
 /* Reader/Writer lock. ********************************************************/
 
 #ifdef UPB_THREAD_UNSAFE
diff --git a/core/upb_def.c b/core/upb_def.c
index e117455..1c8fbdc 100644
--- a/core/upb_def.c
+++ b/core/upb_def.c
@@ -12,6 +12,16 @@
 #define CHECKSRC(x) if(!(x)) goto src_err
 #define CHECK(x) if(!(x)) goto err
 
+/* Rounds p up to the next multiple of t. */
+static size_t upb_align_up(size_t val, size_t align) {
+  return val % align == 0 ? val : val + align - (val % align);
+}
+
+static int upb_div_round_up(int numerator, int denominator) {
+  /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
+  return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
+}
+
 // A little dynamic array for storing a growing list of upb_defs.
 typedef struct {
   upb_def **defs;
@@ -409,6 +419,19 @@ src_err:
 
 /* upb_msgdef *****************************************************************/
 
+static int upb_compare_typed_fields(upb_fielddef *f1, upb_fielddef *f2) {
+  // Sort by data size (ascending) to reduce padding.
+  size_t size1 = upb_types[f1->type].size;
+  size_t size2 = upb_types[f2->type].size;
+  if (size1 != size2) return size1 - size2;
+  // Otherwise return in number order (just so we get a reproduceable order.
+  return f1->number - f2->number;
+}
+
+static int upb_compare_fields(const void *f1, const void *f2) {
+  return upb_compare_typed_fields(*(void**)f1, *(void**)f2);
+}
+
 // Processes a google.protobuf.DescriptorProto, adding defs to "defs."
 static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status)
 {
@@ -418,7 +441,6 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status)
   upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
   upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
   int32_t start_count = defs->len;
-
   upb_fielddef *f;
   while((f = upb_src_getdef(src)) != NULL) {
     switch(f->number) {
@@ -451,6 +473,45 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status)
     upb_seterr(status, UPB_STATUS_ERROR, "Encountered message with no name.");
     goto err;
   }
+
+
+  // Create an ordering over the fields.
+  upb_field_count_t n = upb_msgdef_numfields(m);
+  upb_fielddef **sorted_fields = malloc(sizeof(upb_fielddef*) * n);
+  upb_field_count_t field = 0;
+  upb_msg_iter i;
+  for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+    sorted_fields[field++]= upb_msg_iter_field(i);
+  }
+  qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
+
+  // Assign offsets in the msg.
+  m->set_flags_bytes = upb_div_round_up(n, 8);
+  m->size = sizeof(upb_atomic_refcount_t) + m->set_flags_bytes;
+
+  size_t max_align = 0;
+  for (int i = 0; i < n; i++) {
+    upb_fielddef *f = sorted_fields[i];
+    upb_type_info *type_info = &upb_types[f->type];
+
+    // This identifies the set bit.  When we implement is_initialized (a
+    // general check about whether all required bits are set) we will probably
+    // want to use a different ordering that puts all the required bits
+    // together.
+    f->field_index = i;
+
+    // General alignment rules are: each member must be at an address that is a
+    // multiple of that type's alignment.  Also, the size of the structure as a
+    // whole must be a multiple of the greatest alignment of any member.
+    size_t offset = upb_align_up(m->size, type_info->align);
+    // Offsets are relative to the end of the refcount.
+    f->byte_offset = offset - sizeof(upb_atomic_refcount_t);
+    m->size = offset + type_info->size;
+    max_align = UPB_MAX(max_align, type_info->align);
+  }
+
+  if (max_align > 0) m->size = upb_align_up(m->size, max_align);
+
   upb_deflist_qualify(defs, m->base.fqname, start_count);
   upb_deflist_push(defs, UPB_UPCAST(m));
   return true;
@@ -664,7 +725,7 @@ bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
       }
 
       // Check the type of the found def.
-      upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
+      upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
       if(found->def->type != expected) {
         upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type");
         return false;
diff --git a/core/upb_def.h b/core/upb_def.h
index 3294a8d..9eb961a 100644
--- a/core/upb_def.h
+++ b/core/upb_def.h
@@ -103,7 +103,7 @@ typedef struct _upb_fielddef {
   upb_field_count_t field_index;  // Indicates set bit.
 
   upb_field_number_t number;
-  upb_field_type_t type;
+  upb_fieldtype_t type;
   upb_label_t label;
   // True if we own a ref on "def" (above).  This is true unless this edge is
   // part of a cycle.
@@ -112,10 +112,10 @@ typedef struct _upb_fielddef {
 
 // A variety of tests about the type of a field.
 INLINE bool upb_issubmsg(upb_fielddef *f) {
-  return upb_issubmsgtype(f->type);
+  return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE);
 }
 INLINE bool upb_isstring(upb_fielddef *f) {
-  return upb_isstringtype(f->type);
+  return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES);
 }
 INLINE bool upb_isarray(upb_fielddef *f) {
   return f->label == UPB_LABEL(REPEATED);
@@ -125,6 +125,19 @@ INLINE bool upb_hasdef(upb_fielddef *f) {
   return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
 }
 
+INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) {
+  if (upb_isarray(f)) {
+    return UPB_VALUETYPE_ARRAY;
+  } else {
+    return f->type;
+  }
+}
+
+INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) {
+  assert(upb_isarray(f));
+  return f->type;
+}
+
 INLINE bool upb_field_ismm(upb_fielddef *f) {
   return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
 }
@@ -139,6 +152,8 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) {
 typedef struct _upb_msgdef {
   upb_def base;
   upb_atomic_refcount_t cycle_refcount;
+  uint32_t size;
+  uint32_t set_flags_bytes;
 
   // Tables for looking up fields by number and name.
   upb_inttable itof;  // int to field
@@ -169,9 +184,14 @@ INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) {
   return e ? e->f : NULL;
 }
 
+INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) {
+  return upb_strtable_count(&m->ntof);
+}
+
 // Iteration over fields.  The order is undefined.
 //   upb_msg_iter i;
-//   for(i = upb_msg_begin(m); !upb_msg_done(&i); i = upb_msg_next(&i)) {
+//   for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
+//     upb_fielddef *f = upb_msg_iter_field(i);
 //     // ...
 //   }
 typedef upb_itof_ent *upb_msg_iter;
diff --git a/core/upb_msg.c b/core/upb_msg.c
new file mode 100644
index 0000000..75f7a35
--- /dev/null
+++ b/core/upb_msg.c
@@ -0,0 +1,123 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Joshua Haberman.  See LICENSE for details.
+ *
+ * Data structure for storing a message of protobuf data.
+ */
+
+#include "upb_msg.h"
+
+void _upb_elem_free(upb_value v, upb_fielddef *f) {
+  switch(f->type) {
+    case UPB_TYPE(MESSAGE):
+    case UPB_TYPE(GROUP):
+      _upb_msg_free(v.msg, upb_downcast_msgdef(f->def));
+      break;
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES):
+      _upb_string_free(v.str);
+      break;
+    default:
+      abort();
+  }
+}
+
+void _upb_field_free(upb_value v, upb_fielddef *f) {
+  if (upb_isarray(f)) {
+    _upb_array_free(v.arr, f);
+  } else {
+    _upb_elem_free(v, f);
+  }
+}
+
+upb_msg *upb_msg_new(upb_msgdef *md) {
+  upb_msg *msg = malloc(md->size);
+  // Clear all set bits and cached pointers.
+  memset(msg, 0, md->size);
+  upb_atomic_refcount_init(&msg->refcount, 1);
+  return msg;
+}
+
+void _upb_msg_free(upb_msg *msg, upb_msgdef *md) {
+  // Need to release refs on all sub-objects.
+  upb_msg_iter i;
+  for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
+    upb_fielddef *f = upb_msg_iter_field(i);
+    upb_valueptr p = _upb_msg_getptr(msg, f);
+    upb_valuetype_t type = upb_field_valuetype(f);
+    if (upb_field_ismm(f)) _upb_field_unref(upb_value_read(p, type), f);
+  }
+  free(msg);
+}
+
+upb_array *upb_array_new(void) {
+  upb_array *arr = malloc(sizeof(*arr));
+  upb_atomic_refcount_init(&arr->refcount, 1);
+  arr->size = 0;
+  arr->len = 0;
+  arr->elements._void = NULL;
+  return arr;
+}
+
+void _upb_array_free(upb_array *arr, upb_fielddef *f) {
+  if (upb_elem_ismm(f)) {
+    // Need to release refs on sub-objects.
+    upb_valuetype_t type = upb_elem_valuetype(f);
+    for (upb_arraylen_t i = 0; i < arr->size; i++) {
+      upb_valueptr p = _upb_array_getptr(arr, f, i);
+      _upb_elem_unref(upb_value_read(p, type), f);
+    }
+  }
+  if (arr->elements._void) free(arr->elements._void);
+  free(arr);
+}
+
+upb_value upb_field_new(upb_fielddef *f, upb_valuetype_t type) {
+  upb_value v;
+  switch(type) {
+    case UPB_TYPE(MESSAGE):
+    case UPB_TYPE(GROUP):
+      v.msg = upb_msg_new(upb_downcast_msgdef(f->def));
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES):
+      v.str = upb_string_new();
+    case UPB_VALUETYPE_ARRAY:
+      v.arr = upb_array_new();
+    default:
+      abort();
+  }
+  return v;
+}
+
+static void upb_field_recycle(upb_value val) {
+  (void)val;
+}
+
+upb_value upb_field_tryrecycle(upb_valueptr p, upb_value val, upb_fielddef *f,
+                               upb_valuetype_t type) {
+  if (val._void == NULL || !upb_atomic_only(val.refcount)) {
+    if (val._void != NULL) upb_atomic_unref(val.refcount);
+    val = upb_field_new(f, type);
+    upb_value_write(p, val, type);
+  } else {
+    upb_field_recycle(val);
+  }
+  return val;
+}
+
+void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str,
+                       upb_status *status) {
+  (void)msg;
+  (void)md;
+  (void)str;
+  (void)status;
+}
+
+void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str,
+                       upb_status *status) {
+  (void)msg;
+  (void)md;
+  (void)str;
+  (void)status;
+}
diff --git a/core/upb_msg.h b/core/upb_msg.h
index 5215bd9..2db67c0 100644
--- a/core/upb_msg.h
+++ b/core/upb_msg.h
@@ -9,14 +9,39 @@
 #ifndef UPB_MSG_H
 #define UPB_MSG_H
 
+#include "upb.h"
+#include "upb_def.h"
+#include <stdlib.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-typedef struct {
+upb_value upb_field_tryrecycle(upb_valueptr p, upb_value v, upb_fielddef *f,
+                               upb_valuetype_t type);
+
+INLINE void _upb_value_ref(upb_value v) { upb_atomic_ref(v.refcount); }
+
+void _upb_field_free(upb_value v, upb_fielddef *f);
+void _upb_elem_free(upb_value v, upb_fielddef *f);
+INLINE void _upb_field_unref(upb_value v, upb_fielddef *f) {
+  assert(upb_field_ismm(f));
+  if (v.refcount && upb_atomic_unref(v.refcount))
+    _upb_field_free(v, f);
+}
+INLINE void _upb_elem_unref(upb_value v, upb_fielddef *f) {
+  assert(upb_elem_ismm(f));
+  if (v.refcount && upb_atomic_unref(v.refcount))
+    _upb_elem_free(v, f);
+}
+
+/* upb_array ******************************************************************/
+
+typedef uint32_t upb_arraylen_t;
+struct _upb_array {
   upb_atomic_refcount_t refcount;
-  uint32_t len;
-  uint32_t size;
+  upb_arraylen_t len;
+  upb_arraylen_t size;
   upb_valueptr elements;
 };
 
@@ -31,29 +56,70 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) {
   if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f);
 }
 
+INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f,
+                                      uint32_t elem) {
+  upb_valueptr p;
+  p._void = &a->elements.uint8[elem * upb_types[f->type].size];
+  return p;
+}
+
 INLINE upb_value upb_array_get(upb_array *a, upb_fielddef *f, uint32_t elem) {
   assert(elem < upb_array_len(a));
   return upb_value_read(_upb_array_getptr(a, f, elem), f->type);
 }
 
 // For string or submessages, will release a ref on the previously set value.
+// and take a ref on the new value.  The array must already be at least "elem"
+// long; to append use append_mutable.
 INLINE void upb_array_set(upb_array *a, upb_fielddef *f, uint32_t elem,
                           upb_value val) {
+  assert(elem < upb_array_len(a));
+  upb_valueptr p = _upb_array_getptr(a, f, elem);
+  if (upb_elem_ismm(f)) {
+    _upb_elem_unref(upb_value_read(p, f->type), f);
+    _upb_value_ref(val);
+  }
+  upb_value_write(p, val, f->type);
 }
 
-// Append an element with the default value, returning it.  For strings or
-// submessages, this will try to reuse previously allocated memory.
-INLINE upb_value upb_array_append_mutable(upb_array *a, upb_fielddef *f) {
+INLINE void upb_array_resize(upb_array *a, upb_fielddef *f) {
+  if (a->len == a->size) {
+    a->len *= 2;
+    a->elements._void = realloc(a->elements._void,
+                                a->len * upb_types[f->type].size);
+  }
 }
 
-typedef struct {
+// Append an element to an array of string or submsg with the default value,
+// returning it.  This will try to reuse previously allocated memory.
+INLINE upb_value upb_array_appendmutable(upb_array *a, upb_fielddef *f) {
+  assert(upb_elem_ismm(f));
+  upb_array_resize(a, f);
+  upb_valueptr p = _upb_array_getptr(a, f, a->len++);
+  upb_valuetype_t type = upb_elem_valuetype(f);
+  upb_value val = upb_value_read(p, type);
+  val = upb_field_tryrecycle(p, val, f, type);
+  return val;
+}
+
+
+/* upb_msg ********************************************************************/
+
+struct _upb_msg {
   upb_atomic_refcount_t refcount;
   uint8_t data[4];  // We allocate the appropriate amount per message.
-} upb_msg;
+};
 
 // Creates a new msg of the given type.
 upb_msg *upb_msg_new(upb_msgdef *md);
 
+// Returns a pointer to the given field.
+INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) {
+  upb_valueptr p;
+  p._void = &msg->data[f->byte_offset];
+  return p;
+}
+
 void _upb_msg_free(upb_msg *msg, upb_msgdef *md);
 INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
   if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
@@ -65,6 +131,10 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
   return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0;
 }
 
+INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
+  msg->data[f->field_index/8] |= (1 << (f->field_index % 8));
+}
+
 // Returns the current value of the given field if set, or the default value if
 // not set.
 INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
@@ -79,12 +149,29 @@ INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
 // Otherwise sets it and returns an empty instance, attempting to reuse any
 // previously allocated memory.
 INLINE upb_value upb_msg_getmutable(upb_msg *msg, upb_fielddef *f) {
+  assert(upb_field_ismm(f));
+  upb_valueptr p = _upb_msg_getptr(msg, f);
+  upb_valuetype_t type = upb_field_valuetype(f);
+  upb_value val = upb_value_read(p, type);
+  if (!upb_msg_has(msg, f)) {
+    upb_msg_sethas(msg, f);
+    val = upb_field_tryrecycle(p, val, f, type);
+  }
+  return val;
 }
 
 // Sets the current value of the field.  If this is a string, array, or
 // submessage field, releases a ref on the value (if any) that was previously
 // set.
 INLINE void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
+  upb_valueptr p = _upb_msg_getptr(msg, f);
+  upb_valuetype_t type = upb_field_valuetype(f);
+  if (upb_field_ismm(f)) {
+    _upb_field_unref(upb_value_read(p, type), f);
+    _upb_value_ref(val);
+  }
+  upb_msg_sethas(msg, f);
+  upb_value_write(p, val, upb_field_valuetype(f));
 }
 
 // Unsets all field values back to their defaults.
@@ -92,6 +179,17 @@ INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
   memset(msg->data, 0, md->set_flags_bytes);
 }
 
+// A convenience function for decoding an entire protobuf all at once, without
+// having to worry about setting up the appropriate objects.
+void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_string *str,
+                       upb_status *status);
+
+// A convenience function for encoding an entire protobuf all at once.  If an
+// error occurs, the null string is returned and the status object contains
+// the error.
+void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_string *str,
+                       upb_status *status);
+
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c
index 7591f78..c35212e 100644
--- a/stream/upb_decoder.c
+++ b/stream/upb_decoder.c
@@ -14,8 +14,10 @@
 
 // Returns true if the give wire type and field type combination is valid,
 // taking into account both packed and non-packed encodings.
-static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
-  return (1 << wt) & upb_types[ft].allowed_wire_types;
+static bool upb_check_type(upb_wire_type_t wt, upb_fielddef *f) {
+  // TODO: need to take into account the label; only repeated fields are
+  // allowed to use packed encoding.
+  return (1 << wt) & upb_types[f->type].allowed_wire_types;
 }
 
 // Performs zig-zag decoding, which is used by sint32 and sint64.
@@ -358,7 +360,7 @@ again:
     // unknown fields we will implement that here.
     upb_decoder_skipval(d);
     goto again;
-  } else if (!upb_check_type(wire_type, f->type)) {
+  } else if (!upb_check_type(wire_type, f)) {
     // This is a recoverable error condition.  We skip the value but also
     // return NULL and report the error.
     upb_decoder_skipval(d);
diff --git a/stream/upb_strstream.h b/stream/upb_strstream.h
index fa9bace..d01d21f 100644
--- a/stream/upb_strstream.h
+++ b/stream/upb_strstream.h
@@ -31,7 +31,7 @@ void upb_stringsrc_free(upb_stringsrc *s);
 void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
 
 // Returns the upb_bytesrc* for this stringsrc.  Invalidated by reset above.
-upb_bytesrc *upb_stringsrc_bytesrc();
+upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
 
 
 /* upb_stringsink *************************************************************/
diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc
index 9083788..9446b8f 100644
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@@ -4,9 +4,10 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <google/protobuf/descriptor.h>
-#include "upb_data.h"
+#include "upb_msg.h"
 #include "upb_def.h"
 #include "upb_decoder.h"
+#include "upb_strstream.h"
 
 int num_assertions = 0;
 #define ASSERT(expr) do { \
@@ -25,7 +26,7 @@ void compare_arrays(const google::protobuf::Reflection *r,
                     upb_msg *upb_msg, upb_fielddef *upb_f)
 {
   ASSERT(upb_msg_has(upb_msg, upb_f));
-  upb_arrayptr arr = upb_msg_get(upb_msg, upb_f).arr;
+  upb_array *arr = upb_msg_get(upb_msg, upb_f).arr;
   ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f));
   for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) {
     upb_value v = upb_array_get(arr, upb_f, i);
@@ -63,7 +64,7 @@ void compare_arrays(const google::protobuf::Reflection *r,
       case UPB_TYPE(STRING):
       case UPB_TYPE(BYTES): {
         std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
-        std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str));
+        std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str));
         ASSERT(str == str2);
         break;
       }
@@ -116,7 +117,7 @@ void compare_values(const google::protobuf::Reflection *r,
     case UPB_TYPE(STRING):
     case UPB_TYPE(BYTES): {
       std::string str = r->GetString(proto2_msg, proto2_f);
-      std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str));
+      std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str));
       ASSERT(str == str2);
       break;
     }
@@ -133,9 +134,10 @@ void compare(const google::protobuf::Message& proto2_msg,
   const google::protobuf::Reflection *r = proto2_msg.GetReflection();
   const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor();
 
-  ASSERT((upb_field_count_t)d->field_count() == upb_md->num_fields);
-  for(upb_field_count_t i = 0; i < upb_md->num_fields; i++) {
-    upb_fielddef *upb_f = &upb_md->fields[i];
+  ASSERT((upb_field_count_t)d->field_count() == upb_msgdef_numfields(upb_md));
+  upb_msg_iter i;
+  for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) {
+    upb_fielddef *upb_f = upb_msg_iter_field(i);
     const google::protobuf::FieldDescriptor *proto2_f =
         d->FindFieldByNumber(upb_f->number);
     // Make sure the definitions are equal.
@@ -143,7 +145,7 @@ void compare(const google::protobuf::Message& proto2_msg,
     ASSERT(proto2_f);
     ASSERT(upb_f->number == proto2_f->number());
     ASSERT(std::string(upb_string_getrobuf(upb_f->name),
-                       upb_strlen(upb_f->name)) ==
+                       upb_string_len(upb_f->name)) ==
            proto2_f->name());
     ASSERT(upb_f->type == proto2_f->type());
     ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated());
@@ -166,10 +168,10 @@ void compare(const google::protobuf::Message& proto2_msg,
 
 void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
                        upb_msg *upb_msg, upb_msgdef *upb_md,
-                       upb_strptr str)
+                       upb_string *str)
 {
   // Parse to both proto2 and upb.
-  ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_strlen(str)));
+  ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str)));
   upb_status status = UPB_STATUS_INIT;
   upb_msg_decodestr(upb_msg, upb_md, str, &status);
   ASSERT(upb_ok(&status));
@@ -194,22 +196,32 @@ int main(int argc, char *argv[])
 
   // Initialize upb state, parse descriptor.
   upb_status status = UPB_STATUS_INIT;
-  upb_symtab *c = upb_symtab_new();
-  upb_strptr fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
-  if(upb_string_isnull(fds)) {
+  upb_symtab *symtab = upb_symtab_new();
+  upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
+  if(fds == NULL) {
     fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
     return 1;
   }
-  upb_symtab_add_desc(c, fds, &status);
+  upb_symtab_add_descriptorproto(symtab);
+  upb_def *fds_msgdef = upb_symtab_lookup(
+      symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
+
+  upb_stringsrc *ssrc = upb_stringsrc_new();
+  upb_stringsrc_reset(ssrc, fds);
+  upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef));
+  upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc));
+  upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status);
   if(!upb_ok(&status)) {
-    fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": %s.\n",
-            status.msg);
+    fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": ");
+    upb_printerr(&status);
     return 1;
   }
   upb_string_unref(fds);
+  upb_decoder_free(decoder);
+  upb_stringsrc_free(ssrc);
 
-  upb_strptr proto_name = upb_strdupc(MESSAGE_NAME);
-  upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(c, proto_name));
+  upb_string *proto_name = upb_strdupc(MESSAGE_NAME);
+  upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name));
   if(!def) {
     fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
             UPB_STRARG(proto_name));
@@ -218,8 +230,8 @@ int main(int argc, char *argv[])
   upb_string_unref(proto_name);
 
   // Read the message data itself.
-  upb_strptr str = upb_strreadfile(MESSAGE_FILE);
-  if(upb_string_isnull(str)) {
+  upb_string *str = upb_strreadfile(MESSAGE_FILE);
+  if(str == NULL) {
     fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
     return 1;
   }
@@ -234,7 +246,7 @@ int main(int argc, char *argv[])
   upb_msg_unref(upb_msg, def);
   upb_def_unref(UPB_UPCAST(def));
   upb_string_unref(str);
-  upb_symtab_unref(c);
+  upb_symtab_unref(symtab);
 
   return 0;
 }