From 040f7e6ba2e2282b80f332a031b77d7d34b4fc85 Mon Sep 17 00:00:00 2001
From: Joshua Haberman <joshua@reverberate.org>
Date: Mon, 24 Aug 2009 21:44:22 -0700
Subject: Significant memory-management refactoring and Python extension.

---
 Makefile                      |  38 +-
 descriptor/descriptor.h       |  79 +---
 descriptor/descriptor_const.h |  53 +++
 lang_ext/python/cext.c        |  16 +
 lang_ext/python/cext.h        |  48 +++
 lang_ext/python/definition.c  | 164 ++++----
 lang_ext/python/definition.h  |  20 +-
 lang_ext/python/pb.c          | 919 ++++++++++++++++++++++++++++++++++++++++++
 lang_ext/python/setup.py      |  18 +-
 src/upb.h                     |  93 ++++-
 src/upb_array.h               |  89 +---
 src/upb_context.c             |  16 +-
 src/upb_enum.h                |   9 -
 src/upb_inlinedefs.c          |   1 +
 src/upb_mm.c                  | 208 ++++++++++
 src/upb_mm.h                  | 168 ++++++++
 src/upb_msg.c                 | 213 +++-------
 src/upb_msg.h                 | 152 +++----
 src/upb_parse.c               |   2 +-
 src/upb_parse.h               |  10 -
 src/upb_string.c              |  23 +-
 src/upb_string.h              |  59 +--
 src/upb_struct.h              | 119 ++++++
 src/upb_text.c                |  54 ++-
 tools/upbc.c                  | 124 ++++--
 25 files changed, 2082 insertions(+), 613 deletions(-)
 create mode 100644 descriptor/descriptor_const.h
 create mode 100644 lang_ext/python/cext.c
 create mode 100644 lang_ext/python/cext.h
 create mode 100644 lang_ext/python/pb.c
 create mode 100644 src/upb_mm.c
 create mode 100644 src/upb_mm.h
 create mode 100644 src/upb_struct.h

diff --git a/Makefile b/Makefile
index 2d2c6f8..0aaae32 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,11 @@
 #
+# This Makefile builds the upb library as well as associated tests, tools, and
+# language extensions.
+#
+# It does not use autoconf/automake/libtool because I can't stomach all the
+# cruft.  If you're not compiling for gcc, you may have to change some of the
+# options.
+#
 # Summary of compiler flags you may want to use:
 #
 # * -DNDEBUG: makes binary smaller and faster by removing sanity checks.
@@ -25,19 +32,28 @@ CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && ca
 LDLIBS=-lpthread
 
 LIBUPB=src/libupb.a
-ALL=deps $(OBJ) $(LIBUPB) tests/test_table tests/tests tools/upbc
+LIBUPB_PIC=src/libupb_pic.a
+LIBUPB_SHARED=src/libupb.so
+ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) $(LIBUPB_SHARED) tests/test_table tests/tests tools/upbc
 all: $(ALL)
 clean:
-	rm -rf $(call rwildcard,,*.o) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
+	rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
 	rm -rf descriptor/descriptor.proto.pb
+	cd lang_ext/python && python setup.py clean --all
 
 # The core library (src/libupb.a)
-OBJ=src/upb_parse.o src/upb_table.o src/upb_msg.o src/upb_enum.o src/upb_context.o \
-    src/upb_string.o src/upb_text.o src/upb_serialize.o descriptor/descriptor.o
-SRC=$(call rwildcard,,*.c)
-HEADERS=$(call rwildcard,,*.h)
-$(LIBUPB): $(OBJ)
-	ar rcs $(LIBUPB) $(OBJ)
+SRC=src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c src/upb_enum.c src/upb_context.c \
+    src/upb_string.c src/upb_text.c src/upb_serialize.c descriptor/descriptor.c
+STATICOBJ=$(patsubst %.c,%.o,$(SRC))
+SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC))
+# building shared objects is like building static ones, except -fPIC is added.
+%.lo : %.c ; $(CC) -fPIC $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+$(LIBUPB): $(STATICOBJ)
+	ar rcs $(LIBUPB) $(STATICOBJ)
+$(LIBUPB_PIC): $(SHAREDOBJ)
+	ar rcs $(LIBUPB_PIC) $(SHAREDOBJ)
+$(LIBUPB_SHARED): $(SHAREDOBJ)
+	$(CC) -shared -o $(LIBUPB_SHARED) $(SHAREDOBJ)
 
 # Regenerating the auto-generated files in descriptor/.
 descriptor/descriptor.proto.pb: descriptor/descriptor.proto
@@ -47,6 +63,10 @@ descriptor/descriptor.proto.pb: descriptor/descriptor.proto
 descriptorgen: descriptor/descriptor.proto.pb tools/upbc
 	./tools/upbc -i upb_file_descriptor_set -o descriptor/descriptor descriptor/descriptor.proto.pb
 
+# Language extensions.
+python: $(LIBUPB_PIC)
+	cd lang_ext/python && python setup.py build
+
 # Tests
 test: tests/tests
 	./tests/tests
@@ -136,5 +156,5 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \
 	  benchmarks/google_messages.pb.cc -lprotobuf -lpthread
 
 -include deps
-deps: $(SRC) $(HEADERS) gen-deps.sh Makefile
+deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h)
 	@./gen-deps.sh $(SRC)
diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h
index 7096023..403d9df 100644
--- a/descriptor/descriptor.h
+++ b/descriptor/descriptor.h
@@ -3,9 +3,7 @@
 #ifndef DESCRIPTOR_DESCRIPTOR_H
 #define DESCRIPTOR_DESCRIPTOR_H
 
-#include <upb_string.h>
-
-#include <upb_array.h>
+#include <upb_struct.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -14,45 +12,6 @@ extern "C" {
 struct google_protobuf_FileDescriptorSet;
 extern struct google_protobuf_FileDescriptorSet *upb_file_descriptor_set;
 
-/* Enums. */
-
-typedef enum google_protobuf_FieldOptions_CType {
-  GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1,
-  GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2
-} google_protobuf_FieldOptions_CType;
-
-typedef enum google_protobuf_FieldDescriptorProto_Type {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
-} google_protobuf_FieldDescriptorProto_Type;
-
-typedef enum google_protobuf_FieldDescriptorProto_Label {
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
-  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
-} google_protobuf_FieldDescriptorProto_Label;
-
-typedef enum google_protobuf_FileOptions_OptimizeMode {
-  GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,
-  GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2
-} google_protobuf_FileOptions_OptimizeMode;
-
 /* Forward declarations of all message types.
  * So they can refer to each other in possibly-recursive ways. */
 
@@ -131,8 +90,8 @@ typedef struct google_protobuf_MethodOptions
 /* The message definitions themselves. */
 
 struct google_protobuf_UninterpretedOption_NamePart {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -146,8 +105,8 @@ struct google_protobuf_UninterpretedOption_NamePart {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption_NamePart)
 
 struct google_protobuf_DescriptorProto {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -171,8 +130,8 @@ struct google_protobuf_DescriptorProto {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto)
 
 struct google_protobuf_EnumDescriptorProto {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -188,8 +147,8 @@ struct google_protobuf_EnumDescriptorProto {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumDescriptorProto)
 
 struct google_protobuf_UninterpretedOption {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -211,8 +170,8 @@ struct google_protobuf_UninterpretedOption {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption)
 
 struct google_protobuf_FileDescriptorProto {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -238,8 +197,8 @@ struct google_protobuf_FileDescriptorProto {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorProto)
 
 struct google_protobuf_MethodDescriptorProto {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -257,8 +216,8 @@ struct google_protobuf_MethodDescriptorProto {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodDescriptorProto)
 
 struct google_protobuf_EnumValueOptions {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -270,8 +229,8 @@ struct google_protobuf_EnumValueOptions {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueOptions)
 
 struct google_protobuf_EnumValueDescriptorProto {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -287,8 +246,8 @@ struct google_protobuf_EnumValueDescriptorProto {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueDescriptorProto)
 
 struct google_protobuf_ServiceDescriptorProto {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -304,8 +263,8 @@ struct google_protobuf_ServiceDescriptorProto {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceDescriptorProto)
 
 struct google_protobuf_FileDescriptorSet {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -317,8 +276,8 @@ struct google_protobuf_FileDescriptorSet {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorSet)
 
 struct google_protobuf_DescriptorProto_ExtensionRange {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -332,8 +291,8 @@ struct google_protobuf_DescriptorProto_ExtensionRange {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto_ExtensionRange)
 
 struct google_protobuf_FieldOptions {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -353,8 +312,8 @@ struct google_protobuf_FieldOptions {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldOptions)
 
 struct google_protobuf_FileOptions {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -374,8 +333,8 @@ struct google_protobuf_FileOptions {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_FileOptions)
 
 struct google_protobuf_MessageOptions {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -389,8 +348,8 @@ struct google_protobuf_MessageOptions {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_MessageOptions)
 
 struct google_protobuf_EnumOptions {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -402,8 +361,8 @@ struct google_protobuf_EnumOptions {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumOptions)
 
 struct google_protobuf_FieldDescriptorProto {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -429,8 +388,8 @@ struct google_protobuf_FieldDescriptorProto {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldDescriptorProto)
 
 struct google_protobuf_ServiceOptions {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
@@ -442,8 +401,8 @@ struct google_protobuf_ServiceOptions {
 UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceOptions)
 
 struct google_protobuf_MethodOptions {
+  struct upb_mmhead mmhead;
   struct upb_msgdef *def;
-  void *gptr;
   union {
     uint8_t bytes[1];
     struct {
diff --git a/descriptor/descriptor_const.h b/descriptor/descriptor_const.h
new file mode 100644
index 0000000..2423e97
--- /dev/null
+++ b/descriptor/descriptor_const.h
@@ -0,0 +1,53 @@
+/* This file was generated by upbc (the upb compiler).  Do not edit. */
+
+#ifndef DESCRIPTOR_DESCRIPTOR_C
+#define DESCRIPTOR_DESCRIPTOR_C
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Enums. */
+
+typedef enum google_protobuf_FieldOptions_CType {
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2
+} google_protobuf_FieldOptions_CType;
+
+typedef enum google_protobuf_FieldDescriptorProto_Type {
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
+} google_protobuf_FieldDescriptorProto_Type;
+
+typedef enum google_protobuf_FieldDescriptorProto_Label {
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
+} google_protobuf_FieldDescriptorProto_Label;
+
+typedef enum google_protobuf_FileOptions_OptimizeMode {
+  GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,
+  GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2
+} google_protobuf_FileOptions_OptimizeMode;
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* DESCRIPTOR_DESCRIPTOR_C */
diff --git a/lang_ext/python/cext.c b/lang_ext/python/cext.c
new file mode 100644
index 0000000..5336f2d
--- /dev/null
+++ b/lang_ext/python/cext.c
@@ -0,0 +1,16 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ */
+
+#include "cext.h"
+
+/* Module entry point for upb.cext: registers the (method-less) package module,
+ * then runs the definition and pb initializers declared in cext.h. */
+PyMODINIT_FUNC initcext(void)
+{
+  if(!Py_InitModule("upb.cext", NULL)) return;  /* exception already set */
+  initdefinition();
+  initpb(); }
diff --git a/lang_ext/python/cext.h b/lang_ext/python/cext.h
new file mode 100644
index 0000000..e0e7832
--- /dev/null
+++ b/lang_ext/python/cext.h
@@ -0,0 +1,48 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ */
+
+#ifndef UPB_PYTHON_CEXT_H_
+#define UPB_PYTHON_CEXT_H_
+
+#include <Python.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  PyObject_HEAD
+  struct upb_context *context;
+  PyObject *created_defs;
+} PyUpb_Context;
+
+typedef struct {
+  PyObject_HEAD
+  struct upb_msgdef *def;
+  PyUpb_Context *context;
+} PyUpb_MsgDef;
+
+extern PyTypeObject PyUpb_MsgDefType;
+
+/* What format string should be passed to PyArg_ParseTuple to get just a raw
+ * string of bytes and a length. */
+#if PY_MAJOR_VERSION >= 3
+#define BYTES_FORMAT "y#"
+#else
+#define BYTES_FORMAT "s#"
+#endif
+
+#define RETURN_BOOL(val) if(val) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; }
+
+extern PyMODINIT_FUNC initdefinition(void);
+extern PyMODINIT_FUNC initpb(void);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/lang_ext/python/definition.c b/lang_ext/python/definition.c
index 6a788ab..cc1089d 100644
--- a/lang_ext/python/definition.c
+++ b/lang_ext/python/definition.c
@@ -24,21 +24,29 @@
 #include "upb_context.h"
 #include "upb_msg.h"
 
-#if PY_MAJOR_VERSION > 3
-const char *bytes_format = "y#";
-#else
-const char *bytes_format = "s#";
-#endif
+static PyTypeObject PyUpb_ContextType;
+static struct upb_strtable msgdefs;
+static struct upb_strtable contexts;
 
+struct msgtab_entry {
+  struct upb_strtable_entry e;
+  PyUpb_MsgDef *msgdef;
+};
 
-/* upb.def.MessageDefinition **************************************************/
+struct contexttab_entry {
+  struct upb_strtable_entry e;
+  PyUpb_Context *context;
+};
 
-typedef struct {
-  PyObject_HEAD
-  struct upb_msgdef *def;
-} PyUpb_MsgDef;
+#define CheckContext(obj) \
+  (void*)obj; do { \
+    if(!PyObject_TypeCheck(obj, &PyUpb_ContextType)) { \
+      PyErr_SetString(PyExc_TypeError, "Must be a upb.Context"); \
+      return NULL; \
+    } \
+  } while(0)
 
-PyTypeObject PyUpb_MsgDefType;  /* forward decl. */
+/* upb.def.MessageDefinition **************************************************/
 
 /* Not implemented yet, but these methods will expose information about the
  * message definition (the upb_msgdef). */
@@ -46,18 +54,10 @@ static PyMethodDef msgdef_methods[] = {
   {NULL, NULL}
 };
 
-static PyObject *msgdef_new(struct upb_msgdef *m)
-{
-  PyUpb_MsgDef *md_obj = (void*)PyType_GenericAlloc(&PyUpb_MsgDefType, 0);
-  md_obj->def = m;
-  upb_msgdef_ref(md_obj->def);
-  return (void*)md_obj;
-}
-
 static void msgdef_dealloc(PyObject *obj)
 {
   PyUpb_MsgDef *md_obj = (void*)obj;
-  upb_msgdef_unref(md_obj->def);
+  Py_DECREF(md_obj->context);
   obj->ob_type->tp_free(obj);
 }
 
@@ -106,27 +106,11 @@ PyTypeObject PyUpb_MsgDefType = {
 
 /* upb.Context ****************************************************************/
 
-typedef struct {
-  PyObject_HEAD
-  struct upb_context *context;
-  PyObject *created_defs;
-} PyUpb_Context;
-
-static PyTypeObject PyUpb_ContextType;  /* forward decl. */
-
-#define CheckContext(obj) \
-  (void*)obj; do { \
-    if(!PyObject_TypeCheck(obj, &PyUpb_ContextType)) { \
-      PyErr_SetString(PyExc_TypeError, "Must be a upb.Context"); \
-      return NULL; \
-    } \
-  } while(0)
-
 static PyObject *context_parsefds(PyObject *_context, PyObject *args)
 {
   PyUpb_Context *context = CheckContext(_context);
   struct upb_string str;
-  if(!PyArg_ParseTuple(args, bytes_format, &str.ptr, &str.byte_len))
+  if(!PyArg_ParseTuple(args, BYTES_FORMAT, &str.ptr, &str.byte_len))
     return NULL;
   str.byte_size = 0;  /* We don't own that mem. */
 
@@ -138,35 +122,56 @@ static PyObject *context_parsefds(PyObject *_context, PyObject *args)
   Py_RETURN_NONE;
 }
 
-static PyObject *get_or_create_def(PyUpb_Context *context,
-                                   struct upb_symtab_entry *e)
+static PyObject *get_or_create_def(struct upb_symtab_entry *e)
 {
-  /* Check out internal dictionary of Python classes we have already created
-   * (keyed by the address of the obj we are referencing). */
-#if PY_MAJOR_VERSION > 3
-  PyObject *str = PyBytes_FromStringAndSize((char*)&e->ref, sizeof(void*));
-#else
-  PyObject *str = PyString_FromStringAndSize((char*)&e->ref, sizeof(void*));
-#endif
-  /* Would use PyDict_GetItemStringAndSize() if it existed, but only
-   * PyDict_GetItemString() exists, and pointers could have NULL bytes. */
-  PyObject *def = PyDict_GetItem(context->created_defs, str);
-  if(!def) {
-    switch(e->type) {
-      case UPB_SYM_MESSAGE:
-        def = msgdef_new(e->ref.msg);
-        break;
-      case UPB_SYM_ENUM:
-      case UPB_SYM_SERVICE:
-      case UPB_SYM_EXTENSION:
-      default:
-        def = NULL;
-        break;
-    }
-    if(def) PyDict_SetItem(context->created_defs, str, def);
+  switch(e->type) {
+    case UPB_SYM_MESSAGE: return (PyObject*)get_or_create_msgdef(e->ref.msg);
+    case UPB_SYM_ENUM:
+    case UPB_SYM_SERVICE:
+    case UPB_SYM_EXTENSION:
+    default: fprintf(stderr, "upb.pb, not implemented.\n"); abort(); return NULL;
   }
-  Py_DECREF(str);
-  return def;
+}
+/* Returns the PyUpb_Context wrapping context, allocating and caching it if new. */
+static PyUpb_Context *get_or_create_context(struct upb_context *context)
+{
+  PyUpb_Context *pycontext = NULL;
+  struct upb_string str = {.ptr = (char*)&context, .byte_len = sizeof(void*)};  /* key = raw pointer bytes */
+  struct contexttab_entry *e = upb_strtable_lookup(&contexts, &str);
+  if(!e) {
+    pycontext = (void*)PyUpb_ContextType.tp_alloc(&PyUpb_ContextType, 0);  /* NOTE(review): result is not NULL-checked */
+    pycontext->context = context;
+    struct contexttab_entry new_e = {
+      .e = {.key = {.ptr = (char*)&pycontext->context, .byte_len = sizeof(void*)}},  /* key bytes live inside the wrapper */
+      .context = pycontext  /* stored without Py_INCREF: the table does not own the wrapper */
+    };
+    upb_strtable_insert(&contexts, &new_e.e);
+  } else {
+    pycontext = e->context;
+    Py_INCREF(pycontext);  /* cached wrapper: hand the caller its own reference */
+  }
+  return pycontext;
+}
+/* Returns the PyUpb_MsgDef wrapping def, allocating and caching it if new. */
+PyUpb_MsgDef *get_or_create_msgdef(struct upb_msgdef *def)
+{
+  PyUpb_MsgDef *pydef = NULL;
+  struct upb_string str = {.ptr = (char*)&def, .byte_len = sizeof(void*)};  /* key = raw pointer bytes */
+  struct msgtab_entry *e = upb_strtable_lookup(&msgdefs, &str);
+  if(!e) {
+    pydef = (void*)PyUpb_MsgDefType.tp_alloc(&PyUpb_MsgDefType, 0);  /* NOTE(review): result is not NULL-checked */
+    pydef->def = def;
+    pydef->context = get_or_create_context(def->context);  /* reference released in msgdef_dealloc */
+    struct msgtab_entry new_e = {
+      .e = {.key = {.ptr = (char*)&pydef->def, .byte_len = sizeof(void*)}},  /* key bytes live inside the wrapper */
+      .msgdef = pydef  /* stored without Py_INCREF: the table does not own the wrapper */
+    };
+    upb_strtable_insert(&msgdefs, &new_e.e);  /* NOTE(review): nothing visible here removes the entry on dealloc */
+  } else {
+    pydef = e->msgdef;
+    Py_INCREF(pydef);  /* cached wrapper: hand the caller its own reference */
+  }
+  return pydef;
 }
 
 static PyObject *context_lookup(PyObject *self, PyObject *args)
@@ -179,7 +184,7 @@ static PyObject *context_lookup(PyObject *self, PyObject *args)
 
   struct upb_symtab_entry e;
   if(upb_context_lookup(context->context, &str, &e)) {
-    return get_or_create_def(context, &e);
+    return get_or_create_def(&e);
   } else {
     Py_RETURN_NONE;
   }
@@ -197,12 +202,13 @@ static PyObject *context_resolve(PyObject *self, PyObject *args)
 
   struct upb_symtab_entry e;
   if(upb_context_resolve(context->context, &base, &str, &e)) {
-    return get_or_create_def(context, &e);
+    return get_or_create_def(&e);
   } else {
     Py_RETURN_NONE;
   }
 }
 
+/* Callback for upb_context_enumerate below. */
 static void add_string(void *udata, struct upb_symtab_entry *entry)
 {
   PyObject *list = udata;
@@ -244,7 +250,11 @@ static PyObject *context_new(PyTypeObject *subtype,
 {
   PyUpb_Context *obj = (void*)subtype->tp_alloc(subtype, 0);
   obj->context = upb_context_new();
-  obj->created_defs = PyDict_New();
+  struct contexttab_entry e = {
+    .e = {.key = {.ptr = (char*)&obj->context, .byte_len = sizeof(void*)}},
+    .context = obj
+  };
+  upb_strtable_insert(&contexts, &e.e);
   return (void*)obj;
 }
 
@@ -252,7 +262,9 @@ static void context_dealloc(PyObject *obj)
 {
   PyUpb_Context *c = (void*)obj;
   upb_context_unref(c->context);
-  Py_DECREF(c->created_defs);
+  /* TODO: once strtable supports delete. */
+  //struct upb_string ptrstr = {.ptr = (char*)&c->context, .byte_len = sizeof(void*)};
+  //upb_strtable_delete(&contexts, &ptrstr);
   obj->ob_type->tp_free(obj);
 }
 
@@ -299,17 +311,25 @@ static PyTypeObject PyUpb_ContextType = {
   0,                                      /* tp_free */
 };
 
-PyMethodDef methods[] = {
+static PyMethodDef methods[] = {
+  {NULL, NULL}
 };
 
 PyMODINIT_FUNC
 initdefinition(void)
 {
   if(PyType_Ready(&PyUpb_ContextType) < 0) return;
-  Py_INCREF(&PyUpb_ContextType);  /* TODO: necessary? */
   if(PyType_Ready(&PyUpb_MsgDefType) < 0) return;
-  Py_INCREF(&PyUpb_MsgDefType);  /* TODO: necessary? */
 
-  PyObject *mod = Py_InitModule("upb.definition", methods);
+  /* PyModule_AddObject steals a reference.  These objects are statically
+   * allocated and must not be deleted, so we increment their refcount. */
+  Py_INCREF(&PyUpb_ContextType);
+  Py_INCREF(&PyUpb_MsgDefType);
+
+  PyObject *mod = Py_InitModule("upb.cext.definition", methods);
   PyModule_AddObject(mod, "Context", (PyObject*)&PyUpb_ContextType);
+  PyModule_AddObject(mod, "MessageDefinition", (PyObject*)&PyUpb_MsgDefType);
+
+  upb_strtable_init(&contexts, 8, sizeof(struct contexttab_entry));
+  upb_strtable_init(&msgdefs, 16, sizeof(struct msgtab_entry));
 }
diff --git a/lang_ext/python/definition.h b/lang_ext/python/definition.h
index 8731b8a..040019d 100644
--- a/lang_ext/python/definition.h
+++ b/lang_ext/python/definition.h
@@ -16,16 +16,30 @@
 extern "C" {
 #endif
 
+typedef struct {
+  PyObject_HEAD
+  struct upb_context *context;
+} PyUpb_Context;
+
 typedef struct {
   PyObject_HEAD
   struct upb_msgdef *def;
-} PyUpb_MessageDefinition;
+  PyUpb_Context *context;
+} PyUpb_MsgDef;
 
-extern PyTypeObject PyUpb_MessageDefinitionType;
+extern PyTypeObject PyUpb_MsgDefType;
 
 /* What format string should be passed to PyArg_ParseTuple to get just a raw
  * string of bytes and a length. */
-extern const char *bytes_format;
+#if PY_MAJOR_VERSION >= 3
+#define BYTES_FORMAT "y#"
+#else
+#define BYTES_FORMAT "s#"
+#endif
+
+PyUpb_MsgDef *get_or_create_msgdef(struct upb_msgdef *def);
+
+#define RETURN_BOOL(val) if(val) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; }
 
 #ifdef __cplusplus
 }  /* extern "C" */
diff --git a/lang_ext/python/pb.c b/lang_ext/python/pb.c
new file mode 100644
index 0000000..6f016b4
--- /dev/null
+++ b/lang_ext/python/pb.c
@@ -0,0 +1,919 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ * This file implements an interface to Python that is compatible
+ * (as much as possible) with proto1 (the first implementation of
+ * protocol buffers, which is only released internally to Google).
+ *
+ * The key interface we must support is ProtocolMessage.  Each message
+ * type has its own Python class that supports the ProtocolMessage
+ * interface (obj.Clear(), obj.IsInitialized(), etc) as well as
+ * message-specific accessors (obj.foo(), obj.set_foo(),
+ * obj.clear_foo(), etc).
+ *
+ * We represent these message types as instances of
+ * upb.pb.MessageType.  In other words, these objects
+ * are both instances of upb.pb.MessageType *and* classes of
+ * type MyProtoType.
+ */
+
+#include <Python.h>
+#include <stddef.h>
+#include "upb_mm.h"
+#include "definition.h"
+
+/* Opcodes that describe all of the operations you can perform on a field of a
+ * protobuf from Python.  For example, foo.has_bar() uses opcode OP_HAS. */
+typedef enum {
+  /* For non-repeated fields. */
+  OP_HAS,
+  /* For non-repeated fields that are not submessages. */
+  OP_SET,
+  /* For non-repeated message fields. */
+  OP_MUTABLE,
+
+  /* For repeated fields. */
+  OP_SIZE, OP_LIST, OP_ADD,
+
+  /* For all types of fields. */
+  OP_GET, OP_CLEAR
+} PyUpb_PbBoundFieldOpCode;
+
+const char *opcode_names[] = {
+  "OP_HAS", "OP_SET", "OP_MUTABLE", "OP_SIZE", "OP_LIST", "OP_ADD", "OP_GET", "OP_CLEAR"
+};
+
+/* Structures for the Python objects we define. */
+typedef struct {
+  PyObject_HEAD;
+  PyUpb_MsgDef *def;
+} PyUpb_PbMsgCreator;
+
+typedef struct {
+  PyObject_HEAD;
+  struct upb_mm_ref ref;
+  PyUpb_MsgDef *def;
+} PyUpb_PbMsg;
+
+typedef struct {
+  PyObject_HEAD;
+  PyUpb_PbMsg *msg;
+  struct upb_msg_fielddef *f;
+  PyUpb_PbBoundFieldOpCode code;
+} PyUpb_PbBoundFieldOp;
+
+static PyTypeObject PyUpb_PbMsgCreatorType;
+static PyTypeObject PyUpb_PbMsgType;
+static PyTypeObject PyUpb_PbBoundFieldOpType;
+
+#define Check_MsgCreator(obj) \
+  (void*)obj; do { \
+    if(!PyObject_TypeCheck(obj, &PyUpb_PbMsgCreatorType)) { \
+      PyErr_SetString(PyExc_TypeError, "must be a MessageCreator"); \
+      return NULL; \
+    } \
+  } while(0)
+
+#define Check_Message(obj) \
+  (void*)obj; do { \
+    if(!PyObject_TypeCheck(obj, &PyUpb_PbMsgType)) { \
+      PyErr_SetString(PyExc_TypeError, "must be a Message"); \
+      return NULL; \
+    } \
+  } while(0)
+
+#define Check_BoundFieldOp(obj) \
+  (void*)obj; do { \
+    if(!PyObject_TypeCheck(obj, &PyUpb_PbBoundFieldOpType)) { \
+      PyErr_SetString(PyExc_TypeError, "must be a BoundFieldOp"); \
+      return NULL; \
+    } \
+  } while(0)
+
+#define EXPECT_NO_ARGS if(!PyArg_ParseTuple(args, "")) return NULL;
+#define MMREF_TO_PYOBJ(mmref) (PyObject*)((char*)(mmref)-offsetof(PyUpb_PbMsg, ref))
+/* Ref-creation callback stored in pymm; only message refs are implemented. */
+static struct upb_mm_ref *NewPyRef(struct upb_mm_ref *fromref,
+                                   union upb_mmptr p, upb_mm_ptrtype type)
+{
+  (void)fromref;  /* Don't care. */
+  struct upb_mm_ref *ref = NULL;
+  switch(type) {
+    case UPB_MM_MSG_REF: {
+      PyUpb_PbMsg *msg = (void*)PyUpb_PbMsgType.tp_alloc(&PyUpb_PbMsgType, 0);  /* NOTE(review): not NULL-checked */
+      msg->def = get_or_create_msgdef(p.msg->def);  /* gets a ref. */
+      ref = &msg->ref;  /* embedded in the Python object; MMREF_TO_PYOBJ maps it back */
+      break;
+    }
+    case UPB_MM_STR_REF: {
+    }  /* no handling for string refs: falls through to the abort below */
+    case UPB_MM_ARR_REF: {
+    }  /* no handling for array refs: falls through to the abort below */
+    default: assert(false); abort(); break;  /* Shouldn't happen. */
+  }
+  return ref;
+}
+
+struct upb_mm pymm = {NewPyRef};
+
+/* upb.pb.BoundFieldOp ********************************************************/
+
+static PyObject *upb_to_py(union upb_value_ptr p, upb_field_type_t type)
+{
+  switch(type) {
+    default:
+      PyErr_SetString(PyExc_RuntimeError, "internal: unexpected type");
+      return NULL;
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE:
+      return PyFloat_FromDouble(*p._double);
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT:
+      return PyFloat_FromDouble(*p._float);
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64:
+      return PyLong_FromLongLong(*p.int64);
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64:
+      return PyLong_FromUnsignedLongLong(*p.uint64);
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM:
+#if PY_MAJOR_VERSION >= 3
+      return PyLong_FromLong(*p.int32);
+#else
+      return PyInt_FromLong(*p.int32);
+#endif
+
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32:
+      return PyLong_FromLong(*p.uint32);
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL:
+      RETURN_BOOL(*p._bool);
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
+      /* Py3k will distinguish between these two. */
+      return PyString_FromStringAndSize((*p.str)->ptr, (*p.str)->byte_len);
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: {
+      union upb_mmptr mmptr = upb_mmptr_read(p, UPB_MM_MSG_REF);
+      bool created;
+      struct upb_mm_ref *ref = upb_mm_getref(mmptr, UPB_MM_MSG_REF, &pymm, &created);
+      PyObject *obj = MMREF_TO_PYOBJ(ref);
+      if(!created) Py_INCREF(obj);
+      return obj;
+    }
+  }
+}
+
+/* Converts val to a C long that must lie in [lobound, hibound].  On failure
+ * raises OverflowError, sets *ok to false and returns -1; else *ok is true. */
+static long convert_to_long(PyObject *val, long lobound, long hibound, bool *ok)
+{
+  PyObject *o = PyNumber_Int(val);  /* new reference, or NULL on failure */
+  /* PyNumber_Int may hand back a long object, so use the checked accessor. */
+  long longval = o ? PyInt_AsLong(o) : -1;
+  *ok = (o != NULL) && !(longval == -1 && PyErr_Occurred());
+  Py_XDECREF(o);  /* release the temporary integer object */
+  if(!*ok) {
+    PyErr_SetString(PyExc_OverflowError, "could not convert to long");
+  } else if(longval > hibound || longval < lobound) {
+    PyErr_SetString(PyExc_OverflowError, "value outside type bounds");
+    *ok = false;
+  }
+  return *ok ? longval : -1;
+}
+
+static void set_upbscalarfield(union upb_value_ptr p, PyObject *val,
+                               upb_field_type_t type)
+{
+  /* Converts val to the C representation of "type" and stores it through p.
+   * On failure a Python exception is set (callers check PyErr_Occurred()).
+   * "tmp" holds any temporary made by PyNumber_*(); it is released on exit. */
+  PyObject *tmp = NULL;
+
+  switch(type) {
+    default:
+      PyErr_SetString(PyExc_RuntimeError, "internal error");
+      return;
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE:
+      tmp = PyNumber_Float(val);
+      if(!tmp) {
+        PyErr_SetString(PyExc_ValueError, "could not convert to double");
+        return;
+      }
+      *p._double = PyFloat_AS_DOUBLE(tmp);
+      break;
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT:
+      tmp = PyNumber_Float(val);
+      if(!tmp) {
+        PyErr_SetString(PyExc_ValueError, "could not convert to float");
+        return;
+      }
+      *p._float = PyFloat_AS_DOUBLE(tmp);
+      break;
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64: {
+#if LONG_MAX >= INT64_MAX
+      bool ok;
+      long longval = convert_to_long(val, INT64_MIN, INT64_MAX, &ok);
+      if(ok) *p.int64 = longval;  /* Must use the 64-bit member, not int32. */
+      break;
+#else
+      tmp = PyNumber_Long(val);
+      if(!tmp) {
+        PyErr_SetString(PyExc_ValueError, "could not convert to int64");
+        return;
+      }
+      *p.int64 = PyLong_AsLongLong(tmp);
+      break;
+#endif
+    }
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64:
+      tmp = PyNumber_Long(val);
+      if(!tmp) {
+        PyErr_SetString(PyExc_ValueError, "could not convert to uint64");
+        return;
+      }
+      *p.uint64 = PyLong_AsUnsignedLongLong(tmp);
+      break;
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM: {
+      bool ok;
+      long longval = convert_to_long(val, INT32_MIN, INT32_MAX, &ok);
+      if(ok) *p.int32 = longval;
+      break;
+    }
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32: {
+#if LONG_MAX >= UINT32_MAX
+      bool ok;
+      long longval = convert_to_long(val, 0, UINT32_MAX, &ok);
+      if(ok) *p.uint32 = longval;  /* Store through the unsigned member. */
+      break;
+#else
+      tmp = PyNumber_Long(val);
+      if(!tmp) {
+        PyErr_SetString(PyExc_ValueError, "could not convert to uint32");
+        return;
+      }
+      *p.uint32 = PyLong_AsUnsignedLong(tmp);
+      break;
+#endif
+    }
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL:
+      if(!PyBool_Check(val)) {
+        PyErr_SetString(PyExc_ValueError, "should be true or false");
+        return;
+      }
+      *p._bool = (val == Py_True);  /* A bool is either Py_True or Py_False. */
+      break;
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
+    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: {
+      char *buf;
+      Py_ssize_t len;
+      if(PyString_AsStringAndSize(val, &buf, &len) < 0) return;  /* TypeError. */
+      upb_string_resize(*p.str, len);
+      memcpy((*p.str)->ptr, buf, len);
+      break;
+    }
+  }
+  Py_XDECREF(tmp);
+}
+
+static bool check_py_type(PyObject *obj, upb_field_type_t type)
+{
+  /* TODO: check obj against type; this stub currently accepts everything. */
+  return true;
+}
+
+PyObject* fieldop_call(PyObject *callable, PyObject *args, PyObject *kw)
+{
+  /* tp_call for BoundFieldOp: applies op->code to field op->f of op->msg. */
+  PyUpb_PbBoundFieldOp *op = Check_BoundFieldOp(callable);
+  PyUpb_PbMsg *pymsg = op->msg;
+  struct upb_mm_ref *msgref = &(pymsg->ref);
+  struct upb_msg *msg = pymsg->ref.p.msg;
+  struct upb_msg_fielddef *f = op->f;
+  union upb_value_ptr p = upb_msg_getptr(msg, f);
+  switch(op->code) {
+    case OP_HAS:
+      /* obj.has_foo() */
+      EXPECT_NO_ARGS;
+      RETURN_BOOL(upb_msg_isset(msg, f));
+    case OP_SET: {
+      PyObject *val;
+      if(upb_isarray(f)) {
+        /* obj.set_repeatedfoo(i, val) */
+        int i;
+        if(!PyArg_ParseTuple(args, "iO", &i, &val)) return NULL;
+        if(!upb_msg_isset(msg, f) || i >= (*p.arr)->len) {
+          PyErr_SetString(PyExc_IndexError, "assignment to invalid index");
+          return NULL;
+        }
+        p = upb_array_getelementptr(*p.arr, i, f->type);
+      } else {
+        /* obj.set_foo(val) */
+        if(!PyArg_ParseTuple(args, "O", &val)) return NULL;
+      }
+      set_upbscalarfield(p, val, f->type);
+      if(PyErr_Occurred()) return NULL;
+      Py_RETURN_NONE;
+    }
+    case OP_MUTABLE: {
+      /* obj.mutable_scalarmsg() */
+      EXPECT_NO_ARGS;
+      bool created;
+      PyObject *obj = MMREF_TO_PYOBJ(upb_mm_getfieldref(msgref, f, &created));
+      if(!created) Py_INCREF(obj);
+      return obj;
+    }
+
+    /* For repeated fields. */
+    case OP_SIZE: {
+      /* obj.repeatedfoo_size() */
+      EXPECT_NO_ARGS;
+      long len = upb_msg_isset(msg, f) ? (*p.arr)->len : 0;
+      return PyInt_FromLong(len);
+    }
+    case OP_LIST:
+      /* obj.repeatedfoo_list(): unimplemented; must not fall into OP_ADD. */
+      PyErr_SetString(PyExc_NotImplementedError, "list op not implemented");
+      return NULL;
+    case OP_ADD: {
+      PyObject *val;
+      if(upb_issubmsg(f)) {
+        /* obj.add_submsgfoo()  # returns the new submsg */
+        EXPECT_NO_ARGS;
+      } else {
+        /* obj.add_scalarfoo(val) */
+        if(!PyArg_ParseTuple(args, "O", &val)) return NULL;
+        if(!check_py_type(val, f->type)) return NULL;
+      }
+
+      /* Resize before taking the element pointer: growing may move storage.
+       * NOTE(review): *p.arr is used without an upb_msg_isset() check. */
+      upb_arraylen_t len = (*p.arr)->len;
+      upb_array_resize(*p.arr, len + 1);
+      union upb_value_ptr elem_p = upb_array_getelementptr(*p.arr, len, f->type);
+      if(upb_issubmsg(f)) {
+        bool created;
+        upb_mm_ptrtype type = upb_elem_ptrtype(f);
+        union upb_mmptr mmptr = upb_mmptr_read(elem_p, type);
+        struct upb_mm_ref *valref = upb_mm_getref(mmptr, type, &pymm, &created);
+        assert(created);
+        return MMREF_TO_PYOBJ(valref);
+      } else {
+        set_upbscalarfield(elem_p, val, f->type);
+        if(PyErr_Occurred()) return NULL;
+        Py_RETURN_NONE;
+      }
+    }
+
+    /* For all fields. */
+    case OP_GET: {
+      if(upb_isarray(f)) {
+        /* obj.repeatedfoo(i) */
+        int i;
+        if(!PyArg_ParseTuple(args, "i", &i)) return NULL;
+        if(!upb_msg_isset(msg, f) || i >= (*p.arr)->len) {
+          PyErr_SetString(PyExc_IndexError, "get from invalid index");
+          return NULL;
+        }
+        p = upb_array_getelementptr(*p.arr, i, f->type);
+      } else {
+        /* obj.foo() */
+        EXPECT_NO_ARGS;
+      }
+      return upb_to_py(p, f->type);
+    }
+    case OP_CLEAR:
+      /* obj.clear_foo() */
+      EXPECT_NO_ARGS;
+      upb_mm_msgclear(msgref, f);
+      Py_RETURN_NONE;
+
+    default:
+      PyErr_SetString(PyExc_RuntimeError, "invalid bound field opcode.");
+      return NULL;
+  }
+}
+
+static void fieldop_dealloc(PyObject *obj)
+{
+  PyUpb_PbBoundFieldOp *op = (void*)obj;
+  Py_DECREF(op->msg);  /* Drops the reference taken in PyUpb_NewPbBoundFieldOp. */
+  obj->ob_type->tp_free(obj);
+}
+
+static PyObject *fieldop_repr(PyObject *obj)
+{
+  PyUpb_PbBoundFieldOp *op = Check_BoundFieldOp(obj);
+  struct upb_string *name = op->msg->def->def->descriptor->name;
+  /* Need NUL-terminated copies of the names, since PyString_FromFormat doesn't
+   * support ptr+len.  NOTE(review): neither temporary is NULL-checked. */
+  PyObject *nameobj = PyString_FromStringAndSize(name->ptr, name->byte_len);
+  struct google_protobuf_FieldDescriptorProto *fd =
+      upb_msg_field_descriptor(op->f, op->msg->def->def);
+  PyObject *fieldnameobj = PyString_FromStringAndSize(fd->name->ptr, fd->name->byte_len);
+  PyObject *ret =
+      PyString_FromFormat("<upb.pb.BoundFieldOp field='%s', op=%s, msgtype='%s'>",
+                          PyString_AS_STRING(fieldnameobj),
+                          opcode_names[op->code], PyString_AS_STRING(nameobj));
+  Py_DECREF(nameobj);
+  Py_DECREF(fieldnameobj);
+  return ret;
+}
+
+static PyTypeObject PyUpb_PbBoundFieldOpType = {
+  PyObject_HEAD_INIT(NULL)
+  0,                                      /* ob_size */
+  "upb.pb.BoundFieldOp",                  /* tp_name */
+  sizeof(PyUpb_PbBoundFieldOp),           /* tp_basicsize */
+  0,                                      /* tp_itemsize */
+  fieldop_dealloc,                        /* tp_dealloc */
+  0,                                      /* tp_print */
+  0,                                      /* tp_getattr */
+  0,                                      /* tp_setattr */
+  0,                                      /* tp_compare */
+  fieldop_repr,                           /* tp_repr */
+  0,                                      /* tp_as_number */
+  0,                                      /* tp_as_sequence */
+  0,                                      /* tp_as_mapping */
+  0,                                      /* tp_hash */
+  fieldop_call,                           /* tp_call */
+  0,                                      /* tp_str */
+  0,                                      /* tp_getattro */
+  0,                                      /* tp_setattro */
+  0,                                      /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,                     /* tp_flags */
+  0,                                      /* tp_doc */
+  0,                                      /* tp_traverse */
+  0,                                      /* tp_clear */
+  0,                                      /* tp_richcompare */
+  0,                                      /* tp_weaklistoffset */
+  0,                                      /* tp_iter */
+  0,                                      /* tp_iternext */
+  0,                                      /* tp_methods */
+  0,                                      /* tp_members */
+  0,                                      /* tp_getset */
+  0,                                      /* tp_base */
+  0,                                      /* tp_dict */
+  0,                                      /* tp_descr_get */
+  0,                                      /* tp_descr_set */
+  0,                                      /* tp_dictoffset */
+  0,                                      /* tp_init */
+  0,                                      /* tp_alloc */
+  0, /* Can't be created from Python. */  /* tp_new */
+  0,                                      /* tp_free */
+};
+
+/* upb.pb.Message *************************************************************/
+
+#define Check_SameProtoType(obj1, obj2) \
+  do { /* Makes the calling function return NULL (TypeError) on mismatch. */ \
+    if(((PyObject*)(obj1))->ob_type != ((PyObject*)(obj2))->ob_type) { \
+      PyErr_SetString(PyExc_TypeError, "other must be of the same type"); \
+      return NULL; \
+    } \
+  } while(0)
+
+static PyObject *msg_clear(PyObject *self, PyObject *args)
+{
+  (void)args;  /* Registered as METH_NOARGS, so there is nothing to parse. */
+  PyUpb_PbMsg *msg = Check_Message(self);
+  upb_mm_msgclear_all(&msg->ref);
+  Py_RETURN_NONE;
+}
+
+//static PyObject *msg_encode(PyObject *self, PyObject *args)
+//{
+//  (void)args;
+//  PyUpb_PbMsg *msg = Check_Message(self);
+//  struct upb_msgsizes *sizes = upb_msgsizes_new();
+//  struct upb_msg *upb_msg = msg->ref.p.msg;
+//  upb_msgsizes_read(sizes, upb_msg);
+//
+//  size_t size = upb_msgsizes_totalsize(sizes);
+//  PyObject *str = PyString_FromStringAndSize(NULL, size);
+//  if(!str) return NULL;
+//  char *strbuf = PyString_AS_STRING(str);
+//
+//  bool success = upb_msg_serialize_all(upb_msg, sizes, strbuf);
+//  upb_msgsizes_free(sizes);
+//  if(success) {
+//    return str;
+//  } else {
+//    /* TODO: better error than TypeError. */
+//    PyErr_SetString(PyExc_TypeError, "Error serializing protobuf.");
+//    return NULL;
+//  }
+//}
+
+static PyObject *msg_equals(PyObject *self, PyObject *other)
+{
+  PyUpb_PbMsg *msg1 = Check_Message(self);
+  PyUpb_PbMsg *msg2 = Check_Message(other);
+  Check_SameProtoType(msg1, msg2);  /* Returns NULL (TypeError) on mismatch. */
+  RETURN_BOOL(upb_msg_eql(msg1->ref.p.msg, msg2->ref.p.msg, true))
+}
+
+static PyObject *msg_isinitialized(PyObject *self, PyObject *args)
+{
+  (void)args;  /* Registered as METH_NOARGS, so there is nothing to parse. */
+  PyUpb_PbMsg *msg = Check_Message(self);
+  RETURN_BOOL(upb_msg_all_required_fields_set(msg->ref.p.msg))
+}
+
+static PyObject *msg_parsefromstring(PyObject *self, PyObject *args)
+{
+  PyUpb_PbMsg *msg = Check_Message(self);
+  char *strdata;
+  size_t strlen;  /* NOTE(review): must match what BYTES_FORMAT writes -- confirm. */
+  if(!PyArg_ParseTuple(args, BYTES_FORMAT, &strdata, &strlen))
+    return NULL;
+
+  if(upb_msg_parsestr(msg->ref.p.msg, strdata, strlen) != UPB_STATUS_OK) {
+    /* TODO: better error than TypeError. */
+    PyErr_SetString(PyExc_TypeError, "error parsing protobuf");
+    return NULL;
+  }
+  Py_RETURN_NONE;
+}
+
+static PyObject *msg_mergefromstring(PyObject *self, PyObject *args)
+{
+  PyUpb_PbMsg *msg = Check_Message(self);
+  char *strdata;  /* NOTE(review): body equals msg_parsefromstring; confirm. */
+  size_t strlen;  /* NOTE(review): must match what BYTES_FORMAT writes -- confirm. */
+  if(!PyArg_ParseTuple(args, BYTES_FORMAT, &strdata, &strlen))
+    return NULL;
+
+  if(upb_msg_parsestr(msg->ref.p.msg, strdata, strlen) != UPB_STATUS_OK) {
+    /* TODO: better error than TypeError. */
+    PyErr_SetString(PyExc_TypeError, "error parsing protobuf");
+    return NULL;
+  }
+  Py_RETURN_NONE;
+}
+
+/* Commented-out methods are TODO. */
+static PyMethodDef msg_methods[] = {
+  {"Clear", msg_clear, METH_NOARGS,
+   "Erases all data from the ProtocolMessage, reseting fields to their defaults"
+  },
+  //{"CopyFrom", msg_copyfrom, METH_O,
+  // "Copies data from another ProtocolMessage."
+  //},
+  //{"Encode", msg_encode, METH_NOARGS,
+  // "Returns a string representing the ProtocolMessage."
+  //},
+  {"Equals", msg_equals, METH_O,
+   "Returns true if the given ProtocolMessage has the same type and value."
+  },
+  {"IsInitialized", msg_isinitialized, METH_NOARGS,
+   "Returns true iff all required fields have been set."
+  },
+  //{"Merge", msg_merge, METH_O,
+  // "Merges data from the given Decoder."
+  //},
+  //{"MergeFrom", msg_mergefrom, METH_O,
+  // "Merges data from another ProtocolMessage of the same type."
+  //},
+  {"MergeFromString", msg_mergefromstring, METH_VARARGS,
+   "Merges data from the given string.  Raises an exception if this does not "
+   "result in the ProtocolMessage being initialized."
+  },
+  //{"Output", msg_output, METH_O,
+  // "Writes the ProtocolMessage to the given encoder."
+  //},
+  //{"OutputUnchecked", msg_output, METH_O,
+  // "Writes the ProtocolMessage to the given encoder, without checking "
+  // "initialization"
+  //},
+  //{"Parse", msg_parse, METH_O,
+  // "Parses data from the given Decoder."
+  //},
+  //{"ParseASCII", msg_parseascii, METH_VARARGS,
+  // "Parses a string generated by ToASCII.  Raises a ValueError if unknown "
+  // "fields are encountered."
+  //},
+  //{"ParseASCIIIgnoreUnknown", msg_parseascii, METH_VARARGS,
+  // "Parses a string generated by ToASCII.  Ignores unknown fields."
+  //},
+  {"ParseFromString", msg_parsefromstring, METH_VARARGS,
+   "Parses data from the given string.  Raises an exception if this does not "
+   "result in the ProtocolMessage being initialized."
+  },
+  //{"ToASCII", msg_toascii, METH_NOARGS,
+  // "Returns the ProtocolMessage as a human-readable ASCII string."
+  //},
+  //{"ToCompactASCII", msg_tocompactascii, METH_NOARGS,
+  // "Returns the ProtocolMessage as a human-readable ASCII string that uses "
+  // "tag numbers instead of field names."
+  //},
+  //{"ToShortASCII", msg_toshortascii, METH_NOARGS,
+  // "Returns the ProtocolMessage as a human-readable ASCII string, all on one
+  // "line."
+  //},
+  //{"TryMerge", msg_trymerge, METH_O,
+  // "Merges data from the given decoder.
+  //}
+  {NULL, NULL}
+};
+
+static bool starts_with(struct upb_string *str, struct upb_string *prefix,
+                        struct upb_string *out_str)
+{
+  /* On a match, out_str aliases the part of str after the prefix (no copy). */
+  bool match = str->byte_len >= prefix->byte_len &&
+               memcmp(str->ptr, prefix->ptr, prefix->byte_len) == 0;
+  if(match) {
+    out_str->ptr = str->ptr + prefix->byte_len;
+    out_str->byte_len = str->byte_len - prefix->byte_len;
+  }
+  return match;
+}
+
+static bool ends_with(struct upb_string *str, struct upb_string *suffix,
+                      struct upb_string *out_str)
+{
+  /* On a match, out_str aliases the part of str before the suffix (no copy);
+   * on a mismatch out_str is left untouched. */
+  if(str->byte_len < suffix->byte_len) return false;
+  const char *tail = str->ptr + str->byte_len - suffix->byte_len;
+  if(memcmp(tail, suffix->ptr, suffix->byte_len) != 0) return false;
+  out_str->ptr = str->ptr;
+  out_str->byte_len = str->byte_len - suffix->byte_len;
+  return true;
+}
+
+PyObject *PyUpb_NewPbBoundFieldOp(PyUpb_PbMsg *msgobj, struct upb_msg_fielddef *f,
+                                  PyUpb_PbBoundFieldOpCode code)
+{
+  /* Reject ops that make no sense for this kind of field (NULL, no exception). */
+  if(upb_isarray(f)) {
+    switch(code) {
+      case OP_HAS:
+      case OP_SET:  /* NOTE(review): fieldop_call handles OP_SET on arrays. */
+      case OP_MUTABLE:
+        return NULL;
+      default: break;
+    }
+  } else {
+    if(upb_issubmsg(f)) {
+      switch(code) {
+        case OP_SET:
+        case OP_SIZE:
+        case OP_LIST:
+        case OP_ADD:
+          return NULL;
+        default: break;
+      }
+    } else {
+      switch(code) {
+        case OP_MUTABLE:
+        case OP_SIZE:
+        case OP_LIST:
+        case OP_ADD:
+          return NULL;
+        default: break;
+      }
+    }
+  }
+
+  PyUpb_PbBoundFieldOp *op =
+      (void*)PyUpb_PbBoundFieldOpType.tp_alloc(&PyUpb_PbBoundFieldOpType, 0);
+  op->msg = msgobj;  /* NOTE(review): the tp_alloc result is not NULL-checked. */
+  op->f = f;
+  op->code = code;
+  Py_INCREF(op->msg);  /* Released again in fieldop_dealloc. */
+  return (PyObject*)op;
+}
+
+PyObject* msg_getattro(PyObject *obj, PyObject *attr_name)
+{
+  /* Each protobuf field results in a set of four methods for a scalar or five
+   * methods for an array.  To avoid putting 4f entries in our type dict, we
+   * dynamically scan the method to see if it is of these forms, and if so,
+   * look it up in the hash table that upb already keeps.
+   *
+   * If these repeated comparisons showed up as being a hot spot in a profile,
+   * there are several ways this dispatch could be optimized. */
+  static struct upb_string set = {.ptr = "set_", .byte_len = 4};
+  static struct upb_string has = {.ptr = "has_", .byte_len = 4};
+  static struct upb_string clear = {.ptr = "clear_", .byte_len = 6};
+  static struct upb_string size = {.ptr = "_size", .byte_len = 5};
+  static struct upb_string mutable = {.ptr = "mutable_", .byte_len = 8};
+  static struct upb_string add = {.ptr = "add_", .byte_len = 4};
+  static struct upb_string list = {.ptr = "_list", .byte_len = 5};
+
+  struct upb_string str;
+  Py_ssize_t len;
+  if(PyString_AsStringAndSize(attr_name, &str.ptr, &len) < 0) return NULL;
+  if(len > UINT32_MAX) {
+    PyErr_SetString(PyExc_TypeError,
+                    "Wow, that's a long attribute name you've got there.");
+    return NULL;
+  }
+  str.byte_len = (uint32_t)len;
+  PyUpb_PbMsg *msgobj = Check_Message(obj);
+  struct upb_msgdef *def = msgobj->ref.p.msg->def;
+
+  /* This can be a field reference iff the first letter is lowercase, because
+   * generic methods (eg. IsInitialized()) all start with uppercase. */
+  if(islower((unsigned char)str.ptr[0])) {
+    PyUpb_PbBoundFieldOpCode opcode;
+    struct upb_string field_name;
+    if(starts_with(&str, &has, &field_name))
+      opcode = OP_HAS;
+    else if(starts_with(&str, &set, &field_name))
+      opcode = OP_SET;
+    else if(starts_with(&str, &mutable, &field_name))
+      opcode = OP_MUTABLE;
+    else if(ends_with(&str, &size, &field_name))
+      opcode = OP_SIZE;
+    else if(ends_with(&str, &list, &field_name))
+      opcode = OP_LIST;
+    else if(starts_with(&str, &add, &field_name))
+      opcode = OP_ADD;
+    else if(starts_with(&str, &clear, &field_name))
+      opcode = OP_CLEAR;
+    else {
+      /* Could be a plain field reference (eg. obj.field(i)). */
+      opcode = OP_GET;
+      field_name = str;
+    }
+    struct upb_msg_fielddef *f = upb_msg_fieldbyname(def, &field_name);
+    if(f) {
+      PyObject *op = PyUpb_NewPbBoundFieldOp(msgobj, f, opcode);
+      if(op) return op;
+    }
+  }
+
+  /* Fall back on regular attribute lookup. */
+  return PyObject_GenericGetAttr(obj, attr_name);
+}
+
+static void msg_dealloc(PyObject *obj)
+{
+  PyUpb_PbMsg *msg = (void*)obj;
+  upb_mm_release(&msg->ref);  /* presumably drops this object's upb_mm ref. */
+  Py_DECREF(msg->def);
+  obj->ob_type->tp_free(obj);
+}
+
+static PyTypeObject PyUpb_PbMsgType = {
+  PyObject_HEAD_INIT(NULL)
+  0,                                      /* ob_size */
+  "upb.pb.Message",                       /* tp_name */
+  sizeof(PyUpb_PbMsg),                    /* tp_basicsize */
+  0,                                      /* tp_itemsize */
+  msg_dealloc,                            /* tp_dealloc */
+  0,                                      /* tp_print */
+  0,                                      /* tp_getattr */
+  0,                                      /* tp_setattr */
+  0,                                      /* tp_compare */
+  0,                                      /* tp_repr (TODO) */
+  0,                                      /* tp_as_number */
+  0,                                      /* tp_as_sequence */
+  0,                                      /* tp_as_mapping */
+  0,                                      /* tp_hash */
+  0,                                      /* tp_call */
+  0,                                      /* tp_str */
+  msg_getattro,                           /* tp_getattro */
+  0, /* Not allowed. */                   /* tp_setattro */
+  0,                                      /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,                     /* tp_flags */
+  0,                                      /* tp_doc */
+  0,                                      /* tp_traverse (TODO) */
+  0,                                      /* tp_clear (TODO) */
+  0,                                      /* tp_richcompare */
+  0,                                      /* tp_weaklistoffset */
+  0,                                      /* tp_iter */
+  0,                                      /* tp_iternext */
+  msg_methods,                            /* tp_methods */
+  0,                                      /* tp_members */
+  0,                                      /* tp_getset */
+  0,                                      /* tp_base */
+  0,                                      /* tp_dict */
+  0,                                      /* tp_descr_get */
+  0,                                      /* tp_descr_set */
+  0,                                      /* tp_dictoffset */
+  0,                                      /* tp_init */
+  0,                                      /* tp_alloc */
+  0, /* Can't be created from Python. */  /* tp_new */
+  0,                                      /* tp_free */
+};
+
+/* upb.pb.MessageCreator ******************************************************/
+
+static PyObject *creator_call(PyObject *callable, PyObject *args, PyObject *kw)
+{
+  PyUpb_PbMsgCreator *creator = Check_MsgCreator(callable);  /* args/kw unused. */
+  return MMREF_TO_PYOBJ(upb_mm_newmsg_ref(creator->def->def, &pymm));
+}
+
+static PyObject *creator_repr(PyObject *obj)
+{
+  PyUpb_PbMsgCreator *creator = Check_MsgCreator(obj);
+  struct upb_string *name = creator->def->def->descriptor->name;
+  /* Need a NUL-terminated copy of name, since PyString_FromFormat doesn't
+   * support ptr+len.  NOTE(review): the temporary is not NULL-checked. */
+  PyObject *nameobj = PyString_FromStringAndSize(name->ptr, name->byte_len);
+  PyObject *ret = PyString_FromFormat("<upb.pb.MessageCreator for '%s'>",
+                                      PyString_AS_STRING(nameobj));
+  Py_DECREF(nameobj);
+  return ret;
+}
+
+static void creator_dealloc(PyObject *obj)
+{
+  PyUpb_PbMsgCreator *creator = (void*)obj;
+  Py_DECREF(creator->def);  /* Drops the reference taken in creator_new. */
+  obj->ob_type->tp_free(obj);
+}
+
+static PyObject *creator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+  PyUpb_MsgDef *def;  /* Parsed before allocating, so a bad call leaks nothing. */
+  if(!PyArg_ParseTuple(args, "O!", &PyUpb_MsgDefType, &def)) return NULL;
+  PyUpb_PbMsgCreator *creator = (void*)type->tp_alloc(type, 0);
+  creator->def = def;  /* NOTE(review): the tp_alloc result is not NULL-checked. */
+  Py_INCREF(creator->def);  /* Released again in creator_dealloc. */
+  return (PyObject*)creator;
+}
+
+static PyTypeObject PyUpb_PbMsgCreatorType = {
+  PyObject_HEAD_INIT(NULL)
+  0,                                      /* ob_size */
+  "upb.pb.MessageCreator",                /* tp_name */
+  sizeof(PyUpb_PbMsgCreator),             /* tp_basicsize */
+  0,                                      /* tp_itemsize */
+  creator_dealloc,                        /* tp_dealloc */
+  0,                                      /* tp_print */
+  0,                                      /* tp_getattr */
+  0,                                      /* tp_setattr */
+  0,                                      /* tp_compare */
+  creator_repr,                           /* tp_repr */
+  0,                                      /* tp_as_number */
+  0,                                      /* tp_as_sequence */
+  0,                                      /* tp_as_mapping */
+  0,                                      /* tp_hash */
+  creator_call,                           /* tp_call */
+  0,                                      /* tp_str */
+  0,                                      /* tp_getattro */
+  0,                                      /* tp_setattro */
+  0,                                      /* tp_as_buffer */
+  Py_TPFLAGS_DEFAULT,                     /* tp_flags */
+  0,                                      /* tp_doc */
+  0,                                      /* tp_traverse */
+  0,                                      /* tp_clear */
+  0,                                      /* tp_richcompare */
+  0,                                      /* tp_weaklistoffset */
+  0,                                      /* tp_iter */
+  0,                                      /* tp_iternext */
+  0,                                      /* tp_methods */
+  0,                                      /* tp_members */
+  0,                                      /* tp_getset */
+  0,                                      /* tp_base */
+  0,                                      /* tp_dict */
+  0,                                      /* tp_descr_get */
+  0,                                      /* tp_descr_set */
+  0,                                      /* tp_dictoffset */
+  0,                                      /* tp_init */
+  0,                                      /* tp_alloc */
+  creator_new,                            /* tp_new */
+  0,                                      /* tp_free */
+};
+
+/* upb.pb module **************************************************************/
+
+static PyMethodDef methods[] = {
+  {NULL, NULL}
+};
+
+PyMODINIT_FUNC
+initpb(void)
+{
+  if(PyType_Ready(&PyUpb_PbBoundFieldOpType) < 0) return;
+  if(PyType_Ready(&PyUpb_PbMsgType) < 0) return;
+  if(PyType_Ready(&PyUpb_PbMsgCreatorType) < 0) return;
+
+  PyObject *mod = Py_InitModule("upb.cext.pb", methods);
+  if(!mod) return;  /* Module creation failed; nothing to register into. */
+  /* PyModule_AddObject steals a reference.  These objects are statically
+   * allocated and must not be deleted, so we increment their refcount. */
+  Py_INCREF(&PyUpb_PbBoundFieldOpType);
+  Py_INCREF(&PyUpb_PbMsgType);
+  Py_INCREF(&PyUpb_PbMsgCreatorType);
+  PyModule_AddObject(mod, "BoundFieldOp", (PyObject*)&PyUpb_PbBoundFieldOpType);
+  PyModule_AddObject(mod, "Message", (PyObject*)&PyUpb_PbMsgType);
+  PyModule_AddObject(mod, "MessageCreator", (PyObject*)&PyUpb_PbMsgCreatorType);
+}
diff --git a/lang_ext/python/setup.py b/lang_ext/python/setup.py
index 66862f3..53cbef1 100644
--- a/lang_ext/python/setup.py
+++ b/lang_ext/python/setup.py
@@ -1,11 +1,15 @@
 from distutils.core import setup, Extension
+
 setup(name='upb',
       version='0.1',
-      ext_modules=[Extension('upb.definition', ['definition.c'],
-                             include_dirs=['../../src', '../../descriptor'],
-                             define_macros=[("UPB_USE_PTHREADS", 1),
-                                            ("UPB_UNALIGNED_READS_OK", 1)],
-                             library_dirs=['../../src'],
-                             libraries=['upb_pic']
-      )],
+      ext_modules=[
+          Extension('upb.cext', ['definition.c', 'pb.c', 'cext.c'],
+              include_dirs=['../../src', '../../descriptor'],
+              define_macros=[("UPB_USE_PTHREADS", 1),
+                             ("UPB_UNALIGNED_READS_OK", 1)],
+              library_dirs=['../../src'],
+              libraries=['upb_pic'],
+          ),
+      ],
+      packages=['upb']
       )
diff --git a/src/upb.h b/src/upb.h
index 27bf5fc..af026f5 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -12,7 +12,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>  /* for size_t. */
-#include "upb_string.h"
+#include "descriptor_const.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -23,6 +23,9 @@ extern "C" {
 #define INLINE static inline
 #endif
 
+#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+
 /* The maximum that any submessages can be nested.  Matches proto2's limit. */
 #define UPB_MAX_NESTING 64
 
@@ -55,12 +58,22 @@ typedef uint8_t upb_wire_type_t;
  * errors, and we use it to represent exceptional circumstances. */
 typedef uint8_t upb_field_type_t;
 
+INLINE bool upb_issubmsgtype(upb_field_type_t type) {
+  return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP  ||
+         type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE;
+}
+
+INLINE bool upb_isstringtype(upb_field_type_t type) {
+  return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING  ||
+         type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
+}
+
 /* Information about a given value type (upb_field_type_t). */
 struct upb_type_info {
   uint8_t align;
   uint8_t size;
   upb_wire_type_t expected_wire_type;
-  struct upb_string ctype;
+  char *ctype;
 };
 
 /* Contains information for all .proto types.  Indexed by upb_field_type_t. */
@@ -90,6 +103,10 @@ struct upb_tag {
 
 /* Polymorphic values of .proto types *****************************************/
 
+struct upb_string;
+struct upb_array;
+struct upb_msg;
+
 /* A single .proto value.  The owner must have an out-of-band way of knowing
  * the type, so that it knows which union member to use. */
 union upb_value {
@@ -121,15 +138,83 @@ union upb_value_ptr {
   void     *_void;
 };
 
+/* Unfortunately there is no way to define this so that it can be used as a
+ * generic expression, a la:
+ *   foo(UPB_VALUE_ADDROF(bar));
+ * ...you have to use it as the initializer of a upb_value_ptr:
+ *   union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
+ *   foo(p);
+ */
+#define UPB_VALUE_ADDROF(val) {(void*)&(val)._double}
+
 /* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer.  We need
  * to know the field type to perform this operation, because we need to know
  * how much memory to copy. */
-INLINE union upb_value upb_deref(union upb_value_ptr ptr, upb_field_type_t t) {
+INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
+                                      upb_field_type_t ft) {
   union upb_value val;
-  memcpy(&val, ptr._void, upb_type_info[t].size);
+#define CASE(t, member_name) \
+  case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
+    val.member_name = *ptr.member_name; \
+    break;
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    CASE(STRING,   str)
+    CASE(BYTES,    str)
+    CASE(MESSAGE,  msg)
+    CASE(GROUP,    msg)
+    default: break;
+  }
+#undef CASE
   return val;
 }
 
+/* Writes a upb_value through a upb_value_ptr (the inverse of upb_value_read).
+ * We need to know the field type to perform this operation, because we need to
+ * know which member to store and how much memory to write. */
+INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
+                            upb_field_type_t ft) {
+#define CASE(t, member_name) \
+  case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
+    *ptr.member_name = val.member_name; \
+    break;
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    CASE(STRING,   str)
+    CASE(BYTES,    str)
+    CASE(MESSAGE,  msg)
+    CASE(GROUP,    msg)
+    default: break;
+  }
+#undef CASE
+}
+
 union upb_symbol_ref {
   struct upb_msgdef *msg;
   struct upb_enum *_enum;
diff --git a/src/upb_array.h b/src/upb_array.h
index 370f6eb..732c4aa 100644
--- a/src/upb_array.h
+++ b/src/upb_array.h
@@ -23,7 +23,7 @@
 #define UPB_ARRAY_H_
 
 #include <stdlib.h>
-#include "upb.h"
+#include "upb_msg.h"  /* Because we use upb_msg_fielddef */
 
 #ifdef __cplusplus
 extern "C" {
@@ -31,41 +31,6 @@ extern "C" {
 
 struct upb_string;
 
-/* upb_arrays can be at most 2**32 elements long. */
-typedef uint32_t upb_arraylen_t;
-
-/* Represents an array (a repeated field) of any type.  The interpretation of
- * the data in the array depends on the type. */
-struct upb_array {
-  union upb_value_ptr elements;
-  upb_arraylen_t len;     /* Number of elements in "elements". */
-  upb_arraylen_t size;    /* Memory we own (0 if by reference). */
-  void *gptr;
-};
-
-INLINE void upb_array_init(struct upb_array *arr)
-{
-  arr->elements._void = NULL;
-  arr->len = 0;
-  arr->size = 0;
-}
-
-INLINE void upb_array_uninit(struct upb_array *arr)
-{
-  if(arr->size) free(arr->elements._void);
-}
-
-INLINE struct upb_array *upb_array_new(void) {
-  struct upb_array *arr = malloc(sizeof(*arr));
-  upb_array_init(arr);
-  return arr;
-}
-
-INLINE void upb_array_free(struct upb_array *arr) {
-  upb_array_uninit(arr);
-  free(arr);
-}
-
 /* Returns a pointer to an array element.  Does not perform a bounds check! */
 INLINE union upb_value_ptr upb_array_getelementptr(
     struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
@@ -75,10 +40,17 @@ INLINE union upb_value_ptr upb_array_getelementptr(
   return ptr;
 }
 
-INLINE union upb_value upb_array_getelement(
-    struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
+/* Allocation/Deallocation/Resizing. ******************************************/
+
+INLINE struct upb_array *upb_array_new(struct upb_msg_fielddef *f)
 {
-  return upb_deref(upb_array_getelementptr(arr, n, type), type);
+  struct upb_array *arr = malloc(sizeof(*arr));
+  upb_mmhead_init(&arr->mmhead);
+  arr->elements._void = NULL;
+  arr->len = 0;
+  arr->size = 0;
+  arr->fielddef = f;
+  return arr;
 }
 
 INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
@@ -94,13 +66,10 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
   return v;
 }
 
-/* Resizes array to be "len" elements long and ensures we have write access
- * to the array (reallocating if necessary).  Returns true iff we were
- * referencing memory for the array and dropped the reference. */
-INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
-                             upb_field_type_t type)
+/* Resizes array to be "len" elements long (reallocating if necessary). */
+INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen)
 {
-  size_t type_size = upb_type_info[type].size;
+  size_t type_size = upb_type_info[arr->fielddef->type].size;
   bool dropped = false;
   bool ref = arr->size == 0;   /* Ref'ing external memory. */
   void *data = arr->elements._void;
@@ -114,39 +83,11 @@ INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
     memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size);
     dropped = true;
   }
+  /* TODO: fill with defaults. */
   arr->len = newlen;
   return dropped;
 }
 
-/* These are all overlays on upb_array, pointers between them can be cast. */
-#define UPB_DEFINE_ARRAY_TYPE(name, type) \
-  struct name ## _array { \
-    struct upb_fielddef *f; \
-    void *gptr; \
-    type *elements; \
-    upb_arraylen_t len; \
-    upb_arraylen_t size; \
-  };
-
-UPB_DEFINE_ARRAY_TYPE(upb_double, double)
-UPB_DEFINE_ARRAY_TYPE(upb_float,  float)
-UPB_DEFINE_ARRAY_TYPE(upb_int32,  int32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_int64,  int64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
-UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
-UPB_DEFINE_ARRAY_TYPE(upb_bool,   bool)
-UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
-UPB_DEFINE_ARRAY_TYPE(upb_msg,    void*)
-
-/* Defines an array of a specific message type (an overlay of upb_array). */
-#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
-#define UPB_DEFINE_MSG_ARRAY(msg_type) \
-  UPB_MSG_ARRAY(msg_type) { \
-    msg_type **elements; \
-    upb_arraylen_t len; \
-    upb_arraylen_t size; \
-  };
-
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
diff --git a/src/upb_context.c b/src/upb_context.c
index 12ad8c7..0d64c3e 100644
--- a/src/upb_context.c
+++ b/src/upb_context.c
@@ -10,6 +10,7 @@
 #include "upb_context.h"
 #include "upb_enum.h"
 #include "upb_msg.h"
+#include "upb_mm.h"
 
 /* Search for a character in a string, in reverse. */
 static int my_memrchr(char *data, char c, size_t len)
@@ -66,7 +67,7 @@ static void free_context(struct upb_context *c)
 {
   free_symtab(&c->symtab);
   for(size_t i = 0; i < c->fds_len; i++)
-    upb_msg_free((struct upb_msg*)c->fds[i]);
+    upb_msg_unref((struct upb_msg*)c->fds[i]);
   free_symtab(&c->psymtab);
   free(c->fds);
 }
@@ -77,9 +78,9 @@ void upb_context_unref(struct upb_context *c)
     upb_rwlock_wrlock(&c->lock);
     free_context(c);
     upb_rwlock_unlock(&c->lock);
+    upb_rwlock_destroy(&c->lock);  /* The lock lives inside c: destroy it first. */
+    free(c);
   }
-  free(c);
-  upb_rwlock_destroy(&c->lock);
 }
 
 bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol,
@@ -325,10 +326,9 @@ bool upb_context_addfds(struct upb_context *c,
 }
 
 bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
-  google_protobuf_FileDescriptorSet *fds =
-      (google_protobuf_FileDescriptorSet*)upb_msg_parsenew(c->fds_msg, fds_str);
-  if(!fds) return false;
-  if(!upb_context_addfds(c, fds)) return false;
+  struct upb_msg *fds = upb_msg_new(c->fds_msg);  /* Holds one counted ref. */
+  if(upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len) != UPB_STATUS_OK) { upb_msg_unref(fds); return false; }
+  if(!upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds)) return false;
 
   {
     /* We own fds now, need to keep a ref so we can free it later. */
@@ -337,7 +337,7 @@ bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
       c->fds_size *= 2;
       c->fds = realloc(c->fds, c->fds_size);
     }
-    c->fds[c->fds_len++] = fds;
+    c->fds[c->fds_len++] = (google_protobuf_FileDescriptorSet*)fds;
     upb_rwlock_unlock(&c->lock);
   }
   return true;
diff --git a/src/upb_enum.h b/src/upb_enum.h
index e43a203..9acc075 100644
--- a/src/upb_enum.h
+++ b/src/upb_enum.h
@@ -33,15 +33,6 @@ struct upb_enum_iton_entry {
   struct upb_string *string;
 };
 
-INLINE void upb_enum_ref(struct upb_enum *e) {
-  if(upb_atomic_ref(&e->refcount)) upb_context_ref(e->context);
-}
-
-INLINE void upb_enum_unref(struct upb_enum *e) {
-  if(upb_atomic_unref(&e->refcount)) upb_context_unref(e->context);
-}
-
-
 /* Initializes and frees an enum, respectively.  Caller retains ownership of
  * ed, but it must outlive e. */
 void upb_enum_init(struct upb_enum *e,
diff --git a/src/upb_inlinedefs.c b/src/upb_inlinedefs.c
index dae5c01..7a55e06 100644
--- a/src/upb_inlinedefs.c
+++ b/src/upb_inlinedefs.c
@@ -15,6 +15,7 @@
 #include "upb_array.h"
 #include "upb_context.h"
 #include "upb_enum.h"
+#include "upb_mm.h"
 #include "upb_msg.h"
 #include "upb_parse.h"
 #include "upb_serialize.h"
diff --git a/src/upb_mm.c b/src/upb_mm.c
new file mode 100644
index 0000000..853d572
--- /dev/null
+++ b/src/upb_mm.c
@@ -0,0 +1,208 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include "upb_mm.h"
+#include "upb_string.h"
+#include "upb_array.h"
+#include "upb_msg.h"
+
+void upb_msg_destroy(struct upb_msg *msg) {
+  uint32_t i;
+  for(i = 0; i < msg->def->num_fields; i++) {
+    struct upb_msg_fielddef *f = &msg->def->fields[i];
+    if(!upb_msg_isset(msg, f) || !upb_field_ismm(f)) continue;  /* No node to unref. */
+    upb_mm_ptrtype type = upb_field_ptrtype(f);
+    union upb_mmptr mmptr = upb_mmptr_read(upb_msg_getptr(msg, f), type);
+    upb_mm_unref(mmptr, type);  /* Drop this msg's ref on the msg/str/array it points at. */
+  }
+  free(msg);
+}
+
+void upb_array_destroy(struct upb_array *arr)
+{
+  if(upb_elem_ismm(arr->fielddef)) {  /* Only string/submsg elements are nodes. */
+    upb_arraylen_t i;
+    /* Unref elements. */
+    for(i = 0; i < arr->len; i++) {
+      union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type);
+      upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef);
+      union upb_mmptr mmptr = upb_mmptr_read(p, type);
+      upb_mm_unref(mmptr, type);
+    }
+  }
+  if(arr->size != 0) free(arr->elements._void);  /* size == 0: nothing of ours to free. */
+  free(arr);
+}
+
+static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type,
+                                     struct upb_msg_fielddef *f)
+{
+  union upb_mmptr p = {NULL};
+  switch(type) {  /* Allocate exactly one node; each case must break. */
+    case UPB_MM_MSG_REF: p.msg = upb_msg_new(f->ref.msg); break;
+    case UPB_MM_STR_REF: p.str = upb_string_new(); break;
+    case UPB_MM_ARR_REF: p.arr = upb_array_new(f); break;
+    default: assert(false); break;
+  }
+  return p;
+}
+
+static struct upb_mm_ref *find_or_create_ref(struct upb_mm_ref *fromref,
+                                             struct upb_mm *mm,
+                                             union upb_mmptr p, upb_mm_ptrtype type,
+                                             bool *created)
+{
+  struct upb_mmhead *head = upb_mmhead_addr(p, type);
+  struct upb_mm_ref **ref = &head->refs;
+  while(*ref && (*ref)->mm <= mm) {  /* List is ordered by mm address. */
+    if((*ref)->mm == mm) {
+      *created = false;  /* Must be stored before returning. */
+      return *ref;
+    }
+    ref = &((*ref)->next);
+  }
+  *created = true;
+  struct upb_mm_ref *newref = mm->newref_cb(fromref, p, type);
+  newref->p = p;
+  newref->type = type;
+  newref->mm = mm;
+  newref->next = *ref;  /* Splice in before the first entry with a larger mm. */
+  *ref = newref;
+  return newref;
+}
+
+struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type,
+                                 struct upb_mm *mm, bool *created)
+{
+  return find_or_create_ref(NULL, mm, p, type, created);
+}
+
+struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm)
+{
+  struct upb_msg *msg = upb_msg_new(def);
+  union upb_mmptr mmptr = {.msg = msg};
+  bool created;
+  struct upb_mm_ref *ref = find_or_create_ref(NULL, mm, mmptr, UPB_MM_MSG_REF, &created);
+  upb_mm_unref(mmptr, UPB_MM_MSG_REF);  /* Shouldn't have any counted refs. */
+  assert(created);
+  return ref;
+}
+
+struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref,
+                                      struct upb_msg_fielddef *f,
+                                      bool *refcreated)
+{
+  assert(upb_field_ismm(f));
+  upb_mm_ptrtype ptrtype = upb_field_ptrtype(f);
+  struct upb_msg *msg = msgref->p.msg;
+  union upb_mmptr val;
+  union upb_value_ptr p = upb_msg_getptr(msg, f);
+
+  /* Create the upb value if it doesn't already exist. */
+  if(!upb_msg_isset(msg, f)) {
+    upb_msg_set(msg, f);
+    val = upb_mm_newptr(ptrtype, f);
+    upb_mmptr_write(p, val, ptrtype);
+  } else {
+    val = upb_mmptr_read(p, ptrtype);
+  }
+
+  return find_or_create_ref(msgref, msgref->mm, val, ptrtype, refcreated);
+}
+
+struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i,
+                                     bool *refcreated)
+{
+  struct upb_array *arr = arrref->p.arr;
+  struct upb_msg_fielddef *f = arr->fielddef;
+  assert(upb_elem_ismm(f));
+  assert(i < arr->len);
+  union upb_value_ptr p = upb_array_getelementptr(arr, i, f->type);
+  upb_mm_ptrtype type = upb_elem_ptrtype(f);
+  union upb_mmptr val = upb_mmptr_read(p, type);
+  return find_or_create_ref(arrref, arrref->mm, val, type, refcreated);
+}
+
+void upb_mm_release(struct upb_mm_ref *ref)
+{
+  struct upb_mm_ref **ref_elem = &upb_mmhead_addr(ref->p, ref->type)->refs;
+  struct upb_mm *mm = ref->mm;
+  while(true) {
+    assert(*ref_elem);  /* Client asserts r->mm is in the list. */
+    if((*ref_elem)->mm == mm) {
+      *ref_elem = (*ref_elem)->next;  /* Remove from the list. */
+      break;
+    }
+    ref_elem = &(*ref_elem)->next;  /* Not ours: step to the next link. */
+  }
+
+  if(upb_mmhead_norefs(upb_mmhead_addr(ref->p, ref->type))) {
+    /* Destroy the dynamic object. */
+    switch(ref->type) {
+      case UPB_MM_MSG_REF:
+        upb_msg_destroy(ref->p.msg);
+        break;
+      case UPB_MM_ARR_REF:
+        upb_array_destroy(ref->p.arr);
+        break;
+      case UPB_MM_STR_REF:
+        upb_string_destroy(ref->p.str);
+        break;
+      default: assert(false); break;
+    }
+  }
+}
+
+void upb_mm_msg_set(struct upb_mm_ref *from_msg_ref, struct upb_mm_ref *to_ref,
+                    struct upb_msg_fielddef *f)
+{  /* NOTE(review): upb_mm.h declares upb_mm_msgset, not upb_mm_msg_set -- confirm. */
+  assert(upb_field_ismm(f));
+  union upb_mmptr fromval = from_msg_ref->p;
+  union upb_mmptr toval = to_ref->p;
+  union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f);
+  upb_mm_ptrtype type = upb_field_ptrtype(f);
+  if(upb_msg_isset(fromval.msg, f)) {
+    union upb_mmptr existingval = upb_mmptr_read(field_p, type);
+    if(existingval.msg == toval.msg)
+      return;  /* Setting to its existing value, do nothing. */
+    upb_mm_unref(existingval, type);  /* Drop the field's ref on the old value. */
+  }
+  upb_msg_set(fromval.msg, f);
+  upb_mmptr_write(field_p, toval, type);
+  upb_mm_ref(toval, type);  /* The field now holds its own ref on the new value. */
+}
+
+void upb_mm_msgclear(struct upb_mm_ref *from_msg_ref, struct upb_msg_fielddef *f)
+{
+  assert(upb_field_ismm(f));
+  union upb_mmptr fromval = from_msg_ref->p;
+  upb_mm_ptrtype type = upb_field_ptrtype(f);
+  if(upb_msg_isset(fromval.msg, f)) {
+    union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f);
+    union upb_mmptr existingval = upb_mmptr_read(field_p, type);
+    upb_msg_unset(fromval.msg, f);
+    upb_mm_unref(existingval, type);
+  }
+}
+
+void upb_mm_msgclear_all(struct upb_mm_ref *from)
+{
+  struct upb_msgdef *def = from->p.msg->def;
+  for(uint32_t i = 0; i < def->num_fields; i++) {
+    struct upb_msg_fielddef *f = &def->fields[i];
+    if(!upb_field_ismm(f)) continue;
+    upb_mm_msgclear(from, f);
+  }
+}
+
+void upb_mm_arr_set(struct upb_mm_ref *from, struct upb_mm_ref *to,
+                    upb_arraylen_t i, upb_field_type_t type)
+{
+  (void)from;  /* Unimplemented stub: every argument is ignored.           */
+  (void)to;    /* NOTE(review): upb_mm.h declares upb_mm_arrset(from, to,   */
+  (void)i;     /* uint32_t i) -- a different name and parameter list;       */
+  (void)type;  /* confirm which spelling callers are meant to use.          */
+}
diff --git a/src/upb_mm.h b/src/upb_mm.h
new file mode 100644
index 0000000..88cb043
--- /dev/null
+++ b/src/upb_mm.h
@@ -0,0 +1,168 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ * A parsed protobuf is represented in memory as a tree.  The three kinds of
+ * nodes in this tree are messages, arrays, and strings.  This file defines
+ * a memory-management scheme for making sure that these nodes are collected
+ * at the right times.
+ *
+ * The basic strategy is reference-counting, but with a twist.  Since any
+ * dynamic language that wishes to reference these nodes will need its own,
+ * language-specific structure, we provide two different kinds of references:
+ *
+ * - counted references.  these are references that are tracked with only a
+ *   reference count.  They are used for two separate purposes:
+ *   1. for references within the tree, from one node to another.
+ *   2. for external references into the tree, where the referer does not need
+ *      a separate message structure.
+ * - listed references.  these are references that have their own separate
+ *   data record.  these separate records are kept in a linked list.
+ */
+
+#ifndef UPB_MM_H_
+#define UPB_MM_H_
+
+#include "upb.h"
+#include "upb_string.h"
+#include "upb_array.h"
+#include "upb_msg.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Structure definitions. *****************************************************/
+
+typedef int16_t upb_mm_id;
+
+struct upb_msg;
+struct upb_array;
+struct upb_string;
+struct upb_msg_fielddef;
+
+struct upb_mm_ref;
+/* Info about a mm. */
+struct upb_mm {
+  /* fromref is set iff this call is from getfieldref or getelemref. */
+  struct upb_mm_ref *(*newref_cb)(struct upb_mm_ref *fromref,
+                                  union upb_mmptr p, upb_mm_ptrtype type);
+};
+
+struct upb_mm_ref {
+  union upb_mmptr p;
+  /* This is slightly wasteful, because the mm-specific ref will probably also
+   * contain the information about what kind of ref this is, in a different
+   * form. */
+  upb_mm_ptrtype type;
+  struct upb_mm *mm;    /* TODO: There are ways to shrink this. */
+  struct upb_mm_ref *next;  /* Linked list for refs to the same value. */
+};
+
+/* Functions for working with listed references.  *****************************/
+
+/* Create a new top-level message and create a single ref for it. */
+struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm);
+
+/* Given a pointer to an existing msg, array, or string, find a ref for this
+ * mm, creating one if necessary.  'created' indicates whether the returned
+ * reference was just created. */
+struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type,
+                                 struct upb_mm *mm, bool *created);
+
+/* f must be ismm == true.  The msg field may or may not be set (will be
+ * created if it doesn't exist).  If a ref already exists for the given field,
+ * returns it, otherwise calls the mm's newref_cb to create one.  'refcreated'
+ * indicates whether a new reference was created. */
+struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref,
+                                      struct upb_msg_fielddef *f,
+                                      bool *refcreated);
+/* i must be < the array's len. */
+struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i,
+                                     bool *refcreated);
+
+/* Remove this ref from the list for this msg.
+ * If that was the last reference, deletes the msg itself. */
+void upb_mm_release(struct upb_mm_ref *ref);
+
+void upb_mm_msgset(struct upb_mm_ref *msg, struct upb_mm_ref *to,
+                   struct upb_msg_fielddef *f);
+void upb_mm_msgclear(struct upb_mm_ref *from, struct upb_msg_fielddef *f);
+void upb_mm_msgclear_all(struct upb_mm_ref *from);
+
+void upb_mm_arrset(struct upb_mm_ref *from, struct upb_mm_ref *to, uint32_t i);
+
+/* Defined iff upb_field_ismm(f). */
+INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f);
+/* Defined iff upb_elem_ismm(f). */
+INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f);
+
+INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type);
+
+/* These methods are all a bit silly, since all branches of the case compile
+ * to the same thing (which the compiler will recognize), but we do it this way
+ * for full union correctness. */
+INLINE union upb_mmptr upb_mmptr_read(union upb_value_ptr p, upb_mm_ptrtype t)
+{
+  union upb_mmptr val;
+  switch(t) {
+    case UPB_MM_MSG_REF: val.msg = *p.msg; break;
+    case UPB_MM_STR_REF: val.str = *p.str; break;
+    case UPB_MM_ARR_REF: val.arr = *p.arr; break;
+    default: assert(false); val.msg = *p.msg; break;  /* Shouldn't happen. */
+  }
+  return val;
+}
+
+INLINE void upb_mmptr_write(union upb_value_ptr p, union upb_mmptr val,
+                            upb_mm_ptrtype t)
+{
+  switch(t) {  /* Store val through the union member that matches t. */
+    case UPB_MM_MSG_REF: *p.msg = val.msg; break;
+    case UPB_MM_STR_REF: *p.str = val.str; break;
+    case UPB_MM_ARR_REF: *p.arr = val.arr; break;
+    default: assert(false); break;  /* Shouldn't happen; write nothing. */
+  }
+}
+
+void upb_array_destroy(struct upb_array *arr);
+void upb_msg_destroy(struct upb_msg *msg);
+
+INLINE void upb_msg_unref(struct upb_msg *msg) {
+  if(upb_mmhead_unref(&msg->mmhead)) upb_msg_destroy(msg);
+}
+
+INLINE void upb_array_unref(struct upb_array *arr) {
+  if(upb_mmhead_unref(&arr->mmhead)) upb_array_destroy(arr);
+}
+
+INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type)
+{
+  switch(type) {
+    case UPB_MM_MSG_REF: upb_msg_unref(p.msg); break;
+    case UPB_MM_STR_REF: upb_string_unref(p.str); break;
+    case UPB_MM_ARR_REF: upb_array_unref(p.arr);
+  }
+}
+
+INLINE struct upb_mmhead *upb_mmhead_addr(union upb_mmptr p, upb_mm_ptrtype t)
+{
+  switch(t) {  /* Pick the mmhead through the union member that matches t. */
+    case UPB_MM_MSG_REF: return &((*p.msg).mmhead);
+    case UPB_MM_STR_REF: return &((*p.str).mmhead);
+    case UPB_MM_ARR_REF: return &((*p.arr).mmhead);
+    default: assert(false); return &((*p.msg).mmhead);  /* Shouldn't happen. */
+  }
+}
+
+INLINE void upb_mm_ref(union upb_mmptr p, upb_mm_ptrtype type)
+{
+  upb_mmhead_ref(upb_mmhead_addr(p, type));
+}
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_MM_H_ */
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 45f889d..80602dd 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -6,8 +6,9 @@
 
 #include <inttypes.h>
 #include <stdlib.h>
-#include "descriptor.h"
 #include "upb_msg.h"
+#include "descriptor.h"
+#include "upb_mm.h"
 #include "upb_parse.h"
 #include "upb_serialize.h"
 #include "upb_text.h"
@@ -47,7 +48,6 @@ bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
   /* TODO: more complete validation. */
   if(!d->set_flags.has.field) return false;
 
-  upb_atomic_refcount_init(&m->refcount, 0);
   upb_inttable_init(&m->fields_by_num, d->field->len,
                     sizeof(struct upb_fieldsbynum_entry));
   upb_strtable_init(&m->fields_by_name, d->field->len,
@@ -123,113 +123,43 @@ void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
   str_e->f.ref = ref;
 }
 
-/* Simple, one-shot parsing ***************************************************/
-
-static void *upb_msg_new(struct upb_msgdef *md)
-{
-  size_t size = md->size + (sizeof(void*) * 2);
-  struct upb_msg *msg = malloc(size);
-  memset(msg, 0, size);
-  msg->def = md;
-  return msg;
-}
+/* Parsing.  ******************************************************************/
 
-/* Allocation callbacks. */
-struct upb_array *getarray_cb(
-    void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f)
-{
-  (void)from_gptr;
-  (void)existingval;  /* Don't care -- always zero. */
-  (void)f;
-  return upb_array_new();
-}
+struct upb_msg_parser_frame {
+  struct upb_msg *msg;
+};
 
-static struct upb_string *getstring_cb(
-    void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f,
-    bool byref)
-{
-  (void)from_gptr;
-  (void)existingval;  /* Don't care -- always zero. */
-  (void)f;
-  (void)byref;
-  return upb_strnew();
-}
+struct upb_msg_parser {
+  struct upb_stream_parser s;
+  bool merge;
+  bool byref;
+  struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
+};
 
-static struct upb_msg *getmsg_cb(
-    void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f)
-{
-  (void)from_gptr;
-  (void)existingval;  /* Don't care -- always zero. */
-  return upb_msg_new(f->ref.msg);
-}
+void upb_msg_parser_reset(struct upb_msg_parser *p,
+                          struct upb_msg *msg, bool byref);
 
-struct upb_msg *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s)
-{
-  struct upb_msg_parser mp;
-  struct upb_msg *msg = upb_msg_new(md);
-  upb_msg_parser_reset(&mp, msg, false);
-  mp.getarray_cb = getarray_cb;
-  mp.getstring_cb = getstring_cb;
-  mp.getmsg_cb = getmsg_cb;
-  size_t read;
-  upb_status_t status = upb_msg_parser_parse(&mp, s->ptr, s->byte_len, &read);
-  if(status == UPB_STATUS_OK && read == s->byte_len) {
-    return msg;
-  } else {
-    upb_msg_free(msg);
-    return NULL;
-  }
-}
+/* Parses protocol buffer data out of data which has length of len.  The data
+ * need not be a complete protocol buffer.  The number of bytes parsed is
+ * returned in *read, and the next call to upb_msg_parse must supply data that
+ * is *read bytes past data in the logical stream. */
+upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
+                                  void *data, size_t len, size_t *read);
 
-/* For simple, one-shot parsing we assume that a dynamic field exists (and
- * needs to be freed) iff its set bit is set. */
-static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f)
-{
-  if(upb_isstring(f)) {
-    free((*p.str)->ptr);
-    free(*p.str);
-  } else if(upb_issubmsg(f)) {
-    upb_msg_free(*p.msg);
-  }
-}
 
-void upb_msg_free(struct upb_msg *msg)
-{
-  if(!msg) return;  /* A very free-like thing to do. */
-  struct upb_msgdef *m = msg->def;
-  for(unsigned int i = 0; i < m->num_fields; i++) {
-    struct upb_msg_fielddef *f = &m->fields[i];
-    if(!upb_msg_isset(msg, f)) continue;
-    union upb_value_ptr p = upb_msg_getptr(msg, f);
-    if(upb_isarray(f)) {
-      assert(*p.arr);
-      for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++)
-        free_value(upb_array_getelementptr(*p.arr, j, f->type), f);
-      upb_array_free(*p.arr);
-    } else {
-      free_value(p, f);
-    }
-  }
-  free(msg);
-}
-
-/* Parsing.  ******************************************************************/
 
 /* Helper function that returns a pointer to where the next value for field "f"
  * should be stored, taking into account whether f is an array that may need to
  * be allocated or resized. */
 static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
-                                         struct upb_msg_fielddef *f,
-                                         void **gptr,
-                                         upb_msg_getandref_array_cb_t getarray_cb)
+                                         struct upb_msg_fielddef *f)
 {
   union upb_value_ptr p = upb_msg_getptr(msg, f);
   if(upb_isarray(f)) {
     bool isset = upb_msg_isset(msg, f);
     size_t len = isset ? (*p.arr)->len : 0;
-    if(!isset) *p.arr = getarray_cb(*gptr, *p.arr, f);
-    upb_array_resize(*p.arr, len+1, f->type);
-    *gptr = (*p.arr)->gptr;
+    if(!isset) *p.arr = upb_array_new(f);
+    upb_array_resize(*p.arr, len+1);
     p = upb_array_getelementptr(*p.arr, len, f->type);
   }
   return p;
@@ -255,8 +185,7 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end,
   struct upb_msg_parser *mp = udata;
   struct upb_msg_fielddef *f = user_field_desc;
   struct upb_msg *msg = mp->top->msg;
-  void *gptr = upb_msg_gptr(msg);
-  union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb);
+  union upb_value_ptr p = get_value_ptr(msg, f);
   upb_msg_set(msg, f);
   UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf));
   return UPB_STATUS_OK;
@@ -269,21 +198,20 @@ static void str_cb(void *udata, uint8_t *str,
   struct upb_msg_parser *mp = udata;
   struct upb_msg_fielddef *f = udesc;
   struct upb_msg *msg = mp->top->msg;
-  void *gptr = upb_msg_gptr(msg);
-  union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb);
+  union upb_value_ptr p = get_value_ptr(msg, f);
   upb_msg_set(msg, f);
   if(avail_len != total_len) abort();  /* TODO: support streaming. */
-  bool byref = avail_len == total_len && mp->byref;
-  *p.str = mp->getstring_cb(gptr, *p.str, f, byref);
-  if(byref) {
-    upb_strdrop(*p.str);
-    (*p.str)->ptr = (char*)str;
-    (*p.str)->byte_len = avail_len;
-  } else {
-    upb_stralloc(*p.str, total_len);
+  //bool byref = avail_len == total_len && mp->byref;
+  *p.str = upb_string_new();
+  //if(byref) {
+  //  upb_strdrop(*p.str);
+  //  (*p.str)->ptr = (char*)str;
+  //  (*p.str)->byte_len = avail_len;
+  //} else {
+    upb_string_resize(*p.str, total_len);
     memcpy((*p.str)->ptr, str, avail_len);
     (*p.str)->byte_len = avail_len;
-  }
+  //}
 }
 
 static void submsg_start_cb(void *udata, void *user_field_desc)
@@ -291,22 +219,39 @@ static void submsg_start_cb(void *udata, void *user_field_desc)
   struct upb_msg_parser *mp = udata;
   struct upb_msg_fielddef *f = user_field_desc;
   struct upb_msg *oldmsg = mp->top->msg;
-  void *gptr = upb_msg_gptr(oldmsg);
-  union upb_value_ptr p = get_value_ptr(oldmsg, f, &gptr, mp->getarray_cb);
+  union upb_value_ptr p = get_value_ptr(oldmsg, f);
+  struct upb_msg **submsg = p.msg;
+  //if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) {
+  //  /* We can reuse the existing submsg. */
+  //} else {
+    *submsg = upb_msg_new(f->ref.msg);
+  //}
+  upb_msg_clear(*submsg);
   upb_msg_set(oldmsg, f);
-  *p.msg = mp->getmsg_cb(gptr, *p.msg, f);
   mp->top++;
-  mp->top->msg = *p.msg;
+  mp->top->msg = *submsg;
 }
 
 static void submsg_end_cb(void *udata)
 {
   struct upb_msg_parser *mp = udata;
+  struct upb_msg *msg = mp->top->msg;
+  /* TODO: free any remaining dynamic storage that was not reused. */
+  (void)msg;
   mp->top--;
 }
 
 /* Externally-visible functions for the msg parser. */
 
+upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len)
+{
+  struct upb_msg_parser mp;
+  upb_msg_parser_reset(&mp, msg, false);
+  size_t read;
+  upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read);
+  return ret;
+}
+
 void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref)
 {
   upb_stream_parser_reset(&s->s, s);
@@ -592,51 +537,3 @@ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive)
   }
   return true;
 }
-
-
-static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
-                     struct upb_msg_fielddef *f,
-                     google_protobuf_FieldDescriptorProto *fd,
-                     FILE *stream);
-
-static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
-                     FILE *stream)
-{
-  struct upb_msgdef *m = msg->def;
-  for(uint32_t i = 0; i < m->num_fields; i++) {
-    struct upb_msg_fielddef *f = &m->fields[i];
-    google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
-    if(!upb_msg_isset(msg, f)) continue;
-    union upb_value_ptr p = upb_msg_getptr(msg, f);
-    if(upb_isarray(f)) {
-      struct upb_array *arr = *p.arr;
-      for(uint32_t j = 0; j < arr->len; j++) {
-        union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
-        printval(printer, elem_p, f, fd, stream);
-      }
-    } else {
-      printval(printer, p, f, fd, stream);
-    }
-  }
-}
-
-static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
-                     struct upb_msg_fielddef *f,
-                     google_protobuf_FieldDescriptorProto *fd,
-                     FILE *stream)
-{
-  if(upb_issubmsg(f)) {
-    upb_text_push(printer, fd->name, stream);
-    printmsg(printer, *p.msg, stream);
-    upb_text_pop(printer, stream);
-  } else {
-    upb_text_printfield(printer, fd->name, f->type, upb_deref(p, f->type), stream);
-  }
-}
-
-void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream)
-{
-  struct upb_text_printer printer;
-  upb_text_printer_init(&printer, single_line);
-  printmsg(&printer, msg, stream);
-}
diff --git a/src/upb_msg.h b/src/upb_msg.h
index 9dc1827..abec479 100644
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@@ -52,10 +52,10 @@
 
 #include <stdbool.h>
 #include <stdint.h>
+#include <stddef.h>
 
+#include "descriptor.h"
 #include "upb.h"
-#include "upb_atomic.h"
-#include "upb_context.h"
 #include "upb_parse.h"
 #include "upb_table.h"
 
@@ -66,10 +66,11 @@ extern "C" {
 /* Message definition. ********************************************************/
 
 struct upb_msg_fielddef;
+struct upb_context;
 /* Structure that describes a single .proto message type. */
 struct upb_msgdef {
-  upb_atomic_refcount_t refcount;
   struct upb_context *context;
+  struct upb_msg *default_msg;   /* Message with all default values set. */
   struct google_protobuf_DescriptorProto *descriptor;
   struct upb_string fqname;      /* Fully qualified. */
   size_t size;
@@ -82,7 +83,6 @@ struct upb_msgdef {
   struct google_protobuf_FieldDescriptorProto **field_descriptors;
 };
 
-
 /* Structure that describes a single field in a message.  This structure is very
  * consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
  * because copies of this struct are in the hash table that is read in the
@@ -96,14 +96,6 @@ struct upb_msg_fielddef {
   upb_label_t label;
 };
 
-INLINE void upb_msgdef_ref(struct upb_msgdef *m) {
-  if(upb_atomic_ref(&m->refcount)) upb_context_ref(m->context);
-}
-
-INLINE void upb_msgdef_unref(struct upb_msgdef *m) {
-  if(upb_atomic_unref(&m->refcount)) upb_context_unref(m->context);
-}
-
 INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) {
   return upb_issubmsgtype(f->type);
 }
@@ -114,6 +106,29 @@ INLINE bool upb_isarray(struct upb_msg_fielddef *f) {
   return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
 }
 
+INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) {
+  return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
+}
+
+INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) {
+  return upb_isstring(f) || upb_issubmsg(f);
+}
+
+/* Defined iff upb_field_ismm(f). */
+INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) {
+  if(upb_isarray(f)) return UPB_MM_ARR_REF;
+  else if(upb_isstring(f)) return UPB_MM_STR_REF;
+  else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
+  else return -1;
+}
+
+/* Defined iff upb_elem_ismm(f). */
+INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) {
+  if(upb_isstring(f)) return UPB_MM_STR_REF;
+  else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
+  else return -1;
+}
+
 /* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */
 INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
     struct upb_msg_fielddef *f, struct upb_msgdef *m) {
@@ -122,14 +137,15 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
 
 /* Message structure. *********************************************************/
 
-struct upb_msg {
-  struct upb_msgdef *def;
-  void *gptr;  /* Generic pointer for use by subclasses. */
-  uint8_t data[1];
-};
-
-INLINE void *upb_msg_gptr(struct upb_msg *msg) {
-  return msg->gptr;
+/* Constructs a new, zeroed msg corresponding to the given msgdef, and having
+ * one counted reference.  Returns NULL if the allocation fails. */
+INLINE struct upb_msg *upb_msg_new(struct upb_msgdef *md) {
+  size_t size = md->size + offsetof(struct upb_msg, data);
+  struct upb_msg *msg = calloc(1, size);
+  if(!msg) return NULL;
+  upb_mmhead_init(&msg->mmhead);
+  msg->def = md;
+  return msg;
 }
 
 /* Field access. **************************************************************/
@@ -146,12 +162,6 @@ INLINE union upb_value_ptr upb_msg_getptr(struct upb_msg *msg,
   return p;
 }
 
-/* Returns a a specific field in a message. */
-INLINE union upb_value upb_msg_get(struct upb_msg *msg,
-                                   struct upb_msg_fielddef *f) {
-  return upb_deref(upb_msg_getptr(msg, f), f->type);
-}
-
 /* "Set" flag reading and writing.  *******************************************/
 
 /* All upb code and code using upb should guarantee that the set flags are
@@ -244,85 +254,10 @@ INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
 }
 
 
-/* Simple, one-shot parsing ***************************************************/
-
-/* A simple interface for parsing into a newly-allocated message.  This
- * interface should only be used when the message will be read-only with
- * respect to memory management (eg. won't add or remove internal references to
- * dynamic memory).  For more flexible (but also more complicated) interfaces,
- * see below and in upb_mm_msg.h. */
-
-/* Parses the protobuf in s (which is expected to be complete) and allocates
- * new message data to hold it.  If byref is set, strings in the returned
- * upb_msg will reference s instead of copying from it, but this requires that
- * s will live for as long as the returned message does. */
-struct upb_msg *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s);
-
-/* This function should be used to free messages that were parsed with
- * upb_msg_parsenew.  It will free the message appropriately (including all
- * submessages). */
-void upb_msg_free(struct upb_msg *msg);
-
-
-/* Parsing with (re)allocation callbacks. *************************************/
-
-/* This interface parses protocol buffers into upb_msgs, but allows the client
- * to supply allocation callbacks whenever the parser needs to obtain a string,
- * array, or submsg (a "dynamic field").  If the parser sees that a dynamic
- * field is already present (its "set bit" is set) it will use that, resizing
- * it if necessary in the case of an array.  Otherwise it will call the
- * allocation callback to obtain one.
- *
- * This may seem trivial (since nearly all clients will use malloc and free for
- * memory management), but the allocation callback can be used for more than
- * just allocation.  If we are parsing data into an existing upb_msg, the
- * allocation callback can examine any existing memory that is allocated for
- * the dynamic field and determine whether it can reuse it.  It can also
- * perform memory management like refing the new field.
- *
- * This parser is layered on top of the event-based parser in upb_parse.h.  The
- * parser is upb_mm_msg.h is layered on top of this parser.
- *
- * This parser is fully streaming-capable. */
-
-/* Should return an initialized array. */
-typedef struct upb_array *(*upb_msg_getandref_array_cb_t)(
-    void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f);
-
-/* Callback to allocate a string.  If byref is true, the client should assume
- * that the string will be referencing the input data. */
-typedef struct upb_string *(*upb_msg_getandref_string_cb_t)(
-    void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f,
-    bool byref);
-
-/* Should return a cleared message. */
-typedef struct upb_msg *(*upb_msg_getandref_msg_cb_t)(
-    void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f);
-
-struct upb_msg_parser_frame {
-  struct upb_msg *msg;
-};
-
-struct upb_msg_parser {
-  struct upb_stream_parser s;
-  bool merge;
-  bool byref;
-  struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
-  upb_msg_getandref_array_cb_t getarray_cb;
-  upb_msg_getandref_string_cb_t getstring_cb;
-  upb_msg_getandref_msg_cb_t getmsg_cb;
-};
-
-void upb_msg_parser_reset(struct upb_msg_parser *p,
-                          struct upb_msg *msg, bool byref);
-
-/* Parses protocol buffer data out of data which has length of len.  The data
- * need not be a complete protocol buffer.  The number of bytes parsed is
- * returned in *read, and the next call to upb_msg_parse must supply data that
- * is *read bytes past data in the logical stream. */
-upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
-                                  void *data, size_t len, size_t *read);
+/* Parsing ********************************************************************/
 
+/* TODO: a stream parser. */
+upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len);
 
 /* Serialization  *************************************************************/
 
@@ -336,7 +271,7 @@ upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
 struct upb_msgsizes;
 
 /* Initialize/free a upb_msgsizes for the given message. */
-void upb_msgsizes_init(struct upb_msgsizes *sizes);
+struct upb_msgsizes *upb_msgsizes_new(void);
 void upb_msgsizes_free(struct upb_msgsizes *sizes);
 
 /* Given a previously initialized sizes, recurse over the message and store its
@@ -366,6 +301,10 @@ void upb_msg_serialize_init(struct upb_msg_serialize_state *s,
 upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
                                void *buf, size_t len, size_t *written);
 
+upb_status_t upb_msg_serialize_all(struct upb_msg *msg,
+                                   struct upb_msgsizes *sizes,
+                                   void *buf);
+
 /* Text dump  *****************************************************************/
 
 bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive);
@@ -397,7 +336,8 @@ void upb_msgdef_free(struct upb_msgdef *m);
 /* Sort the given field descriptors in-place, according to what we think is an
  * optimal ordering of fields.  This can change from upb release to upb
  * release. */
-void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num);
+void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds,
+                        size_t num);
 
 /* Clients use this function on a previously initialized upb_msgdef to resolve
  * the "ref" field in the upb_msg_fielddef.  Since messages can refer to each
diff --git a/src/upb_parse.c b/src/upb_parse.c
index b7f3832..7c1ad66 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -11,7 +11,7 @@
 
 /* May want to move this to upb.c if enough other things warrant it. */
 #define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, UPB_STRLIT(#ctype)},
+#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype},
 struct upb_type_info upb_type_info[] = {
   TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE,   UPB_WIRE_TYPE_64BIT,       double)
   TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT,    UPB_WIRE_TYPE_32BIT,       float)
diff --git a/src/upb_parse.h b/src/upb_parse.h
index de4cb2c..1454dd5 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -21,16 +21,6 @@
 extern "C" {
 #endif
 
-INLINE bool upb_issubmsgtype(upb_field_type_t type) {
-  return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP  ||
-         type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE;
-}
-
-INLINE bool upb_isstringtype(upb_field_type_t type) {
-  return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING  ||
-         type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
-}
-
 /* High-level parsing interface. **********************************************/
 
 /* The general scheme is that the client registers callbacks that will be
diff --git a/src/upb_string.c b/src/upb_string.c
index 7754936..54df4f1 100644
--- a/src/upb_string.c
+++ b/src/upb_string.c
@@ -7,19 +7,28 @@
 #include <stdio.h>
 #include "upb_string.h"
 
-bool upb_strreadfile(const char *filename, struct upb_string *data) {
+/* Reads the entire contents of filename into a newly-allocated string.
+ * Returns the string, which holds one reference that the caller releases
+ * with upb_string_unref(), or NULL if the file could not be opened, sized,
+ * or read. */
+struct upb_string *upb_strreadfile(const char *filename) {
   FILE *f = fopen(filename, "rb");
-  if(!f) return false;
-  if(fseek(f, 0, SEEK_END) != 0) return false;
+  if(!f) return NULL;
+  if(fseek(f, 0, SEEK_END) != 0) goto error;
   long size = ftell(f);
-  if(size < 0) return false;
-  if(fseek(f, 0, SEEK_SET) != 0) return false;
-  data->byte_len = size;
-  upb_stralloc(data, data->byte_len);
-  if(fread(data->ptr, size, 1, f) != 1) {
-    free(data->ptr);
-    return false;
-  }
+  if(size < 0) goto error;
+  if(fseek(f, 0, SEEK_SET) != 0) goto error;
+  struct upb_string *s = upb_string_new();
+  upb_string_resize(s, size);
+  if(fread(s->ptr, size, 1, f) != 1) {
+    /* Drop our reference so the string is not leaked on a failed read. */
+    upb_string_unref(s);
+    goto error;
+  }
   fclose(f);
-  return true;
+  return s;
+
+error:
+  fclose(f);
+  return NULL;
 }
diff --git a/src/upb_string.h b/src/upb_string.h
index 9740a0b..aa62575 100644
--- a/src/upb_string.h
+++ b/src/upb_string.h
@@ -32,48 +32,35 @@ extern "C" {
 #include <stdlib.h>
 #include <string.h>
 
-/* inline if possible, emit standalone code if required. */
-#ifndef INLINE
-#define INLINE static inline
-#endif
-
-#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
-#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+#include "upb_struct.h"
 
-struct upb_string {
-  /* We expect the data to be 8-bit clean (uint8_t), but char* is such an
-   * ingrained convention that we follow it. */
-  char *ptr;
-  uint32_t byte_len;
-  uint32_t byte_size;  /* How many bytes of ptr we own. */
-};
+/* Allocation/Deallocation/Resizing. ******************************************/
 
-INLINE void upb_strinit(struct upb_string *str)
+INLINE struct upb_string *upb_string_new(void)
 {
+  struct upb_string *str = (struct upb_string*)malloc(sizeof(*str));
+  upb_mmhead_init(&str->mmhead);
   str->ptr = NULL;
   str->byte_len = 0;
   str->byte_size = 0;
+  return str;
 }
 
-INLINE void upb_struninit(struct upb_string *str)
-{
-  if(str->byte_size) free(str->ptr);
-}
-
-INLINE struct upb_string *upb_strnew(void)
+/* For internal use only. */
+INLINE void upb_string_destroy(struct upb_string *str)
 {
-  struct upb_string *str = (struct upb_string*)malloc(sizeof(*str));
-  upb_strinit(str);
-  return str;
+  if(str->byte_size != 0) free(str->ptr);
+  free(str);
 }
 
-INLINE void upb_strfree(struct upb_string *str)
+INLINE void upb_string_unref(struct upb_string *str)
 {
-  upb_struninit(str);
-  free(str);
+  if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str);
 }
 
-INLINE void upb_stralloc(struct upb_string *str, uint32_t size)
+/* Resizes the string to size, reallocating if necessary.  Does not preserve
+ * existing data. */
+INLINE void upb_string_resize(struct upb_string *str, uint32_t size)
 {
   if(str->byte_size < size) {
     /* Need to resize. */
@@ -81,12 +68,10 @@ INLINE void upb_stralloc(struct upb_string *str, uint32_t size)
     void *oldptr = str->byte_size == 0 ? NULL : str->ptr;
     str->ptr = (char*)realloc(oldptr, str->byte_size);
   }
+  str->byte_len = size;
 }
 
-INLINE void upb_strdrop(struct upb_string *str)
-{
-  upb_struninit(str);
-}
+/* Library functions. *********************************************************/
 
 INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) {
   return s1->byte_len == s2->byte_len &&
@@ -101,26 +86,26 @@ INLINE int upb_strcmp(struct upb_string *s1, struct upb_string *s2) {
 
 INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) {
   dest->byte_len = src->byte_len;
-  upb_stralloc(dest, dest->byte_len);
+  upb_string_resize(dest, dest->byte_len);
   memcpy(dest->ptr, src->ptr, src->byte_len);
 }
 
 INLINE struct upb_string *upb_strdup(struct upb_string *s) {
-  struct upb_string *copy = upb_strnew();
+  struct upb_string *copy = upb_string_new();
   upb_strcpy(copy, s);
   return copy;
 }
 
 INLINE struct upb_string *upb_strdupc(char *s) {
-  struct upb_string *copy = upb_strnew();
+  struct upb_string *copy = upb_string_new();
   copy->byte_len = strlen(s);
-  upb_stralloc(copy, copy->byte_len);
+  upb_string_resize(copy, copy->byte_len);
   memcpy(copy->ptr, s, copy->byte_len);
   return copy;
 }
 
 /* Reads an entire file into a newly-allocated string. */
-bool upb_strreadfile(const char *filename, struct upb_string *data);
+struct upb_string *upb_strreadfile(const char *filename);
 
 /* Allows defining upb_strings as literals, ie:
  *   struct upb_string str = UPB_STRLIT("Hello, World!\n");
diff --git a/src/upb_struct.h b/src/upb_struct.h
new file mode 100644
index 0000000..9c1bb2e
--- /dev/null
+++ b/src/upb_struct.h
@@ -0,0 +1,119 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ * This file defines the in-memory format for messages, arrays, and strings
+ * (which are the three dynamically-allocated structures that make up all
+ * protobufs). */
+
+#ifndef UPB_STRUCT_H
+#define UPB_STRUCT_H
+
+#include "upb.h"
+
+/* mmhead -- this is a "base class" for strings, arrays, and messages ********/
+
+struct upb_mm_ref;
+struct upb_mmhead {
+  struct upb_mm_ref *refs;  /* Head of linked list. */
+  uint32_t refcount;
+};
+
+INLINE void upb_mmhead_init(struct upb_mmhead *head) {
+  head->refs = NULL;
+  head->refcount = 1;
+}
+
+INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) {
+  return head->refcount == 0 && head->refs == NULL;
+}
+
+INLINE bool upb_mmhead_unref(struct upb_mmhead *head) {
+  head->refcount--;
+  return upb_mmhead_norefs(head);
+}
+
+INLINE void upb_mmhead_ref(struct upb_mmhead *head) {
+  head->refcount++;
+}
+
+/* Structures for msg, string, and array. *************************************/
+
+/* These are all self describing. */
+
+struct upb_msgdef;
+struct upb_msg_fielddef;
+
+struct upb_msg {
+  struct upb_mmhead mmhead;
+  struct upb_msgdef *def;
+  uint8_t data[1];
+};
+
+typedef uint32_t upb_arraylen_t;  /* can be at most 2**32-1 elements long. */
+struct upb_array {
+  struct upb_mmhead mmhead;
+  struct upb_msg_fielddef *fielddef;  /* Defines the type of the array. */
+  union upb_value_ptr elements;
+  upb_arraylen_t len;     /* Number of elements in "elements". */
+  upb_arraylen_t size;    /* Memory we own (0 if by reference). */
+};
+
+struct upb_string {
+  struct upb_mmhead mmhead;
+  /* We expect the data to be 8-bit clean (uint8_t), but char* is such an
+   * ingrained convention that we follow it. */
+  char *ptr;
+  uint32_t byte_len;
+  uint32_t byte_size;  /* How many bytes of ptr we own, 0 if we reference. */
+};
+
+/* Type-specific overlays on upb_array. ***************************************/
+
+#define UPB_DEFINE_ARRAY_TYPE(name, type) \
+  struct name ## _array { \
+    struct upb_mmhead mmhead; \
+    struct upb_msg_fielddef *fielddef; \
+    type elements; \
+    upb_arraylen_t len; \
+    upb_arraylen_t size; \
+  };
+
+UPB_DEFINE_ARRAY_TYPE(upb_double, double)
+UPB_DEFINE_ARRAY_TYPE(upb_float,  float)
+UPB_DEFINE_ARRAY_TYPE(upb_int32,  int32_t)
+UPB_DEFINE_ARRAY_TYPE(upb_int64,  int64_t)
+UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
+UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
+UPB_DEFINE_ARRAY_TYPE(upb_bool,   bool)
+UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
+UPB_DEFINE_ARRAY_TYPE(upb_msg,    void*)
+
+/* Defines an array of a specific message type (an overlay of upb_array). */
+#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
+#define UPB_DEFINE_MSG_ARRAY(msg_type) \
+  UPB_MSG_ARRAY(msg_type) { \
+    struct upb_mmhead mmhead; \
+    struct upb_msg_fielddef *fielddef; \
+    msg_type **elements; \
+    upb_arraylen_t len; \
+    upb_arraylen_t size; \
+    };
+
+/* mmptr -- a pointer which polymorphically points to one of the above. *******/
+
+union upb_mmptr {
+  struct upb_msg *msg;
+  struct upb_array *arr;
+  struct upb_string *str;
+};
+
+enum {
+  UPB_MM_MSG_REF,
+  UPB_MM_STR_REF,
+  UPB_MM_ARR_REF
+};
+typedef uint8_t upb_mm_ptrtype;
+
+#endif
diff --git a/src/upb_text.c b/src/upb_text.c
index c9aad7e..6d43152 100644
--- a/src/upb_text.c
+++ b/src/upb_text.c
@@ -5,8 +5,11 @@
  */
 
 #include <inttypes.h>
-#include "upb_text.h"
 #include "descriptor.h"
+#include "upb_text.h"
+#include "upb_string.h"
+#include "upb_msg.h"
+#include "upb_array.h"
 
 void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file)
 {
@@ -78,3 +81,60 @@ void upb_text_pop(struct upb_text_printer *p,
   print_indent(p, stream);
   fprintf(stream, "}\n");
 }
+
+/* Forward declaration: printmsg and printval are mutually recursive. */
+static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
+                     struct upb_msg_fielddef *f,
+                     google_protobuf_FieldDescriptorProto *fd,
+                     FILE *stream);
+
+/* Prints every field of msg for which upb_msg_isset() is true, in msgdef
+ * field order.  Array fields are printed one element at a time. */
+static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
+                     FILE *stream)
+{
+  struct upb_msgdef *m = msg->def;
+  for(uint32_t i = 0; i < m->num_fields; i++) {
+    struct upb_msg_fielddef *f = &m->fields[i];
+    google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
+    if(!upb_msg_isset(msg, f)) continue;
+    union upb_value_ptr p = upb_msg_getptr(msg, f);
+    if(upb_isarray(f)) {
+      struct upb_array *arr = *p.arr;
+      for(uint32_t j = 0; j < arr->len; j++) {
+        union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
+        printval(printer, elem_p, f, fd, stream);
+      }
+    } else {
+      printval(printer, p, f, fd, stream);
+    }
+  }
+}
+
+/* Prints a single value (a field, or one element of an array field).
+ * Submessages are printed recursively between upb_text_push() and
+ * upb_text_pop(); all other types go through upb_text_printfield(). */
+static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
+                     struct upb_msg_fielddef *f,
+                     google_protobuf_FieldDescriptorProto *fd,
+                     FILE *stream)
+{
+  if(upb_issubmsg(f)) {
+    upb_text_push(printer, fd->name, stream);
+    printmsg(printer, *p.msg, stream);
+    upb_text_pop(printer, stream);
+  } else {
+    upb_text_printfield(printer, fd->name, f->type, upb_value_read(p, f->type), stream);
+  }
+}
+
+
+/* Prints msg to stream using a freshly initialized text printer;
+ * single_line is passed through to upb_text_printer_init(). */
+void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream)
+{
+  struct upb_text_printer printer;
+  upb_text_printer_init(&printer, single_line);
+  printmsg(&printer, msg, stream);
+}
+
diff --git a/tools/upbc.c b/tools/upbc.c
index a324971..885a259 100644
--- a/tools/upbc.c
+++ b/tools/upbc.c
@@ -15,6 +15,8 @@
 #include "upb_enum.h"
 #include "upb_msg.h"
 #include "upb_text.h"
+#include "upb_array.h"
+#include "upb_mm.h"
 
 /* These are in-place string transformations that do not change the length of
  * the string (and thus never need to re-allocate). */
@@ -54,32 +56,24 @@ void *strtable_to_array(struct upb_strtable *t, int *size)
   return array;
 }
 
-/* The .h file defines structs for the types defined in the .proto file.  It
- * also defines constants for the enum values.
- *
- * Assumes that d has been validated. */
-static void write_h(struct upb_symtab_entry *entries[], int num_entries,
-                    char *outfile_name, char *descriptor_cident, FILE *stream)
+/* The _const.h file defines the constants (enums) defined in the .proto
+ * file. */
+static void write_const_h(struct upb_symtab_entry *entries[], int num_entries,
+                          char *outfile_name, FILE *stream)
 {
   /* Header file prologue. */
   struct upb_string *include_guard_name = upb_strdupc(outfile_name);
   to_preproc(include_guard_name);
+  /* A bit cheesy, but will do the job. */
+  include_guard_name->ptr[include_guard_name->byte_len-1] = 'C';
   fputs("/* This file was generated by upbc (the upb compiler).  "
         "Do not edit. */\n\n", stream),
   fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name));
   fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name));
-  fputs("#include <upb_string.h>\n\n", stream);
-  fputs("#include <upb_array.h>\n\n", stream);
   fputs("#ifdef __cplusplus\n", stream);
   fputs("extern \"C\" {\n", stream);
   fputs("#endif\n\n", stream);
 
-  if(descriptor_cident) {
-    fputs("struct google_protobuf_FileDescriptorSet;\n", stream);
-    fprintf(stream, "extern struct google_protobuf_FileDescriptorSet *%s;\n\n",
-            descriptor_cident);
-  }
-
   /* Enums. */
   fprintf(stream, "/* Enums. */\n\n");
   for(int i = 0; i < num_entries; i++) {  /* Foreach enum */
@@ -109,12 +103,45 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
                 UPB_STRARG(enum_val_prefix), UPB_STRARG(value_name), v->number);
         if(j != ed->value->len-1) fputc(',', stream);
         fputc('\n', stream);
-        upb_strfree(value_name);
+        upb_string_unref(value_name);
       }
     }
     fprintf(stream, "} " UPB_STRFMT ";\n\n", UPB_STRARG(enum_name));
-    upb_strfree(enum_name);
-    upb_strfree(enum_val_prefix);
+    upb_string_unref(enum_name);
+    upb_string_unref(enum_val_prefix);
+  }
+
+  /* Epilogue. */
+  fputs("#ifdef __cplusplus\n", stream);
+  fputs("}  /* extern \"C\" */\n", stream);
+  fputs("#endif\n\n", stream);
+  fprintf(stream, "#endif  /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name));
+  upb_string_unref(include_guard_name);
+}
+
+/* The .h file defines structs for the types defined in the .proto file.  It
+ * also defines constants for the enum values.
+ *
+ * Assumes that d has been validated. */
+static void write_h(struct upb_symtab_entry *entries[], int num_entries,
+                    char *outfile_name, char *descriptor_cident, FILE *stream)
+{
+  /* Header file prologue. */
+  struct upb_string *include_guard_name = upb_strdupc(outfile_name);
+  to_preproc(include_guard_name);
+  fputs("/* This file was generated by upbc (the upb compiler).  "
+        "Do not edit. */\n\n", stream),
+  fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name));
+  fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name));
+  fputs("#include <upb_struct.h>\n\n", stream);
+  fputs("#ifdef __cplusplus\n", stream);
+  fputs("extern \"C\" {\n", stream);
+  fputs("#endif\n\n", stream);
+
+  if(descriptor_cident) {
+    fputs("struct google_protobuf_FileDescriptorSet;\n", stream);
+    fprintf(stream, "extern struct google_protobuf_FileDescriptorSet *%s;\n\n",
+            descriptor_cident);
   }
 
   /* Forward declarations. */
@@ -131,7 +158,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
     fprintf(stream, "struct " UPB_STRFMT ";\n", UPB_STRARG(msg_name));
     fprintf(stream, "typedef struct " UPB_STRFMT "\n    " UPB_STRFMT ";\n\n",
             UPB_STRARG(msg_name), UPB_STRARG(msg_name));
-    upb_strfree(msg_name);
+    upb_string_unref(msg_name);
   }
 
   /* Message Declarations. */
@@ -144,8 +171,8 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
     struct upb_string *msg_name = upb_strdup(&entry->e.key);
     to_cident(msg_name);
     fprintf(stream, "struct " UPB_STRFMT " {\n", UPB_STRARG(msg_name));
+    fputs("  struct upb_mmhead mmhead;\n", stream);
     fputs("  struct upb_msgdef *def;\n", stream);
-    fputs("  void *gptr;\n", stream);
     fputs("  union {\n", stream);
     fprintf(stream, "    uint8_t bytes[%" PRIu32 "];\n", m->set_flags_bytes);
     fputs("    struct {\n", stream);
@@ -179,7 +206,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
           fprintf(stream, "  " UPB_STRFMT "* " UPB_STRFMT ";\n",
                   UPB_STRARG(type_name), UPB_STRARG(fd->name));
         }
-        upb_strfree(type_name);
+        upb_string_unref(type_name);
       } else if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
         static char* c_types[] = {
           "", "struct upb_double_array*", "struct upb_float_array*",
@@ -208,7 +235,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
     fputs("};\n", stream);
     fprintf(stream, "UPB_DEFINE_MSG_ARRAY(" UPB_STRFMT ")\n\n",
             UPB_STRARG(msg_name));
-    upb_strfree(msg_name);
+    upb_string_unref(msg_name);
   }
 
   /* Epilogue. */
@@ -216,7 +243,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
   fputs("}  /* extern \"C\" */\n", stream);
   fputs("#endif\n\n", stream);
   fprintf(stream, "#endif  /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name));
-  upb_strfree(include_guard_name);
+  upb_string_unref(include_guard_name);
 }
 
 /* Format of table entries that we use when analyzing data structures for
@@ -300,24 +327,28 @@ static void add_strings_from_msg(void *data, struct upb_msgdef *m,
 struct typetable_entry *get_or_insert_typeentry(struct upb_strtable *t,
                                                 struct upb_msg_fielddef *f)
 {
-  struct upb_string type_name = upb_issubmsg(f) ? f->ref.msg->fqname :
-                                                  upb_type_info[f->type].ctype;
-  struct typetable_entry *type_e = upb_strtable_lookup(t, &type_name);
+  struct upb_string *type_name = upb_issubmsg(f) ? upb_strdup(&f->ref.msg->fqname) :
+                                                   upb_strdupc(upb_type_info[f->type].ctype);
+  struct typetable_entry *type_e = upb_strtable_lookup(t, type_name);
   if(type_e == NULL) {
     struct typetable_entry new_type_e = {
-      .e = {.key = type_name}, .field = f, .cident = upb_strdup(&type_name),
+      .e = {.key = *type_name}, .field = f, .cident = upb_strdup(type_name),
       .values = NULL, .values_size = 0, .values_len = 0,
       .arrays = NULL, .arrays_size = 0, .arrays_len = 0
     };
     to_cident(new_type_e.cident);
+    assert(upb_strtable_lookup(t, type_name) == NULL);
+    assert(upb_strtable_lookup(t, &new_type_e.e.key) == NULL);
     upb_strtable_insert(t, &new_type_e.e);
-    type_e = upb_strtable_lookup(t, &type_name);
+    type_e = upb_strtable_lookup(t, type_name);
     assert(type_e);
+  } else {
+    upb_string_unref(type_name);
   }
   return type_e;
 }
 
-static void add_value(union upb_value value, struct upb_msg_fielddef *f,
+static void add_value(union upb_value_ptr p, struct upb_msg_fielddef *f,
                       struct upb_strtable *t)
 {
   struct typetable_entry *type_e = get_or_insert_typeentry(t, f);
@@ -325,7 +356,7 @@ static void add_value(union upb_value value, struct upb_msg_fielddef *f,
     type_e->values_size = UPB_MAX(type_e->values_size * 2, 4);
     type_e->values = realloc(type_e->values, sizeof(*type_e->values) * type_e->values_size);
   }
-  type_e->values[type_e->values_len++] = value;
+  type_e->values[type_e->values_len++] = upb_value_read(p, f->type);
 }
 
 static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t)
@@ -353,7 +384,7 @@ static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t
 
       /* Add the individual values in the array. */
       for(uint32_t j = 0; j < arr->len; j++)
-        add_value(upb_array_getelement(arr, j, f->type), f, t);
+        add_value(upb_array_getelementptr(arr, j, f->type), f, t);
 
       /* Add submsgs.  We must do this separately so that the msgs in this
        * array are contiguous (and don't have submsgs of the same type
@@ -362,7 +393,7 @@ static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t
         add_submsgs(*upb_array_getelementptr(arr, j, f->type).msg, f->ref.msg, t);
     } else {
       if(!upb_issubmsg(f)) continue;
-      add_value(upb_deref(p, f->type), f, t);
+      add_value(p, f, t);
       add_submsgs(*p.msg, f->ref.msg, t);
     }
   }
@@ -445,7 +476,8 @@ static void write_message_c(void *data, struct upb_msgdef *m,
       .type = GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE,
       .ref = {.msg = m}
   };
-  add_value(val, &fake_field, &types);
+  union upb_value_ptr p = UPB_VALUE_ADDROF(val);
+  add_value(p, &fake_field, &types);
   add_submsgs(data, m, &types);
 
   /* Emit foward declarations for all msgs of all types, and define arrays. */
@@ -503,7 +535,7 @@ static void write_message_c(void *data, struct upb_msgdef *m,
         for(unsigned int j = 0; j < m->num_fields; j++) {
           struct upb_msg_fielddef *f = &m->fields[j];
           google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
-          union upb_value val = upb_msg_get(msgdata, f);
+          union upb_value val = upb_value_read(upb_msg_getptr(msgdata, f), f->type);
           fprintf(stream, "    ." UPB_STRFMT " = ", UPB_STRARG(fd->name));
           if(!upb_msg_isset(msgdata, f)) {
             fputs("0,   /* Not set. */", stream);
@@ -571,7 +603,7 @@ static void write_message_c(void *data, struct upb_msgdef *m,
 
   /* Free tables. */
   for(e = upb_strtable_begin(&types); e; e = upb_strtable_next(&types, &e->e)) {
-    upb_strfree(e->cident);
+    upb_string_unref(e->cident);
     free(e->values);
     free(e->arrays);
   }
@@ -641,16 +673,17 @@ int main(int argc, char *argv[])
   if(!outfile_base) outfile_base = input_file;
 
   /* Read input file. */
-  struct upb_string *descriptor = upb_strnew();
-  if(!upb_strreadfile(input_file, descriptor))
+  struct upb_string *descriptor = upb_strreadfile(input_file);
+  if(!descriptor)
     error("Couldn't read input file.");
 
   /* Parse input file. */
   struct upb_context *c = upb_context_new();
-  struct upb_msg *fds_msg = upb_msg_parsenew(c->fds_msg, descriptor);
-  google_protobuf_FileDescriptorSet *fds = (void*)fds_msg;
-  if(!fds)
+  struct upb_msg *fds_msg = upb_msg_new(c->fds_msg);
+  if(upb_msg_parsestr(fds_msg, descriptor->ptr, descriptor->byte_len) != UPB_STATUS_OK)
     error("Failed to parse input file descriptor.");
+  //upb_msg_print(fds_msg, false, stderr);
+  google_protobuf_FileDescriptorSet *fds = (void*)fds_msg;
   if(!upb_context_addfds(c, fds))
     error("Failed to resolve symbols in descriptor.\n");
 
@@ -666,17 +699,21 @@ int main(int argc, char *argv[])
 
   /* Emit output files. */
   const int maxsize = 256;
-  char h_filename[maxsize], c_filename[maxsize];
+  char h_filename[maxsize], h_const_filename[maxsize], c_filename[maxsize];
   if(snprintf(h_filename, maxsize, "%s.h", outfile_base) >= maxsize ||
-     snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize)
+     snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize ||
+     snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize)
     error("File base too long.\n");
 
   FILE *h_file = fopen(h_filename, "w");
   if(!h_file) error("Failed to open .h output file");
+  FILE *h_const_file = fopen(h_const_filename, "w");
+  if(!h_const_file) error("Failed to open _const.h output file");
 
   int symcount;
   struct upb_symtab_entry **entries = strtable_to_array(&c->symtab, &symcount);
   write_h(entries, symcount, h_filename, cident, h_file);
+  write_const_h(entries, symcount, h_filename, h_const_file);
   free(entries);
   if(cident) {
     FILE *c_file = fopen(c_filename, "w");
@@ -684,10 +721,11 @@ int main(int argc, char *argv[])
     write_message_c(fds, c->fds_msg, cident, h_filename, argc, argv, input_file, c_file);
     fclose(c_file);
   }
-  upb_msg_free(fds_msg);
+  upb_msg_unref(fds_msg);
   upb_context_unref(c);
-  upb_strfree(descriptor);
+  upb_string_unref(descriptor);
   fclose(h_file);
+  fclose(h_const_file);
 
   return 0;
 }
-- 
cgit v1.2.3