13 files changed, 961 insertions, 48 deletions
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..8b52c1d
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "third_party/protobuf"]
+	path = third_party/protobuf
+	url = https://github.com/google/protobuf.git
diff --git a/Makefile b/Makefile
index 597476c..390e7af 100644
--- a/Makefile
+++ b/Makefile
@@ -110,7 +110,7 @@ clean_leave_profile:
 	@rm -rf obj lib
 	@rm -f tests/google_message?.h
 	@rm -f tests/json/test.upbdefs.o
-	@rm -f $(TESTS) tests/testmain.o tests/t.*
+	@rm -f $(TESTS) tests/testmain.o tests/t.* tests/conformance_upb
 	@rm -rf tools/upbc deps
 	@rm -rf upb/bindings/python/build
 	@rm -f upb/bindings/ruby/Makefile
@@ -148,7 +148,9 @@ make_objs_cc = $$(patsubst upb/$$(pc).cc,obj/upb/$$(pc).$(1),$$($$(call to_srcs,
 # Core libraries (ie. not bindings). ###############################################################
 
 upb_SRCS = \
+  upb/decode.c \
   upb/def.c \
+  upb/encode.c \
   upb/handlers.c \
   upb/msg.c \
   upb/refcounted.c \
@@ -361,6 +363,21 @@ test:
 	done;
 	@echo "All tests passed!"
 
+obj/conformance_protos: obj/conformance_protos.pb tools/upbc
+	cd obj && ../tools/upbc conformance_protos.pb && touch conformance_protos
+
+obj/conformance_protos.pb: third_party/protobuf/autogen.sh
+	protoc -Ithird_party/protobuf/conformance -Ithird_party/protobuf/src --include_imports \
+	  third_party/protobuf/conformance/conformance.proto \
+	  third_party/protobuf/src/google/protobuf/test_messages_proto3.proto \
+	  -o obj/conformance_protos.pb
+
+third_party/protobuf/autogen.sh: .gitmodules  # Checks out the protobuf submodule.
+	git submodule init && git submodule update
+
+tests/conformance_upb: tests/conformance_upb.c lib/libupb.a obj/conformance_protos
+	$(CC) -o tests/conformance_upb tests/conformance_upb.c -Iobj -I. $(CPPFLAGS) $(CFLAGS) obj/conformance.upb.c obj/google/protobuf/*.upb.c lib/libupb.a
+
 
 # Google protobuf binding ######################################################
 
diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c
index 9f83e80..e1221b2 100644
--- a/tests/conformance_upb.c
+++ b/tests/conformance_upb.c
@@ -4,6 +4,7 @@
 
 #include <errno.h>
 #include <stdarg.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 
@@ -52,7 +53,7 @@ void DoTest(
 
       if (!test_message) {
         /* TODO(haberman): return details. */
-        static char msg[] = "Parse error (no more details available).";
+        static const char msg[] = "Parse error (no more details available).";
         conformance_ConformanceResponse_set_parse_error(
             response, upb_stringview_make(msg, sizeof(msg)));
         return;
@@ -60,20 +61,20 @@ void DoTest(
       break;
 
     case conformance_ConformanceRequest_payload_json_payload: {
-      static char msg[] = "JSON support not yet implemented.";
+      static const char msg[] = "JSON support not yet implemented.";
       conformance_ConformanceResponse_set_skipped(
           response, upb_stringview_make(msg, sizeof(msg)));
       return;
     }
 
     case conformance_ConformanceRequest_payload_NOT_SET:
-      fprintf(stderr, "conformance_upb: Request didn't have payload.");
-      exit(1);
+      fprintf(stderr, "conformance_upb: Request didn't have payload.\n");
+      return;
   }
 
   switch (conformance_ConformanceRequest_requested_output_format(request)) {
     case conformance_UNSPECIFIED:
-      fprintf(stderr, "conformance_upb: Unspecified output format.");
+      fprintf(stderr, "conformance_upb: Unspecified output format.\n");
       exit(1);
 
     case conformance_PROTOBUF: {
@@ -81,8 +82,10 @@ void DoTest(
       char *serialized = protobuf_test_messages_proto3_TestAllTypes_serialize(
           test_message, env, &serialized_len);
       if (!serialized) {
-        fprintf(stderr, "conformance_upb: Error serialiing.");
-        exit(1);
+        static const char msg[] = "Error serializing.";
+        conformance_ConformanceResponse_set_serialize_error(
+            response, upb_stringview_make(msg, sizeof(msg)));
+        return;
       }
       conformance_ConformanceResponse_set_protobuf_payload(
           response, upb_stringview_make(serialized, serialized_len));
@@ -90,14 +93,14 @@ void DoTest(
     }
 
     case conformance_JSON: {
-      static char msg[] = "JSON support not yet implemented.";
+      static const char msg[] = "JSON support not yet implemented.";
       conformance_ConformanceResponse_set_skipped(
           response, upb_stringview_make(msg, sizeof(msg)));
       break;
     }
 
     default:
-      fprintf(stderr, "conformance_upb: Unknown output format: %d",
+      fprintf(stderr, "conformance_upb: Unknown output format: %d\n",
               conformance_ConformanceRequest_requested_output_format(request));
       exit(1);
   }
@@ -111,7 +114,7 @@ bool DoTestIo() {
   char *serialized_input;
   char *serialized_output;
   uint32_t input_size;
-  size_t output_size;
+  size_t output_size = 0;
   conformance_ConformanceRequest *request;
   conformance_ConformanceResponse *response;
 
diff --git a/third_party/protobuf b/third_party/protobuf
new file mode 160000
+Subproject 6bd51a59df41b99058ec8c2b03a177a218267ce
diff --git a/tools/make_c_api.lua b/tools/make_c_api.lua
index aaf5d1e..62fd370 100644
--- a/tools/make_c_api.lua
+++ b/tools/make_c_api.lua
@@ -47,7 +47,6 @@ local function to_preproc(...)
   return string.upper(to_cident(...))
 end
 
-
 -- Strips away last path element, ie:
 --   foo.Bar.Baz -> foo.Bar
 local function remove_name(name)
@@ -60,6 +59,10 @@ local function remove_name(name)
   return string.sub(name, 1, package_end)
 end
 
+local function enum_value_symbol(enumdef, name)  -- C symbol: <cident of the enum's parent scope>_<name>
+  return to_cident(remove_name(enumdef:full_name())) .. "_" .. name
+end
+
 local function dump_enum_vals(enumdef, append)
   local enum_vals = {}
 
@@ -96,7 +99,7 @@ local function dump_enum_vals(enumdef, append)
   local cident = to_cident(remove_name(enumdef:full_name()))
   for i, pair in ipairs(enum_vals) do
     k, v = pair[1], pair[2]
-    append('  %s = %d', cident .. "_" .. k, v)
+    append('  %s = %d', enum_value_symbol(enumdef, k), v)
     if i == #enum_vals then
       append('\n')
     else
@@ -105,6 +108,20 @@ local function dump_enum_vals(enumdef, append)
   end
 end
 
+-- Returns, as a string, the C expression for a field's default value.
+local function field_default(field)
+  local ftype = field:type()
+  if ftype == upb.TYPE_MESSAGE then
+    return "NULL"  -- submessage fields default to a null pointer
+  elseif ftype == upb.TYPE_STRING or ftype == upb.TYPE_BYTES then
+    local default = field:default() or ""  -- a missing default means ""
+    return string.format('upb_stringview_make("%s", strlen("%s"))', default, default)
+  elseif ftype == upb.TYPE_ENUM then
+    return enum_value_symbol(field:subdef(), field:default())
+  end
+  return field:default()
+end
+
 local function ctype(field)
   if field:label() == upb.LABEL_REPEATED then
     return "upb_array*"
@@ -134,19 +151,18 @@ end
 local function field_layout_rank(field)
   -- Order:
   --   1, 2, 3. primitive fields (8, 4, 1 byte)
-  --   4. oneof fields
-  --   5. string fields
-  --   6. submessage fields
-  --   7. repeated fields
+  --   4. string fields
+  --   5. submessage fields
+  --   6. repeated fields
   local rank
   if field:containing_oneof() then
-    rank = 4
+    rank = 100  -- These go last (actually we skip them).
   elseif field:label() == upb.LABEL_REPEATED then
-    rank = 7
-  elseif field:type() == upb.TYPE_MESSAGE then
     rank = 6
-  elseif field:type() == upb.TYPE_STRING or field:type() == upb.TYPE_BYTES then
+  elseif field:type() == upb.TYPE_MESSAGE then
     rank = 5
+  elseif field:type() == upb.TYPE_STRING or field:type() == upb.TYPE_BYTES then
+    rank = 4
   elseif field:type() == upb.TYPE_BOOL then
     rank = 3
   elseif field:type() == upb.TYPE_FLOAT or
@@ -257,6 +273,8 @@ local function write_c_file(filedef, hfilename, append)
   emit_file_warning(filedef, append)
 
   append('#include <stddef.h>\n')
+  append('#include "upb/decode.h"\n\n')
+  append('#include "upb/encode.h"\n\n')
   append('#include "upb/msg.h"\n')
   append('#include "upb/upb.h"\n')
   append('#include "%s"\n\n', hfilename)
@@ -273,13 +291,29 @@ local function write_c_file(filedef, hfilename, append)
 
     local fields_array_ref = "NULL"
     local submsgs_array_ref = "NULL"
+    local oneofs_array_ref = "NULL"
     local field_count = 0
     local submsg_count = 0
     local submsg_set = {}
     local submsg_indexes = {}
     local hasbit_count = 0
     local hasbit_indexes = {}
-    -- TODO(haberman): oneofs
+    local oneof_count = 0
+    local oneof_indexes = {}
+
+    -- Create a layout order for oneofs.
+    local oneofs_layout_order = {}
+    for oneof in msg:oneofs() do
+      table.insert(oneofs_layout_order, oneof)
+    end
+    table.sort(oneofs_layout_order, function(a, b)
+      return a:name() < b:name()
+    end)
+
+    for _, oneof in ipairs(oneofs_layout_order) do
+      oneof_indexes[oneof] = oneof_count
+      oneof_count = oneof_count + 1
+    end
 
     -- Create a layout order for fields.  We use this order for the struct and
     -- for offsets, but our list of fields we keep in field number order.
@@ -301,6 +335,8 @@ local function write_c_file(filedef, hfilename, append)
     end)
 
     append('struct %s {\n', msgname)
+
+    -- Non-oneof fields.
     for _, field in ipairs(fields_layout_order) do
       field_count = field_count + 1
 
@@ -309,15 +345,46 @@ local function write_c_file(filedef, hfilename, append)
         submsg_set[field:subdef()] = true
       end
 
-      if has_hasbit(field) then
-        hasbit_indexes[field] = hasbit_count
-        hasbit_count = hasbit_count + 1
+      if field:containing_oneof() then
+        -- Do nothing now
+      else
+        if has_hasbit(field) then
+          hasbit_indexes[field] = hasbit_count
+          hasbit_count = hasbit_count + 1
+        end
+
+        append('  %s %s;\n', ctype(field), field:name())
       end
+    end
 
-      append('  %s %s;\n', ctype(field), field:name())
+    local oneof_last_fields = {}
+    -- Oneof fields.
+    for oneof in msg:oneofs() do
+      local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name())
+      append('  union {\n')
+      oneof_last_fields[oneof] = ""
+      for field in oneof:fields() do
+        oneof_last_fields[oneof] = field:name()
+        append('    %s %s;\n', ctype(field), field:name())
+      end
+      append('  } %s;\n', oneof:name())
+      append('  %s_oneofcases %s_case;\n', fullname, oneof:name())
     end
+
     append('};\n\n')
 
+    if oneof_count > 0 then
+      local oneofs_array_name = msgname .. "_oneofs"
+      oneofs_array_ref = "&" .. oneofs_array_name .. "[0]"
+      append('static const upb_msglayout_oneofinit_v1 %s[%s] = {\n',
+             oneofs_array_name, oneof_count)
+      for _, oneof in ipairs(oneofs_layout_order) do
+        append('  {offsetof(%s, %s), offsetof(%s, %s_case)},\n',
+               msgname, oneof:name(), msgname, oneof:name())
+      end
+      append('};\n\n')
+    end
+
     if submsg_count > 0 then
       -- TODO(haberman): could save a little bit of space by only generating a
       -- "submsgs" array for every strongly-connected component.
@@ -354,11 +421,14 @@ local function write_c_file(filedef, hfilename, append)
         if field:type() == upb.TYPE_MESSAGE then
           submsg_index = submsg_indexes[field:subdef()]
         end
+        if field:containing_oneof() then
+          oneof_index = oneof_indexes[field:containing_oneof()]
+        end
         -- TODO(haberman): oneofs.
         append('  {%s, offsetof(%s, %s), %s, %s, %s, %s, %s},\n',
                field:number(),
                msgname,
-               field:name(),
+               (field:containing_oneof() and field:containing_oneof():name()) or field:name(),
                hasbit_indexes[field] or "-1",
                oneof_index,
                submsg_index,
@@ -371,7 +441,7 @@ local function write_c_file(filedef, hfilename, append)
     append('const upb_msglayout_msginit_v1 %s_msginit = {\n', msgname)
     append('  %s,\n', submsgs_array_ref)
     append('  %s,\n', fields_array_ref)
-    append('  NULL, /* TODO. oneofs */\n')
+    append('  %s,\n', oneofs_array_ref)
     append('  NULL, /* TODO. default_msg */\n')
     append('  UPB_ALIGNED_SIZEOF(%s), %s, %s, %s, %s\n',
            msgname, field_count,
@@ -390,36 +460,49 @@ local function write_c_file(filedef, hfilename, append)
 
     append('%s *%s_parsenew(upb_stringview buf, upb_env *env) {\n',
            msgname, msgname)
-    append('  UPB_UNUSED(buf);\n')
-    append('  UPB_UNUSED(env);\n')
-    append('  return NULL;\n')
+    append('  %s *msg = %s_new(env);\n', msgname, msgname)
+    append('  if (upb_decode(buf, msg, &%s_msginit, env)) {\n', msgname)
+    append('    return msg;\n')
+    append('  } else {\n')
+    append('    return NULL;\n')
+    append('  }\n')
     append('}\n')
 
     append('char *%s_serialize(%s *msg, upb_env *env, size_t *size) {\n',
            msgname, msgname)
-    append('  UPB_UNUSED(msg);\n')
-    append('  UPB_UNUSED(env);\n')
-    append('  UPB_UNUSED(size);\n')
-    append('  return NULL; /* TODO. */\n')
+    append('  return upb_encode(msg, &%s_msginit, env, size);\n', msgname)
     append('}\n')
 
     for field in msg:fields() do
       local typename = ctype(field)
       append('%s %s_%s(const %s *msg) {\n',
              typename, msgname, field:name(), msgname);
-      append('  return msg->%s;\n', field:name())
+      if field:containing_oneof() then
+        local oneof = field:containing_oneof()
+        append('  return msg->%s_case == %s ? msg->%s.%s : %s;\n',
+               oneof:name(), field:number(), oneof:name(), field:name(),
+               field_default(field))
+      else
+        append('  return msg->%s;\n', field:name())
+      end
       append('}\n')
       append('void %s_set_%s(%s *msg, %s value) {\n',
              msgname, field:name(), msgname, typename);
-      append('  msg->%s = value;\n', field:name())
+      if field:containing_oneof() then
+        local oneof = field:containing_oneof()
+        append('  msg->%s.%s = value;\n', oneof:name(), field:name())
+        append('  msg->%s_case = %s;\n', oneof:name(), field:number())
+      else
+        append('  msg->%s = value;\n', field:name())
+      end
       append('}\n')
     end
 
     for oneof in msg:oneofs() do
       local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name())
       append('%s_oneofcases %s_case(const %s *msg) {\n', fullname, fullname, msgname)
-      append('  return 0;  /* TODO. */')
-      append('}')
+      append('  return msg->%s_case;\n', oneof:name())
+      append('}\n')
     end
   end
 end
diff --git a/upb/decode.c b/upb/decode.c
new file mode 100644
index 0000000..3b2ea54
--- /dev/null
+++ b/upb/decode.c
@@ -0,0 +1,247 @@
+
+#include "upb/decode.h"
+
+typedef enum {  /* Wire type: the low three bits of every field tag. */
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5
+} upb_wiretype_t;
+
+static void upb_decode_seterr(upb_env *env, const char *msg) {
+  upb_status status = UPB_STATUS_INIT;
+  upb_status_seterrmsg(&status, msg);  /* Wrap msg in a fresh status... */
+  upb_env_reporterror(env, &status);   /* ...and report it through env. */
+}
+
+/* Reads one base-128 varint from [*ptr, limit) into *val.  *ptr moves past the
+ * varint only on success; input that ends early or runs past 10 bytes fails. */
+static bool upb_decode_varint(const char **ptr, const char *limit,
+                              uint64_t *val) {
+  const char *cur = *ptr;
+  int shift;
+  *val = 0;
+  for (shift = 0; ; shift += 7) {
+    uint8_t b;
+    if (shift == 70 || cur == limit) {
+      return false;
+    }
+    b = *cur++;
+    *val |= (uint64_t)(b & 0x7F) << shift;
+    if ((b & 0x80) == 0) {
+      break;  /* No continuation bit: this was the last byte. */
+    }
+  }
+  *ptr = cur;
+  return true;
+}
+
+/* Like upb_decode_varint(), but fails when the value exceeds UINT32_MAX. */
+static bool upb_decode_varint32(const char **ptr, const char *limit,
+                                uint32_t *val) {
+  uint64_t wide;
+  bool ok = upb_decode_varint(ptr, limit, &wide) && wide <= UINT32_MAX;
+  if (ok) {
+    *val = (uint32_t)wide;
+  }
+  return ok;
+}
+
+/* Linear scan of the layout's field table; NULL when no field has the number. */
+static const upb_msglayout_fieldinit_v1 *upb_find_field(
+    const upb_msglayout_msginit_v1 *l, uint32_t field_number) {
+  /* Lots of optimization opportunities here. */
+  int idx = 0;
+  while (idx < l->field_count) {
+    const upb_msglayout_fieldinit_v1 *candidate = &l->fields[idx++];
+    if (candidate->number == field_number) return candidate;
+  }
+
+  return NULL;  /* Unknown field. */
+}
+
+/* Copies the next 8 raw bytes into *val and advances *ptr; false if too short. */
+static bool upb_decode_64bit(const char **ptr, const char *limit,
+                             uint64_t *val) {
+  const char *src = *ptr;
+  if (limit - src < 8) return false;
+
+  memcpy(val, src, 8);
+  *ptr = src + 8;
+  return true;
+}
+
+/* Copies the next 4 raw bytes into *val and advances *ptr; false if too short. */
+static bool upb_decode_32bit(const char **ptr, const char *limit,
+                             uint32_t *val) {
+  const char *src = *ptr;
+  if (limit - src < 4) return false;
+
+  memcpy(val, src, 4);
+  *ptr = src + 4;
+  return true;
+}
+
+static int32_t upb_zzdec_32(uint32_t n) {  /* Zig-zag decode: 0,1,2,3 -> 0,-1,1,-2. */
+  return (n >> 1) ^ -(int32_t)(n & 1);
+}
+
+static int64_t upb_zzdec_64(uint64_t n) {  /* Zig-zag decode: 0,1,2,3 -> 0,-1,1,-2. */
+  return (n >> 1) ^ -(int64_t)(n & 1);
+}
+
+/* Reads a varint length and then that many bytes.  *val is built from the
+ * current input position, and *ptr is then moved past the payload. */
+static bool upb_decode_string(const char **ptr, const char *limit,
+                              upb_stringview *val) {
+  uint32_t size;
+
+  if (!upb_decode_varint32(ptr, limit, &size)) return false;
+  if (limit - *ptr < size) return false;
+
+  *val = upb_stringview_make(*ptr, size);
+  *ptr += size;
+  return true;
+}
+
+static void upb_set32(void *msg, size_t ofs, uint32_t val) {  /* Stores val at byte offset ofs of msg. */
+  memcpy((char*)msg + ofs, &val, sizeof(val));
+}
+
+bool upb_append_unknown(const char **ptr, const char *start, const char *limit,
+                        char *msg) {
+  UPB_UNUSED(start);  /* Unknown fields are skipped for now, not stored. */
+  UPB_UNUSED(msg);
+  *ptr = limit;       /* Callers pass the end of the skipped field as limit. */
+  return true;
+}
+
+/* Decodes the single field (tag plus value) at *ptr into msg using layout l.
+ * A value is stored only if the field is known and its wire type fits the
+ * declared type, else skipped; groups/undefined wire types are rejected. */
+bool upb_decode_field(const char **ptr, const char *limit, char *msg,
+                      const upb_msglayout_msginit_v1 *l, upb_env *env) {
+  uint32_t tag, wire_type, field_number;
+  uint64_t val64 = 0;  /* Varint or fixed 64-bit payload. */
+  uint32_t val32 = 0;  /* Fixed 32-bit payload, or a narrowed varint. */
+  upb_stringview str = upb_stringview_make(NULL, 0);  /* Delimited payload. */
+  const char *p = *ptr;
+  const char *field_start = p;
+  const upb_msglayout_fieldinit_v1 *f;
+
+  if (!upb_decode_varint32(&p, limit, &tag)) {
+    upb_decode_seterr(env, "Error decoding tag.\n");
+    return false;
+  }
+
+  wire_type = tag & 0x7;
+  field_number = tag >> 3;
+  if (field_number == 0) {
+    upb_decode_seterr(env, "Invalid field number 0.\n");
+    return false;
+  }
+
+  /* Step 1: read the value, which also validates it for unknown fields. */
+  switch (wire_type) {
+    case UPB_WIRE_TYPE_VARINT:
+      if (!upb_decode_varint(&p, limit, &val64)) {
+        upb_decode_seterr(env, "Error decoding varint value.\n");
+        return false;
+      }
+      break;
+    case UPB_WIRE_TYPE_64BIT:
+      if (!upb_decode_64bit(&p, limit, &val64)) {
+        upb_decode_seterr(env, "Error decoding 64bit value.\n");
+        return false;
+      }
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      if (!upb_decode_32bit(&p, limit, &val32)) {
+        upb_decode_seterr(env, "Error decoding 32bit value.\n");
+        return false;
+      }
+      break;
+    case UPB_WIRE_TYPE_DELIMITED:
+      if (!upb_decode_string(&p, limit, &str)) {
+        upb_decode_seterr(env, "Error decoding delimited value.\n");
+        return false;
+      }
+      break;
+    default:  /* Previously fell through and dereferenced a possibly-NULL f. */
+      upb_decode_seterr(env, "Unsupported wire type.\n");
+      return false;
+  }
+
+  f = upb_find_field(l, field_number);
+  if (!f) goto unknown;
+  /* Step 2: store the value when the wire type fits the field's type. */
+  switch (f->type) {
+    case UPB_DESCRIPTOR_TYPE_INT64:
+    case UPB_DESCRIPTOR_TYPE_UINT64:
+      if (wire_type != UPB_WIRE_TYPE_VARINT) goto unknown;
+      memcpy(msg + f->offset, &val64, sizeof(val64));
+      break;
+    case UPB_DESCRIPTOR_TYPE_INT32:
+    case UPB_DESCRIPTOR_TYPE_UINT32:
+    case UPB_DESCRIPTOR_TYPE_ENUM:
+      if (wire_type != UPB_WIRE_TYPE_VARINT) goto unknown;
+      val32 = val64;
+      memcpy(msg + f->offset, &val32, sizeof(val32));
+      break;
+    case UPB_DESCRIPTOR_TYPE_SINT32:
+      if (wire_type != UPB_WIRE_TYPE_VARINT) goto unknown;
+      val32 = upb_zzdec_32(val64);
+      memcpy(msg + f->offset, &val32, sizeof(val32));
+      break;
+    case UPB_DESCRIPTOR_TYPE_SINT64:
+      if (wire_type != UPB_WIRE_TYPE_VARINT) goto unknown;
+      val64 = upb_zzdec_64(val64);
+      memcpy(msg + f->offset, &val64, sizeof(val64));
+      break;
+    case UPB_DESCRIPTOR_TYPE_DOUBLE:
+    case UPB_DESCRIPTOR_TYPE_FIXED64:
+    case UPB_DESCRIPTOR_TYPE_SFIXED64:
+      if (wire_type != UPB_WIRE_TYPE_64BIT) goto unknown;
+      memcpy(msg + f->offset, &val64, sizeof(val64));
+      break;
+    case UPB_DESCRIPTOR_TYPE_FLOAT:
+    case UPB_DESCRIPTOR_TYPE_FIXED32:
+    case UPB_DESCRIPTOR_TYPE_SFIXED32:
+      if (wire_type != UPB_WIRE_TYPE_32BIT) goto unknown;
+      memcpy(msg + f->offset, &val32, sizeof(val32));
+      break;
+    case UPB_DESCRIPTOR_TYPE_STRING:
+    case UPB_DESCRIPTOR_TYPE_BYTES:
+      if (wire_type != UPB_WIRE_TYPE_DELIMITED) goto unknown;
+      memcpy(msg + f->offset, &str, sizeof(str));
+      break;
+    default:  /* Bool, submessage and group fields are not decoded yet. */
+      goto unknown;
+  }
+
+  if (f->oneof_index != UPB_NOT_IN_ONEOF) {
+    upb_set32(msg, l->oneofs[f->oneof_index].case_offset, f->number);
+  }
+
+  *ptr = p;
+  return true;
+unknown:
+  return upb_append_unknown(ptr, field_start, p, msg);
+}
+
+/* Decodes every field in buf into msg_void; stops at the first failing field. */
+bool upb_decode(upb_stringview buf, void *msg_void,
+                const upb_msglayout_msginit_v1 *l, upb_env *env) {
+  char *msg = msg_void;
+  const char *cursor = buf.data;
+  const char *const end = cursor + buf.size;
+  bool ok = true;
+
+  while (ok && cursor < end) {
+    ok = upb_decode_field(&cursor, end, msg, l, env);
+  }
+
+  return ok;
+}
diff --git a/upb/decode.h b/upb/decode.h
new file mode 100644
index 0000000..2a9e39e
--- /dev/null
+++ b/upb/decode.h
@@ -0,0 +1,17 @@
+/*
+** upb_decode: parsing into a upb_msg using a upb_msglayout.
+*/
+
+#ifndef UPB_DECODE_H_
+#define UPB_DECODE_H_
+
+#include "upb/msg.h"
+
+UPB_BEGIN_EXTERN_C
+
+bool upb_decode(upb_stringview buf, void *msg,
+                const upb_msglayout_msginit_v1 *l, upb_env *env);
+
+UPB_END_EXTERN_C
+
+#endif  /* UPB_DECODE_H_ */
diff --git a/upb/encode.c b/upb/encode.c
new file mode 100644
index 0000000..30f2da7
--- /dev/null
+++ b/upb/encode.c
@@ -0,0 +1,512 @@
+
+#include "upb/encode.h"
+#include "upb/structs.int.h"
+
+#define UPB_PB_VARINT_MAX_LEN 10
+
+/* Encodes val as a base-128 varint at buf and returns the byte count, which
+ * is at least 1 (for zero) and at most UPB_PB_VARINT_MAX_LEN. */
+static size_t upb_encode_varint(uint64_t val, char *buf) {
+  size_t n = 0;
+  do {
+    uint8_t b = val & 0x7fU;
+    val >>= 7;
+    if (val != 0) b |= 0x80U;  /* More bytes follow. */
+    buf[n++] = b;
+  } while (val != 0);
+  return n;
+}
+
+static size_t upb_varint_size(uint64_t val) {
+  char buf[UPB_PB_VARINT_MAX_LEN];  /* Scratch space: only the length is wanted. */
+  return upb_encode_varint(val, buf);
+}
+/* Zig-zag encode.  Shift as unsigned: "n << 1" is undefined for negative n. */
+static uint32_t upb_zzenc_32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); }
+static uint64_t upb_zzenc_64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); }
+
+typedef enum {  /* Wire type: OR'ed into the low three bits of each tag. */
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5
+} upb_wiretype_t;
+
+/* Index is descriptor type. */
+const uint8_t upb_native_wiretypes[] = {
+  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
+  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
+  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
+  UPB_WIRE_TYPE_VARINT,        /* INT64 */
+  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
+  UPB_WIRE_TYPE_VARINT,        /* INT32 */
+  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
+  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
+  UPB_WIRE_TYPE_VARINT,        /* BOOL */
+  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
+  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
+  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
+  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
+  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
+  UPB_WIRE_TYPE_VARINT,        /* ENUM */
+  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
+  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
+  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
+  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
+};
+
+/* The output buffer is divided into segments; a segment is a string of data
+ * that is "ready to go" -- it does not need any varint lengths inserted into
+ * the middle.  The seams between segments are where varints will be inserted
+ * once they are known.
+ *
+ * We also use the concept of a "run", which is a range of encoded bytes that
+ * occur at a single submessage level.  Every segment contains one or more runs.
+ *
+ * A segment can span messages.  Consider:
+ *
+ *                  .--Submessage lengths---------.
+ *                  |       |                     |
+ *                  |       V                     V
+ *                  V      | |---------------    | |-----------------
+ * Submessages:    | |-----------------------------------------------
+ * Top-level msg: ------------------------------------------------------------
+ *
+ * Segments:          -----   -------------------   -----------------
+ * Runs:              *----   *--------------*---   *----------------
+ * (* marks the start)
+ *
+ * Note that the top-level message is not in any segment because it does not
+ * have any length preceding it.
+ *
+ * A segment is only interrupted when another length needs to be inserted.  So
+ * observe how the second segment spans both the inner submessage and part of
+ * the next enclosing message. */
+
+typedef struct {
+  uint32_t msglen;  /* The length to varint-encode before this segment. */
+  uint32_t seglen;  /* Length of the segment. */
+} upb_segment;
+
+typedef struct {
+  upb_env *env;
+  char *buf, *ptr, *limit;
+
+  /* The beginning of the current run, or undefined if we are at the top
+   * level. */
+  char *runbegin;
+
+  /* The list of segments we are accumulating. */
+  upb_segment *segbuf, *segptr, *seglimit;
+
+  /* The stack of enclosing submessages.  Each entry in the stack points to the
+   * segment where this submessage's length is being accumulated. */
+  int *stack, *top, *stacklimit;
+} upb_encstate;
+
+static upb_segment *upb_encode_top(upb_encstate *e) {
+  return &e->segbuf[*e->top];  /* *e->top is an index into segbuf. */
+}
+
+/* Grows the output buffer (by doubling) until at least "bytes" more bytes fit
+ * after e->ptr, rebasing ptr/runbegin/limit.  Returns false if realloc fails. */
+static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
+  char *new_buf;
+  size_t needed = bytes + (e->ptr - e->buf);
+  size_t old_size = e->limit - e->buf;
+  /* Start from 1 so that doubling terminates even if the buffer is empty. */
+  size_t new_size = old_size ? old_size : 1;
+
+  while (new_size < needed) {
+    new_size *= 2;
+  }
+  new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
+  if (new_buf == NULL) {
+    return false;
+  }
+
+  e->ptr = new_buf + (e->ptr - e->buf);
+  e->runbegin = new_buf + (e->runbegin - e->buf);
+  e->limit = new_buf + new_size;
+  e->buf = new_buf;
+  return true;
+}
+
+/* Call to ensure that at least "bytes" bytes are available for writing at
+ * e->ptr.  Returns false if the bytes could not be allocated. */
+static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
+  const size_t available = (size_t)(e->limit - e->ptr);
+
+  /* Fast path: enough room already; otherwise fall back to reallocating. */
+  if (UPB_LIKELY(available >= bytes)) return true;
+  return upb_encode_growbuffer(e, bytes);
+}
+
+/* Call when "bytes" bytes have been written at e->ptr.  The caller *must* have
+ * previously called reserve() with at least this many bytes. */
+static void upb_encode_advance(upb_encstate *e, size_t bytes) {
+  UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);  /* Catches a missing or too-small reserve(). */
+  e->ptr += bytes;
+}
+
+/* Writes the given bytes to the buffer, handling reserve/advance. */
+static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
+  const bool have_room = upb_encode_reserve(e, len);
+  if (have_room) {
+    /* Read e->ptr only now: reserving may have reallocated the buffer. */
+    memcpy(e->ptr, data, len);
+    upb_encode_advance(e, len);
+  }
+  return have_room;
+}
+
+/* Finish the current run by adding the run totals to the segment and message
+ * length. */
+static void upb_encode_accumulate(upb_encstate *e) {
+  size_t run_len;
+  UPB_ASSERT(e->ptr >= e->runbegin);
+  run_len = e->ptr - e->runbegin;
+  e->segptr->seglen += run_len;          /* The bytes sit in the newest segment... */
+  upb_encode_top(e)->msglen += run_len;  /* ...and count toward the message on top of the stack. */
+  e->runbegin = e->ptr;                  /* The next run starts here. */
+}
+
+/* Call to indicate the start of delimited region for which the full length is
+ * not yet known.  The length will be inserted at the current position once it
+ * is known (and subsequent data moved if necessary). */
+static bool upb_encode_startdelim(upb_encstate *e) {
+  if (e->top) {  /* A NULL top means we are at the top level (enddelim resets it). */
+    /* We are already buffering, advance to the next segment and push it on the
+     * stack. */
+    upb_encode_accumulate(e);
+
+    if (++e->top == e->stacklimit) {
+      /* TODO(haberman): grow stack? */
+      return false;
+    }
+
+    if (++e->segptr == e->seglimit) {
+      /* Grow segment buffer. */
+      size_t old_size =
+          (e->seglimit - e->segbuf) * sizeof(upb_segment);
+      size_t new_size = old_size * 2;
+      upb_segment *new_buf =
+          upb_env_realloc(e->env, e->segbuf, old_size, new_size);
+
+      if (new_buf == NULL) {
+        return false;
+      }
+
+      e->segptr = new_buf + (e->segptr - e->segbuf);
+      e->seglimit = new_buf + (new_size / sizeof(upb_segment));
+      e->segbuf = new_buf;
+    }
+  } else {
+    /* We were previously at the top level, start buffering. */
+    e->segptr = e->segbuf;
+    e->top = e->stack;
+    e->runbegin = e->ptr;
+  }
+
+  *e->top = e->segptr - e->segbuf;  /* The stack holds segment indexes, not pointers. */
+  e->segptr->seglen = 0;
+  e->segptr->msglen = 0;
+
+  return true;
+}
+
+/* Call to indicate the end of a delimited region.  We now know the length of
+ * the delimited region.  If we are not nested inside any other delimited
+ * regions, we can now emit all of the buffered data we accumulated. */
+static bool upb_encode_enddelim(upb_encstate *e) {
+  size_t msglen;
+  upb_encode_accumulate(e);
+  msglen = upb_encode_top(e)->msglen;
+
+  if (e->top == e->stack) {
+    /* All lengths are now available, emit all buffered data. */
+    char buf[UPB_PB_VARINT_MAX_LEN];
+    upb_segment *s;
+    const char *ptr = e->buf;
+    for (s = e->segbuf; s <= e->segptr; s++) {
+      size_t lenbytes = upb_encode_varint(s->msglen, buf);  /* NOTE(review): unused while the emit calls below are disabled. */
+      /* TODO: putbuf(e, buf, lenbytes);   -- emit the length prefix. */
+      /* TODO: putbuf(e, ptr, s->seglen);  -- emit the segment's bytes. */
+      ptr += s->seglen;
+    }
+
+    e->ptr = e->buf;  /* NOTE(review): rewinds the buffer although nothing was emitted above -- confirm. */
+    e->top = NULL;
+  } else {
+    /* Need to keep buffering; propagate length info into enclosing
+     * submessages. */
+    --e->top;
+    upb_encode_top(e)->msglen += msglen + upb_varint_size(msglen);
+  }
+
+  return true;
+}
+
+/* encoding of wire types *****************************************************/
+
+static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
+  /* Writes val's in-memory bytes as-is.  TODO(haberman): byte-swap for big endian. */
+  return upb_put_bytes(e, &val, sizeof(uint64_t));
+}
+
+static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
+  /* Writes val's in-memory bytes as-is.  TODO(haberman): byte-swap for big endian. */
+  return upb_put_bytes(e, &val, sizeof(uint32_t));
+}
+
+static bool upb_put_varint(upb_encstate *e, uint64_t val) {
+  size_t written;
+  /* Reserve the worst case; only the bytes actually produced are consumed. */
+  if (!upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)) return false;
+  written = upb_encode_varint(val, e->ptr);
+  upb_encode_advance(e, written);
+  return true;
+}
+
+static bool upb_put_double(upb_encstate *e, double d) {
+  uint64_t u64;
+  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
+  memcpy(&u64, &d, sizeof(uint64_t));  /* Copy the bit pattern; no numeric conversion. */
+  return upb_put_fixed64(e, u64);
+}
+
+static bool upb_put_float(upb_encstate *e, float d) {
+  uint32_t u32;
+  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
+  memcpy(&u32, &d, sizeof(uint32_t));  /* Copy the bit pattern; no numeric conversion. */
+  return upb_put_fixed32(e, u32);
+}
+
+static uint32_t upb_readcase(const char *msg, const upb_msglayout_msginit_v1 *m,
+                             int oneof_index) {
+  uint32_t ret;  /* The oneof's case value, read from case_offset within msg. */
+  memcpy(&ret, msg + m->oneofs[oneof_index].case_offset, sizeof(ret));
+  return ret;
+}
+
+static bool upb_readhasbit(const char *msg,
+                           const upb_msglayout_fieldinit_v1 *f) {
+  UPB_ASSERT(f->hasbit != UPB_NO_HASBIT);
+  return msg[f->hasbit / 8] & (1 << (f->hasbit % 8));  /* Hasbits are packed 8 per byte from the start of msg. */
+}
+
+static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {  /* Tag = number << 3 | wire type. */
+  return upb_put_varint(e, ((uint32_t)field_number << 3) | wire_type);  /* Unsigned: a signed shift overflows for large numbers. */
+}
+
+static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
+                               size_t size) {
+  size_t bytes = arr->len * size;  /* Payload length: element count times element size. */
+  return upb_put_varint(e, bytes) && upb_put_bytes(e, arr->data, bytes);  /* Length prefix, then the raw elements. */
+}
+
+bool upb_encode_message(upb_encstate *e, const char *msg,
+                        const upb_msglayout_msginit_v1 *m);
+
+/* Encodes the repeated field f; field_mem points at its upb_array pointer.
+ * Primitives are written as one packed record; strings, bytes and submessages
+ * get one tagged record each.  Returns false as soon as a write fails. */
+static bool upb_encode_array(upb_encstate *e, const char *field_mem,
+                             const upb_msglayout_msginit_v1 *m,
+                             const upb_msglayout_fieldinit_v1 *f) {
+  const upb_array *arr = *(const upb_array**)field_mem;
+  size_t i;
+
+  if (arr == NULL || arr->len == 0) {
+    return true;
+  }
+
+  /* We encode all primitive arrays as packed, regardless of what was specified
+   * in the .proto file.  Could special case 1-sized arrays. */
+  if (!upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)) {
+    return false;
+  }
+
+/* Elements are walked as ctype so the stride matches the stored width. */
+#define VARINT_CASE(ctype, encode) { \
+  const ctype *data = arr->data; \
+  const ctype *limit = data + arr->len; \
+  if (!upb_encode_startdelim(e)) return false; \
+  for (; data < limit; data++) { \
+    if (!upb_put_varint(e, encode)) return false; \
+  } \
+  return upb_encode_enddelim(e); \
+}
+
+  switch (f->type) {
+    case UPB_DESCRIPTOR_TYPE_DOUBLE:
+      return upb_put_fixedarray(e, arr, sizeof(double));
+    case UPB_DESCRIPTOR_TYPE_FLOAT:
+      return upb_put_fixedarray(e, arr, sizeof(float));
+    case UPB_DESCRIPTOR_TYPE_SFIXED64:
+    case UPB_DESCRIPTOR_TYPE_FIXED64:
+      return upb_put_fixedarray(e, arr, sizeof(uint64_t));
+    case UPB_DESCRIPTOR_TYPE_FIXED32:
+    case UPB_DESCRIPTOR_TYPE_SFIXED32:
+      return upb_put_fixedarray(e, arr, sizeof(uint32_t));
+    case UPB_DESCRIPTOR_TYPE_INT64:
+    case UPB_DESCRIPTOR_TYPE_UINT64:
+      VARINT_CASE(uint64_t, *data);
+    case UPB_DESCRIPTOR_TYPE_UINT32:
+      VARINT_CASE(uint32_t, *data);
+    case UPB_DESCRIPTOR_TYPE_INT32:
+    case UPB_DESCRIPTOR_TYPE_ENUM:
+      VARINT_CASE(int32_t, (int64_t)*data);  /* Sign-extend negatives. */
+    case UPB_DESCRIPTOR_TYPE_BOOL:
+      VARINT_CASE(bool, *data);
+    case UPB_DESCRIPTOR_TYPE_SINT32:
+      VARINT_CASE(int32_t, upb_zzenc_32(*data));
+    case UPB_DESCRIPTOR_TYPE_SINT64:
+      VARINT_CASE(int64_t, upb_zzenc_64(*data));
+    case UPB_DESCRIPTOR_TYPE_STRING:
+    case UPB_DESCRIPTOR_TYPE_BYTES: {
+      const upb_stringview *strs = arr->data;
+      for (i = 0; i < arr->len; i++) {
+        /* The first element reuses the tag already written above. */
+        if (i > 0 && !upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)) {
+          return false;
+        }
+        if (!upb_put_varint(e, strs[i].size) ||
+            !upb_put_bytes(e, strs[i].data, strs[i].size)) {
+          return false;
+        }
+      }
+      return true;
+    }
+    case UPB_DESCRIPTOR_TYPE_GROUP:
+    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
+      void **submsgs = arr->data;
+      const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index];
+      for (i = 0; i < arr->len; i++) {
+        /* The first element reuses the tag already written above. */
+        if (i > 0 && !upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)) {
+          return false;
+        }
+        if (!upb_encode_startdelim(e) ||
+            !upb_encode_message(e, submsgs[i], subm) ||
+            !upb_encode_enddelim(e)) {
+          return false;
+        }
+      }
+      return true;
+    }
+  }
+  UPB_UNREACHABLE();
+#undef VARINT_CASE
+}
+
+/* Encodes the non-repeated field f whose value is stored at field_mem.  With
+ * is_proto3 set, zero/empty values are skipped.  Returns false on failure. */
+static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
+                                   const upb_msglayout_msginit_v1 *m,
+                                   const upb_msglayout_fieldinit_v1 *f,
+                                   bool is_proto3) {
+#define CASE(ctype, type, wire_type, encodeval) { \
+  ctype val = *(const ctype*)field_mem; \
+  if (is_proto3 && val == 0) return true; \
+  return upb_put_tag(e, f->number, wire_type) && \
+      upb_put_ ## type(e, encodeval); \
+}
+
+  switch (f->type) {
+    case UPB_DESCRIPTOR_TYPE_DOUBLE:
+      CASE(double, double, UPB_WIRE_TYPE_64BIT, val)
+    case UPB_DESCRIPTOR_TYPE_FLOAT:
+      CASE(float, float, UPB_WIRE_TYPE_32BIT, val)
+    case UPB_DESCRIPTOR_TYPE_INT64:
+    case UPB_DESCRIPTOR_TYPE_UINT64:
+      CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val)
+    case UPB_DESCRIPTOR_TYPE_UINT32:
+      CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val)
+    case UPB_DESCRIPTOR_TYPE_INT32:
+    case UPB_DESCRIPTOR_TYPE_ENUM:
+      /* Negative values must go out sign-extended to 64 bits. */
+      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val)
+    case UPB_DESCRIPTOR_TYPE_SFIXED64:
+    case UPB_DESCRIPTOR_TYPE_FIXED64:
+      CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val)
+    case UPB_DESCRIPTOR_TYPE_FIXED32:
+    case UPB_DESCRIPTOR_TYPE_SFIXED32:
+      CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val)
+    case UPB_DESCRIPTOR_TYPE_BOOL:
+      CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val)
+    case UPB_DESCRIPTOR_TYPE_SINT32:
+      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_32(val))
+    case UPB_DESCRIPTOR_TYPE_SINT64:
+      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_64(val))
+    case UPB_DESCRIPTOR_TYPE_STRING:
+    case UPB_DESCRIPTOR_TYPE_BYTES: {
+      upb_stringview view = *(const upb_stringview*)field_mem;
+      if (is_proto3 && view.size == 0) {
+        return true;
+      }
+      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) &&
+          upb_put_varint(e, view.size) &&
+          upb_put_bytes(e, view.data, view.size);
+    }
+    case UPB_DESCRIPTOR_TYPE_GROUP:
+    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
+      void *submsg = *(void**)field_mem;
+      if (submsg == NULL) return true;  /* Unset: nothing to encode. */
+      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) &&
+          upb_encode_startdelim(e) &&
+          upb_encode_message(e, submsg, m->submsgs[f->submsg_index]) &&
+          upb_encode_enddelim(e);
+    }
+  }
+#undef CASE
+  UPB_UNREACHABLE();
+}
+
+bool upb_encode_hasscalarfield(const char *msg,
+                               const upb_msglayout_msginit_v1 *m,
+                               const upb_msglayout_fieldinit_v1 *f) {
+  if (f->oneof_index != UPB_NOT_IN_ONEOF) {
+    /* Oneof member: present only when the stored case equals this number. */
+    return upb_readcase(msg, m, f->oneof_index) == f->number;
+  }
+  if (!m->is_proto2) {
+    return true;  /* proto3: emptiness is tested later, while encoding. */
+  }
+  return upb_readhasbit(msg, f);  /* proto2: consult the field's hasbit. */
+}
+
+/* Encodes every field of msg in layout order, as described by m.  Returns
+ * false as soon as one field fails to encode. */
+bool upb_encode_message(upb_encstate* e, const char *msg,
+                        const upb_msglayout_msginit_v1 *m) {
+  int i;
+  for (i = 0; i < m->field_count; i++) {
+    const upb_msglayout_fieldinit_v1 *f = &m->fields[i];
+    const char *field_mem = msg + f->offset;  /* Where f's value is stored. */
+
+    if (f->label == UPB_LABEL_REPEATED) {
+      if (!upb_encode_array(e, field_mem, m, f)) {
+        return false;
+      }
+    } else if (upb_encode_hasscalarfield(msg, m, f) &&
+               !upb_encode_scalarfield(e, field_mem, m, f, !m->is_proto2)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/* Serializes msg per layout m.  Returns the output buffer and stores its
+ * length in *size, or returns NULL if encoding fails. */
+char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m,
+                 upb_env *env, size_t *size) {
+  upb_encstate e;
+  /* Zeroed so buf/ptr are never indeterminate.  NOTE(review): env unused. */
+  memset(&e, 0, sizeof(e));
+  if (!upb_encode_message(&e, msg, m)) return NULL;
+  *size = e.ptr - e.buf;
+  return e.buf;
+}
diff --git a/upb/encode.h b/upb/encode.h
new file mode 100644
index 0000000..83908d4
--- /dev/null
+++ b/upb/encode.h
@@ -0,0 +1,17 @@
+/*
+** upb_encode: serializing a upb_msg using a upb_msglayout.
+*/
+
+#ifndef UPB_ENCODE_H_
+#define UPB_ENCODE_H_
+
+#include "upb/msg.h"
+
+UPB_BEGIN_EXTERN_C
+
+/* Serializes msg using layout l; the encoded length is stored in *size. */
+char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *l,
+                 upb_env *env, size_t *size);
+UPB_END_EXTERN_C
+
+#endif  /* UPB_ENCODE_H_ */
diff --git a/upb/msg.c b/upb/msg.c
index ef39dc0..9d29a39 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -1,5 +1,6 @@
 
 #include "upb/msg.h"
+#include "upb/structs.int.h"
 
 static bool is_power_of_two(size_t val) {
   return (val & (val - 1)) == 0;
@@ -791,15 +792,6 @@ void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
 
 /** upb_array *****************************************************************/
 
-struct upb_array {
-  upb_fieldtype_t type;
-  uint8_t element_size;
-  void *data;   /* Each element is element_size. */
-  size_t len;   /* Measured in elements. */
-  size_t size;  /* Measured in elements. */
-  upb_alloc *alloc;
-};
-
 #define DEREF_ARR(arr, i, type) ((type*)arr->data)[i]
 
 size_t upb_array_sizeof(upb_fieldtype_t type) {
diff --git a/upb/msg.h b/upb/msg.h
index ee1e2fb..8024828 100644
--- a/upb/msg.h
+++ b/upb/msg.h
@@ -386,6 +386,7 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
 /** Interfaces for generated code *********************************************/
 
 #define UPB_NOT_IN_ONEOF UINT16_MAX
+#define UPB_NO_HASBIT UINT16_MAX
 
 typedef struct {
   uint32_t number;
diff --git a/upb/structs.int.h b/upb/structs.int.h
new file mode 100644
index 0000000..242155b
--- /dev/null
+++ b/upb/structs.int.h
@@ -0,0 +1,18 @@
+/*
+** structs.int.h: structures definitions that are internal to upb.
+*/
+
+#ifndef UPB_STRUCTS_H_
+#define UPB_STRUCTS_H_
+
+struct upb_array {
+  upb_fieldtype_t type;  /* Presumably the element field type -- confirm. */
+  uint8_t element_size;  /* Size of one element (presumably in bytes). */
+  void *data;   /* Each element is element_size. */
+  size_t len;   /* Measured in elements. */
+  size_t size;  /* Measured in elements; presumably capacity -- confirm. */
+  upb_alloc *alloc;  /* NOTE(review): presumably the allocator behind data. */
+};
+
+#endif  /* UPB_STRUCTS_H_ */
+
diff --git a/upb/upb.h b/upb/upb.h
index a2b79ca..19cd02c 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -34,6 +34,9 @@ template <int N> class InlinedEnvironment;
 #define UPB_INLINE static
 #endif
 
+/* Branch hint: tells the compiler that x is expected to evaluate to 1. */
+#define UPB_LIKELY(x) __builtin_expect((x),1)
+
 /* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
  * doesn't provide these preprocessor symbols. */
 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)