From 10265aa56b22ac4f04e7ba08330138e4507534e4 Mon Sep 17 00:00:00 2001
From: Joshua Haberman <joshua@reverberate.org>
Date: Fri, 15 Jul 2011 12:05:43 -0700
Subject: Directory restructure.

Includes are now via upb/foo.h.
Files specific to the protobuf format are
now in upb/pb (the core library is concerned
with message definitions, handlers, and
byte streams, but knows nothing about any
particular serialization format).
---
 upb/pb/decoder.c            | 469 ++++++++++++++++++++++++++++++
 upb/pb/decoder.h            |  99 +++++++
 upb/pb/decoder_x86.dasc     | 694 ++++++++++++++++++++++++++++++++++++++++++++
 upb/pb/encoder.c            | 421 +++++++++++++++++++++++++++
 upb/pb/encoder.h            |  58 ++++
 upb/pb/glue.c               | 129 ++++++++
 upb/pb/glue.h               |  62 ++++
 upb/pb/jit_debug_elf_file.s |   7 +
 upb/pb/textprinter.c        | 199 +++++++++++++
 upb/pb/textprinter.h        |  31 ++
 upb/pb/varint.c             |  54 ++++
 upb/pb/varint.h             | 142 +++++++++
 12 files changed, 2365 insertions(+)
 create mode 100644 upb/pb/decoder.c
 create mode 100644 upb/pb/decoder.h
 create mode 100644 upb/pb/decoder_x86.dasc
 create mode 100644 upb/pb/encoder.c
 create mode 100644 upb/pb/encoder.h
 create mode 100644 upb/pb/glue.c
 create mode 100644 upb/pb/glue.h
 create mode 100644 upb/pb/jit_debug_elf_file.s
 create mode 100644 upb/pb/textprinter.c
 create mode 100644 upb/pb/textprinter.h
 create mode 100644 upb/pb/varint.c
 create mode 100644 upb/pb/varint.h

(limited to 'upb/pb')

diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
new file mode 100644
index 0000000..218c780
--- /dev/null
+++ b/upb/pb/decoder.c
@@ -0,0 +1,469 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include "upb/bytestream.h"
+#include "upb/msg.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/varint.h"
+
+// Used for frames that have no specific end offset: groups, repeated primitive
+// fields inside groups, and the top-level message.
+#define UPB_NONDELIMITED UINT32_MAX
+
+#ifdef UPB_USE_JIT_X64
+#define Dst_DECL upb_decoder *d
+#define Dst_REF (d->dynasm)
+#define Dst (d)
+#include "dynasm/dasm_proto.h"
+#include "upb/pb/decoder_x86.h"
+#endif
+
+// It's unfortunate that we have to micro-manage the compiler this way,
+// especially since this tuning is necessarily specific to one hardware
+// configuration.  But empirically on a Core i7, performance increases 30-50%
+// with these annotations.  Every instance where these appear, gcc 4.2.1 made
+// the wrong decision and degraded performance in benchmarks.
+#define FORCEINLINE static __attribute__((always_inline))
+#define NOINLINE static __attribute__((noinline))
+
+static void upb_decoder_exit(upb_decoder *d) { siglongjmp(d->exitjmp, 1); }
+static void upb_decoder_exit2(void *_d) {
+  upb_decoder *d = _d;
+  upb_decoder_exit(d);
+}
+static void upb_decoder_abort(upb_decoder *d, const char *msg) {
+  upb_status_setf(d->status, UPB_ERROR, "%s", msg);  // msg is not a format.
+  upb_decoder_exit(d);  // siglongjmp()s to d->exitjmp.
+}
+
+/* Decoding/Buffering of wire types *******************************************/
+
+static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
+  assert((size_t)(d->end - d->ptr) >= len);
+  d->ptr += len;
+}
+
+size_t upb_decoder_offset(upb_decoder *d) {
+  size_t offset = d->bufstart_ofs;
+  if (d->ptr) offset += (d->ptr - d->buf);
+  return offset;
+}
+
+static void upb_decoder_setmsgend(upb_decoder *d) {
+  upb_dispatcher_frame *f = d->dispatcher.top;
+  size_t delimlen = f->end_ofs - d->bufstart_ofs;
+  size_t buflen = d->end - d->buf;
+  if (f->end_ofs != UINT64_MAX && delimlen <= buflen) {
+    d->delim_end = (uintptr_t)(d->buf + delimlen);
+  } else {
+    // Buffers must not run up against the end of memory.
+    assert((uintptr_t)d->end < UINTPTR_MAX);
+    d->delim_end = UINTPTR_MAX;
+  }
+}
+
+// Pulls the next buffer from the bytesrc.  Should be called only when the
+// current buffer is completely empty.
+static bool upb_trypullbuf(upb_decoder *d) {
+  assert(upb_decoder_bufleft(d) == 0);
+  if (d->bufend_ofs == d->refend_ofs) {
+    d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
+    if (!upb_ok(d->status)) {
+      d->ptr = NULL;
+      d->end = NULL;
+      if (upb_iseof(d->status)) return false;
+      upb_decoder_exit(d);
+    }
+  }
+  d->bufstart_ofs = d->bufend_ofs;
+  size_t len;
+  d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
+  assert(len > 0);
+  d->bufend_ofs = d->bufstart_ofs + len;
+  d->ptr = d->buf;
+  d->end = d->buf + len;
+#ifdef UPB_USE_JIT_X64
+  d->jit_end = d->end - 20;
+#endif
+  upb_decoder_setmsgend(d);
+  return true;
+}
+
+static void upb_pullbuf(upb_decoder *d) {
+  if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
+}
+
+void upb_decoder_commit(upb_decoder *d) {
+  d->completed_ptr = d->ptr;
+  if (d->refstart_ofs < d->bufstart_ofs) {
+    // Drop our ref on the previous buf's region.
+    upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
+    upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
+    d->refstart_ofs = d->bufstart_ofs;
+  }
+}
+
+// Byte-at-a-time varint decoder for values that may span a buffer seam.
+NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
+  uint8_t byte = 0x80;
+  uint64_t u64 = 0;
+  int bitpos;
+  for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
+    if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);  // Aborts on EOF.
+    byte = (uint8_t)*d->ptr;
+    // Consume via d->ptr so the refill check and our callers see progress.
+    upb_decoder_advance(d, 1);
+    u64 |= (uint64_t)(byte & 0x7F) << bitpos;
+  }
+  if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
+  return u64;
+}
+
+// For tags and delimited lengths, which must be <=32bit and are usually small.
+FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
+  const char *p = d->ptr;
+  uint32_t ret;
+  uint64_t u64;
+  // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
+  if (upb_decoder_bufleft(d) < 2) goto slow;  // unlikely.
+  ret = *p & 0x7f;
+  if ((*(p++) & 0x80) == 0) goto done;  // predictable if fields are in order
+  ret |= (*p & 0x7f) << 7;
+  if ((*(p++) & 0x80) == 0) goto done;  // likely
+slow:
+  u64 = upb_decode_varint_slow(d);
+  if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
+  ret = (uint32_t)u64;
+  p = d->ptr;  // Turn the next line into a nop.
+done:
+  upb_decoder_advance(d, p - d->ptr);
+  return ret;
+}
+
+FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
+  if (upb_decoder_bufleft(d) == 0) {
+    // Check for our two normal end-of-message conditions.
+    if (d->bufend_ofs == d->end_ofs) return false;
+    if (!upb_trypullbuf(d)) return false;
+  }
+  *val = upb_decode_varint32(d);
+  return true;
+}
+
+FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
+  if (upb_decoder_bufleft(d) >= 10) {
+    // Fast case.
+    upb_decoderet r = upb_vdecode_fast(d->ptr);
+    if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
+    upb_decoder_advance(d, r.p - d->ptr);
+    return r.val;
+  } else if (upb_decoder_bufleft(d) > 0) {
+    // Intermediate case -- worth it?
+    char tmpbuf[10];
+    memset(tmpbuf, 0x80, 10);
+    memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
+    upb_decoderet r = upb_vdecode_fast(tmpbuf);
+    if (r.p != NULL) {
+      upb_decoder_advance(d, r.p - tmpbuf);
+      return r.val;
+    }
+  }
+  // Slow case -- varint spans buffer seam.
+  return upb_decode_varint_slow(d);
+}
+
+// Copies the next "bytes" bytes of input into buf, refilling across buffers.
+FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
+  if (upb_decoder_bufleft(d) >= bytes) {
+    // Fast case: the whole value is in the current buffer.
+    memcpy(buf, d->ptr, bytes);
+    upb_decoder_advance(d, bytes);
+  } else {
+    // Slow case: the value spans a buffer seam.
+    for (size_t read = 0, avail = 0; read < bytes; read += avail) {
+      if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);  // Aborts on EOF.
+      avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
+      memcpy(buf + read, d->ptr, avail);
+      upb_decoder_advance(d, avail);
+    }
+  }
+}
+
+FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
+  uint32_t u32;
+  upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
+  return u32;  // TODO: proper byte swapping
+}
+FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
+  uint64_t u64;
+  upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
+  return u64;  // TODO: proper byte swapping
+}
+
+INLINE upb_strref *upb_decode_string(upb_decoder *d) {
+  uint32_t strlen = upb_decode_varint32(d);
+  d->strref.stream_offset = upb_decoder_offset(d);
+  d->strref.len = strlen;
+  if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+  if (upb_decoder_bufleft(d) >= strlen) {
+    // Fast case.
+    d->strref.ptr = d->ptr;
+    upb_decoder_advance(d, strlen);
+  } else {
+    // Slow case.
+    while (1) {
+      size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen);
+      upb_decoder_advance(d, consume);
+      strlen -= consume;
+      if (strlen == 0) break;
+      upb_pullbuf(d);
+    }
+  }
+  return &d->strref;
+}
+
+INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
+  upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
+  upb_decoder_setmsgend(d);
+}
+
+
+/* Decoding of .proto types ***************************************************/
+
+// Technically, we are losing data if we see a 32-bit varint that is not
+// properly sign-extended.  We could detect this and error about the data loss,
+// but proto2 does not do this, so we pass.
+
+#define T(type, wt, valtype, convfunc) \
+  INLINE void upb_decode_ ## type(upb_decoder *d, upb_fhandlers *f) { \
+    upb_value val; \
+    upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \
+    upb_dispatch_value(&d->dispatcher, f, val); \
+  } \
+
+static double  upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
+static float   upb_asfloat(uint32_t n)  { float  f; memcpy(&f, &n, 4); return f; }
+static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
+static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
+
+T(INT32,    varint,  int32,  int32_t)
+T(INT64,    varint,  int64,  int64_t)
+T(UINT32,   varint,  uint32, uint32_t)
+T(UINT64,   varint,  uint64, uint64_t)
+T(FIXED32,  fixed32, uint32, uint32_t)
+T(FIXED64,  fixed64, uint64, uint64_t)
+T(SFIXED32, fixed32, int32,  int32_t)
+T(SFIXED64, fixed64, int64,  int64_t)
+T(BOOL,     varint,  bool,   bool)
+T(ENUM,     varint,  int32,  int32_t)
+T(DOUBLE,   fixed64, double, upb_asdouble)
+T(FLOAT,    fixed32, float,  upb_asfloat)
+T(SINT32,   varint,  int32,  upb_zzdec_32)
+T(SINT64,   varint,  int64,  upb_zzdec_64)
+T(STRING,   string,  strref, upb_strref*)
+
+static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
+  upb_push(d, f, UPB_NONDELIMITED);
+}
+static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
+  (void)f;
+  upb_dispatch_endsubmsg(&d->dispatcher);
+  upb_decoder_setmsgend(d);
+}
+static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
+  uint32_t len = upb_decode_varint32(d);  // Advances d->ptr; must run first.
+  upb_push(d, f, len + (d->ptr - d->buf));
+}
+
+/* The main decoding loop *****************************************************/
+
+static void upb_decoder_checkdelim(upb_decoder *d) {
+  while ((uintptr_t)d->ptr >= d->delim_end) {
+    if ((uintptr_t)d->ptr > d->delim_end)
+      upb_decoder_abort(d, "Bad submessage end");
+
+    if (d->dispatcher.top->is_sequence) {
+      upb_dispatch_endseq(&d->dispatcher);
+    } else {
+      upb_dispatch_endsubmsg(&d->dispatcher);
+    }
+    upb_decoder_setmsgend(d);
+  }
+}
+
+static void upb_decoder_enterjit(upb_decoder *d) {
+  (void)d;
+#ifdef UPB_USE_JIT_X64
+  if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
+    // Decodes as many fields as possible, updating d->ptr appropriately,
+    // before falling through to the slow(er) path.
+    void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
+    upb_jit_decode(d);
+  }
+#endif
+}
+
+INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
+  while (1) {  // Loop until a known field's tag is read or input runs out.
+    uint32_t tag;
+    if (!upb_trydecode_varint32(d, &tag)) return NULL;  // No more input.
+    upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
+
+    // There are no explicit "startseq" or "endseq" markers in protobuf
+    // streams, so we have to infer them by noticing when a repeated field
+    // starts or ends.
+    if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+      upb_dispatch_endseq(&d->dispatcher);
+      upb_decoder_setmsgend(d);
+    }
+    if (f && f->repeated && d->dispatcher.top->f != f) {
+      // TODO: support packed.
+      assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
+             (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
+      uint32_t end = d->dispatcher.top->end_ofs;
+      upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
+      upb_decoder_setmsgend(d);
+    }
+    if (f) return f;
+
+    // Unknown field; skip it (fixed/delimited skips are only assert-checked).
+    switch (tag & 0x7) {
+      case UPB_WIRE_TYPE_VARINT:    upb_decode_varint(d); break;
+      case UPB_WIRE_TYPE_32BIT:     upb_decoder_advance(d, 4); break;
+      case UPB_WIRE_TYPE_64BIT:     upb_decoder_advance(d, 8); break;
+      case UPB_WIRE_TYPE_DELIMITED:
+        upb_decoder_advance(d, upb_decode_varint32(d)); break;
+      default:
+        upb_decoder_abort(d, "Invalid wire type");
+    }
+    // TODO: deliver to unknown field callback.
+    upb_decoder_commit(d);
+    upb_decoder_checkdelim(d);
+  }
+}
+
+void upb_decoder_onexit(upb_decoder *d) {
+  if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
+  if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
+    // Normal end-of-file.
+    upb_status_clear(d->status);
+    upb_dispatch_endmsg(&d->dispatcher, d->status);
+  } else {
+    if (d->status->code == UPB_EOF)
+      upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
+  }
+}
+
+void upb_decoder_decode(upb_decoder *d, upb_status *status) {
+  if (sigsetjmp(d->exitjmp, 0)) {
+    upb_decoder_onexit(d);
+    return;
+  }
+  d->status = status;
+  upb_dispatch_startmsg(&d->dispatcher);
+  while(1) { // Main loop: executed once per tag/field pair.
+    upb_decoder_checkdelim(d);
+    upb_decoder_enterjit(d);
+    // if (!d->dispatcher.top->is_packed)
+    upb_fhandlers *f = upb_decode_tag(d);
+    if (!f) upb_decoder_exit2(d);
+    f->decode(d, f);
+    upb_decoder_commit(d);
+  }
+}
+
+static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
+                             upb_dispatcher_frame *bottom) {
+  (void)top;
+  (void)bottom;
+  (void)_d;
+#if 0
+  upb_decoder *d = _d;
+  // TODO
+  if (bottom->end_offset == UPB_NONDELIMITED) {
+    // TODO: support skipping groups.
+    abort();
+  }
+  d->ptr = d->buf.ptr + bottom->end_offset;
+#endif
+}
+
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
+  upb_dispatcher_init(
+      &d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
+#ifdef UPB_USE_JIT_X64
+  d->jit_code = NULL;
+  if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
+#endif
+  // Set function pointers for each field's decode function.
+  for (int i = 0; i < handlers->msgs_len; i++) {
+    upb_mhandlers *m = handlers->msgs[i];
+    for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+        i = upb_inttable_next(&m->fieldtab, i)) {
+      upb_fhandlers *f = upb_inttable_iter_value(i);
+      switch (f->type) {
+        case UPB_TYPE(INT32):    f->decode = &upb_decode_INT32;    break;
+        case UPB_TYPE(INT64):    f->decode = &upb_decode_INT64;    break;
+        case UPB_TYPE(UINT32):   f->decode = &upb_decode_UINT32;   break;
+        case UPB_TYPE(UINT64):   f->decode = &upb_decode_UINT64;   break;
+        case UPB_TYPE(FIXED32):  f->decode = &upb_decode_FIXED32;  break;
+        case UPB_TYPE(FIXED64):  f->decode = &upb_decode_FIXED64;  break;
+        case UPB_TYPE(SFIXED32): f->decode = &upb_decode_SFIXED32; break;
+        case UPB_TYPE(SFIXED64): f->decode = &upb_decode_SFIXED64; break;
+        case UPB_TYPE(BOOL):     f->decode = &upb_decode_BOOL;     break;
+        case UPB_TYPE(ENUM):     f->decode = &upb_decode_ENUM;     break;
+        case UPB_TYPE(DOUBLE):   f->decode = &upb_decode_DOUBLE;   break;
+        case UPB_TYPE(FLOAT):    f->decode = &upb_decode_FLOAT;    break;
+        case UPB_TYPE(SINT32):   f->decode = &upb_decode_SINT32;   break;
+        case UPB_TYPE(SINT64):   f->decode = &upb_decode_SINT64;   break;
+        case UPB_TYPE(STRING):   f->decode = &upb_decode_STRING;   break;
+        case UPB_TYPE(BYTES):    f->decode = &upb_decode_STRING;   break;
+        case UPB_TYPE(GROUP):    f->decode = &upb_decode_GROUP;    break;
+        case UPB_TYPE(MESSAGE):  f->decode = &upb_decode_MESSAGE;  break;
+        case UPB_TYPE_ENDGROUP:  f->decode = &upb_endgroup;        break;
+      }
+    }
+  }
+}
+
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
+  upb_handlers *h = upb_handlers_new();
+  upb_accessors_reghandlers(h, m);
+  upb_decoder_initforhandlers(d, h);
+  upb_handlers_unref(h);
+}
+
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
+                       uint64_t end_ofs, void *closure) {
+  upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
+  f->end_ofs = end_ofs;
+  d->end_ofs = end_ofs;
+  d->refstart_ofs = start_ofs;
+  d->refend_ofs = start_ofs;
+  d->bufstart_ofs = start_ofs;
+  d->bufend_ofs = start_ofs;
+  d->bytesrc = bytesrc;
+  d->buf = NULL;
+  d->ptr = NULL;
+  d->end = NULL;  // Force a buffer pull.
+#ifdef UPB_USE_JIT_X64
+  d->jit_end = NULL;
+#endif
+  d->delim_end = UINTPTR_MAX;  // But don't let end-of-message get triggered.
+  d->strref.bytesrc = bytesrc;
+}
+
+void upb_decoder_uninit(upb_decoder *d) {
+#ifdef UPB_USE_JIT_X64
+  if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
+#endif
+  upb_dispatcher_uninit(&d->dispatcher);
+}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
new file mode 100644
index 0000000..3981359
--- /dev/null
+++ b/upb/pb/decoder.h
@@ -0,0 +1,99 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb_decoder implements a high performance, streaming decoder for protobuf
+ * data that works by implementing upb_src and getting its data from a
+ * upb_bytesrc.
+ *
+ * The decoder does not currently support non-blocking I/O, in the sense that
+ * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
+ * decoder when data becomes available again.  Support for this could be added,
+ * but it would add complexity and perhaps cost efficiency also.
+ */
+
+#ifndef UPB_DECODER_H_
+#define UPB_DECODER_H_
+
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_decoder *****************************************************************/
+
+struct dasm_State;
+
+typedef struct _upb_decoder {
+  upb_bytesrc *bytesrc;       // Source of our serialized data.
+  upb_dispatcher dispatcher;  // Dispatcher to which we push parsed data.
+  upb_status *status;         // Where we will store any errors that occur.
+  upb_strref strref;          // For passing string data to callbacks.
+
+  // Offsets for the region we currently have ref'd.
+  uint64_t refstart_ofs, refend_ofs;
+
+  // Current buffer and its stream offset.
+  const char *buf, *ptr, *end;
+  uint64_t bufstart_ofs, bufend_ofs;
+
+  // Stream offset for the end of the top-level message, if any.
+  uint64_t end_ofs;
+
+  // Position in buf up to which we've delivered callbacks; needed for rollback
+  // on UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
+  const char *completed_ptr;
+
+  // End of the delimited region as an address comparable with ptr, or
+  // UINTPTR_MAX if not in this buf.
+  uintptr_t delim_end;
+
+#ifdef UPB_USE_JIT_X64
+  // For JIT, which doesn't do bounds checks in the middle of parsing a field.
+  const char *jit_end, *effective_end;  // == MIN(jit_end, submsg_end)
+
+  // JIT-generated machine code (else NULL).
+  char *jit_code;
+  size_t jit_size;
+  char *debug_info;
+
+  struct dasm_State *dynasm;
+#endif
+
+  sigjmp_buf exitjmp;  // Jump target used to unwind out of decoding.
+} upb_decoder;
+
+// Initializes/uninitializes a decoder for calling into the given handlers
+// (or to write into the given msgdef, given its accessors).  Takes a ref
+// on the handlers or msgdef.
+void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h);
+
+// Equivalent to:
+//   upb_accessors_reghandlers(h, m);
+//   upb_decoder_initforhandlers(d, h);
+// except possibly more efficient, by using cached state in the msgdef.
+void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m);
+void upb_decoder_uninit(upb_decoder *d);
+
+// Resets the internal state of an already-allocated decoder.  This puts it in a
+// state where it has not seen any data, and expects the next data to be from
+// the beginning of a new protobuf.  Parsers must be reset before they can be
+// used.  A decoder can be reset multiple times.
+//
+// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message.
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs,
+                       uint64_t end_ofs, void *closure);
+
+void upb_decoder_decode(upb_decoder *d, upb_status *status);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_DECODER_H_ */
diff --git a/upb/pb/decoder_x86.dasc b/upb/pb/decoder_x86.dasc
new file mode 100644
index 0000000..19043c6
--- /dev/null
+++ b/upb/pb/decoder_x86.dasc
@@ -0,0 +1,694 @@
+|//
+|// upb - a minimalist implementation of protocol buffers.
+|//
+|// Copyright (c) 2011 Google Inc.  See LICENSE for details.
+|// Author: Josh Haberman <jhaberman@gmail.com>
+|//
+|// JIT compiler for upb_decoder on x86.  Given a upb_handlers object,
+|// generates code specialized to parsing the specific message and
+|// calling specific handlers.
+
+#define UPB_NONE -1
+#define UPB_MULTIPLE -2
+#define UPB_TOPLEVEL_ONE -3
+
+#include <sys/mman.h>
+#include "dynasm/dasm_proto.h"
+#include "dynasm/dasm_x86.h"
+
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// We map into the low 32 bits when we can, but if this is not available
+// (like on OS X) we take what we can get.  It's not required for correctness,
+// it's just a performance thing that makes it more likely that our jumps
+// can be rel32 (i.e. within 32-bits of our pc) instead of the longer
+// sequence required for other jumps (see callp).
+#ifndef MAP_32BIT
+#define MAP_32BIT 0
+#endif
+
+// To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code
+// at runtime.  GDB 7.x+ has defined an interface for doing this, and these
+// structure/function definitions are copied out of gdb/jit.h
+//
+// We need to give GDB an ELF file at runtime describing the symbols we have
+// generated.  To avoid implementing the ELF format, we generate an ELF file
+// at compile-time and compile it in as a character string.  We can replace
+// a few key constants (address of JIT-ted function and its size) by looking
+// for a few magic numbers and doing a dumb string replacement.
+
+#ifndef __APPLE__
+#include "upb/pb/jit_debug_elf_file.h"
+
+typedef enum
+{
+  GDB_JIT_NOACTION = 0,
+  GDB_JIT_REGISTER,
+  GDB_JIT_UNREGISTER
+} jit_actions_t;
+
+typedef struct gdb_jit_entry {
+  struct gdb_jit_entry *next_entry;
+  struct gdb_jit_entry *prev_entry;
+  const char *symfile_addr;
+  uint64_t symfile_size;
+} gdb_jit_entry;
+
+typedef struct {
+  uint32_t version;
+  uint32_t action_flag;
+  gdb_jit_entry *relevant_entry;
+  gdb_jit_entry *first_entry;
+} gdb_jit_descriptor;
+
+gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
+
+void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
+
+void upb_reg_jit_gdb(upb_decoder *d) {
+  // Create debug info.
+  size_t elf_len = upb_pb_jit_debug_elf_file_o_len;
+  d->debug_info = malloc(elf_len);
+  memcpy(d->debug_info, upb_pb_jit_debug_elf_file_o, elf_len);
+  uint64_t *p = (void*)d->debug_info;
+  for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) {
+    if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
+    if (*p == 0x321) { *p = d->jit_size; }
+  }
+
+  // Register the JIT-ted code with GDB.
+  gdb_jit_entry *e = malloc(sizeof(gdb_jit_entry));
+  e->next_entry = __jit_debug_descriptor.first_entry;
+  e->prev_entry = NULL;
+  if (e->next_entry) e->next_entry->prev_entry = e;
+  e->symfile_addr = d->debug_info;
+  e->symfile_size = elf_len;
+  __jit_debug_descriptor.first_entry = e;
+  __jit_debug_descriptor.relevant_entry = e;
+  __jit_debug_descriptor.action_flag = GDB_JIT_REGISTER;
+  __jit_debug_register_code();
+}
+
+#else
+
+void upb_reg_jit_gdb(upb_decoder *d) {
+  (void)d;
+}
+
+#endif
+
+|.arch x64
+|.actionlist upb_jit_actionlist
+|.globals UPB_JIT_GLOBAL_
+|.globalnames upb_jit_globalnames
+|
+|// Calling conventions.
+|.define ARG1_64,   rdi
+|.define ARG2_8,    sil
+|.define ARG2_32,   esi
+|.define ARG2_64,   rsi
+|.define ARG3_8,    dl
+|.define ARG3_32,   edx
+|.define ARG3_64,   rdx
+|
+|// Register allocation / type map.
+|// ALL of the code in this file uses these register allocations.
+|// When we "call" within this file, we do not use regular calling
+|// conventions, but of course when calling to user callbacks we must.
+|.define PTR,       rbx
+|.define CLOSURE,   r12
+|.type   FRAME,     upb_dispatcher_frame, r13
+|.type   STRREF,    upb_strref, r14
+|.type   DECODER,   upb_decoder, r15
+|
+|.macro callp, addr
+|| if ((uintptr_t)addr < 0xffffffff) {
+     |  call   &addr
+|| } else {
+     |  mov64  rax, (uintptr_t)addr
+     |  call   rax
+|| }
+|.endmacro
+|
+|// Checks PTR for end-of-buffer.
+|.macro check_eob, m
+|  cmp   PTR, DECODER->effective_end
+|| if (m->is_group) {
+     |  jae  ->exit_jit
+|| } else {
+     |  jae  =>m->jit_endofbuf_pclabel
+|| }
+|.endmacro
+|
+|// Decodes varint from [PTR + offset] -> ARG3.
+|// Saves new pointer as rax.
+|.macro decode_loaded_varint, offset
+|  // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder.
+|  lea    rax, [PTR + offset + 1]
+|  mov    ARG3_32, ecx
+|  and    ARG3_32, 0x7f
+|  test   cl, cl
+|  jns    >9
+|  lea    rax, [PTR + offset + 2]
+|  movzx  esi, ch
+|  and    esi, 0x7f
+|  shl    esi, 7
+|  or     ARG3_32, esi
+|  test   cx, cx
+|  jns    >9
+|  mov    ARG1_64, rax
+|  mov    ARG2_32, ARG3_32
+|  callp  upb_vdecode_max8_fast
+|  test   rax, rax
+|  jz     ->exit_jit   // >10-byte varint.
+|9:
+|.endmacro
+|
+|.macro decode_varint, offset
+|  mov    ecx, dword [PTR + offset]
+|  decode_loaded_varint offset
+|  mov    PTR, rax
+|.endmacro
+|
+|// Decode the tag -> edx.
+|// Could specialize this by avoiding the value masking: could just key the
+|// table on the raw (length-masked) varint to save 3-4 cycles of latency.
+|// Currently only support tables where all entries are in the array part.
+|.macro dyndispatch, m
+|  decode_loaded_varint, 0
+|  mov  ecx, edx
+|  shr  ecx, 3
+|  and  edx, 0x7
+|  cmp  ecx, m->max_field_number  // Bounds-check the field.
+|  ja   ->exit_jit                // In the future; could be unknown label
+|| if ((uintptr_t)m->tablearray < 0xffffffff) {
+|    mov  rax, qword [rcx*8 + m->tablearray]  // TODO: support hybrid array/hash tables.
+|| } else {
+|    mov64  rax, (uintptr_t)m->tablearray
+|    mov  rax, qword [rax + rcx*8]
+|| }
+|  jmp  rax  // Dispatch: unpredictable jump.
+|.endmacro
+|
+|// Push a stack frame (not the CPU stack, the upb_decoder stack).
+|.macro pushframe, f, closure_, end_offset_, is_sequence_
+|  lea   rax, [FRAME + sizeof(upb_dispatcher_frame)]  // rax for shorter addressing.
+|  cmp   rax, qword DECODER->dispatcher.limit
+|  jae   ->exit_jit  // Frame stack overflow.
+|  mov   qword FRAME:rax->f, f
+|  mov   qword FRAME:rax->closure, closure_
+|  mov   dword FRAME:rax->end_ofs, end_offset_
+|  mov   byte FRAME:rax->is_sequence, is_sequence_
+|  mov   CLOSURE, rdx
+|  mov   DECODER->dispatcher.top, rax
+|  mov   FRAME, rax
+|.endmacro
+|
+|.macro popframe
+|  sub   FRAME, sizeof(upb_dispatcher_frame)
+|  mov   DECODER->dispatcher.top, FRAME
+|  setmsgend  m
+|  mov   CLOSURE, FRAME->closure
+|.endmacro
+|
+|.macro setmsgend, m
+|    mov    rsi, DECODER->jit_end
+|| if (m->is_group) {
+|    mov64  rax, 0xffffffffffffffff
+|    mov    qword DECODER->delim_end, rax
+|    mov    DECODER->effective_end, rsi
+|| } else {
+|    // Could store a correctly-biased version in the frame, at the cost of
+|    // a larger stack.
+|    mov    eax, dword FRAME->end_ofs
+|    add    rax, qword DECODER->buf
+|    mov    DECODER->delim_end, rax  // delim_end = d->buf + f->end_ofs
+|    cmp    rax, rsi
+|    jb     >8
+|    mov    rax, rsi                  // effective_end = min(d->delim_end, d->jit_end)
+|8:
+|    mov    DECODER->effective_end, rax
+|| }
+|.endmacro
+|
+|// Compares rcx with "tag" over only as many bytes as the varint tag occupies.
+|.macro checktag, tag
+|| switch (upb_value_size(tag)) {
+||    case 1:
+|       cmp   cl, tag
+||      break;
+||    case 2:
+|       cmp   cx, tag
+||      break;
+||    case 3:
+|       and   ecx, 0xffffff  // 3 bytes
+|       cmp   rcx, tag
+||      break;
+||    case 4:
+|       cmp   ecx, tag
+||      break;
+||    case 5:
+|       mov64 rdx, 0xffffffffff  // 5 bytes
+|       and   rcx, rdx
+|       cmp   rcx, tag
+||      break;
+||    default: abort();
+||  }
+|.endmacro
+|
+|// TODO: optimize for 0 (xor) and 32-bits.
+|.macro loadfval, f
+|| if (f->fval.val.uint64 == 0) {
+|    xor     ARG2_32, ARG2_32
+|| } else if (f->fval.val.uint64 < 0xffffffff) {
+|    mov     ARG2_32, f->fval.val.uint64
+|| } else {
+|    mov64   ARG2_64, f->fval.val.uint64
+|| }
+|.endmacro
+
+#include <stdlib.h>
+#include "upb/pb/varint.h"
+
+// Emits the JIT code that parses field "f" of message "m".  PTR should point to the beginning of the tag.
+static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
+                                  upb_mhandlers *m,
+                                  upb_fhandlers *f, upb_fhandlers *next_f) {
+  int tag_size = upb_value_size(tag);  // Bytes to skip past the tag (see the PTR adjustments below).
+
+  // PC-label for the dispatch table.
+  // We check the wire type (which must be loaded in edx) because the
+  // table is keyed on field number, not type.
+  |=>f->jit_pclabel:
+  |  cmp  edx, (tag & 0x7)
+  |  jne  ->exit_jit     // In the future: could be an unknown field or packed.
+  |=>f->jit_pclabel_notypecheck:
+  if (f->repeated) {
+    if (f->startseq) {
+      |  mov   ARG1_64, CLOSURE
+      |  loadfval f
+      |  callp f->startseq
+    } else {
+      |  mov   rdx, CLOSURE
+    }
+    |  mov   esi, FRAME->end_ofs
+    |  pushframe  f, rdx, esi, true
+  }
+
+  |1:  // Label for repeating this field.
+
+  // Decode the value into arg 3 for the callback.
+  switch (f->type) {
+    case UPB_TYPE(DOUBLE):
+    case UPB_TYPE(FIXED64):
+    case UPB_TYPE(SFIXED64):
+      |  mov  ARG3_64, qword [PTR + tag_size]
+      |  add  PTR, 8 + tag_size
+      break;
+
+    case UPB_TYPE(FLOAT):
+    case UPB_TYPE(FIXED32):
+    case UPB_TYPE(SFIXED32):
+      |  mov  ARG3_32, dword [PTR + tag_size]
+      |  add  PTR, 4 + tag_size
+      break;
+
+    case UPB_TYPE(BOOL):
+      // Can't assume it's one byte long, because bool must be wire-compatible
+      // with all of the varint integer types.
+      |  decode_varint  tag_size
+      |  test  ARG3_64, ARG3_64
+      |  setne ARG3_8   // Other bytes left with val, should be ok.
+      break;
+
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(ENUM):
+      |  decode_varint  tag_size
+      break;
+
+    case UPB_TYPE(SINT64):
+      // 64-bit zig-zag decoding.
+      |  decode_varint  tag_size
+      |  mov  rax, ARG3_64
+      |  shr  ARG3_64, 1
+      |  and  rax, 1
+      |  neg  rax
+      |  xor  ARG3_64, rax
+      break;
+
+    case UPB_TYPE(SINT32):
+      // 32-bit zig-zag decoding.
+      |  decode_varint  tag_size
+      |  mov  eax, ARG3_32
+      |  shr  ARG3_32, 1
+      |  and  eax, 1
+      |  neg  eax
+      |  xor  ARG3_32, eax
+      break;
+
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES):
+      // We only handle the case where the entire string is in our current
+      // buf, which sidesteps any security problems.  The C path has more
+      // robust checks.
+      |  decode_varint  tag_size
+      |  mov  STRREF->len, ARG3_32
+      |  mov  STRREF->ptr, PTR
+      |  mov  rax, PTR
+      |  sub  rax, DECODER->buf
+      |  add  eax, DECODER->bufstart_ofs  // = d->ptr - d->buf + d->bufstart_ofs
+      |  mov  STRREF->stream_offset, eax
+      |  add  PTR, ARG3_64
+      |  mov  ARG3_64, STRREF
+      |  cmp  PTR, DECODER->effective_end
+      |  ja   ->exit_jit   // Can't deliver, whole string not in buf.
+      break;
+
+    case UPB_TYPE_ENDGROUP:  // A pseudo-type.
+      |  add  PTR, tag_size
+      |  mov  DECODER->ptr, PTR
+      |  jmp  =>m->jit_endofmsg_pclabel
+      return;
+
+    // Will dispatch callbacks and call submessage in a second.
+    case UPB_TYPE(MESSAGE):
+      |  decode_varint  tag_size
+      break;
+    case UPB_TYPE(GROUP):
+      |  add  PTR, tag_size
+      break;
+
+    default: abort();
+  }
+  // Commit our work by advancing ptr.
+  // (If in the future we wanted to support a UPB_SUSPEND_AGAIN that
+  // suspends the decoder and redelivers the value later, we would
+  // need to adjust this to happen perhaps after the callback ran).
+  |  mov   DECODER->ptr, PTR
+
+  // Load closure and fval into arg registers.
+  |  mov   ARG1_64, CLOSURE
+  |  loadfval f
+
+  // Call callbacks.
+  if (upb_issubmsgtype(f->type)) {
+    // Call startsubmsg handler (if any).
+    if (f->startsubmsg) {
+      // upb_sflow_t startsubmsg(void *closure, upb_value fval)
+      |  mov   r12d, ARG3_32
+      |  callp f->startsubmsg
+    } else {
+      |  mov   rdx, CLOSURE
+      |  mov   r12d, ARG3_32
+    }
+    if (f->type == UPB_TYPE(MESSAGE)) {
+      |   mov   rsi, PTR
+      |   sub   rsi, DECODER->buf
+      |   add   esi, r12d   // = (d->ptr - d->buf) + delim_len
+    } else {
+      assert(f->type == UPB_TYPE(GROUP));
+      |   mov   esi, UPB_NONDELIMITED
+    }
+    |  pushframe  f, rdx, esi, false
+
+    upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
+    if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {  // Shared (UPB_MULTIPLE) message code ends in "ret", so it must be called.
+      |  jmp   =>sub_m->jit_startmsg_pclabel;
+    } else {
+      |  call  =>sub_m->jit_startmsg_pclabel;
+    }
+
+    |=>f->jit_submsg_done_pclabel:
+    |   popframe
+
+    // Call endsubmsg handler (if any).
+    if (f->endsubmsg) {
+      // upb_flow_t endsubmsg(void *closure, upb_value fval);
+      |  mov   ARG1_64, CLOSURE
+      |  loadfval  f
+      |  callp f->endsubmsg
+    }
+  } else {
+    |  callp  f->value
+  }
+  // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+
+  // Epilogue: load next tag, check for repeated field.
+  |  check_eob   m
+  |  mov         rcx, qword [PTR]
+  if (f->repeated) {
+    |  checktag  tag
+    |  je  <1
+    |  popframe
+    if (f->endseq) {
+      |  mov   ARG1_64, CLOSURE
+      |  loadfval f
+      |  callp f->endseq
+    }
+  }
+  if (next_tag != 0) {  // Fast path: try the caller-supplied next field before dynamic dispatch.
+    |  checktag  next_tag
+    |  je  =>next_f->jit_pclabel_notypecheck
+  }
+
+  // Fall back to dynamic dispatch.  Replicate the dispatch
+  // here so we can learn what fields generally follow others.
+  |  dyndispatch  m
+  |1:
+}
+
+static int upb_compare_uint32(const void *a, const void *b) {
+  uint32_t x = *(const uint32_t*)a, y = *(const uint32_t*)b;  // TODO: always put ENDGROUP at the end.
+  return (x > y) - (x < y);  // qsort() order; "x - y" could overflow int and misorder.
+}
+
+static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {  // Emits the JIT code for one message type.
+  |=>m->jit_startmsg_pclabel:
+  // Call startmsg handler (if any):
+  if (m->startmsg) {
+    // upb_flow_t startmsg(void *closure);
+    |  mov   ARG1_64, FRAME->closure
+    |  callp m->startmsg
+    // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+  }
+
+  |  setmsgend  m
+  |  check_eob   m
+  |  mov    ecx, dword [PTR]
+  |  dyndispatch m
+
+  // --------- New code section (does not fall through) ------------------------
+
+  // Emit code for parsing each field (dynamic dispatch contains pointers to
+  // all of these).
+
+  // Create an ordering over the fields (inttable ordering is undefined).
+  int num_keys = upb_inttable_count(&m->fieldtab), idx = 0;
+  uint32_t *keys = malloc(num_keys * sizeof(*keys));
+  if (!keys && num_keys > 0) abort();  // Out of memory; there is no error path here.
+  for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+      i = upb_inttable_next(&m->fieldtab, i)) {
+    keys[idx++] = upb_inttable_iter_key(i);
+  }
+  if (num_keys > 0) qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
+
+  upb_fhandlers *last_f = NULL;
+  uint32_t last_tag = 0;
+  for(int i = 0; i < num_keys; i++) {
+    uint32_t key = keys[i];
+    upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
+    uint32_t tag = upb_vencode32(key);
+    if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
+    last_tag = tag;
+    last_f = f;
+  }
+  if (last_f) upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);  // last_f stays NULL for a message with no fields.
+
+  free(keys);
+
+  // --------- New code section (does not fall through) ------------------------
+
+  // End-of-buf / end-of-message.
+  if (!m->is_group) {
+    // This case doesn't exist for groups, because there eob really means
+    // eob, so that case just exits the jit directly.
+    |=>m->jit_endofbuf_pclabel:
+    |  cmp  PTR, DECODER->delim_end
+    |  jb   ->exit_jit    // We are at eob, but not end-of-submsg.
+  }
+
+  |=>m->jit_endofmsg_pclabel:
+  // We are at end-of-submsg: call endmsg handler (if any):
+  if (m->endmsg) {
+    // void endmsg(void *closure, upb_status *status) {
+    |  mov   ARG1_64, FRAME->closure
+    |  lea   ARG2_64, DECODER->dispatcher.status
+    |  callp m->endmsg
+  }
+
+  if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
+    |  ret
+  } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
+    |  jmp  ->exit_jit
+  } else {
+    |  jmp  =>m->jit_parent_field_done_pclabel
+  }
+
+}
+
+static const char *dbgfmt =
+    "JIT encountered unknown field!  wt=%d, fn=%d\n";
+
+static void upb_decoder_jit(upb_decoder *d) {  // Emits the entry prologue, the code for every message, and the shared exit path.
+  |  push  rbp
+  |  mov   rbp, rsp
+  |  push  r15
+  |  push  r14
+  |  push  r13
+  |  push  r12
+  |  push  rbx
+  |  mov   DECODER, ARG1_64
+  |  mov   FRAME, DECODER:ARG1_64->dispatcher.top
+  |  lea   STRREF, DECODER:ARG1_64->strref
+  |  mov   CLOSURE, FRAME->closure
+  |  mov   PTR, DECODER->ptr
+
+  upb_handlers *h = d->dispatcher.handlers;
+  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {  // Such message code ends in "ret", so enter it with a call.
+    |  call  =>h->msgs[0]->jit_startmsg_pclabel
+    |  jmp   ->exit_jit
+  }
+
+  // TODO: push return addresses for re-entry (will be necessary for multiple
+  // buffer support).
+  for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
+
+  |->exit_jit:
+  |  pop   rbx
+  |  pop   r12
+  |  pop   r13
+  |  pop   r14
+  |  pop   r15
+  |  leave
+  |  ret
+  |=>0:  // pclabel 0: where the dispatch tables send unknown fields.
+  |  mov rdi, stderr
+  |  mov rsi, dbgfmt
+  |  callp  fprintf
+  |  callp  abort
+}
+
+void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
+                                     uint32_t *pclabel_count) {
+  f->jit_pclabel = (*pclabel_count)++;              // Entry that checks the wire type first.
+  f->jit_pclabel_notypecheck = (*pclabel_count)++;  // Entry that skips the wire-type check.
+  f->jit_submsg_done_pclabel = (*pclabel_count)++;  // Where the parent resumes after a submessage.
+}
+
+void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {  // First pass: hand out pclabels for m and its fields.
+  m->jit_startmsg_pclabel = (*pclabel_count)++;
+  m->jit_endofbuf_pclabel = (*pclabel_count)++;
+  m->jit_endofmsg_pclabel = (*pclabel_count)++;
+  m->jit_unknownfield_pclabel = (*pclabel_count)++;
+  m->jit_parent_field_done_pclabel = UPB_NONE;  // Resolved later by upb_decoder_jit_assignmsglabs2().
+  m->max_field_number = 0;
+  upb_inttable_iter i;
+  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+      i = upb_inttable_next(&m->fieldtab, i)) {
+    uint32_t key = upb_inttable_iter_key(i);
+    m->max_field_number = UPB_MAX(m->max_field_number, key);  // NOTE(review): key looks like a full tag (makejit looks up (j << 3) | k), not a bare field number -- confirm.
+    upb_fhandlers *f = upb_inttable_iter_value(i);
+    upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+  }
+  // XXX: Won't work for large field numbers; will need to use a upb_table.
+  m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));  // NOTE(review): malloc() result is not checked.
+}
+
+// Second pass: for messages that have only one parent, link them to the field
+// from which they are called.
+void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
+  upb_inttable_iter i;
+  for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+      i = upb_inttable_next(&m->fieldtab, i)) {
+    upb_fhandlers *f = upb_inttable_iter_value(i);
+    if (upb_issubmsgtype(f->type)) {
+      upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
+      if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
+        sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;  // First parent seen: sub_m can jump straight back to it.
+      } else {
+        sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE;  // A second parent was found: no single label to return to.
+      }
+    }
+  }
+}
+
+void upb_decoder_makejit(upb_decoder *d) {  // Builds the machine code and per-message dispatch tables for d's handlers.
+  d->debug_info = NULL;
+
+  // Assign pclabels.
+  uint32_t pclabel_count = 1;
+  upb_handlers *h = d->dispatcher.handlers;
+  for (int i = 0; i < h->msgs_len; i++)
+    upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
+  for (int i = 0; i < h->msgs_len; i++)
+    upb_decoder_jit_assignmsglabs2(h->msgs[i]);
+
+  if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
+    h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
+  }
+
+  void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
+  dasm_init(d, 1);
+  dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
+  dasm_growpc(d, pclabel_count);
+  dasm_setup(d, upb_jit_actionlist);
+
+  upb_decoder_jit(d);
+
+  dasm_link(d, &d->jit_size);
+
+  d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
+                     MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);  // fd is -1 for portable anonymous maps.
+  if (d->jit_code == MAP_FAILED) abort();  // mmap() reports failure as MAP_FAILED, not NULL.
+  upb_reg_jit_gdb(d);
+
+  dasm_encode(d, d->jit_code);
+
+  // Create dispatch tables.
+  for (int i = 0; i < h->msgs_len; i++) {
+    upb_mhandlers *m = h->msgs[i];
+    for (uint32_t j = 0; j <= m->max_field_number; j++) {
+      upb_fhandlers *f = NULL;
+      for (int k = 0; k < 8; k++) {
+        f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
+        if (f) break;
+      }
+      if (f) {
+        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+      } else {
+        // Don't handle unknown fields yet.
+        m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+      }
+    }
+  }
+
+  dasm_free(d);
+  free(globals);
+
+  mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);
+
+  FILE *f = fopen("/tmp/machine-code", "wb");  // Best-effort debugging dump of the generated code.
+  if (f) fwrite(d->jit_code, d->jit_size, 1, f);
+  if (f) fclose(f);
+}
+
+void upb_decoder_freejit(upb_decoder *d) {
+  munmap(d->jit_code, d->jit_size);  // Releases the mapping created in upb_decoder_makejit().
+  free(d->debug_info);  // makejit initializes this to NULL, so free() is safe even if it was never set.
+  // TODO: unregister
+}
diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c
new file mode 100644
index 0000000..139dc88
--- /dev/null
+++ b/upb/pb/encoder.c
@@ -0,0 +1,421 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb_encoder.h"
+
+#include <stdlib.h>
+#include "descriptor.h"
+
+/* Functions for calculating sizes of wire values. ****************************/
+
+static size_t upb_v_uint64_t_size(uint64_t val) {
+  if (val == 0) return 1;  // Zero still takes one byte; also keeps clz away from its undefined input.
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of the highest set bit.
+#else
+  int high_bit = 0;
+  for(uint64_t tmp = val; tmp >>= 1; ) high_bit++;
+#endif
+  return high_bit / 7 + 1;  // A varint carries 7 payload bits per byte.
+}
+
+static size_t upb_v_int32_t_size(int32_t val) {
+  // v_int32's are sign-extended to maintain wire compatibility with int64s (so a negative value sizes as 10 bytes).
+  return upb_v_uint64_t_size((int64_t)val);
+}
+static size_t upb_v_uint32_t_size(uint32_t val) {
+  return upb_v_uint64_t_size(val);
+}
+static size_t upb_f_uint64_t_size(uint64_t val) {
+  (void)val;  // Length is independent of value.
+  return sizeof(uint64_t);
+}
+static size_t upb_f_uint32_t_size(uint32_t val) {
+  (void)val;  // Length is independent of value.
+  return sizeof(uint32_t);
+}
+
+
+/* Functions to write wire values. ********************************************/
+
+// Since we know in advance the longest that the value could be, we always make
+// sure that our buffer is long enough.  This saves us from having to perform
+// bounds checks.
+
+// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT).
+static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
+{
+  do {
+    uint8_t byte = val & 0x7f;  // Emit the low 7 bits first.
+    val >>= 7;
+    if(val) byte |= 0x80;  // High bit set means more bytes follow.
+    *buf++ = byte;
+  } while(val);
+  return buf;  // One past the last byte written.
+}
+
+// Puts an unsigned 32-bit varint, verbatim.  Never uses the high 32 bits.
+static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
+{
+  return upb_put_v_uint64_t(buf, val);
+}
+
+// Puts a signed 32-bit varint, first sign-extending to 64-bits.  We do this to
+// maintain wire-compatibility with 64-bit signed integers.
+static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
+{
+  return upb_put_v_uint64_t(buf, (int64_t)val);
+}
+
+static void upb_put32(uint8_t *buf, uint32_t val) {  // Stores val in buf[0..3], least-significant byte first.
+  buf[0] = val & 0xff;
+  buf[1] = (val >> 8) & 0xff;
+  buf[2] = (val >> 16) & 0xff;
+  buf[3] = (val >> 24);
+}
+
+// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
+static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
+{
+  uint8_t *uint32_end = buf + sizeof(uint32_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint32_t*)buf = val;  // NOTE(review): stores host byte order; matches the fallback only on little-endian hosts.
+#else
+  upb_put32(buf, val);
+#endif
+  return uint32_end;  // One past the last byte written.
+}
+
+// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
+static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
+{
+  uint8_t *uint64_end = buf + sizeof(uint64_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint64_t*)buf = val;
+#else
+  upb_put32(buf, (uint32_t)val);  // Low word goes in bytes 0..3.
+  upb_put32(buf + sizeof(uint32_t), (uint32_t)(val >> 32));  // High word goes in bytes 4..7, not on top of the low word.
+#endif
+  return uint64_end;  // One past the last byte written.
+}
+
+/* Functions to write and calculate sizes for .proto values. ******************/
+
+// Performs zig-zag encoding, which is used by sint32 and sint64.
+static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
+static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
+
+/* Use macros to define a set of two functions for each .proto type:
+ *
+ *  // Converts a .proto value to its wire representation and writes it into
+ *  // buf.  No bounds check is performed: the caller must supply a buffer big
+ *  // enough for the longest possible encoding.  Returns a pointer one past
+ *  // the data that was written.
+ *  uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
+ *
+ *  // Returns the number of bytes required to encode val.
+ *  size_t upb_get_INT32_size(int32_t val);
+ *
+ *  // Given a .proto value s (source) convert it to a wire value.
+ *  uint32_t upb_vtowv_INT32(int32_t s);
+ */
+
+#define VTOWV(type, wire_t, val_t) \
+  static wire_t upb_vtowv_ ## type(val_t s)
+
+#define PUT(type, v_or_f, wire_t, val_t, member_name) \
+  static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \
+    wire_t tmp = upb_vtowv_ ## type(val); \
+    return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \
+  }
+
+#define T(type, v_or_f, wire_t, val_t, member_name) \
+  static size_t upb_get_ ## type ## _size(val_t val) { \
+    return upb_ ## v_or_f ## _ ## wire_t ## _size(val); \
+  } \
+  VTOWV(type, wire_t, val_t);  /* prototype for PUT below */ \
+  PUT(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t)
+
+T(INT32,    v,  int32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(INT64,    v, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(UINT32,   v, uint32_t, uint32_t, uint32)  { return s;               }
+T(UINT64,   v, uint64_t, uint64_t, uint64)  { return s;               }
+T(SINT32,   v, uint32_t, int32_t,  int32)   { return upb_zzenc_32(s); }
+T(SINT64,   v, uint64_t, int64_t,  int64)   { return upb_zzenc_64(s); }
+T(FIXED32,  f, uint32_t, uint32_t, uint32)  { return s;               }
+T(FIXED64,  f, uint64_t, uint64_t, uint64)  { return s;               }
+T(SFIXED32, f, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(SFIXED64, f, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(BOOL,     v, uint32_t, bool,     _bool)   { return (uint32_t)s;     }
+T(ENUM,     v, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(DOUBLE,   f, uint64_t, double,   _double) {
+  upb_value v;
+  v._double = s;
+  return v.uint64;
+}
+T(FLOAT,    f, uint32_t, float,    _float)  {
+  upb_value v;
+  v._float = s;
+  return v.uint32;
+}
+#undef VTOWV
+#undef PUT
+#undef T
+
+static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
+{
+#define CASE(t, member_name) \
+  case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    default: assert(false); return buf;
+  }
+#undef CASE
+}
+
+static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
+{
+#define CASE(t, member_name) \
+  case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
+  switch(ft) {
+    CASE(DOUBLE,   _double)
+    CASE(FLOAT,    _float)
+    CASE(INT32,    int32)
+    CASE(INT64,    int64)
+    CASE(UINT32,   uint32)
+    CASE(UINT64,   uint64)
+    CASE(SINT32,   int32)
+    CASE(SINT64,   int64)
+    CASE(FIXED32,  uint32)
+    CASE(FIXED64,  uint64)
+    CASE(SFIXED32, int32)
+    CASE(SFIXED64, int64)
+    CASE(BOOL,     _bool)
+    CASE(ENUM,     int32)
+    default: assert(false); return 0;
+  }
+#undef CASE
+}
+
+static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
+                             upb_wire_type_t wt)
+{
+  return upb_put_UINT32(buf, wt | (num << 3));  // Tag = field number above the low 3 bits, wire type in them.
+}
+
+static uint32_t _upb_get_tag_size(upb_field_number_t num)
+{
+  return upb_get_UINT32_size(num << 3);  // Wire type omitted: the low 3 bits cannot change the varint's length.
+}
+
+
+/* upb_sizebuilder ************************************************************/
+
+struct upb_sizebuilder {
+  // Accumulating size for the current level.
+  uint32_t size;
+
+  // Stack of sizes for our current nesting.
+  uint32_t stack[UPB_MAX_NESTING], *top;
+
+  // Vector of sizes.
+  uint32_t *sizes;
+  int sizes_len;
+  int sizes_size;
+
+  upb_status status;
+};
+
+// upb_sink callbacks.
+static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                                upb_value val,
+                                                upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  uint32_t size = 0;
+  size += _upb_get_tag_size(f->number);
+  size += _upb_get_value_size(f->type, val);
+  sb->size += size;
+  return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_strptr str,
+                                              int32_t start, uint32_t end,
+                                              upb_status *status)
+{
+  (void)status;
+  (void)str;   // String data itself is not used.
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(start >= 0) {
+    uint32_t size = 0;
+    size += _upb_get_tag_size(f->number);
+    size += upb_get_UINT32_size(end - start);  // NOTE(review): counts only the length prefix, not the (end - start) payload bytes -- confirm intended.
+    sb->size += size;
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f,
+                                                upb_status *status)
+{
+  (void)status;
+  (void)f;  // For MESSAGE, tag size and delimiter are calculated in endcb; f is still read below.
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type == UPB_TYPE(MESSAGE)) {
+    *sb->top = sb->size;  // Save the parent's running size; endcb pops it.
+    sb->top++;  // NOTE(review): no check that top stays within stack[UPB_MAX_NESTING].
+    sb->size = 0;
+  } else {
+    assert(f->type == UPB_TYPE(GROUP));
+    sb->size += _upb_get_tag_size(f->number);
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type == UPB_TYPE(MESSAGE)) {
+    sb->top--;
+    if(sb->sizes_len == sb->sizes_size) {
+      sb->sizes_size *= 2;  // NOTE(review): doubling never grows a capacity of 0.
+      sb->sizes = realloc(sb->sizes, sb->sizes_size * sizeof(*sb->sizes));  // NOTE(review): realloc() failure is unchecked.
+    }
+    uint32_t child_size = sb->size;
+    uint32_t parent_size = *sb->top;
+    sb->sizes[sb->sizes_len++] = child_size;  // Record this submessage's size for the encoder.
+    // The size according to the parent includes the tag size and delimiter of
+    // the submessage.
+    parent_size += upb_get_UINT32_size(child_size);
+    parent_size += _upb_get_tag_size(f->number);
+    // Include size accumulated in parent before child began.
+    sb->size = child_size + parent_size;
+  } else {
+    assert(f->type == UPB_TYPE(GROUP));
+    // As an optimization, we could just add this number twice in startcb, to
+    // avoid having to recalculate it.
+    sb->size += _upb_get_tag_size(f->number);
+  }
+  return UPB_SINK_CONTINUE;
+}
+
+upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
+  _upb_sizebuilder_valuecb,
+  _upb_sizebuilder_strcb,
+  _upb_sizebuilder_startcb,
+  _upb_sizebuilder_endcb
+};
+
+
+/* upb_sink callbacks *********************************************************/
+
+struct upb_encoder {
+  upb_sink base;
+  //upb_bytesink *bytesink;
+  uint32_t *sizes;
+  int size_offset;
+};
+
+
+// Within one callback we may need to encode up to two separate values.
+#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2)
+
+static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf,
+                                             size_t len, upb_status *status)
+{
+  // TODO: conjure a upb_strptr that points to buf.
+  //upb_strptr ptr;
+  (void)s;
+  (void)buf;
+  (void)status;
+  size_t written = 5;// = upb_bytesink_onbytes(s->bytesink, ptr);
+  if(written < len) {  // NOTE: "written" is a hard-coded placeholder until the bytesink call above is wired up.
+    // TODO: mark to skip "written" bytes next time.
+    return UPB_SINK_STOP;
+  } else {
+    return UPB_SINK_CONTINUE;
+  }
+}
+
+static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                            upb_value val, upb_status *status)
+{
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  upb_wire_type_t wt = upb_types[f->type].expected_wire_type;
+  // TODO: handle packed encoding.
+  ptr = _upb_put_tag(ptr, f->number, wt);
+  ptr = upb_encode_value(ptr, f->type, val);
+  return _upb_encoder_push_buf(s, buf, ptr - buf, status);
+}
+
+static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_strptr str,
+                                          int32_t start, uint32_t end,
+                                          upb_status *status)
+{
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  if(start >= 0) {
+    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
+    ptr = upb_put_UINT32(ptr, end - start);  // Length prefix for the string payload.
+  }
+  // TODO: properly handle partially consumed strings and partially supplied
+  // strings.
+  _upb_encoder_push_buf(s, buf, ptr - buf, status);  // NOTE(review): this push's result is ignored.
+  return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status);
+}
+
+static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f,
+                                            upb_status *status)
+{
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  if(f->type == UPB_TYPE(GROUP)) {
+    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_START_GROUP);
+  } else {
+    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
+    ptr = upb_put_UINT32(ptr, s->sizes[--s->size_offset]);  // Precomputed sizes are consumed from the back of the array.
+  }
+  return _upb_encoder_push_buf(s, buf, ptr - buf, status);
+}
+
+static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_status *status)
+{
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE;
+  ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_END_GROUP);
+  return _upb_encoder_push_buf(s, buf, ptr - buf, status);
+}
+
+upb_sink_callbacks _upb_encoder_sink_vtbl = {
+  _upb_encoder_valuecb,
+  _upb_encoder_strcb,
+  _upb_encoder_startcb,
+  _upb_encoder_endcb
+};
+
diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h
new file mode 100644
index 0000000..64c5047
--- /dev/null
+++ b/upb/pb/encoder.h
@@ -0,0 +1,58 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009-2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * Implements a set of upb_handlers that write protobuf data to the binary wire
+ * format.
+ *
+ * For messages that have any submessages, the encoder needs a buffer
+ * containing the submessage sizes, so they can be properly written at the
+ * front of each message.  Note that groups do *not* have this requirement.
+ */
+
+#ifndef UPB_ENCODER_H_
+#define UPB_ENCODER_H_
+
+#include "upb.h"
+#include "upb_stream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_encoder ****************************************************************/
+
+// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
+// buffer binary wire format.
+struct upb_encoder;
+typedef struct upb_encoder upb_encoder;
+
+upb_encoder *upb_encoder_new(upb_msgdef *md);
+void upb_encoder_free(upb_encoder *e);
+
+// Resets the given upb_encoder such that it is ready to begin encoding,
+// outputting data to "bytesink" (which must live until the encoder is
+// reset or destroyed).
+void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
+
+// Returns the upb_sink to which data can be written.  The sink is invalidated
+// when the encoder is reset or destroyed.  Note that if the client wants to
+// encode any length-delimited submessages it must first call
+// upb_encoder_buildsizes() below.
+upb_sink *upb_encoder_sink(upb_encoder *e);
+
+// Call prior to pushing any data with embedded submessages.  "src" must yield
+// exactly the same data as what will next be encoded, but in reverse order.
+// The encoder iterates over this data in order to determine the sizes of the
+// submessages.  If any errors are returned by the upb_src, the status will
+// be saved in *status.  If the client is sure that the upb_src will not throw
+// any errors, "status" may be NULL.
+void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_ENCODER_H_ */
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
new file mode 100644
index 0000000..3763ae0
--- /dev/null
+++ b/upb/pb/glue.c
@@ -0,0 +1,129 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2010 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/bytestream.h"
+#include "upb/descriptor.h"
+#include "upb/msg.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/glue.h"
+#include "upb/pb/textprinter.h"
+
+void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
+                  upb_status *status) {
+  upb_stringsrc strsrc;
+  upb_stringsrc_init(&strsrc);
+  upb_stringsrc_reset(&strsrc, str, len);  // Expose str[0..len) as a byte source.
+
+  upb_decoder d;
+  upb_decoder_initformsgdef(&d, md);  // A fresh decoder is built (and torn down) on every call.
+  upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
+  upb_decoder_decode(&d, status);  // The outcome is reported only through *status.
+
+  upb_stringsrc_uninit(&strsrc);
+  upb_decoder_uninit(&d);
+}
+
+#if 0
+void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
+                   bool single_line) {
+  upb_stringsink strsink;
+  upb_stringsink_init(&strsink);
+  upb_stringsink_reset(&strsink, str);
+
+  upb_textprinter *p = upb_textprinter_new();
+  upb_handlers *h = upb_handlers_new();
+  upb_textprinter_reghandlers(h, md);
+  upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line);
+
+  upb_status status = UPB_STATUS_INIT;
+  upb_msg_runhandlers(msg, md, h, p, &status);
+  // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be
+  // capable of returning an error.
+  assert(upb_ok(&status));
+  upb_status_uninit(&status);
+
+  upb_stringsink_uninit(&strsink);
+  upb_textprinter_free(p);
+  upb_handlers_unref(h);
+}
+#endif
+
+// TODO: read->load.
+void upb_read_descriptor(upb_symtab *symtab, const char *str, size_t len,
+                         upb_status *status) {  // Decodes the descriptor in str[0..len) and, on success, commits its defs to symtab.
+  upb_stringsrc strsrc;
+  upb_stringsrc_init(&strsrc);
+  upb_stringsrc_reset(&strsrc, str, len);
+
+  upb_handlers *h = upb_handlers_new();
+  upb_descreader_reghandlers(h);
+
+  upb_decoder d;
+  upb_decoder_initforhandlers(&d, h);
+  upb_handlers_unref(h);
+  upb_descreader r;
+  upb_symtabtxn txn;
+  upb_symtabtxn_init(&txn);
+  upb_descreader_init(&r, &txn);
+  upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r);
+
+  upb_decoder_decode(&d, status);
+
+  // Set default accessors and layouts on all messages.
+  // for msgdef in symtabtxn:
+  upb_symtabtxn_iter i;
+  upb_symtabtxn_begin(&i, &txn);
+  for(; !upb_symtabtxn_done(&i); upb_symtabtxn_next(&i)) {
+    upb_def *def = upb_symtabtxn_iter_def(&i);
+    upb_msgdef *md = upb_dyncast_msgdef(def);
+    if (!md) continue;  // Not a msgdef: skip it, but still commit and clean up below.
+    // For field in msgdef:
+    upb_msg_iter j;
+    for(j = upb_msg_begin(md); !upb_msg_done(j); j = upb_msg_next(md, j)) {
+      upb_fielddef *f = upb_msg_iter_field(j);
+      upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
+    }
+    upb_msgdef_layout(md);
+  }
+
+  if (upb_ok(status)) upb_symtab_commit(symtab, &txn, status);
+
+  upb_symtabtxn_uninit(&txn);
+  upb_descreader_uninit(&r);
+  upb_stringsrc_uninit(&strsrc);
+  upb_decoder_uninit(&d);
+}
+
+char *upb_readfile(const char *filename, size_t *len) {  // Returns a malloc()ed copy of the file (caller frees), or NULL on failure.
+  FILE *f = fopen(filename, "rb");
+  if(!f) return NULL;
+  if(fseek(f, 0, SEEK_END) != 0) goto error;
+  long size = ftell(f);
+  if(size < 0) goto error;
+  if(fseek(f, 0, SEEK_SET) != 0) goto error;
+  char *buf = malloc(size);
+  if(!buf || fread(buf, size, 1, f) != 1) { free(buf); goto error; }  // Don't leak buf on a short read; free(NULL) is a no-op.
+  fclose(f);
+  if (len) *len = size;
+  return buf;
+
+error:
+  fclose(f);
+  return NULL;
+}
+
+void upb_read_descriptorfile(upb_symtab *symtab, const char *fname,
+                             upb_status *status) {
+  size_t len;
+  char *data = upb_readfile(fname, &len);
+  if (!data) {
+    upb_status_setf(status, UPB_ERROR, "Couldn't read file: %s", fname);
+    return;
+  }
+  upb_read_descriptor(symtab, data, len, status);
+  free(data);  // upb_readfile() returns a malloc()ed buffer that we own.
+}
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
new file mode 100644
index 0000000..5359120
--- /dev/null
+++ b/upb/pb/glue.h
@@ -0,0 +1,62 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * upb's core components like upb_decoder and upb_msg are carefully designed to
+ * avoid depending on each other for maximum orthogonality.  In other words,
+ * you can use a upb_decoder to decode into *any* kind of structure; upb_msg is
+ * just one such structure.  A upb_msg can be serialized/deserialized into any
+ * format; protobuf binary format is just one such format.
+ *
+ * However, for convenience we provide functions here for doing common
+ * operations like deserializing protobuf binary format into a upb_msg.  The
+ * compromise is that this file drags in almost all of upb as a dependency,
+ * which could be undesirable if you're trying to use a trimmed-down build of
+ * upb.
+ *
+ * While these routines are convenient, they do not reuse any encoding/decoding
+ * state.  For example, if a decoder is JIT-based, it will be re-JITted every
+ * time these functions are called.  For this reason, if you are parsing lots
+ * of data and efficiency is an issue, these may not be the best functions to
+ * use (though they are useful for prototyping, before optimizing).
+ */
+
+#ifndef UPB_GLUE_H
+#define UPB_GLUE_H
+
+#include <stdbool.h>
+#include "upb/upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Forward-declares so we don't have to include everything in this .h file.
+// Clients should use the regular, typedef'd names (e.g. upb_msgdef).
+struct _upb_msg;
+struct _upb_msgdef;
+struct _upb_symtab;
+
+// Decodes the given string, which must be in protobuf binary format, to the
+// given upb_msg with msgdef "md", storing the status of the operation in "s".
+void upb_strtomsg(const char *str, size_t len, void *msg,
+                  struct _upb_msgdef *md, upb_status *s);
+
+//void upb_msgtotext(struct _upb_string *str, void *msg,
+//                   struct _upb_msgdef *md, bool single_line);
+
+void upb_read_descriptor(struct _upb_symtab *symtab, const char *str, size_t len,
+                         upb_status *status);  // Commits to symtab if ok.
+// As above, but the descriptor data is read from the file named "fname".
+void upb_read_descriptorfile(struct _upb_symtab *symtab, const char *fname,
+                             upb_status *status);
+// Returns the malloc'd contents of "filename" (size in *len), NULL on error.
+char *upb_readfile(const char *filename, size_t *len);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/upb/pb/jit_debug_elf_file.s b/upb/pb/jit_debug_elf_file.s
new file mode 100644
index 0000000..0b74630
--- /dev/null
+++ b/upb/pb/jit_debug_elf_file.s
@@ -0,0 +1,7 @@
+   .file "JIT mcode"
+   .text
+upb_jit_compiled_decoder:
+   .globl upb_jit_compiled_decoder
+   .size upb_jit_compiled_decoder, 0x321  # symbol size in bytes
+   .type upb_jit_compiled_decoder STT_FUNC  # mark the symbol as a function
+   .space 0x321  # reserve the same number of bytes after the label
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
new file mode 100644
index 0000000..ce029d5
--- /dev/null
+++ b/upb/pb/textprinter.c
@@ -0,0 +1,199 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include <ctype.h>
+#include <float.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include "upb/pb/textprinter.h"
+
+struct _upb_textprinter {
+  upb_bytesink *bytesink;  // Destination for all output.
+  int indent_depth;        // Current nesting level (two spaces per level).
+  bool single_line;        // If true, no indentation or newlines are emitted.
+  upb_status status;       // Passed to every bytesink write.
+};
+
+#define CHECK(x) do { if ((x) < 0) goto err; } while (0)  // needs an "err:" label
+
+static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref,
+                                      bool preserve_utf8) {
+  // Writes the bytes of "strref" to p->bytesink, C-escaped (bytes >= 0x80 are
+  // kept as-is if "preserve_utf8").  Returns 0, or -1 if a write failed.
+  // Based on CEscapeInternal() from Google's protobuf release.
+  // TODO: we could read directly from a bytesrc's buffer instead.
+  // TODO: we could write directly into a bytesink's buffer instead.
+  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
+  char buf[strref->len + 1], *src = buf;  // +1: a VLA must not have size 0.
+  char *end = src + strref->len;
+  upb_bytesrc_read(strref->bytesrc, strref->stream_offset, strref->len, buf);
+  // I think hex is prettier and more useful, but proto2 uses octal; should
+  // investigate whether it can parse hex also.
+  bool use_hex = false;
+  bool last_hex_escape = false; // true if last output char was \xNN
+  for (; src < end; src++) {
+    if (dstend - dst < 5) {  // Room for a 4-char escape plus sprintf()'s NUL.
+      CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
+      dst = dstbuf;
+    }
+    unsigned char c = (unsigned char)*src;  // <ctype.h> needs values >= 0.
+    bool is_hex_escape = false;
+    switch (c) {
+      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
+      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
+      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
+      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
+      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
+      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
+      default:
+        // Note that if we emit \xNN and the src character after that is a hex
+        // digit then that digit must be escaped too to prevent it being
+        // interpreted as part of the character code by C.
+        if ((!preserve_utf8 || c < 0x80) &&
+            (!isprint(c) || (last_hex_escape && isxdigit(c)))) {
+          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), c);
+          is_hex_escape = use_hex;
+          dst += 4;
+        } else {
+          *(dst++) = *src;
+        }
+    }
+    last_hex_escape = is_hex_escape;
+  }
+  // Flush remaining data, which starts at dstbuf (dst marks its end).
+  CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
+  return 0;
+err:
+  return -1;
+}
+
+// Indents by two spaces per level, except in single-line mode.  -1 on error.
+static int upb_textprinter_indent(upb_textprinter *p) {
+  for (int left = p->single_line ? 0 : p->indent_depth; left > 0; left--)
+    CHECK(upb_bytesink_writestr(p->bytesink, "  ", &p->status));
+  return 0;
+err:
+  return -1;
+}
+
+// Ends the current field: a space in single-line mode, otherwise a newline.
+// Returns 0 on success and -1 if the write failed.
+static int upb_textprinter_endfield(upb_textprinter *p) {
+  const char *terminator = "\n";
+  if (p->single_line) terminator = " ";
+  CHECK(upb_bytesink_writestr(p->bytesink, terminator, &p->status));
+  return 0;
+err:
+  return -1;
+}
+
+// Prints one scalar or string field as "name: value"; UPB_BREAK on error.
+static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
+                                        upb_value val) {
+  upb_textprinter *p = _p;
+  upb_fielddef *f = upb_value_getfielddef(fval);
+  CHECK(upb_textprinter_indent(p));
+  CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name));
+#define CASE(fmtstr, member) \
+    CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
+  switch(f->type) {
+    // TODO: figure out what we should really be doing for these
+    // floating-point formats.
+    case UPB_TYPE(DOUBLE):
+      CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", DBL_DIG, upb_value_getdouble(val))); break;
+    case UPB_TYPE(FLOAT):
+      CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", FLT_DIG+2, upb_value_getfloat(val))); break;
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SFIXED64):
+    case UPB_TYPE(SINT64):
+      CASE("%" PRId64, int64)
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      CASE("%" PRIu64, uint64)
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      CASE("%" PRIu32, uint32);
+    case UPB_TYPE(ENUM): {
+      upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
+      const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
+      if (label) {
+        // We found a corresponding string for this enum.  Otherwise we fall
+        // through to the int32 code path.
+        CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status));
+        break;
+      }
+    }  // fallthrough
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(SINT32):
+      CASE("%" PRId32, int32)
+    case UPB_TYPE(BOOL):
+      CASE("%hhu", bool);
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES):
+      CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
+      CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
+                                       f->type == UPB_TYPE(STRING)));
+      CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
+      break;
+  }
+  CHECK(upb_textprinter_endfield(p));
+  return UPB_CONTINUE;
+err:
+  return UPB_BREAK;
+}
+
+// Prints "name {" and enters one nesting level; UPB_SBREAK if a write fails.
+static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
+  upb_textprinter *p = _p;
+  CHECK(upb_textprinter_indent(p));
+  CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s {%s",
+      upb_value_getfielddef(fval)->name, p->single_line ? "" : "\n"));
+  p->indent_depth++;
+  return UPB_CONTINUE_WITH(_p);
+err:
+  return UPB_SBREAK;
+}
+
+// Prints the closing "}" one nesting level out; UPB_BREAK if a write fails.
+static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
+  (void)fval;
+  upb_textprinter *p = _p;
+  p->indent_depth--;
+  return (upb_textprinter_indent(p) < 0 ||
+          upb_bytesink_writestr(p->bytesink, "}", &p->status) < 0 ||
+          upb_textprinter_endfield(p) < 0) ? UPB_BREAK : UPB_CONTINUE;
+}
+
+upb_textprinter *upb_textprinter_new() {
+  // Zeroed so that "status" and the other fields start in a defined state.
+  return calloc(1, sizeof(upb_textprinter));  // NULL if out of memory.
+}
+
+void upb_textprinter_free(upb_textprinter *p) {
+  free(p);  // Only the printer itself is freed; the bytesink is not touched.
+}
+
+void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
+                           bool single_line) {
+  p->indent_depth = 0;            // Output restarts at the top nesting level.
+  p->single_line = single_line;
+  p->bytesink = sink;             // Only the pointer is stored.
+}
+
+upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, upb_msgdef *m) {
+  upb_handlerset hset = {
+    NULL,  // startmsg
+    NULL,  // endmsg
+    upb_textprinter_value,        // prints "name: value" fields
+    upb_textprinter_startsubmsg,  // prints "name {"
+    upb_textprinter_endsubmsg,    // prints "}"
+    NULL,  // startseq
+    NULL,  // endseq
+  };
+  return upb_handlers_reghandlerset(h, m, &hset);  // Result passed through.
+}
diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h
new file mode 100644
index 0000000..9455208
--- /dev/null
+++ b/upb/pb/textprinter.h
@@ -0,0 +1,31 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#ifndef UPB_TEXT_H_
+#define UPB_TEXT_H_
+
+#include "upb/bytestream.h"
+#include "upb/handlers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _upb_textprinter;
+typedef struct _upb_textprinter upb_textprinter;
+// A upb_textprinter writes text output for a message to a upb_bytesink.
+upb_textprinter *upb_textprinter_new();  // Release with upb_textprinter_free().
+void upb_textprinter_free(upb_textprinter *p);
+void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
+                           bool single_line);  // Sets the sink and line mode.
+upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, upb_msgdef *m);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_TEXT_H_ */
diff --git a/upb/pb/varint.c b/upb/pb/varint.c
new file mode 100644
index 0000000..45caec1
--- /dev/null
+++ b/upb/pb/varint.c
@@ -0,0 +1,54 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ */
+
+#include "upb/pb/varint.h"
+
+// For an encoded varint "v" (first byte in the low 8 bits), returns a value
+// with a single bit set: the high bit of the byte that ends the varint.
+// Subtracting one yields a mask covering only the varint's own bits.
+// Returns 0 if none of the eight bytes ends the varint.
+INLINE uint64_t upb_get_vstopbit(uint64_t v) {
+  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;  // 0xff where continued.
+  return (filled + 1) & ~filled;
+}
+INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; }
+
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+  uint64_t bits;
+  memcpy(&bits, r.p, sizeof(bits));  // Always reads 8 bytes starting at r.p.
+  const uint64_t stop = upb_get_vstopbit(bits);
+  if (stop == 0) {  // Error: unterminated varint.
+    upb_decoderet unterminated = {(void*)0, 0};
+    return unterminated;
+  }
+  // Keep only this varint's payload bits, then pack them starting at bit 7.
+  bits &= 0x7f7f7f7f7f7f7f7fULL & (stop - 1);
+  bits +=       bits & 0x007f007f007f007fULL;
+  bits +=  3 * (bits & 0x0000ffff0000ffffULL);
+  bits += 15 * (bits & 0x00000000ffffffffULL);
+  upb_decoderet done = {r.p + (__builtin_ctzll(stop) + 1) / 8,
+                        r.val | (bits << 7)};
+  return done;
+}
+
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+  uint64_t bits;
+  memcpy(&bits, r.p, sizeof(bits));  // Always reads 8 bytes starting at r.p.
+  const uint64_t stop = upb_get_vstopbit(bits);
+  if (stop == 0) {  // Error: unterminated varint.
+    upb_decoderet unterminated = {(void*)0, 0};
+    return unterminated;
+  }
+  // Cut off what follows the varint, then pack its payload bits from bit 0.
+  bits &= (stop - 1);
+  bits = ((bits & 0x7f007f007f007f00) >> 1) | (bits & 0x007f007f007f007f);
+  bits = ((bits & 0xffff0000ffff0000) >> 2) | (bits & 0x0000ffff0000ffff);
+  bits = ((bits & 0xffffffff00000000) >> 4) | (bits & 0x00000000ffffffff);
+  upb_decoderet done = {r.p + (__builtin_ctzll(stop) + 1) / 8,
+                        r.val | (bits << 14)};
+  return done;
+}
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
new file mode 100644
index 0000000..1bbd193
--- /dev/null
+++ b/upb/pb/varint.h
@@ -0,0 +1,142 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc.  See LICENSE for details.
+ * Author: Josh Haberman <jhaberman@gmail.com>
+ *
+ * A number of routines for varint manipulation (we keep them all around to
+ * have multiple approaches available for benchmarking).
+ */
+
+#ifndef UPB_VARINT_DECODER_H_
+#define UPB_VARINT_DECODER_H_
+
+#include <stdint.h>
+#include <string.h>
+#include "upb/upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Decoding *******************************************************************/
+
+// All decoding functions return this struct by value.
+typedef struct {
+  const char *p;  // Just past the decoded varint; NULL if it was unterminated.
+  uint64_t val;   // The decoded value.
+} upb_decoderet;
+
+// A basic branch-based decoder, uses 32-bit values to get good performance
+// on 32-bit architectures (but performs well on 64-bits also).
+INLINE upb_decoderet upb_vdecode_branch32(const char *p) {
+  upb_decoderet r = {NULL, 0};  // Returned as-is (p == NULL) on failure.
+  uint32_t low, high = 0;  // Bits 0-31 and 32-63 of the result.
+  uint32_t b;
+  b = *(p++); low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7f) << 28;  // Byte 5 straddles low and high.
+              high  = (b & 0x7f) >>  4; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) <<  3; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
+  return r;  // Ten bytes without a terminator: unterminated varint.
+
+done:
+  r.val = ((uint64_t)high << 32) | low;
+  r.p = p;
+  return r;
+}
+
+// Like upb_vdecode_branch32(), but accumulates into a single 64-bit value.
+INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
+  uint64_t val;
+  uint64_t b;
+  upb_decoderet r = {NULL, 0};  // NULL (not (void*)0) so C++ can include this.
+  b = *(p++); val  = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 28; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 35; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 42; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 49; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 56; if(!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7f) << 63; if(!(b & 0x80)) goto done;
+  return r;  // Ten bytes without a terminator: unterminated varint.
+
+done:
+  r.val = val;
+  r.p = p;
+  return r;
+}
+
+// Decodes a varint of at most 8 bytes without branching (except for error).
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
+
+// Another implementation of the previous.
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
+
+// Template for a function that decodes 1- and 2-byte varints with branching
+// and hands the rest (3-10 bytes) to decode_max8_function for the remainder.
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                \
+INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {           \
+  uint8_t *p = (uint8_t*)_p;                                                 \
+  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \
+  upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)};        \
+  if ((*(p + 1) & 0x80) == 0) return r;                                      \
+  return decode_max8_function(r);                                            \
+}
+
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
+#undef UPB_VARINT_DECODER_CHECK2
+
+// Our canonical functions for decoding varints, based on the currently
+// favored best-performing implementations.
+// Decodes the varint that starts at "p".
+INLINE upb_decoderet upb_vdecode_fast(const char *p) {
+  // branch32 on targets where long is not 64 bits wide...
+  if (sizeof(long) != 8) return upb_vdecode_branch32(p);
+  // ...and check2_massimino everywhere else.
+  return upb_vdecode_check2_massimino(p);
+}
+
+INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
+  return upb_vdecode_max8_massimino(r);  // The currently favored max8 variant.
+}
+
+
+/* Encoding *******************************************************************/
+
+// Returns the number of bytes needed to hold "val" (1 if val is zero).
+INLINE size_t upb_value_size(uint64_t val) {
+#ifdef __GNUC__
+  int high_bit = val ? 63 - __builtin_clzll(val) : 0;  // clzll(0) is undefined.
+#else
+  int high_bit = 0;
+  for (uint64_t tmp = val; tmp >>= 1;) high_bit++;
+#endif
+  return high_bit / 8 + 1;
+}
+
+// Encodes a 32-bit varint, *not* sign-extended, packed low byte first.
+INLINE uint64_t upb_vencode32(uint32_t val) {
+  uint64_t ret = 0;
+  for (int bitpos = 0; val; bitpos+=8, val >>=7) {
+    if (bitpos > 0) ret |= (uint64_t)1 << (bitpos-1);  // Continuation bit.
+    ret |= (uint64_t)(val & 0x7f) << bitpos;  // 64-bit: bitpos can reach 32.
+  }
+  return ret;
+}
+
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_VARINT_DECODER_H_ */
-- 
cgit v1.2.3