7 files changed, 1306 insertions, 0 deletions
diff --git a/stream/upb_byteio.h b/stream/upb_byteio.h
new file mode 100644
index 0000000..69a28b3
--- /dev/null
+++ b/stream/upb_byteio.h
@@ -0,0 +1,43 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * This file contains upb_bytesrc and upb_bytesink implementations for common
+ * interfaces like strings, UNIX fds, and FILE*.
+ *
+ * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_BYTEIO_H
+#define UPB_BYTEIO_H
+
+#include "upb_srcsink.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_stringsrc **************************************************************/
+
+struct upb_stringsrc;
+typedef struct upb_stringsrc upb_stringsrc;
+
+// Create/free a stringsrc.  The constructor is declared with (void) so that C
+// compilers treat it as a real prototype and reject calls that pass arguments.
+upb_stringsrc *upb_stringsrc_new(void);
+void upb_stringsrc_free(upb_stringsrc *s);
+
+// Resets the stringsrc to a state where it will vend the given string.  The
+// stringsrc will take a reference on the string, so the caller need not ensure
+// that it outlives the stringsrc.  A stringsrc can be reset multiple times.
+void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
+
+// Returns the upb_bytesrc* for this stringsrc.  Invalidated by reset above.
+// NOTE(review): this declaration names no upb_stringsrc* parameter even though
+// the comment speaks of "this stringsrc" -- confirm against the definition.
+upb_bytesrc *upb_stringsrc_bytesrc();
+
+
+/* upb_fdsrc ******************************************************************/
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif
diff --git a/stream/upb_decoder.c b/stream/upb_decoder.c
new file mode 100644
index 0000000..e3fdc49
--- /dev/null
+++ b/stream/upb_decoder.c
@@ -0,0 +1,577 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include "upb_decoder.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+// Sentinel stored in a frame's end_offset when the frame is a group: such a
+// frame is terminated by an END_GROUP tag instead of by reaching an offset.
+#define UPB_GROUP_END_OFFSET UINT32_MAX
+
+// Returns true if the given wire type is one of the wire types allowed for the
+// given field type.  The per-type table stores the allowed wire types as a
+// bitmask indexed by wire type, covering packed and non-packed encodings.
+static bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
+  return ((1 << wt) & upb_types[ft].allowed_wire_types) != 0;
+}
+
+// Zig-zag decoding, used by sint32 and sint64: the low bit of the wire value
+// selects the sign and the remaining bits hold the magnitude.
+static int32_t upb_zzdec_32(uint32_t n) {
+  const uint32_t sign_mask = 0 - (n & 1);  // All ones when the low bit is set.
+  return (n >> 1) ^ sign_mask;
+}
+static int64_t upb_zzdec_64(uint64_t n) {
+  const uint64_t sign_mask = 0 - (n & 1);  // All ones when the low bit is set.
+  return (n >> 1) ^ sign_mask;
+}
+
+
+/* upb_decoder ****************************************************************/
+
+// The decoder keeps a stack with one entry per level of recursion.
+// upb_decoder_frame is one frame of that stack.
+typedef struct {
+  upb_msgdef *msgdef;       // Message type used to look up field numbers at this level.
+  upb_fielddef *field;      // Field being entered; stored by upb_decoder_startmsg() before it pushes.
+  upb_strlen_t end_offset;  // Stream offset where this message ends; UPB_GROUP_END_OFFSET for groups.
+} upb_decoder_frame;
+
+// State of one streaming decoder.  Most fields are (re)initialized by
+// upb_decoder_reset(), which must be called before the decoder is used.
+struct upb_decoder {
+  upb_src src;  // upb_decoder is a upb_src (must stay the first member; the vtable casts rely on it -- TODO confirm).
+
+  // Message type of the outermost message; set once in upb_decoder_new().
+  upb_msgdef *toplevel_msgdef;
+  // Source of raw bytes; set by upb_decoder_reset().
+  upb_bytesrc *bytesrc;
+
+  // The buffer of input data.  NULL is equivalent to the empty string.
+  upb_string *buf;
+
+  // Holds residual bytes when fewer than UPB_MAX_ENCODED_SIZE bytes remain.
+  uint8_t tmpbuf[UPB_MAX_ENCODED_SIZE];
+
+  // The number of bytes we have yet to consume from "buf" or tmpbuf.  This is
+  // always >= 0 unless we were just reset or are eof.
+  // (upb_decoder_skipbytes() also drives it negative temporarily while
+  // skipping past the end of the current buffer.)
+  int32_t buf_bytesleft;
+
+  // The offset within "buf" from where we are currently reading.  This can be
+  // <0 if we are reading some residual bytes from the previous buffer, which
+  // are stored in tmpbuf and combined with bytes from "buf".
+  int32_t buf_offset;
+
+  // The overall stream offset of the beginning of "buf".
+  uint32_t buf_stream_offset;
+
+  // Fielddef for the key we just read.  NULL once its value has been read;
+  // upb_decoder_getdef() returns it again without parsing a key while it is
+  // non-NULL.
+  upb_fielddef *field;
+
+  // Wire type of the key we just read.
+  upb_wire_type_t wire_type;
+
+  // Delimited length of the string field we are reading.
+  upb_strlen_t delimited_len;
+
+  // Compared with the current stream offset in upb_decoder_getval() to decide
+  // whether a delimited (packed) field still has values left.
+  // NOTE(review): no code visible in this file stores a real packed end
+  // offset here -- confirm where it is meant to be set.
+  upb_strlen_t packed_end_offset;
+
+  // We keep a stack of messages we have recursed into.  "top" is the current
+  // frame and "limit" points one past the last usable frame of "stack".
+  upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING];
+};
+
+
+/* upb_decoder buffering. *****************************************************/
+
+// Returns the stream offset of the next unconsumed byte.  buf_stream_offset is
+// the stream offset of the start of "buf" and buf_offset is our position
+// relative to that start (negative while residual bytes are pending), so the
+// current position is their sum.  upb_decoder_nextbuf() depends on this: it
+// moves a buffer's length from buf_offset to buf_stream_offset, which leaves
+// the sum -- and therefore the reported position -- unchanged.
+static upb_strlen_t upb_decoder_offset(upb_decoder *d)
+{
+  return d->buf_stream_offset + d->buf_offset;
+}
+
+// Advances to the next input buffer.  Any unconsumed (residual) bytes of the
+// current buffer are saved in tmpbuf first, and the offsets are rebased so
+// that the stream position is unchanged.
+//
+// Returns true if there is data to read afterwards (a new buffer was pulled,
+// or the bytesrc hit EOF while residual bytes remain).  Returns false on a
+// bytesrc error (copied into d->src.status), on clean EOF (d->src.eof is
+// set), or if the caller had already skipped past the end of the stream.
+static bool upb_decoder_nextbuf(upb_decoder *d)
+{
+  assert(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE);
+
+  // Copy residual bytes to temporary buffer.
+  if(d->buf_bytesleft > 0) {
+    memcpy(d->tmpbuf, upb_string_getrobuf(d->buf) + d->buf_offset,
+           d->buf_bytesleft);
+  }
+
+  // Recycle old buffer.
+  // NOTE(review): the length is read after upb_string_tryrecycle(); this
+  // presumes recycling preserves the old length -- confirm.
+  if(d->buf) {
+    d->buf = upb_string_tryrecycle(d->buf);
+    d->buf_offset -= upb_string_len(d->buf);
+    d->buf_stream_offset += upb_string_len(d->buf);
+  }
+
+  // Pull next buffer.
+  // NOTE(review): d->buf is NULL on the first call after a reset; presumably
+  // upb_bytesrc_get() accepts that -- confirm.
+  if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) {
+    d->buf_bytesleft += upb_string_len(d->buf);
+    return true;
+  } else {
+    // Error or EOF.
+    if(!upb_bytesrc_eof(d->bytesrc)) {
+      // Error from bytesrc.
+      upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
+      return false;
+    } else if(d->buf_bytesleft == 0) {
+      // EOF from bytesrc and we don't have any residual bytes left.
+      d->src.eof = true;
+      return false;
+    } else if(d->buf_bytesleft < 0) {
+      // EOF while upb_decoder_skipbytes() still owes bytes: the stream is
+      // truncated.  Returning true here would make the skip loop spin forever.
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+                 "Stream ended while skipping data.");
+      return false;
+    } else {
+      // No more data left from the bytesrc, but we still have residual bytes.
+      return true;
+    }
+  }
+}
+
+// Slow path of upb_decoder_getbuf(), taken when the fast-path condition (at
+// least UPB_MAX_ENCODED_SIZE bytes left and buf_offset >= 0) does not hold.
+// Pulls the next buffer if too few bytes remain, and when residual bytes are
+// pending (buf_offset < 0) assembles the data in tmpbuf, padded with 0x80.
+// Returns NULL if upb_decoder_nextbuf() fails; otherwise returns the data
+// pointer and stores the number of real stream bytes in *bytes.
+static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes)
+{
+  if(d->buf_bytesleft < UPB_MAX_ENCODED_SIZE) {
+    // GCC is currently complaining about use of an uninitialized value if we
+    // don't set this now.  I think this is incorrect, but leaving this in
+    // to suppress the warning for now.
+    *bytes = 0;
+    if(!upb_decoder_nextbuf(d)) return NULL;
+  }
+
+  // NOTE(review): upb_decoder_nextbuf() also returns true at EOF with only
+  // residual bytes left, in which case fewer than UPB_MAX_ENCODED_SIZE bytes
+  // remain and this assertion looks violated -- confirm the intended contract.
+  assert(d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE);
+
+  if(d->buf_offset >= 0) {
+    // Common case: the main buffer contains at least UPB_MAX_ENCODED_SIZE
+    // contiguous bytes, so we can read directly out of it.
+    *bytes = d->buf_bytesleft;
+    return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
+  } else {
+    // We need to accumulate UPB_MAX_ENCODED_SIZE bytes; len is how many we
+    // have so far (the residual bytes already sitting at the front of tmpbuf).
+    upb_strlen_t len = -d->buf_offset;
+    if(d->buf) {
+      upb_strlen_t to_copy =
+          UPB_MIN(UPB_MAX_ENCODED_SIZE - len, upb_string_len(d->buf));
+      memcpy(d->tmpbuf + len, upb_string_getrobuf(d->buf), to_copy);
+      len += to_copy;
+    }
+    // Pad the buffer out to UPB_MAX_ENCODED_SIZE.  0x80 has the varint
+    // continuation bit set, so a varint reader that runs into the padding
+    // never sees a terminating byte there.
+    memset(d->tmpbuf + len, 0x80, UPB_MAX_ENCODED_SIZE - len);
+    *bytes = len;
+    return d->tmpbuf;
+  }
+}
+
+// Returns a pointer to at least UPB_MAX_ENCODED_SIZE readable bytes holding
+// the next bytes of the stream, even when those bytes span several buffers.
+// *bytes receives the number of real stream bytes available through the
+// returned pointer; when that is less than UPB_MAX_ENCODED_SIZE the remainder
+// is 0x80 padding.  Returns NULL if no data could be obtained.
+//
+// Callers report how much they actually used via upb_decoder_consume().
+static const uint8_t *upb_decoder_getbuf(upb_decoder *d, uint32_t *bytes)
+{
+  const bool contiguous =
+      d->buf_bytesleft >= UPB_MAX_ENCODED_SIZE && d->buf_offset >= 0;
+  if(!contiguous) return upb_decoder_getbuf_full(d, bytes);
+
+  // Fast path: enough contiguous bytes in the main buffer, read in place.
+  *bytes = d->buf_bytesleft;
+  return (uint8_t*)upb_string_getrobuf(d->buf) + d->buf_offset;
+}
+
+// Marks "bytes" bytes (at most UPB_MAX_ENCODED_SIZE) of the data returned by
+// upb_decoder_getbuf() as consumed.  Always returns true.
+static bool upb_decoder_consume(upb_decoder *d, uint32_t bytes)
+{
+  assert(bytes <= UPB_MAX_ENCODED_SIZE);
+  d->buf_bytesleft -= bytes;
+  d->buf_offset += bytes;
+
+  // A still-negative offset means residual bytes remain unread in tmpbuf;
+  // slide them to the front so the next read starts at tmpbuf[0].
+  const int32_t residual = -d->buf_offset;
+  if(residual > 0) {
+    memmove(d->tmpbuf, d->tmpbuf + bytes, residual);
+  }
+  assert(d->buf_bytesleft >= 0);
+  return true;
+}
+
+// Skips "bytes" bytes of input, pulling further buffers for as long as the
+// skip reaches beyond the data currently buffered.  Returns false as soon as
+// upb_decoder_nextbuf() fails.
+static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes)
+{
+  d->buf_bytesleft -= bytes;
+  d->buf_offset += bytes;
+  for(;;) {
+    if(d->buf_bytesleft >= 0) return true;
+    if(!upb_decoder_nextbuf(d)) return false;
+  }
+}
+
+
+/* Functions to read wire values. *********************************************/
+
+// Reads one varint of up to 10 bytes (64 bits of value), starting at its first
+// byte, and stores the low and high 32 bits of the value in *low and *high.
+// Returns false if no input could be obtained, or -- with an error recorded in
+// d->src.status -- if the varint is unterminated or cut off by end of stream.
+// On success the bytes of the varint are consumed.
+// NOTE(review): reads up to 10 bytes from the buffer, so this assumes
+// UPB_MAX_ENCODED_SIZE >= 10 -- confirm.
+INLINE bool upb_decoder_readv64(upb_decoder *d, uint32_t *low, uint32_t *high)
+{
+  upb_strlen_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  const uint8_t *start = buf;
+  if(!buf) return false;
+
+  // Byte k of a varint carries value bits [7k, 7k+6].  The fifth byte (k=4)
+  // straddles the two words: its low 4 bits are bits 28-31 of *low and its
+  // remaining 3 bits are bits 0-2 of *high.  Each later byte therefore lands
+  // at bit 7k-32 of *high (3, 10, 17, 24, 31).
+  *high = 0;
+  uint32_t b;
+  b = *(buf++); *low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
+  b = *(buf++); *low  |= (b & 0x7f) << 28;
+                *high  = (b & 0x7f) >>  4; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) <<  3; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
+  b = *(buf++); *high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
+  // Tenth byte: only its lowest bit (value bit 63) fits in 64 bits.
+  b = *(buf++); *high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
+
+  // All ten bytes had the continuation bit set.  If fewer than ten real bytes
+  // were available, what we ran into was the 0x80 padding.
+  if(bytes_available >= 10) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Varint was unterminated "
+               "after 10 bytes, stream offset: %u", upb_decoder_offset(d));
+  } else {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Stream ended in the middle "
+               "of a varint, stream offset: %u", upb_decoder_offset(d));
+  }
+  return false;
+
+done:
+  return upb_decoder_consume(d, buf - start);
+}
+
+// Gets a varint -- called when we only need 32 bits of it.  Note that a 32-bit
+// varint is not a true wire type.  Stores the low 32 bits in *val and returns
+// false (with d->src.status set) if the read fails or if truncating to 32 bits
+// would discard data.
+static bool upb_decoder_readv32(upb_decoder *d, uint32_t *val)
+{
+  uint32_t high;
+  if(!upb_decoder_readv64(d, val, &high)) return false;
+
+  // We expect the high bits to be zero, except that signed 32-bit values are
+  // first sign-extended to be wire-compatible with 64 bits, in which case we
+  // expect the high bits to be all one.
+  //
+  // We could perform a slightly more sophisticated check by having the caller
+  // indicate whether a signed or unsigned value is being read.  We could check
+  // that the high bits are all zeros for unsigned, and properly sign-extended
+  // for signed.
+  if(high != 0 && ~high != 0) {
+    // The high word is reported in hex ("%ux" would print it in decimal
+    // followed by a stray 'x').
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Read a 32-bit varint, but "
+               "the high bits contained data we should not truncate: "
+               "0x%x, stream offset: %u", high, upb_decoder_offset(d));
+    return false;
+  }
+  return true;
+}
+
+// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT) and
+// stores it in *val.  Returns false if no input could be obtained or, with an
+// error recorded in d->src.status, if fewer than 4 stream bytes remain.
+static bool upb_decoder_readf32(upb_decoder *d, uint32_t *val)
+{
+  upb_strlen_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  if(bytes_available < 4) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+               "Stream ended in the middle of a 32-bit value");
+    return false;
+  }
+  // The wire format is little-endian; assemble the value from its bytes so
+  // that the result is correct on hosts of either endianness.
+  const uint32_t v = (uint32_t)buf[0] | ((uint32_t)buf[1] << 8) |
+                     ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
+  // Store with memcpy (as before): callers may pass storage of another 4-byte
+  // type through this pointer.
+  memcpy(val, &v, sizeof(v));
+  return upb_decoder_consume(d, 4);
+}
+
+// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT) and
+// stores it in *val.  Returns false if no input could be obtained or, with an
+// error recorded in d->src.status, if fewer than 8 stream bytes remain.
+static bool upb_decoder_readf64(upb_decoder *d, uint64_t *val)
+{
+  upb_strlen_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  if(bytes_available < 8) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+               "Stream ended in the middle of a 64-bit value");
+    return false;
+  }
+  // The wire format is little-endian; assemble the value from its bytes (most
+  // significant first) so the result is correct on hosts of either endianness.
+  uint64_t v = 0;
+  for(int i = 7; i >= 0; i--) v = (v << 8) | buf[i];
+  // Store with memcpy (as before): callers may pass storage of another 8-byte
+  // type through this pointer.
+  memcpy(val, &v, sizeof(v));
+  return upb_decoder_consume(d, 8);
+}
+
+// Skips over one varint (wire type: UPB_WIRE_TYPE_VARINT) without decoding it.
+// The return value is used as a boolean: nonzero on success, zero if no input
+// could be obtained or if the varint is not terminated within 10 bytes (in
+// which case an error is recorded in d->src.status).
+static uint8_t upb_decoder_skipv64(upb_decoder *d)
+{
+  uint32_t bytes_available;
+  const uint8_t *buf = upb_decoder_getbuf(d, &bytes_available);
+  if(!buf) return false;
+  // Count the bytes that have the continuation bit set.
+  uint8_t i;
+  for(i = 0; i < 10 && buf[i] & 0x80; i++)
+    ;  // empty loop body.
+  // The loop stops at i == 10 when no terminating byte was found, so the
+  // failure test must include 10 itself.
+  if(i >= 10) {
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Unterminated varint.");
+    return false;
+  }
+  // buf[i] is the terminating byte and belongs to the varint too.
+  return upb_decoder_consume(d, i + 1);
+}
+
+
+/* upb_src implementation for upb_decoder. ************************************/
+
+bool upb_decoder_skipval(upb_decoder *d);
+
+// Returns the fielddef of the next field in the current message, or NULL.
+// NULL means either end of the current (sub)message or group, in which case
+// d->src.eof is set, or an error, in which case d->src.status is set.
+// Unknown field numbers are skipped silently.  While a previously returned
+// field is still pending (d->field != NULL) it is returned again without
+// reading a new key.
+upb_fielddef *upb_decoder_getdef(upb_decoder *d)
+{
+  // Detect end-of-submessage.
+  if(upb_decoder_offset(d) >= d->top->end_offset) {
+    d->src.eof = true;
+    return NULL;
+  }
+
+  // Handles the packed field case.
+  if(d->field) return d->field;
+
+  uint32_t key = 0;
+again:
+  if(!upb_decoder_readv32(d, &key)) return NULL;
+  upb_wire_type_t wire_type = key & 0x7;
+  int32_t field_number = key >> 3;
+  // Record the wire type right away: upb_decoder_skipval(), used below for
+  // unknown and mistyped fields, dispatches on d->wire_type.
+  d->wire_type = wire_type;
+
+  if(wire_type == UPB_WIRE_TYPE_DELIMITED) {
+    // For delimited wire values we parse the length now, since we need it in
+    // all cases.
+    if(!upb_decoder_readv32(d, &d->delimited_len)) return NULL;
+  } else if(wire_type == UPB_WIRE_TYPE_END_GROUP) {
+    if(d->top->end_offset == UPB_GROUP_END_OFFSET) {
+      d->src.eof = true;
+    } else {
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR, "End group seen but current "
+                 "message is not a group, byte offset: %u",
+                 upb_decoder_offset(d));
+    }
+    return NULL;
+  }
+
+  // Look up field by tag number.
+  upb_fielddef *f = upb_msg_itof(d->top->msgdef, field_number);
+
+  if (!f) {
+    // Unknown field.  If/when the upb_src interface supports reporting
+    // unknown fields we will implement that here.
+    // NOTE(review): skipping an unknown START_GROUP goes through
+    // upb_decoder_startmsg(), which dereferences d->field (NULL here) --
+    // confirm how unknown groups are meant to be handled.
+    if(!upb_decoder_skipval(d)) return NULL;
+    goto again;
+  } else if (!upb_check_type(wire_type, f->type)) {
+    // This is a recoverable error condition.  We skip the value but also
+    // return NULL and report the error.
+    upb_decoder_skipval(d);
+    // TODO: better error message.
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Incorrect wire type.\n");
+    return NULL;
+  }
+  d->field = f;
+  return f;
+}
+
+// Reads the value of the current field (d->field) into the storage that "val"
+// points to, choosing the reader from the field type's native wire type.
+// Returns false if the read fails or if the field is not a scalar (the error
+// is recorded in d->src.status).
+bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
+{
+  switch(upb_types[d->field->type].native_wire_type) {
+    case UPB_WIRE_TYPE_VARINT: {
+      uint32_t lo32, hi32;
+      if(!upb_decoder_readv64(d, &lo32, &hi32)) return false;
+      const uint64_t wire = ((uint64_t)hi32 << 32) | lo32;
+      if(d->field->type != UPB_TYPE(SINT64)) {
+        *val.uint64 = wire;
+      } else {
+        *val.int64 = upb_zzdec_64(wire);
+      }
+      break;
+    }
+    case UPB_WIRE_TYPE_32BIT_VARINT: {
+      uint32_t wire;
+      if(!upb_decoder_readv32(d, &wire)) return false;
+      if(d->field->type != UPB_TYPE(SINT32)) {
+        *val.uint32 = wire;
+      } else {
+        *val.int32 = upb_zzdec_32(wire);
+      }
+      break;
+    }
+    case UPB_WIRE_TYPE_64BIT:
+      if(!upb_decoder_readf64(d, val.uint64)) return false;
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      if(!upb_decoder_readf32(d, val.uint32)) return false;
+      break;
+    default:
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+                 "Attempted to call getval on a group.");
+      return false;
+  }
+
+  // Only a packed (delimited) field that has not yet reached its end keeps
+  // d->field set, so that the next getdef returns it again without a key.
+  const bool more_packed_values =
+      d->wire_type == UPB_WIRE_TYPE_DELIMITED &&
+      upb_decoder_offset(d) < d->packed_end_offset;
+  if(!more_packed_values) {
+    d->field = NULL;
+  }
+  return true;
+}
+
+// Reads the current delimited value (d->delimited_len bytes) into "str".
+// This covers strings, bytes and length-delimited submessages; the latter
+// isn't technically a string, but can be gotten as one to perform lazy
+// parsing.  Returns false, with d->src.status set, if the length is invalid
+// or the bytesrc fails.
+bool upb_decoder_getstr(upb_decoder *d, upb_string *str) {
+  const int32_t total_len = d->delimited_len;
+  if(total_len < 0) {
+    // A length that does not fit in int32_t would turn into negative sizes
+    // in the copies below.
+    upb_seterr(&d->src.status, UPB_STATUS_ERROR,
+               "Delimited length is too large.");
+    return false;
+  }
+  if (d->buf_offset >= 0 && total_len <= d->buf_bytesleft) {
+    // The entire string is inside our current buffer, so we can just
+    // return a substring of the buffer without copying.
+    upb_string_substr(str, d->buf,
+                      upb_string_len(d->buf) - d->buf_bytesleft,
+                      total_len);
+    upb_decoder_skipbytes(d, total_len);
+  } else {
+    // The string spans buffers, so we must copy from the residual buffer
+    // (if any bytes are there), then the buffer, and finally from the bytesrc.
+    // "buffered" is the part of the string the decoder already holds.
+    const int32_t buffered = UPB_MIN(total_len, d->buf_bytesleft);
+    uint8_t *ptr = (uint8_t*)upb_string_getrwbuf(str, buffered);
+    int32_t len = 0;
+    if(d->buf_offset < 0) {
+      // Residual bytes we need to copy from tmpbuf, capped at what this
+      // string needs.  Anything left over is moved to the front of tmpbuf so
+      // that it stays where later reads expect it.
+      const int32_t residual = -d->buf_offset;
+      len = UPB_MIN(residual, buffered);
+      memcpy(ptr, d->tmpbuf, len);
+      memmove(d->tmpbuf, d->tmpbuf + len, residual - len);
+    }
+    if(d->buf && len < buffered) {
+      // Bytes from the buffer.  Reaching here with a negative buf_offset
+      // means the residual bytes are used up, so reading starts at the
+      // beginning of the buffer rather than before it.
+      const int32_t src_offset = d->buf_offset < 0 ? 0 : d->buf_offset;
+      memcpy(ptr + len, upb_string_getrobuf(d->buf) + src_offset,
+             buffered - len);
+    }
+    upb_decoder_skipbytes(d, buffered);
+    if(buffered < total_len) {
+      // Bytes from the bytesrc: only the part we did not already hold.
+      const int32_t remaining = total_len - buffered;
+      if(!upb_bytesrc_append(d->bytesrc, str, remaining)) {
+        upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
+        return false;
+      }
+      // Have to advance this since the buffering layer of the decoder will
+      // never see these bytes.
+      d->buf_stream_offset += remaining;
+    }
+  }
+  d->field = NULL;
+  return true;
+}
+
+static bool upb_decoder_skipgroup(upb_decoder *d);
+
+// Enters the submessage or group whose field was just returned by
+// upb_decoder_getdef(): pushes a frame holding the submessage's msgdef and
+// end offset.  Returns false, leaving the stack untouched and d->src.status
+// set, if that would exceed UPB_MAX_NESTING levels.
+bool upb_decoder_startmsg(upb_decoder *d) {
+  // Check before pushing so that a failure does not leave "top" pointing past
+  // the end of the stack.
+  if(d->top + 1 >= d->limit) {
+    upb_seterr(&d->src.status, UPB_ERROR_MAX_NESTING_EXCEEDED,
+               "Nesting exceeded maximum (%d levels)\n",
+               UPB_MAX_NESTING);
+    return false;
+  }
+  // Remember in the parent frame which field we descended into;
+  // upb_decoder_endmsg() consults it.
+  d->top->field = d->field;
+  upb_decoder_frame *frame = ++d->top;
+  frame->msgdef = upb_downcast_msgdef(d->field->def);
+  if(d->field->type == UPB_TYPE(GROUP)) {
+    frame->end_offset = UPB_GROUP_END_OFFSET;
+  } else {
+    frame->end_offset = upb_decoder_offset(d) + d->delimited_len;
+  }
+  // The submessage field has been dealt with; clear it so that the next
+  // upb_decoder_getdef() parses a key of the submessage instead of returning
+  // this field again.
+  d->field = NULL;
+  return true;
+}
+
+// Leaves the current submessage or group.  If its end has not been reached
+// yet, the rest of it is skipped first.  Returns false if there is no
+// submessage to leave or if skipping the remainder failed.
+bool upb_decoder_endmsg(upb_decoder *d) {
+  if(d->top <= d->stack) return false;
+
+  bool ok = true;
+  if(!d->src.eof) {
+    // Skip the unread remainder while the submessage's own frame is still on
+    // top: its end_offset (or, for a group, its END_GROUP tag) delimits what
+    // must be skipped.  The field we descended into is stored in the parent
+    // frame by upb_decoder_startmsg().
+    const upb_fielddef *f = (d->top - 1)->field;
+    if(f->type == UPB_TYPE(GROUP))
+      ok = upb_decoder_skipgroup(d);
+    else
+      ok = upb_decoder_skipbytes(d, d->top->end_offset - upb_decoder_offset(d));
+  }
+  --d->top;
+  // The eof flag described the submessage we just left, not its parent.
+  d->src.eof = false;
+  return ok;
+}
+
+// Skips the value that belongs to the key most recently read, dispatching on
+// d->wire_type.  Returns false if the skip fails (d->src.status is set by the
+// failing step).  Like upb_decoder_getval() and upb_decoder_getstr(), this
+// clears d->field so that the next upb_decoder_getdef() parses a new key.
+bool upb_decoder_skipval(upb_decoder *d) {
+  bool ok;
+  switch(d->wire_type) {
+    case UPB_WIRE_TYPE_VARINT:
+      ok = upb_decoder_skipv64(d);
+      break;
+    case UPB_WIRE_TYPE_START_GROUP:
+      // startmsg needs d->field, so the field is only cleared afterwards.
+      ok = upb_decoder_startmsg(d) &&
+           upb_decoder_skipgroup(d) &&
+           upb_decoder_endmsg(d);
+      break;
+    case UPB_WIRE_TYPE_64BIT:
+      ok = upb_decoder_skipbytes(d, 8);
+      break;
+    case UPB_WIRE_TYPE_32BIT:
+      ok = upb_decoder_skipbytes(d, 4);
+      break;
+    case UPB_WIRE_TYPE_DELIMITED:
+      // Works for both string/bytes *and* submessages.
+      ok = upb_decoder_skipbytes(d, d->delimited_len);
+      break;
+    default:
+      // Including UPB_WIRE_TYPE_END_GROUP.
+      assert(false);
+      upb_seterr(&d->src.status, UPB_STATUS_ERROR, "Tried to skip an end group");
+      return false;
+  }
+  d->field = NULL;
+  return ok;
+}
+
+// Skips every remaining field of the current group.  Returns true only if the
+// group ended properly (eof reported by upb_decoder_getdef()).
+static bool upb_decoder_skipgroup(upb_decoder *d)
+{
+  // This will be mutually recursive with upb_decoder_skipval() if the group
+  // has sub-groups.  If we wanted to handle EAGAIN in the future, this
+  // approach would not work; we would need to track the group depth
+  // explicitly.
+  while(upb_decoder_getdef(d)) {
+    if(!upb_decoder_skipval(d)) return false;
+  }
+  // upb_decoder_getdef() returns NULL both at the end of the group and on
+  // error; only the former sets eof.  Test the flag itself: its address is
+  // never null, so "!&d->src.eof" could never report the error.
+  if(!d->src.eof) return false;
+  return true;
+}
+
+// upb_src vtable for the decoder; installed by upb_decoder_new().  The casts
+// adapt the decoder functions, which take a upb_decoder*, to the upb_src
+// function pointer types.
+upb_src_vtable upb_decoder_src_vtbl = {
+  (upb_src_getdef_fptr)&upb_decoder_getdef,
+  (upb_src_getval_fptr)&upb_decoder_getval,
+  (upb_src_skipval_fptr)&upb_decoder_skipval,
+  (upb_src_startmsg_fptr)&upb_decoder_startmsg,
+  (upb_src_endmsg_fptr)&upb_decoder_endmsg,
+};
+
+
+/* upb_decoder construction/destruction. **************************************/
+
+// Allocates a decoder for messages of type "msgdef".  Returns NULL if the
+// allocation fails.  The decoder must be reset with upb_decoder_reset() before
+// it is used.
+upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
+{
+  upb_decoder *d = malloc(sizeof(*d));
+  if(!d) return NULL;
+  d->toplevel_msgdef = msgdef;
+  d->limit = &d->stack[UPB_MAX_NESTING];
+  d->buf = NULL;
+  upb_src_init(&d->src, &upb_decoder_src_vtbl);
+  return d;
+}
+
+// Releases the decoder's input buffer and the decoder itself.  Like free(),
+// accepts NULL and does nothing in that case.
+void upb_decoder_free(upb_decoder *d)
+{
+  if(!d) return;
+  // NOTE(review): d->buf is NULL if no buffer was ever pulled; presumably
+  // upb_string_unref() accepts NULL -- confirm.
+  upb_string_unref(d->buf);
+  free(d);
+}
+
+// Puts the decoder in the state where it has seen no data and will read the
+// start of a new top-level message from "bytesrc".
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
+{
+  // NOTE(review): d->buf is NULL on the first reset; presumably
+  // upb_string_unref() accepts NULL -- confirm.
+  upb_string_unref(d->buf);
+  d->top = d->stack;
+  d->top->msgdef = d->toplevel_msgdef;
+  // The top-level message is not delimited (we can keep receiving data for it
+  // indefinitely), so we set the end offset as high as possible, but not equal
+  // to UINT32_MAX so it doesn't equal UPB_GROUP_END_OFFSET.
+  d->top->end_offset = UINT32_MAX - 1;
+  d->bytesrc = bytesrc;
+  d->buf = NULL;
+  d->buf_bytesleft = 0;
+  d->buf_stream_offset = 0;
+  d->buf_offset = 0;
+  // Per-field state.  upb_decoder_getdef() returns d->field whenever it is
+  // non-NULL, so it must not keep a stale (or, after malloc, indeterminate)
+  // value across a reset.
+  d->field = NULL;
+  d->wire_type = 0;
+  d->delimited_len = 0;
+  d->packed_end_offset = 0;
+  // A previous stream's end-of-data flag does not apply to the new one.
+  d->src.eof = false;
+}
diff --git a/stream/upb_decoder.h b/stream/upb_decoder.h
new file mode 100644
index 0000000..dde61fc
--- /dev/null
+++ b/stream/upb_decoder.h
@@ -0,0 +1,53 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * upb_decoder implements a high performance, streaming decoder for protobuf
+ * data that works by implementing upb_src and getting its data from a
+ * upb_bytesrc.
+ *
+ * The decoder does not currently support non-blocking I/O, in the sense that
+ * if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
+ * decoder when data becomes available again.  Support for this could be added,
+ * but it would add complexity and perhaps cost efficiency also.
+ *
+ * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_DECODER_H_
+#define UPB_DECODER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "upb_def.h"
+#include "upb_stream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_decoder *****************************************************************/
+
+// A upb_decoder decodes the binary protocol buffer format.  As described at
+// the top of this file, it implements upb_src and reads its input from a
+// upb_bytesrc.
+struct upb_decoder;
+typedef struct upb_decoder upb_decoder;
+
+// Allocates and frees a upb_decoder, respectively.  "md" is the message type
+// of the top-level message to be decoded.
+upb_decoder *upb_decoder_new(upb_msgdef *md);
+void upb_decoder_free(upb_decoder *d);
+
+// Resets the internal state of an already-allocated decoder.  This puts it in a
+// state where it has not seen any data, and expects the next data to be from
+// the beginning of a new protobuf.  Decoders must be reset before they can be
+// used.  A decoder can be reset multiple times.
+void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc);
+
+// Returns a upb_src pointer by which the decoder can be used.  The returned
+// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free().
+upb_src *upb_decoder_getsrc(upb_decoder *d);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_DECODER_H_ */
diff --git a/stream/upb_encoder.c b/stream/upb_encoder.c
new file mode 100644
index 0000000..304a423
--- /dev/null
+++ b/stream/upb_encoder.c
@@ -0,0 +1,420 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include "upb_encoder.h"
+
+#include <stdlib.h>
+#include "descriptor.h"
+
+/* Functions for calculating sizes of wire values. ****************************/
+
+// Returns the number of bytes (1 to 10) needed to encode val as a varint,
+// which stores 7 bits of the value per byte.
+static size_t upb_v_uint64_t_size(uint64_t val) {
+  // Zero still occupies one byte.  Handling it first also keeps zero away
+  // from __builtin_clzll(), whose result is undefined for that input.
+  if(val == 0) return 1;
+#ifdef __GNUC__
+  int high_bit = 63 - __builtin_clzll(val);  // 0-based index of the top set bit.
+#else
+  int high_bit = 0;
+  uint64_t tmp = val;
+  while(tmp >>= 1) high_bit++;
+#endif
+  return high_bit / 7 + 1;
+}
+
+// Varint size of a signed 32-bit value.  v_int32's are sign-extended to 64
+// bits to maintain wire compatibility with int64s.
+static size_t upb_v_int32_t_size(int32_t val) {
+  const int64_t widened = val;
+  return upb_v_uint64_t_size(widened);
+}
+// Varint size of an unsigned 32-bit value (zero-extended to 64 bits).
+static size_t upb_v_uint32_t_size(uint32_t val) {
+  const uint64_t widened = val;
+  return upb_v_uint64_t_size(widened);
+}
+// Size of a fixed-length 64-bit wire value; it does not depend on the value.
+static size_t upb_f_uint64_t_size(uint64_t val) {
+  return sizeof val;
+}
+// Size of a fixed-length 32-bit wire value; it does not depend on the value.
+static size_t upb_f_uint32_t_size(uint32_t val) {
+  return sizeof val;
+}
+
+
+/* Functions to write wire values. ********************************************/
+
+// Since we know in advance the longest that the value could be, we always make
+// sure that our buffer is long enough.  This saves us from having to perform
+// bounds checks.
+
+// Puts a varint (wire type: UPB_WIRE_TYPE_VARINT) at buf and returns a pointer
+// one past the last byte written (1 to 10 bytes).
+static uint8_t *upb_put_v_uint64_t(uint8_t *buf, uint64_t val)
+{
+  // Every byte except the last carries 7 value bits plus the continuation bit.
+  while(val >= 0x80) {
+    *buf++ = (uint8_t)(val | 0x80);
+    val >>= 7;
+  }
+  // Final byte: the remaining bits, continuation bit clear.
+  *buf++ = (uint8_t)val;
+  return buf;
+}
+
+// Puts an unsigned 32-bit varint, verbatim.  The value is zero-extended, so
+// the high 32 bits of the 64-bit varint are never used.
+static uint8_t *upb_put_v_uint32_t(uint8_t *buf, uint32_t val)
+{
+  const uint64_t widened = val;
+  return upb_put_v_uint64_t(buf, widened);
+}
+
+// Puts a signed 32-bit varint.  The value is sign-extended to 64 bits first,
+// which keeps it wire-compatible with 64-bit signed integers.
+static uint8_t *upb_put_v_int32_t(uint8_t *buf, int32_t val)
+{
+  const int64_t widened = val;
+  return upb_put_v_uint64_t(buf, widened);
+}
+
+// Stores val at buf as 4 bytes in little-endian order.
+static void upb_put32(uint8_t *buf, uint32_t val) {
+  for(int i = 0; i < 4; i++) {
+    buf[i] = (val >> (8 * i)) & 0xff;
+  }
+}
+
+// Puts a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT) at buf
+// and returns a pointer one past the 4 bytes written.
+static uint8_t *upb_put_f_uint32_t(uint8_t *buf, uint32_t val)
+{
+#if UPB_UNALIGNED_READS_OK
+  // Direct store of the value in host byte order.
+  *(uint32_t*)buf = val;
+#else
+  upb_put32(buf, val);
+#endif
+  return buf + sizeof(uint32_t);
+}
+
+// Puts a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT) at buf
+// and returns a pointer one past the 8 bytes written.
+static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
+{
+  uint8_t *uint64_end = buf + sizeof(uint64_t);
+#if UPB_UNALIGNED_READS_OK
+  *(uint64_t*)buf = val;
+#else
+  // Write all eight bytes, least significant first.  (Writing the two 32-bit
+  // halves to the same address would leave bytes 4-7 unwritten.)
+  for(size_t i = 0; i < sizeof(uint64_t); i++) {
+    buf[i] = (uint8_t)(val >> (8 * i));
+  }
+#endif
+  return uint64_end;
+}
+
+/* Functions to write and calculate sizes for .proto values. ******************/
+
+// Performs zig-zag encoding, which is used by sint32 and sint64: the value is
+// doubled and, for negative inputs, all bits are flipped, so that values of
+// small magnitude encode as small unsigned numbers.  The arithmetic is done on
+// unsigned types because left-shifting a negative signed value is undefined.
+static uint32_t upb_zzenc_32(int32_t n) {
+  const uint32_t u = (uint32_t)n;
+  return (u << 1) ^ (0 - (u >> 31));  // 0 - sign bit: all ones if n < 0.
+}
+static uint64_t upb_zzenc_64(int64_t n) {
+  const uint64_t u = (uint64_t)n;
+  return (u << 1) ^ (0 - (u >> 63));  // 0 - sign bit: all ones if n < 0.
+}
+
+/* Use macros to define a set of three functions for each .proto type:
+ *
+ *  // Converts a .proto value to its wire value and writes it at buf, which
+ *  // the caller guarantees is large enough.  Returns a pointer one past the
+ *  // data that was written.
+ *  uint8_t *upb_put_INT32(uint8_t *buf, int32_t val);
+ *
+ *  // Returns the number of bytes required to encode val.  The size is that
+ *  // of the *wire* value, so val is converted before it is measured.
+ *  size_t upb_get_INT32_size(int32_t val);
+ *
+ *  // Given a .proto value s (source) convert it to a wire value.
+ *  uint32_t upb_vtowv_INT32(int32_t s);
+ *
+ * The body that follows each T(...) invocation below is the body of the
+ * upb_vtowv_ function for that type.
+ */
+
+#define VTOWV(type, wire_t, val_t) \
+  static wire_t upb_vtowv_ ## type(val_t s)
+
+#define PUT(type, v_or_f, wire_t, val_t, member_name) \
+  static uint8_t *upb_put_ ## type(uint8_t *buf, val_t val) { \
+    wire_t tmp = upb_vtowv_ ## type(val); \
+    return upb_put_ ## v_or_f ## _ ## wire_t(buf, tmp); \
+  }
+
+#define T(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t);  /* prototype for the two functions below */ \
+  static size_t upb_get_ ## type ## _size(val_t val) { \
+    return upb_ ## v_or_f ## _ ## wire_t ## _size(upb_vtowv_ ## type(val)); \
+  } \
+  PUT(type, v_or_f, wire_t, val_t, member_name) \
+  VTOWV(type, wire_t, val_t)
+
+T(INT32,    v,  int32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(INT64,    v, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(UINT32,   v, uint32_t, uint32_t, uint32)  { return s;               }
+T(UINT64,   v, uint64_t, uint64_t, uint64)  { return s;               }
+T(SINT32,   v, uint32_t, int32_t,  int32)   { return upb_zzenc_32(s); }
+T(SINT64,   v, uint64_t, int64_t,  int64)   { return upb_zzenc_64(s); }
+T(FIXED32,  f, uint32_t, uint32_t, uint32)  { return s;               }
+T(FIXED64,  f, uint64_t, uint64_t, uint64)  { return s;               }
+T(SFIXED32, f, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(SFIXED64, f, uint64_t, int64_t,  int64)   { return (uint64_t)s;     }
+T(BOOL,     v, uint32_t, bool,     _bool)   { return (uint32_t)s;     }
+T(ENUM,     v, uint32_t, int32_t,  int32)   { return (uint32_t)s;     }
+T(DOUBLE,   f, uint64_t, double,   _double) {
+  // Reinterpret the bits of the double; this is not a numeric conversion.
+  upb_value v;
+  v._double = s;
+  return v.uint64;
+}
+T(FLOAT,    f, uint32_t, float,    _float)  {
+  // Reinterpret the bits of the float; this is not a numeric conversion.
+  upb_value v;
+  v._float = s;
+  return v.uint32;
+}
+#undef VTOWV
+#undef PUT
+#undef T
+
+// Encodes the scalar value v, whose .proto type is ft, at buf by dispatching
+// to the matching upb_put_<TYPE>() function.  Returns a pointer one past the
+// bytes written; for a type with no case below it asserts and returns buf
+// unchanged.
+static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
+{
+#define PUT_AS(t, member_name) \
+  case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
+  switch(ft) {
+    // Floating point.
+    PUT_AS(DOUBLE,   _double)
+    PUT_AS(FLOAT,    _float)
+    // Varint integers.
+    PUT_AS(INT32,    int32)
+    PUT_AS(INT64,    int64)
+    PUT_AS(UINT32,   uint32)
+    PUT_AS(UINT64,   uint64)
+    PUT_AS(SINT32,   int32)
+    PUT_AS(SINT64,   int64)
+    // Fixed-width integers.
+    PUT_AS(FIXED32,  uint32)
+    PUT_AS(FIXED64,  uint64)
+    PUT_AS(SFIXED32, int32)
+    PUT_AS(SFIXED64, int64)
+    // Other varint-encoded types.
+    PUT_AS(BOOL,     _bool)
+    PUT_AS(ENUM,     int32)
+    default: assert(false); return buf;
+  }
+#undef PUT_AS
+}
+
+// Returns the encoded size in bytes of the scalar value v, whose .proto type
+// is ft, by dispatching to the matching upb_get_<TYPE>_size() function.  For a
+// type with no case below it asserts and returns 0.
+static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
+{
+#define SIZE_AS(t, member_name) \
+  case UPB_TYPE(t): return upb_get_ ## t ## _size(v.member_name);
+  switch(ft) {
+    // Floating point.
+    SIZE_AS(DOUBLE,   _double)
+    SIZE_AS(FLOAT,    _float)
+    // Varint integers.
+    SIZE_AS(INT32,    int32)
+    SIZE_AS(INT64,    int64)
+    SIZE_AS(UINT32,   uint32)
+    SIZE_AS(UINT64,   uint64)
+    SIZE_AS(SINT32,   int32)
+    SIZE_AS(SINT64,   int64)
+    // Fixed-width integers.
+    SIZE_AS(FIXED32,  uint32)
+    SIZE_AS(FIXED64,  uint64)
+    SIZE_AS(SFIXED32, int32)
+    SIZE_AS(SFIXED64, int64)
+    // Other varint-encoded types.
+    SIZE_AS(BOOL,     _bool)
+    SIZE_AS(ENUM,     int32)
+    default: assert(false); return 0;
+  }
+#undef SIZE_AS
+}
+
+// Writes the tag for field number "num" with wire type "wt" at buf as a
+// varint: the wire type occupies the low 3 bits and the field number the bits
+// above them.  Returns a pointer one past the bytes written.
+static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
+                             upb_wire_type_t wt)
+{
+  return upb_put_UINT32(buf, wt | (num << 3));
+}
+
+// Returns the encoded size of the tag for field number "num".  The wire type
+// only fills the low 3 bits, so it is left out of the computation.
+static uint32_t _upb_get_tag_size(upb_field_number_t num)
+{
+  return upb_get_UINT32_size(num << 3);
+}
+
+
+/* upb_sizebuilder ************************************************************/
+
+// A sink that computes encoded sizes instead of writing bytes; its callbacks
+// accumulate the size of every value they are handed.
+struct upb_sizebuilder {
+  // Accumulating size for the current level.
+  uint32_t size;
+
+  // Stack of sizes for our current nesting: the start callback saves the
+  // parent's accumulated size here when a submessage begins, and the end
+  // callback pops it again.  "top" points at the next free slot.
+  uint32_t stack[UPB_MAX_NESTING], *top;
+
+  // Vector of sizes: one entry is appended per completed submessage.
+  uint32_t *sizes;
+  int sizes_len;   // Number of entries in use.
+  int sizes_size;  // Number of entries allocated.
+
+  upb_status status;
+};
+
+// upb_sink callbacks.
+// Sink callback for a scalar value: adds the size of the field's tag and of
+// the encoded value to the size of the current level.
+static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                                upb_value val,
+                                                upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  sb->size += _upb_get_tag_size(f->number) + _upb_get_value_size(f->type, val);
+  return UPB_SINK_CONTINUE;
+}
+
+// Sink callback for string data: when start >= 0, adds the size of the field's
+// tag and of the length prefix (end - start) to the size of the current level.
+// NOTE(review): the string bytes themselves are not added here; presumably
+// they are accounted for elsewhere -- confirm.
+static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_strptr str,
+                                              int32_t start, uint32_t end,
+                                              upb_status *status)
+{
+  (void)status;
+  (void)str;   // String data itself is not used.
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(start < 0) return UPB_SINK_CONTINUE;
+
+  sb->size += _upb_get_tag_size(f->number) + upb_get_UINT32_size(end - start);
+  return UPB_SINK_CONTINUE;
+}
+
+// Start callback.  A submessage parks the parent's running size on the stack
+// and restarts at zero (tag and length are added in endcb); a group adds its
+// start tag.  NOTE(review): "top" is never checked against UPB_MAX_NESTING.
+static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f,
+                                                upb_status *status) {
+  upb_sizebuilder *builder = (upb_sizebuilder*)sink;
+  (void)status;
+  if(f->type != UPB_TYPE(MESSAGE)) {
+    assert(f->type == UPB_TYPE(GROUP));
+    builder->size += _upb_get_tag_size(f->number);
+    return UPB_SINK_CONTINUE;
+  }
+  *builder->top++ = builder->size;
+  builder->size = 0;
+  return UPB_SINK_CONTINUE;
+}
+
+// End callback.  A submessage's size is appended to "sizes" and the parent's
+// saved size is popped and grown by the child's tag, length prefix and body;
+// a group adds its end tag.  Returns UPB_SINK_STOP if "sizes" cannot grow.
+static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f,
+                                              upb_status *status)
+{
+  (void)status;
+  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
+  if(f->type != UPB_TYPE(MESSAGE)) {
+    assert(f->type == UPB_TYPE(GROUP));
+    sb->size += _upb_get_tag_size(f->number);  // End tag, sized like the start.
+    return UPB_SINK_CONTINUE;
+  }
+  if(sb->sizes_len == sb->sizes_size) {
+    // Doubling a zero capacity would stay zero, and assigning realloc()'s
+    // result directly would lose the old buffer on failure.
+    int new_size = sb->sizes_size > 0 ? sb->sizes_size * 2 : 16;
+    uint32_t *grown = realloc(sb->sizes, new_size * sizeof(*sb->sizes));
+    if(!grown) return UPB_SINK_STOP;  // NOTE(review): status is left unset.
+    sb->sizes = grown;
+    sb->sizes_size = new_size;
+  }
+  uint32_t child_size = sb->size;
+  sb->sizes[sb->sizes_len++] = child_size;
+  sb->size = *--sb->top + _upb_get_tag_size(f->number) +
+             upb_get_UINT32_size(child_size) + child_size;
+  return UPB_SINK_CONTINUE;
+}
+
+upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
+  _upb_sizebuilder_valuecb,  // Scalar values.
+  _upb_sizebuilder_strcb,    // Strings.
+  _upb_sizebuilder_startcb,  // Start of a submessage or group.
+  _upb_sizebuilder_endcb     // End of a submessage or group.
+};
+
+
+/* upb_encoder ****************************************************************/
+
+struct upb_encoder {
+  upb_sink base;  // Must be first: callbacks cast upb_sink* to upb_encoder*.
+  //upb_bytesink *bytesink;  // Output destination; not wired up yet.
+  uint32_t *sizes;  // Submessage sizes; startcb reads them back to front.
+  int size_offset;  // One past the next entry of "sizes" to consume.
+};
+
+
+// One callback may encode up to two values back to back (e.g. tag + value).
+#define UPB_ENCODER_BUFSIZE (UPB_MAX_ENCODED_SIZE * 2)
+
+// Placeholder for handing "len" bytes at "buf" to the bytesink.  Nothing is
+// written yet: a fixed count of 5 stands in for the bytesink's return value,
+// so a "len" above 5 yields UPB_SINK_STOP and anything else
+// UPB_SINK_CONTINUE.  None of "s", "buf" or "status" is used so far.
+static upb_sink_status _upb_encoder_push_buf(upb_encoder *s, const uint8_t *buf,
+                                             size_t len, upb_status *status) {
+  // TODO: conjure a upb_strptr that points to buf.
+  //upb_strptr ptr;
+  (void)s;
+  (void)buf;
+  (void)status;
+  const size_t written = 5;  // = upb_bytesink_onbytes(s->bytesink, ptr);
+  // TODO: on a short write, mark to skip "written" bytes next time.
+  return written < len ? UPB_SINK_STOP : UPB_SINK_CONTINUE;
+}
+
+
+// Value callback: encodes the field's tag, then the value, into a stack buffer
+// and pushes the result.  TODO: handle packed encoding.
+static upb_sink_status _upb_encoder_valuecb(upb_sink *sink, upb_fielddef *f,
+                                            upb_value val, upb_status *status) {
+  uint8_t scratch[UPB_ENCODER_BUFSIZE];
+  upb_wire_type_t wire_type = upb_types[f->type].expected_wire_type;
+  uint8_t *end = _upb_put_tag(scratch, f->number, wire_type);
+  end = upb_encode_value(end, f->type, val);
+  size_t used = end - scratch;
+  return _upb_encoder_push_buf((upb_encoder*)sink, scratch, used, status);
+}
+
+// String callback: for start >= 0 writes the tag and length (end - start),
+// then pushes (end - start) bytes of "str".  TODO: partial strings.
+// NOTE(review): data is taken from the buffer start, not offset by "start".
+static upb_sink_status _upb_encoder_strcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_strptr str, int32_t start,
+                                          uint32_t end, upb_status *status) {
+  upb_encoder *s = (upb_encoder*)sink;
+  uint8_t buf[UPB_ENCODER_BUFSIZE], *ptr = buf;
+  if(start >= 0) {
+    ptr = _upb_put_tag(ptr, f->number, UPB_WIRE_TYPE_DELIMITED);
+    ptr = upb_put_UINT32(ptr, end - start);
+  }
+  upb_sink_status ret = _upb_encoder_push_buf(s, buf, ptr - buf, status);
+  if(ret != UPB_SINK_CONTINUE) return ret;  // No payload after a stopped header.
+  return _upb_encoder_push_buf(s, (uint8_t*)upb_string_getrobuf(str), end - start, status);
+}
+
+// Start callback: a group gets a START_GROUP tag; anything else gets a
+// DELIMITED tag followed by the next precomputed size, consumed from "sizes"
+// back to front.
+static upb_sink_status _upb_encoder_startcb(upb_sink *sink, upb_fielddef *f,
+                                            upb_status *status) {
+  upb_encoder *enc = (upb_encoder*)sink;
+  uint8_t scratch[UPB_ENCODER_BUFSIZE];
+  bool is_group = (f->type == UPB_TYPE(GROUP));
+  uint8_t *end = _upb_put_tag(scratch, f->number, is_group ?
+      UPB_WIRE_TYPE_START_GROUP : UPB_WIRE_TYPE_DELIMITED);
+  if(!is_group) end = upb_put_UINT32(end, enc->sizes[--enc->size_offset]);
+  return _upb_encoder_push_buf(enc, scratch, end - scratch, status);
+}
+
+// End callback: only groups need a terminator (an END_GROUP tag); for any
+// other field type nothing is written.
+static upb_sink_status _upb_encoder_endcb(upb_sink *sink, upb_fielddef *f,
+                                          upb_status *status) {
+  if(f->type != UPB_TYPE(GROUP)) return UPB_SINK_CONTINUE;
+  uint8_t scratch[UPB_ENCODER_BUFSIZE];
+  uint8_t *end = _upb_put_tag(scratch, f->number, UPB_WIRE_TYPE_END_GROUP);
+  return _upb_encoder_push_buf((upb_encoder*)sink, scratch, end - scratch, status);
+}
+
+upb_sink_callbacks _upb_encoder_sink_vtbl = {
+  _upb_encoder_valuecb,  // Scalar values.
+  _upb_encoder_strcb,    // Strings.
+  _upb_encoder_startcb,  // Start of a submessage or group.
+  _upb_encoder_endcb     // End of a submessage or group.
+};
+
diff --git a/stream/upb_encoder.h b/stream/upb_encoder.h
new file mode 100644
index 0000000..e879b0b
--- /dev/null
+++ b/stream/upb_encoder.h
@@ -0,0 +1,56 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Implements a upb_sink that writes protobuf data to the binary wire format.
+ *
+ * For messages that have any submessages, the encoder needs a buffer
+ * containing the submessage sizes, so they can be properly written at the
+ * front of each message.  Note that groups do *not* have this requirement.
+ *
+ * Copyright (c) 2009-2010 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_ENCODER_H_
+#define UPB_ENCODER_H_
+
+#include "upb.h"
+#include "upb_srcsink.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* upb_encoder ****************************************************************/
+
+// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
+// buffer binary wire format.
+struct upb_encoder;
+typedef struct upb_encoder upb_encoder;
+
+upb_encoder *upb_encoder_new(upb_msgdef *md);
+void upb_encoder_free(upb_encoder *e);
+
+// Resets the given upb_encoder such that it is ready to begin encoding,
+// outputting data to "bytesink" (which must live until the encoder is
+// reset or destroyed).
+void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
+
+// Returns the upb_sink to which data can be written.  The sink is invalidated
+// when the encoder is reset or destroyed.  Note that if the client wants to
+// encode any length-delimited submessages it must first call
+// upb_encoder_buildsizes() below.
+upb_sink *upb_encoder_sink(upb_encoder *e);
+
+// Call prior to pushing any data with embedded submessages.  "src" must yield
+// exactly the same data as what will next be encoded, but in reverse order.
+// The encoder iterates over this data in order to determine the sizes of the
+// submessages.  If any errors are returned by the upb_src, the status will
+// be saved in *status.  If the client is sure that the upb_src will not throw
+// any errors, "status" may be NULL.
+void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_ENCODER_H_ */
diff --git a/stream/upb_text.c b/stream/upb_text.c
new file mode 100644
index 0000000..8662269
--- /dev/null
+++ b/stream/upb_text.c
@@ -0,0 +1,121 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#include <inttypes.h>
+#include "descriptor.h"
+#include "upb_text.h"
+#include "upb_data.h"
+
+// Prints "val", interpreted according to "type", to "file".  Strings and bytes
+// are quoted but not escaped (TODO); any other unlisted type prints nothing.
+void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file)
+{
+#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break;
+  switch(type) {
+    case UPB_TYPE(DOUBLE): CASE("%f", _double)  // Keep the fractional digits.
+    case UPB_TYPE(FLOAT):  CASE("%f", _float)
+    case UPB_TYPE(INT64):
+    case UPB_TYPE(SFIXED64):
+    case UPB_TYPE(SINT64):
+      CASE("%" PRId64, int64)
+    case UPB_TYPE(UINT64):
+    case UPB_TYPE(FIXED64):
+      CASE("%" PRIu64, uint64)
+    case UPB_TYPE(INT32):
+    case UPB_TYPE(SFIXED32):
+    case UPB_TYPE(SINT32):
+    case UPB_TYPE(ENUM):  // Enum values are signed int32, not uint32.
+      CASE("%" PRId32, int32)
+    case UPB_TYPE(UINT32):
+    case UPB_TYPE(FIXED32):
+      CASE("%" PRIu32, uint32)
+    case UPB_TYPE(BOOL): CASE("%hhu", _bool)
+    case UPB_TYPE(STRING):
+    case UPB_TYPE(BYTES):
+      fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break;
+    default: break;
+  }
+#undef CASE
+}
+
+// Writes two spaces per indent level; single-line mode is never indented.
+static void print_indent(upb_text_printer *p, FILE *stream) {
+  if(p->single_line) return;
+  for(int level = p->indent_depth; level > 0; level--)
+    fputs("  ", stream);
+}
+
+// Prints one "name:value" field at the current indent.  The field ends with a
+// space in single-line mode and with a newline otherwise.
+void upb_text_printfield(upb_text_printer *p, upb_strptr name,
+                         upb_field_type_t valtype, upb_value val,
+                         FILE *stream)
+{
+  const char terminator = p->single_line ? ' ' : '\n';
+  print_indent(p, stream);
+  fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name));
+  upb_text_printval(valtype, val, stream);
+  fputc(terminator, stream);
+}
+
+// Prints "<submsg_type> {" (newline unless single-line), then indents a level.
+void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream) {
+  print_indent(p, stream);
+  fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type));
+  if(p->single_line == false) fputc('\n', stream);
+  p->indent_depth += 1;
+}
+
+// Un-indents and prints "}" plus a space (single-line mode) or a newline.
+void upb_text_pop(upb_text_printer *p, FILE *stream) {
+  p->indent_depth--;
+  print_indent(p, stream);
+  fputs(p->single_line ? "} " : "}\n", stream);
+}
+
+static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
+                     FILE *stream);  // Forward decl: recursive with printmsg.
+
+// Prints every field that is set on "msg", in msgdef order.  Array fields are
+// expanded so that each element is printed as its own field.
+static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md,
+                     FILE *stream) {
+  for(upb_field_count_t idx = 0; idx < md->num_fields; idx++) {
+    upb_fielddef *field = &md->fields[idx];
+    if(!upb_msg_has(msg, field)) continue;
+    upb_value value = upb_msg_get(msg, field);
+    if(!upb_isarray(field)) {
+      printval(printer, value, field, stream);
+      continue;
+    }
+    // Array field: one output field per element.
+    for(uint32_t n = 0; n < upb_array_len(value.arr); n++) {
+      printval(printer, upb_array_get(value.arr, field, n), field, stream);
+    }
+  }
+}
+
+// Prints one value of "f": a braced block for a submessage, else "name:value".
+static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
+                     FILE *stream) {
+  if(!upb_issubmsg(f)) {
+    upb_text_printfield(printer, f->name, f->type, v, stream);
+    return;
+  }
+  upb_text_push(printer, f->name, stream);
+  printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream);
+  upb_text_pop(printer, stream);
+}
+
+
+// Prints "msg", described by "md", to "stream"; "single_line" picks the layout.
+void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line,
+                   FILE *stream) {
+  upb_text_printer state;
+  upb_text_printer_init(&state, single_line);
+  printmsg(&state, msg, md, stream);
+}
+
diff --git a/stream/upb_text.h b/stream/upb_text.h
new file mode 100644
index 0000000..d89c9d6
--- /dev/null
+++ b/stream/upb_text.h
@@ -0,0 +1,36 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ */
+
+#ifndef UPB_TEXT_H_
+#define UPB_TEXT_H_
+
+#include "upb.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  int indent_depth;  // Current nesting level; each level indents two spaces.
+  bool single_line;  // If true, no indentation and fields end with a space.
+} upb_text_printer;
+
+INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) {
+  p->single_line = single_line;  // Layout mode for all subsequent output.
+  p->indent_depth = 0;           // Start at the outermost nesting level.
+}
+// Text-printing primitives, defined in upb_text.c; output goes to the FILE*.
+void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file);
+void upb_text_printfield(upb_text_printer *p, upb_strptr name,
+                         upb_field_type_t valtype, upb_value val, FILE *stream);
+void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream);
+void upb_text_pop(upb_text_printer *p, FILE *stream);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_TEXT_H_ */