15 files changed, 2133 insertions, 2007 deletions
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index a17332b..83c914b 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -45,14 +45,14 @@ static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
   upb_inttable_begin(&i, &g->methods);
   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
-    visit(r, UPB_UPCAST(method), closure);
+    visit(r, upb_pbdecodermethod_upcast(method), closure);
   }
 }
 
 mgroup *newgroup(const void *owner) {
   mgroup *g = malloc(sizeof(*g));
   static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
-  upb_refcounted_init(UPB_UPCAST(g), &vtbl, owner);
+  upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
   upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
   g->bytecode = NULL;
   g->bytecode_end = NULL;
@@ -83,18 +83,18 @@ static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
                                       mgroup *group) {
   static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
   upb_pbdecodermethod *ret = malloc(sizeof(*ret));
-  upb_refcounted_init(UPB_UPCAST(ret), &vtbl, &ret);
+  upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
   upb_byteshandler_init(&ret->input_handler_);
 
-  // The method references the group and vice-versa, in a circular reference.
+  /* The method references the group and vice-versa, in a circular reference. */
   upb_ref2(ret, group);
   upb_ref2(group, ret);
   upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
-  upb_refcounted_unref(UPB_UPCAST(ret), &ret);
+  upb_pbdecodermethod_unref(ret, &ret);
 
-  ret->group = UPB_UPCAST(group);
+  ret->group = mgroup_upcast_mutable(group);
   ret->dest_handlers_ = dest_handlers;
-  ret->is_native_ = false;  // If we JIT, it will update this later.
+  ret->is_native_ = false;  /* If we JIT, it will update this later. */
   upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
 
   if (ret->dest_handlers_) {
@@ -103,25 +103,6 @@ static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
   return ret;
 }
 
-void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner) {
-  upb_refcounted_ref(UPB_UPCAST(m), owner);
-}
-
-void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m,
-                               const void *owner) {
-  upb_refcounted_unref(UPB_UPCAST(m), owner);
-}
-
-void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m,
-                                   const void *from, const void *to) {
-  upb_refcounted_donateref(UPB_UPCAST(m), from, to);
-}
-
-void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
-                                  const void *owner) {
-  upb_refcounted_checkref(UPB_UPCAST(m), owner);
-}
-
 const upb_handlers *upb_pbdecodermethod_desthandlers(
     const upb_pbdecodermethod *m) {
   return m->dest_handlers_;
@@ -138,10 +119,11 @@ bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
 
 const upb_pbdecodermethod *upb_pbdecodermethod_new(
     const upb_pbdecodermethodopts *opts, const void *owner) {
+  const upb_pbdecodermethod *ret;
   upb_pbcodecache cache;
+
   upb_pbcodecache_init(&cache);
-  const upb_pbdecodermethod *ret =
-      upb_pbcodecache_getdecodermethod(&cache, opts);
+  ret = upb_pbcodecache_getdecodermethod(&cache, opts);
   upb_pbdecodermethod_ref(ret, owner);
   upb_pbcodecache_uninit(&cache);
   return ret;
@@ -150,7 +132,7 @@ const upb_pbdecodermethod *upb_pbdecodermethod_new(
 
 /* bytecode compiler **********************************************************/
 
-// Data used only at compilation time.
+/* Data used only at compilation time. */
 typedef struct {
   mgroup *group;
 
@@ -158,15 +140,17 @@ typedef struct {
   int fwd_labels[MAXLABEL];
   int back_labels[MAXLABEL];
 
-  // For fields marked "lazy", parse them lazily or eagerly?
+  /* For fields marked "lazy", parse them lazily or eagerly? */
   bool lazy;
 } compiler;
 
 static compiler *newcompiler(mgroup *group, bool lazy) {
   compiler *ret = malloc(sizeof(*ret));
+  int i;
+
   ret->group = group;
   ret->lazy = lazy;
-  for (int i = 0; i < MAXLABEL; i++) {
+  for (i = 0; i < MAXLABEL; i++) {
     ret->fwd_labels[i] = EMPTYLABEL;
     ret->back_labels[i] = EMPTYLABEL;
   }
@@ -179,7 +163,7 @@ static void freecompiler(compiler *c) {
 
 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
 
-// How many words an instruction is.
+/* How many words an instruction is. */
 static int instruction_len(uint32_t instr) {
   switch (getop(instr)) {
     case OP_SETDISPATCH: return 1 + ptr_words;
@@ -195,8 +179,8 @@ bool op_has_longofs(int32_t instruction) {
     case OP_BRANCH:
     case OP_CHECKDELIM:
       return true;
-    // The "tag" instructions only have 8 bytes available for the jump target,
-    // but that is ok because these opcodes only require short jumps.
+    /* The "tag" instructions only have 8 bytes available for the jump target,
+     * but that is ok because these opcodes only require short jumps. */
     case OP_TAG1:
     case OP_TAG2:
     case OP_TAGN:
@@ -221,18 +205,21 @@ static void setofs(uint32_t *instruction, int32_t ofs) {
   } else {
     *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
   }
-  assert(getofs(*instruction) == ofs);  // Would fail in cases of overflow.
+  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
 }
 
 static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
 
-// Defines a local label at the current PC location.  All previous forward
-// references are updated to point to this location.  The location is noted
-// for any future backward references.
+/* Defines a local label at the current PC location.  All previous forward
+ * references are updated to point to this location.  The location is noted
+ * for any future backward references. */
 static void label(compiler *c, unsigned int label) {
+  int val;
+  uint32_t *codep;
+
   assert(label < MAXLABEL);
-  int val = c->fwd_labels[label];
-  uint32_t *codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
+  val = c->fwd_labels[label];
+  codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
   while (codep) {
     int ofs = getofs(*codep);
     setofs(codep, c->pc - codep - instruction_len(*codep));
@@ -242,24 +229,25 @@ static void label(compiler *c, unsigned int label) {
   c->back_labels[label] = pcofs(c);
 }
 
-// Creates a reference to a numbered label; either a forward reference
-// (positive arg) or backward reference (negative arg).  For forward references
-// the value returned now is actually a "next" pointer into a linked list of all
-// instructions that use this label and will be patched later when the label is
-// defined with label().
-//
-// The returned value is the offset that should be written into the instruction.
+/* Creates a reference to a numbered label; either a forward reference
+ * (positive arg) or backward reference (negative arg).  For forward references
+ * the value returned now is actually a "next" pointer into a linked list of all
+ * instructions that use this label and will be patched later when the label is
+ * defined with label().
+ *
+ * The returned value is the offset that should be written into the instruction.
+ */
 static int32_t labelref(compiler *c, int label) {
   assert(label < MAXLABEL);
   if (label == LABEL_DISPATCH) {
-    // No resolving required.
+    /* No resolving required. */
     return 0;
   } else if (label < 0) {
-    // Backward local label.  Relative to the next instruction.
+    /* Backward local label.  Relative to the next instruction. */
     uint32_t from = (c->pc + 1) - c->group->bytecode;
     return c->back_labels[-label] - from;
   } else {
-    // Forward local label: prepend to (possibly-empty) linked list.
+    /* Forward local label: prepend to (possibly-empty) linked list. */
     int *lptr = &c->fwd_labels[label];
     int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
     *lptr = pcofs(c);
@@ -273,7 +261,7 @@ static void put32(compiler *c, uint32_t v) {
     int ofs = pcofs(c);
     size_t oldsize = g->bytecode_end - g->bytecode;
     size_t newsize = UPB_MAX(oldsize * 2, 64);
-    // TODO(haberman): handle OOM.
+    /* TODO(haberman): handle OOM. */
     g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t));
     g->bytecode_end = g->bytecode + newsize;
     c->pc = g->bytecode + ofs;
@@ -372,19 +360,22 @@ static void putop(compiler *c, opcode op, ...) {
 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
 
 const char *upb_pbdecoder_getopname(unsigned int op) {
-#define OP(op) [OP_ ## op] = "OP_" #op
-#define T(op) OP(PARSE_##op)
-  static const char *names[] = {
-    "<no opcode>",
-    T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
-    T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
-    OP(STARTMSG), OP(ENDMSG), OP(STARTSEQ), OP(ENDSEQ), OP(STARTSUBMSG),
-    OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
-    OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
-    OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
-    OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
-  };
-  return op > OP_HALT ? names[0] : names[op];
+#define QUOTE(x) #x
+#define EXPAND_AND_QUOTE(x) QUOTE(x)
+#define OPNAME(x) OP_##x
+#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
+#define T(x) OP(PARSE_##x)
+  /* Keep in sync with list in decoder.int.h. */
+  switch ((opcode)op) {
+    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
+    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
+    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
+    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
+    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
+    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
+    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
+  }
+  return "<unknown op>";
 #undef OP
 #undef T
 }
@@ -482,7 +473,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
 static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
   uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
   uint64_t encoded_tag = upb_vencode32(tag);
-  // No tag should be greater than 5 bytes.
+  /* No tag should be greater than 5 bytes. */
   assert(encoded_tag <= 0xffffffffff);
   return encoded_tag;
 }
@@ -510,29 +501,29 @@ static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
   return selector;
 }
 
-// Takes an existing, primary dispatch table entry and repacks it with a
-// different alternate wire type.  Called when we are inserting a secondary
-// dispatch table entry for an alternate wire type.
+/* Takes an existing, primary dispatch table entry and repacks it with a
+ * different alternate wire type.  Called when we are inserting a secondary
+ * dispatch table entry for an alternate wire type. */
 static uint64_t repack(uint64_t dispatch, int new_wt2) {
   uint64_t ofs;
   uint8_t wt1;
   uint8_t old_wt2;
   upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
-  assert(old_wt2 == NO_WIRE_TYPE);  // wt2 should not be set yet.
+  assert(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
   return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
 }
 
-// Marks the current bytecode position as the dispatch target for this message,
-// field, and wire type.
+/* Marks the current bytecode position as the dispatch target for this message,
+ * field, and wire type. */
 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
                            const upb_fielddef *f, int wire_type) {
-  // Offset is relative to msg base.
+  /* Offset is relative to msg base. */
   uint64_t ofs = pcofs(c) - method->code_base.ofs;
   uint32_t fn = upb_fielddef_number(f);
   upb_inttable *d = &method->dispatch;
   upb_value v;
   if (upb_inttable_remove(d, fn, &v)) {
-    // TODO: prioritize based on packed setting in .proto file.
+    /* TODO: prioritize based on packed setting in .proto file. */
     uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
     upb_inttable_insert(d, fn, upb_value_uint64(repacked));
     upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
@@ -574,8 +565,8 @@ static void putsel(compiler *c, opcode op, upb_selector_t sel,
   }
 }
 
-// Puts an opcode to call a callback, but only if a callback actually exists for
-// this field and handler type.
+/* Puts an opcode to call a callback, but only if a callback actually exists for
+ * this field and handler type. */
 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
                      const upb_fielddef *f, upb_handlertype_t type) {
   putsel(c, op, getsel(f, type), h);
@@ -593,27 +584,28 @@ static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
 
 /* bytecode compiler code generation ******************************************/
 
-// Symbolic names for our local labels.
-#define LABEL_LOOPSTART 1  // Top of a repeated field loop.
-#define LABEL_LOOPBREAK 2  // To jump out of a repeated loop
-#define LABEL_FIELD     3  // Jump backward to find the most recent field.
-#define LABEL_ENDMSG    4  // To reach the OP_ENDMSG instr for this msg.
+/* Symbolic names for our local labels. */
+#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
+#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
+#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
+#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
 
-// Generates bytecode to parse a single non-lazy message field.
+/* Generates bytecode to parse a single non-lazy message field. */
 static void generate_msgfield(compiler *c, const upb_fielddef *f,
                               upb_pbdecodermethod *method) {
   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
   const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
+  int wire_type;
 
   if (!sub_m) {
-    // Don't emit any code for this field at all; it will be parsed as an
-    // unknown field.
+    /* Don't emit any code for this field at all; it will be parsed as an
+     * unknown field. */
     return;
   }
 
   label(c, LABEL_FIELD);
 
-  int wire_type =
+  wire_type =
       (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
           ? UPB_WIRE_TYPE_DELIMITED
           : UPB_WIRE_TYPE_START_GROUP;
@@ -654,7 +646,7 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f,
   }
 }
 
-// Generates bytecode to parse a single string or lazy submessage field.
+/* Generates bytecode to parse a single string or lazy submessage field. */
 static void generate_delimfield(compiler *c, const upb_fielddef *f,
                                 upb_pbdecodermethod *method) {
   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
@@ -669,7 +661,7 @@ static void generate_delimfield(compiler *c, const upb_fielddef *f,
    label(c, LABEL_LOOPSTART);
     putop(c, OP_PUSHLENDELIM);
     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
-    // Need to emit even if no handler to skip past the string.
+    /* Need to emit even if no handler to skip past the string. */
     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
     putop(c, OP_POP);
     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
@@ -693,49 +685,52 @@ static void generate_delimfield(compiler *c, const upb_fielddef *f,
   }
 }
 
-// Generates bytecode to parse a single primitive field.
+/* Generates bytecode to parse a single primitive field. */
 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
                                     upb_pbdecodermethod *method) {
-  label(c, LABEL_FIELD);
-
   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
   upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
+  opcode parse_type;
+  upb_selector_t sel;
+  int wire_type;
+
+  label(c, LABEL_FIELD);
 
-  // From a decoding perspective, ENUM is the same as INT32.
+  /* From a decoding perspective, ENUM is the same as INT32. */
   if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
     descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
 
-  opcode parse_type = (opcode)descriptor_type;
+  parse_type = (opcode)descriptor_type;
 
-  // TODO(haberman): generate packed or non-packed first depending on "packed"
-  // setting in the fielddef.  This will favor (in speed) whichever was
-  // specified.
+  /* TODO(haberman): generate packed or non-packed first depending on "packed"
+   * setting in the fielddef.  This will favor (in speed) whichever was
+   * specified. */
 
   assert((int)parse_type >= 0 && parse_type <= OP_MAX);
-  upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
-  int wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
+  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+  wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
   if (upb_fielddef_isseq(f)) {
     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
     putop(c, OP_PUSHLENDELIM);
-    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  // Packed
+    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
    label(c, LABEL_LOOPSTART);
     putop(c, parse_type, sel);
     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    dispatchtarget(c, method, f, wire_type);
     putop(c, OP_PUSHTAGDELIM, 0);
-    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  // Non-packed
+    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
    label(c, LABEL_LOOPSTART);
     putop(c, parse_type, sel);
     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
-    putop(c, OP_POP);  // Packed and non-packed join.
+    putop(c, OP_POP);  /* Packed and non-packed join. */
     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
-    putop(c, OP_SETDELIM);  // Could remove for non-packed by dup ENDSEQ.
+    putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
   } else {
     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
     putchecktag(c, f, wire_type, LABEL_DISPATCH);
@@ -744,24 +739,29 @@ static void generate_primitivefield(compiler *c, const upb_fielddef *f,
   }
 }
 
-// Adds bytecode for parsing the given message to the given decoderplan,
-// while adding all dispatch targets to this message's dispatch table.
+/* Adds bytecode for parsing the given message to the given decoderplan,
+ * while adding all dispatch targets to this message's dispatch table. */
 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
+  const upb_handlers *h;
+  const upb_msgdef *md;
+  uint32_t* start_pc;
+  upb_msg_field_iter i;
+  upb_value val;
+
   assert(method);
 
-  // Clear all entries in the dispatch table.
+  /* Clear all entries in the dispatch table. */
   upb_inttable_uninit(&method->dispatch);
   upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
 
-  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
-  const upb_msgdef *md = upb_handlers_msgdef(h);
+  h = upb_pbdecodermethod_desthandlers(method);
+  md = upb_handlers_msgdef(h);
 
  method->code_base.ofs = pcofs(c);
   putop(c, OP_SETDISPATCH, &method->dispatch);
   putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
  label(c, LABEL_FIELD);
-  uint32_t* start_pc = c->pc;
-  upb_msg_field_iter i;
+  start_pc = c->pc;
   for(upb_msg_field_begin(&i, md);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
@@ -778,23 +778,23 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
     }
   }
 
-  // If there were no fields, or if no handlers were defined, we need to
-  // generate a non-empty loop body so that we can at least dispatch for unknown
-  // fields and check for the end of the message.
+  /* If there were no fields, or if no handlers were defined, we need to
+   * generate a non-empty loop body so that we can at least dispatch for unknown
+   * fields and check for the end of the message. */
   if (c->pc == start_pc) {
-    // Check for end-of-message.
+    /* Check for end-of-message. */
     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
-    // Unconditionally dispatch.
+    /* Unconditionally dispatch. */
     putop(c, OP_DISPATCH, 0);
   }
 
-  // For now we just loop back to the last field of the message (or if none,
-  // the DISPATCH opcode for the message).
+  /* For now we just loop back to the last field of the message (or if none,
+   * the DISPATCH opcode for the message). */
   putop(c, OP_BRANCH, -LABEL_FIELD);
 
-  // Insert both a label and a dispatch table entry for this end-of-msg.
+  /* Insert both a label and a dispatch table entry for this end-of-msg. */
  label(c, LABEL_ENDMSG);
-  upb_value val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
+  val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
   upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
 
   putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
@@ -803,19 +803,21 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
   upb_inttable_compact(&method->dispatch);
 }
 
-// Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
-// Returns the method for these handlers.
-//
-// Generates a new method for every destination handlers reachable from "h".
+/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
+ * Nothing is returned; the new methods are registered in the compiler's group.
+ *
+ * Generates a new method for every destination handlers reachable from "h". */
 static void find_methods(compiler *c, const upb_handlers *h) {
   upb_value v;
+  upb_msg_field_iter i;
+  const upb_msgdef *md;
+
   if (upb_inttable_lookupptr(&c->group->methods, h, &v))
     return;
   newmethod(h, c->group);
 
-  // Find submethods.
-  upb_msg_field_iter i;
-  const upb_msgdef *md = upb_handlers_msgdef(h);
+  /* Find submethods. */
+  md = upb_handlers_msgdef(h);
   for(upb_msg_field_begin(&i, md);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
@@ -823,20 +825,21 @@ static void find_methods(compiler *c, const upb_handlers *h) {
     const upb_handlers *sub_h;
     if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
         (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
-      // We only generate a decoder method for submessages with handlers.
-      // Others will be parsed as unknown fields.
+      /* We only generate a decoder method for submessages with handlers.
+       * Others will be parsed as unknown fields. */
       find_methods(c, sub_h);
     }
   }
 }
 
-// (Re-)compile bytecode for all messages in "msgs."
-// Overwrites any existing bytecode in "c".
+/* (Re-)compile bytecode for all methods in the compiler's group.
+ * Overwrites any existing bytecode in "c". */
 static void compile_methods(compiler *c) {
-  // Start over at the beginning of the bytecode.
+  upb_inttable_iter i;
+
+  /* Start over at the beginning of the bytecode. */
   c->pc = c->group->bytecode;
 
-  upb_inttable_iter i;
   upb_inttable_begin(&i, &c->group->methods);
   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
@@ -849,10 +852,10 @@ static void set_bytecode_handlers(mgroup *g) {
   upb_inttable_begin(&i, &g->methods);
   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
     upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
+    upb_byteshandler *h = &m->input_handler_;
 
     m->code_base.ptr = g->bytecode + m->code_base.ofs;
 
-    upb_byteshandler *h = &m->input_handler_;
     upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
     upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
     upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
@@ -867,53 +870,58 @@ static void set_bytecode_handlers(mgroup *g) {
 static void sethandlers(mgroup *g, bool allowjit) {
   g->jit_code = NULL;
   if (allowjit) {
-    // Compile byte-code into machine code, create handlers.
+    /* Compile byte-code into machine code, create handlers. */
     upb_pbdecoder_jit(g);
   } else {
     set_bytecode_handlers(g);
   }
 }
 
-#else  // UPB_USE_JIT_X64
+#else  /* UPB_USE_JIT_X64 */
 
 static void sethandlers(mgroup *g, bool allowjit) {
-  // No JIT compiled in; use bytecode handlers unconditionally.
+  /* No JIT compiled in; use bytecode handlers unconditionally. */
   UPB_UNUSED(allowjit);
   set_bytecode_handlers(g);
 }
 
-#endif  // UPB_USE_JIT_X64
+#endif  /* UPB_USE_JIT_X64 */
 
 
-// TODO(haberman): allow this to be constructed for an arbitrary set of dest
-// handlers and other mgroups (but verify we have a transitive closure).
+/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
+ * handlers and other mgroups (but verify we have a transitive closure). */
 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
                          const void *owner) {
+  mgroup *g;
+  compiler *c;
+
   UPB_UNUSED(allowjit);
   assert(upb_handlers_isfrozen(dest));
 
-  mgroup *g = newgroup(owner);
-  compiler *c = newcompiler(g, lazy);
+  g = newgroup(owner);
+  c = newcompiler(g, lazy);
   find_methods(c, dest);
 
-  // We compile in two passes:
-  // 1. all messages are assigned relative offsets from the beginning of the
-  //    bytecode (saved in method->code_base).
-  // 2. forwards OP_CALL instructions can be correctly linked since message
-  //    offsets have been previously assigned.
-  //
-  // Could avoid the second pass by linking OP_CALL instructions somehow.
+  /* We compile in two passes:
+   * 1. all messages are assigned relative offsets from the beginning of the
+   *    bytecode (saved in method->code_base).
+   * 2. forwards OP_CALL instructions can be correctly linked since message
+   *    offsets have been previously assigned.
+   *
+   * Could avoid the second pass by linking OP_CALL instructions somehow. */
   compile_methods(c);
   compile_methods(c);
   g->bytecode_end = c->pc;
   freecompiler(c);
 
 #ifdef UPB_DUMP_BYTECODE
-  FILE *f = fopen("/tmp/upb-bytecode", "wb");
-  assert(f);
-  dumpbc(g->bytecode, g->bytecode_end, stderr);
-  dumpbc(g->bytecode, g->bytecode_end, f);
-  fclose(f);
+  {
+    FILE *f = fopen("/tmp/upb-bytecode", "wb");
+    assert(f);
+    dumpbc(g->bytecode, g->bytecode_end, stderr);
+    dumpbc(g->bytecode, g->bytecode_end, f);
+    fclose(f);
+  }
 #endif
 
   sethandlers(g, allowjit);
@@ -933,7 +941,7 @@ void upb_pbcodecache_uninit(upb_pbcodecache *c) {
   upb_inttable_begin(&i, &c->groups);
   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
     const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
-    upb_refcounted_unref(UPB_UPCAST(group), c);
+    mgroup_unref(group, c);
   }
   upb_inttable_uninit(&c->groups);
 }
@@ -951,13 +959,15 @@ bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
 
 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
     upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
-  // Right now we build a new DecoderMethod every time.
-  // TODO(haberman): properly cache methods by their true key.
+  upb_value v;
+  bool ok;
+
+  /* Right now we build a new DecoderMethod every time.
+   * TODO(haberman): properly cache methods by their true key. */
   const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
   upb_inttable_push(&c->groups, upb_value_constptr(g));
 
-  upb_value v;
-  bool ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
+  ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
   UPB_ASSERT_VAR(ok, ok);
   return upb_value_getptr(v);
 }
diff --git a/upb/pb/compile_decoder_x64.c b/upb/pb/compile_decoder_x64.c
index 51b9b9e..3ce11e4 100644
--- a/upb/pb/compile_decoder_x64.c
+++ b/upb/pb/compile_decoder_x64.c
@@ -7,7 +7,7 @@
  * Driver code for the x64 JIT compiler.
  */
 
-// Needed to ensure we get defines like MAP_ANON.
+/* Needed to ensure we get defines like MAP_ANON. */
 #define _GNU_SOURCE
 
 #include <dlfcn.h>
@@ -19,50 +19,50 @@
 #include "upb/pb/varint.int.h"
 #include "upb/shim/shim.h"
 
-// To debug the JIT:
-//
-// 1. Uncomment:
-// #define UPB_JIT_LOAD_SO
-//
-// Note: this mode requires that we can shell out to gcc.
-//
-// 2. Run the test locally.  This will load the JIT code by building a
-//    .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
-//    work properly (like GDB).
-//
-// IF YOU ALSO WANT AUTOMATIC JIT DEBUG OUTPUT:
-//
-// 3. Run: upb/pb/make-gdb-script.rb > script.gdb.  This reads
-//    /tmp/upb-jit-code.so as input and generates a GDB script that is specific
-//    to this jit code.
-//
-// 4. Run: gdb --command=script.gdb --args path/to/test
-//    This will drop you to a GDB prompt which you can now use normally.
-//    But when you run the test it will print a message to stdout every time
-//    the JIT executes assembly for a particular bytecode.  Sample output:
-//
-//    X.enterjit bytes=18
-//    buf_ofs=1 data_rem=17 delim_rem=-2 X.0x6.OP_PARSE_DOUBLE
-//    buf_ofs=9 data_rem=9 delim_rem=-10 X.0x7.OP_CHECKDELIM
-//    buf_ofs=9 data_rem=9 delim_rem=-10 X.0x8.OP_TAG1
-//    X.0x3.dispatch.DecoderTest
-//    X.parse_unknown
-//    X.0x3.dispatch.DecoderTest
-//    X.decode_unknown_tag_fallback
-//    X.exitjit
-//
-//    This output should roughly correspond to the output that the bytecode
-//    interpreter emits when compiled with UPB_DUMP_BYTECODE (modulo some
-//    extra JIT-specific output).
-
-// These defines are necessary for DynASM codegen.
-// See dynasm/dasm_proto.h for more info.
+/* To debug the JIT:
+ *
+ * 1. Uncomment:
+ * #define UPB_JIT_LOAD_SO
+ *
+ * Note: this mode requires that we can shell out to gcc.
+ *
+ * 2. Run the test locally.  This will load the JIT code by building a
+ *    .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
+ *    work properly (like GDB).
+ *
+ * IF YOU ALSO WANT AUTOMATIC JIT DEBUG OUTPUT:
+ *
+ * 3. Run: upb/pb/make-gdb-script.rb > script.gdb.  This reads
+ *    /tmp/upb-jit-code.so as input and generates a GDB script that is specific
+ *    to this jit code.
+ *
+ * 4. Run: gdb --command=script.gdb --args path/to/test
+ *    This will drop you to a GDB prompt which you can now use normally.
+ *    But when you run the test it will print a message to stdout every time
+ *    the JIT executes assembly for a particular bytecode.  Sample output:
+ *
+ *    X.enterjit bytes=18
+ *    buf_ofs=1 data_rem=17 delim_rem=-2 X.0x6.OP_PARSE_DOUBLE
+ *    buf_ofs=9 data_rem=9 delim_rem=-10 X.0x7.OP_CHECKDELIM
+ *    buf_ofs=9 data_rem=9 delim_rem=-10 X.0x8.OP_TAG1
+ *    X.0x3.dispatch.DecoderTest
+ *    X.parse_unknown
+ *    X.0x3.dispatch.DecoderTest
+ *    X.decode_unknown_tag_fallback
+ *    X.exitjit
+ *
+ *    This output should roughly correspond to the output that the bytecode
+ *    interpreter emits when compiled with UPB_DUMP_BYTECODE (modulo some
+ *    extra JIT-specific output). */
+
+/* These defines are necessary for DynASM codegen.
+ * See dynasm/dasm_proto.h for more info. */
 #define Dst_DECL jitcompiler *jc
 #define Dst_REF (jc->dynasm)
 #define Dst (jc)
 
-// In debug mode, make DynASM do internal checks (must be defined before any
-// dasm header is included.
+/* In debug mode, make DynASM do internal checks (must be defined before any
+ * dasm header is included). */
 #ifndef NDEBUG
 #define DASM_CHECKS
 #endif
@@ -75,49 +75,49 @@ typedef struct {
   mgroup *group;
   uint32_t *pc;
 
-  // This pointer is allocated by dasm_init() and freed by dasm_free().
+  /* This pointer is allocated by dasm_init() and freed by dasm_free(). */
   struct dasm_State *dynasm;
 
-  // Maps some key (an arbitrary void*) to a pclabel.
-  //
-  // The pclabel represents a location in the generated code -- DynASM exposes
-  // a pclabel -> (machine code offset) lookup function.
-  //
-  // The key can be anything.  There are two main kinds of keys:
-  //   - bytecode location -- the void* points to the bytecode instruction
-  //     itself.  We can then use this to generate jumps to this instruction.
-  //   - other object (like dispatch table).  We use these to represent parts
-  //     of the generated code that do not exactly correspond to a bytecode
-  //     instruction.
-  upb_inttable jmptargets;
+  /* Maps some key (an arbitrary void*) to a pclabel.
+   *
+   *  The pclabel represents a location in the generated code -- DynASM exposes
+   *  a pclabel -> (machine code offset) lookup function.
+   *
+   *  The key can be anything.  There are two main kinds of keys:
+   *    - bytecode location -- the void* points to the bytecode instruction
+   *      itself.  We can then use this to generate jumps to this instruction.
+   *    - other object (like dispatch table).  We use these to represent parts
+   *      of the generated code that do not exactly correspond to a bytecode
+   *      instruction. */
+   upb_inttable jmptargets;
 
 #ifndef NDEBUG
-  // Like jmptargets, but members are present in the table when they have had
-  // define_jmptarget() (as opposed to jmptarget) called.  Used to verify that
-  // define_jmptarget() is called exactly once for every target.
-  // The value is ignored.
+  /* Like jmptargets, but members are present in the table when they have had
+   * define_jmptarget() (as opposed to jmptarget) called.  Used to verify that
+   * define_jmptarget() is called exactly once for every target.
+   * The value is ignored. */
   upb_inttable jmpdefined;
 
-  // For checking that two asmlabels aren't defined for the same byte.
+  /* For checking that two asmlabels aren't defined for the same byte. */
   int lastlabelofs;
 #endif
 
 #ifdef UPB_JIT_LOAD_SO
-  // For marking labels that should go into the generated code.
-  // Maps pclabel -> char* label (string is owned by the table).
+  /* For marking labels that should go into the generated code.
+   * Maps pclabel -> char* label (string is owned by the table). */
   upb_inttable asmlabels;
 #endif
 
-  // The total number of pclabels currently defined.
-  // Note that this contains both jmptargets and asmlabels, which both use
-  // pclabels but for different purposes.
+  /* The total number of pclabels currently defined.
+   * Note that this contains both jmptargets and asmlabels, which both use
+   * pclabels but for different purposes. */
   uint32_t pclabel_count;
 
-  // Used by DynASM to store globals.
+  /* Used by DynASM to store globals. */
   void **globals;
 } jitcompiler;
 
-// Functions called by codegen.
+/* Functions called by codegen. */
 static int jmptarget(jitcompiler *jc, const void *key);
 static int define_jmptarget(jitcompiler *jc, const void *key);
 static void asmlabel(jitcompiler *jc, const char *fmt, ...);
@@ -174,21 +174,21 @@ static void freejitcompiler(jitcompiler *jc) {
 
 #ifdef UPB_JIT_LOAD_SO
 
-// Like sprintf except allocates the string, which is returned and owned by the
-// caller.
-//
-// Like the GNU extension asprintf(), except we abort on error (since this is
-// only for debugging).
+/* Like sprintf except allocates the string, which is returned and owned by the
+ * caller.
+ *
+ * Like the GNU extension asprintf(), except we abort on error (since this is
+ * only for debugging). */
 static char *upb_vasprintf(const char *fmt, va_list args) {
-  // Run once to get the length of the string.
+  /* Run once to get the length of the string. */
   va_list args_copy;
   va_copy(args_copy, args);
-  int len = vsnprintf(NULL, 0, fmt, args_copy);
+  int len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
   va_end(args_copy);
 
-  char *ret = malloc(len + 1);  // + 1 for NULL terminator.
+  char *ret = malloc(len + 1);  /* + 1 for NULL terminator. */
   if (!ret) abort();
-  int written = vsnprintf(ret, len + 1, fmt, args);
+  int written = _upb_vsnprintf(ret, len + 1, fmt, args);
   UPB_ASSERT_VAR(written, written == len);
 
   return ret;
@@ -220,23 +220,26 @@ static bool try_getjmptarget(jitcompiler *jc, const void *key, int *pclabel) {
   }
 }
 
-// Gets the pclabel for this bytecode location's jmptarget.  Requires that the
-// jmptarget() has been previously defined.
+/* Gets the pclabel for this bytecode location's jmptarget.  Requires that the
+ * jmptarget() has been previously defined. */
 static int getjmptarget(jitcompiler *jc, const void *key) {
   int pclabel = 0;
+  bool ok;
+
   assert(upb_inttable_lookupptr(&jc->jmpdefined, key, NULL));
-  bool ok = try_getjmptarget(jc, key, &pclabel);
+  ok = try_getjmptarget(jc, key, &pclabel);
   UPB_ASSERT_VAR(ok, ok);
   return pclabel;
 }
 
-// Returns a pclabel that serves as a jmp target for the given bytecode pointer.
-// This should only be called for code that is jumping to the target; code
-// defining the target should use define_jmptarget().
-//
-// Creates/allocates a pclabel for this target if one does not exist already.
+/* Returns a pclabel that serves as a jmp target for the given bytecode pointer.
+ * This should only be called for code that is jumping to the target; code
+ * defining the target should use define_jmptarget().
+ *
+ * Creates/allocates a pclabel for this target if one does not exist already. */
 static int jmptarget(jitcompiler *jc, const void *key) {
-  int pclabel;
+  /* Optimizer sometimes can't tell that initializing this is unnecessary. */
+  int pclabel = 0;
   if (!try_getjmptarget(jc, key, &pclabel)) {
     pclabel = alloc_pclabel(jc);
     upb_inttable_insertptr(&jc->jmptargets, key, upb_value_uint32(pclabel));
@@ -244,12 +247,12 @@ static int jmptarget(jitcompiler *jc, const void *key) {
   return pclabel;
 }
 
-// Defines a pclabel associated with the given bytecode location.
-// Must be called exactly once by the code that is generating the code for this
-// bytecode.
-//
-// Must be called exactly once before bytecode generation is complete (this is a
-// sanity check to make sure the label is defined exactly once).
+/* Defines a pclabel associated with the given bytecode location.
+ * Must be called exactly once by the code that is generating the code for this
+ * bytecode.
+ *
+ * Must be called exactly once before bytecode generation is complete (this is a
+ * sanity check to make sure the label is defined exactly once). */
 static int define_jmptarget(jitcompiler *jc, const void *key) {
 #ifndef NDEBUG
   upb_inttable_insertptr(&jc->jmpdefined, key, upb_value_bool(true));
@@ -257,115 +260,121 @@ static int define_jmptarget(jitcompiler *jc, const void *key) {
   return jmptarget(jc, key);
 }
 
-// Returns a bytecode pc offset relative to the beginning of the group's code.
+/* Returns a bytecode pc offset relative to the beginning of the group's
+ * code. */
 static int pcofs(jitcompiler *jc) {
   return jc->pc - jc->group->bytecode;
 }
 
-// Returns a machine code offset corresponding to the given key.
-// Requires that this key was defined with define_jmptarget.
+/* Returns a machine code offset corresponding to the given key.
+ * Requires that this key was defined with define_jmptarget. */
 static int machine_code_ofs(jitcompiler *jc, const void *key) {
   int pclabel = getjmptarget(jc, key);
-  // Despite its name, this function takes a pclabel and returns the
-  // corresponding machine code offset.
+  /* Despite its name, this function takes a pclabel and returns the
+   * corresponding machine code offset. */
   return dasm_getpclabel(jc, pclabel);
 }
 
-// Returns a machine code offset corresponding to the given method-relative
-// bytecode offset.  Note that the bytecode offset is relative to the given
-// method, but the returned machine code offset is relative to the beginning of
-// *all* the machine code.
+/* Returns a machine code offset corresponding to the given method-relative
+ * bytecode offset.  Note that the bytecode offset is relative to the given
+ * method, but the returned machine code offset is relative to the beginning of
+ * *all* the machine code. */
 static int machine_code_ofs2(jitcompiler *jc, const upb_pbdecodermethod *method,
                              int pcofs) {
   void *bc_target = jc->group->bytecode + method->code_base.ofs + pcofs;
   return machine_code_ofs(jc, bc_target);
 }
 
-// Given a pcofs relative to this method's base, returns a machine code offset
-// relative to jmptarget(dispatch->array) (which is used in jitdispatch as the
-// machine code base for dispatch table lookups).
+/* Given a pcofs relative to this method's base, returns a machine code offset
+ * relative to jmptarget(dispatch->array) (which is used in jitdispatch as the
+ * machine code base for dispatch table lookups). */
 uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method,
                      int pcofs) {
   int mc_base = machine_code_ofs(jc, method->dispatch.array);
   int mc_target = machine_code_ofs2(jc, method, pcofs);
+  int ret;
+
   assert(mc_base > 0);
   assert(mc_target > 0);
-  int ret = mc_target - mc_base;
+  ret = mc_target - mc_base;
   assert(ret > 0);
   return ret;
 }
 
-// Rewrites the dispatch tables into machine code offsets.
+/* Rewrites the dispatch tables into machine code offsets. */
 static void patchdispatch(jitcompiler *jc) {
   upb_inttable_iter i;
   upb_inttable_begin(&i, &jc->group->methods);
   for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
-    method->is_native_ = true;
-
     upb_inttable *dispatch = &method->dispatch;
+    upb_inttable_iter i2;
+
+    method->is_native_ = true;
 
-    // Remove DISPATCH_ENDMSG -- only the bytecode interpreter needs it.
-    // And leaving it around will cause us to find field 0 improperly.
+    /* Remove DISPATCH_ENDMSG -- only the bytecode interpreter needs it.
+     * And leaving it around will cause us to find field 0 improperly. */
     upb_inttable_remove(dispatch, DISPATCH_ENDMSG, NULL);
 
-    upb_inttable_iter i2;
     upb_inttable_begin(&i2, dispatch);
     for (; !upb_inttable_done(&i2); upb_inttable_next(&i2)) {
       uintptr_t key = upb_inttable_iter_key(&i2);
       uint64_t val = upb_value_getuint64(upb_inttable_iter_value(&i2));
       uint64_t newval;
+      bool ok;
       if (key <= UPB_MAX_FIELDNUMBER) {
-        // Primary slot.
+        /* Primary slot. */
         uint64_t ofs;
         uint8_t wt1;
         uint8_t wt2;
         upb_pbdecoder_unpackdispatch(val, &ofs, &wt1, &wt2);
 
-        // Update offset and repack.
+        /* Update offset and repack. */
         ofs = dispatchofs(jc, method, ofs);
         newval = upb_pbdecoder_packdispatch(ofs, wt1, wt2);
         assert((int64_t)newval > 0);
       } else {
-        // Secondary slot.  Since we have 64 bits for the value, we use an
-        // absolute offset.
+        /* Secondary slot.  Since we have 64 bits for the value, we use an
+         * absolute offset. */
         int mcofs = machine_code_ofs2(jc, method, val);
         newval = (uint64_t)((char*)jc->group->jit_code + mcofs);
       }
-      bool ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval));
+      ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval));
       UPB_ASSERT_VAR(ok, ok);
     }
 
-    // Update entry point for this method to point at mc base instead of bc
-    // base.  Set this only *after* we have patched the offsets
-    // (machine_code_ofs2() uses this).
+    /* Update entry point for this method to point at mc base instead of bc
+     * base.  Set this only *after* we have patched the offsets
+     * (machine_code_ofs2() uses this). */
     method->code_base.ptr = (char*)jc->group->jit_code + machine_code_ofs(jc, method);
 
-    upb_byteshandler *h = &method->input_handler_;
-    upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL);
-    upb_byteshandler_setstring(h, jc->group->jit_code, method->code_base.ptr);
-    upb_byteshandler_setendstr(h, upb_pbdecoder_end, method);
+    {
+      upb_byteshandler *h = &method->input_handler_;
+      upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL);
+      upb_byteshandler_setstring(h, jc->group->jit_code, method->code_base.ptr);
+      upb_byteshandler_setendstr(h, upb_pbdecoder_end, method);
+    }
   }
 }
 
 #ifdef UPB_JIT_LOAD_SO
 
 static void load_so(jitcompiler *jc) {
-  // Dump to a .so file in /tmp and load that, so all the tooling works right
-  // (for example, debuggers and profilers will see symbol names for the JIT-ted
-  // code).  This is the same goal of the GDB JIT code below, but the GDB JIT
-  // interface is only used/understood by GDB.  Hopefully a standard will
-  // develop for registering JIT-ted code that all tools will recognize,
-  // rendering this obsolete.
-
-  // jc->asmlabels maps:
-  //   pclabel -> char* label
-  //
-  // Use this to build mclabels, which maps:
-  //   machine code offset -> char* label
-  //
-  // Then we can use mclabels to emit the labels as we iterate over the bytes we
-  // are outputting.
+  /* Dump to a .so file in /tmp and load that, so all the tooling works right
+   * (for example, debuggers and profilers will see symbol names for the JIT-ted
+   * code).  This is the same goal of the GDB JIT code below, but the GDB JIT
+   * interface is only used/understood by GDB.  Hopefully a standard will
+   * develop for registering JIT-ted code that all tools will recognize,
+   * rendering this obsolete.
+   *
+   * jc->asmlabels maps:
+   *   pclabel -> char* label
+   *
+   * Use this to build mclabels, which maps:
+   *   machine code offset -> char* label
+   *
+   * Then we can use mclabels to emit the labels as we iterate over the bytes we
+   * are outputting. */
   upb_inttable_iter i;
   upb_inttable mclabels;
   upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
@@ -376,25 +385,26 @@ static void load_so(jitcompiler *jc) {
                         upb_inttable_iter_value(&i));
   }
 
-  // We write a .s file in text format, as input to the assembler.
-  // Then we run gcc to turn it into a .so file.
-  //
-  // The last "XXXXXX" will be replaced with something randomly generated by
-  // mkstmemp().  We don't add ".s" to this filename because it makes the string
-  // processing for mkstemp() and system() more complicated.
+  /* We write a .s file in text format, as input to the assembler.
+   * Then we run gcc to turn it into a .so file.
+   *
+   * The last "XXXXXX" will be replaced with something randomly generated by
+   * mkstemp().  We don't add ".s" to this filename because it makes the string
+   * processing for mkstemp() and system() more complicated. */
   char s_filename[] = "/tmp/upb-jit-codeXXXXXX";
   int fd = mkstemp(s_filename);
   FILE *f;
   if (fd >= 0 && (f = fdopen(fd, "wb")) != NULL) {
     uint8_t *jit_code = (uint8_t*)jc->group->jit_code;
-    fputs("  .text\n\n", f);
     size_t linelen = 0;
-    for (size_t i = 0; i < jc->group->jit_size; i++) {
+    size_t i;
+    fputs("  .text\n\n", f);
+    for (i = 0; i < jc->group->jit_size; i++) {
       upb_value v;
       if (upb_inttable_lookup(&mclabels, i, &v)) {
         const char *label = upb_value_getptr(v);
-        // "X." makes our JIT syms recognizable as such, which we build into
-        // other tooling.
+        /* "X." makes our JIT syms recognizable as such, which we build into
+         * other tooling. */
         fprintf(f, "\n\nX.%s:\n", label);
         fprintf(f, "  .globl X.%s", label);
         linelen = 1000;
@@ -412,10 +422,10 @@ static void load_so(jitcompiler *jc) {
     abort();
   }
 
-  // This is exploitable if you have an adversary on your machine who can write
-  // to this tmp directory.  But this is just for debugging so we don't worry
-  // too much about that.  It shouldn't be prone to races against concurrent
-  // (non-adversarial) upb JIT's because we used mkstemp().
+  /* This is exploitable if you have an adversary on your machine who can write
+   * to this tmp directory.  But this is just for debugging so we don't worry
+   * too much about that.  It shouldn't be prone to races against concurrent
+   * (non-adversarial) upb JITs because we used mkstemp(). */
   char *cmd = upb_asprintf("gcc -shared -o %s.so -x assembler %s", s_filename,
                            s_filename);
   if (system(cmd) != 0) {
@@ -426,12 +436,14 @@ static void load_so(jitcompiler *jc) {
 
   char *so_filename = upb_asprintf("%s.so", s_filename);
 
-  // Some convenience symlinks.
-  // This is racy, but just for convenience.
+  /* Some convenience symlinks.
+   * This is racy, but just for convenience. */
+  int ret;
   unlink("/tmp/upb-jit-code.so");
   unlink("/tmp/upb-jit-code.s");
-  symlink(s_filename, "/tmp/upb-jit-code.s");
-  symlink(so_filename, "/tmp/upb-jit-code.so");
+  ret = symlink(s_filename, "/tmp/upb-jit-code.s");
+  ret = symlink(so_filename, "/tmp/upb-jit-code.so");
+  UPB_UNUSED(ret);  /* We don't care if this fails. */
 
   jc->group->dl = dlopen(so_filename, RTLD_LAZY);
   free(so_filename);
@@ -453,22 +465,26 @@ static void load_so(jitcompiler *jc) {
 #endif
 
 void upb_pbdecoder_jit(mgroup *group) {
+  jitcompiler *jc;
+  char *jit_code;
+  int dasm_status;
+
   group->debug_info = NULL;
   group->dl = NULL;
 
   assert(group->bytecode);
-  jitcompiler *jc = newjitcompiler(group);
+  jc = newjitcompiler(group);
   emit_static_asm(jc);
   jitbytecode(jc);
 
-  int dasm_status = dasm_link(jc, &jc->group->jit_size);
+  dasm_status = dasm_link(jc, &jc->group->jit_size);
   if (dasm_status != DASM_S_OK) {
     fprintf(stderr, "DynASM error; returned status: 0x%08x\n", dasm_status);
     abort();
   }
 
-  char *jit_code = mmap(NULL, jc->group->jit_size, PROT_READ | PROT_WRITE,
-                        MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+  jit_code = mmap(NULL, jc->group->jit_size, PROT_READ | PROT_WRITE,
+                  MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
   dasm_encode(jc, jit_code);
   mprotect(jit_code, jc->group->jit_size, PROT_EXEC | PROT_READ);
   jc->group->jit_code = (upb_string_handlerfunc *)jit_code;
@@ -481,7 +497,7 @@ void upb_pbdecoder_jit(mgroup *group) {
 
   freejitcompiler(jc);
 
-  // Now the bytecode is no longer needed.
+  /* Now the bytecode is no longer needed. */
   free(group->bytecode);
   group->bytecode = NULL;
 }
@@ -493,7 +509,7 @@ void upb_pbdecoder_freejit(mgroup *group) {
     dlclose(group->dl);
 #endif
   } else {
-    munmap(group->jit_code, group->jit_size);
+    munmap((void*)group->jit_code, group->jit_size);
   }
   free(group->debug_info);
 }
diff --git a/upb/pb/compile_decoder_x64.dasc b/upb/pb/compile_decoder_x64.dasc
index b43a520..dfc9597 100644
--- a/upb/pb/compile_decoder_x64.dasc
+++ b/upb/pb/compile_decoder_x64.dasc
@@ -143,13 +143,13 @@ static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
   return h ? upb_handlers_gethandler(h, sel) : NULL;
 }
 
-// Defines an "assembly label" for the current code generation offset.
-// This label exists *purely* for debugging purposes: it is emitted into
-// the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
-// defined.
-//
-// We would define this in the .c file except that it conditionally defines a
-// pclabel.
+/* Defines an "assembly label" for the current code generation offset.
+ * This label exists *purely* for debugging purposes: it is emitted into
+ * the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
+ * defined.
+ *
+ * We would define this in the .c file except that it conditionally defines a
+ * pclabel. */
 static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
 #ifndef NDEBUG
   int ofs = jc->dynasm->section->ofs;
@@ -167,37 +167,39 @@ static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
   va_end(args);
 
   int pclabel = alloc_pclabel(jc);
-  // Normally we would prefer to allocate this inline with the codegen,
-  // ie.
-  //   |=>asmlabel(...)
-  // But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
-  // we do it here instead.
+  /* Normally we would prefer to allocate this inline with the codegen,
+   * ie.
+   *   |=>asmlabel(...)
+   * But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
+   * we do it here instead. */
   |=>pclabel:
   upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
 #endif
 }
 
-// Should only be called when the associated handler is known to exist.
+/* Should only be called when the associated handler is known to exist. */
 static bool alwaysok(const upb_handlers *h, upb_selector_t sel) {
   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
   bool ok = upb_handlers_getattr(h, sel, &attr);
+  bool ret;
+
   UPB_ASSERT_VAR(ok, ok);
-  bool ret = upb_handlerattr_alwaysok(&attr);
+  ret = upb_handlerattr_alwaysok(&attr);
   upb_handlerattr_uninit(&attr);
   return ret;
 }
 
-// Emit static assembly routines; code that does not vary based on the message
-// schema.  Since it's not input-dependent, we only need one single copy of it.
-// For the moment we generate a single copy per generated handlers.  Eventually
-// we should generate this code at compile time and link it into the binary so
-// we have one copy total.  To do that we'll want to be sure that it is within
-// 2GB of our JIT code, so that branches between the two are near (rel32).
-//
-// We'd put this assembly in a .s file directly, but DynASM's ability to
-// calculate structure offsets automatically is too useful to pass up (it's way
-// more convenient to write DECODER->sink than [rbx + 0x96], especially since
-// the latter would have to be changed whenever the structure is updated).
+/* Emit static assembly routines; code that does not vary based on the message
+ * schema.  Since it's not input-dependent, we only need one single copy of it.
+ * For the moment we generate a single copy per generated handlers.  Eventually
+ * we should generate this code at compile time and link it into the binary so
+ * we have one copy total.  To do that we'll want to be sure that it is within
+ * 2GB of our JIT code, so that branches between the two are near (rel32).
+ *
+ * We'd put this assembly in a .s file directly, but DynASM's ability to
+ * calculate structure offsets automatically is too useful to pass up (it's way
+ * more convenient to write DECODER->sink than [rbx + 0x96], especially since
+ * the latter would have to be changed whenever the structure is updated). */
 static void emit_static_asm(jitcompiler *jc) {
   | // Trampolines for entering/exiting the JIT.  These are a bit tricky to
   | // support full resuming; when we suspend we copy the JIT's portion of
@@ -526,15 +528,17 @@ static void jitprimitive(jitcompiler *jc, opcode op,
     X, F64, F32, V64, V64, V32, F64, F32, V64, X, X, X, X, V32, V32, F32, F64,
     V32, V64 };
   static char fastpath_bytes[] = { 1, 1, 4, 8 };
-  const valtype_t type = types[op];
-  const int fastbytes = fastpath_bytes[type];
+  const valtype_t vtype = types[op];
+  const int fastbytes = fastpath_bytes[vtype];
   upb_func *handler = gethandler(h, sel);
+  upb_fieldtype_t ftype;
+  const upb_shim_data *data;
 
   if (handler) {
     |1:
     |  chkneob  fastbytes, >3
     |2:
-    switch (type) {
+    switch (vtype) {
     case V32:
       |  call   ->decodev32_fallback
       break;
@@ -551,7 +555,7 @@ static void jitprimitive(jitcompiler *jc, opcode op,
     }
     |  jmp    >4
 
-    // Fast path decode; for when check_bytes bytes are available.
+    /* Fast path decode; for when check_bytes bytes are available. */
     |3:
     switch (op) {
     case OP_PARSE_SFIXED32:
@@ -569,19 +573,19 @@ static void jitprimitive(jitcompiler *jc, opcode op,
       |  movsd  xmm0, qword [PTR]
       break;
     default:
-      // Inline one byte of varint decoding.
+      /* Inline one byte of varint decoding. */
       |  movzx  edx, byte [PTR]
       |  test   dl, dl
       |  js     <2   // Fallback to slow path for >1 byte varint.
       break;
     }
 
-    // Second-stage decode; used for both fast and slow paths
-    // (only needed for a few types).
+    /* Second-stage decode; used for both fast and slow paths
+     * (only needed for a few types). */
     |4:
     switch (op) {
     case OP_PARSE_SINT32:
-      // 32-bit zig-zag decode.
+      /* 32-bit zig-zag decode. */
       |  mov    eax, edx
       |  shr    edx, 1
       |  and    eax, 1
@@ -589,7 +593,7 @@ static void jitprimitive(jitcompiler *jc, opcode op,
       |  xor    edx, eax
       break;
     case OP_PARSE_SINT64:
-      // 64-bit zig-zag decode.
+      /* 64-bit zig-zag decode. */
       |  mov    rax, rdx
       |  shr    rdx, 1
       |  and    rax, 1
@@ -603,11 +607,10 @@ static void jitprimitive(jitcompiler *jc, opcode op,
     default: break;
     }
 
-    // Call callback (or specialize if we can).
-    upb_fieldtype_t type;
-    const upb_shim_data *data = upb_shim_getdata(h, sel, &type);
+    /* Call callback (or specialize if we can). */
+    data = upb_shim_getdata(h, sel, &ftype);
     if (data) {
-      switch (type) {
+      switch (ftype) {
         case UPB_TYPE_INT64:
         case UPB_TYPE_UINT64:
           |  mov   [CLOSURE + data->offset], rdx
@@ -645,14 +648,14 @@ static void jitprimitive(jitcompiler *jc, opcode op,
       }
     }
 
-    // We do this last so that the checkpoint is not advanced past the user's
-    // data until the callback has returned success.
+    /* We do this last so that the checkpoint is not advanced past the user's
+     * data until the callback has returned success. */
     |  add    PTR, fastbytes
   } else {
-    // No handler registered for this value, just skip it.
+    /* No handler registered for this value, just skip it. */
     |  chkneob  fastbytes, >3
     |2:
-    switch (type) {
+    switch (vtype) {
     case V32:
       |  call   ->skipv32_fallback
       break;
@@ -668,9 +671,9 @@ static void jitprimitive(jitcompiler *jc, opcode op,
     case X: break;
     }
 
-    // Fast-path skip.
+    /* Fast-path skip. */
     |3:
-    if (type == V32 || type == V64) {
+    if (vtype == V32 || vtype == V64) {
       |  test   byte [PTR], 0x80
       |  jnz    <2
     }
@@ -680,21 +683,21 @@ static void jitprimitive(jitcompiler *jc, opcode op,
 
 static void jitdispatch(jitcompiler *jc,
                         const upb_pbdecodermethod *method) {
-  // Lots of room for tweaking/optimization here.
+  /* Lots of room for tweaking/optimization here. */
 
   const upb_inttable *dispatch = &method->dispatch;
   bool has_hash_entries = (dispatch->t.count > 0);
 
-  // Whether any of the fields for this message can have two wire types which
-  // are both valid (packed & non-packed).
-  //
-  // OPT: populate this more precisely; not all messages with hash entries have
-  // this characteristic.
+  /* Whether any of the fields for this message can have two wire types which
+   * are both valid (packed & non-packed).
+   *
+   * OPT: populate this more precisely; not all messages with hash entries have
+   * this characteristic. */
   bool has_multi_wiretype = has_hash_entries;
 
   |=>define_jmptarget(jc, &method->dispatch):
   |1:
-  // Decode the field tag.
+  /* Decode the field tag. */
   |  mov     aword DECODER->checkpoint, PTR
   |  chkeob  2, >6
   |  movzx   edx, byte [PTR]
@@ -721,8 +724,8 @@ static void jitdispatch(jitcompiler *jc,
   |  shr     edx, 3
   |  and     cl, 7
 
-  // See comment attached to upb_pbdecodermethod.dispatch for layout of the
-  // dispatch table.
+  /* See comment attached to upb_pbdecodermethod.dispatch for layout of the
+   * dispatch table. */
   |2:
   |  cmp     edx, dispatch->array_size
   if (has_hash_entries) {
@@ -794,16 +797,17 @@ static void jitdispatch(jitcompiler *jc,
 
 static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
                    const upb_pbdecodermethod *method) {
-  // Internally we parse unknown fields; if this runs us into DELIMEND we jump
-  // to the corresponding DELIMEND target (either msg end or repeated field
-  // end), which we find from the OP_CHECKDELIM which must have necessarily
-  // preceded us.
+  /* Internally we parse unknown fields; if this runs us into DELIMEND we jump
+   * to the corresponding DELIMEND target (either msg end or repeated field
+   * end), which we find from the OP_CHECKDELIM which must have necessarily
+   * preceded us. */
   uint32_t last_instruction = *(jc->pc - 2);
   int last_arg = (int32_t)last_instruction >> 8;
-  assert((last_instruction & 0xff) == OP_CHECKDELIM);
   uint32_t *delimend = (jc->pc - 1) + last_arg;
   const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
 
+  assert((last_instruction & 0xff) == OP_CHECKDELIM);
+
   if (getop(*(jc->pc - 1)) == OP_TAGN) {
     jc->pc += ptr_words;
   }
@@ -861,7 +865,7 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
   |5:
 }
 
-// Compile the bytecode to x64.
+/* Compile the bytecode to x64. */
 static void jitbytecode(jitcompiler *jc) {
   upb_pbdecodermethod *method = NULL;
   const upb_handlers *h = NULL;
@@ -872,13 +876,13 @@ static void jitbytecode(jitcompiler *jc) {
     int32_t longofs = arg;
 
     if (op != OP_SETDISPATCH) {
-      // Skipped for SETDISPATCH because it defines its own asmlabel for the
-      // dispatch code it emits.
+      /* Skipped for SETDISPATCH because it defines its own asmlabel for the
+       * dispatch code it emits. */
       asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
 
-      // Skipped for SETDISPATCH because it should point at the function
-      // prologue, not the dispatch function that is emitted first.
-      // TODO: optimize this to only define pclabels that are actually used.
+      /* Skipped for SETDISPATCH because it should point at the function
+       * prologue, not the dispatch function that is emitted first.
+       * TODO: optimize this to only define pclabels that are actually used. */
       |=>define_jmptarget(jc, jc->pc):
     }
 
@@ -888,7 +892,7 @@ static void jitbytecode(jitcompiler *jc) {
     case OP_STARTMSG: {
       upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
       if (startmsg) {
-        // bool startmsg(void *closure, const void *hd)
+        /* bool startmsg(void *closure, const void *hd) */
         |1:
         |  mov   ARG1_64, CLOSURE
         |  load_handler_data h, UPB_STARTMSG_SELECTOR
@@ -909,7 +913,7 @@ static void jitbytecode(jitcompiler *jc) {
       upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
       |9:
       if (endmsg) {
-        // bool endmsg(void *closure, const void *hd, upb_status *status)
+        /* bool endmsg(void *closure, const void *hd, upb_status *status) */
         |  mov   ARG1_64, CLOSURE
         |  load_handler_data h, UPB_ENDMSG_SELECTOR
         |  mov   ARG3_64, DECODER->status
@@ -919,27 +923,28 @@ static void jitbytecode(jitcompiler *jc) {
     }
     case OP_SETDISPATCH: {
       uint32_t *op_pc = jc->pc - 1;
-
-      // Load info for new method.
+      const char *msgname;
       upb_inttable *dispatch;
+
+      /* Load info for new method. */
       memcpy(&dispatch, jc->pc, sizeof(void*));
       jc->pc += sizeof(void*) / sizeof(uint32_t);
-      // The OP_SETDISPATCH bytecode contains a pointer that is
-      // &method->dispatch; we want to go backwards and recover method.
+      /* The OP_SETDISPATCH bytecode contains a pointer that is
+       * &method->dispatch; we want to go backwards and recover method. */
       method =
           (void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch));
-      // May be NULL, in which case no handlers for this message will be found.
-      // OPT: we should do better by completely skipping the message in this
-      // case instead of parsing it field by field.  We should also do the skip
-      // in the containing message's code.
+      /* May be NULL, in which case no handlers for this message will be found.
+       * OPT: we should do better by completely skipping the message in this
+       * case instead of parsing it field by field.  We should also do the skip
+       * in the containing message's code. */
       h = method->dest_handlers_;
-      const char *msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
+      msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
 
-      // Emit dispatch code for new method.
+      /* Emit dispatch code for new method. */
       asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
       jitdispatch(jc, method);
 
-      // Emit function prologue for new method.
+      /* Emit function prologue for new method. */
       asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
       |=>define_jmptarget(jc, op_pc):
       |=>define_jmptarget(jc, method):
@@ -967,9 +972,9 @@ static void jitbytecode(jitcompiler *jc) {
     case OP_STARTSTR: {
       upb_func *start = gethandler(h, arg);
       if (start) {
-        // void *startseq(void *closure, const void *hd)
-        // void *startsubmsg(void *closure, const void *hd)
-        // void *startstr(void *closure, const void *hd, size_t size_hint)
+        /* void *startseq(void *closure, const void *hd)
+         * void *startsubmsg(void *closure, const void *hd)
+         * void *startstr(void *closure, const void *hd, size_t size_hint) */
         |1:
         |  mov   ARG1_64, CLOSURE
         |  load_handler_data h, arg
@@ -987,7 +992,7 @@ static void jitbytecode(jitcompiler *jc) {
         }
         |  mov   CLOSURE, rax
       } else {
-        // TODO: nop is only required because of asmlabel().
+        /* TODO: nop is only required because of asmlabel(). */
         |  nop
       }
       break;
@@ -997,9 +1002,9 @@ static void jitbytecode(jitcompiler *jc) {
     case OP_ENDSTR: {
       upb_func *end = gethandler(h, arg);
       if (end) {
-        // bool endseq(void *closure, const void *hd)
-        // bool endsubmsg(void *closure, const void *hd)
-        // bool endstr(void *closure, const void *hd)
+        /* bool endseq(void *closure, const void *hd)
+         * bool endsubmsg(void *closure, const void *hd)
+         * bool endstr(void *closure, const void *hd) */
         |1:
         |  mov   ARG1_64, CLOSURE
         |  load_handler_data h, arg
@@ -1012,7 +1017,7 @@ static void jitbytecode(jitcompiler *jc) {
           |2:
         }
       } else {
-        // TODO: nop is only required because of asmlabel().
+        /* TODO: nop is only required because of asmlabel(). */
         |  nop
       }
       break;
@@ -1028,7 +1033,8 @@ static void jitbytecode(jitcompiler *jc) {
       |  jmp   <1
       |2:
       if (str) {
-        // size_t str(void *closure, const void *hd, const char *str, size_t n)
+        /* size_t str(void *closure, const void *hd, const char *str,
+         *            size_t n) */
         |  mov   ARG1_64, CLOSURE
         |  load_handler_data h, arg
         |  mov   ARG3_64, PTR
@@ -1072,7 +1078,7 @@ static void jitbytecode(jitcompiler *jc) {
       |  mov   CLOSURE, FRAME->sink.closure
       break;
     case OP_SETDELIM:
-      // OPT: experiment with testing vs old offset to optimize away.
+      /* OPT: experiment with testing vs old offset to optimize away. */
       |  mov   DATAEND, DECODER->end
       |  add   DELIMEND, FRAME->end_ofs
       |  cmp   DELIMEND, DECODER->buf
diff --git a/upb/pb/compile_decoder_x64.h b/upb/pb/compile_decoder_x64.h
index bebf7ea..525f143 100644
--- a/upb/pb/compile_decoder_x64.h
+++ b/upb/pb/compile_decoder_x64.h
@@ -10,17 +10,17 @@
 #endif
 
 # 1 "upb/pb/compile_decoder_x64.dasc"
-//|//
-//|// upb - a minimalist implementation of protocol buffers.
-//|//
-//|// Copyright (c) 2011-2013 Google Inc.  See LICENSE for details.
-//|// Author: Josh Haberman <jhaberman@gmail.com>
-//|//
-//|// JIT compiler for upb_pbdecoder on x86-64.  Generates machine code from the
-//|// bytecode generated in compile_decoder.c.
-//|
-//|.arch x64
-//|.actionlist upb_jit_actionlist
+/*|// */
+/*|// upb - a minimalist implementation of protocol buffers. */
+/*|// */
+/*|// Copyright (c) 2011-2013 Google Inc.  See LICENSE for details. */
+/*|// Author: Josh Haberman <jhaberman@gmail.com> */
+/*|// */
+/*|// JIT compiler for upb_pbdecoder on x86-64.  Generates machine code from the */
+/*|// bytecode generated in compile_decoder.c. */
+/*| */
+/*|.arch x64 */
+/*|.actionlist upb_jit_actionlist */
 static const unsigned char upb_jit_actionlist[2420] = {
   249,255,248,10,248,1,85,65,87,65,86,65,85,65,84,83,72,137,252,243,73,137,
   252,255,72,184,237,237,65,84,73,137,228,72,129,228,239,252,255,208,76,137,
@@ -142,7 +142,7 @@ static const unsigned char upb_jit_actionlist[2420] = {
 };
 
 # 12 "upb/pb/compile_decoder_x64.dasc"
-//|.globals UPB_JIT_GLOBAL_
+/*|.globals UPB_JIT_GLOBAL_ */
 enum {
   UPB_JIT_GLOBAL_enterjit,
   UPB_JIT_GLOBAL_exitjit,
@@ -166,7 +166,7 @@ enum {
   UPB_JIT_GLOBAL__MAX
 };
 # 13 "upb/pb/compile_decoder_x64.dasc"
-//|.globalnames upb_jit_globalnames
+/*|.globalnames upb_jit_globalnames */
 static const char *const upb_jit_globalnames[] = {
   "enterjit",
   "exitjit",
@@ -190,135 +190,135 @@ static const char *const upb_jit_globalnames[] = {
   (const char *)0
 };
 # 14 "upb/pb/compile_decoder_x64.dasc"
-//|
-//|// Calling conventions.  Note -- this will need to be changed for
-//|// Windows, which uses a different calling convention!
-//|.define ARG1_64,   rdi
-//|.define ARG2_8,    r6b  // DynASM's equivalent to "sil" -- low byte of esi.
-//|.define ARG2_32,   esi
-//|.define ARG2_64,   rsi
-//|.define ARG3_8,    dl
-//|.define ARG3_32,   edx
-//|.define ARG3_64,   rdx
-//|.define ARG4_64,   rcx
-//|.define ARG5_64,   r8
-//|.define XMMARG1,   xmm0
-//|
-//|// Register allocation / type map.
-//|// ALL of the code in this file uses these register allocations.
-//|// When we "call" within this file, we do not use regular calling
-//|// conventions, but of course when calling to user callbacks we must.
-//|.define PTR,       rbx                       // DECODER->ptr      (unsynced)
-//|.define DATAEND,   r12                       // DECODER->data_end (unsynced)
-//|.define CLOSURE,   r13                       // FRAME->closure    (unsynced)
-//|.type   FRAME,     upb_pbdecoder_frame, r14  // DECODER->top      (unsynced)
+/*| */
+/*|// Calling conventions.  Note -- this will need to be changed for */
+/*|// Windows, which uses a different calling convention! */
+/*|.define ARG1_64,   rdi */
+/*|.define ARG2_8,    r6b  // DynASM's equivalent to "sil" -- low byte of esi. */
+/*|.define ARG2_32,   esi */
+/*|.define ARG2_64,   rsi */
+/*|.define ARG3_8,    dl */
+/*|.define ARG3_32,   edx */
+/*|.define ARG3_64,   rdx */
+/*|.define ARG4_64,   rcx */
+/*|.define ARG5_64,   r8 */
+/*|.define XMMARG1,   xmm0 */
+/*| */
+/*|// Register allocation / type map. */
+/*|// ALL of the code in this file uses these register allocations. */
+/*|// When we "call" within this file, we do not use regular calling */
+/*|// conventions, but of course when calling to user callbacks we must. */
+/*|.define PTR,       rbx                       // DECODER->ptr      (unsynced) */
+/*|.define DATAEND,   r12                       // DECODER->data_end (unsynced) */
+/*|.define CLOSURE,   r13                       // FRAME->closure    (unsynced) */
+/*|.type   FRAME,     upb_pbdecoder_frame, r14  // DECODER->top      (unsynced) */
 #define Dt1(_V) (int)(ptrdiff_t)&(((upb_pbdecoder_frame *)0)_V)
 # 36 "upb/pb/compile_decoder_x64.dasc"
-//|.type   DECODER,   upb_pbdecoder, r15        // DECODER           (immutable)
+/*|.type   DECODER,   upb_pbdecoder, r15        // DECODER           (immutable) */
 #define Dt2(_V) (int)(ptrdiff_t)&(((upb_pbdecoder *)0)_V)
 # 37 "upb/pb/compile_decoder_x64.dasc"
-//|.define DELIMEND,  rbp
-//|
-//| // Spills unsynced registers back to memory.
-//|.macro commit_regs
-//|  mov  DECODER->top, FRAME
-//|  mov  DECODER->ptr, PTR
-//|  mov  DECODER->data_end, DATAEND
-//|  // We don't guarantee that delim_end is NULL when out of range like the
-//|  // interpreter does.
-//|  mov  DECODER->delim_end, DELIMEND
-//|  sub  DELIMEND, DECODER->buf
-//|  add  DELIMEND, DECODER->bufstart_ofs
-//|  mov  FRAME->end_ofs, DELIMEND
-//|  mov  FRAME->sink.closure, CLOSURE
-//|.endmacro
-//|
-//| // Loads unsynced registers from memory back into registers.
-//|.macro load_regs
-//|  mov  FRAME, DECODER->top
-//|  mov  PTR, DECODER->ptr
-//|  mov  DATAEND, DECODER->data_end
-//|  mov  CLOSURE, FRAME->sink.closure
-//|  mov  DELIMEND, FRAME->end_ofs
-//|  sub  DELIMEND, DECODER->bufstart_ofs
-//|  add  DELIMEND, DECODER->buf
-//|.endmacro
-//|
-//| // Calls an external C function at address "addr".
-//|.macro callp, addr
-//|    mov64  rax, (uintptr_t)addr
-//|
-//|    // Stack must be 16-byte aligned (x86-64 ABI requires this).
-//|    //
-//|    // OPT: possibly remove this by statically ensuring correct alignment.
-//|    //
-//|    // OPT: use "call rel32" where possible.
-//|    push   r12
-//|    mov    r12, rsp
-//|    and    rsp, 0xfffffffffffffff0UL   // Align stack.
-//|    call   rax
-//|    mov    rsp, r12
-//|    pop    r12
-//|.endmacro
-//|
-//|.macro ld64, val
-//|| {
-//|| uintptr_t v = (uintptr_t)val;
-//|| if (v > 0xffffffff) {
-//|    mov64  ARG2_64, v
-//|| } else if (v) {
-//|    mov    ARG2_32, v
-//|| } else {
-//|    xor    ARG2_32, ARG2_32
-//|| }
-//|| }
-//|.endmacro
-//|
-//|.macro load_handler_data, h, arg
-//|  ld64   upb_handlers_gethandlerdata(h, arg)
-//|.endmacro
-//|
-//|.macro chkeob, bytes, target
-//|| if (bytes == 1) {
-//|  cmp    PTR, DATAEND
-//|  je     target
-//|| } else {
-//|  mov    rcx, DATAEND
-//|  sub    rcx, PTR
-//|  cmp    rcx, bytes
-//|  jb     target
-//|| }
-//|.endmacro
-//|
-//|.macro chkneob, bytes, target
-//|| if (bytes == 1) {
-//|  cmp    PTR, DATAEND
-//|  jne    target
-//|| } else {
-//|  mov    rcx, DATAEND
-//|  sub    rcx, PTR
-//|  cmp    rcx, bytes
-//|  jae    target
-//|| }
-//|.endmacro
+/*|.define DELIMEND,  rbp */
+/*| */
+/*| // Spills unsynced registers back to memory. */
+/*|.macro commit_regs */
+/*|  mov  DECODER->top, FRAME */
+/*|  mov  DECODER->ptr, PTR */
+/*|  mov  DECODER->data_end, DATAEND */
+/*|  // We don't guarantee that delim_end is NULL when out of range like the */
+/*|  // interpreter does. */
+/*|  mov  DECODER->delim_end, DELIMEND */
+/*|  sub  DELIMEND, DECODER->buf */
+/*|  add  DELIMEND, DECODER->bufstart_ofs */
+/*|  mov  FRAME->end_ofs, DELIMEND */
+/*|  mov  FRAME->sink.closure, CLOSURE */
+/*|.endmacro */
+/*| */
+/*| // Loads unsynced registers from memory back into registers. */
+/*|.macro load_regs */
+/*|  mov  FRAME, DECODER->top */
+/*|  mov  PTR, DECODER->ptr */
+/*|  mov  DATAEND, DECODER->data_end */
+/*|  mov  CLOSURE, FRAME->sink.closure */
+/*|  mov  DELIMEND, FRAME->end_ofs */
+/*|  sub  DELIMEND, DECODER->bufstart_ofs */
+/*|  add  DELIMEND, DECODER->buf */
+/*|.endmacro */
+/*| */
+/*| // Calls an external C function at address "addr". */
+/*|.macro callp, addr */
+/*|    mov64  rax, (uintptr_t)addr */
+/*| */
+/*|    // Stack must be 16-byte aligned (x86-64 ABI requires this). */
+/*|    // */
+/*|    // OPT: possibly remove this by statically ensuring correct alignment. */
+/*|    // */
+/*|    // OPT: use "call rel32" where possible. */
+/*|    push   r12 */
+/*|    mov    r12, rsp */
+/*|    and    rsp, 0xfffffffffffffff0UL   // Align stack. */
+/*|    call   rax */
+/*|    mov    rsp, r12 */
+/*|    pop    r12 */
+/*|.endmacro */
+/*| */
+/*|.macro ld64, val */
+/*|| { */
+/*|| uintptr_t v = (uintptr_t)val; */
+/*|| if (v > 0xffffffff) { */
+/*|    mov64  ARG2_64, v */
+/*|| } else if (v) { */
+/*|    mov    ARG2_32, v */
+/*|| } else { */
+/*|    xor    ARG2_32, ARG2_32 */
+/*|| } */
+/*|| } */
+/*|.endmacro */
+/*| */
+/*|.macro load_handler_data, h, arg */
+/*|  ld64   upb_handlers_gethandlerdata(h, arg) */
+/*|.endmacro */
+/*| */
+/*|.macro chkeob, bytes, target */
+/*|| if (bytes == 1) { */
+/*|  cmp    PTR, DATAEND */
+/*|  je     target */
+/*|| } else { */
+/*|  mov    rcx, DATAEND */
+/*|  sub    rcx, PTR */
+/*|  cmp    rcx, bytes */
+/*|  jb     target */
+/*|| } */
+/*|.endmacro */
+/*| */
+/*|.macro chkneob, bytes, target */
+/*|| if (bytes == 1) { */
+/*|  cmp    PTR, DATAEND */
+/*|  jne    target */
+/*|| } else { */
+/*|  mov    rcx, DATAEND */
+/*|  sub    rcx, PTR */
+/*|  cmp    rcx, bytes */
+/*|  jae    target */
+/*|| } */
+/*|.endmacro */
 
-//|.macro sethas, reg, hasbit
-//|| if (hasbit >= 0) {
-//|    or   byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
-//|| }
-//|.endmacro
-//|
-//| // Decodes 32-bit varint into rdx, inlining 1 byte.
-//|.macro dv32
-//|  chkeob  1, >7
-//|  movzx   edx, byte [PTR]
-//|  test    dl, dl
-//|  jns     >8
-//|7:
-//|  call    ->decodev32_fallback
-//|8:
-//|  add     PTR, 1
-//|.endmacro
+/*|.macro sethas, reg, hasbit */
+/*|| if (hasbit >= 0) { */
+/*|    or   byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8)) */
+/*|| } */
+/*|.endmacro */
+/*| */
+/*| // Decodes 32-bit varint into rdx, inlining 1 byte. */
+/*|.macro dv32 */
+/*|  chkeob  1, >7 */
+/*|  movzx   edx, byte [PTR] */
+/*|  test    dl, dl */
+/*|  jns     >8 */
+/*|7: */
+/*|  call    ->decodev32_fallback */
+/*|8: */
+/*|  add     PTR, 1 */
+/*|.endmacro */
 
 #define DECODE_EOF -3
 
@@ -326,13 +326,13 @@ static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
   return h ? upb_handlers_gethandler(h, sel) : NULL;
 }
 
-// Defines an "assembly label" for the current code generation offset.
-// This label exists *purely* for debugging purposes: it is emitted into
-// the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
-// defined.
-//
-// We would define this in the .c file except that it conditionally defines a
-// pclabel.
+/* Defines an "assembly label" for the current code generation offset.
+ * This label exists *purely* for debugging purposes: it is emitted into
+ * the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
+ * defined.
+ *
+ * We would define this in the .c file except that it conditionally defines a
+ * pclabel. */
 static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
 #ifndef NDEBUG
   int ofs = jc->dynasm->section->ofs;
@@ -350,180 +350,182 @@ static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
   va_end(args);
 
   int pclabel = alloc_pclabel(jc);
-  // Normally we would prefer to allocate this inline with the codegen,
-  // ie.
-  //   |=>asmlabel(...)
-  // But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
-  // we do it here instead.
-  //|=>pclabel:
+  /* Normally we would prefer to allocate this inline with the codegen,
+   * i.e.
+   *   |=>asmlabel(...)
+   * But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
+   * we do it here instead. */
+  /*|=>pclabel: */
   dasm_put(Dst, 0, pclabel);
 # 176 "upb/pb/compile_decoder_x64.dasc"
   upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
 #endif
 }
 
-// Should only be called when the associated handler is known to exist.
+/* Should only be called when the associated handler is known to exist. */
 static bool alwaysok(const upb_handlers *h, upb_selector_t sel) {
   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
   bool ok = upb_handlers_getattr(h, sel, &attr);
+  bool ret;
+
   UPB_ASSERT_VAR(ok, ok);
-  bool ret = upb_handlerattr_alwaysok(&attr);
+  ret = upb_handlerattr_alwaysok(&attr);
   upb_handlerattr_uninit(&attr);
   return ret;
 }
 
-// Emit static assembly routines; code that does not vary based on the message
-// schema.  Since it's not input-dependent, we only need one single copy of it.
-// For the moment we generate a single copy per generated handlers.  Eventually
-// we should generate this code at compile time and link it into the binary so
-// we have one copy total.  To do that we'll want to be sure that it is within
-// 2GB of our JIT code, so that branches between the two are near (rel32).
-//
-// We'd put this assembly in a .s file directly, but DynASM's ability to
-// calculate structure offsets automatically is too useful to pass up (it's way
-// more convenient to write DECODER->sink than [rbx + 0x96], especially since
-// the latter would have to be changed whenever the structure is updated).
+/* Emit static assembly routines; code that does not vary based on the message
+ * schema.  Since it's not input-dependent, we only need one single copy of it.
+ * For the moment we generate a single copy per generated handlers.  Eventually
+ * we should generate this code at compile time and link it into the binary so
+ * we have one copy total.  To do that we'll want to be sure that it is within
+ * 2GB of our JIT code, so that branches between the two are near (rel32).
+ *
+ * We'd put this assembly in a .s file directly, but DynASM's ability to
+ * calculate structure offsets automatically is too useful to pass up (it's way
+ * more convenient to write DECODER->sink than [rbx + 0x96], especially since
+ * the latter would have to be changed whenever the structure is updated). */
 static void emit_static_asm(jitcompiler *jc) {
-  //| // Trampolines for entering/exiting the JIT.  These are a bit tricky to
-  //| // support full resuming; when we suspend we copy the JIT's portion of
-  //| // the call stack into the upb_pbdecoder and restore it when we resume.
+  /*| // Trampolines for entering/exiting the JIT.  These are a bit tricky to */
+  /*| // support full resuming; when we suspend we copy the JIT's portion of */
+  /*| // the call stack into the upb_pbdecoder and restore it when we resume. */
   asmlabel(jc, "enterjit");
-  //|->enterjit:
-  //|1:
-  //|  push  rbp
-  //|  push  r15
-  //|  push  r14
-  //|  push  r13
-  //|  push  r12
-  //|  push  rbx
-  //|
-  //|  mov   rbx, ARG2_64  // Preserve JIT method.
-  //|
-  //|  mov   DECODER, rdi
-  //|  callp upb_pbdecoder_resume  // Same args as us; reuse regs.
-  //|  test  eax, eax
-  //|  jns   >1
-  //|  mov   DECODER->saved_rsp, rsp
-  //|  mov   rax, rbx
-  //|  load_regs
-  //|
-  //|  // Test whether we have a saved stack to resume.
-  //|  mov   ARG3_64, DECODER->call_len
-  //|  test  ARG3_64, ARG3_64
-  //|  jnz   >2
-  //|
-  //|  call  rax
-  //|
-  //|  mov   rax, DECODER->size_param
-  //|  mov   qword DECODER->call_len, 0
-  //|1:
-  //|  pop   rbx
+  /*|->enterjit: */
+  /*|1: */
+  /*|  push  rbp */
+  /*|  push  r15 */
+  /*|  push  r14 */
+  /*|  push  r13 */
+  /*|  push  r12 */
+  /*|  push  rbx */
+  /*| */
+  /*|  mov   rbx, ARG2_64  // Preserve JIT method. */
+  /*| */
+  /*|  mov   DECODER, rdi */
+  /*|  callp upb_pbdecoder_resume  // Same args as us; reuse regs. */
+  /*|  test  eax, eax */
+  /*|  jns   >1 */
+  /*|  mov   DECODER->saved_rsp, rsp */
+  /*|  mov   rax, rbx */
+  /*|  load_regs */
+  /*| */
+  /*|  // Test whether we have a saved stack to resume. */
+  /*|  mov   ARG3_64, DECODER->call_len */
+  /*|  test  ARG3_64, ARG3_64 */
+  /*|  jnz   >2 */
+  /*| */
+  /*|  call  rax */
+  /*| */
+  /*|  mov   rax, DECODER->size_param */
+  /*|  mov   qword DECODER->call_len, 0 */
+  /*|1: */
+  /*|  pop   rbx */
   dasm_put(Dst, 2, (unsigned int)((uintptr_t)upb_pbdecoder_resume), (unsigned int)(((uintptr_t)upb_pbdecoder_resume)>>32), 0xfffffffffffffff0UL, Dt2(->saved_rsp), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs), Dt2(->buf), Dt2(->call_len), Dt2(->size_param), Dt2(->call_len));
-# 236 "upb/pb/compile_decoder_x64.dasc"
-  //|  pop   r12
-  //|  pop   r13
-  //|  pop   r14
-  //|  pop   r15
-  //|  pop   rbp
-  //|  ret
-  //|
-  //|2:
-  //|  // Resume decoder.
-  //|  mov   ARG2_64, DECODER->callstack
-  //|  sub   rsp, ARG3_64
-  //|  mov   ARG1_64, rsp
-  //|  callp memcpy  // Restore stack.
-  //|  ret  // Return to resumed function (not ->enterjit caller).
-  //|
-  //| // Other code can call this to suspend the JIT.
-  //| // To the calling code, it will appear that the function returns when
-  //| // the JIT resumes, and more buffer space will be available.
-  //| // Args: eax=the value that decode() should return.
+# 238 "upb/pb/compile_decoder_x64.dasc"
+  /*|  pop   r12 */
+  /*|  pop   r13 */
+  /*|  pop   r14 */
+  /*|  pop   r15 */
+  /*|  pop   rbp */
+  /*|  ret */
+  /*| */
+  /*|2: */
+  /*|  // Resume decoder. */
+  /*|  mov   ARG2_64, DECODER->callstack */
+  /*|  sub   rsp, ARG3_64 */
+  /*|  mov   ARG1_64, rsp */
+  /*|  callp memcpy  // Restore stack. */
+  /*|  ret  // Return to resumed function (not ->enterjit caller). */
+  /*| */
+  /*| // Other code can call this to suspend the JIT. */
+  /*| // To the calling code, it will appear that the function returns when */
+  /*| // the JIT resumes, and more buffer space will be available. */
+  /*| // Args: eax=the value that decode() should return. */
   dasm_put(Dst, 115, Dt2(->callstack), (unsigned int)((uintptr_t)memcpy), (unsigned int)(((uintptr_t)memcpy)>>32), 0xfffffffffffffff0UL);
-# 255 "upb/pb/compile_decoder_x64.dasc"
+# 257 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "exitjit");
-  //|->exitjit:
-  //|  // Save the stack into DECODER->callstack.
-  //|  mov   ARG1_64, DECODER->callstack
-  //|  mov   ARG2_64, rsp
-  //|  mov   ARG3_64, DECODER->saved_rsp
-  //|  sub   ARG3_64, rsp
-  //|  mov   DECODER->call_len, ARG3_64  // Preserve len for next resume.
-  //|  mov   ebx, eax  // Preserve return value across memcpy.
-  //|  callp memcpy    // Copy stack into decoder.
-  //|  mov   eax, ebx  // This will be our return value.
-  //|
-  //|  // Must NOT do this before the memcpy(), otherwise memcpy() will
-  //|  // clobber the stack we are trying to save!
-  //|  mov   rsp, DECODER->saved_rsp
-  //|  pop   rbx
-  //|  pop   r12
-  //|  pop   r13
-  //|  pop   r14
-  //|  pop   r15
-  //|  pop   rbp
-  //|  ret
-  //|
-  //| // Like suspend() in the C decoder, except that the function appears
-  //| // (from the caller's perspective) not to return until the decoder is
-  //| // resumed.
+  /*|->exitjit: */
+  /*|  // Save the stack into DECODER->callstack. */
+  /*|  mov   ARG1_64, DECODER->callstack */
+  /*|  mov   ARG2_64, rsp */
+  /*|  mov   ARG3_64, DECODER->saved_rsp */
+  /*|  sub   ARG3_64, rsp */
+  /*|  mov   DECODER->call_len, ARG3_64  // Preserve len for next resume. */
+  /*|  mov   ebx, eax  // Preserve return value across memcpy. */
+  /*|  callp memcpy    // Copy stack into decoder. */
+  /*|  mov   eax, ebx  // This will be our return value. */
+  /*| */
+  /*|  // Must NOT do this before the memcpy(), otherwise memcpy() will */
+  /*|  // clobber the stack we are trying to save! */
+  /*|  mov   rsp, DECODER->saved_rsp */
+  /*|  pop   rbx */
+  /*|  pop   r12 */
+  /*|  pop   r13 */
+  /*|  pop   r14 */
+  /*|  pop   r15 */
+  /*|  pop   rbp */
+  /*|  ret */
+  /*| */
+  /*| // Like suspend() in the C decoder, except that the function appears */
+  /*| // (from the caller's perspective) not to return until the decoder is */
+  /*| // resumed. */
   dasm_put(Dst, 161, Dt2(->callstack), Dt2(->saved_rsp), Dt2(->call_len), (unsigned int)((uintptr_t)memcpy), (unsigned int)(((uintptr_t)memcpy)>>32), 0xfffffffffffffff0UL, Dt2(->saved_rsp));
-# 281 "upb/pb/compile_decoder_x64.dasc"
+# 283 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "suspend");
-  //|->suspend:
-  //|  cmp   DECODER->ptr, PTR
-  //|  je    >1
-  //|  mov   DECODER->checkpoint, PTR
-  //|1:
-  //|  commit_regs
-  //|  mov   rdi, DECODER
-  //|  callp upb_pbdecoder_suspend
-  //|  jmp   ->exitjit
-  //|
+  /*|->suspend: */
+  /*|  cmp   DECODER->ptr, PTR */
+  /*|  je    >1 */
+  /*|  mov   DECODER->checkpoint, PTR */
+  /*|1: */
+  /*|  commit_regs */
+  /*|  mov   rdi, DECODER */
+  /*|  callp upb_pbdecoder_suspend */
+  /*|  jmp   ->exitjit */
+  /*| */
   dasm_put(Dst, 222, Dt2(->ptr), Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_suspend), (unsigned int)(((uintptr_t)upb_pbdecoder_suspend)>>32), 0xfffffffffffffff0UL);
-# 292 "upb/pb/compile_decoder_x64.dasc"
+# 294 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "pushlendelim");
-  //|->pushlendelim:
-  //|1:
-  //|  mov   FRAME->sink.closure, CLOSURE
-  //|  mov   DECODER->checkpoint, PTR
-  //|  dv32
+  /*|->pushlendelim: */
+  /*|1: */
+  /*|  mov   FRAME->sink.closure, CLOSURE */
+  /*|  mov   DECODER->checkpoint, PTR */
+  /*|  dv32 */
   dasm_put(Dst, 300, Dt1(->sink.closure), Dt2(->checkpoint));
    if (1 == 1) {
   dasm_put(Dst, 313);
    } else {
   dasm_put(Dst, 321);
    }
-# 298 "upb/pb/compile_decoder_x64.dasc"
-  //|  mov   rcx, DELIMEND
-  //|  sub   rcx, PTR
-  //|  sub   rcx, rdx
-  //|  jb    ->err  // Len is greater than enclosing message.
-  //|  mov   FRAME->end_ofs, rcx
-  //|  cmp   FRAME, DECODER->limit
-  //|  je    >3   // Stack overflow
-  //|  add   FRAME, sizeof(upb_pbdecoder_frame)
-  //|  mov   DELIMEND, PTR
-  //|  add   DELIMEND, rdx
-  //|  mov   dword FRAME->groupnum, 0
-  //|  test  rcx, rcx
-  //|  jz    >2
-  //|  mov   DATAEND, DECODER->end
-  //|  cmp   PTR, DELIMEND
-  //|  ja    >2
-  //|  cmp   DELIMEND, DATAEND
-  //|  ja    >2
-  //|  mov   DATAEND, DELIMEND  // If DELIMEND >= PTR && DELIMEND < DATAEND
+# 300 "upb/pb/compile_decoder_x64.dasc"
+  /*|  mov   rcx, DELIMEND */
+  /*|  sub   rcx, PTR */
+  /*|  sub   rcx, rdx */
+  /*|  jb    ->err  // Len is greater than enclosing message. */
+  /*|  mov   FRAME->end_ofs, rcx */
+  /*|  cmp   FRAME, DECODER->limit */
+  /*|  je    >3   // Stack overflow */
+  /*|  add   FRAME, sizeof(upb_pbdecoder_frame) */
+  /*|  mov   DELIMEND, PTR */
+  /*|  add   DELIMEND, rdx */
+  /*|  mov   dword FRAME->groupnum, 0 */
+  /*|  test  rcx, rcx */
+  /*|  jz    >2 */
+  /*|  mov   DATAEND, DECODER->end */
+  /*|  cmp   PTR, DELIMEND */
+  /*|  ja    >2 */
+  /*|  cmp   DELIMEND, DATAEND */
+  /*|  ja    >2 */
+  /*|  mov   DATAEND, DELIMEND  // If DELIMEND >= PTR && DELIMEND < DATAEND */
   dasm_put(Dst, 337, Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), Dt2(->end));
-# 317 "upb/pb/compile_decoder_x64.dasc"
-  //|2:
-  //|  ret
-  //|3:
-  //|  // Error -- call seterr.
-  //|  mov   PTR, DECODER->checkpoint  // Rollback to before the delim len.
-  //|  // Prepare seterr args.
-  //|  mov   ARG1_64, DECODER
-  //|  ld64  kPbDecoderStackOverflow
+# 319 "upb/pb/compile_decoder_x64.dasc"
+  /*|2: */
+  /*|  ret */
+  /*|3: */
+  /*|  // Error -- call seterr. */
+  /*|  mov   PTR, DECODER->checkpoint  // Rollback to before the delim len. */
+  /*|  // Prepare seterr args. */
+  /*|  mov   ARG1_64, DECODER */
+  /*|  ld64  kPbDecoderStackOverflow */
   dasm_put(Dst, 428, Dt2(->checkpoint));
    {
    uintptr_t v = (uintptr_t)kPbDecoderStackOverflow;
@@ -535,248 +537,248 @@ static void emit_static_asm(jitcompiler *jc) {
   dasm_put(Dst, 454);
    }
    }
-# 325 "upb/pb/compile_decoder_x64.dasc"
-  //|  callp upb_pbdecoder_seterr
-  //|  call  ->suspend
-  //|  jmp   <1
-  //|
-  //| // For getting a value that spans a buffer seam.  Falls back to C.
-  //|.macro getvalue_slow, func, bytes
-  //|  sub   rsp, 8           // Need stack space for func to write value to.
-  //|1:
-  //|  mov   qword [rsp], 0   // For parsing routines that only parse 32 bits.
-  //|  mov   ARG1_64, DECODER
-  //|  mov   ARG2_64, rsp
-  //|  mov   DECODER->checkpoint, PTR
-  //|  commit_regs
-  //|  callp func
-  //|  load_regs
-  //|  test  eax, eax
-  //|  jns   >2
-  //|  // Success; return parsed data (in rdx AND xmm0).
-  //|  mov   rdx, [rsp]
-  //|  movsd xmm0, qword [rsp]
-  //|  add   rsp, 8
-  //|  sub   PTR, bytes       // Bias our buffer pointer to rejoin the fast-path.
-  //|  mov   DECODER->ptr, PTR
-  //|  ret
-  //|2:
-  //|  call  ->exitjit   // Return eax from decode function.
-  //|  jmp   <1
-  //|.endmacro
-  //|
+# 327 "upb/pb/compile_decoder_x64.dasc"
+  /*|  callp upb_pbdecoder_seterr */
+  /*|  call  ->suspend */
+  /*|  jmp   <1 */
+  /*| */
+  /*| // For getting a value that spans a buffer seam.  Falls back to C. */
+  /*|.macro getvalue_slow, func, bytes */
+  /*|  sub   rsp, 8           // Need stack space for func to write value to. */
+  /*|1: */
+  /*|  mov   qword [rsp], 0   // For parsing routines that only parse 32 bits. */
+  /*|  mov   ARG1_64, DECODER */
+  /*|  mov   ARG2_64, rsp */
+  /*|  mov   DECODER->checkpoint, PTR */
+  /*|  commit_regs */
+  /*|  callp func */
+  /*|  load_regs */
+  /*|  test  eax, eax */
+  /*|  jns   >2 */
+  /*|  // Success; return parsed data (in rdx AND xmm0). */
+  /*|  mov   rdx, [rsp] */
+  /*|  movsd xmm0, qword [rsp] */
+  /*|  add   rsp, 8 */
+  /*|  sub   PTR, bytes       // Bias our buffer pointer to rejoin the fast-path. */
+  /*|  mov   DECODER->ptr, PTR */
+  /*|  ret */
+  /*|2: */
+  /*|  call  ->exitjit   // Return eax from decode function. */
+  /*|  jmp   <1 */
+  /*|.endmacro */
+  /*| */
   dasm_put(Dst, 458, (unsigned int)((uintptr_t)upb_pbdecoder_seterr), (unsigned int)(((uintptr_t)upb_pbdecoder_seterr)>>32), 0xfffffffffffffff0UL);
-# 354 "upb/pb/compile_decoder_x64.dasc"
+# 356 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "parse_unknown");
-  //| // Args: edx=fieldnum, cl=wire type
-  //|->parse_unknown:
-  //|  // OPT: handle directly instead of kicking to C.
-  //|  // Check for ENDGROUP.
-  //|  mov     ARG1_64, DECODER
-  //|  mov     ARG2_32, edx
-  //|  movzx   ARG3_32, cl
-  //|  commit_regs
-  //|  callp   upb_pbdecoder_skipunknown
-  //|  load_regs
-  //|  cmp     eax, DECODE_ENDGROUP
-  //|  jne     >1
+  /*| // Args: edx=fieldnum, cl=wire type */
+  /*|->parse_unknown: */
+  /*|  // OPT: handle directly instead of kicking to C. */
+  /*|  // Check for ENDGROUP. */
+  /*|  mov     ARG1_64, DECODER */
+  /*|  mov     ARG2_32, edx */
+  /*|  movzx   ARG3_32, cl */
+  /*|  commit_regs */
+  /*|  callp   upb_pbdecoder_skipunknown */
+  /*|  load_regs */
+  /*|  cmp     eax, DECODE_ENDGROUP */
+  /*|  jne     >1 */
   dasm_put(Dst, 487, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_skipunknown), (unsigned int)(((uintptr_t)upb_pbdecoder_skipunknown)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs), Dt2(->buf), DECODE_ENDGROUP);
-# 367 "upb/pb/compile_decoder_x64.dasc"
-  //|  ret     // Return eax=DECODE_ENDGROUP, not zero
-  //|1:
-  //|  cmp     eax, DECODE_OK
-  //|  je      >1
-  //|  call    ->exitjit  // Return eax from decode function.
-  //|1:
-  //|  xor     eax, eax
-  //|  ret
-  //|
-  //| // Fallback functions for parsing single values.  These are used when the
-  //| // buffer doesn't contain enough remaining data for the fast path.  Each
-  //| // primitive type (v32, v64, f32, f64) has two functions: decode & skip.
-  //| // Decode functions return their value in rsi/esi.
-  //| //
-  //| // These functions leave PTR = value_end - fast_path_bytes, so that we can
-  //| // re-join the fast path which will add fast_path_bytes after the callback
-  //| // completes.  We also set DECODER->ptr to this value which is a signal to
-  //| // ->suspend that DECODER->checkpoint is up to date.
+# 369 "upb/pb/compile_decoder_x64.dasc"
+  /*|  ret     // Return eax=DECODE_ENDGROUP, not zero */
+  /*|1: */
+  /*|  cmp     eax, DECODE_OK */
+  /*|  je      >1 */
+  /*|  call    ->exitjit  // Return eax from decode function. */
+  /*|1: */
+  /*|  xor     eax, eax */
+  /*|  ret */
+  /*| */
+  /*| // Fallback functions for parsing single values.  These are used when the */
+  /*| // buffer doesn't contain enough remaining data for the fast path.  Each */
+  /*| // primitive type (v32, v64, f32, f64) has two functions: decode & skip. */
+  /*| // Decode functions return their value in rdx/edx (getvalue_slow also sets xmm0). */
+  /*| // */
+  /*| // These functions leave PTR = value_end - fast_path_bytes, so that we can */
+  /*| // re-join the fast path which will add fast_path_bytes after the callback */
+  /*| // completes.  We also set DECODER->ptr to this value which is a signal to */
+  /*| // ->suspend that DECODER->checkpoint is up to date. */
   dasm_put(Dst, 584, DECODE_OK);
-# 385 "upb/pb/compile_decoder_x64.dasc"
+# 387 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "skip_decode_f32_fallback");
-  //|->skipf32_fallback:
-  //|->decodef32_fallback:
-  //|  getvalue_slow upb_pbdecoder_decode_f32, 4
+  /*|->skipf32_fallback: */
+  /*|->decodef32_fallback: */
+  /*|  getvalue_slow upb_pbdecoder_decode_f32, 4 */
   dasm_put(Dst, 608, Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_f32), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_f32)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs));
-# 389 "upb/pb/compile_decoder_x64.dasc"
-  //|
+# 391 "upb/pb/compile_decoder_x64.dasc"
+  /*| */
   dasm_put(Dst, 712, Dt2(->bufstart_ofs), Dt2(->buf), Dt2(->ptr));
-# 390 "upb/pb/compile_decoder_x64.dasc"
+# 392 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "skip_decode_f64_fallback");
-  //|->skipf64_fallback:
-  //|->decodef64_fallback:
-  //|  getvalue_slow upb_pbdecoder_decode_f64, 8
+  /*|->skipf64_fallback: */
+  /*|->decodef64_fallback: */
+  /*|  getvalue_slow upb_pbdecoder_decode_f64, 8 */
   dasm_put(Dst, 760, Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_f64), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_f64)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs));
-# 394 "upb/pb/compile_decoder_x64.dasc"
-  //|
-  //| // Called for varint >= 1 byte.
-  dasm_put(Dst, 864, Dt2(->bufstart_ofs), Dt2(->buf), Dt2(->ptr));
 # 396 "upb/pb/compile_decoder_x64.dasc"
+  /*| */
+  /*| // Called for varint >= 1 byte. */
+  dasm_put(Dst, 864, Dt2(->bufstart_ofs), Dt2(->buf), Dt2(->ptr));
+# 398 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "skip_decode_v32_fallback");
-  //|->skipv32_fallback:
-  //|->skipv64_fallback:
-  //|  chkeob   16, >1
+  /*|->skipv32_fallback: */
+  /*|->skipv64_fallback: */
+  /*|  chkeob   16, >1 */
   dasm_put(Dst, 912);
    if (16 == 1) {
   dasm_put(Dst, 917);
    } else {
   dasm_put(Dst, 925);
    }
-# 400 "upb/pb/compile_decoder_x64.dasc"
-  //|  // With at least 16 bytes left, we can do a branch-less SSE version.
-  //|  movdqu   xmm0, [PTR]
-  //|  pmovmskb eax, xmm0   // bits 0-15 are continuation bits, 16-31 are 0.
-  //|  not      eax
-  //|  bsf      eax, eax
-  //|  cmp      al, 10
-  //|  jae      ->decode_varint_slow  // Error (>10 byte varint).
-  //|  add      PTR, rax    // bsf result is 0-based, so PTR=end-1, as desired.
-  //|  ret
-  //|
-  //|1:
-  //|  // With fewer than 16 bytes, we have to read byte by byte.
-  //|  lea      rcx, [PTR + 10]
-  //|  mov      rax, PTR    // Preserve PTR in case of fallback to slow path.
-  //|  cmp      rcx, DATAEND
-  //|  cmova    rcx, DATAEND    // rcx = MIN(DATAEND, PTR + 10)
-  //|2:
-  //|  cmp      rax, rcx
-  //|  je       ->decode_varint_slow
-  //|  test     byte [rax], 0x80
-  //|  jz       >3
-  //|  add      rax, 1
-  //|  jmp      <2
-  //|3:
-  //|  mov      PTR, rax  // PTR = varint_end - 1, as desired
-  //|  ret
-  //|
-  //| // Returns tag in edx
+# 402 "upb/pb/compile_decoder_x64.dasc"
+  /*|  // With at least 16 bytes left, we can do a branch-less SSE version. */
+  /*|  movdqu   xmm0, [PTR] */
+  /*|  pmovmskb eax, xmm0   // bits 0-15 are continuation bits, 16-31 are 0. */
+  /*|  not      eax */
+  /*|  bsf      eax, eax */
+  /*|  cmp      al, 10 */
+  /*|  jae      ->decode_varint_slow  // Error (>10 byte varint). */
+  /*|  add      PTR, rax    // bsf result is 0-based, so PTR=end-1, as desired. */
+  /*|  ret */
+  /*| */
+  /*|1: */
+  /*|  // With fewer than 16 bytes, we have to read byte by byte. */
+  /*|  lea      rcx, [PTR + 10] */
+  /*|  mov      rax, PTR    // Preserve PTR in case of fallback to slow path. */
+  /*|  cmp      rcx, DATAEND */
+  /*|  cmova    rcx, DATAEND    // rcx = MIN(DATAEND, PTR + 10) */
+  /*|2: */
+  /*|  cmp      rax, rcx */
+  /*|  je       ->decode_varint_slow */
+  /*|  test     byte [rax], 0x80 */
+  /*|  jz       >3 */
+  /*|  add      rax, 1 */
+  /*|  jmp      <2 */
+  /*|3: */
+  /*|  mov      PTR, rax  // PTR = varint_end - 1, as desired */
+  /*|  ret */
+  /*| */
+  /*| // Returns tag in edx */
   dasm_put(Dst, 941, 10);
-# 428 "upb/pb/compile_decoder_x64.dasc"
+# 430 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "decode_unknown_tag_fallback");
-  //|->decode_unknown_tag_fallback:
-  //|  sub   rsp, 16
-  //|1:
-  //|  cmp      PTR, DELIMEND
-  //|  jne      >2
-  //|  add      rsp, 16
-  //|  xor      eax, eax
-  //|  ret
-  //|2:
-  //|  // OPT: Have a medium-fast path before falling back to _slow.
-  //|  mov   ARG1_64, DECODER
-  //|  mov   ARG2_64, rsp
-  //|  commit_regs
-  //|  callp upb_pbdecoder_decode_varint_slow
-  //|  load_regs
+  /*|->decode_unknown_tag_fallback: */
+  /*|  sub   rsp, 16 */
+  /*|1: */
+  /*|  cmp      PTR, DELIMEND */
+  /*|  jne      >2 */
+  /*|  add      rsp, 16 */
+  /*|  xor      eax, eax */
+  /*|  ret */
+  /*|2: */
+  /*|  // OPT: Have a medium-fast path before falling back to _slow. */
+  /*|  mov   ARG1_64, DECODER */
+  /*|  mov   ARG2_64, rsp */
+  /*|  commit_regs */
+  /*|  callp upb_pbdecoder_decode_varint_slow */
+  /*|  load_regs */
   dasm_put(Dst, 1014, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_varint_slow), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_varint_slow)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure));
-# 444 "upb/pb/compile_decoder_x64.dasc"
-  //|  cmp   eax, 0
-  //|  jge   >3
-  //|  mov   edx, [rsp]   // Success; return parsed data.
-  //|  add   rsp, 16
-  //|  ret
-  //|3:
-  //|  call  ->exitjit   // Return eax from decode function.
-  //|  jmp   <1
-  //|
-  //| // Called for varint >= 1 byte.
+# 446 "upb/pb/compile_decoder_x64.dasc"
+  /*|  cmp   eax, 0 */
+  /*|  jge   >3 */
+  /*|  mov   edx, [rsp]   // Success; return parsed data. */
+  /*|  add   rsp, 16 */
+  /*|  ret */
+  /*|3: */
+  /*|  call  ->exitjit   // Return eax from decode function. */
+  /*|  jmp   <1 */
+  /*| */
+  /*| // Called for varint >= 1 byte. */
   dasm_put(Dst, 1117, Dt1(->end_ofs), Dt2(->bufstart_ofs), Dt2(->buf));
-# 454 "upb/pb/compile_decoder_x64.dasc"
+# 456 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "decode_v32_v64_fallback");
-  //|->decodev32_fallback:
-  //|->decodev64_fallback:
-  //|  chkeob   10, ->decode_varint_slow
+  /*|->decodev32_fallback: */
+  /*|->decodev64_fallback: */
+  /*|  chkeob   10, ->decode_varint_slow */
   dasm_put(Dst, 1155);
    if (10 == 1) {
   dasm_put(Dst, 1160);
    } else {
   dasm_put(Dst, 1168);
    }
-# 458 "upb/pb/compile_decoder_x64.dasc"
-  //|  // OPT: do something faster than just calling the C version.
-  //|  mov      rdi, PTR
-  //|  callp    upb_vdecode_fast
-  //|  test     rax, rax
-  //|  je       ->decode_varint_slow  // Unterminated varint.
-  //|  mov      PTR, rax
-  //|  sub      PTR, 1
-  //|  mov      DECODER->ptr, PTR
-  //|  ret
-  //|
+# 460 "upb/pb/compile_decoder_x64.dasc"
+  /*|  // OPT: do something faster than just calling the C version. */
+  /*|  mov      rdi, PTR */
+  /*|  callp    upb_vdecode_fast */
+  /*|  test     rax, rax */
+  /*|  je       ->decode_varint_slow  // Unterminated varint. */
+  /*|  mov      PTR, rax */
+  /*|  sub      PTR, 1 */
+  /*|  mov      DECODER->ptr, PTR */
+  /*|  ret */
+  /*| */
   dasm_put(Dst, 1184, (unsigned int)((uintptr_t)upb_vdecode_fast), (unsigned int)(((uintptr_t)upb_vdecode_fast)>>32), 0xfffffffffffffff0UL, Dt2(->ptr));
-# 468 "upb/pb/compile_decoder_x64.dasc"
+# 470 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "decode_varint_slow");
-  //|->decode_varint_slow:
-  //|  // Slow path: end of buffer or error (varint length >= 10).
-  //|  getvalue_slow upb_pbdecoder_decode_varint_slow, 1
+  /*|->decode_varint_slow: */
+  /*|  // Slow path: end of buffer or error (varint length >= 10). */
+  /*|  getvalue_slow upb_pbdecoder_decode_varint_slow, 1 */
   dasm_put(Dst, 1229, Dt2(->checkpoint), Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), (unsigned int)((uintptr_t)upb_pbdecoder_decode_varint_slow), (unsigned int)(((uintptr_t)upb_pbdecoder_decode_varint_slow)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs));
-# 472 "upb/pb/compile_decoder_x64.dasc"
-  //|
-  //| // Args: rsi=expected tag, return=rax (DECODE_{OK,MISMATCH})
-  dasm_put(Dst, 1335, Dt2(->buf), Dt2(->ptr));
 # 474 "upb/pb/compile_decoder_x64.dasc"
+  /*| */
+  /*| // Args: rsi=expected tag, return=rax (DECODE_{OK,MISMATCH}) */
+  dasm_put(Dst, 1335, Dt2(->buf), Dt2(->ptr));
+# 476 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "checktag_fallback");
-  //|->checktag_fallback:
-  //|  sub      rsp, 8
-  //|  mov      [rsp], rsi  // Preserve expected tag.
-  //|1:
-  //|  mov      ARG1_64, DECODER
-  //|  commit_regs
-  //|  mov      DECODER->checkpoint, PTR
-  //|  callp    upb_pbdecoder_checktag_slow
-  //|  load_regs
+  /*|->checktag_fallback: */
+  /*|  sub      rsp, 8 */
+  /*|  mov      [rsp], rsi  // Preserve expected tag. */
+  /*|1: */
+  /*|  mov      ARG1_64, DECODER */
+  /*|  commit_regs */
+  /*|  mov      DECODER->checkpoint, PTR */
+  /*|  callp    upb_pbdecoder_checktag_slow */
+  /*|  load_regs */
   dasm_put(Dst, 1379, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt2(->delim_end), Dt2(->buf), Dt2(->bufstart_ofs), Dt1(->end_ofs), Dt1(->sink.closure), Dt2(->checkpoint), (unsigned int)((uintptr_t)upb_pbdecoder_checktag_slow), (unsigned int)(((uintptr_t)upb_pbdecoder_checktag_slow)>>32), 0xfffffffffffffff0UL, Dt2(->top), Dt2(->ptr), Dt2(->data_end), Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->bufstart_ofs));
-# 484 "upb/pb/compile_decoder_x64.dasc"
-  //|  cmp      eax, 0
-  //|  jge      >2
-  //|  add      rsp, 8
-  //|  ret
-  //|2:
-  //|  call     ->exitjit
-  //|  mov      rsi, [rsp]
-  //|  cmp      PTR, DELIMEND
-  //|  jne      <1
-  //|  mov      eax, DECODE_EOF
-  //|  add      rsp, 8
-  //|  ret
-  //|
-  //| // Args: rsi=upb_inttable, rdx=key, return=rax (-1 if not found).
-  //| // Preserves: rcx, rdx
-  //| // OPT: Could write this in assembly if it's a hotspot.
+# 486 "upb/pb/compile_decoder_x64.dasc"
+  /*|  cmp      eax, 0 */
+  /*|  jge      >2 */
+  /*|  add      rsp, 8 */
+  /*|  ret */
+  /*|2: */
+  /*|  call     ->exitjit */
+  /*|  mov      rsi, [rsp] */
+  /*|  cmp      PTR, DELIMEND */
+  /*|  jne      <1 */
+  /*|  mov      eax, DECODE_EOF */
+  /*|  add      rsp, 8 */
+  /*|  ret */
+  /*| */
+  /*| // Args: rsi=upb_inttable, rdx=key, return=rax (-1 if not found). */
+  /*| // Preserves: rcx, rdx */
+  /*| // OPT: Could write this in assembly if it's a hotspot. */
   dasm_put(Dst, 1478, Dt2(->buf), DECODE_EOF);
-# 500 "upb/pb/compile_decoder_x64.dasc"
+# 502 "upb/pb/compile_decoder_x64.dasc"
   asmlabel(jc, "hashlookup");
-  //|->hashlookup:
-  //|  push   rcx
-  //|  push   rdx
-  //|  sub    rsp, 16
-  //|  mov    rdi, rsi
-  //|  mov    rsi, rdx
-  //|  mov    rdx, rsp
-  //|  callp  upb_inttable_lookup
-  //|  add    rsp, 16
-  //|  pop    rdx
-  //|  pop    rcx
-  //|  test   al, al
-  //|  jz     >2  // Unknown field.
-  //|  mov    rax, [rsp-32]  // Value from table.
-  //|  ret
-  //|2:
-  //|  xor    rax, rax
-  //|  not    rax
-  //|  ret
+  /*|->hashlookup: */
+  /*|  push   rcx */
+  /*|  push   rdx */
+  /*|  sub    rsp, 16 */
+  /*|  mov    rdi, rsi */
+  /*|  mov    rsi, rdx */
+  /*|  mov    rdx, rsp */
+  /*|  callp  upb_inttable_lookup */
+  /*|  add    rsp, 16 */
+  /*|  pop    rdx */
+  /*|  pop    rcx */
+  /*|  test   al, al */
+  /*|  jz     >2  // Unknown field. */
+  /*|  mov    rax, [rsp-32]  // Value from table. */
+  /*|  ret */
+  /*|2: */
+  /*|  xor    rax, rax */
+  /*|  not    rax */
+  /*|  ret */
   dasm_put(Dst, 1520, (unsigned int)((uintptr_t)upb_inttable_lookup), (unsigned int)(((uintptr_t)upb_inttable_lookup)>>32), 0xfffffffffffffff0UL);
-# 520 "upb/pb/compile_decoder_x64.dasc"
+# 522 "upb/pb/compile_decoder_x64.dasc"
 }
 
 static void jitprimitive(jitcompiler *jc, opcode op,
@@ -786,168 +788,169 @@ static void jitprimitive(jitcompiler *jc, opcode op,
     X, F64, F32, V64, V64, V32, F64, F32, V64, X, X, X, X, V32, V32, F32, F64,
     V32, V64 };
   static char fastpath_bytes[] = { 1, 1, 4, 8 };
-  const valtype_t type = types[op];
-  const int fastbytes = fastpath_bytes[type];
+  const valtype_t vtype = types[op];
+  const int fastbytes = fastpath_bytes[vtype];
   upb_func *handler = gethandler(h, sel);
+  upb_fieldtype_t ftype;
+  const upb_shim_data *data;
 
   if (handler) {
-    //|1:
-    //|  chkneob  fastbytes, >3
+    /*|1: */
+    /*|  chkneob  fastbytes, >3 */
     dasm_put(Dst, 112);
      if (fastbytes == 1) {
     dasm_put(Dst, 1589);
      } else {
     dasm_put(Dst, 1597, fastbytes);
      }
-# 536 "upb/pb/compile_decoder_x64.dasc"
-    //|2:
+# 540 "upb/pb/compile_decoder_x64.dasc"
+    /*|2: */
     dasm_put(Dst, 1613);
-# 537 "upb/pb/compile_decoder_x64.dasc"
-    switch (type) {
+# 541 "upb/pb/compile_decoder_x64.dasc"
+    switch (vtype) {
     case V32:
-      //|  call   ->decodev32_fallback
+      /*|  call   ->decodev32_fallback */
       dasm_put(Dst, 1616);
-# 540 "upb/pb/compile_decoder_x64.dasc"
+# 544 "upb/pb/compile_decoder_x64.dasc"
       break;
     case V64:
-      //|  call   ->decodev64_fallback
+      /*|  call   ->decodev64_fallback */
       dasm_put(Dst, 1620);
-# 543 "upb/pb/compile_decoder_x64.dasc"
+# 547 "upb/pb/compile_decoder_x64.dasc"
       break;
     case F32:
-      //|  call   ->decodef32_fallback
+      /*|  call   ->decodef32_fallback */
       dasm_put(Dst, 1624);
-# 546 "upb/pb/compile_decoder_x64.dasc"
+# 550 "upb/pb/compile_decoder_x64.dasc"
       break;
     case F64:
-      //|  call   ->decodef64_fallback
+      /*|  call   ->decodef64_fallback */
       dasm_put(Dst, 1628);
-# 549 "upb/pb/compile_decoder_x64.dasc"
+# 553 "upb/pb/compile_decoder_x64.dasc"
       break;
     case X: break;
     }
-    //|  jmp    >4
+    /*|  jmp    >4 */
     dasm_put(Dst, 1632);
-# 553 "upb/pb/compile_decoder_x64.dasc"
+# 557 "upb/pb/compile_decoder_x64.dasc"
 
-    // Fast path decode; for when check_bytes bytes are available.
-    //|3:
+    /* Fast path decode; for when fastbytes bytes are available. */
+    /*|3: */
     dasm_put(Dst, 1637);
-# 556 "upb/pb/compile_decoder_x64.dasc"
+# 560 "upb/pb/compile_decoder_x64.dasc"
     switch (op) {
     case OP_PARSE_SFIXED32:
     case OP_PARSE_FIXED32:
-      //|  mov    edx, dword [PTR]
+      /*|  mov    edx, dword [PTR] */
       dasm_put(Dst, 1640);
-# 560 "upb/pb/compile_decoder_x64.dasc"
+# 564 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_PARSE_SFIXED64:
     case OP_PARSE_FIXED64:
-      //|  mov    rdx, qword [PTR]
+      /*|  mov    rdx, qword [PTR] */
       dasm_put(Dst, 1643);
-# 564 "upb/pb/compile_decoder_x64.dasc"
+# 568 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_PARSE_FLOAT:
-      //|  movss  xmm0, dword [PTR]
+      /*|  movss  xmm0, dword [PTR] */
       dasm_put(Dst, 1647);
-# 567 "upb/pb/compile_decoder_x64.dasc"
+# 571 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_PARSE_DOUBLE:
-      //|  movsd  xmm0, qword [PTR]
+      /*|  movsd  xmm0, qword [PTR] */
       dasm_put(Dst, 1653);
-# 570 "upb/pb/compile_decoder_x64.dasc"
+# 574 "upb/pb/compile_decoder_x64.dasc"
       break;
     default:
-      // Inline one byte of varint decoding.
-      //|  movzx  edx, byte [PTR]
-      //|  test   dl, dl
-      //|  js     <2   // Fallback to slow path for >1 byte varint.
+      /* Inline one byte of varint decoding. */
+      /*|  movzx  edx, byte [PTR] */
+      /*|  test   dl, dl */
+      /*|  js     <2   // Fallback to slow path for >1 byte varint. */
       dasm_put(Dst, 1659);
-# 576 "upb/pb/compile_decoder_x64.dasc"
+# 580 "upb/pb/compile_decoder_x64.dasc"
       break;
     }
 
-    // Second-stage decode; used for both fast and slow paths
-    // (only needed for a few types).
-    //|4:
+    /* Second-stage decode; used for both fast and slow paths */
+    /* (only needed for a few types). */
+    /*|4: */
     dasm_put(Dst, 1669);
-# 582 "upb/pb/compile_decoder_x64.dasc"
+# 586 "upb/pb/compile_decoder_x64.dasc"
     switch (op) {
     case OP_PARSE_SINT32:
-      // 32-bit zig-zag decode.
-      //|  mov    eax, edx
-      //|  shr    edx, 1
-      //|  and    eax, 1
-      //|  neg    eax
-      //|  xor    edx, eax
+      /* 32-bit zig-zag decode. */
+      /*|  mov    eax, edx */
+      /*|  shr    edx, 1 */
+      /*|  and    eax, 1 */
+      /*|  neg    eax */
+      /*|  xor    edx, eax */
       dasm_put(Dst, 1672);
-# 590 "upb/pb/compile_decoder_x64.dasc"
+# 594 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_PARSE_SINT64:
-      // 64-bit zig-zag decode.
-      //|  mov    rax, rdx
-      //|  shr    rdx, 1
-      //|  and    rax, 1
-      //|  neg    rax
-      //|  xor    rdx, rax
+      /* 64-bit zig-zag decode. */
+      /*|  mov    rax, rdx */
+      /*|  shr    rdx, 1 */
+      /*|  and    rax, 1 */
+      /*|  neg    rax */
+      /*|  xor    rdx, rax */
       dasm_put(Dst, 1686);
-# 598 "upb/pb/compile_decoder_x64.dasc"
+# 602 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_PARSE_BOOL:
-      //|  test   rdx, rdx
-      //|  setne  dl
+      /*|  test   rdx, rdx */
+      /*|  setne  dl */
       dasm_put(Dst, 1705);
-# 602 "upb/pb/compile_decoder_x64.dasc"
+# 606 "upb/pb/compile_decoder_x64.dasc"
       break;
     default: break;
     }
 
-    // Call callback (or specialize if we can).
-    upb_fieldtype_t type;
-    const upb_shim_data *data = upb_shim_getdata(h, sel, &type);
+    /* Call callback (or specialize if we can). */
+    data = upb_shim_getdata(h, sel, &ftype);
     if (data) {
-      switch (type) {
+      switch (ftype) {
         case UPB_TYPE_INT64:
         case UPB_TYPE_UINT64:
-          //|  mov   [CLOSURE + data->offset], rdx
+          /*|  mov   [CLOSURE + data->offset], rdx */
           dasm_put(Dst, 1712, data->offset);
-# 614 "upb/pb/compile_decoder_x64.dasc"
+# 617 "upb/pb/compile_decoder_x64.dasc"
           break;
         case UPB_TYPE_INT32:
         case UPB_TYPE_UINT32:
         case UPB_TYPE_ENUM:
-          //|  mov   [CLOSURE + data->offset], edx
+          /*|  mov   [CLOSURE + data->offset], edx */
           dasm_put(Dst, 1717, data->offset);
-# 619 "upb/pb/compile_decoder_x64.dasc"
+# 622 "upb/pb/compile_decoder_x64.dasc"
           break;
         case UPB_TYPE_DOUBLE:
-          //|  movsd  qword [CLOSURE + data->offset], XMMARG1
+          /*|  movsd  qword [CLOSURE + data->offset], XMMARG1 */
           dasm_put(Dst, 1722, data->offset);
-# 622 "upb/pb/compile_decoder_x64.dasc"
+# 625 "upb/pb/compile_decoder_x64.dasc"
           break;
         case UPB_TYPE_FLOAT:
-          //|  movss  dword [CLOSURE + data->offset], XMMARG1
+          /*|  movss  dword [CLOSURE + data->offset], XMMARG1 */
           dasm_put(Dst, 1730, data->offset);
-# 625 "upb/pb/compile_decoder_x64.dasc"
+# 628 "upb/pb/compile_decoder_x64.dasc"
           break;
         case UPB_TYPE_BOOL:
-          //|  mov   [CLOSURE + data->offset], dl
+          /*|  mov   [CLOSURE + data->offset], dl */
           dasm_put(Dst, 1738, data->offset);
-# 628 "upb/pb/compile_decoder_x64.dasc"
+# 631 "upb/pb/compile_decoder_x64.dasc"
           break;
         case UPB_TYPE_STRING:
         case UPB_TYPE_BYTES:
         case UPB_TYPE_MESSAGE:
           assert(false); break;
       }
-      //|  sethas CLOSURE, data->hasbit
+      /*|  sethas CLOSURE, data->hasbit */
        if (data->hasbit >= 0) {
       dasm_put(Dst, 1743, ((uint32_t)data->hasbit / 8), (1 << ((uint32_t)data->hasbit % 8)));
        }
-# 635 "upb/pb/compile_decoder_x64.dasc"
+# 638 "upb/pb/compile_decoder_x64.dasc"
     } else if (handler) {
-      //|  mov    ARG1_64, CLOSURE
-      //|  load_handler_data h, sel
+      /*|  mov    ARG1_64, CLOSURE */
+      /*|  load_handler_data h, sel */
       dasm_put(Dst, 1749);
        {
        uintptr_t v = (uintptr_t)upb_handlers_gethandlerdata(h, sel);
@@ -959,211 +962,211 @@ static void jitprimitive(jitcompiler *jc, opcode op,
       dasm_put(Dst, 454);
        }
        }
-# 638 "upb/pb/compile_decoder_x64.dasc"
-      //|  callp  handler
+# 641 "upb/pb/compile_decoder_x64.dasc"
+      /*|  callp  handler */
       dasm_put(Dst, 1754, (unsigned int)((uintptr_t)handler), (unsigned int)(((uintptr_t)handler)>>32), 0xfffffffffffffff0UL);
-# 639 "upb/pb/compile_decoder_x64.dasc"
+# 642 "upb/pb/compile_decoder_x64.dasc"
       if (!alwaysok(h, sel)) {
-        //|  test   al, al
-        //|  jnz    >5
-        //|  call   ->suspend
-        //|  jmp    <1
-        //|5:
+        /*|  test   al, al */
+        /*|  jnz    >5 */
+        /*|  call   ->suspend */
+        /*|  jmp    <1 */
+        /*|5: */
         dasm_put(Dst, 1776);
-# 645 "upb/pb/compile_decoder_x64.dasc"
+# 648 "upb/pb/compile_decoder_x64.dasc"
       }
     }
 
-    // We do this last so that the checkpoint is not advanced past the user's
-    // data until the callback has returned success.
-    //|  add    PTR, fastbytes
+    /* We do this last so that the checkpoint is not advanced past the user's
+     * data until the callback has returned success. */
+    /*|  add    PTR, fastbytes */
     dasm_put(Dst, 1792, fastbytes);
-# 651 "upb/pb/compile_decoder_x64.dasc"
+# 654 "upb/pb/compile_decoder_x64.dasc"
   } else {
-    // No handler registered for this value, just skip it.
-    //|  chkneob  fastbytes, >3
+    /* No handler registered for this value, just skip it. */
+    /*|  chkneob  fastbytes, >3 */
      if (fastbytes == 1) {
     dasm_put(Dst, 1589);
      } else {
     dasm_put(Dst, 1597, fastbytes);
      }
-# 654 "upb/pb/compile_decoder_x64.dasc"
-    //|2:
+# 657 "upb/pb/compile_decoder_x64.dasc"
+    /*|2: */
     dasm_put(Dst, 1613);
-# 655 "upb/pb/compile_decoder_x64.dasc"
-    switch (type) {
+# 658 "upb/pb/compile_decoder_x64.dasc"
+    switch (vtype) {
     case V32:
-      //|  call   ->skipv32_fallback
+      /*|  call   ->skipv32_fallback */
       dasm_put(Dst, 1797);
-# 658 "upb/pb/compile_decoder_x64.dasc"
+# 661 "upb/pb/compile_decoder_x64.dasc"
       break;
     case V64:
-      //|  call   ->skipv64_fallback
+      /*|  call   ->skipv64_fallback */
       dasm_put(Dst, 1801);
-# 661 "upb/pb/compile_decoder_x64.dasc"
+# 664 "upb/pb/compile_decoder_x64.dasc"
       break;
     case F32:
-      //|  call   ->skipf32_fallback
+      /*|  call   ->skipf32_fallback */
       dasm_put(Dst, 1805);
-# 664 "upb/pb/compile_decoder_x64.dasc"
+# 667 "upb/pb/compile_decoder_x64.dasc"
       break;
     case F64:
-      //|  call   ->skipf64_fallback
+      /*|  call   ->skipf64_fallback */
       dasm_put(Dst, 1809);
-# 667 "upb/pb/compile_decoder_x64.dasc"
+# 670 "upb/pb/compile_decoder_x64.dasc"
       break;
     case X: break;
     }
 
-    // Fast-path skip.
-    //|3:
+    /* Fast-path skip. */
+    /*|3: */
     dasm_put(Dst, 1637);
-# 673 "upb/pb/compile_decoder_x64.dasc"
-    if (type == V32 || type == V64) {
-      //|  test   byte [PTR], 0x80
-      //|  jnz    <2
-      dasm_put(Dst, 1813);
 # 676 "upb/pb/compile_decoder_x64.dasc"
+    if (vtype == V32 || vtype == V64) {
+      /*|  test   byte [PTR], 0x80 */
+      /*|  jnz    <2 */
+      dasm_put(Dst, 1813);
+# 679 "upb/pb/compile_decoder_x64.dasc"
     }
-    //|  add    PTR, fastbytes
+    /*|  add    PTR, fastbytes */
     dasm_put(Dst, 1792, fastbytes);
-# 678 "upb/pb/compile_decoder_x64.dasc"
+# 681 "upb/pb/compile_decoder_x64.dasc"
   }
 }
 
 static void jitdispatch(jitcompiler *jc,
                         const upb_pbdecodermethod *method) {
-  // Lots of room for tweaking/optimization here.
+  /* Lots of room for tweaking/optimization here. */
 
   const upb_inttable *dispatch = &method->dispatch;
   bool has_hash_entries = (dispatch->t.count > 0);
 
-  // Whether any of the fields for this message can have two wire types which
-  // are both valid (packed & non-packed).
-  //
-  // OPT: populate this more precisely; not all messages with hash entries have
-  // this characteristic.
+  /* Whether any of the fields for this message can have two wire types which
+   * are both valid (packed & non-packed).
+   *
+   * OPT: populate this more precisely; not all messages with hash entries have
+   * this characteristic. */
   bool has_multi_wiretype = has_hash_entries;
 
-  //|=>define_jmptarget(jc, &method->dispatch):
-  //|1:
+  /*|=>define_jmptarget(jc, &method->dispatch): */
+  /*|1: */
   dasm_put(Dst, 1822, define_jmptarget(jc, &method->dispatch));
-# 697 "upb/pb/compile_decoder_x64.dasc"
-  // Decode the field tag.
-  //|  mov     aword DECODER->checkpoint, PTR
-  //|  chkeob  2, >6
+# 700 "upb/pb/compile_decoder_x64.dasc"
+  /* Decode the field tag. */
+  /*|  mov     aword DECODER->checkpoint, PTR */
+  /*|  chkeob  2, >6 */
   dasm_put(Dst, 308, Dt2(->checkpoint));
    if (2 == 1) {
   dasm_put(Dst, 1826);
    } else {
   dasm_put(Dst, 1834);
    }
-# 700 "upb/pb/compile_decoder_x64.dasc"
-  //|  movzx   edx, byte [PTR]
-  //|  test    dl, dl
-  //|  jns     >7    // Jump if first byte has no continuation bit.
-  //|  movzx   ecx, byte [PTR + 1]
-  //|  test    cl, cl
-  //|  js      >6    // Jump if second byte has continuation bit.
-  //|  // Confirmed two-byte varint.
-  //|  shl     ecx, 7
-  //|  and     edx, 0x7f
-  //|  or      edx, ecx
-  //|  add     PTR, 2
-  //|  jmp     >8
-  //|6:
-  //|  call    ->decode_unknown_tag_fallback
-  //|  test    eax, eax  // Hit DELIMEND?
-  //|  jnz     >8
-  //|  ret
-  //|7:
-  //|  add     PTR, 1
-  //|8:
-  //|  mov     ecx, edx
-  //|  shr     edx, 3
-  //|  and     cl, 7
+# 703 "upb/pb/compile_decoder_x64.dasc"
+  /*|  movzx   edx, byte [PTR] */
+  /*|  test    dl, dl */
+  /*|  jns     >7    // Jump if first byte has no continuation bit. */
+  /*|  movzx   ecx, byte [PTR + 1] */
+  /*|  test    cl, cl */
+  /*|  js      >6    // Jump if second byte has continuation bit. */
+  /*|  // Confirmed two-byte varint. */
+  /*|  shl     ecx, 7 */
+  /*|  and     edx, 0x7f */
+  /*|  or      edx, ecx */
+  /*|  add     PTR, 2 */
+  /*|  jmp     >8 */
+  /*|6: */
+  /*|  call    ->decode_unknown_tag_fallback */
+  /*|  test    eax, eax  // Hit DELIMEND? */
+  /*|  jnz     >8 */
+  /*|  ret */
+  /*|7: */
+  /*|  add     PTR, 1 */
+  /*|8: */
+  /*|  mov     ecx, edx */
+  /*|  shr     edx, 3 */
+  /*|  and     cl, 7 */
   dasm_put(Dst, 1850, 1);
-# 723 "upb/pb/compile_decoder_x64.dasc"
+# 726 "upb/pb/compile_decoder_x64.dasc"
 
-  // See comment attached to upb_pbdecodermethod.dispatch for layout of the
-  // dispatch table.
-  //|2:
-  //|  cmp     edx, dispatch->array_size
+  /* See comment attached to upb_pbdecodermethod.dispatch for layout of the
+   * dispatch table. */
+  /*|2: */
+  /*|  cmp     edx, dispatch->array_size */
   dasm_put(Dst, 1915, dispatch->array_size);
-# 728 "upb/pb/compile_decoder_x64.dasc"
+# 731 "upb/pb/compile_decoder_x64.dasc"
   if (has_hash_entries) {
-    //|  jae     >7
+    /*|  jae     >7 */
     dasm_put(Dst, 1922);
-# 730 "upb/pb/compile_decoder_x64.dasc"
+# 733 "upb/pb/compile_decoder_x64.dasc"
   } else {
-    //|  jae     >5
+    /*|  jae     >5 */
     dasm_put(Dst, 1927);
-# 732 "upb/pb/compile_decoder_x64.dasc"
+# 735 "upb/pb/compile_decoder_x64.dasc"
   }
-  //|  // OPT: Compact the lookup arr into 32-bit entries.
+  /*|  // OPT: Compact the lookup arr into 32-bit entries. */
   if ((uintptr_t)dispatch->array > 0x7fffffff) {
-    //|  mov64 rax, (uintptr_t)dispatch->array
-    //|  mov   rax, qword [rax + rdx * 8]
+    /*|  mov64 rax, (uintptr_t)dispatch->array */
+    /*|  mov   rax, qword [rax + rdx * 8] */
     dasm_put(Dst, 1932, (unsigned int)((uintptr_t)dispatch->array), (unsigned int)(((uintptr_t)dispatch->array)>>32));
-# 737 "upb/pb/compile_decoder_x64.dasc"
+# 740 "upb/pb/compile_decoder_x64.dasc"
   } else {
-    //|  mov   rax, qword [rdx * 8 + dispatch->array]
+    /*|  mov   rax, qword [rdx * 8 + dispatch->array] */
     dasm_put(Dst, 1941, dispatch->array);
-# 739 "upb/pb/compile_decoder_x64.dasc"
+# 742 "upb/pb/compile_decoder_x64.dasc"
   }
-  //|3:
-  //|  // We take advantage of the fact that non-present entries are stored
-  //|  // as -1, which will result in wire types that will never match.
-  //|  cmp  al, cl
+  /*|3: */
+  /*|  // We take advantage of the fact that non-present entries are stored */
+  /*|  // as -1, which will result in wire types that will never match. */
+  /*|  cmp  al, cl */
   dasm_put(Dst, 1947);
-# 744 "upb/pb/compile_decoder_x64.dasc"
+# 747 "upb/pb/compile_decoder_x64.dasc"
   if (has_multi_wiretype) {
-    //|  jne  >6
+    /*|  jne  >6 */
     dasm_put(Dst, 1952);
-# 746 "upb/pb/compile_decoder_x64.dasc"
+# 749 "upb/pb/compile_decoder_x64.dasc"
   } else {
-    //|  jne  >5
+    /*|  jne  >5 */
     dasm_put(Dst, 1957);
-# 748 "upb/pb/compile_decoder_x64.dasc"
+# 751 "upb/pb/compile_decoder_x64.dasc"
   }
-  //|  shr  rax, 16
-  //|
-  //|  // Load the machine code address from the table entry.
-  //|  // The table entry is relative to the dispatch->array jmptarget
-  //|  // (patchdispatch() took care of this) which is the same as
-  //|  // local label "4".  The "lea" is really just trying to do
-  //|  //    lea  rax, [>4 + rax]
-  //|  //
-  //|  // But we can't write that directly for some reason, so we use
-  //|  // rdx as a temporary.
-  //|  lea  rdx, [>4]
-  //|=>define_jmptarget(jc, dispatch->array):
-  //|4:
-  //|  add  rax, rdx
-  //|  ret
-  //|
-  //|5:
-  //|  // Field isn't in our table.
-  //|  call ->parse_unknown
-  //|  test eax, eax  // ENDGROUP?
-  //|  jz   <1
-  //|  lea  rax, [>9]  // ENDGROUP; Load address of OP_ENDMSG.
-  //|  ret
+  /*|  shr  rax, 16 */
+  /*| */
+  /*|  // Load the machine code address from the table entry. */
+  /*|  // The table entry is relative to the dispatch->array jmptarget */
+  /*|  // (patchdispatch() took care of this) which is the same as */
+  /*|  // local label "4".  The "lea" is really just trying to do */
+  /*|  //    lea  rax, [>4 + rax] */
+  /*|  // */
+  /*|  // But we can't write that directly for some reason, so we use */
+  /*|  // rdx as a temporary. */
+  /*|  lea  rdx, [>4] */
+  /*|=>define_jmptarget(jc, dispatch->array): */
+  /*|4: */
+  /*|  add  rax, rdx */
+  /*|  ret */
+  /*| */
+  /*|5: */
+  /*|  // Field isn't in our table. */
+  /*|  call ->parse_unknown */
+  /*|  test eax, eax  // ENDGROUP? */
+  /*|  jz   <1 */
+  /*|  lea  rax, [>9]  // ENDGROUP; Load address of OP_ENDMSG. */
+  /*|  ret */
   dasm_put(Dst, 1962, define_jmptarget(jc, dispatch->array));
-# 772 "upb/pb/compile_decoder_x64.dasc"
+# 775 "upb/pb/compile_decoder_x64.dasc"
 
   if (has_multi_wiretype) {
-    //|6:
-    //|  // Primary wire type didn't match, check secondary wire type.
-    //|  cmp  ah, cl
-    //|  jne  <5
-    //|  // Secondary wire type is a match, look up fn + UPB_MAX_FIELDNUMBER.
-    //|  add   rdx, UPB_MAX_FIELDNUMBER
-    //|  // This key will never be in the array part, so do a hash lookup.
+    /*|6: */
+    /*|  // Primary wire type didn't match, check secondary wire type. */
+    /*|  cmp  ah, cl */
+    /*|  jne  <5 */
+    /*|  // Secondary wire type is a match, look up fn + UPB_MAX_FIELDNUMBER. */
+    /*|  add   rdx, UPB_MAX_FIELDNUMBER */
+    /*|  // This key will never be in the array part, so do a hash lookup. */
     dasm_put(Dst, 1996, UPB_MAX_FIELDNUMBER);
-# 781 "upb/pb/compile_decoder_x64.dasc"
+# 784 "upb/pb/compile_decoder_x64.dasc"
     assert(has_hash_entries);
-    //|  ld64  dispatch
+    /*|  ld64  dispatch */
      {
      uintptr_t v = (uintptr_t)dispatch;
      if (v > 0xffffffff) {
@@ -1174,16 +1177,16 @@ static void jitdispatch(jitcompiler *jc,
     dasm_put(Dst, 454);
      }
      }
-# 783 "upb/pb/compile_decoder_x64.dasc"
-    //|  jmp   ->hashlookup  // Tail call.
+# 786 "upb/pb/compile_decoder_x64.dasc"
+    /*|  jmp   ->hashlookup  // Tail call. */
     dasm_put(Dst, 2009);
-# 784 "upb/pb/compile_decoder_x64.dasc"
+# 787 "upb/pb/compile_decoder_x64.dasc"
   }
 
   if (has_hash_entries) {
-    //|7:
-    //|  // Hash table lookup.
-    //|  ld64   dispatch
+    /*|7: */
+    /*|  // Hash table lookup. */
+    /*|  ld64   dispatch */
     dasm_put(Dst, 2014);
      {
      uintptr_t v = (uintptr_t)dispatch;
@@ -1195,41 +1198,42 @@ static void jitdispatch(jitcompiler *jc,
     dasm_put(Dst, 454);
      }
      }
-# 790 "upb/pb/compile_decoder_x64.dasc"
-    //|  call   ->hashlookup
-    //|  jmp    <3
+# 793 "upb/pb/compile_decoder_x64.dasc"
+    /*|  call   ->hashlookup */
+    /*|  jmp    <3 */
     dasm_put(Dst, 2017);
-# 792 "upb/pb/compile_decoder_x64.dasc"
+# 795 "upb/pb/compile_decoder_x64.dasc"
   }
 }
 
 static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
                    const upb_pbdecodermethod *method) {
-  // Internally we parse unknown fields; if this runs us into DELIMEND we jump
-  // to the corresponding DELIMEND target (either msg end or repeated field
-  // end), which we find from the OP_CHECKDELIM which must have necessarily
-  // preceded us.
+  /* Internally we parse unknown fields; if this runs us into DELIMEND we jump
+   * to the corresponding DELIMEND target (either msg end or repeated field
+   * end), which we find from the OP_CHECKDELIM which must have necessarily
+   * preceded us. */
   uint32_t last_instruction = *(jc->pc - 2);
   int last_arg = (int32_t)last_instruction >> 8;
-  assert((last_instruction & 0xff) == OP_CHECKDELIM);
   uint32_t *delimend = (jc->pc - 1) + last_arg;
   const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
 
+  assert((last_instruction & 0xff) == OP_CHECKDELIM);
+
   if (getop(*(jc->pc - 1)) == OP_TAGN) {
     jc->pc += ptr_words;
   }
 
-  //|  chkneob n, >1
+  /*|  chkneob n, >1 */
    if (n == 1) {
   dasm_put(Dst, 2025);
    } else {
   dasm_put(Dst, 2033, n);
    }
-# 812 "upb/pb/compile_decoder_x64.dasc"
+# 816 "upb/pb/compile_decoder_x64.dasc"
 
-  //|  // OPT: this is way too much fallback code to put here.
-  //|  // Reduce and/or move to a separate section to make better icache usage.
-  //|  ld64  tag
+  /*|  // OPT: this is way too much fallback code to put here. */
+  /*|  // Reduce and/or move to a separate section to make better icache usage. */
+  /*|  ld64  tag */
    {
    uintptr_t v = (uintptr_t)tag;
    if (v > 0xffffffff) {
@@ -1240,78 +1244,78 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
   dasm_put(Dst, 454);
    }
    }
-# 816 "upb/pb/compile_decoder_x64.dasc"
-  //|  call  ->checktag_fallback
-  //|  cmp   eax, DECODE_MISMATCH
-  //|  je    >3
-  //|  cmp   eax, DECODE_EOF
-  //|  je     =>jmptarget(jc, delimend)
-  //|  jmp   >5
+# 820 "upb/pb/compile_decoder_x64.dasc"
+  /*|  call  ->checktag_fallback */
+  /*|  cmp   eax, DECODE_MISMATCH */
+  /*|  je    >3 */
+  /*|  cmp   eax, DECODE_EOF */
+  /*|  je     =>jmptarget(jc, delimend) */
+  /*|  jmp   >5 */
   dasm_put(Dst, 2049, DECODE_MISMATCH, DECODE_EOF, jmptarget(jc, delimend));
-# 822 "upb/pb/compile_decoder_x64.dasc"
+# 826 "upb/pb/compile_decoder_x64.dasc"
 
-  //|1:
+  /*|1: */
   dasm_put(Dst, 112);
-# 824 "upb/pb/compile_decoder_x64.dasc"
+# 828 "upb/pb/compile_decoder_x64.dasc"
   switch (n) {
   case 1:
-    //|  cmp  byte [PTR], tag
+    /*|  cmp  byte [PTR], tag */
     dasm_put(Dst, 2072, tag);
-# 827 "upb/pb/compile_decoder_x64.dasc"
+# 831 "upb/pb/compile_decoder_x64.dasc"
     break;
   case 2:
-    //|  cmp  word [PTR], tag
+    /*|  cmp  word [PTR], tag */
     dasm_put(Dst, 2076, tag);
-# 830 "upb/pb/compile_decoder_x64.dasc"
+# 834 "upb/pb/compile_decoder_x64.dasc"
     break;
   case 3:
-    //|   // OPT: Slightly more efficient code, but depends on an extra byte.
-    //|   // mov  eax, dword [PTR]
-    //|   // shl  eax, 8
-    //|   // cmp  eax, tag << 8
-    //|   cmp  word [PTR], (tag & 0xffff)
-    //|   jne  >2
-    //|   cmp  byte [PTR + 2], (tag >> 16)
-    //|2:
+    /*|   // OPT: Slightly more efficient code, but depends on an extra byte. */
+    /*|   // mov  eax, dword [PTR] */
+    /*|   // shl  eax, 8 */
+    /*|   // cmp  eax, tag << 8 */
+    /*|   cmp  word [PTR], (tag & 0xffff) */
+    /*|   jne  >2 */
+    /*|   cmp  byte [PTR + 2], (tag >> 16) */
+    /*|2: */
     dasm_put(Dst, 2081, (tag & 0xffff), 2, (tag >> 16));
-# 840 "upb/pb/compile_decoder_x64.dasc"
+# 844 "upb/pb/compile_decoder_x64.dasc"
     break;
   case 4:
-    //|   cmp  dword [PTR], tag
+    /*|   cmp  dword [PTR], tag */
     dasm_put(Dst, 2096, tag);
-# 843 "upb/pb/compile_decoder_x64.dasc"
+# 847 "upb/pb/compile_decoder_x64.dasc"
     break;
   case 5:
-    //|   cmp  dword [PTR], (tag & 0xffffffff)
-    //|   jne  >3
-    //|   cmp  byte  [PTR + 4], (tag >> 32)
+    /*|   cmp  dword [PTR], (tag & 0xffffffff) */
+    /*|   jne  >3 */
+    /*|   cmp  byte  [PTR + 4], (tag >> 32) */
     dasm_put(Dst, 2100, (tag & 0xffffffff), 4, (tag >> 32));
-# 848 "upb/pb/compile_decoder_x64.dasc"
+# 852 "upb/pb/compile_decoder_x64.dasc"
   }
-  //|  je    >4
-  //|3:
+  /*|  je    >4 */
+  /*|3: */
   dasm_put(Dst, 2112);
-# 851 "upb/pb/compile_decoder_x64.dasc"
+# 855 "upb/pb/compile_decoder_x64.dasc"
   if (ofs == 0) {
-    //|  call   =>jmptarget(jc, &method->dispatch)
-    //|  test   rax, rax
-    //|  jz     =>jmptarget(jc, delimend)
-    //|  jmp    rax
+    /*|  call   =>jmptarget(jc, &method->dispatch) */
+    /*|  test   rax, rax */
+    /*|  jz     =>jmptarget(jc, delimend) */
+    /*|  jmp    rax */
     dasm_put(Dst, 2119, jmptarget(jc, &method->dispatch), jmptarget(jc, delimend));
-# 856 "upb/pb/compile_decoder_x64.dasc"
+# 860 "upb/pb/compile_decoder_x64.dasc"
   } else {
-    //|  jmp    =>jmptarget(jc, jc->pc + ofs)
+    /*|  jmp    =>jmptarget(jc, jc->pc + ofs) */
     dasm_put(Dst, 2131, jmptarget(jc, jc->pc + ofs));
-# 858 "upb/pb/compile_decoder_x64.dasc"
+# 862 "upb/pb/compile_decoder_x64.dasc"
   }
-  //|4:
-  //|  add    PTR, n
-  //|5:
+  /*|4: */
+  /*|  add    PTR, n */
+  /*|5: */
   dasm_put(Dst, 2135, n);
-# 862 "upb/pb/compile_decoder_x64.dasc"
+# 866 "upb/pb/compile_decoder_x64.dasc"
 }
 
-// Compile the bytecode to x64.
+/* Compile the bytecode to x64. */
 static void jitbytecode(jitcompiler *jc) {
   upb_pbdecodermethod *method = NULL;
   const upb_handlers *h = NULL;
@@ -1322,16 +1326,16 @@ static void jitbytecode(jitcompiler *jc) {
     int32_t longofs = arg;
 
     if (op != OP_SETDISPATCH) {
-      // Skipped for SETDISPATCH because it defines its own asmlabel for the
-      // dispatch code it emits.
+      /* Skipped for SETDISPATCH because it defines its own asmlabel for the
+       * dispatch code it emits. */
       asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
 
-      // Skipped for SETDISPATCH because it should point at the function
-      // prologue, not the dispatch function that is emitted first.
-      // TODO: optimize this to only define pclabels that are actually used.
-      //|=>define_jmptarget(jc, jc->pc):
+      /* Skipped for SETDISPATCH because it should point at the function
+       * prologue, not the dispatch function that is emitted first.
+       * TODO: optimize this to only define pclabels that are actually used. */
+      /*|=>define_jmptarget(jc, jc->pc): */
       dasm_put(Dst, 0, define_jmptarget(jc, jc->pc));
-# 883 "upb/pb/compile_decoder_x64.dasc"
+# 887 "upb/pb/compile_decoder_x64.dasc"
     }
 
     jc->pc++;
@@ -1340,10 +1344,10 @@ static void jitbytecode(jitcompiler *jc) {
     case OP_STARTMSG: {
       upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
       if (startmsg) {
-        // bool startmsg(void *closure, const void *hd)
-        //|1:
-        //|  mov   ARG1_64, CLOSURE
-        //|  load_handler_data h, UPB_STARTMSG_SELECTOR
+        /* bool startmsg(void *closure, const void *hd) */
+        /*|1: */
+        /*|  mov   ARG1_64, CLOSURE */
+        /*|  load_handler_data h, UPB_STARTMSG_SELECTOR */
         dasm_put(Dst, 2144);
          {
          uintptr_t v = (uintptr_t)upb_handlers_gethandlerdata(h, UPB_STARTMSG_SELECTOR);
@@ -1355,35 +1359,35 @@ static void jitbytecode(jitcompiler *jc) {
         dasm_put(Dst, 454);
          }
          }
-# 895 "upb/pb/compile_decoder_x64.dasc"
-        //|  callp startmsg
+# 899 "upb/pb/compile_decoder_x64.dasc"
+        /*|  callp startmsg */
         dasm_put(Dst, 1754, (unsigned int)((uintptr_t)startmsg), (unsigned int)(((uintptr_t)startmsg)>>32), 0xfffffffffffffff0UL);
-# 896 "upb/pb/compile_decoder_x64.dasc"
+# 900 "upb/pb/compile_decoder_x64.dasc"
         if (!alwaysok(h, UPB_STARTMSG_SELECTOR)) {
-          //|  test  al, al
-          //|  jnz   >2
-          //|  call  ->suspend
-          //|  jmp   <1
-          //|2:
+          /*|  test  al, al */
+          /*|  jnz   >2 */
+          /*|  call  ->suspend */
+          /*|  jmp   <1 */
+          /*|2: */
           dasm_put(Dst, 2151);
-# 902 "upb/pb/compile_decoder_x64.dasc"
+# 906 "upb/pb/compile_decoder_x64.dasc"
         }
       } else {
-        //| nop
+        /*| nop */
         dasm_put(Dst, 2167);
-# 905 "upb/pb/compile_decoder_x64.dasc"
+# 909 "upb/pb/compile_decoder_x64.dasc"
       }
       break;
     }
     case OP_ENDMSG: {
       upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
-      //|9:
+      /*|9: */
       dasm_put(Dst, 2169);
-# 911 "upb/pb/compile_decoder_x64.dasc"
+# 915 "upb/pb/compile_decoder_x64.dasc"
       if (endmsg) {
-        // bool endmsg(void *closure, const void *hd, upb_status *status)
-        //|  mov   ARG1_64, CLOSURE
-        //|  load_handler_data h, UPB_ENDMSG_SELECTOR
+        /* bool endmsg(void *closure, const void *hd, upb_status *status) */
+        /*|  mov   ARG1_64, CLOSURE */
+        /*|  load_handler_data h, UPB_ENDMSG_SELECTOR */
         dasm_put(Dst, 1749);
          {
          uintptr_t v = (uintptr_t)upb_handlers_gethandlerdata(h, UPB_ENDMSG_SELECTOR);
@@ -1395,43 +1399,44 @@ static void jitbytecode(jitcompiler *jc) {
         dasm_put(Dst, 454);
          }
          }
-# 915 "upb/pb/compile_decoder_x64.dasc"
-        //|  mov   ARG3_64, DECODER->status
-        //|  callp endmsg
+# 919 "upb/pb/compile_decoder_x64.dasc"
+        /*|  mov   ARG3_64, DECODER->status */
+        /*|  callp endmsg */
         dasm_put(Dst, 2172, Dt2(->status), (unsigned int)((uintptr_t)endmsg), (unsigned int)(((uintptr_t)endmsg)>>32), 0xfffffffffffffff0UL);
-# 917 "upb/pb/compile_decoder_x64.dasc"
+# 921 "upb/pb/compile_decoder_x64.dasc"
       }
       break;
     }
     case OP_SETDISPATCH: {
       uint32_t *op_pc = jc->pc - 1;
-
-      // Load info for new method.
+      const char *msgname;
       upb_inttable *dispatch;
+
+      /* Load info for new method. */
       memcpy(&dispatch, jc->pc, sizeof(void*));
       jc->pc += sizeof(void*) / sizeof(uint32_t);
-      // The OP_SETDISPATCH bytecode contains a pointer that is
-      // &method->dispatch; we want to go backwards and recover method.
+      /* The OP_SETDISPATCH bytecode contains a pointer that is
+       * &method->dispatch; we want to go backwards and recover method. */
       method =
           (void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch));
-      // May be NULL, in which case no handlers for this message will be found.
-      // OPT: we should do better by completely skipping the message in this
-      // case instead of parsing it field by field.  We should also do the skip
-      // in the containing message's code.
+      /* May be NULL, in which case no handlers for this message will be found.
+       * OPT: we should do better by completely skipping the message in this
+       * case instead of parsing it field by field.  We should also do the skip
+       * in the containing message's code. */
       h = method->dest_handlers_;
-      const char *msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
+      msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
 
-      // Emit dispatch code for new method.
+      /* Emit dispatch code for new method. */
       asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
       jitdispatch(jc, method);
 
-      // Emit function prologue for new method.
+      /* Emit function prologue for new method. */
       asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
-      //|=>define_jmptarget(jc, op_pc):
-      //|=>define_jmptarget(jc, method):
-      //|  sub   rsp, 8
+      /*|=>define_jmptarget(jc, op_pc): */
+      /*|=>define_jmptarget(jc, method): */
+      /*|  sub   rsp, 8 */
       dasm_put(Dst, 2198, define_jmptarget(jc, op_pc), define_jmptarget(jc, method));
-# 947 "upb/pb/compile_decoder_x64.dasc"
+# 952 "upb/pb/compile_decoder_x64.dasc"
 
       break;
     }
@@ -1455,12 +1460,12 @@ static void jitbytecode(jitcompiler *jc) {
     case OP_STARTSTR: {
       upb_func *start = gethandler(h, arg);
       if (start) {
-        // void *startseq(void *closure, const void *hd)
-        // void *startsubmsg(void *closure, const void *hd)
-        // void *startstr(void *closure, const void *hd, size_t size_hint)
-        //|1:
-        //|  mov   ARG1_64, CLOSURE
-        //|  load_handler_data h, arg
+        /* void *startseq(void *closure, const void *hd)
+         * void *startsubmsg(void *closure, const void *hd)
+         * void *startstr(void *closure, const void *hd, size_t size_hint) */
+        /*|1: */
+        /*|  mov   ARG1_64, CLOSURE */
+        /*|  load_handler_data h, arg */
         dasm_put(Dst, 2144);
          {
          uintptr_t v = (uintptr_t)upb_handlers_gethandlerdata(h, arg);
@@ -1472,33 +1477,33 @@ static void jitbytecode(jitcompiler *jc) {
         dasm_put(Dst, 454);
          }
          }
-# 976 "upb/pb/compile_decoder_x64.dasc"
+# 981 "upb/pb/compile_decoder_x64.dasc"
         if (op == OP_STARTSTR) {
-          //|  mov    ARG3_64, DELIMEND
-          //|  sub    ARG3_64, PTR
+          /*|  mov    ARG3_64, DELIMEND */
+          /*|  sub    ARG3_64, PTR */
           dasm_put(Dst, 2206);
-# 979 "upb/pb/compile_decoder_x64.dasc"
+# 984 "upb/pb/compile_decoder_x64.dasc"
         }
-        //|  callp start
+        /*|  callp start */
         dasm_put(Dst, 1754, (unsigned int)((uintptr_t)start), (unsigned int)(((uintptr_t)start)>>32), 0xfffffffffffffff0UL);
-# 981 "upb/pb/compile_decoder_x64.dasc"
+# 986 "upb/pb/compile_decoder_x64.dasc"
         if (!alwaysok(h, arg)) {
-          //|  test  rax, rax
-          //|  jnz   >2
-          //|  call  ->suspend
-          //|  jmp   <1
-          //|2:
+          /*|  test  rax, rax */
+          /*|  jnz   >2 */
+          /*|  call  ->suspend */
+          /*|  jmp   <1 */
+          /*|2: */
           dasm_put(Dst, 2214);
-# 987 "upb/pb/compile_decoder_x64.dasc"
+# 992 "upb/pb/compile_decoder_x64.dasc"
         }
-        //|  mov   CLOSURE, rax
+        /*|  mov   CLOSURE, rax */
         dasm_put(Dst, 2231);
-# 989 "upb/pb/compile_decoder_x64.dasc"
+# 994 "upb/pb/compile_decoder_x64.dasc"
       } else {
-        // TODO: nop is only required because of asmlabel().
-        //|  nop
+        /* TODO: nop is only required because of asmlabel(). */
+        /*|  nop */
         dasm_put(Dst, 2167);
-# 992 "upb/pb/compile_decoder_x64.dasc"
+# 997 "upb/pb/compile_decoder_x64.dasc"
       }
       break;
     }
@@ -1507,12 +1512,12 @@ static void jitbytecode(jitcompiler *jc) {
     case OP_ENDSTR: {
       upb_func *end = gethandler(h, arg);
       if (end) {
-        // bool endseq(void *closure, const void *hd)
-        // bool endsubmsg(void *closure, const void *hd)
-        // bool endstr(void *closure, const void *hd)
-        //|1:
-        //|  mov   ARG1_64, CLOSURE
-        //|  load_handler_data h, arg
+        /* bool endseq(void *closure, const void *hd)
+         * bool endsubmsg(void *closure, const void *hd)
+         * bool endstr(void *closure, const void *hd) */
+        /*|1: */
+        /*|  mov   ARG1_64, CLOSURE */
+        /*|  load_handler_data h, arg */
         dasm_put(Dst, 2144);
          {
          uintptr_t v = (uintptr_t)upb_handlers_gethandlerdata(h, arg);
@@ -1524,43 +1529,44 @@ static void jitbytecode(jitcompiler *jc) {
         dasm_put(Dst, 454);
          }
          }
-# 1006 "upb/pb/compile_decoder_x64.dasc"
-        //|  callp end
+# 1011 "upb/pb/compile_decoder_x64.dasc"
+        /*|  callp end */
         dasm_put(Dst, 1754, (unsigned int)((uintptr_t)end), (unsigned int)(((uintptr_t)end)>>32), 0xfffffffffffffff0UL);
-# 1007 "upb/pb/compile_decoder_x64.dasc"
+# 1012 "upb/pb/compile_decoder_x64.dasc"
         if (!alwaysok(h, arg)) {
-          //|  test  al, al
-          //|  jnz   >2
-          //|  call  ->suspend
-          //|  jmp   <1
-          //|2:
+          /*|  test  al, al */
+          /*|  jnz   >2 */
+          /*|  call  ->suspend */
+          /*|  jmp   <1 */
+          /*|2: */
           dasm_put(Dst, 2151);
-# 1013 "upb/pb/compile_decoder_x64.dasc"
+# 1018 "upb/pb/compile_decoder_x64.dasc"
         }
       } else {
-        // TODO: nop is only required because of asmlabel().
-        //|  nop
+        /* TODO: nop is only required because of asmlabel(). */
+        /*|  nop */
         dasm_put(Dst, 2167);
-# 1017 "upb/pb/compile_decoder_x64.dasc"
+# 1022 "upb/pb/compile_decoder_x64.dasc"
       }
       break;
     }
     case OP_STRING: {
       upb_func *str = gethandler(h, arg);
-      //|  cmp   PTR, DELIMEND
-      //|  je    >4
-      //|1:
-      //|  cmp   PTR, DATAEND
-      //|  jne   >2
-      //|  call  ->suspend
-      //|  jmp   <1
-      //|2:
+      /*|  cmp   PTR, DELIMEND */
+      /*|  je    >4 */
+      /*|1: */
+      /*|  cmp   PTR, DATAEND */
+      /*|  jne   >2 */
+      /*|  call  ->suspend */
+      /*|  jmp   <1 */
+      /*|2: */
       dasm_put(Dst, 2235);
-# 1030 "upb/pb/compile_decoder_x64.dasc"
+# 1035 "upb/pb/compile_decoder_x64.dasc"
       if (str) {
-        // size_t str(void *closure, const void *hd, const char *str, size_t n)
-        //|  mov   ARG1_64, CLOSURE
-        //|  load_handler_data h, arg
+        /* size_t str(void *closure, const void *hd, const char *str,
+         *            size_t n) */
+        /*|  mov   ARG1_64, CLOSURE */
+        /*|  load_handler_data h, arg */
         dasm_put(Dst, 1749);
          {
          uintptr_t v = (uintptr_t)upb_handlers_gethandlerdata(h, arg);
@@ -1572,101 +1578,101 @@ static void jitbytecode(jitcompiler *jc) {
         dasm_put(Dst, 454);
          }
          }
-# 1034 "upb/pb/compile_decoder_x64.dasc"
-        //|  mov   ARG3_64, PTR
-        //|  mov   ARG4_64, DATAEND
-        //|  sub   ARG4_64, PTR
-        //|  mov   ARG5_64, qword DECODER->handle
-        //|  callp str
-        //|  add   PTR, rax
-        dasm_put(Dst, 2262, Dt2(->handle), (unsigned int)((uintptr_t)str), (unsigned int)(((uintptr_t)str)>>32), 0xfffffffffffffff0UL);
 # 1040 "upb/pb/compile_decoder_x64.dasc"
+        /*|  mov   ARG3_64, PTR */
+        /*|  mov   ARG4_64, DATAEND */
+        /*|  sub   ARG4_64, PTR */
+        /*|  mov   ARG5_64, qword DECODER->handle */
+        /*|  callp str */
+        /*|  add   PTR, rax */
+        dasm_put(Dst, 2262, Dt2(->handle), (unsigned int)((uintptr_t)str), (unsigned int)(((uintptr_t)str)>>32), 0xfffffffffffffff0UL);
+# 1046 "upb/pb/compile_decoder_x64.dasc"
         if (!alwaysok(h, arg)) {
-          //|  cmp   PTR, DATAEND
-          //|  je    >3
-          //|  call  ->strret_fallback
-          //|3:
+          /*|  cmp   PTR, DATAEND */
+          /*|  je    >3 */
+          /*|  call  ->strret_fallback */
+          /*|3: */
           dasm_put(Dst, 2300);
-# 1045 "upb/pb/compile_decoder_x64.dasc"
+# 1051 "upb/pb/compile_decoder_x64.dasc"
         }
       } else {
-        //|  mov   PTR, DATAEND
+        /*|  mov   PTR, DATAEND */
         dasm_put(Dst, 2313);
-# 1048 "upb/pb/compile_decoder_x64.dasc"
+# 1054 "upb/pb/compile_decoder_x64.dasc"
       }
-      //|  cmp   PTR, DELIMEND
-      //|  jne   <1
-      //|4:
+      /*|  cmp   PTR, DELIMEND */
+      /*|  jne   <1 */
+      /*|4: */
       dasm_put(Dst, 2317);
-# 1052 "upb/pb/compile_decoder_x64.dasc"
+# 1058 "upb/pb/compile_decoder_x64.dasc"
       break;
     }
     case OP_PUSHTAGDELIM:
-      //|  mov   FRAME->sink.closure, CLOSURE
-      //|  // This shouldn't need to be read, because tag-delimited fields
-      //|  // shouldn't have an OP_SETDELIM after them.  But for the moment
-      //|  // non-packed repeated fields do OP_SETDELIM so they can share more
-      //|  // code with the packed code-path.  If this is changed later, this
-      //|  // store can be removed.
-      //|  mov   qword FRAME->end_ofs, 0
-      //|  cmp   FRAME, DECODER->limit
-      //|  je    ->err
-      //|  add   FRAME, sizeof(upb_pbdecoder_frame)
-      //|  mov   dword FRAME->groupnum, arg
+      /*|  mov   FRAME->sink.closure, CLOSURE */
+      /*|  // This shouldn't need to be read, because tag-delimited fields */
+      /*|  // shouldn't have an OP_SETDELIM after them.  But for the moment */
+      /*|  // non-packed repeated fields do OP_SETDELIM so they can share more */
+      /*|  // code with the packed code-path.  If this is changed later, this */
+      /*|  // store can be removed. */
+      /*|  mov   qword FRAME->end_ofs, 0 */
+      /*|  cmp   FRAME, DECODER->limit */
+      /*|  je    ->err */
+      /*|  add   FRAME, sizeof(upb_pbdecoder_frame) */
+      /*|  mov   dword FRAME->groupnum, arg */
       dasm_put(Dst, 2328, Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), arg);
-# 1066 "upb/pb/compile_decoder_x64.dasc"
+# 1072 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_PUSHLENDELIM:
-      //|  call  ->pushlendelim
+      /*|  call  ->pushlendelim */
       dasm_put(Dst, 2358);
-# 1069 "upb/pb/compile_decoder_x64.dasc"
+# 1075 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_POP:
-      //|  sub   FRAME, sizeof(upb_pbdecoder_frame)
-      //|  mov   CLOSURE, FRAME->sink.closure
+      /*|  sub   FRAME, sizeof(upb_pbdecoder_frame) */
+      /*|  mov   CLOSURE, FRAME->sink.closure */
       dasm_put(Dst, 2362, sizeof(upb_pbdecoder_frame), Dt1(->sink.closure));
-# 1073 "upb/pb/compile_decoder_x64.dasc"
+# 1079 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_SETDELIM:
-      // OPT: experiment with testing vs old offset to optimize away.
-      //|  mov   DATAEND, DECODER->end
-      //|  add   DELIMEND, FRAME->end_ofs
-      //|  cmp   DELIMEND, DECODER->buf
-      //|  jb    >1
-      //|  cmp   DELIMEND, DATAEND
-      //|  ja    >1   // OPT: try cmov.
-      //|  mov   DATAEND, DELIMEND
-      //|1:
+      /* OPT: experiment with testing vs old offset to optimize away. */
+      /*|  mov   DATAEND, DECODER->end */
+      /*|  add   DELIMEND, FRAME->end_ofs */
+      /*|  cmp   DELIMEND, DECODER->buf */
+      /*|  jb    >1 */
+      /*|  cmp   DELIMEND, DATAEND */
+      /*|  ja    >1   // OPT: try cmov. */
+      /*|  mov   DATAEND, DELIMEND */
+      /*|1: */
       dasm_put(Dst, 2372, Dt2(->end), Dt1(->end_ofs), Dt2(->buf));
-# 1084 "upb/pb/compile_decoder_x64.dasc"
+# 1090 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_SETBIGGROUPNUM:
-      //|  mov   dword FRAME->groupnum, *jc->pc++
+      /*|  mov   dword FRAME->groupnum, *jc->pc++ */
       dasm_put(Dst, 2352, Dt1(->groupnum), *jc->pc++);
-# 1087 "upb/pb/compile_decoder_x64.dasc"
+# 1093 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_CHECKDELIM:
-      //|  cmp  DELIMEND, PTR
-      //|  je   =>jmptarget(jc, jc->pc + longofs)
+      /*|  cmp  DELIMEND, PTR */
+      /*|  je   =>jmptarget(jc, jc->pc + longofs) */
       dasm_put(Dst, 2402, jmptarget(jc, jc->pc + longofs));
-# 1091 "upb/pb/compile_decoder_x64.dasc"
+# 1097 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_CALL:
-      //|  call =>jmptarget(jc, jc->pc + longofs)
+      /*|  call =>jmptarget(jc, jc->pc + longofs) */
       dasm_put(Dst, 2409, jmptarget(jc, jc->pc + longofs));
-# 1094 "upb/pb/compile_decoder_x64.dasc"
+# 1100 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_BRANCH:
-      //|  jmp  =>jmptarget(jc, jc->pc + longofs);
+      /*|  jmp  =>jmptarget(jc, jc->pc + longofs); */
       dasm_put(Dst, 2131, jmptarget(jc, jc->pc + longofs));
-# 1097 "upb/pb/compile_decoder_x64.dasc"
+# 1103 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_RET:
-      //|9:
-      //|  add  rsp, 8
-      //|  ret
+      /*|9: */
+      /*|  add  rsp, 8 */
+      /*|  ret */
       dasm_put(Dst, 2412);
-# 1102 "upb/pb/compile_decoder_x64.dasc"
+# 1108 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_TAG1:
       jittag(jc, (arg >> 8) & 0xff, 1, (int8_t)arg, method);
@@ -1681,9 +1687,9 @@ static void jitbytecode(jitcompiler *jc) {
       break;
     }
     case OP_DISPATCH:
-      //|  call   =>jmptarget(jc, &method->dispatch)
+      /*|  call   =>jmptarget(jc, &method->dispatch) */
       dasm_put(Dst, 2409, jmptarget(jc, &method->dispatch));
-# 1117 "upb/pb/compile_decoder_x64.dasc"
+# 1123 "upb/pb/compile_decoder_x64.dasc"
       break;
     case OP_HALT:
       assert(false);
@@ -1691,7 +1697,7 @@ static void jitbytecode(jitcompiler *jc) {
   }
 
   asmlabel(jc, "eof");
-  //|  nop
+  /*|  nop */
   dasm_put(Dst, 2167);
-# 1125 "upb/pb/compile_decoder_x64.dasc"
+# 1131 "upb/pb/compile_decoder_x64.dasc"
 }
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 0c3955a..a6240ce 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -29,17 +29,17 @@
 
 #define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
 
-// Error messages that are shared between the bytecode and JIT decoders.
+/* Error messages that are shared between the bytecode and JIT decoders. */
 const char *kPbDecoderStackOverflow = "Nesting too deep.";
 
-// Error messages shared within this file.
+/* Error messages shared within this file. */
 static const char *kUnterminatedVarint = "Unterminated varint.";
 
 /* upb_pbdecoder **************************************************************/
 
 static opcode halt = OP_HALT;
 
-// Whether an op consumes any of the input buffer.
+/* Whether an op consumes any of the input buffer. */
 static bool consumes_input(opcode op) {
   switch (op) {
     case OP_SETDISPATCH:
@@ -67,12 +67,12 @@ static bool consumes_input(opcode op) {
 
 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
 
-// It's unfortunate that we have to micro-manage the compiler with
-// UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
-// specific to one hardware configuration.  But empirically on a Core i7,
-// performance increases 30-50% with these annotations.  Every instance where
-// these appear, gcc 4.2.1 made the wrong decision and degraded performance in
-// benchmarks.
+/* It's unfortunate that we have to micro-manage the compiler with
+ * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
+ * specific to one hardware configuration.  But empirically on a Core i7,
+ * performance increases 30-50% with these annotations.  Every instance where
+ * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
+ * benchmarks. */
 
 static void seterr(upb_pbdecoder *d, const char *msg) {
   upb_status status = UPB_STATUS_INIT;
@@ -87,22 +87,22 @@ void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
 
 /* Buffering ******************************************************************/
 
-// We operate on one buffer at a time, which is either the user's buffer passed
-// to our "decode" callback or some residual bytes from the previous buffer.
+/* We operate on one buffer at a time, which is either the user's buffer passed
+ * to our "decode" callback or some residual bytes from the previous buffer. */
 
-// How many bytes can be safely read from d->ptr without reading past end-of-buf
-// or past the current delimited end.
+/* How many bytes can be safely read from d->ptr without reading past end-of-buf
+ * or past the current delimited end. */
 static size_t curbufleft(const upb_pbdecoder *d) {
   assert(d->data_end >= d->ptr);
   return d->data_end - d->ptr;
 }
 
-// Overall stream offset of d->ptr.
+/* Overall stream offset of d->ptr. */
 uint64_t offset(const upb_pbdecoder *d) {
   return d->bufstart_ofs + (d->ptr - d->buf);
 }
 
-// Advances d->ptr.
+/* Advances d->ptr. */
 static void advance(upb_pbdecoder *d, size_t len) {
   assert(curbufleft(d) >= len);
   d->ptr += len;
@@ -116,8 +116,8 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
   return in_buf(p, d->residual, d->residual_end);
 }
 
-// Calculates the delim_end value, which is affected by both the current buffer
-// and the parsing stack, so must be called whenever either is updated.
+/* Calculates the delim_end value, which is affected by both the current buffer
+ * and the parsing stack, so must be called whenever either is updated. */
 static void set_delim_end(upb_pbdecoder *d) {
   size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
   if (delim_ofs <= (size_t)(d->end - d->buf)) {
@@ -143,22 +143,22 @@ static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
 }
 
 static void checkpoint(upb_pbdecoder *d) {
-  // The assertion here is in the interests of efficiency, not correctness.
-  // We are trying to ensure that we don't checkpoint() more often than
-  // necessary.
+  /* The assertion here is in the interests of efficiency, not correctness.
+   * We are trying to ensure that we don't checkpoint() more often than
+   * necessary. */
   assert(d->checkpoint != d->ptr);
   d->checkpoint = d->ptr;
 }
 
-// Resumes the decoder from an initial state or from a previous suspend.
+/* Resumes the decoder from an initial state or from a previous suspend. */
 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                              size_t size, const upb_bufhandle *handle) {
-  UPB_UNUSED(p);  // Useless; just for the benefit of the JIT.
+  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */
   d->buf_param = buf;
   d->size_param = size;
   d->handle = handle;
   if (d->residual_end > d->residual) {
-    // We have residual bytes from the last buffer.
+    /* We have residual bytes from the last buffer. */
     assert(d->ptr == d->residual);
   } else {
     switchtobuf(d, buf, buf + size);
@@ -171,18 +171,20 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
   return DECODE_OK;
 }
 
-// Suspends the decoder at the last checkpoint, without saving any residual
-// bytes.  If there are any unconsumed bytes, returns a short byte count.
+/* Suspends the decoder at the last checkpoint, without saving any residual
+ * bytes.  If there are any unconsumed bytes, returns a short byte count. */
 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
   d->pc = d->last;
   if (d->checkpoint == d->residual) {
-    // Checkpoint was in residual buf; no user bytes were consumed.
+    /* Checkpoint was in residual buf; no user bytes were consumed. */
     d->ptr = d->residual;
     return 0;
   } else {
+    size_t consumed;
     assert(!in_residual_buf(d, d->checkpoint));
     assert(d->buf == d->buf_param);
-    size_t consumed = d->checkpoint - d->buf;
+
+    consumed = d->checkpoint - d->buf;
     d->bufstart_ofs += consumed;
     d->residual_end = d->residual;
     switchtobuf(d, d->residual, d->residual_end);
@@ -190,17 +192,17 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
   }
 }
 
-// Suspends the decoder at the last checkpoint, and saves any unconsumed
-// bytes in our residual buffer.  This is necessary if we need more user
-// bytes to form a complete value, which might not be contiguous in the
-// user's buffers.  Always consumes all user bytes.
+/* Suspends the decoder at the last checkpoint, and saves any unconsumed
+ * bytes in our residual buffer.  This is necessary if we need more user
+ * bytes to form a complete value, which might not be contiguous in the
+ * user's buffers.  Always consumes all user bytes. */
 static size_t suspend_save(upb_pbdecoder *d) {
-  // We hit end-of-buffer before we could parse a full value.
-  // Save any unconsumed bytes (if any) to the residual buffer.
+  /* We hit end-of-buffer before we could parse a full value.
+   * Save any unconsumed bytes (if any) to the residual buffer. */
   d->pc = d->last;
 
   if (d->checkpoint == d->residual) {
-    // Checkpoint was in residual buf; append user byte(s) to residual buf.
+    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
     assert((d->residual_end - d->residual) + d->size_param <=
            sizeof(d->residual));
     if (!in_residual_buf(d, d->ptr)) {
@@ -209,10 +211,12 @@ static size_t suspend_save(upb_pbdecoder *d) {
     memcpy(d->residual_end, d->buf_param, d->size_param);
     d->residual_end += d->size_param;
   } else {
-    // Checkpoint was in user buf; old residual bytes not needed.
+    /* Checkpoint was in user buf; old residual bytes not needed. */
+    size_t save;
     assert(!in_residual_buf(d, d->checkpoint));
+
     d->ptr = d->checkpoint;
-    size_t save = curbufleft(d);
+    save = curbufleft(d);
     assert(save <= sizeof(d->residual));
     memcpy(d->residual, d->ptr, save);
     d->residual_end = d->residual + save;
@@ -223,19 +227,21 @@ static size_t suspend_save(upb_pbdecoder *d) {
   return d->size_param;
 }
 
-// Skips "bytes" bytes in the stream, which may be more than available.  If we
-// skip more bytes than are available, we return a long read count to the caller
-// indicating how many bytes the caller should skip before passing a new buffer.
+/* Skips "bytes" bytes in the stream, which may be more than available.  If we
+ * skip more bytes than are available, we return a long read count to the caller
+ * indicating how many bytes the caller should skip before passing a new buffer.
+ */
 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
   assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
   if (curbufleft(d) >= bytes) {
-    // Skipped data is all in current buffer.
+    /* Skipped data is all in current buffer. */
     advance(d, bytes);
     return DECODE_OK;
   } else {
-    // Skipped data extends beyond currently available buffers.
+    /* Skipped data extends beyond currently available buffers. */
+    size_t skip;
     d->pc = d->last;
-    size_t skip = bytes - curbufleft(d);
+    skip = bytes - curbufleft(d);
     d->bufstart_ofs += (d->end - d->buf) + skip;
     d->residual_end = d->residual;
     switchtobuf(d, d->residual, d->residual_end);
@@ -243,8 +249,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
   }
 }
 
-// Copies the next "bytes" bytes into "buf" and advances the stream.
-// Requires that this many bytes are available in the current buffer.
+/* Copies the next "bytes" bytes into "buf" and advances the stream.
+ * Requires that this many bytes are available in the current buffer. */
 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
                                          size_t bytes) {
   assert(bytes <= curbufleft(d));
@@ -252,9 +258,9 @@ UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
   advance(d, bytes);
 }
 
-// Slow path for getting the next "bytes" bytes, regardless of whether they are
-// available in the current buffer or not.  Returns a status code as described
-// in decoder.int.h.
+/* Slow path for getting the next "bytes" bytes, regardless of whether they are
+ * available in the current buffer or not.  Returns a status code as described
+ * in decoder.int.h. */
 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
                                           size_t bytes) {
   const size_t avail = curbufleft(d);
@@ -275,12 +281,13 @@ UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
   }
 }
 
-// Gets the next "bytes" bytes, regardless of whether they are available in the
-// current buffer or not.  Returns a status code as described in decoder.int.h.
+/* Gets the next "bytes" bytes, regardless of whether they are available in the
+ * current buffer or not.  Returns a status code as described in decoder.int.h.
+ */
 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
                                         size_t bytes) {
   if (curbufleft(d) >= bytes) {
-    // Buffer has enough data to satisfy.
+    /* Buffer has enough data to satisfy. */
     consumebytes(d, buf, bytes);
     return DECODE_OK;
   } else {
@@ -313,13 +320,13 @@ UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
 
 /* Decoding of wire types *****************************************************/
 
-// Slow path for decoding a varint from the current buffer position.
-// Returns a status code as described in decoder.int.h.
+/* Slow path for decoding a varint from the current buffer position.
+ * Returns a status code as described in decoder.int.h. */
 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
                                                       uint64_t *u64) {
-  *u64 = 0;
   uint8_t byte = 0x80;
   int bitpos;
+  *u64 = 0;
   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
     int32_t ret = getbytes(d, &byte, 1);
     if (ret >= 0) return ret;
@@ -332,15 +339,15 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
   return DECODE_OK;
 }
 
-// Decodes a varint from the current buffer position.
-// Returns a status code as described in decoder.int.h.
+/* Decodes a varint from the current buffer position.
+ * Returns a status code as described in decoder.int.h. */
 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
   if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
     *u64 = *d->ptr;
     advance(d, 1);
     return DECODE_OK;
   } else if (curbufleft(d) >= 10) {
-    // Fast case.
+    /* Fast case. */
     upb_decoderet r = upb_vdecode_fast(d->ptr);
     if (r.p == NULL) {
       seterr(d, kUnterminatedVarint);
@@ -350,22 +357,23 @@ UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
     *u64 = r.val;
     return DECODE_OK;
   } else {
-    // Slow case -- varint spans buffer seam.
+    /* Slow case -- varint spans buffer seam. */
     return upb_pbdecoder_decode_varint_slow(d, u64);
   }
 }
 
-// Decodes a 32-bit varint from the current buffer position.
-// Returns a status code as described in decoder.int.h.
+/* Decodes a 32-bit varint from the current buffer position.
+ * Returns a status code as described in decoder.int.h. */
 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
   uint64_t u64;
   int32_t ret = decode_varint(d, &u64);
   if (ret >= 0) return ret;
   if (u64 > UINT32_MAX) {
     seterr(d, "Unterminated 32-bit varint");
-    // TODO(haberman) guarantee that this function return is >= 0 somehow,
-    // so we know this path will always be treated as error by our caller.
-    // Right now the size_t -> int32_t can overflow and produce negative values.
+    /* TODO(haberman) guarantee that this function return is >= 0 somehow,
+     * so we know this path will always be treated as error by our caller.
+     * Right now the size_t -> int32_t can overflow and produce negative values.
+     */
     *u32 = 0;
     return upb_pbdecoder_suspend(d);
   }
@@ -373,22 +381,22 @@ UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
   return DECODE_OK;
 }
 
-// Decodes a fixed32 from the current buffer position.
-// Returns a status code as described in decoder.int.h.
-// TODO: proper byte swapping for big-endian machines.
+/* Decodes a fixed32 from the current buffer position.
+ * Returns a status code as described in decoder.int.h.
+ * TODO: proper byte swapping for big-endian machines. */
 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
   return getbytes(d, u32, 4);
 }
 
-// Decodes a fixed64 from the current buffer position.
-// Returns a status code as described in decoder.int.h.
-// TODO: proper byte swapping for big-endian machines.
+/* Decodes a fixed64 from the current buffer position.
+ * Returns a status code as described in decoder.int.h.
+ * TODO: proper byte swapping for big-endian machines. */
 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
   return getbytes(d, u64, 8);
 }
 
-// Non-static versions of the above functions.
-// These are called by the JIT for fallback paths.
+/* Non-static versions of the above functions.
+ * These are called by the JIT for fallback paths. */
 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
   return decode_fixed32(d, u32);
 }
@@ -400,7 +408,7 @@ int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
 static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
 static float  as_float(uint32_t n)  { float  f; memcpy(&f, &n, 4); return f; }
 
-// Pushes a frame onto the decoder stack.
+/* Pushes a frame onto the decoder stack. */
 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
   upb_pbdecoder_frame *fr = d->top;
 
@@ -421,17 +429,17 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
 }
 
 static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
-  // While we expect to see an "end" tag (either ENDGROUP or a non-sequence
-  // field number) prior to hitting any enclosing submessage end, pushing our
-  // existing delim end prevents us from continuing to parse values from a
-  // corrupt proto that doesn't give us an END tag in time.
+  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
+   * field number) prior to hitting any enclosing submessage end, pushing our
+   * existing delim end prevents us from continuing to parse values from a
+   * corrupt proto that doesn't give us an END tag in time. */
   if (!decoder_push(d, d->top->end_ofs))
     return false;
   d->top->groupnum = arg;
   return true;
 }
 
-// Pops a frame from the decoder stack.
+/* Pops a frame from the decoder stack. */
 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
 
 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
@@ -440,7 +448,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
   size_t bytes = upb_value_size(expected);
   size_t read = peekbytes(d, &data, bytes);
   if (read == bytes && data == expected) {
-    // Advance past matched bytes.
+    /* Advance past matched bytes. */
     int32_t ok = getbytes(d, &data, read);
     UPB_ASSERT_VAR(ok, ok < 0);
     return DECODE_OK;
@@ -468,7 +476,7 @@ have_tag:
       return upb_pbdecoder_suspend(d);
     }
 
-    // TODO: deliver to unknown field callback.
+    /* TODO: deliver to unknown field callback. */
     switch (wire_type) {
       case UPB_WIRE_TYPE_32BIT:
         CHECK_RETURN(skip(d, 4));
@@ -511,29 +519,29 @@ have_tag:
 
     if (d->ptr == d->delim_end) {
       seterr(d, "Enclosing submessage ended in the middle of value or group");
-      // Unlike most errors we notice during parsing, right now we have consumed
-      // all of the user's input.
-      //
-      // There are three different options for how to handle this case:
-      //
-      //   1. decode() = short count, error = set
-      //   2. decode() = full count, error = set
-      //   3. decode() = full count, error NOT set, short count and error will
-      //      be reported on next call to decode() (or end())
-      //
-      // (1) and (3) have the advantage that they preserve the invariant that an
-      // error occurs iff decode() returns a short count.
-      //
-      // (2) and (3) have the advantage of reflecting the fact that all of the
-      // bytes were in fact parsed (and possibly delivered to the unknown field
-      // handler, in the future when that is supported).
-      //
-      // (3) requires extra state in the decode (a place to store the "permanent
-      // error" that we should return for all subsequent attempts to decode).
-      // But we likely want this anyway.
-      //
-      // Right now we do (1), thanks to the fact that we checkpoint *after* this
-      // check.  (3) may be a better choice long term; unclear at the moment.
+      /* Unlike most errors we notice during parsing, right now we have consumed
+       * all of the user's input.
+       *
+       * There are three different options for how to handle this case:
+       *
+       *   1. decode() = short count, error = set
+       *   2. decode() = full count, error = set
+       *   3. decode() = full count, error NOT set, short count and error will
+       *      be reported on next call to decode() (or end())
+       *
+       * (1) and (3) have the advantage that they preserve the invariant that an
+       * error occurs iff decode() returns a short count.
+       *
+       * (2) and (3) have the advantage of reflecting the fact that all of the
+       * bytes were in fact parsed (and possibly delivered to the unknown field
+       * handler, in the future when that is supported).
+       *
+       * (3) requires extra state in the decoder (a place to store the
+       * "permanent error" that we should return for all subsequent attempts to
+       * decode).  But we likely want this anyway.
+       *
+       * Right now we do (1), thanks to the fact that we checkpoint *after* this
+       * check.  (3) may be a better choice long term; unclear at the moment. */
       return upb_pbdecoder_suspend(d);
     }
 
@@ -548,24 +556,27 @@ static void goto_endmsg(upb_pbdecoder *d) {
   d->pc = d->top->base + upb_value_getuint64(v);
 }
 
-// Parses a tag and jumps to the corresponding bytecode instruction for this
-// field.
-//
-// If the tag is unknown (or the wire type doesn't match), parses the field as
-// unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
-// instruction for the end of message.
+/* Parses a tag and jumps to the corresponding bytecode instruction for this
+ * field.
+ *
+ * If the tag is unknown (or the wire type doesn't match), parses the field as
+ * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
+ * instruction for the end of message. */
 static int32_t dispatch(upb_pbdecoder *d) {
   upb_inttable *dispatch = d->top->dispatch;
-
-  // Decode tag.
   uint32_t tag;
+  uint8_t wire_type;
+  uint32_t fieldnum;
+  upb_value val;
+  int32_t ret;
+
+  /* Decode tag. */
   CHECK_RETURN(decode_v32(d, &tag));
-  uint8_t wire_type = tag & 0x7;
-  uint32_t fieldnum = tag >> 3;
+  wire_type = tag & 0x7;
+  fieldnum = tag >> 3;
 
-  // Lookup tag.  Because of packed/non-packed compatibility, we have to
-  // check the wire type against two possibilities.
-  upb_value val;
+  /* Lookup tag.  Because of packed/non-packed compatibility, we have to
+   * check the wire type against two possibilities. */
   if (fieldnum != DISPATCH_ENDMSG &&
       upb_inttable_lookup32(dispatch, fieldnum, &val)) {
     uint64_t v = upb_value_getuint64(val);
@@ -581,17 +592,17 @@ static int32_t dispatch(upb_pbdecoder *d) {
     }
   }
 
-  // Unknown field or ENDGROUP.
-  int32_t ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
+  /* Unknown field or ENDGROUP. */
+  ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
 
   if (ret == DECODE_ENDGROUP) {
     goto_endmsg(d);
     return DECODE_OK;
   } else if (ret == DECODE_OK) {
-    // We just consumed some input, so we might now have consumed all the data
-    // in the delmited region.  Since every opcode that can trigger dispatch is
-    // directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
-    // delimited end.
+    /* We just consumed some input, so we might now have consumed all the data
+     * in the delimited region.  Since every opcode that can trigger dispatch is
+     * directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
+     * delimited end. */
     d->pc = d->last - 1;
     assert(getop(*d->pc) == OP_CHECKDELIM);
     return DECODE_OK;
@@ -600,8 +611,8 @@ static int32_t dispatch(upb_pbdecoder *d) {
   return ret;
 }
 
-// Callers know that the stack is more than one deep because the opcodes that
-// call this only occur after PUSH operations.
+/* Callers know that the stack is more than one deep because the opcodes that
+ * call this only occur after PUSH operations. */
 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
   assert(d->top != d->stack);
   return d->top - 1;
@@ -610,14 +621,15 @@ upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
 
 /* The main decoding loop *****************************************************/
 
-// The main decoder VM function.  Uses traditional bytecode dispatch loop with a
-// switch() statement.
+/* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
+ * switch() statement. */
 size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
   upb_pbdecoder *d = closure;
   const mgroup *group = hd;
+  int32_t result;
   assert(buf);
-  int32_t result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
+  result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
   if (result == DECODE_ENDGROUP) {
     goto_endmsg(d);
   }
@@ -634,11 +646,16 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
   })
 
   while(1) {
+    int32_t instruction;
+    opcode op;
+    uint32_t arg;
+    int32_t longofs;
+
     d->last = d->pc;
-    int32_t instruction = *d->pc++;
-    opcode op = getop(instruction);
-    uint32_t arg = instruction >> 8;
-    int32_t longofs = arg;
+    instruction = *d->pc++;
+    op = getop(instruction);
+    arg = instruction >> 8;
+    longofs = arg;
     assert(d->ptr != d->residual_end);
 #ifdef UPB_DUMP_BYTECODE
     fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
@@ -653,9 +670,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
             arg);
 #endif
     switch (op) {
-      // Technically, we are losing data if we see a 32-bit varint that is not
-      // properly sign-extended.  We could detect this and error about the data
-      // loss, but proto2 does not do this, so we pass.
+      /* Technically, we are losing data if we see a 32-bit varint that is not
+       * properly sign-extended.  We could detect this and error about the data
+       * loss, but proto2 does not do this, so we pass. */
       PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
       PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
       PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
@@ -700,7 +717,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
         upb_pbdecoder_frame *outer = outer_frame(d);
         CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
         if (len == 0) {
-          d->pc++;  // Skip OP_STRING.
+          d->pc++;  /* Skip OP_STRING. */
         }
       )
       VMCASE(OP_STRING,
@@ -712,15 +729,15 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
             return upb_pbdecoder_suspend(d);
           } else {
             int32_t ret = skip(d, n);
-            // This shouldn't return DECODE_OK, because n > len.
+            /* This shouldn't return DECODE_OK, because n > len. */
             assert(ret >= 0);
             return ret;
           }
         }
         advance(d, n);
         if (n < len || d->delim_end == NULL) {
-          // We aren't finished with this string yet.
-          d->pc--;  // Repeat OP_STRING.
+          /* We aren't finished with this string yet. */
+          d->pc--;  /* Repeat OP_STRING. */
           if (n > 0) checkpoint(d);
           return upb_pbdecoder_suspend(d);
         }
@@ -748,8 +765,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
         set_delim_end(d);
       )
       VMCASE(OP_CHECKDELIM,
-        // We are guaranteed of this assert because we never allow ourselves to
-        // consume bytes beyond data_end, which covers delim_end when non-NULL.
+        /* This assert is guaranteed to hold because we never allow ourselves to
+         * consume bytes beyond data_end, which covers delim_end when non-NULL.
+         */
         assert(!(d->delim_end && d->ptr > d->delim_end));
         if (d->ptr == d->delim_end)
           d->pc += longofs;
@@ -766,8 +784,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
         d->pc += longofs;
       )
       VMCASE(OP_TAG1,
+        uint8_t expected;
         CHECK_SUSPEND(curbufleft(d) > 0);
-        uint8_t expected = (arg >> 8) & 0xff;
+        expected = (arg >> 8) & 0xff;
         if (*d->ptr == expected) {
           advance(d, 1);
         } else {
@@ -778,13 +797,14 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
             CHECK_RETURN(dispatch(d));
           } else {
             d->pc += shortofs;
-            break; // Avoid checkpoint().
+            break; /* Avoid checkpoint(). */
           }
         }
       )
       VMCASE(OP_TAG2,
+        uint16_t expected;
         CHECK_SUSPEND(curbufleft(d) > 0);
-        uint16_t expected = (arg >> 8) & 0xffff;
+        expected = (arg >> 8) & 0xffff;
         if (curbufleft(d) >= 2) {
           uint16_t actual;
           memcpy(&actual, d->ptr, 2);
@@ -801,9 +821,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
       )
       VMCASE(OP_TAGN, {
         uint64_t expected;
+        int32_t result;
         memcpy(&expected, d->pc, 8);
         d->pc += 2;
-        int32_t result = upb_pbdecoder_checktag_slow(d, expected);
+        result = upb_pbdecoder_checktag_slow(d, expected);
         if (result == DECODE_MISMATCH) goto badtag;
         if (result >= 0) return result;
       })
@@ -829,9 +850,9 @@ void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
 }
 
 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
+  upb_pbdecoder *d = closure;
   UPB_UNUSED(hd);
   UPB_UNUSED(size_hint);
-  upb_pbdecoder *d = closure;
   d->top->end_ofs = UINT64_MAX;
   d->bufstart_ofs = 0;
   d->call_len = 0;
@@ -841,6 +862,11 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
   upb_pbdecoder *d = closure;
   const upb_pbdecodermethod *method = handler_data;
+  uint64_t end;
+  char dummy;
+#ifdef UPB_USE_JIT_X64
+  const mgroup *group = (const mgroup*)method->group;
+#endif
 
   if (d->residual_end > d->residual) {
     seterr(d, "Unexpected EOF");
@@ -852,25 +878,24 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
     return false;
   }
 
-  // Message ends here.
-  uint64_t end = offset(d);
+  /* Message ends here. */
+  end = offset(d);
   d->top->end_ofs = end;
 
-  char dummy;
 #ifdef UPB_USE_JIT_X64
-  const mgroup *group = (const mgroup*)method->group;
   if (group->jit_code) {
     if (d->top != d->stack)
       d->stack->end_ofs = 0;
     group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
-  } else {
+  } else
 #endif
-    d->stack->end_ofs = end;
+  {
     const uint32_t *p = d->pc;
-    // Check the previous bytecode, but guard against beginning.
+    d->stack->end_ofs = end;
+    /* Check the previous bytecode, but guard against beginning. */
     if (p != method->code_base.ptr) p--;
     if (getop(*p) == OP_CHECKDELIM) {
-      // Rewind from OP_TAG* to OP_CHECKDELIM.
+      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
       assert(getop(*d->pc) == OP_TAG1 ||
              getop(*d->pc) == OP_TAG2 ||
              getop(*d->pc) == OP_TAGN ||
@@ -878,9 +903,7 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
       d->pc = p;
     }
     upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
-#ifdef UPB_USE_JIT_X64
   }
-#endif
 
   if (d->call_len != 0) {
     seterr(d, "Unexpected EOF");
@@ -909,8 +932,8 @@ static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
 
 #ifdef UPB_USE_JIT_X64
   if (d->method_->is_native_) {
-    // Each native stack frame needs two pointers, plus we need a few frames for
-    // the enter/exit trampolines.
+    /* Each native stack frame needs two pointers, plus we need a few frames for
+     * the enter/exit trampolines. */
     size_t ret = entries * sizeof(void*) * 2;
     ret += sizeof(void*) * 10;
     return ret;
@@ -951,7 +974,7 @@ upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
   }
   upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
 
-  // If this fails, increase the value in decoder.h.
+  /* If this fails, increase the value in decoder.h. */
   assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
   return d;
 }
@@ -976,12 +999,12 @@ bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
   assert(d->top >= d->stack);
 
   if (max < (size_t)(d->top - d->stack)) {
-    // Can't set a limit smaller than what we are currently at.
+    /* Can't set a limit smaller than what we are currently at. */
     return false;
   }
 
   if (max > d->stack_size) {
-    // Need to reallocate stack and callstack to accommodate.
+    /* Need to reallocate stack and callstack to accommodate. */
     size_t old_size = stacksize(d, d->stack_size);
     size_t new_size = stacksize(d, max);
     void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index d37718c..6a9e1d4 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -28,134 +28,111 @@ class CodeCache;
 class Decoder;
 class DecoderMethod;
 class DecoderMethodOptions;
-}  // namespace pb
-}  // namespace upb
+}  /* namespace pb */
+}  /* namespace upb */
 #endif
 
-UPB_DECLARE_TYPE(upb::pb::CodeCache, upb_pbcodecache);
-UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder);
-UPB_DECLARE_TYPE(upb::pb::DecoderMethod, upb_pbdecodermethod);
-UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts);
+UPB_DECLARE_TYPE(upb::pb::CodeCache, upb_pbcodecache)
+UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder)
+UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts)
 
-// The parameters one uses to construct a DecoderMethod.
-// TODO(haberman): move allowjit here?  Seems more convenient for users.
-UPB_DEFINE_CLASS0(upb::pb::DecoderMethodOptions,
+UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted,
+                         upb_pbdecodermethod, upb_refcounted)
+
+#ifdef __cplusplus
+
+/* The parameters one uses to construct a DecoderMethod.
+ * TODO(haberman): move allowjit here?  Seems more convenient for users.
+ * TODO(haberman): move this to be heap allocated for ABI stability. */
+class upb::pb::DecoderMethodOptions {
  public:
-  // Parameter represents the destination handlers that this method will push
-  // to.
+  /* Parameter represents the destination handlers that this method will push
+   * to. */
   explicit DecoderMethodOptions(const Handlers* dest_handlers);
 
-  // Should the decoder push submessages to lazy handlers for fields that have
-  // them?  The caller should set this iff the lazy handlers expect data that is
-  // in protobuf binary format and the caller wishes to lazy parse it.
+  /* Should the decoder push submessages to lazy handlers for fields that have
+   * them?  The caller should set this iff the lazy handlers expect data that is
+   * in protobuf binary format and the caller wishes to lazy parse it. */
   void set_lazy(bool lazy);
-,
-UPB_DEFINE_STRUCT0(upb_pbdecodermethodopts,
+#else
+struct upb_pbdecodermethodopts {
+#endif
   const upb_handlers *handlers;
   bool lazy;
-));
+};
+
+#ifdef __cplusplus
 
-// Represents the code to parse a protobuf according to a destination Handlers.
-UPB_DEFINE_CLASS1(upb::pb::DecoderMethod, upb::RefCounted,
+/* Represents the code to parse a protobuf according to a destination
+ * Handlers. */
+class upb::pb::DecoderMethod {
  public:
-  // From upb::ReferenceCounted.
-  void Ref(const void* owner) const;
-  void Unref(const void* owner) const;
-  void DonateRef(const void* from, const void* to) const;
-  void CheckRef(const void* owner) const;
-
-  // The destination handlers that are statically bound to this method.
-  // This method is only capable of outputting to a sink that uses these
-  // handlers.
+  /* Include base methods from upb::RefCounted. */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* The destination handlers that are statically bound to this method.
+   * This method is only capable of outputting to a sink that uses these
+   * handlers. */
   const Handlers* dest_handlers() const;
 
-  // The input handlers for this decoder method.
+  /* The input handlers for this decoder method. */
   const BytesHandler* input_handler() const;
 
-  // Whether this method is native.
+  /* Whether this method is native. */
   bool is_native() const;
 
-  // Convenience method for generating a DecoderMethod without explicitly
-  // creating a CodeCache.
+  /* Convenience method for generating a DecoderMethod without explicitly
+   * creating a CodeCache. */
   static reffed_ptr<const DecoderMethod> New(const DecoderMethodOptions& opts);
 
  private:
-  UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod);
-,
-UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted,
-  // While compiling, the base is relative in "ofs", after compiling it is
-  // absolute in "ptr".
-  union {
-    uint32_t ofs;     // PC offset of method.
-    void *ptr;        // Pointer to bytecode or machine code for this method.
-  } code_base;
-
-  // The decoder method group to which this method belongs.  We own a ref.
-  // Owning a ref on the entire group is more coarse-grained than is strictly
-  // necessary; all we truly require is that methods we directly reference
-  // outlive us, while the group could contain many other messages we don't
-  // require.  But the group represents the messages that were
-  // allocated+compiled together, so it makes the most sense to free them
-  // together also.
-  const upb_refcounted *group;
-
-  // Whether this method is native code or bytecode.
-  bool is_native_;
-
-  // The handler one calls to invoke this method.
-  upb_byteshandler input_handler_;
-
-  // The destination handlers this method is bound to.  We own a ref.
-  const upb_handlers *dest_handlers_;
-
-  // Dispatch table -- used by both bytecode decoder and JIT when encountering a
-  // field number that wasn't the one we were expecting to see.  See
-  // decoder.int.h for the layout of this table.
-  upb_inttable dispatch;
-));
-
-// Preallocation hint: decoder won't allocate more bytes than this when first
-// constructed.  This hint may be an overestimate for some build configurations.
-// But if the decoder library is upgraded without recompiling the application,
-// it may be an underestimate.
+  UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod)
+};
+
+#endif
+
+/* Preallocation hint: decoder won't allocate more bytes than this when first
+ * constructed.  This hint may be an overestimate for some build configurations.
+ * But if the decoder library is upgraded without recompiling the application,
+ * it may be an underestimate. */
 #define UPB_PB_DECODER_SIZE 4400
 
 #ifdef __cplusplus
 
-// A Decoder receives binary protobuf data on its input sink and pushes the
-// decoded data to its output sink.
+/* A Decoder receives binary protobuf data on its input sink and pushes the
+ * decoded data to its output sink. */
 class upb::pb::Decoder {
  public:
-  // Constructs a decoder instance for the given method, which must outlive this
-  // decoder.  Any errors during parsing will be set on the given status, which
-  // must also outlive this decoder.
-  //
-  // The sink must match the given method.
+  /* Constructs a decoder instance for the given method, which must outlive this
+   * decoder.  Any errors during parsing will be set on the given status, which
+   * must also outlive this decoder.
+   *
+   * The sink must match the given method. */
   static Decoder* Create(Environment* env, const DecoderMethod* method,
                          Sink* output);
 
-  // Returns the DecoderMethod this decoder is parsing from.
+  /* Returns the DecoderMethod this decoder is parsing from. */
   const DecoderMethod* method() const;
 
-  // The sink on which this decoder receives input.
+  /* The sink on which this decoder receives input. */
   BytesSink* input();
 
-  // Returns number of bytes successfully parsed.
-  //
-  // This can be useful for determining the stream position where an error
-  // occurred.
-  //
-  // This value may not be up-to-date when called from inside a parsing
-  // callback.
+  /* Returns number of bytes successfully parsed.
+   *
+   * This can be useful for determining the stream position where an error
+   * occurred.
+   *
+   * This value may not be up-to-date when called from inside a parsing
+   * callback. */
   uint64_t BytesParsed() const;
 
-  // Gets/sets the parsing nexting limit.  If the total number of nested
-  // submessages and repeated fields hits this limit, parsing will fail.  This
-  // is a resource limit that controls the amount of memory used by the parsing
-  // stack.
-  //
-  // Setting the limit will fail if the parser is currently suspended at a depth
-  // greater than this, or if memory allocation of the stack fails.
+  /* Gets/sets the parsing nesting limit.  If the total number of nested
+   * submessages and repeated fields hits this limit, parsing will fail.  This
+   * is a resource limit that controls the amount of memory used by the parsing
+   * stack.
+   *
+   * Setting the limit will fail if the parser is currently suspended at a depth
+   * greater than this, or if memory allocation of the stack fails. */
   size_t max_nesting() const;
   bool set_max_nesting(size_t max);
 
@@ -164,57 +141,62 @@ class upb::pb::Decoder {
   static const size_t kSize = UPB_PB_DECODER_SIZE;
 
  private:
-  UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder);
+  UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder)
 };
 
-#endif  // __cplusplus
+#endif  /* __cplusplus */
 
-// A class for caching protobuf processing code, whether bytecode for the
-// interpreted decoder or machine code for the JIT.
-//
-// This class is not thread-safe.
-UPB_DEFINE_CLASS0(upb::pb::CodeCache,
+#ifdef __cplusplus
+
+/* A class for caching protobuf processing code, whether bytecode for the
+ * interpreted decoder or machine code for the JIT.
+ *
+ * This class is not thread-safe.
+ *
+ * TODO(haberman): move this to be heap allocated for ABI stability. */
+class upb::pb::CodeCache {
  public:
   CodeCache();
   ~CodeCache();
 
-  // Whether the cache is allowed to generate machine code.  Defaults to true.
-  // There is no real reason to turn it off except for testing or if you are
-  // having a specific problem with the JIT.
-  //
-  // Note that allow_jit = true does not *guarantee* that the code will be JIT
-  // compiled.  If this platform is not supported or the JIT was not compiled
-  // in, the code may still be interpreted.
+  /* Whether the cache is allowed to generate machine code.  Defaults to true.
+   * There is no real reason to turn it off except for testing or if you are
+   * having a specific problem with the JIT.
+   *
+   * Note that allow_jit = true does not *guarantee* that the code will be JIT
+   * compiled.  If this platform is not supported or the JIT was not compiled
+   * in, the code may still be interpreted. */
   bool allow_jit() const;
 
-  // This may only be called when the object is first constructed, and prior to
-  // any code generation, otherwise returns false and does nothing.
+  /* This may only be called when the object is first constructed, and prior to
+   * any code generation, otherwise returns false and does nothing. */
   bool set_allow_jit(bool allow);
 
-  // Returns a DecoderMethod that can push data to the given handlers.
-  // If a suitable method already exists, it will be returned from the cache.
-  //
-  // Specifying the destination handlers here allows the DecoderMethod to be
-  // statically bound to the destination handlers if possible, which can allow
-  // more efficient decoding.  However the returned method may or may not
-  // actually be statically bound.  But in all cases, the returned method can
-  // push data to the given handlers.
+  /* Returns a DecoderMethod that can push data to the given handlers.
+   * If a suitable method already exists, it will be returned from the cache.
+   *
+   * Specifying the destination handlers here allows the DecoderMethod to be
+   * statically bound to the destination handlers if possible, which can allow
+   * more efficient decoding.  However the returned method may or may not
+   * actually be statically bound.  But in all cases, the returned method can
+   * push data to the given handlers. */
   const DecoderMethod *GetDecoderMethod(const DecoderMethodOptions& opts);
 
-  // If/when someone needs to explicitly create a dynamically-bound
-  // DecoderMethod*, we can add a method to get it here.
+  /* If/when someone needs to explicitly create a dynamically-bound
+   * DecoderMethod*, we can add a method to get it here. */
 
  private:
-  UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache);
-,
-UPB_DEFINE_STRUCT0(upb_pbcodecache,
+  UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache)
+#else
+struct upb_pbcodecache {
+#endif
   bool allow_jit_;
 
-  // Array of mgroups.
+  /* Array of mgroups. */
   upb_inttable groups;
-));
+};
 
-UPB_BEGIN_EXTERN_C  // {
+UPB_BEGIN_EXTERN_C
 
 upb_pbdecoder *upb_pbdecoder_create(upb_env *e,
                                     const upb_pbdecodermethod *method,
@@ -230,12 +212,10 @@ void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
                                   const upb_handlers *h);
 void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy);
 
-void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner);
-void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner);
-void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m,
-                                   const void *from, const void *to);
-void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
-                                  const void *owner);
+
+/* Include refcounted methods like upb_pbdecodermethod_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_pbdecodermethod, upb_pbdecodermethod_upcast)
+
 const upb_handlers *upb_pbdecodermethod_desthandlers(
     const upb_pbdecodermethod *m);
 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
@@ -251,7 +231,7 @@ bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow);
 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
     upb_pbcodecache *c, const upb_pbdecodermethodopts *opts);
 
-UPB_END_EXTERN_C  // }
+UPB_END_EXTERN_C
 
 #ifdef __cplusplus
 
@@ -259,7 +239,7 @@ namespace upb {
 
 namespace pb {
 
-// static
+/* static */
 inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m,
                                 Sink* sink) {
   return upb_pbdecoder_create(env, m, sink);
@@ -288,18 +268,6 @@ inline void DecoderMethodOptions::set_lazy(bool lazy) {
   upb_pbdecodermethodopts_setlazy(this, lazy);
 }
 
-inline void DecoderMethod::Ref(const void *owner) const {
-  upb_pbdecodermethod_ref(this, owner);
-}
-inline void DecoderMethod::Unref(const void *owner) const {
-  upb_pbdecodermethod_unref(this, owner);
-}
-inline void DecoderMethod::DonateRef(const void *from, const void *to) const {
-  upb_pbdecodermethod_donateref(this, from, to);
-}
-inline void DecoderMethod::CheckRef(const void *owner) const {
-  upb_pbdecodermethod_checkref(this, owner);
-}
 inline const Handlers* DecoderMethod::dest_handlers() const {
   return upb_pbdecodermethod_desthandlers(this);
 }
@@ -309,7 +277,7 @@ inline const BytesHandler* DecoderMethod::input_handler() const {
 inline bool DecoderMethod::is_native() const {
   return upb_pbdecodermethod_isnative(this);
 }
-// static
+/* static */
 inline reffed_ptr<const DecoderMethod> DecoderMethod::New(
     const DecoderMethodOptions &opts) {
   const upb_pbdecodermethod *m = upb_pbdecodermethod_new(&opts, &m);
@@ -333,9 +301,9 @@ inline const DecoderMethod *CodeCache::GetDecoderMethod(
   return upb_pbcodecache_getdecodermethod(this, &opts);
 }
 
-}  // namespace pb
-}  // namespace upb
+}  /* namespace pb */
+}  /* namespace upb */
 
-#endif  // __cplusplus
+#endif  /* __cplusplus */
 
 #endif  /* UPB_DECODER_H_ */
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index 5522be7..ba18771 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -15,28 +15,40 @@
 #include "upb/handlers.h"
 #include "upb/pb/decoder.h"
 #include "upb/sink.h"
+#include "upb/structdefs.int.h"
 #include "upb/table.int.h"
 
-// Opcode definitions.  The canonical meaning of each opcode is its
-// implementation in the interpreter (the JIT is written to match this).
-//
-// All instructions have the opcode in the low byte.
-// Instruction format for most instructions is:
-//
-// +-------------------+--------+
-// |     arg (24)      | op (8) |
-// +-------------------+--------+
-//
-// Exceptions are indicated below.  A few opcodes are multi-word.
+/* C++ names are not actually used since this type isn't exposed to users. */
+#ifdef __cplusplus
+namespace upb {
+namespace pb {
+class MessageGroup;
+}  /* namespace pb */
+}  /* namespace upb */
+#endif
+UPB_DECLARE_DERIVED_TYPE(upb::pb::MessageGroup, upb::RefCounted,
+                         mgroup, upb_refcounted)
+
+/* Opcode definitions.  The canonical meaning of each opcode is its
+ * implementation in the interpreter (the JIT is written to match this).
+ *
+ * All instructions have the opcode in the low byte.
+ * Instruction format for most instructions is:
+ *
+ * +-------------------+--------+
+ * |     arg (24)      | op (8) |
+ * +-------------------+--------+
+ *
+ * Exceptions are indicated below.  A few opcodes are multi-word. */
 typedef enum {
-  // Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
-  // Arg for all of these is the upb selector for this field.
+  /* Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
+   * Arg for all of these is the upb selector for this field. */
 #define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type
   T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
   T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
 #undef T
-  OP_STARTMSG       = 9,   // No arg.
-  OP_ENDMSG         = 10,  // No arg.
+  OP_STARTMSG       = 9,   /* No arg. */
+  OP_ENDMSG         = 10,  /* No arg. */
   OP_STARTSEQ       = 11,
   OP_ENDSEQ         = 12,
   OP_STARTSUBMSG    = 14,
@@ -45,148 +57,185 @@ typedef enum {
   OP_STRING         = 21,
   OP_ENDSTR         = 22,
 
-  OP_PUSHTAGDELIM   = 23,  // No arg.
-  OP_PUSHLENDELIM   = 24,  // No arg.
-  OP_POP            = 25,  // No arg.
-  OP_SETDELIM       = 26,  // No arg.
-  OP_SETBIGGROUPNUM = 27,  // two words: | unused (24) | opc || groupnum (32) |
+  OP_PUSHTAGDELIM   = 23,  /* No arg. */
+  OP_PUSHLENDELIM   = 24,  /* No arg. */
+  OP_POP            = 25,  /* No arg. */
+  OP_SETDELIM       = 26,  /* No arg. */
+  OP_SETBIGGROUPNUM = 27,  /* two words:
+                            *   | unused (24)     | opc (8) |
+                            *   |        groupnum (32)      | */
   OP_CHECKDELIM     = 28,
   OP_CALL           = 29,
   OP_RET            = 30,
   OP_BRANCH         = 31,
 
-  // Different opcodes depending on how many bytes expected.
-  OP_TAG1           = 32,  // | expected tag (16) | jump target (8) | opc (8) |
-  OP_TAG2           = 33,  // | expected tag (16) | jump target (8) | opc (8) |
-  OP_TAGN           = 34,  // three words:
-                           //   | unused (16) | jump target(8) | opc (8) |
-                           //   |           expected tag 1 (32)          |
-                           //   |           expected tag 2 (32)          |
+  /* Different opcodes depending on how many bytes expected. */
+  OP_TAG1           = 32,  /* | match tag (16) | jump target (8) | opc (8) | */
+  OP_TAG2           = 33,  /* | match tag (16) | jump target (8) | opc (8) | */
+  OP_TAGN           = 34,  /* three words: */
+                           /*   | unused (16) | jump target(8) | opc (8) | */
+                           /*   |           match tag 1 (32)             | */
+                           /*   |           match tag 2 (32)             | */
 
-  OP_SETDISPATCH    = 35,  // N words:
-                           //   | unused (24)         | opc |
-                           //   | upb_inttable* (32 or 64)  |
+  OP_SETDISPATCH    = 35,  /* N words: */
+                           /*   | unused (24)         | opc | */
+                           /*   | upb_inttable* (32 or 64)  | */
 
-  OP_DISPATCH       = 36,  // No arg.
+  OP_DISPATCH       = 36,  /* No arg. */
 
-  OP_HALT           = 37,  // No arg.
+  OP_HALT           = 37   /* No arg. */
 } opcode;
 
 #define OP_MAX OP_HALT
 
 UPB_INLINE opcode getop(uint32_t instr) { return instr & 0xff; }
 
-// Method group; represents a set of decoder methods that had their code
-// emitted together, and must therefore be freed together.  Immutable once
-// created.  It is possible we may want to expose this to users at some point.
-//
-// Overall ownership of Decoder objects looks like this:
-//
-//                +----------+
-//                |          | <---> DecoderMethod
-//                | method   |
-// CodeCache ---> |  group   | <---> DecoderMethod
-//                |          |
-//                | (mgroup) | <---> DecoderMethod
-//                +----------+
-typedef struct {
+/* Method group; represents a set of decoder methods that had their code
+ * emitted together, and must therefore be freed together.  Immutable once
+ * created.  It is possible we may want to expose this to users at some point.
+ *
+ * Overall ownership of Decoder objects looks like this:
+ *
+ *                +----------+
+ *                |          | <---> DecoderMethod
+ *                | method   |
+ * CodeCache ---> |  group   | <---> DecoderMethod
+ *                |          |
+ *                | (mgroup) | <---> DecoderMethod
+ *                +----------+
+ */
+struct mgroup {
   upb_refcounted base;
 
-  // Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod.  We own refs on the
-  // methods.
+  /* Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod.  We own refs on the
+   * methods. */
   upb_inttable methods;
 
-  // When we add the ability to link to previously existing mgroups, we'll
-  // need an array of mgroups we reference here, and own refs on them.
+  /* When we add the ability to link to previously existing mgroups, we'll
+   * need an array of mgroups we reference here, and own refs on them. */
 
-  // The bytecode for our methods, if any exists.  Owned by us.
+  /* The bytecode for our methods, if any exists.  Owned by us. */
   uint32_t *bytecode;
   uint32_t *bytecode_end;
 
 #ifdef UPB_USE_JIT_X64
-  // JIT-generated machine code, if any.
+  /* JIT-generated machine code, if any. */
   upb_string_handlerfunc *jit_code;
-  // The size of the jit_code (required to munmap()).
+  /* The size of the jit_code (required to munmap()). */
   size_t jit_size;
   char *debug_info;
   void *dl;
 #endif
-} mgroup;
-
-// The maximum that any submessages can be nested.  Matches proto2's limit.
-// This specifies the size of the decoder's statically-sized array and therefore
-// setting it high will cause the upb::pb::Decoder object to be larger.
-//
-// If necessary we can add a runtime-settable property to Decoder that allow
-// this to be larger than the compile-time setting, but this would add
-// complexity, particularly since we would have to decide how/if to give users
-// the ability to set a custom memory allocation function.
+};
+
+/* The maximum that any submessages can be nested.  Matches proto2's limit.
+ * This specifies the size of the decoder's statically-sized array and therefore
+ * setting it high will cause the upb::pb::Decoder object to be larger.
+ *
+ * If necessary we can add a runtime-settable property to Decoder that allows
+ * this to be larger than the compile-time setting, but this would add
+ * complexity, particularly since we would have to decide how/if to give users
+ * the ability to set a custom memory allocation function. */
 #define UPB_DECODER_MAX_NESTING 64
 
-// Internal-only struct used by the decoder.
+/* Internal-only struct used by the decoder. */
 typedef struct {
-  // Space optimization note: we store two pointers here that the JIT
-  // doesn't need at all; the upb_handlers* inside the sink and
-  // the dispatch table pointer.  We can optimze so that the JIT uses
-  // smaller stack frames than the interpreter.  The only thing we need
-  // to guarantee is that the fallback routines can find end_ofs.
+  /* Space optimization note: we store two pointers here that the JIT
+   * doesn't need at all; the upb_handlers* inside the sink and
+   * the dispatch table pointer.  We can optimize so that the JIT uses
+   * smaller stack frames than the interpreter.  The only thing we need
+   * to guarantee is that the fallback routines can find end_ofs. */
   upb_sink sink;
 
-  // The absolute stream offset of the end-of-frame delimiter.
-  // Non-delimited frames (groups and non-packed repeated fields) reuse the
-  // delimiter of their parent, even though the frame may not end there.
-  //
-  // NOTE: the JIT stores a slightly different value here for non-top frames.
-  // It stores the value relative to the end of the enclosed message.  But the
-  // top frame is still stored the same way, which is important for ensuring
-  // that calls from the JIT into C work correctly.
+  /* The absolute stream offset of the end-of-frame delimiter.
+   * Non-delimited frames (groups and non-packed repeated fields) reuse the
+   * delimiter of their parent, even though the frame may not end there.
+   *
+   * NOTE: the JIT stores a slightly different value here for non-top frames.
+   * It stores the value relative to the end of the enclosed message.  But the
+   * top frame is still stored the same way, which is important for ensuring
+   * that calls from the JIT into C work correctly. */
   uint64_t end_ofs;
   const uint32_t *base;
 
-  // 0 indicates a length-delimited field.
-  // A positive number indicates a known group.
-  // A negative number indicates an unknown group.
+  /* 0 indicates a length-delimited field.
+   * A positive number indicates a known group.
+   * A negative number indicates an unknown group. */
   int32_t groupnum;
-  upb_inttable *dispatch;  // Not used by the JIT.
+  upb_inttable *dispatch;  /* Not used by the JIT. */
 } upb_pbdecoder_frame;
 
+struct upb_pbdecodermethod {
+  upb_refcounted base;
+
+  /* While compiling, the base is relative in "ofs", after compiling it is
+   * absolute in "ptr". */
+  union {
+    uint32_t ofs;     /* PC offset of method. */
+    void *ptr;        /* Pointer to bytecode or machine code for this method. */
+  } code_base;
+
+  /* The decoder method group to which this method belongs.  We own a ref.
+   * Owning a ref on the entire group is more coarse-grained than is strictly
+   * necessary; all we truly require is that methods we directly reference
+   * outlive us, while the group could contain many other messages we don't
+   * require.  But the group represents the messages that were
+   * allocated+compiled together, so it makes the most sense to free them
+   * together also. */
+  const upb_refcounted *group;
+
+  /* Whether this method is native code or bytecode. */
+  bool is_native_;
+
+  /* The handler one calls to invoke this method. */
+  upb_byteshandler input_handler_;
+
+  /* The destination handlers this method is bound to.  We own a ref. */
+  const upb_handlers *dest_handlers_;
+
+  /* Dispatch table -- used by both bytecode decoder and JIT when encountering a
+   * field number that wasn't the one we were expecting to see.  See
+   * the comment on upb_pbdecoder_packdispatch() for this table's layout. */
+  upb_inttable dispatch;
+};
+
 struct upb_pbdecoder {
   upb_env *env;
 
-  // Our input sink.
+  /* Our input sink. */
   upb_bytessink input_;
 
-  // The decoder method we are parsing with (owned).
+  /* The decoder method we are parsing with (owned). */
   const upb_pbdecodermethod *method_;
 
   size_t call_len;
   const uint32_t *pc, *last;
 
-  // Current input buffer and its stream offset.
+  /* Current input buffer and its stream offset. */
   const char *buf, *ptr, *end, *checkpoint;
 
-  // End of the delimited region, relative to ptr, or NULL if not in this buf.
+  /* End of the delimited region, relative to ptr, NULL if not in this buf. */
   const char *delim_end;
 
-  // End of the delimited region, relative to ptr, or end if not in this buf.
+  /* End of the delimited region, relative to ptr, end if not in this buf. */
   const char *data_end;
 
-  // Overall stream offset of "buf."
+  /* Overall stream offset of "buf." */
   uint64_t bufstart_ofs;
 
-  // Buffer for residual bytes not parsed from the previous buffer.
-  // The maximum number of residual bytes we require is 12; a five-byte
-  // unknown tag plus an eight-byte value, less one because the value
-  // is only a partial value.
+  /* Buffer for residual bytes not parsed from the previous buffer.
+   * The maximum number of residual bytes we require is 12; a five-byte
+   * unknown tag plus an eight-byte value, less one because the value
+   * is only a partial value. */
   char residual[12];
   char *residual_end;
 
-  // Stores the user buffer passed to our decode function.
+  /* Stores the user buffer passed to our decode function. */
   const char *buf_param;
   size_t size_param;
   const upb_bufhandle *handle;
 
-  // Our internal stack.
+  /* Our internal stack. */
   upb_pbdecoder_frame *stack, *top, *limit;
   const uint32_t **callstack;
   size_t stack_size;
@@ -194,22 +243,22 @@ struct upb_pbdecoder {
   upb_status *status;
 
 #ifdef UPB_USE_JIT_X64
-  // Used momentarily by the generated code to store a value while a user
-  // function is called.
+  /* Used momentarily by the generated code to store a value while a user
+   * function is called. */
   uint32_t tmp_len;
 
   const void *saved_rsp;
 #endif
 };
 
-// Decoder entry points; used as handlers.
+/* Decoder entry points; used as handlers. */
 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
 size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
                             size_t size, const upb_bufhandle *handle);
 bool upb_pbdecoder_end(void *closure, const void *handler_data);
 
-// Decoder-internal functions that the JIT calls to handle fallback paths.
+/* Decoder-internal functions that the JIT calls to handle fallback paths. */
 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                              size_t size, const upb_bufhandle *handle);
 size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
@@ -221,41 +270,42 @@ int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32);
 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64);
 void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
 
-// Error messages that are shared between the bytecode and JIT decoders.
+/* Error messages that are shared between the bytecode and JIT decoders. */
 extern const char *kPbDecoderStackOverflow;
 
-// Access to decoderplan members needed by the decoder.
+/* Access to decoderplan members needed by the decoder. */
 const char *upb_pbdecoder_getopname(unsigned int op);
 
-// JIT codegen entry point.
+/* JIT codegen entry point. */
 void upb_pbdecoder_jit(mgroup *group);
 void upb_pbdecoder_freejit(mgroup *group);
+UPB_REFCOUNTED_CMETHODS(mgroup, mgroup_upcast)
 
-// A special label that means "do field dispatch for this message and branch to
-// wherever that takes you."
+/* A special label that means "do field dispatch for this message and branch to
+ * wherever that takes you." */
 #define LABEL_DISPATCH 0
 
-// A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
-// RET) for branching to when we find an appropriate ENDGROUP tag.
+/* A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
+ * RET) for branching to when we find an appropriate ENDGROUP tag. */
 #define DISPATCH_ENDMSG 0
 
-// It's important to use this invalid wire type instead of 0 (which is a valid
-// wire type).
+/* It's important to use this invalid wire type instead of 0 (which is a valid
+ * wire type). */
 #define NO_WIRE_TYPE 0xff
 
-// The dispatch table layout is:
-//   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
-//
-// If wt1 matches, jump to the 48-bit offset.  If wt2 matches, lookup
-// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
-//
-// We need two wire types because of packed/non-packed compatibility.  A
-// primitive repeated field can use either wire type and be valid.  While we
-// could key the table on fieldnum+wiretype, the table would be 8x sparser.
-//
-// Storing two wire types in the primary value allows us to quickly rule out
-// the second wire type without needing to do a separate lookup (this case is
-// less common than an unknown field).
+/* The dispatch table layout is:
+ *   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
+ *
+ * If wt1 matches, jump to the 48-bit offset.  If wt2 matches, lookup
+ * (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
+ *
+ * We need two wire types because of packed/non-packed compatibility.  A
+ * primitive repeated field can use either wire type and be valid.  While we
+ * could key the table on fieldnum+wiretype, the table would be 8x sparser.
+ *
+ * Storing two wire types in the primary value allows us to quickly rule out
+ * the second wire type without needing to do a separate lookup (this case is
+ * less common than an unknown field). */
 UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
                                                uint8_t wt2) {
   return (ofs << 16) | (wt2 << 8) | wt1;
@@ -268,17 +318,17 @@ UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
   *ofs = dispatch >> 16;
 }
 
-// All of the functions in decoder.c that return int32_t return values according
-// to the following scheme:
-//   1. negative values indicate a return code from the following list.
-//   2. positive values indicate that error or end of buffer was hit, and
-//      that the decode function should immediately return the given value
-//      (the decoder state has already been suspended and is ready to be
-//      resumed).
+/* All of the functions in decoder.c that return int32_t return values according
+ * to the following scheme:
+ *   1. negative values indicate a return code from the following list.
+ *   2. positive values indicate that error or end of buffer was hit, and
+ *      that the decode function should immediately return the given value
+ *      (the decoder state has already been suspended and is ready to be
+ *      resumed). */
 #define DECODE_OK -1
-#define DECODE_MISMATCH -2  // Used only from checktag_slow().
-#define DECODE_ENDGROUP -3  // Used only from checkunknown().
+#define DECODE_MISMATCH -2  /* Used only from checktag_slow(). */
+#define DECODE_ENDGROUP -3  /* Used only from checkunknown(). */
 
 #define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; }
 
-#endif  // UPB_DECODER_INT_H_
+#endif  /* UPB_DECODER_INT_H_ */
diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c
index ca5ebc1..e704bbd 100644
--- a/upb/pb/encoder.c
+++ b/upb/pb/encoder.c
@@ -62,73 +62,74 @@
 
 #include <stdlib.h>
 
-// The output buffer is divided into segments; a segment is a string of data
-// that is "ready to go" -- it does not need any varint lengths inserted into
-// the middle.  The seams between segments are where varints will be inserted
-// once they are known.
-//
-// We also use the concept of a "run", which is a range of encoded bytes that
-// occur at a single submessage level.  Every segment contains one or more runs.
-//
-// A segment can span messages.  Consider:
-//
-//                  .--Submessage lengths---------.
-//                  |       |                     |
-//                  |       V                     V
-//                  V      | |---------------    | |-----------------
-// Submessages:    | |-----------------------------------------------
-// Top-level msg: ------------------------------------------------------------
-//
-// Segments:          -----   -------------------   -----------------
-// Runs:              *----   *--------------*---   *----------------
-// (* marks the start)
-//
-// Note that the top-level menssage is not in any segment because it does not
-// have any length preceding it.
-//
-// A segment is only interrupted when another length needs to be inserted.  So
-// observe how the second segment spans both the inner submessage and part of
-// the next enclosing message.
+/* The output buffer is divided into segments; a segment is a string of data
+ * that is "ready to go" -- it does not need any varint lengths inserted into
+ * the middle.  The seams between segments are where varints will be inserted
+ * once they are known.
+ *
+ * We also use the concept of a "run", which is a range of encoded bytes that
+ * occur at a single submessage level.  Every segment contains one or more runs.
+ *
+ * A segment can span messages.  Consider:
+ *
+ *                  .--Submessage lengths---------.
+ *                  |       |                     |
+ *                  |       V                     V
+ *                  V      | |---------------    | |-----------------
+ * Submessages:    | |-----------------------------------------------
+ * Top-level msg: ------------------------------------------------------------
+ *
+ * Segments:          -----   -------------------   -----------------
+ * Runs:              *----   *--------------*---   *----------------
+ * (* marks the start)
+ *
+ * Note that the top-level message is not in any segment because it does not
+ * have any length preceding it.
+ *
+ * A segment is only interrupted when another length needs to be inserted.  So
+ * observe how the second segment spans both the inner submessage and part of
+ * the next enclosing message. */
 typedef struct {
-  uint32_t msglen;  // The length to varint-encode before this segment.
-  uint32_t seglen;  // Length of the segment.
+  uint32_t msglen;  /* The length to varint-encode before this segment. */
+  uint32_t seglen;  /* Length of the segment. */
 } upb_pb_encoder_segment;
 
 struct upb_pb_encoder {
   upb_env *env;
 
-  // Our input and output.
+  /* Our input and output. */
   upb_sink input_;
   upb_bytessink *output_;
 
-  // The "subclosure" -- used as the inner closure as part of the bytessink
-  // protocol.
+  /* The "subclosure" -- used as the inner closure as part of the bytessink
+   * protocol. */
   void *subc;
 
-  // The output buffer and limit, and our current write position.  "buf"
-  // initially points to "initbuf", but is dynamically allocated if we need to
-  // grow beyond the initial size.
+  /* The output buffer and limit, and our current write position.  "buf"
+   * initially points to "initbuf", but is dynamically allocated if we need to
+   * grow beyond the initial size. */
   char *buf, *ptr, *limit;
 
-  // The beginning of the current run, or undefined if we are at the top level.
+  /* The beginning of the current run, or undefined if we are at the top
+   * level. */
   char *runbegin;
 
-  // The list of segments we are accumulating.
+  /* The list of segments we are accumulating. */
   upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
 
-  // The stack of enclosing submessages.  Each entry in the stack points to the
-  // segment where this submessage's length is being accumulated.
+  /* The stack of enclosing submessages.  Each entry in the stack points to the
+   * segment where this submessage's length is being accumulated. */
   int *stack, *top, *stacklimit;
 
-  // Depth of startmsg/endmsg calls.
+  /* Depth of startmsg/endmsg calls. */
   int depth;
 };
 
 /* low-level buffering ********************************************************/
 
-// Low-level functions for interacting with the output buffer.
+/* Low-level functions for interacting with the output buffer. */
 
-// TODO(haberman): handle pushback
+/* TODO(haberman): handle pushback */
 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
   size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
   UPB_ASSERT_VAR(n, n == len);
@@ -138,11 +139,12 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
   return &e->segbuf[*e->top];
 }
 
-// Call to ensure that at least "bytes" bytes are available for writing at
-// e->ptr.  Returns false if the bytes could not be allocated.
+/* Call to ensure that at least "bytes" bytes are available for writing at
+ * e->ptr.  Returns false if the bytes could not be allocated. */
 static bool reserve(upb_pb_encoder *e, size_t bytes) {
   if ((size_t)(e->limit - e->ptr) < bytes) {
-    // Grow buffer.
+    /* Grow buffer. */
+    char *new_buf;
     size_t needed = bytes + (e->ptr - e->buf);
     size_t old_size = e->limit - e->buf;
 
@@ -152,7 +154,7 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) {
       new_size *= 2;
     }
 
-    char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
+    new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
 
     if (new_buf == NULL) {
       return false;
@@ -167,22 +169,22 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) {
   return true;
 }
 
-// Call when "bytes" bytes have been writte at e->ptr.  The caller *must* have
-// previously called reserve() with at least this many bytes.
+/* Call when "bytes" bytes have been written at e->ptr.  The caller *must* have
+ * previously called reserve() with at least this many bytes. */
 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
   assert((size_t)(e->limit - e->ptr) >= bytes);
   e->ptr += bytes;
 }
 
-// Call when all of the bytes for a handler have been written.  Flushes the
-// bytes if possible and necessary, returning false if this failed.
+/* Call when all of the bytes for a handler have been written.  Flushes the
+ * bytes if possible and necessary, returning false if this failed. */
 static bool commit(upb_pb_encoder *e) {
   if (!e->top) {
-    // We aren't inside a delimited region.  Flush our accumulated bytes to
-    // the output.
-    //
-    // TODO(haberman): in the future we may want to delay flushing for
-    // efficiency reasons.
+    /* We aren't inside a delimited region.  Flush our accumulated bytes to
+     * the output.
+     *
+     * TODO(haberman): in the future we may want to delay flushing for
+     * efficiency reasons. */
     putbuf(e, e->buf, e->ptr - e->buf);
     e->ptr = e->buf;
   }
@@ -190,7 +192,7 @@ static bool commit(upb_pb_encoder *e) {
   return true;
 }
 
-// Writes the given bytes to the buffer, handling reserve/advance.
+/* Writes the given bytes to the buffer, handling reserve/advance. */
 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
   if (!reserve(e, len)) {
     return false;
@@ -201,32 +203,33 @@ static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
   return true;
 }
 
-// Finish the current run by adding the run totals to the segment and message
-// length.
+/* Finish the current run by adding the run totals to the segment and message
+ * length. */
 static void accumulate(upb_pb_encoder *e) {
+  size_t run_len;
   assert(e->ptr >= e->runbegin);
-  size_t run_len = e->ptr - e->runbegin;
+  run_len = e->ptr - e->runbegin;
   e->segptr->seglen += run_len;
   top(e)->msglen += run_len;
   e->runbegin = e->ptr;
 }
 
-// Call to indicate the start of delimited region for which the full length is
-// not yet known.  All data will be buffered until the length is known.
-// Delimited regions may be nested; their lengths will all be tracked properly.
+/* Call to indicate the start of delimited region for which the full length is
+ * not yet known.  All data will be buffered until the length is known.
+ * Delimited regions may be nested; their lengths will all be tracked properly. */
 static bool start_delim(upb_pb_encoder *e) {
   if (e->top) {
-    // We are already buffering, advance to the next segment and push it on the
-    // stack.
+    /* We are already buffering, advance to the next segment and push it on the
+     * stack. */
     accumulate(e);
 
     if (++e->top == e->stacklimit) {
-      // TODO(haberman): grow stack?
+      /* TODO(haberman): grow stack? */
       return false;
     }
 
     if (++e->segptr == e->seglimit) {
-      // Grow segment buffer.
+      /* Grow segment buffer. */
       size_t old_size =
           (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
       size_t new_size = old_size * 2;
@@ -242,7 +245,7 @@ static bool start_delim(upb_pb_encoder *e) {
       e->segbuf = new_buf;
     }
   } else {
-    // We were previously at the top level, start buffering.
+    /* We were previously at the top level, start buffering. */
     e->segptr = e->segbuf;
     e->top = e->stack;
     e->runbegin = e->ptr;
@@ -255,15 +258,16 @@ static bool start_delim(upb_pb_encoder *e) {
   return true;
 }
 
-// Call to indicate the end of a delimited region.  We now know the length of
-// the delimited region.  If we are not nested inside any other delimited
-// regions, we can now emit all of the buffered data we accumulated.
+/* Call to indicate the end of a delimited region.  We now know the length of
+ * the delimited region.  If we are not nested inside any other delimited
+ * regions, we can now emit all of the buffered data we accumulated. */
 static bool end_delim(upb_pb_encoder *e) {
+  size_t msglen;
   accumulate(e);
-  size_t msglen = top(e)->msglen;
+  msglen = top(e)->msglen;
 
   if (e->top == e->stack) {
-    // All lengths are now available, emit all buffered data.
+    /* All lengths are now available, emit all buffered data. */
     char buf[UPB_PB_VARINT_MAX_LEN];
     upb_pb_encoder_segment *s;
     const char *ptr = e->buf;
@@ -277,7 +281,8 @@ static bool end_delim(upb_pb_encoder *e) {
     e->ptr = e->buf;
     e->top = NULL;
   } else {
-    // Need to keep buffering; propagate length info into enclosing submessages.
+    /* Need to keep buffering; propagate length info into enclosing
+     * submessages. */
     --e->top;
     top(e)->msglen += msglen + upb_varint_size(msglen);
   }
@@ -288,14 +293,14 @@ static bool end_delim(upb_pb_encoder *e) {
 
 /* tag_t **********************************************************************/
 
-// A precomputed (pre-encoded) tag and length.
+/* A precomputed (pre-encoded) tag and length. */
 
 typedef struct {
   uint8_t bytes;
   char tag[7];
 } tag_t;
 
-// Allocates a new tag for this field, and sets it in these handlerattr.
+/* Allocates a new tag for this field, and sets it in these handlerattr. */
 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
                     upb_handlerattr *attr) {
   uint32_t n = upb_fielddef_number(f);
@@ -316,12 +321,12 @@ static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
 /* encoding of wire types *****************************************************/
 
 static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
-  // TODO(haberman): byte-swap for big endian.
+  /* TODO(haberman): byte-swap for big endian. */
   return encode_bytes(e, &val, sizeof(uint64_t));
 }
 
 static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
-  // TODO(haberman): byte-swap for big endian.
+  /* TODO(haberman): byte-swap for big endian. */
   return encode_bytes(e, &val, sizeof(uint32_t));
 }
 
@@ -408,19 +413,19 @@ static size_t encode_strbuf(void *c, const void *hd, const char *buf,
   }
 
 T(double,   double,   dbl2uint64,   encode_fixed64)
-T(float,    float,    flt2uint32,   encode_fixed32);
-T(int64,    int64_t,  uint64_t,     encode_varint);
-T(int32,    int32_t,  uint32_t,     encode_varint);
-T(fixed64,  uint64_t, uint64_t,     encode_fixed64);
-T(fixed32,  uint32_t, uint32_t,     encode_fixed32);
-T(bool,     bool,     bool,         encode_varint);
-T(uint32,   uint32_t, uint32_t,     encode_varint);
-T(uint64,   uint64_t, uint64_t,     encode_varint);
-T(enum,     int32_t,  uint32_t,     encode_varint);
-T(sfixed32, int32_t,  uint32_t,     encode_fixed32);
-T(sfixed64, int64_t,  uint64_t,     encode_fixed64);
-T(sint32,   int32_t,  upb_zzenc_32, encode_varint);
-T(sint64,   int64_t,  upb_zzenc_64, encode_varint);
+T(float,    float,    flt2uint32,   encode_fixed32)
+T(int64,    int64_t,  uint64_t,     encode_varint)
+T(int32,    int32_t,  uint32_t,     encode_varint)
+T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
+T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
+T(bool,     bool,     bool,         encode_varint)
+T(uint32,   uint32_t, uint32_t,     encode_varint)
+T(uint64,   uint64_t, uint64_t,     encode_varint)
+T(enum,     int32_t,  uint32_t,     encode_varint)
+T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
+T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
+T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
+T(sint64,   int64_t,  upb_zzenc_64, encode_varint)
 
 #undef T
 
@@ -428,13 +433,15 @@ T(sint64,   int64_t,  upb_zzenc_64, encode_varint);
 /* code to build the handlers *************************************************/
 
 static void newhandlers_callback(const void *closure, upb_handlers *h) {
+  const upb_msgdef *m;
+  upb_msg_field_iter i;
+
   UPB_UNUSED(closure);
 
   upb_handlers_setstartmsg(h, startmsg, NULL);
   upb_handlers_setendmsg(h, endmsg, NULL);
 
-  const upb_msgdef *m = upb_handlers_msgdef(h);
-  upb_msg_field_iter i;
+  m = upb_handlers_msgdef(h);
   for(upb_msg_field_begin(&i, m);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
@@ -446,7 +453,7 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
         packed ? UPB_WIRE_TYPE_DELIMITED
                : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
 
-    // Pre-encode the tag for this field.
+    /* Pre-encode the tag for this field. */
     new_tag(h, f, wt, &attr);
 
     if (packed) {
@@ -489,7 +496,7 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
         upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
         break;
       case UPB_DESCRIPTOR_TYPE_GROUP: {
-        // Endgroup takes a different tag (wire_type = END_GROUP).
+        /* Endgroup takes a different tag (wire_type = END_GROUP). */
         upb_handlerattr attr2;
         new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
 
@@ -525,7 +532,7 @@ upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
                                       upb_bytessink *output) {
   const size_t initial_bufsize = 256;
   const size_t initial_segbufsize = 16;
-  // TODO(haberman): make this configurable.
+  /* TODO(haberman): make this configurable. */
   const size_t stack_size = 64;
 #ifndef NDEBUG
   const size_t size_before = upb_env_bytesallocated(env);
@@ -554,7 +561,7 @@ upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
   e->subc = output->closure;
   e->ptr = e->buf;
 
-  // If this fails, increase the value in encoder.h.
+  /* If this fails, increase the value in encoder.h. */
   assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
   return e;
 }
diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h
index edff95b..167d33f 100644
--- a/upb/pb/encoder.h
+++ b/upb/pb/encoder.h
@@ -22,35 +22,35 @@
 namespace upb {
 namespace pb {
 class Encoder;
-}  // namespace pb
-}  // namespace upb
+}  /* namespace pb */
+}  /* namespace upb */
 #endif
 
-UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder);
+UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder)
 
 #define UPB_PBENCODER_MAX_NESTING 100
 
 /* upb::pb::Encoder ***********************************************************/
 
-// Preallocation hint: decoder won't allocate more bytes than this when first
-// constructed.  This hint may be an overestimate for some build configurations.
-// But if the decoder library is upgraded without recompiling the application,
-// it may be an underestimate.
+/* Preallocation hint: encoder won't allocate more bytes than this when first
+ * constructed.  This hint may be an overestimate for some build configurations.
+ * But if the encoder library is upgraded without recompiling the application,
+ * it may be an underestimate. */
 #define UPB_PB_ENCODER_SIZE 768
 
 #ifdef __cplusplus
 
 class upb::pb::Encoder {
  public:
-  // Creates a new encoder in the given environment.  The Handlers must have
-  // come from NewHandlers() below.
+  /* Creates a new encoder in the given environment.  The Handlers must have
+   * come from NewHandlers() below. */
   static Encoder* Create(Environment* env, const Handlers* handlers,
                          BytesSink* output);
 
-  // The input to the encoder.
+  /* The input to the encoder. */
   Sink* input();
 
-  // Creates a new set of handlers for this MessageDef.
+  /* Creates a new set of handlers for this MessageDef. */
   static reffed_ptr<const Handlers> NewHandlers(const MessageDef* msg);
 
   static const size_t kSize = UPB_PB_ENCODER_SIZE;
@@ -87,8 +87,8 @@ inline reffed_ptr<const Handlers> Encoder::NewHandlers(
   const Handlers* h = upb_pb_encoder_newhandlers(md, &h);
   return reffed_ptr<const Handlers>(h, &h);
 }
-}  // namespace pb
-}  // namespace upb
+}  /* namespace pb */
+}  /* namespace upb */
 
 #endif
 
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 1259dac..76c8356 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -15,28 +15,31 @@
 
 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
                                         void *owner, upb_status *status) {
-  // Create handlers.
+  /* Create handlers. */
+  const upb_pbdecodermethod *decoder_m;
   const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
+  upb_env env;
   upb_pbdecodermethodopts opts;
+  upb_pbdecoder *decoder;
+  upb_descreader *reader;
+  bool ok;
+  upb_def **ret = NULL;
+  upb_def **defs;
+
   upb_pbdecodermethodopts_init(&opts, reader_h);
-  const upb_pbdecodermethod *decoder_m =
-      upb_pbdecodermethod_new(&opts, &decoder_m);
+  decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
 
-  upb_env env;
   upb_env_init(&env);
   upb_env_reporterrorsto(&env, status);
 
-  upb_descreader *reader = upb_descreader_create(&env, reader_h);
-  upb_pbdecoder *decoder =
-      upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
-
-  // Push input data.
-  bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
+  reader = upb_descreader_create(&env, reader_h);
+  decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
 
-  upb_def **ret = NULL;
+  /* Push input data. */
+  ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
 
   if (!ok) goto cleanup;
-  upb_def **defs = upb_descreader_getdefs(reader, owner, n);
+  defs = upb_descreader_getdefs(reader, owner, n);
   ret = malloc(sizeof(upb_def*) * (*n));
   memcpy(ret, defs, sizeof(upb_def*) * (*n));
 
@@ -50,21 +53,24 @@ cleanup:
 bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
                                      upb_status *status) {
   int n;
+  bool success;
   upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
   if (!defs) return false;
-  bool success = upb_symtab_add(s, defs, n, &defs, status);
+  success = upb_symtab_add(s, defs, n, &defs, status);
   free(defs);
   return success;
 }
 
 char *upb_readfile(const char *filename, size_t *len) {
+  long size;
+  char *buf;
   FILE *f = fopen(filename, "rb");
   if(!f) return NULL;
   if(fseek(f, 0, SEEK_END) != 0) goto error;
-  long size = ftell(f);
+  size = ftell(f);
   if(size < 0) goto error;
   if(fseek(f, 0, SEEK_SET) != 0) goto error;
-  char *buf = malloc(size + 1);
+  buf = malloc(size + 1);
   if(size && fread(buf, size, 1, f) != 1) goto error;
   fclose(f);
   if (len) *len = size;
@@ -78,12 +84,13 @@ error:
 bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
                                           upb_status *status) {
   size_t len;
+  bool success;
   char *data = upb_readfile(fname, &len);
   if (!data) {
     if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname);
     return false;
   }
-  bool success = upb_load_descriptor_into_symtab(symtab, data, len, status);
+  success = upb_load_descriptor_into_symtab(symtab, data, len, status);
   free(data);
   return success;
 }
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
index 4bbc975..5073968 100644
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@@ -33,23 +33,23 @@
 extern "C" {
 #endif
 
-// Loads all defs from the given protobuf binary descriptor, setting default
-// accessors and a default layout on all messages.  The caller owns the
-// returned array of defs, which will be of length *n.  On error NULL is
-// returned and status is set (if non-NULL).
+/* Loads all defs from the given protobuf binary descriptor, setting default
+ * accessors and a default layout on all messages.  The caller owns the
+ * returned array of defs, which will be of length *n.  On error NULL is
+ * returned and status is set (if non-NULL). */
 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
                                         void *owner, upb_status *status);
 
-// Like the previous but also adds the loaded defs to the given symtab.
+/* Like the previous but also adds the loaded defs to the given symtab. */
 bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str,
                                      size_t len, upb_status *status);
 
-// Like the previous but also reads the descriptor from the given filename.
+/* Like the previous but also reads the descriptor from the given filename. */
 bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
                                           upb_status *status);
 
-// Reads the given filename into a character string, returning NULL if there
-// was an error.
+/* Reads the given filename into a character string, returning NULL if there
+ * was an error. */
 char *upb_readfile(const char *filename, size_t *len);
 
 #ifdef __cplusplus
@@ -57,8 +57,8 @@ char *upb_readfile(const char *filename, size_t *len);
 
 namespace upb {
 
-// All routines that load descriptors expect the descriptor to be a
-// FileDescriptorSet.
+/* All routines that load descriptors expect the descriptor to be a
+ * FileDescriptorSet. */
 inline bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
                                          Status* status) {
   return upb_load_descriptor_file_into_symtab(s, fname, status);
@@ -69,14 +69,14 @@ inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
   return upb_load_descriptor_into_symtab(s, str, len, status);
 }
 
-// Templated so it can accept both string and std::string.
+/* Templated so it can accept both string and std::string. */
 template <typename T>
 bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
   return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status);
 }
 
-}  // namespace upb
+}  /* namespace upb */
 
 #endif
 
-#endif
+#endif  /* UPB_GLUE_H */
diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c
index 07f951d..b772af3 100644
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@@ -13,6 +13,7 @@
 #include <ctype.h>
 #include <float.h>
 #include <inttypes.h>
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -50,22 +51,24 @@ static int endfield(upb_textprinter *p) {
 
 static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                       bool preserve_utf8) {
-  // Based on CEscapeInternal() from Google's protobuf release.
+  /* Based on CEscapeInternal() from Google's protobuf release. */
   char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
   const char *end = buf + len;
 
-  // I think hex is prettier and more useful, but proto2 uses octal; should
-  // investigate whether it can parse hex also.
+  /* I think hex is prettier and more useful, but proto2 uses octal; should
+   * investigate whether it can parse hex also. */
   const bool use_hex = false;
-  bool last_hex_escape = false; // true if last output char was \xNN
+  bool last_hex_escape = false; /* true if last output char was \xNN */
 
   for (; buf < end; buf++) {
+    bool is_hex_escape;
+
     if (dstend - dst < 4) {
       upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
       dst = dstbuf;
     }
 
-    bool is_hex_escape = false;
+    is_hex_escape = false;
     switch (*buf) {
       case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
       case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
@@ -74,9 +77,9 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len,
       case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
       case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
       default:
-        // Note that if we emit \xNN and the buf character after that is a hex
-        // digit then that digit must be escaped too to prevent it being
-        // interpreted as part of the character code by C.
+        /* Note that if we emit \xNN and the buf character after that is a hex
+         * digit then that digit must be escaped too to prevent it being
+         * interpreted as part of the character code by C. */
         if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
             (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
           sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
@@ -88,29 +91,38 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len,
     }
     last_hex_escape = is_hex_escape;
   }
-  // Flush remaining data.
+  /* Flush remaining data. */
   upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
   return 0;
 }
 
+#if defined(__GNUC__) && !defined(va_copy)
+#define va_copy(a, b) __va_copy(a, b)
+#endif
+
 bool putf(upb_textprinter *p, const char *fmt, ...) {
   va_list args;
+  va_list args_copy;
+  char *str;
+  int written;
+  int len;
+  bool ok;
+
   va_start(args, fmt);
 
-  // Run once to get the length of the string.
-  va_list args_copy;
+  /* Size-only pass: vsnprintf(NULL, 0) returns the length, writes nothing. */
   va_copy(args_copy, args);
-  int len = vsnprintf(NULL, 0, fmt, args_copy);
+  len = vsnprintf(NULL, 0, fmt, args_copy);
   va_end(args_copy);
 
-  // + 1 for NULL terminator (vsnprintf() requires it even if we don't).
-  char *str = malloc(len + 1);
+  /* + 1 for NULL terminator (vsnprintf() requires it even if we don't). */
+  str = malloc(len + 1);
   if (!str) return false;
-  int written = vsnprintf(str, len + 1, fmt, args);
+  written = vsnprintf(str, len + 1, fmt, args);
   va_end(args);
   UPB_ASSERT_VAR(written, written == len);
 
-  bool ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
+  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
   free(str);
   return ok;
 }
@@ -119,8 +131,8 @@ bool putf(upb_textprinter *p, const char *fmt, ...) {
 /* handlers *******************************************************************/
 
 static bool textprinter_startmsg(void *c, const void *hd) {
-  UPB_UNUSED(hd);
   upb_textprinter *p = c;
+  UPB_UNUSED(hd);
   if (p->indent_depth_ == 0) {
     upb_bytessink_start(p->output_, 0, &p->subc);
   }
@@ -128,9 +140,9 @@ static bool textprinter_startmsg(void *c, const void *hd) {
 }
 
 static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
+  upb_textprinter *p = c;
   UPB_UNUSED(hd);
   UPB_UNUSED(s);
-  upb_textprinter *p = c;
   if (p->indent_depth_ == 0) {
     upb_bytessink_end(p->output_);
   }
@@ -167,14 +179,14 @@ err:
 
 TYPE(int32,  int32_t,  "%" PRId32)
 TYPE(int64,  int64_t,  "%" PRId64)
-TYPE(uint32, uint32_t, "%" PRIu32);
+TYPE(uint32, uint32_t, "%" PRIu32)
 TYPE(uint64, uint64_t, "%" PRIu64)
 TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
 TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
 
 #undef TYPE
 
-// Output a symbolic value from the enum if found, else just print as int32.
+/* Output a symbolic value from the enum if found, else just print as int32. */
 static bool textprinter_putenum(void *closure, const void *handler_data,
                                 int32_t val) {
   upb_textprinter *p = closure;
@@ -194,17 +206,17 @@ static bool textprinter_putenum(void *closure, const void *handler_data,
 
 static void *textprinter_startstr(void *closure, const void *handler_data,
                       size_t size_hint) {
+  upb_textprinter *p = closure;
   const upb_fielddef *f = handler_data;
   UPB_UNUSED(size_hint);
-  upb_textprinter *p = closure;
   indent(p);
   putf(p, "%s: \"", upb_fielddef_name(f));
   return p;
 }
 
 static bool textprinter_endstr(void *closure, const void *handler_data) {
-  UPB_UNUSED(handler_data);
   upb_textprinter *p = closure;
+  UPB_UNUSED(handler_data);
   putf(p, "\"");
   endfield(p);
   return true;
@@ -212,9 +224,9 @@ static bool textprinter_endstr(void *closure, const void *handler_data) {
 
 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
                                  size_t len, const upb_bufhandle *handle) {
-  UPB_UNUSED(handle);
   upb_textprinter *p = closure;
   const upb_fielddef *f = hd;
+  UPB_UNUSED(handle);
   CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
   return len;
 err:
@@ -233,8 +245,8 @@ err:
 }
 
 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
-  UPB_UNUSED(handler_data);
   upb_textprinter *p = closure;
+  UPB_UNUSED(handler_data);
   p->indent_depth_--;
   CHECK(indent(p));
   upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
@@ -245,13 +257,13 @@ err:
 }
 
 static void onmreg(const void *c, upb_handlers *h) {
-  UPB_UNUSED(c);
   const upb_msgdef *m = upb_handlers_msgdef(h);
+  upb_msg_field_iter i;
+  UPB_UNUSED(c);
 
   upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
   upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
 
-  upb_msg_field_iter i;
   for(upb_msg_field_begin(&i, m);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h
index 3ba3403..4b0050f 100644
--- a/upb/pb/textprinter.h
+++ b/upb/pb/textprinter.h
@@ -15,18 +15,18 @@
 namespace upb {
 namespace pb {
 class TextPrinter;
-}  // namespace pb
-}  // namespace upb
+}  /* namespace pb */
+}  /* namespace upb */
 #endif
 
-UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter);
+UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter)
 
 #ifdef __cplusplus
 
 class upb::pb::TextPrinter {
  public:
-  // The given handlers must have come from NewHandlers().  It must outlive the
-  // TextPrinter.
+  /* The given handlers must have come from NewHandlers().  It must outlive the
+   * TextPrinter. */
   static TextPrinter *Create(Environment *env, const upb::Handlers *handlers,
                              BytesSink *output);
 
@@ -34,8 +34,8 @@ class upb::pb::TextPrinter {
 
   Sink* input();
 
-  // If handler caching becomes a requirement we can add a code cache as in
-  // decoder.h
+  /* If handler caching becomes a requirement we can add a code cache as in
+   * decoder.h */
   static reffed_ptr<const Handlers> NewHandlers(const MessageDef* md);
 };
 
@@ -43,7 +43,7 @@ class upb::pb::TextPrinter {
 
 UPB_BEGIN_EXTERN_C
 
-// C API.
+/* C API. */
 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
                                         upb_bytessink *output);
 void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line);
@@ -74,8 +74,8 @@ inline reffed_ptr<const Handlers> TextPrinter::NewHandlers(
   const Handlers* h = upb_textprinter_newhandlers(md, &h);
   return reffed_ptr<const Handlers>(h, &h);
 }
-}  // namespace pb
-}  // namespace upb
+}  /* namespace pb */
+}  /* namespace upb */
 
 #endif
 
diff --git a/upb/pb/varint.c b/upb/pb/varint.c
index 365deb4..04767eb 100644
--- a/upb/pb/varint.c
+++ b/upb/pb/varint.c
@@ -7,32 +7,33 @@
 
 #include "upb/pb/varint.int.h"
 
-// Index is descriptor type.
+/* Index is descriptor type. */
 const uint8_t upb_pb_native_wire_types[] = {
-  UPB_WIRE_TYPE_END_GROUP,     // ENDGROUP
-  UPB_WIRE_TYPE_64BIT,         // DOUBLE
-  UPB_WIRE_TYPE_32BIT,         // FLOAT
-  UPB_WIRE_TYPE_VARINT,        // INT64
-  UPB_WIRE_TYPE_VARINT,        // UINT64
-  UPB_WIRE_TYPE_VARINT,        // INT32
-  UPB_WIRE_TYPE_64BIT,         // FIXED64
-  UPB_WIRE_TYPE_32BIT,         // FIXED32
-  UPB_WIRE_TYPE_VARINT,        // BOOL
-  UPB_WIRE_TYPE_DELIMITED,     // STRING
-  UPB_WIRE_TYPE_START_GROUP,   // GROUP
-  UPB_WIRE_TYPE_DELIMITED,     // MESSAGE
-  UPB_WIRE_TYPE_DELIMITED,     // BYTES
-  UPB_WIRE_TYPE_VARINT,        // UINT32
-  UPB_WIRE_TYPE_VARINT,        // ENUM
-  UPB_WIRE_TYPE_32BIT,         // SFIXED32
-  UPB_WIRE_TYPE_64BIT,         // SFIXED64
-  UPB_WIRE_TYPE_VARINT,        // SINT32
-  UPB_WIRE_TYPE_VARINT,        // SINT64
+  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
+  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
+  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
+  UPB_WIRE_TYPE_VARINT,        /* INT64 */
+  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
+  UPB_WIRE_TYPE_VARINT,        /* INT32 */
+  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
+  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
+  UPB_WIRE_TYPE_VARINT,        /* BOOL */
+  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
+  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
+  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
+  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
+  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
+  UPB_WIRE_TYPE_VARINT,        /* ENUM */
+  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
+  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
+  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
+  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
 };
 
-// A basic branch-based decoder, uses 32-bit values to get good performance
-// on 32-bit architectures (but performs well on 64-bits also).
-// This scheme comes from the original Google Protobuf implementation (proto2).
+/* A basic branch-based decoder, uses 32-bit values to get good performance
+ * on 32-bit architectures (but performs well on 64-bits also).
+ * This scheme comes from the original Google Protobuf implementation
+ * (proto2). */
 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
   upb_decoderet err = {NULL, 0};
   const char *p = r.p;
@@ -56,7 +57,7 @@ done:
   return r;
 }
 
-// Like the previous, but uses 64-bit values.
+/* Like the previous, but uses 64-bit values. */
 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
   const char *p = r.p;
   uint64_t val = r.val;
@@ -78,49 +79,53 @@ done:
   return r;
 }
 
-// Given an encoded varint v, returns an integer with a single bit set that
-// indicates the end of the varint.  Subtracting one from this value will
-// yield a mask that leaves only bits that are part of the varint.  Returns
-// 0 if the varint is unterminated.
+/* Given an encoded varint v, returns an integer with a single bit set that
+ * indicates the end of the varint.  Subtracting one from this value will
+ * yield a mask that leaves only bits that are part of the varint.  Returns
+ * 0 if the varint is unterminated. */
 static uint64_t upb_get_vstopbit(uint64_t v) {
   uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
   return ~cbits & (cbits+1);
 }
 
-// A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling.
+/* A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling. */
 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
   uint64_t b;
+  uint64_t stop_bit;
+  upb_decoderet my_r;
   memcpy(&b, r.p, sizeof(b));
-  uint64_t stop_bit = upb_get_vstopbit(b);
+  stop_bit = upb_get_vstopbit(b);
   b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
   b +=       b & 0x007f007f007f007fULL;
   b +=  3 * (b & 0x0000ffff0000ffffULL);
   b += 15 * (b & 0x00000000ffffffffULL);
   if (stop_bit == 0) {
-    // Error: unterminated varint.
+    /* Error: unterminated varint. */
     upb_decoderet err_r = {(void*)0, 0};
     return err_r;
   }
-  upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
-                        r.val | (b << 7)};
+  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                            r.val | (b << 7));
   return my_r;
 }
 
-// A branchless decoder.  Credit to Daniel Wright for the bit-twiddling.
+/* A branchless decoder.  Credit to Daniel Wright for the bit-twiddling. */
 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
   uint64_t b;
+  uint64_t stop_bit;
+  upb_decoderet my_r;
   memcpy(&b, r.p, sizeof(b));
-  uint64_t stop_bit = upb_get_vstopbit(b);
+  stop_bit = upb_get_vstopbit(b);
   b &= (stop_bit - 1);
   b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
   b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
   b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
   if (stop_bit == 0) {
-    // Error: unterminated varint.
+    /* Error: unterminated varint. */
     upb_decoderet err_r = {(void*)0, 0};
     return err_r;
   }
-  upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
-                        r.val | (b << 14)};
+  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                            r.val | (b << 14));
   return my_r;
 }
diff --git a/upb/pb/varint.int.h b/upb/pb/varint.int.h
index 8498acd..a394a75 100644
--- a/upb/pb/varint.int.h
+++ b/upb/pb/varint.int.h
@@ -20,25 +20,25 @@
 extern "C" {
 #endif
 
-// A list of types as they are encoded on-the-wire.
+/* A list of types as they are encoded on-the-wire. */
 typedef enum {
   UPB_WIRE_TYPE_VARINT      = 0,
   UPB_WIRE_TYPE_64BIT       = 1,
   UPB_WIRE_TYPE_DELIMITED   = 2,
   UPB_WIRE_TYPE_START_GROUP = 3,
   UPB_WIRE_TYPE_END_GROUP   = 4,
-  UPB_WIRE_TYPE_32BIT       = 5,
+  UPB_WIRE_TYPE_32BIT       = 5
 } upb_wiretype_t;
 
 #define UPB_MAX_WIRE_TYPE 5
 
-// The maximum number of bytes that it takes to encode a 64-bit varint.
-// Note that with a better encoding this could be 9 (TODO: write up a
-// wiki document about this).
+/* The maximum number of bytes that it takes to encode a 64-bit varint.
+ * Note that with a better encoding this could be 9 (TODO: write up a
+ * wiki document about this). */
 #define UPB_PB_VARINT_MAX_LEN 10
 
-// Array of the "native" (ie. non-packed-repeated) wire type for the given a
-// descriptor type (upb_descriptortype_t).
+/* Array of the "native" (i.e. non-packed-repeated) wire type for a given
+ * descriptor type (upb_descriptortype_t). */
 extern const uint8_t upb_pb_native_wire_types[];
 
 /* Zig-zag encoding/decoding **************************************************/
@@ -54,44 +54,59 @@ UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
 
 /* Decoding *******************************************************************/
 
-// All decoding functions return this struct by value.
+/* All decoding functions return this struct by value. */
 typedef struct {
-  const char *p;  // NULL if the varint was unterminated.
+  const char *p;  /* NULL if the varint was unterminated. */
   uint64_t val;
 } upb_decoderet;
 
-// Four functions for decoding a varint of at most eight bytes.  They are all
-// functionally identical, but are implemented in different ways and likely have
-// different performance profiles.  We keep them around for performance testing.
-//
-// Note that these functions may not read byte-by-byte, so they must not be used
-// unless there are at least eight bytes left in the buffer!
+UPB_INLINE upb_decoderet upb_decoderet_make(const char *p, uint64_t val) {
+  upb_decoderet ret;
+  ret.p = p;
+  ret.val = val;
+  return ret;
+}
+
+/* Four functions for decoding a varint of at most eight bytes.  They are all
+ * functionally identical, but are implemented in different ways and likely have
+ * different performance profiles.  We keep them around for performance testing.
+ *
+ * Note that these functions may not read byte-by-byte, so they must not be used
+ * unless there are at least eight bytes left in the buffer! */
 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);
 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);
 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
 
-// Template for a function that checks the first two bytes with branching
-// and dispatches 2-10 bytes with a separate function.  Note that this may read
-// up to 10 bytes, so it must not be used unless there are at least ten bytes
-// left in the buffer!
+/* Template for a function that checks the first two bytes with branching
+ * and dispatches 2-10 bytes with a separate function.  Note that this may read
+ * up to 10 bytes, so it must not be used unless there are at least ten bytes
+ * left in the buffer! */
 #define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
 UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {         \
   uint8_t *p = (uint8_t*)_p;                                                   \
-  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; }  \
-  upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)};        \
-  if ((*(p + 1) & 0x80) == 0) return r;                                        \
+  upb_decoderet r;                                                             \
+  if ((*p & 0x80) == 0) {                                                      \
+  /* Common case: one-byte varint. */                                          \
+    return upb_decoderet_make(_p + 1, *p & 0x7fU);                             \
+  }                                                                            \
+  r = upb_decoderet_make(_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7));    \
+  if ((*(p + 1) & 0x80) == 0) {                                                \
+    /* Two-byte varint. */                                                     \
+    return r;                                                                  \
+  }                                                                            \
+  /* Longer varint, fallback to out-of-line function. */                       \
   return decode_max8_function(r);                                              \
 }
 
-UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32);
-UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64);
-UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
-UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
+UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32)
+UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64)
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright)
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino)
 #undef UPB_VARINT_DECODER_CHECK2
 
-// Our canonical functions for decoding varints, based on the currently
-// favored best-performing implementations.
+/* Our canonical functions for decoding varints, based on the currently
+ * favored best-performing implementations. */
 UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) {
   if (sizeof(long) == 8)
     return upb_vdecode_check2_branch64(p);
@@ -108,7 +123,7 @@ UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
 
 UPB_INLINE int upb_value_size(uint64_t val) {
 #ifdef __GNUC__
-  int high_bit = 63 - __builtin_clzll(val);  // 0-based, undef if val == 0.
+  int high_bit = 63 - __builtin_clzll(val);  /* 0-based, undef if val == 0. */
 #else
   int high_bit = 0;
   uint64_t tmp = val;
@@ -117,13 +132,14 @@ UPB_INLINE int upb_value_size(uint64_t val) {
   return val == 0 ? 1 : high_bit / 8 + 1;
 }
 
-// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
-// bytes long), returning how many bytes were used.
-//
-// TODO: benchmark and optimize if necessary.
+/* Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
+ * bytes long), returning how many bytes were used.
+ *
+ * TODO: benchmark and optimize if necessary. */
 UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {
+  size_t i;
   if (val == 0) { buf[0] = 0; return 1; }
-  size_t i = 0;
+  i = 0;
   while (val) {
     uint8_t byte = val & 0x7fU;
     val >>= 7;
@@ -138,7 +154,7 @@ UPB_INLINE size_t upb_varint_size(uint64_t val) {
   return upb_vencode64(val, buf);
 }
 
-// Encodes a 32-bit varint, *not* sign-extended.
+/* Encodes a 32-bit varint, *not* sign-extended. */
 UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
   char buf[UPB_PB_VARINT_MAX_LEN];
   size_t bytes = upb_vencode64(val, buf);