From abcb6428ad9bf7d650455a0a180647a05183fd9d Mon Sep 17 00:00:00 2001
From: Josh Haberman <jhaberman@gmail.com>
Date: Thu, 30 Jul 2015 14:54:03 -0700
Subject: Changed parser semantics around skipping.

Prior to this change:

  parse(buf, len) -> len + N

...would indicate that the next N bytes of the input are not
needed, *and* would advance the decoding position by this
much.

After this change:

  parse(buf, len) -> len + N
  parse(NULL, N) -> N

...can be used to achieve the same thing.  But skipping the
N bytes is not explicitly performed by the user.  A user that
doesn't want/need to skip can just say:

  parsed = parse(buf, len);

  if (parsed < len) {
    // Handle suspend, advance stream by "parsed".
  } else {
    // Stream was advanced by "len" (even if parsed > len).
  }

Updated unit tests to test this new behavior, and refactored
test utility code a bit to support it.
---
 tests/json/test_json.cc  |  37 ++++-------
 tests/pb/test_decoder.cc | 168 +++++++++++++++++++++++------------------------
 tests/test_util.h        | 135 +++++++++++++++++++++++++++----------
 upb/pb/decoder.c         | 155 +++++++++++++++++++++++++------------------
 upb/pb/decoder.int.h     |   6 ++
 upb/sink.h               |   2 +-
 6 files changed, 294 insertions(+), 209 deletions(-)

diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc
index 8dd4062..c483cf0 100644
--- a/tests/json/test_json.cc
+++ b/tests/json/test_json.cc
@@ -288,33 +288,22 @@ void test_json_roundtrip_message(const char* json_src,
                                  const char* json_expected,
                                  const upb::Handlers* serialize_handlers,
                                  int seam) {
-  upb::Status st;
-  upb::Environment env;
-  env.ReportErrorsTo(&st);
+  VerboseParserEnvironment env(verbose);
   StringSink data_sink;
-  upb::json::Printer* printer =
-      upb::json::Printer::Create(&env, serialize_handlers, data_sink.Sink());
-  upb::json::Parser* parser = upb::json::Parser::Create(&env, printer->input());
-
-  upb::BytesSink* input = parser->input();
-  void *sub;
-  size_t len = strlen(json_src);
-  size_t ofs = 0;
-
-  bool ok = input->Start(0, &sub) &&
-            parse_buffer(input, sub, json_src, 0, seam, &ofs, &st, verbose) &&
-            parse_buffer(input, sub, json_src, seam, len, &ofs, &st, verbose) &&
-            ofs == len;
-
-  if (ok) {
-    if (verbose) {
-      fprintf(stderr, "calling end()\n");
-    }
-    ok = input->End();
-  }
+  upb::json::Printer* printer = upb::json::Printer::Create(
+      env.env(), serialize_handlers, data_sink.Sink());
+  upb::json::Parser* parser =
+      upb::json::Parser::Create(env.env(), printer->input());
+  env.ResetBytesSink(parser->input());
+  env.Reset(json_src, strlen(json_src), false);
+
+  bool ok = env.Start() &&
+            env.ParseBuffer(seam) &&
+            env.ParseBuffer(-1) &&
+            env.End();
 
   if (!ok) {
-    fprintf(stderr, "upb parse error: %s\n", st.error_message());
+    fprintf(stderr, "upb parse error: %s\n", env.status().error_message());
   }
   ASSERT(ok);
 
diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc
index 6bb53fc..310f8c1 100644
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@@ -63,6 +63,8 @@
 
 #define MAX_NESTING 64
 
+#define LINE(x) x "\n"
+
 uint32_t filter_hash = 0;
 double completed;
 double total;
@@ -517,12 +519,13 @@ upb::pb::Decoder* CreateDecoder(upb::Environment* env,
 }
 
 uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
-              size_t seam2) {
+              size_t seam2, bool may_skip) {
   uint32_t hash = MurmurHash2(proto.c_str(), proto.size(), 0);
   if (expected_output)
     hash = MurmurHash2(expected_output->c_str(), expected_output->size(), hash);
   hash = MurmurHash2(&seam1, sizeof(seam1), hash);
   hash = MurmurHash2(&seam2, sizeof(seam2), hash);
+  hash = MurmurHash2(&may_skip, sizeof(may_skip), hash);
   return hash;
 }
 
@@ -538,91 +541,88 @@ void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) {
   ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED));
 }
 
-static bool parse(upb::pb::Decoder* decoder, void* subc, const char* buf,
-                  size_t start, size_t end, size_t* ofs, upb::Status* status) {
-  CheckBytesParsed(*decoder, *ofs);
-  bool ret = parse_buffer(decoder->input(), subc, buf, start, end, ofs, status,
-                          filter_hash != 0);
+static bool parse(VerboseParserEnvironment* env,
+                  const upb::pb::Decoder& decoder, int bytes) {
+  CheckBytesParsed(decoder, env->ofs());
+  bool ret = env->ParseBuffer(bytes);
   if (ret) {
-    CheckBytesParsed(*decoder, *ofs);
+    CheckBytesParsed(decoder, env->ofs());
   }
 
   return ret;
 }
 
-#define LINE(x) x "\n"
-void run_decoder(const string& proto, const string* expected_output) {
-  upb::Status status;
-  upb::Sink sink(global_handlers, &closures[0]);
-  for (size_t i = 0; i < proto.size(); i++) {
-    for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) {
-      // TODO(haberman): hoist this again once the environment supports reset.
-      upb::Environment env;
-      env.ReportErrorsTo(&status);
-      upb::pb::Decoder *decoder = CreateDecoder(&env, global_method, &sink);
-
-      testhash = Hash(proto, expected_output, i, j);
-      if (filter_hash && testhash != filter_hash) continue;
-      if (test_mode != COUNT_ONLY) {
-        output.clear();
-        status.Clear();
-        size_t ofs = 0;
-        upb::BytesSink* input = decoder->input();
-        void *sub;
-
-        if (filter_hash) {
-          fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
-          fprintf(stderr, "JIT on: %s\n",
-                  global_method->is_native() ? "true" : "false");
-          fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
-          PrintBinary(proto);
-          fprintf(stderr, "\n");
-          if (expected_output) {
-            fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
-          } else {
-            fprintf(stderr, "Expected to FAIL\n");
-          }
-          fprintf(stderr, "Calling start()\n");
-        }
+void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
+                    const string& proto, const string* expected_output,
+                    size_t i, size_t j, bool may_skip) {
+  env->Reset(proto.c_str(), proto.size(), may_skip);
+  decoder->Reset();
 
-        bool ok = input->Start(proto.size(), &sub) &&
-                  parse(decoder, sub, proto.c_str(), 0, i, &ofs, &status) &&
-                  parse(decoder, sub, proto.c_str(), i, j, &ofs, &status) &&
-                  parse(decoder, sub, proto.c_str(), j, proto.size(), &ofs,
-                        &status) &&
-                  ofs == proto.size();
+  testhash = Hash(proto, expected_output, i, j, may_skip);
+  if (filter_hash && testhash != filter_hash) return;
+  if (test_mode != COUNT_ONLY) {
+    output.clear();
+
+    if (filter_hash) {
+      fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
+      fprintf(stderr, "JIT on: %s\n",
+              global_method->is_native() ? "true" : "false");
+      fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
+      PrintBinary(proto);
+      fprintf(stderr, "\n");
+      if (expected_output) {
+        fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
+      } else {
+        fprintf(stderr, "Expected to FAIL\n");
+      }
+      fprintf(stderr, "Calling start()\n");
+    }
 
+    bool ok = env->Start() &&
+              parse(env, *decoder, i) &&
+              parse(env, *decoder, j - i) &&
+              parse(env, *decoder, -1) &&
+              env->End();
+
+    ASSERT(ok == env->status().ok());
+
+    if (test_mode == ALL_HANDLERS) {
+      if (expected_output) {
+        if (output != *expected_output) {
+          fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
+                  output.c_str(), expected_output->c_str());
+        }
+        if (!ok) {
+          fprintf(stderr, "Failed: %s\n", env->status().error_message());
+        }
+        ASSERT(ok);
+        ASSERT(output == *expected_output);
+      } else {
         if (ok) {
-          if (filter_hash) {
-            fprintf(stderr, "calling end()\n");
-          }
-          ok = input->End();
+          fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
+                  output.c_str());
+        } else if (filter_hash) {
+          fprintf(stderr, "Failed as we expected, with message: %s\n",
+                  env->status().error_message());
         }
+        ASSERT(!ok);
+      }
+    }
+  }
+  (*count)++;
+}
 
-        if (test_mode == ALL_HANDLERS) {
-          if (expected_output) {
-            if (output != *expected_output) {
-              fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
-                      output.c_str(), expected_output->c_str());
-            }
-            if (!ok) {
-              fprintf(stderr, "Failed: %s\n", status.error_message());
-            }
-            ASSERT(ok);
-            ASSERT(output == *expected_output);
-          } else {
-            if (ok) {
-              fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
-                      output.c_str());
-            } else if (filter_hash) {
-              fprintf(stderr, "Failed as we expected, with message: %s\n",
-                      status.error_message());
-            }
-            ASSERT(!ok);
-          }
-        }
+void run_decoder(const string& proto, const string* expected_output) {
+  VerboseParserEnvironment env(filter_hash != 0);
+  upb::Sink sink(global_handlers, &closures[0]);
+  upb::pb::Decoder *decoder = CreateDecoder(env.env(), global_method, &sink);
+  env.ResetBytesSink(decoder->input());
+  for (size_t i = 0; i < proto.size(); i++) {
+    for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) {
+      do_run_decoder(&env, decoder, proto, expected_output, i, j, true);
+      if (env.SkippedWithNull()) {
+        do_run_decoder(&env, decoder, proto, expected_output, i, j, false);
       }
-      (*count)++;
     }
   }
   testhash = 0;
@@ -1146,20 +1146,14 @@ upb::reffed_ptr<const upb::pb::DecoderMethod> method =
     { NULL, 0 },
   };
   for (int i = 0; testdata[i].data; i++) {
-    upb::Environment env;
-    upb::Status status;
-    env.ReportErrorsTo(&status);
+    VerboseParserEnvironment env(filter_hash != 0);
     upb::Sink sink(method->dest_handlers(), &closures[0]);
-    upb::pb::Decoder* decoder = CreateDecoder(&env, method.get(), &sink);
-    upb::BytesSink* input = decoder->input();
-    void* subc;
-    ASSERT(input->Start(0, &subc));
-    size_t ofs = 0;
-    ASSERT(parse_buffer(input, subc,
-                        testdata[i].data, 0, testdata[i].length,
-                        &ofs, &status, false));
-    ASSERT(ofs == testdata[i].length);
-    ASSERT(input->End());
+    upb::pb::Decoder* decoder = CreateDecoder(env.env(), method.get(), &sink);
+    env.ResetBytesSink(decoder->input());
+    env.Reset(testdata[i].data, testdata[i].length, true);
+    ASSERT(env.Start());
+    ASSERT(env.ParseBuffer(-1));
+    ASSERT(env.End());
   }
 }
 
diff --git a/tests/test_util.h b/tests/test_util.h
index d9e8d25..6408557 100644
--- a/tests/test_util.h
+++ b/tests/test_util.h
@@ -8,59 +8,90 @@
 #include <stdio.h>
 #include <math.h>
 #include "tests/upb_test.h"
+#include "upb/env.h"
 #include "upb/sink.h"
 
 upb::BufferHandle global_handle;
 
-// Puts a region of the given buffer [start, end) into the given sink (which
-// probably represents a parser.  Can gracefully handle the case where the
-// parser returns a "parsed" length that is less or greater than the input
-// buffer length, and tracks the overall parse offset in *ofs.
-//
-// Pass verbose=true to print detailed diagnostics to stderr.
-bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf,
-                  size_t start, size_t end, size_t* ofs,
-                  upb::Status* status, bool verbose) {
-  start = UPB_MAX(start, *ofs);
+class VerboseParserEnvironment {
+ public:
+  VerboseParserEnvironment(bool verbose) : verbose_(verbose) {
+    env_.ReportErrorsTo(&status_);
+  }
+
+  void Reset(const char *buf, size_t len, bool may_skip) {
+    buf_ = buf;
+    len_ = len;
+    ofs_ = 0;
+    skip_until_ = may_skip ? 0 : -1;
+    status_.Clear();
+  }
+
+  bool Start() {
+    return sink_->Start(len_, &subc_);
+  }
+
+  bool End() {
+    return sink_->End();
+  }
+
+  // Puts a region of the given buffer [start, end) into the given sink (which
+  // probably represents a parser.  Can gracefully handle the case where the
+  // parser returns a "parsed" length that is less or greater than the input
+  // buffer length, and tracks the overall parse offset in *ofs.
+  //
+  // Pass verbose=true to print detailed diagnostics to stderr.
+  bool ParseBuffer(int bytes) {
+    if (bytes < 0) {
+      bytes = len_ - ofs_;
+    }
 
-  if (start <= end) {
-    size_t len = end - start;
+    ASSERT((size_t)bytes <= (len_ - ofs_));
 
     // Copy buffer into a separate, temporary buffer.
     // This is necessary to verify that the parser is not erroneously
     // reading outside the specified bounds.
-    char *buf2 = (char*)malloc(len);
-    assert(buf2);
-    memcpy(buf2, buf + start, len);
+    char *buf2 = NULL;
 
-    if (verbose) {
+    if ((int)(ofs_ + bytes) > skip_until_) {
+      buf2 = (char*)malloc(bytes);
+      assert(buf2);
+      memcpy(buf2, buf_ + ofs_, bytes);
+    }
+
+    if (buf2 == NULL && bytes == 0) {
+      // Decoders dont' support buf=NULL, bytes=0.
+      return true;
+    }
+
+    if (verbose_) {
       fprintf(stderr, "Calling parse(%u) for bytes %u-%u of the input\n",
-              (unsigned)len, (unsigned)start, (unsigned)end);
+              (unsigned)bytes, (unsigned)ofs_, (unsigned)(ofs_ + bytes));
     }
 
-    size_t parsed = sink->PutBuffer(subc, buf2, len, &global_handle);
+    int parsed = sink_->PutBuffer(subc_, buf2, bytes, &global_handle);
     free(buf2);
 
-    if (verbose) {
-      if (parsed == len) {
+    if (verbose_) {
+      if (parsed == bytes) {
         fprintf(stderr,
                 "parse(%u) = %u, complete byte count indicates success\n",
-                (unsigned)len, (unsigned)len);
-      } else if (parsed > len) {
+                (unsigned)bytes, (unsigned)bytes);
+      } else if (parsed > bytes) {
         fprintf(stderr,
-                "parse(%u) = %u, long byte count indicates success and skip"
+                "parse(%u) = %u, long byte count indicates success and skip "
                 "of the next %u bytes\n",
-                (unsigned)len, (unsigned)parsed, (unsigned)(parsed - len));
+                (unsigned)bytes, (unsigned)parsed, (unsigned)(parsed - bytes));
       } else {
         fprintf(stderr,
                 "parse(%u) = %u, short byte count indicates failure; "
                 "last %u bytes were not consumed\n",
-                (unsigned)len, (unsigned)parsed, (unsigned)(len - parsed));
+                (unsigned)bytes, (unsigned)parsed, (unsigned)(bytes - parsed));
       }
     }
 
-    if (status->ok() != (parsed >= len)) {
-      if (status->ok()) {
+    if (status_.ok() != (parsed >= bytes)) {
+      if (status_.ok()) {
         fprintf(stderr,
                 "Error: decode function returned short byte count but set no "
                 "error status\n");
@@ -69,17 +100,55 @@ bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf,
                 "Error: decode function returned complete byte count but set "
                 "error status\n");
       }
-      fprintf(stderr, "Status: %s, parsed=%u, len=%u\n",
-              status->error_message(), (unsigned)parsed, (unsigned)len);
+      fprintf(stderr, "Status: %s, parsed=%u, bytes=%u\n",
+              status_.error_message(), (unsigned)parsed, (unsigned)bytes);
       ASSERT(false);
     }
 
-    if (!status->ok())
+    if (!status_.ok())
       return false;
 
-    *ofs += parsed;
+    if (parsed > bytes && skip_until_ >= 0) {
+      skip_until_ = ofs_ + parsed;
+    }
+
+    ofs_ += UPB_MIN(parsed, bytes);
+
+    return true;
+  }
+
+  void ResetBytesSink(upb::BytesSink* sink) {
+    sink_ = sink;
   }
-  return true;
-}
+
+  const upb::Status& status() { return status_; }
+
+  size_t ofs() { return ofs_; }
+  upb::Environment* env() { return &env_; }
+
+  bool SkippedWithNull() { return skip_until_ > 0; }
+
+ private:
+  upb::Environment env_;
+  upb::Status status_;
+  upb::BytesSink* sink_;
+  const char* buf_;
+  size_t len_;
+  bool verbose_;
+  size_t ofs_;
+  void *subc_;
+
+  // When our parse call returns a value greater than the number of bytes
+  // we passed in, the decoder is indicating to us that the next N bytes
+  // in the stream are not needed and can be skipped.  The user is allowed
+  // to pass a NULL buffer for those N bytes.
+  //
+  // skip_until_ is initially set to 0 if we should do this NULL-buffer
+  // skipping or -1 if we should not.  If we are open to doing NULL-buffer
+  // skipping and we get an opportunity to do it, we set skip_until to the
+  // stream offset where we can skip until.  The user can then test whether
+  // this happened by testing SkippedWithNull().
+  int skip_until_;
+};
 
 #endif
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 905fdd1..34aed1f 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -60,6 +60,28 @@ static bool consumes_input(opcode op) {
   }
 }
 
+static size_t stacksize(upb_pbdecoder *d, size_t entries) {
+  UPB_UNUSED(d);
+  return entries * sizeof(upb_pbdecoder_frame);
+}
+
+static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
+  UPB_UNUSED(d);
+
+#ifdef UPB_USE_JIT_X64
+  if (d->method_->is_native_) {
+    /* Each native stack frame needs two pointers, plus we need a few frames for
+     * the enter/exit trampolines. */
+    size_t ret = entries * sizeof(void*) * 2;
+    ret += sizeof(void*) * 10;
+    return ret;
+  }
+#endif
+
+  return entries * sizeof(uint32_t*);
+}
+
+
 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
 
 /* It's unfortunate that we have to micro-manage the compiler with
@@ -145,24 +167,66 @@ static void checkpoint(upb_pbdecoder *d) {
   d->checkpoint = d->ptr;
 }
 
+/* Skips "bytes" bytes in the stream, which may be more than available.  If we
+ * skip more bytes than are available, we return a long read count to the caller
+ * indicating how many bytes can be skipped over before passing actual data
+ * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
+ * won't actually be read.
+ */
+static int32_t skip(upb_pbdecoder *d, size_t bytes) {
+  assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
+  if (curbufleft(d) > bytes) {
+    /* Skipped data is all in current buffer, and more is still available. */
+    advance(d, bytes);
+    d->skip = 0;
+    return DECODE_OK;
+  } else {
+    /* Skipped data extends beyond currently available buffers. */
+    d->pc = d->last;
+    d->skip = bytes - curbufleft(d);
+    d->bufstart_ofs += (d->end - d->buf);
+    d->residual_end = d->residual;
+    switchtobuf(d, d->residual, d->residual_end);
+    return d->size_param + d->skip;
+  }
+}
+
+
 /* Resumes the decoder from an initial state or from a previous suspend. */
 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                              size_t size, const upb_bufhandle *handle) {
   UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */
+
   d->buf_param = buf;
   d->size_param = size;
   d->handle = handle;
+
   if (d->residual_end > d->residual) {
     /* We have residual bytes from the last buffer. */
     assert(d->ptr == d->residual);
   } else {
     switchtobuf(d, buf, buf + size);
   }
+
   d->checkpoint = d->ptr;
+
+  if (d->skip) {
+    CHECK_RETURN(skip(d, d->skip));
+    d->checkpoint = d->ptr;
+  }
+
+  if (!buf) {
+    /* NULL buf is ok if its entire span is covered by the "skip" above, but
+     * by this point we know that "skip" doesn't cover the buffer. */
+    seterr(d, "Passed NULL buffer over non-skippable region.");
+    return upb_pbdecoder_suspend(d);
+  }
+
   if (d->top->groupnum < 0) {
     CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
     d->checkpoint = d->ptr;
   }
+
   return DECODE_OK;
 }
 
@@ -222,28 +286,6 @@ static size_t suspend_save(upb_pbdecoder *d) {
   return d->size_param;
 }
 
-/* Skips "bytes" bytes in the stream, which may be more than available.  If we
- * skip more bytes than are available, we return a long read count to the caller
- * indicating how many bytes the caller should skip before passing a new buffer.
- */
-static int32_t skip(upb_pbdecoder *d, size_t bytes) {
-  assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
-  if (curbufleft(d) >= bytes) {
-    /* Skipped data is all in current buffer. */
-    advance(d, bytes);
-    return DECODE_OK;
-  } else {
-    /* Skipped data extends beyond currently available buffers. */
-    size_t skip;
-    d->pc = d->last;
-    skip = bytes - curbufleft(d);
-    d->bufstart_ofs += (d->end - d->buf) + skip;
-    d->residual_end = d->residual;
-    switchtobuf(d, d->residual, d->residual_end);
-    return d->size_param + skip;
-  }
-}
-
 /* Copies the next "bytes" bytes into "buf" and advances the stream.
  * Requires that this many bytes are available in the current buffer. */
 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
@@ -618,18 +660,8 @@ upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
 
 /* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
  * switch() statement. */
-size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
-                            size_t size, const upb_bufhandle *handle) {
-  upb_pbdecoder *d = closure;
-  const mgroup *group = hd;
-  int32_t result;
-  assert(buf);
-  result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
-  if (result == DECODE_ENDGROUP) {
-    goto_endmsg(d);
-  }
-  CHECK_RETURN(result);
-  UPB_UNUSED(group);
+size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
+                      const upb_bufhandle* handle) {
 
 #define VMCASE(op, code) \
   case op: { code; if (consumes_input(op)) checkpoint(d); break; }
@@ -652,6 +684,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
     arg = instruction >> 8;
     longofs = arg;
     assert(d->ptr != d->residual_end);
+    UPB_UNUSED(group);
 #ifdef UPB_DUMP_BYTECODE
     fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
                     "%x %s (%d)\n",
@@ -827,12 +860,15 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
         CHECK_RETURN(dispatch(d));
       })
       VMCASE(OP_HALT, {
-        return size;
+        return d->size_param;
       })
     }
   }
 }
 
+
+/* BytesHandler handlers ******************************************************/
+
 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
   upb_pbdecoder *d = closure;
   UPB_UNUSED(size_hint);
@@ -841,6 +877,7 @@ void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
   d->call_len = 1;
   d->callstack[0] = &halt;
   d->pc = pc;
+  d->skip = 0;
   return d;
 }
 
@@ -851,6 +888,7 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
   d->top->end_ofs = UINT64_MAX;
   d->bufstart_ofs = 0;
   d->call_len = 0;
+  d->skip = 0;
   return d;
 }
 
@@ -859,12 +897,9 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
   const upb_pbdecodermethod *method = handler_data;
   uint64_t end;
   char dummy;
-#ifdef UPB_USE_JIT_X64
-  const mgroup *group = (const mgroup*)method->group;
-#endif
 
   if (d->residual_end > d->residual) {
-    seterr(d, "Unexpected EOF");
+    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
     return false;
   }
 
@@ -873,15 +908,15 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
     return false;
   }
 
-  /* Message ends here. */
+  /* The user's end() call indicates that the message ends here. */
   end = offset(d);
   d->top->end_ofs = end;
 
 #ifdef UPB_USE_JIT_X64
-  if (group->jit_code) {
+  if (method->group->jit_code) {
     if (d->top != d->stack)
       d->stack->end_ofs = 0;
-    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
+    method->group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
   } else
 #endif
   {
@@ -901,13 +936,26 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
   }
 
   if (d->call_len != 0) {
-    seterr(d, "Unexpected EOF");
+    seterr(d, "Unexpected EOF inside submessage or group");
     return false;
   }
 
   return true;
 }
 
+size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
+                            size_t size, const upb_bufhandle *handle) {
+  int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
+
+  if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
+  CHECK_RETURN(result);
+
+  return run_decoder_vm(decoder, group, handle);
+}
+
+
+/* Public API *****************************************************************/
+
 void upb_pbdecoder_reset(upb_pbdecoder *d) {
   d->top = d->stack;
   d->top->groupnum = 0;
@@ -917,27 +965,6 @@ void upb_pbdecoder_reset(upb_pbdecoder *d) {
   d->residual_end = d->residual;
 }
 
-static size_t stacksize(upb_pbdecoder *d, size_t entries) {
-  UPB_UNUSED(d);
-  return entries * sizeof(upb_pbdecoder_frame);
-}
-
-static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
-  UPB_UNUSED(d);
-
-#ifdef UPB_USE_JIT_X64
-  if (d->method_->is_native_) {
-    /* Each native stack frame needs two pointers, plus we need a few frames for
-     * the enter/exit trampolines. */
-    size_t ret = entries * sizeof(void*) * 2;
-    ret += sizeof(void*) * 10;
-    return ret;
-  }
-#endif
-
-  return entries * sizeof(uint32_t*);
-}
-
 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
                                     upb_sink *sink) {
   const size_t default_max_nesting = 64;
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index 2d4485a..be5d044 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -225,6 +225,12 @@ struct upb_pbdecoder {
   char residual[12];
   char *residual_end;
 
+  /* Bytes of data that should be discarded from the input beore we start
+   * parsing again.  We set this when we internally determine that we can
+   * safely skip the next N bytes, but this region extends past the current
+   * user buffer. */
+  size_t skip;
+
   /* Stores the user buffer passed to our decode function. */
   const char *buf_param;
   size_t size_param;
diff --git a/upb/sink.h b/upb/sink.h
index e7d4960..765916e 100644
--- a/upb/sink.h
+++ b/upb/sink.h
@@ -272,7 +272,7 @@ UPB_INLINE bool upb_bufsrc_putbuf(const char *buf, size_t len,
   upb_bufhandle_setbuf(&handle, buf, 0);
   ret = upb_bytessink_start(sink, len, &subc);
   if (ret && len != 0) {
-    ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) == len);
+    ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
   }
   if (ret) {
     ret = upb_bytessink_end(sink);
-- 
cgit v1.2.3


From 7dcd017f4ed829b2ea4dd2d44165cd7ef0594057 Mon Sep 17 00:00:00 2001
From: Josh Haberman <jhaberman@gmail.com>
Date: Thu, 30 Jul 2015 17:17:09 -0700
Subject: Fixed PR for JIT-enabled builds.

---
 tests/test_util.h | 38 ++++++++++++++++++++++++++++++--------
 upb/pb/decoder.c  |  5 +++--
 upb/pb/decoder.h  |  2 +-
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/tests/test_util.h b/tests/test_util.h
index 6408557..d2d30a4 100644
--- a/tests/test_util.h
+++ b/tests/test_util.h
@@ -13,8 +13,21 @@
 
 upb::BufferHandle global_handle;
 
+// A convenience class for parser tests.  Provides some useful features:
+//
+//   - can support multiple calls to parse, to test the parser's handling
+//     of buffer seams.
+//
+//   - can output verbose output about each parse call when requested, for
+//     ease of debugging.
+//
+//   - can pass NULL for skipped regions of the input if requested.
+//
+//   - allocates and passes a separate buffer for each parsed region, to
+//     ensure that the parser is not erroneously overreading its buffer.
 class VerboseParserEnvironment {
  public:
+  // Pass verbose=true to print detailed diagnostics to stderr.
   VerboseParserEnvironment(bool verbose) : verbose_(verbose) {
     env_.ReportErrorsTo(&status_);
   }
@@ -24,9 +37,21 @@ class VerboseParserEnvironment {
     len_ = len;
     ofs_ = 0;
     skip_until_ = may_skip ? 0 : -1;
+    skipped_with_null_ = false;
     status_.Clear();
   }
 
+  // The user should call a series of:
+  //
+  // Reset(buf, len, may_skip);
+  // Start()
+  // ParseBuffer(X);
+  // ParseBuffer(Y);
+  // // Repeat ParseBuffer as desired, but last call should pass -1.
+  // ParseBuffer(-1);
+  // End();
+
+
   bool Start() {
     return sink_->Start(len_, &subc_);
   }
@@ -35,12 +60,6 @@ class VerboseParserEnvironment {
     return sink_->End();
   }
 
-  // Puts a region of the given buffer [start, end) into the given sink (which
-  // probably represents a parser.  Can gracefully handle the case where the
-  // parser returns a "parsed" length that is less or greater than the input
-  // buffer length, and tracks the overall parse offset in *ofs.
-  //
-  // Pass verbose=true to print detailed diagnostics to stderr.
   bool ParseBuffer(int bytes) {
     if (bytes < 0) {
       bytes = len_ - ofs_;
@@ -53,7 +72,9 @@ class VerboseParserEnvironment {
     // reading outside the specified bounds.
     char *buf2 = NULL;
 
-    if ((int)(ofs_ + bytes) > skip_until_) {
+    if ((int)(ofs_ + bytes) <= skip_until_) {
+      skipped_with_null_ = true;
+    } else {
       buf2 = (char*)malloc(bytes);
       assert(buf2);
       memcpy(buf2, buf_ + ofs_, bytes);
@@ -126,7 +147,7 @@ class VerboseParserEnvironment {
   size_t ofs() { return ofs_; }
   upb::Environment* env() { return &env_; }
 
-  bool SkippedWithNull() { return skip_until_ > 0; }
+  bool SkippedWithNull() { return skipped_with_null_; }
 
  private:
   upb::Environment env_;
@@ -149,6 +170,7 @@ class VerboseParserEnvironment {
   // stream offset where we can skip until.  The user can then test whether
   // this happened by testing SkippedWithNull().
   int skip_until_;
+  bool skipped_with_null_;
 };
 
 #endif
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 34aed1f..b624812 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -913,10 +913,11 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
   d->top->end_ofs = end;
 
 #ifdef UPB_USE_JIT_X64
-  if (method->group->jit_code) {
+  if (method->is_native_) {
+    const mgroup *group = (const mgroup*)method->group;
     if (d->top != d->stack)
       d->stack->end_ofs = 0;
-    method->group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
+    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
   } else
 #endif
   {
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index f28e5e6..65316b4 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -92,7 +92,7 @@ class upb::pb::DecoderMethod {
  * constructed.  This hint may be an overestimate for some build configurations.
  * But if the decoder library is upgraded without recompiling the application,
  * it may be an underestimate. */
-#define UPB_PB_DECODER_SIZE 4400
+#define UPB_PB_DECODER_SIZE 4408
 
 #ifdef __cplusplus
 
-- 
cgit v1.2.3