summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tests/json/test_json.cc6
-rw-r--r--tests/pb/test_decoder.cc100
-rw-r--r--tests/test_util.h55
-rw-r--r--upb/pb/compile_decoder.c7
-rw-r--r--upb/pb/decoder.c63
-rw-r--r--upb/pb/decoder.int.h4
6 files changed, 181 insertions, 54 deletions
diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc
index c483cf0..b4346c5 100644
--- a/tests/json/test_json.cc
+++ b/tests/json/test_json.cc
@@ -295,17 +295,15 @@ void test_json_roundtrip_message(const char* json_src,
upb::json::Parser* parser =
upb::json::Parser::Create(env.env(), printer->input());
env.ResetBytesSink(parser->input());
- env.Reset(json_src, strlen(json_src), false);
+ env.Reset(json_src, strlen(json_src), false, false);
bool ok = env.Start() &&
env.ParseBuffer(seam) &&
env.ParseBuffer(-1) &&
env.End();
- if (!ok) {
- fprintf(stderr, "upb parse error: %s\n", env.status().error_message());
- }
ASSERT(ok);
+ ASSERT(env.CheckConsistency());
if (memcmp(json_expected,
data_sink.Data().data(),
diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc
index fd9d9ae..94293cb 100644
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@@ -32,6 +32,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <sstream>
#include "tests/test_util.h"
#include "tests/upb_test.h"
@@ -176,6 +177,13 @@ string cat(const string& a, const string& b,
return ret;
}
+template <typename T>
+string num2string(T num) {
+ std::ostringstream ss;
+ ss << num;
+ return ss.str();
+}
+
string varint(uint64_t x) {
char buf[UPB_PB_VARINT_MAX_LEN];
size_t len = upb_vencode64(x, buf);
@@ -202,6 +210,11 @@ string submsg(uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
}
+string group(uint32_t fn, const string& buf) {
+ return cat(tag(fn, UPB_WIRE_TYPE_START_GROUP), buf,
+ tag(fn, UPB_WIRE_TYPE_END_GROUP));
+}
+
// Like delim()/submsg(), but intentionally encodes an incorrect length.
// These help test when a delimited boundary doesn't land in the right place.
string badlen_delim(int err, const string& buf) {
@@ -558,9 +571,9 @@ uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) {
// We could have parsed as many as 10 bytes fewer than what the decoder
- // previously accepted, since we can buffer up to 10 partial bytes internally
+ // previously accepted, since we can buffer up to 12 partial bytes internally
// before accumulating an entire value.
- const int MAX_BUFFERED = 10;
+ const int MAX_BUFFERED = 14;
// We can't have parsed more data than the decoder callback is telling us it
// parsed.
@@ -582,7 +595,7 @@ static bool parse(VerboseParserEnvironment* env,
void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
const string& proto, const string* expected_output,
size_t i, size_t j, bool may_skip) {
- env->Reset(proto.c_str(), proto.size(), may_skip);
+ env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder->Reset();
testhash = Hash(proto, expected_output, i, j, may_skip);
@@ -598,7 +611,14 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
PrintBinary(proto);
fprintf(stderr, "\n");
if (expected_output) {
- fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
+ if (test_mode == ALL_HANDLERS) {
+ fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
+ } else if (test_mode == NO_HANDLERS) {
+ fprintf(stderr,
+ "No handlers are registered, BUT if they were "
+ "the expected output would be: %s\n",
+ expected_output->c_str());
+ }
} else {
fprintf(stderr, "Expected to FAIL\n");
}
@@ -610,7 +630,7 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
parse(env, *decoder, -1) &&
env->End();
- ASSERT(ok == env->status().ok());
+ ASSERT(env->CheckConsistency());
if (test_mode == ALL_HANDLERS) {
if (expected_output) {
@@ -618,18 +638,12 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
output.c_str(), expected_output->c_str());
}
- if (!ok) {
- fprintf(stderr, "Failed: %s\n", env->status().error_message());
- }
ASSERT(ok);
ASSERT(output == *expected_output);
} else {
if (ok) {
fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
output.c_str());
- } else if (filter_hash) {
- fprintf(stderr, "Failed as we expected, with message: %s\n",
- env->status().error_message());
}
ASSERT(!ok);
}
@@ -657,6 +671,25 @@ void run_decoder(const string& proto, const string* expected_output) {
const static string thirty_byte_nop = cat(
tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) );
+// Indents and wraps text as if it were a submessage with this field number
+string wrap_text(int32_t fn, const string& text) {
+ string wrapped_text = text;
+ size_t pos = 0;
+ string replace_with = "\n ";
+ while ((pos = wrapped_text.find("\n", pos)) != string::npos &&
+ pos != wrapped_text.size() - 1) {
+ wrapped_text.replace(pos, 1, replace_with);
+ pos += replace_with.size();
+ }
+ wrapped_text = cat(
+ LINE("<"),
+ num2string(fn), LINE(":{")
+ " ", wrapped_text,
+ LINE("}")
+ LINE(">"));
+ return wrapped_text;
+}
+
void assert_successful_parse(const string& proto,
const char *expected_fmt, ...) {
string expected_text;
@@ -668,16 +701,27 @@ void assert_successful_parse(const string& proto,
// repeat once with no-op padding data at the end of buffer.
run_decoder(proto, &expected_text);
run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
+
+ // Test that this also works when wrapped in a submessage or group.
+ // Indent the expected text one level and wrap it.
+ string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text);
+ string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text);
+
+ run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1);
+ run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2);
}
void assert_does_not_parse_at_eof(const string& proto) {
run_decoder(proto, NULL);
// Also test that we fail to parse at end-of-submessage, not just
- // end-of-message.
- run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
- run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop),
- NULL);
+ // end-of-message. But skip this if we have no handlers, because in that
+ // case we won't descend into the submessage.
+ if (test_mode != NO_HANDLERS) {
+ run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
+ run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto),
+ thirty_byte_nop), NULL);
+ }
}
void assert_does_not_parse(const string& proto) {
@@ -905,11 +949,13 @@ void test_invalid() {
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));
// Test exceeding the resource limit of stack depth.
- string buf;
- for (int i = 0; i <= MAX_NESTING; i++) {
- buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
+ if (test_mode != NO_HANDLERS) {
+ string buf;
+ for (int i = 0; i <= MAX_NESTING; i++) {
+ buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
+ }
+ assert_does_not_parse(buf);
}
- assert_does_not_parse(buf);
}
void test_valid() {
@@ -987,6 +1033,15 @@ void test_valid() {
// Unknown field inside a known submessage.
assert_successful_parse(
+ submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
+ LINE("<")
+ LINE("%u:{")
+ LINE(" <")
+ LINE(" >")
+ LINE("}")
+ LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE);
+
+ assert_successful_parse(
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
varint(5)),
@@ -1162,7 +1217,9 @@ void test_valid() {
indentbuf(&textbuf, total - i - 1);
textbuf.append(">\n");
}
- assert_successful_parse(buf, "%s", textbuf.c_str());
+ // Have to use run_decoder directly, because we are at max nesting and can't
+ // afford the extra nesting that assert_successful_parse() will do.
+ run_decoder(buf, &textbuf);
}
upb::reffed_ptr<const upb::pb::DecoderMethod> NewMethod(
@@ -1207,10 +1264,11 @@ upb::reffed_ptr<const upb::pb::DecoderMethod> method =
upb::Sink sink(method->dest_handlers(), &closures[0]);
upb::pb::Decoder* decoder = CreateDecoder(env.env(), method.get(), &sink);
env.ResetBytesSink(decoder->input());
- env.Reset(testdata[i].data, testdata[i].length, true);
+ env.Reset(testdata[i].data, testdata[i].length, true, false);
ASSERT(env.Start());
ASSERT(env.ParseBuffer(-1));
ASSERT(env.End());
+ ASSERT(env.CheckConsistency());
}
}
diff --git a/tests/test_util.h b/tests/test_util.h
index f3d5d5a..caf9ee8 100644
--- a/tests/test_util.h
+++ b/tests/test_util.h
@@ -29,16 +29,35 @@ class VerboseParserEnvironment {
public:
// Pass verbose=true to print detailed diagnostics to stderr.
VerboseParserEnvironment(bool verbose) : verbose_(verbose) {
- env_.ReportErrorsTo(&status_);
+ env_.SetErrorFunction(&VerboseParserEnvironment::OnError, this);
}
- void Reset(const char *buf, size_t len, bool may_skip) {
+ static bool OnError(void *ud, const upb::Status* status) {
+ VerboseParserEnvironment* env = static_cast<VerboseParserEnvironment*>(ud);
+
+ env->saw_error_ = true;
+
+ if (env->expect_error_ && env->verbose_) {
+ fprintf(stderr, "Encountered error, as expected: ");
+ } else if (!env->expect_error_) {
+ fprintf(stderr, "Encountered unexpected error: ");
+ } else {
+ return false;
+ }
+
+ fprintf(stderr, "%s\n", status->error_message());
+ return false;
+ }
+
+ void Reset(const char *buf, size_t len, bool may_skip, bool expect_error) {
buf_ = buf;
len_ = len;
ofs_ = 0;
+ expect_error_ = expect_error;
+ saw_error_ = false;
+ end_ok_set_ = false;
skip_until_ = may_skip ? 0 : -1;
skipped_with_null_ = false;
- status_.Clear();
}
// The user should call a series of:
@@ -63,7 +82,26 @@ class VerboseParserEnvironment {
if (verbose_) {
fprintf(stderr, "Calling end()\n");
}
- return sink_->End();
+ end_ok_ = sink_->End();
+ end_ok_set_ = true;
+
+ return end_ok_;
+ }
+
+ bool CheckConsistency() {
+ /* If we called end (which we should only do when previous bytes are fully
+ * accepted), then end() should return true iff there were no errors. */
+ if (end_ok_set_ && end_ok_ != !saw_error_) {
+ fprintf(stderr, "End() status and saw_error didn't match.\n");
+ return false;
+ }
+
+ if (expect_error_ && !saw_error_) {
+ fprintf(stderr, "Expected error but saw none.\n");
+ return false;
+ }
+
+ return true;
}
bool ParseBuffer(int bytes) {
@@ -117,7 +155,7 @@ class VerboseParserEnvironment {
}
}
- if (!status_.ok())
+ if (saw_error_)
return false;
if (parsed > bytes && skip_until_ >= 0) {
@@ -133,8 +171,6 @@ class VerboseParserEnvironment {
sink_ = sink;
}
- const upb::Status& status() { return status_; }
-
size_t ofs() { return ofs_; }
upb::Environment* env() { return &env_; }
@@ -142,13 +178,16 @@ class VerboseParserEnvironment {
private:
upb::Environment env_;
- upb::Status status_;
upb::BytesSink* sink_;
const char* buf_;
size_t len_;
bool verbose_;
size_t ofs_;
void *subc_;
+ bool expect_error_;
+ bool saw_error_;
+ bool end_ok_;
+ bool end_ok_set_;
// When our parse call returns a value greater than the number of bytes
// we passed in, the decoder is indicating to us that the next N bytes
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index 2828247..b75f45c 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -596,7 +596,12 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f,
if (!sub_m) {
/* Don't emit any code for this field at all; it will be parsed as an
- * unknown field. */
+ * unknown field.
+ *
+ * TODO(haberman): we should change this to parse it as a string field
+ * instead. It will probably be faster, but more importantly, once we
+ * start vending unknown fields, a field shouldn't be treated as unknown
+ * just because it doesn't have subhandlers registered. */
return;
}
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 7eca6d7..e2e79ae 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -36,6 +36,11 @@ static const char *kUnterminatedVarint = "Unterminated varint.";
static opcode halt = OP_HALT;
+/* A dummy character we can point to when the user passes us a NULL buffer.
+ * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
+ * behavior, which would invalidate functions like curbufleft(). */
+static const char dummy_char;
+
/* Whether an op consumes any of the input buffer. */
static bool consumes_input(opcode op) {
switch (op) {
@@ -191,7 +196,7 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
if (bytes > delim_remaining(d)) {
seterr(d, "Skipped value extended beyond enclosing submessage.");
return upb_pbdecoder_suspend(d);
- } else if (bufleft(d) > bytes) {
+ } else if (bufleft(d) >= bytes) {
/* Skipped data is all in current buffer, and more is still available. */
advance(d, bytes);
d->skip = 0;
@@ -213,10 +218,39 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
size_t size, const upb_bufhandle *handle) {
UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
- d->buf_param = buf;
+ /* d->skip and d->residual_end could probably elegantly be represented
+ * as a single variable, to more easily represent this invariant. */
+ assert(!(d->skip && d->residual_end > d->residual));
+
+ /* We need to remember the original size_param, so that the value we return
+ * is relative to it, even if we do some skipping first. */
d->size_param = size;
d->handle = handle;
+ /* Have to handle this case specially (ie. not with skip()) because the user
+ * is allowed to pass a NULL buffer here, which won't allow us to safely
+ * calculate a d->end or use our normal functions like curbufleft(). */
+ if (d->skip && d->skip >= size) {
+ d->skip -= size;
+ d->bufstart_ofs += size;
+ buf = &dummy_char;
+ size = 0;
+
+ /* We can't just return now, because we might need to execute some ops
+ * like CHECKDELIM, which could call some callbacks and pop the stack. */
+ }
+
+ /* We need to pretend that this was the actual buffer param, since some of the
+ * calculations assume that d->ptr/d->buf is relative to this. */
+ d->buf_param = buf;
+
+ if (!buf) {
+ /* NULL buf is ok if its entire span is covered by the "skip" above, but
+ * by this point we know that "skip" doesn't cover the buffer. */
+ seterr(d, "Passed NULL buffer over non-skippable region.");
+ return upb_pbdecoder_suspend(d);
+ }
+
if (d->residual_end > d->residual) {
/* We have residual bytes from the last buffer. */
assert(d->ptr == d->residual);
@@ -226,23 +260,18 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
d->checkpoint = d->ptr;
+ /* Handle skips that don't cover the whole buffer (as above). */
if (d->skip) {
size_t skip_bytes = d->skip;
d->skip = 0;
CHECK_RETURN(skip(d, skip_bytes));
- d->checkpoint = d->ptr;
- }
-
- if (!buf) {
- /* NULL buf is ok if its entire span is covered by the "skip" above, but
- * by this point we know that "skip" doesn't cover the buffer. */
- seterr(d, "Passed NULL buffer over non-skippable region.");
- return upb_pbdecoder_suspend(d);
+ checkpoint(d);
}
+ /* If we're inside an unknown group, continue to parse unknown values. */
if (d->top->groupnum < 0) {
CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
- d->checkpoint = d->ptr;
+ checkpoint(d);
}
return DECODE_OK;
@@ -257,15 +286,14 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
d->ptr = d->residual;
return 0;
} else {
- size_t consumed;
+ size_t ret = d->size_param - (d->end - d->checkpoint);
assert(!in_residual_buf(d, d->checkpoint));
- assert(d->buf == d->buf_param);
+ assert(d->buf == d->buf_param || d->buf == &dummy_char);
- consumed = d->checkpoint - d->buf;
- d->bufstart_ofs += consumed;
+ d->bufstart_ofs += (d->checkpoint - d->buf);
d->residual_end = d->residual;
switchtobuf(d, d->residual, d->residual_end);
- return consumed;
+ return ret;
}
}
@@ -383,8 +411,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
int bitpos;
*u64 = 0;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
- int32_t ret = getbytes(d, &byte, 1);
- if (ret >= 0) return ret;
+ CHECK_RETURN(getbytes(d, &byte, 1));
*u64 |= (uint64_t)(byte & 0x7F) << bitpos;
}
if(bitpos == 70 && (byte & 0x80)) {
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index a63f74b..b7e462b 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -220,9 +220,9 @@ struct upb_pbdecoder {
/* Buffer for residual bytes not parsed from the previous buffer.
* The maximum number of residual bytes we require is 12; a five-byte
- * unknown tag plus an eight-byte value, less one because the value
+ * unknown tag plus a ten-byte value, less one because the value
* is only a partial value. */
- char residual[12];
+ char residual[14];
char *residual_end;
/* Bytes of data that should be discarded from the input beore we start
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback