summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile16
-rw-r--r--benchmarks/parsestream.upb.c3
-rw-r--r--benchmarks/parsetoproto2.upb.cc24
-rw-r--r--benchmarks/parsetostruct.upb.c8
-rw-r--r--bindings/cpp/upb/bytestream.cc39
-rw-r--r--bindings/cpp/upb/bytestream.hpp238
-rw-r--r--bindings/cpp/upb/def.hpp77
-rw-r--r--bindings/cpp/upb/handlers.hpp95
-rw-r--r--bindings/cpp/upb/pb/decoder.hpp83
-rw-r--r--bindings/cpp/upb/upb.hpp23
-rw-r--r--examples/stream_transcode.c76
-rw-r--r--tests/test_cpp.cc15
-rw-r--r--tests/test_decoder.c700
-rw-r--r--tests/test_varint.c40
-rw-r--r--tests/test_vs_proto2.cc27
-rw-r--r--tests/tests.c10
-rw-r--r--upb/bytestream.c95
-rw-r--r--upb/bytestream.h97
-rw-r--r--upb/def.c2
-rw-r--r--upb/handlers.c140
-rw-r--r--upb/handlers.h69
-rw-r--r--upb/msg.c6
-rw-r--r--upb/pb/decoder.c361
-rw-r--r--upb/pb/decoder.h102
-rw-r--r--upb/pb/decoder_x64.dasc322
-rw-r--r--upb/pb/glue.c33
-rw-r--r--upb/pb/glue.h4
-rw-r--r--upb/pb/varint.h40
-rw-r--r--upb/table.h9
-rw-r--r--upb/upb.c101
-rw-r--r--upb/upb.h37
31 files changed, 2118 insertions, 774 deletions
diff --git a/Makefile b/Makefile
index bebe023..5320876 100644
--- a/Makefile
+++ b/Makefile
@@ -162,13 +162,9 @@ upb/pb/jit_debug_elf_file.o: upb/pb/jit_debug_elf_file.s
$(E) GAS $<
$(Q) gcc -c upb/pb/jit_debug_elf_file.s -o upb/pb/jit_debug_elf_file.o
-upb/pb/jit_debug_elf_file2.o: upb/pb/jit_debug_elf_file.o
- $(E) OBJCOPY $<
- $(Q) objcopy --change-section-address .text=0x12345678 $< $@
-
-upb/pb/jit_debug_elf_file.h: upb/pb/jit_debug_elf_file2.o
+upb/pb/jit_debug_elf_file.h: upb/pb/jit_debug_elf_file.o
$(E) XXD $<
- $(Q) xxd -i < upb/pb/jit_debug_elf_file2.o > upb/pb/jit_debug_elf_file.h
+ $(Q) xxd -i < upb/pb/jit_debug_elf_file.o > upb/pb/jit_debug_elf_file.h
upb/pb/decoder_x64.h: upb/pb/jit_debug_elf_file.h
endif
@@ -232,15 +228,13 @@ VALGRIND=valgrind --leak-check=full --error-exitcode=1
test: tests
@echo Running all tests under valgrind.
@set -e # Abort on error.
- @for test in $(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS); do \
+ @for test in $(TESTS); do \
if [ -x ./$$test ] ; then \
echo !!! $(VALGRIND) ./$$test; \
- $(VALGRIND) ./$$test tests/test.proto.pb || exit 1; \
+ $(VALGRIND) ./$$test || exit 1; \
fi \
done; \
- $(VALGRIND) ./tests/t.test_vs_proto2.googlemessage1 benchmarks/google_messages.proto.pb benchmarks/google_message1.dat
- $(VALGRIND) ./tests/t.test_vs_proto2.googlemessage2 benchmarks/google_messages.proto.pb benchmarks/google_message2.dat
- @echo "All tests passed!"
+ echo "All tests passed!"
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2: \
diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c
index 19d8ccf..4d13e9d 100644
--- a/benchmarks/parsestream.upb.c
+++ b/benchmarks/parsestream.upb.c
@@ -76,7 +76,8 @@ static size_t run(int i)
(void)i;
upb_status status = UPB_STATUS_INIT;
upb_stringsrc_reset(&stringsrc, input_str, input_len);
- upb_decoder_reset(&decoder, upb_stringsrc_allbytes(&stringsrc), NULL);
+ upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc),
+ 0, UPB_NONDELIMITED, NULL);
upb_decoder_decode(&decoder, &status);
if(!upb_ok(&status)) goto err;
return input_len;
diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc
index 03a1039..75cd10c 100644
--- a/benchmarks/parsetoproto2.upb.cc
+++ b/benchmarks/parsetoproto2.upb.cc
@@ -24,7 +24,6 @@
#include <google/protobuf/descriptor.h>
#undef private
-char *str;
static size_t len;
MESSAGE_CIDENT msg[NUM_MESSAGES];
MESSAGE_CIDENT msg2;
@@ -54,13 +53,9 @@ upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) {
const upb_fielddef *f = upb_value_getfielddef(fval);
std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1);
if (*str == f->default_ptr) *str = new std::string;
- const upb_byteregion *ref = upb_value_getbyteregion(val);
- uint32_t len;
- (*str)->assign(
- upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len),
- upb_byteregion_len(ref));
- assert(len == upb_byteregion_len(ref));
+ const upb_strref *ref = upb_value_getstrref(val);
// XXX: only supports contiguous strings atm.
+ (*str)->assign(ref->ptr, ref->len);
return UPB_CONTINUE;
}
@@ -69,13 +64,9 @@ upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) {
typedef google::protobuf::RepeatedPtrField<std::string> R;
(void)fval;
R *r = (R*)_r;
- const upb_byteregion *ref = upb_value_getbyteregion(val);
+ const upb_strref *ref = upb_value_getstrref(val);
// XXX: only supports contiguous strings atm.
- uint32_t len;
- r->Add()->assign(
- upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len),
- upb_byteregion_len(ref));
- assert(len == upb_byteregion_len(ref));
+ r->Add()->assign(ref->ptr, ref->len);
return UPB_CONTINUE;
}
@@ -274,7 +265,7 @@ static bool initialize()
upb_symtab_unref(s);
// Read the message data itself.
- str = upb_readfile(MESSAGE_FILE, &len);
+ char *str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
@@ -284,6 +275,7 @@ static bool initialize()
msg2.ParseFromArray(str, len);
upb_stringsrc_init(&strsrc);
+ upb_stringsrc_reset(&strsrc, str, len);
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, def);
if (!JIT) h->should_jit = false;
@@ -304,8 +296,8 @@ static size_t run(int i)
(void)i;
upb_status status = UPB_STATUS_INIT;
msg[i % NUM_MESSAGES].Clear();
- upb_stringsrc_reset(&strsrc, str, len);
- upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]);
+ upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
+ 0, UPB_NONDELIMITED, &msg[i % NUM_MESSAGES]);
upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err;
return len;
diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c
index 4eeafbb..5e7aa35 100644
--- a/benchmarks/parsetostruct.upb.c
+++ b/benchmarks/parsetostruct.upb.c
@@ -8,7 +8,6 @@
#include "upb/pb/glue.h"
static const upb_msgdef *def;
-char *str;
static size_t len;
static void *msg[NUM_MESSAGES];
static upb_stringsrc strsrc;
@@ -34,7 +33,7 @@ static bool initialize()
upb_symtab_unref(s);
// Read the message data itself.
- str = upb_readfile(MESSAGE_FILE, &len);
+ char *str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
@@ -44,6 +43,7 @@ static bool initialize()
msg[i] = upb_stdmsg_new(def);
upb_stringsrc_init(&strsrc);
+ upb_stringsrc_reset(&strsrc, str, len);
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, def);
if (!JIT) h->should_jit = false;
@@ -70,8 +70,8 @@ static size_t run(int i)
upb_status status = UPB_STATUS_INIT;
i %= NUM_MESSAGES;
upb_msg_clear(msg[i], def);
- upb_stringsrc_reset(&strsrc, str, len);
- upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg[i]);
+ upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
+ 0, UPB_NONDELIMITED, msg[i]);
upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err;
return len;
diff --git a/bindings/cpp/upb/bytestream.cc b/bindings/cpp/upb/bytestream.cc
new file mode 100644
index 0000000..df0797e
--- /dev/null
+++ b/bindings/cpp/upb/bytestream.cc
@@ -0,0 +1,39 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+
+#include "bytestream.hpp"
+
+namespace upb {
+
+upb_bytesrc_vtbl* ByteSourceBase::vtable() {
+ static upb_bytesrc_vtbl vtbl = {
+ &ByteSourceBase::VFetch,
+ &ByteSourceBase::VDiscard,
+ &ByteSourceBase::VCopy,
+ &ByteSourceBase::VGetPtr,
+ };
+ return &vtbl;
+}
+
+upb_bytesuccess_t ByteSourceBase::VFetch(void *src, uint64_t ofs, size_t *len) {
+ return static_cast<ByteSourceBase*>(src)->Fetch(ofs, len);
+}
+
+void ByteSourceBase::VCopy(
+ const void *src, uint64_t ofs, size_t len, char* dest) {
+ static_cast<const ByteSourceBase*>(src)->Copy(ofs, len, dest);
+}
+
+void ByteSourceBase::VDiscard(void *src, uint64_t ofs) {
+ static_cast<ByteSourceBase*>(src)->Discard(ofs);
+}
+
+const char * ByteSourceBase::VGetPtr(
+ const void *src, uint64_t ofs, size_t* len) {
+ return static_cast<const ByteSourceBase*>(src)->GetPtr(ofs, len);
+}
+
+} // namespace upb
diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp
new file mode 100644
index 0000000..968d542
--- /dev/null
+++ b/bindings/cpp/upb/bytestream.hpp
@@ -0,0 +1,238 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// This file defines three core interfaces:
+// - upb::ByteSink: for writing streams of data.
+// - upb::ByteSource: for reading streams of data.
+// - upb::ByteRegion: for reading from a specific region of a ByteSource;
+// should be used by decoders instead of using a ByteSource directly.
+//
+// These interfaces are used by streaming encoders and decoders: for example, a
+// protobuf parser gets its input from a upb::ByteRegion. They are virtual
+// base classes so concrete implementations can get the data from a fd, a
+// FILE*, a string, etc.
+//
+// A ByteRegion represents a region of data from a ByteSource.
+//
+// Parsers get data from this interface instead of a bytesrc because we often
+// want to parse only a specific region of the input. For example, if we parse
+// a string from our input but know that the string represents a protobuf, we
+// can pass its ByteRegion to an appropriate protobuf parser.
+//
+// Since the bytes may be coming from a file or network socket, bytes must be
+// fetched before they can be read (though in some cases this fetch may be a
+// no-op). "fetch" is the only operation on a byteregion that could fail or
+// block, because it is the only operation that actually performs I/O.
+//
+// Bytes can be discarded when they are no longer needed. Parsers should
+// always discard bytes they no longer need, both so the buffers can be freed
+// when possible and to give better visibility into what bytes the parser is
+// still using.
+//
+// start discard read fetch end
+// ofs ofs ofs ofs ofs
+// | |--->Discard() | |--->Fetch() |
+// V V V V V
+// +-------------+-------------------------+-----------------+-----------------+
+// | discarded | | | fetchable |
+// +-------------+-------------------------+-----------------+-----------------+
+// | <------------- loaded ------------------> |
+// | <- available -> |
+// | <---------- remaining ----------> |
+//
+// Note that the start offset may be something other than zero! A byteregion
+// is a view into an underlying bytesrc stream, and the region may start
+// somewhere other than the beginning of that stream.
+//
+// The region can be either delimited or nondelimited. A non-delimited region
+// will keep returning data until the underlying data source returns EOF. A
+// delimited region will return EOF at a predetermined offset.
+//
+// end
+// ofs
+// |
+// V
+// +-----------------------+
+// | delimited region | <-- hard EOF, even if data source has more data.
+// +-----------------------+
+//
+// +------------------------
+// | nondelimited region Z <-- won't return EOF until data source hits EOF.
+// +------------------------
+
+#ifndef UPB_BYTESTREAM_HPP
+#define UPB_BYTESTREAM_HPP
+
+#include "upb/bytestream.h"
+#include "upb/upb.hpp"
+
+namespace upb {
+
+typedef upb_bytesuccess_t ByteSuccess;
+
+// Implement this interface to vend bytes to ByteRegions which will be used by
+// a decoder.
+class ByteSourceBase : public upb_bytesrc {
+ public:
+ ByteSourceBase() { upb_bytesrc_init(this, vtable()); }
+ virtual ~ByteSourceBase() { upb_bytesrc_uninit(this); }
+
+ // Fetches at least one byte starting at ofs, setting *len to the actual
+ // number of bytes fetched (or 0 on EOF or error: see return value for
+ // details). It is valid for bytes to be fetched multiple times, as long as
+ // the bytes have not been previously discarded.
+ virtual ByteSuccess Fetch(uint64_t ofs, size_t* len) = 0;
+
+ // Discards all data prior to ofs (except data that is pinned, if pinning
+ // support is added -- see TODO below).
+ virtual void Discard(uint64_t ofs) = 0;
+
+ // Copies "len" bytes of data from ofs to "dst", which must be at least "len"
+ // bytes long. The given region must not be discarded.
+ virtual void Copy(uint64_t ofs, size_t len, char *dst) const = 0;
+
+ // Returns a pointer to the bytesrc's internal buffer, storing in *len how
+ // much data is available. The given offset must not be discarded. The
+ // returned buffer is valid for as long as its bytes are not discarded (in
+ // the case that part of the returned buffer is discarded, only the
+ // non-discarded bytes remain valid).
+ virtual const char *GetPtr(uint64_t ofs, size_t *len) const = 0;
+
+ // TODO: Add if/when there is a demonstrated need:
+ //
+ // // When the caller pins a region (which must not be already discarded), it
+ // // is guaranteed that the region will not be discarded (nor will the
+ // // bytesrc be destroyed) until the region is unpinned. However, not all
+ // // bytesrc's support pinning; a false return indicates that a pin was not
+ // // possible.
+ // virtual bool Pin(uint64_t ofs, size_t len);
+ //
+ // // Releases some number of pinned bytes from the beginning of a pinned
+ // // region (which may be fewer than the total number of bytes pinned).
+ // virtual void Unpin(uint64_t ofs, size_t len, size_t bytes_to_release);
+ //
+ // Adding pinning support would also involve adding a "pin_ofs" parameter to
+ // upb_bytesrc_fetch, so that the fetch can extend an already-pinned region.
+ private:
+ static upb_bytesrc_vtbl* vtable();
+ static upb_bytesuccess_t VFetch(void*, uint64_t, size_t*);
+ static void VDiscard(void*, uint64_t);
+ static void VCopy(const void*, uint64_t, size_t, char*);
+ static const char *VGetPtr(const void*, uint64_t, size_t*);
+};
+
+class ByteRegion : public upb_byteregion {
+ public:
+ static const uint64_t kNondelimited = UPB_NONDELIMITED;
+
+ ByteRegion() { upb_byteregion_init(this); }
+ ~ByteRegion() { upb_byteregion_uninit(this); }
+
+ // Accessors for the region's bounds -- the meaning of these is described in
+ // the diagram above.
+ uint64_t start_ofs() const { return upb_byteregion_startofs(this); }
+ uint64_t discard_ofs() const { return upb_byteregion_discardofs(this); }
+ uint64_t fetch_ofs() const { return upb_byteregion_fetchofs(this); }
+ uint64_t end_ofs() const { return upb_byteregion_endofs(this); }
+
+ // Returns how many bytes are fetched and available for reading starting from
+ // offset "offset".
+ uint64_t BytesAvailable(uint64_t offset) const {
+ return upb_byteregion_available(this, offset);
+ }
+
+ // Returns the total number of bytes remaining after offset "offset", or
+ // kNondelimited if the byteregion is non-delimited.
+ uint64_t BytesRemaining(uint64_t offset) const {
+ return upb_byteregion_remaining(this, offset);
+ }
+
+ uint64_t Length() const { return upb_byteregion_len(this); }
+
+ // Sets the value of this byteregion to be a subset of the given byteregion's
+ // data. The caller is responsible for releasing this region before the src
+ // region is released (unless the region is first pinned, if pinning support
+ // is added. see below).
+ void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len) {
+ upb_byteregion_reset(this, src, ofs, len);
+ }
+ void Release() { upb_byteregion_release(this); }
+
+ // Attempts to fetch more data, extending the fetched range of this
+ // byteregion. The returned ByteSuccess indicates whether the fetched region
+ // was extended by at least one byte or whether EOF or an error was hit.
+ ByteSuccess Fetch() { return upb_byteregion_fetch(this); }
+
+ // Fetches all remaining data; the returned ByteSuccess indicates whether the
+ // operation failed. May only be used on delimited byteregions.
+ ByteSuccess FetchAll() { return upb_byteregion_fetchall(this); }
+
+ // Discards bytes from the byteregion up until ofs (which must be greater or
+ // equal to discard_ofs()). It is valid to discard bytes that have not been
+ // fetched (such bytes will never be fetched) but it is an error to discard
+ // past the end of a delimited byteregion.
+ void Discard(uint64_t ofs) { return upb_byteregion_discard(this, ofs); }
+
+ // Copies "len" bytes of data into "dst", starting at ofs. The specified
+ // region must be available.
+ void Copy(uint64_t ofs, size_t len, char *dst) const {
+ upb_byteregion_copy(this, ofs, len, dst);
+ }
+
+ // Copies all bytes from the byteregion into dst. Requires that the entire
+ // byteregion is fetched and that none has been discarded.
+ void CopyAll(char *dst) const {
+ upb_byteregion_copyall(this, dst);
+ }
+
+ // Returns a pointer to the internal buffer for the byteregion starting at
+ // offset "ofs." Stores the number of bytes available in this buffer in *len.
+ // The returned buffer is invalidated when the byteregion is reset or
+ // released, or when the bytes are discarded. If the byteregion is not
+ // currently pinned, the pointer is only valid for the lifetime of the parent
+ // byteregion.
+ const char *GetPtr(uint64_t ofs, size_t *len) const {
+ return upb_byteregion_getptr(this, ofs, len);
+ }
+
+ // Copies the contents of the byteregion into a newly-allocated,
+ // NULL-terminated string. Requires that the byteregion is fully fetched.
+ char *StrDup() const {
+ return upb_byteregion_strdup(this);
+ }
+
+ // TODO: add if/when there is a demonstrated need.
+ //
+ // // Pins this byteregion's bytes in memory, allowing it to outlive its
+ // // parent byteregion. Normally a byteregion may only be used while its
+ // // parent is still valid, but a pinned byteregion may continue to be used
+ // // until it is reset or released. A byteregion must be fully fetched to
+ // // be pinned (this implies that the byteregion must be delimited).
+ // //
+ // // In some cases this operation may cause the input data to be copied.
+ // //
+ // // void Pin();
+};
+
+class StringSource : public upb_stringsrc {
+ public:
+ StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
+ ~StringSource() { upb_stringsrc_uninit(this); }
+
+ void Reset(const char* data, size_t len) {
+ upb_stringsrc_reset(this, data, len);
+ }
+
+ ByteRegion* AllBytes() {
+ return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
+ }
+
+ upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
+};
+
+} // namespace upb
+
+#endif
diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp
index ac9aff1..030ba40 100644
--- a/bindings/cpp/upb/def.hpp
+++ b/bindings/cpp/upb/def.hpp
@@ -1,42 +1,41 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2011 Google Inc. See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * The set of upb::*Def classes and upb::SymbolTable allow for defining and
- * manipulating schema information (as defined in .proto files).
- *
- * Defs go through two distinct phases of life:
- *
- * 1. MUTABLE: when first created, the properties of the def can be set freely
- * (for example a message's name, its list of fields, the name/number of
- * fields, etc). During this phase the def is *not* thread-safe, and may
- * not be used for any purpose except to set its properties (it can't be
- * used to parse anything, create any messages in memory, etc).
- *
- * 2. FINALIZED: after being added to a symtab (which links the defs together)
- * the defs become finalized (thread-safe and immutable). Programs may only
- * access defs through a CONST POINTER during this stage -- upb_symtab will
- * help you out with this requirement by only vending const pointers, but
- * you need to make sure not to use any non-const pointers you still have
- * sitting around. In practice this means that you may not call any setters
- * on the defs (or functions that themselves call the setters). If you want
- * to modify an existing immutable def, copy it with upb_*_dup(), modify the
- * copy, and add the modified def to the symtab (replacing the existing
- * def).
- *
- * You can test for which stage of life a def is in by calling
- * upb::Def::IsMutable(). This is particularly useful for dynamic language
- * bindings, which must properly guarantee that the dynamic language cannot
- * break the rules laid out above.
- *
- * It would be possible to make the defs thread-safe during stage 1 by using
- * mutexes internally and changing any methods returning pointers to return
- * copies instead. This could be important if we are integrating with a VM or
- * interpreter that does not naturally serialize access to wrapped objects (for
- * example, in the case of Python this is not necessary because of the GIL).
- */
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// The set of upb::*Def classes and upb::SymbolTable allow for defining and
+// manipulating schema information (as defined in .proto files).
+//
+// Defs go through two distinct phases of life:
+//
+// 1. MUTABLE: when first created, the properties of the def can be set freely
+// (for example a message's name, its list of fields, the name/number of
+// fields, etc). During this phase the def is *not* thread-safe, and may
+// not be used for any purpose except to set its properties (it can't be
+// used to parse anything, create any messages in memory, etc).
+//
+// 2. FINALIZED: after being added to a symtab (which links the defs together)
+// the defs become finalized (thread-safe and immutable). Programs may only
+// access defs through a CONST POINTER during this stage -- upb_symtab will
+// help you out with this requirement by only vending const pointers, but
+// you need to make sure not to use any non-const pointers you still have
+// sitting around. In practice this means that you may not call any setters
+// on the defs (or functions that themselves call the setters). If you want
+// to modify an existing immutable def, copy it with upb_*_dup(), modify the
+// copy, and add the modified def to the symtab (replacing the existing
+// def).
+//
+// You can test for which stage of life a def is in by calling
+// upb::Def::IsMutable(). This is particularly useful for dynamic language
+// bindings, which must properly guarantee that the dynamic language cannot
+// break the rules laid out above.
+//
+// It would be possible to make the defs thread-safe during stage 1 by using
+// mutexes internally and changing any methods returning pointers to return
+// copies instead. This could be important if we are integrating with a VM or
+// interpreter that does not naturally serialize access to wrapped objects (for
+// example, in the case of Python this is not necessary because of the GIL).
#ifndef UPB_DEF_HPP
#define UPB_DEF_HPP
diff --git a/bindings/cpp/upb/handlers.hpp b/bindings/cpp/upb/handlers.hpp
index 07683f6..d356a33 100644
--- a/bindings/cpp/upb/handlers.hpp
+++ b/bindings/cpp/upb/handlers.hpp
@@ -1,15 +1,14 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2011 Google Inc. See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- *
- * upb::Handlers is a generic visitor-like interface for iterating over a
- * stream of protobuf data. You can register function pointers that will be
- * called for each message and/or field as the data is being parsed or iterated
- * over, without having to know the source format that we are parsing from.
- * This decouples the parsing logic from the processing logic.
- */
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// upb::Handlers is a generic visitor-like interface for iterating over a
+// stream of protobuf data. You can register function pointers that will be
+// called for each message and/or field as the data is being parsed or iterated
+// over, without having to know the source format that we are parsing from.
+// This decouples the parsing logic from the processing logic.
#ifndef UPB_HANDLERS_HPP
#define UPB_HANDLERS_HPP
@@ -18,6 +17,7 @@
namespace upb {
+typedef upb_fieldtype_t FieldType;
typedef upb_flow_t Flow;
class MessageHandlers;
@@ -30,8 +30,8 @@ class FieldHandlers : public upb_fhandlers {
// The FieldHandlers will live at least as long as the upb::Handlers to
// which it belongs, but can be Ref'd/Unref'd to make it live longer (which
// will prolong the life of the underlying upb::Handlers also).
- void Ref() const { upb_fhandlers_ref(this); }
- void Unref() const { upb_fhandlers_unref(this); }
+ void Ref() { upb_fhandlers_ref(this); }
+ void Unref() { upb_fhandlers_unref(this); }
// Functions to set this field's handlers.
// These return "this" so they can be conveniently chained, eg.
@@ -46,13 +46,13 @@ class FieldHandlers : public upb_fhandlers {
upb_fhandlers_setstartseq(this, h); return this;
}
FieldHandlers* SetEndSequenceHandler(EndFieldHandler* h) {
- upb_fhandlers_endseq(this, h); return this;
+ upb_fhandlers_setendseq(this, h); return this;
}
FieldHandlers* SetStartSubmessageHandler(StartFieldHandler* h) {
upb_fhandlers_setstartsubmsg(this, h); return this;
}
FieldHandlers* SetEndSubmessageHandler(EndFieldHandler* h) {
- upb_fhandlers_endsubmsg(this, h); return this;
+ upb_fhandlers_setendsubmsg(this, h); return this;
}
// Get/Set the field's bound value, which will be passed to its handlers.
@@ -62,27 +62,20 @@ class FieldHandlers : public upb_fhandlers {
}
// Returns the MessageHandlers to which we belong.
- MessageHandlers* GetMessageHandlers() const {
- return upb_fhandlers_msg(this);
- }
-
+ MessageHandlers* GetMessageHandlers() const;
// Returns the MessageHandlers for this field's submessage (invalid to call
// unless this field's type UPB_TYPE(MESSAGE) or UPB_TYPE(GROUP).
- MessageHandlers* GetSubMessageHandlers() const {
- return upb_fhandlers_submsg(this);
- }
-
+ MessageHandlers* GetSubMessageHandlers() const;
// If set to >=0, the given hasbit will be set after the value callback is
- // called (relative to the current closure).
- int32_t GetValueHasbit() const { return upb_fhandler_valuehasbit(this); }
- void SetValueHasbit(int32_t bit) { upb_fhandler_setvaluehasbit(this, bit); }
+ // called (offset relative to the current closure).
+ int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); }
+ void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); }
private:
FieldHandlers(); // Only created by upb::Handlers.
~FieldHandlers(); // Only destroyed by refcounting.
};
-
class MessageHandlers : public upb_mhandlers {
public:
typedef upb_startmsg_handler StartMessageHandler;
@@ -91,8 +84,8 @@ class MessageHandlers : public upb_mhandlers {
// The MessageHandlers will live at least as long as the upb::Handlers to
// which it belongs, but can be Ref'd/Unref'd to make it live longer (which
// will prolong the life of the underlying upb::Handlers also).
- void Ref() const { upb_mhandlers_ref(this); }
- void Unref() const { upb_mhandlers_unref(this); }
+ void Ref() { upb_mhandlers_ref(this); }
+ void Unref() { upb_mhandlers_unref(this); }
// Functions to set this message's handlers.
// These return "this" so they can be conveniently chained, eg.
@@ -107,12 +100,10 @@ class MessageHandlers : public upb_mhandlers {
}
// Functions to create new FieldHandlers for this message.
- FieldHandlers* NewFieldHandlers(uint32_t fieldnum, upb_fieldtype_t type,
+ FieldHandlers* NewFieldHandlers(uint32_t fieldnum, FieldType type,
bool repeated) {
- return upb_mhandlers_newfhandlers(this, fieldnum, type, repeated);
- }
- FieldHandlers* NewFieldHandlers(FieldDef* f) {
- return upb_mhandlers_newfhandlers_fordef(f);
+ return static_cast<FieldHandlers*>(
+ upb_mhandlers_newfhandlers(this, fieldnum, type, repeated));
}
// Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the
@@ -120,15 +111,10 @@ class MessageHandlers : public upb_mhandlers {
FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name,
FieldType type, bool repeated,
MessageHandlers* subm) {
- return upb_mhandlers_newsubmsgfhandlers(this, n, type, repeated, subm);
- }
-
- FieldHandlers* NewFieldHandlersForSubmessage(FieldDef* f,
- MessageHandlers* subm) {
- return upb_mhandlers_newsubmsgfhandlers_fordef(f);
+ return static_cast<FieldHandlers*>(
+ upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm));
}
-
private:
MessageHandlers(); // Only created by upb::Handlers.
~MessageHandlers(); // Only destroyed by refcounting.
@@ -137,26 +123,31 @@ class MessageHandlers : public upb_mhandlers {
class Handlers : public upb_handlers {
public:
// Creates a new Handlers instance.
- Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); }
+ static Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); }
void Ref() { upb_handlers_ref(this); }
void Unref() { upb_handlers_unref(this); }
// Returns a new MessageHandlers object. The first such message that is
// obtained will be the top-level message for this Handlers object.
- MessageHandlers* NewMessageHandlers() { return upb_handlers_newmhandlers(this); }
-
- // Freezes the handlers against future modification. Handlers must be
- // finalized before they can be passed to a data producer. After Finalize()
- // has been called, you may only call const methods on the Handlers and its
- // MessageHandlers/FieldHandlers.
- void Finalize() { upb_handlers_finalize(this); }
+ MessageHandlers* NewMessageHandlers() {
+ return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this));
+ }
private:
- FieldHandlers(); // Only created by Handlers::New().
- ~FieldHandlers(); // Only destroyed by refcounting.
+ Handlers(); // Only created by Handlers::New().
+ ~Handlers(); // Only destroyed by refcounting.
};
+
+MessageHandlers* FieldHandlers::GetMessageHandlers() const {
+ return static_cast<MessageHandlers*>(upb_fhandlers_getmsg(this));
+}
+
+MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
+ return static_cast<MessageHandlers*>(upb_fhandlers_getsubmsg(this));
+}
+
} // namespace upb
#endif
diff --git a/bindings/cpp/upb/pb/decoder.hpp b/bindings/cpp/upb/pb/decoder.hpp
new file mode 100644
index 0000000..05bcb8a
--- /dev/null
+++ b/bindings/cpp/upb/pb/decoder.hpp
@@ -0,0 +1,83 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// upb::Decoder is a high performance, streaming decoder for protobuf
+// data that works by getting its input data from a upb::ByteRegion and calling
+// into a upb::Handlers.
+//
+// A DecoderPlan contains whatever data structures and generated (JIT-ted) code
+// are necessary to decode protobuf data of a specific type to a specific set
+// of handlers. By generating the plan ahead of time, we avoid having to
+// redo this work every time we decode.
+//
+// A DecoderPlan is threadsafe, meaning that it can be used concurrently by
+// different upb::Decoders in different threads. However, the upb::Decoders are
+// *not* thread-safe.
+
+#ifndef UPB_PB_DECODER_HPP
+#define UPB_PB_DECODER_HPP
+
+#include "upb/pb/decoder.h"
+
+#include "upb/bytestream.hpp"
+#include "upb/upb.hpp"
+
+namespace upb {
+
+class DecoderPlan : public upb_decoderplan {
+ public:
+ static DecoderPlan* New(Handlers* h, bool allow_jit) {
+ return static_cast<DecoderPlan*>(upb_decoderplan_new(h, allow_jit));
+ }
+ void Unref() { upb_decoderplan_unref(this); }
+
+ // Returns true if the plan contains JIT-ted code. This may not be the same
+ // as the "allow_jit" parameter passed to New() if support for JIT-ting was
+ // not compiled in.
+ bool HasJitCode() { return upb_decoderplan_hasjitcode(this); }
+
+ private:
+ DecoderPlan() {} // Only constructed by New
+};
+
+class Decoder : public upb_decoder {
+ public:
+ Decoder() { upb_decoder_init(this); }
+ ~Decoder() { upb_decoder_uninit(this); }
+
+ // Resets the plan that the decoder will parse from. This will also reset the
+ // decoder's input to be uninitialized -- ResetInput() must be called before
+ // parsing can occur. The plan must live until the decoder is destroyed or
+ // reset to a different plan.
+ //
+ // Must be called before ResetInput() or Decode().
+ void ResetPlan(DecoderPlan* plan, int32_t msg_offset) {
+ upb_decoder_resetplan(this, plan, msg_offset);
+ }
+
+ // Resets the input of the decoder. This puts it in a state where it has not
+ // seen any data, and expects the next data to be from the beginning of a new
+ // protobuf.
+ //
+ // ResetInput() must be called before Decode() but may be called more than
+ // once. "byte_region" must live until the decoder is destroyed or ResetInput()
+ // is called again. "c" is the closure that will be passed to the handlers.
+ void ResetInput(ByteRegion* byte_region, void* c) {
+ upb_decoder_resetinput(this, byte_region, c);
+ }
+
+ // Decodes serialized data (calling Handlers as the data is parsed) until
+ // error or EOF (see status() for details).
+ Success Decode() { return upb_decoder_decode(this); }
+
+ const upb::Status& status() {
+ return static_cast<const upb::Status&>(*upb_decoder_status(this));
+ }
+};
+
+} // namespace upb
+
+#endif
diff --git a/bindings/cpp/upb/upb.hpp b/bindings/cpp/upb/upb.hpp
index 4fb337d..226859c 100644
--- a/bindings/cpp/upb/upb.hpp
+++ b/bindings/cpp/upb/upb.hpp
@@ -1,23 +1,34 @@
-/*
- * upb - a minimalist implementation of protocol buffers.
- *
- * Copyright (c) 2011 Google Inc. See LICENSE for details.
- * Author: Josh Haberman <jhaberman@gmail.com>
- */
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
#ifndef UPB_HPP
#define UPB_HPP
#include "upb/upb.h"
+#include <iostream>
namespace upb {
+typedef upb_success_t Success;
+
+// C++ wrapper around upb_status. The constructor and destructor call
+// upb_status_init() and upb_status_uninit(); each method forwards to the C
+// function named in its body.
class Status : public upb_status {
public:
Status() { upb_status_init(this); }
~Status() { upb_status_uninit(this); }
+ // Results of upb_ok() and upb_eof() for this status.
+ bool ok() const { return upb_ok(this); }
+ bool eof() const { return upb_eof(this); }
+
const char *GetString() const { return upb_status_getstr(this); }
+ void SetEof() { upb_status_seteof(this); }
+ // "msg" is handed to upb_status_seterrliteral() as-is.
+ void SetErrorLiteral(const char* msg) {
+ upb_status_seterrliteral(this, msg);
+ }
+
+ void Clear() { upb_status_clear(this); }
};
class Value : public upb_value {
diff --git a/examples/stream_transcode.c b/examples/stream_transcode.c
new file mode 100644
index 0000000..21c375b
--- /dev/null
+++ b/examples/stream_transcode.c
@@ -0,0 +1,76 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "upb/bytestream.h"
+#include "upb/pb/decoder.h"
+#include "upb/pb/glue.h"
+#include "upb/pb/textprinter.h"
+
+// Decodes a protobuf read from stdin and prints it to stdout through a
+// upb_textprinter. argv[1] names the descriptor file to load and argv[2] the
+// message type to decode the input as.
+//
+// Returns 0 on success and 1 on a usage, descriptor, or parse error.
+//
+// NOTE(review): the upb_decoder_init()/upb_decoder_reset()/
+// upb_decoder_decode() calls below take different arguments than the calls in
+// tests/test_decoder.c (upb_decoder_init(&d), upb_decoder_resetplan(),
+// upb_decoder_resetinput(), upb_decoder_decode(&d)) -- confirm that this
+// example still builds against upb/pb/decoder.h.
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    fprintf(stderr, "Usage: stream_transcode <descfile> <msgname>\n");
+    return 1;
+  }
+
+  upb_symtab *symtab = upb_symtab_new();
+  size_t desc_len;
+  const char *desc = upb_readfile(argv[1], &desc_len);
+  if (!desc) {
+    fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
+    return 1;
+  }
+
+  upb_status status = UPB_STATUS_INIT;
+  upb_load_descriptor_into_symtab(symtab, desc, desc_len, &status);
+  if (!upb_ok(&status)) {
+    fprintf(stderr, "Error parsing descriptor: %s", upb_status_getstr(&status));
+    return 1;
+  }
+  free((void*)desc);
+
+  const upb_def *md = upb_symtab_lookup(symtab, argv[2]);
+  if (!md) {
+    fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
+    return 1;
+  }
+
+  const upb_msgdef *m = upb_dyncast_msgdef_const(md);
+  if (!m) {
+    fprintf(stderr, "Def was not a msgdef.\n");
+    return 1;
+  }
+
+  upb_stdio in, out;
+  upb_stdio_init(&in);
+  upb_stdio_init(&out);
+  upb_stdio_reset(&in, stdin);
+  upb_stdio_reset(&out, stdout);
+
+  upb_handlers *handlers = upb_handlers_new();
+  upb_textprinter *p = upb_textprinter_new();
+  upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
+  upb_textprinter_reghandlers(handlers, m);
+
+  upb_decoder d;
+  upb_decoder_init(&d, handlers);
+  upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UPB_NONDELIMITED, p);
+
+  upb_status_clear(&status);
+  upb_decoder_decode(&d, &status);
+
+  // Record the outcome before the status is torn down so that a parse failure
+  // is visible in the exit code and not only on stderr.
+  int ret = 0;
+  if (!upb_ok(&status)) {
+    fprintf(stderr, "Error parsing input: %s", upb_status_getstr(&status));
+    ret = 1;
+  }
+
+  upb_status_uninit(&status);
+  upb_stdio_uninit(&in);
+  upb_stdio_uninit(&out);
+  upb_decoder_uninit(&d);
+  upb_textprinter_free(p);
+  // Release the reference obtained from upb_handlers_new() above.
+  upb_handlers_unref(handlers);
+  upb_def_unref(UPB_UPCAST(m));
+  upb_symtab_unref(symtab);
+
+  // Prevent C library from holding buffers open, so Valgrind doesn't see
+  // memory leaks.
+  fclose(stdin);
+  fclose(stdout);
+  return ret;
+}
diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc
index ecf27bf..5182217 100644
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@@ -9,7 +9,11 @@
#include <stdio.h>
#include <iostream>
+#include "upb/bytestream.hpp"
#include "upb/def.hpp"
+#include "upb/handlers.hpp"
+#include "upb/upb.hpp"
+#include "upb/pb/decoder.hpp"
#include "upb/pb/glue.hpp"
static void TestSymbolTable(const char *descriptor_file) {
@@ -26,11 +30,22 @@ static void TestSymbolTable(const char *descriptor_file) {
md->Unref();
}
+// Checks that a StringSource reset to the 7 bytes "testing" hands back that
+// same text through the ByteRegion returned by AllBytes().
+static void TestByteStream() {
+  upb::StringSource stringsrc;
+  stringsrc.Reset("testing", 7);
+  upb::ByteRegion* byteregion = stringsrc.AllBytes();
+  // Call FetchAll() outside of assert(): with NDEBUG defined the whole
+  // assert() expression is removed, and the fetch must still happen before
+  // StrDup() reads the region.
+  bool fetched = (byteregion->FetchAll() == UPB_BYTE_OK);
+  assert(fetched);
+  (void)fetched;
+  char* str = byteregion->StrDup();
+  bool matches = (strcmp(str, "testing") == 0);
+  assert(matches);
+  (void)matches;
+  free(str);
+}
+
+// Runs the C++ binding tests. argv[1] is the descriptor file passed to
+// TestSymbolTable(); returns 1 if it is missing and 0 otherwise.
int main(int argc, char *argv[]) {
if (argc < 2) {
fprintf(stderr, "Usage: test_cpp <descriptor file>\n");
return 1;
}
TestSymbolTable(argv[1]);
+ TestByteStream();
return 0;
}
diff --git a/tests/test_decoder.c b/tests/test_decoder.c
index 84a90cd..0db3bfa 100644
--- a/tests/test_decoder.c
+++ b/tests/test_decoder.c
@@ -1,76 +1,666 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2011 Google Inc. See LICENSE for details.
+ *
+ * An exhaustive set of tests for parsing both valid and invalid protobuf
+ * input, with buffer breaks in arbitrary places.
+ *
+ * Tests to add:
+ * - unknown field handler called appropriately
+ * - unknown fields can be inserted in random places
+ * - fuzzing of valid input
+ * - resource limits (max stack depth, max string len)
+ * - testing of groups
+ * - more thorough testing of sequences
+ * - test skipping of submessages
+ * - test suspending the decoder
+ * - buffers that are close enough to the end of the address space that
+ * pointers overflow (this might be difficult).
+ * - a few "kitchen sink" examples (one proto that uses all types, lots
+ * of submsg/sequences, etc.)
+ */
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdint.h>
#include <stdlib.h>
-#include "upb/bytestream.h"
+#include <string.h>
+#include "upb/handlers.h"
#include "upb/pb/decoder.h"
-#include "upb/pb/glue.h"
-#include "upb/pb/textprinter.h"
+#include "upb/pb/varint.h"
+#include "upb/upb.h"
+#include "upb_test.h"
-int main(int argc, char *argv[]) {
- if (argc < 3) {
- fprintf(stderr, "Usage: test_decoder <descfile> <msgname>\n");
- return 1;
+// A heap-allocated run of bytes with an explicit length.
+typedef struct {
+ char *buf; // The bytes; allocated by buffer_new() and released by buffer_free().
+ size_t len; // Number of bytes of "buf" that are in use.
+} buffer;
+
+// Returns a new buffer of "len" bytes, all initialized to zero. Aborts if
+// memory cannot be allocated.
+//
+// One extra zero byte is allocated past "len" so that a buffer nothing has
+// been written to can still be read as an (empty) C string; run_decoder()
+// passes buf->buf to strcmp() and to a "%s" format.
+buffer *buffer_new(size_t len) {
+  buffer *buf = malloc(sizeof(*buf));
+  if (!buf) abort();
+  // calloc() zeroes the storage, so no separate memset() is needed.
+  buf->buf = calloc(len + 1, 1);
+  if (!buf->buf) abort();
+  buf->len = len;
+  return buf;
+}
+
+// Returns a new buffer holding a copy of the "len" bytes starting at "data".
+buffer *buffer_new2(const void *data, size_t len) {
+  buffer *copy = buffer_new(len);
+  memcpy(copy->buf, data, len);
+  return copy;
+}
+
+// Returns a new buffer holding the characters of the C string "data" (the
+// terminating NUL is not counted in the length).
+buffer *buffer_new3(const char *data) {
+  size_t nbytes = strlen(data);
+  return buffer_new2(data, nbytes);
+}
+
+// Returns a newly allocated copy of "buf"; "buf" itself is left untouched.
+buffer *buffer_dup(buffer *buf) {
+  const void *bytes = buf->buf;
+  return buffer_new2(bytes, buf->len);
+}
+
+// Frees both the byte storage and the buffer struct itself.
+void buffer_free(buffer *buf) {
+ free(buf->buf);
+ free(buf);
+}
+
+// Appends printf-style formatted text to "buf". The current length is handed
+// to upb_vrprintf() both as the buffer size and as the offset to write at,
+// and whatever upb_vrprintf() returns is added to buf->len.
+void buffer_appendf(buffer *buf, const char *fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+ size_t size = buf->len;
+ buf->len += upb_vrprintf(&buf->buf, &size, buf->len, fmt, args);
+ va_end(args);
+}
+
+// Appends the bytes of "buf2" to "buf" and then frees "buf2". Aborts if
+// "buf" cannot be grown.
+void buffer_cat(buffer *buf, buffer *buf2) {
+  // Nothing to copy for an empty "buf2"; skipping the work also avoids a
+  // zero-sized realloc() and a zero-length memcpy().
+  if (buf2->len > 0) {
+    size_t newlen = buf->len + buf2->len;
+    // Keep the old pointer until realloc() has succeeded: assigning the
+    // result straight to buf->buf would lose the block on failure.
+    char *grown = realloc(buf->buf, newlen);
+    if (!grown) abort();
+    memcpy(grown + buf->len, buf2->buf, buf2->len);
+    buf->buf = grown;
+    buf->len = newlen;
+  }
+  buffer_free(buf2);
+}
+
+// Returns true if both buffers have the same length and the same bytes.
+bool buffer_eql(buffer *buf, buffer *buf2) {
+  if (buf->len != buf2->len) {
+    return false;
+  }
+  return memcmp(buf->buf, buf2->buf, buf->len) == 0;
+}
+
+
+/* Routines for building arbitrary protos *************************************/
+
+// Appends every buffer that follows "arg1" in the argument list to "arg1"
+// (each one is freed by buffer_cat()) and returns "arg1". The argument list
+// must be terminated by a NULL pointer.
+buffer *cat(buffer *arg1, ...) {
+ va_list ap;
+ buffer *arg;
+ va_start(ap, arg1);
+ while ((arg = va_arg(ap, buffer*)) != NULL) {
+ buffer_cat(arg1, arg);
}
+ va_end(ap);
+ return arg1;
+}
+
+// Returns a buffer holding "x" as written by upb_vencode64(). The buffer is
+// allocated with room for UPB_PB_VARINT_MAX_LEN + 1 bytes and its length is
+// then set to the value upb_vencode64() returns.
+buffer *varint(uint64_t x) {
+ buffer *buf = buffer_new(UPB_PB_VARINT_MAX_LEN + 1);
+ buf->len = upb_vencode64(x, buf->buf);
+ return buf;
+}
+
+// fixed32()/fixed64() copy the first 4 or 8 bytes at "data" verbatim.
+// TODO: proper byte-swapping for big-endian machines.
+buffer *fixed32(void *data) { return buffer_new2(data, 4); }
+buffer *fixed64(void *data) { return buffer_new2(data, 8); }
+
+// delim() prefixes "buf" with its length as a varint; "buf" is consumed by
+// cat(). The remaining helpers encode one value each: uint32()/flt() as 4 raw
+// bytes, uint64()/dbl() as 8 raw bytes, and zz32()/zz64() as the varint of
+// upb_zzenc_32()/upb_zzenc_64() applied to the value.
+buffer *delim(buffer *buf) { return cat( varint(buf->len), buf, NULL ); }
+buffer *uint32(uint32_t u32) { return fixed32(&u32); }
+buffer *uint64(uint64_t u64) { return fixed64(&u64); }
+buffer *flt(float f) { return fixed32(&f); }
+buffer *dbl(double d) { return fixed64(&d); }
+buffer *zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
+buffer *zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
+
+// Returns the varint-encoded tag for a field: (fieldnum << 3) | wire_type.
+//
+// The shift is done in 64 bits. In 32-bit arithmetic a field number that
+// needs more than 29 bits loses its top bits, so the "field number is too
+// large" test (which passes UPB_MAX_FIELDNUMBER + 1) could end up encoding a
+// different, smaller field number than the one it means to test.
+buffer *tag(uint32_t fieldnum, char wire_type) {
+  uint64_t key = ((uint64_t)fieldnum << 3) | (uint64_t)(unsigned char)wire_type;
+  return varint(key);
+}
+
+// Returns field "fn" encoded as a length-delimited value whose payload is
+// "buf"; "buf" is consumed.
+buffer *submsg(uint32_t fn, buffer *buf) {
+  buffer *header = tag(fn, UPB_WIRE_TYPE_DELIMITED);
+  buffer *payload = delim(buf);
+  return cat( header, payload, NULL );
+}
- upb_symtab *symtab = upb_symtab_new();
- size_t desc_len;
- const char *desc = upb_readfile(argv[1], &desc_len);
- if (!desc) {
- fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
- return 1;
+
+/* A set of handlers that covers all .proto types *****************************/
+
+// The handlers simply append to a string indicating what handlers were called.
+// This string is similar to protobuf text format but fields are referred to by
+// number instead of name and sequences are explicitly delimited.
+
+// Defines value_<member>(): a value handler that appends "<fval>:<value>; "
+// to the buffer passed as the closure, where <fval> is the uint32 stored in
+// "fval" and <value> is upb_value_get<member>(val) printed with "fmt".
+#define VALUE_HANDLER(member, fmt) \
+ upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \
+ buffer_appendf(closure, "%" PRIu32 ":%" fmt "; ", \
+ upb_value_getuint32(fval), upb_value_get ## member(val)); \
+ return UPB_CONTINUE; \
}
- upb_status status = UPB_STATUS_INIT;
- upb_load_descriptor_into_symtab(symtab, desc, desc_len, &status);
- if (!upb_ok(&status)) {
- fprintf(stderr, "Error parsing descriptor: %s", upb_status_getstr(&status));
- return 1;
+VALUE_HANDLER(uint32, PRIu32)
+VALUE_HANDLER(uint64, PRIu64)
+VALUE_HANDLER(int32, PRId32)
+VALUE_HANDLER(int64, PRId64)
+VALUE_HANDLER(float, "g")
+VALUE_HANDLER(double, "g")
+
+// Value handler for bool fields: appends "<fval>:true; " or "<fval>:false; "
+// to the buffer passed as "closure".
+upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
+  const char *text = "false";
+  if (upb_value_getbool(val)) {
+    text = "true";
+  }
+  buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), text);
+  return UPB_CONTINUE;
+}
+
+// Value handler for string/bytes fields: appends "<fval>:<text>; " to the
+// buffer passed as "closure", using a temporary copy of the byte region.
+upb_flow_t value_string(void *closure, upb_value fval, upb_value val) {
+ // Note: won't work with strings that contain NUL bytes, because the copy is
+ // printed with "%s".
+ char *str = upb_byteregion_strdup(upb_value_getbyteregion(val));
+ buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), str);
+ free(str);
+ return UPB_CONTINUE;
+}
+
+// Start-of-submessage handler: appends "<fval>:{ " to the buffer passed as
+// "closure" and continues with that same closure.
+upb_sflow_t startsubmsg(void *closure, upb_value fval) {
+  uint32_t num = upb_value_getuint32(fval);
+  buffer_appendf(closure, "%" PRIu32 ":{ ", num);
+  return UPB_CONTINUE_WITH(closure);
+}
+
+// End-of-submessage handler: appends "} " to the buffer passed as "closure".
+upb_flow_t endsubmsg(void *closure, upb_value fval) {
+  (void)fval;  // Not needed to close the submessage.
+  buffer_appendf(closure, "} ");
+  return UPB_CONTINUE;
+}
+
+// Start-of-sequence handler: appends "<fval>:[ " to the buffer passed as
+// "closure" and continues with that same closure.
+upb_sflow_t startseq(void *closure, upb_value fval) {
+  uint32_t num = upb_value_getuint32(fval);
+  buffer_appendf(closure, "%" PRIu32 ":[ ", num);
+  return UPB_CONTINUE_WITH(closure);
+}
+
+// End-of-sequence handler: appends "] " to the buffer passed as "closure".
+upb_flow_t endseq(void *closure, upb_value fval) {
+  (void)fval;  // Not needed to close the sequence.
+  buffer_appendf(closure, "] ");
+  return UPB_CONTINUE;
+}
+
+// Creates field handlers for field number "num" of the given type on "m" and
+// installs "handler" as the value handler plus startseq()/endseq() as the
+// sequence handlers. The field number is stored as the field's fval so the
+// handlers can print it.
+void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated,
+ upb_value_handler *handler) {
+ upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated);
+ ASSERT(f);
+ upb_fhandlers_setvalue(f, handler);
+ upb_fhandlers_setstartseq(f, &startseq);
+ upb_fhandlers_setendseq(f, &endseq);
+ upb_fhandlers_setfval(f, upb_value_uint32(num));
+}
+
+// Maps a non-repeated field number to the field number used for its repeated
+// counterpart: the input offset by (UPB_MAX_FIELDNUMBER - 1000).
+uint32_t rep_fn(uint32_t fn) {
+  uint32_t repeated_fn = (UPB_MAX_FIELDNUMBER - 1000) + fn;
+  return repeated_fn;
+}
+
+#define NOP_FIELD 40
+#define UNKNOWN_FIELD 666
+
+// Registers "handler" for "type" twice on "m" through doreg(): once as a
+// non-repeated field and once as a repeated field.
+void reg(upb_mhandlers *m, upb_fieldtype_t type, upb_value_handler *handler) {
+ // We register both a repeated and a non-repeated field for every type.
+ // For the non-repeated field we make the field number the same as the
+ // type. For the repeated field we make it a function of the type.
+ doreg(m, type, type, false, handler);
+ doreg(m, rep_fn(type), type, true, handler);
+}
+
+// Registers field "num" on "m" as a submessage/group field whose submessage
+// handlers are "m" itself (the last argument of
+// upb_mhandlers_newfhandlers_subm()), and installs the sequence and
+// submessage start/end handlers defined above.
+void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type,
+ bool repeated) {
+ upb_fhandlers *f =
+ upb_mhandlers_newfhandlers_subm(m, num, type, repeated, m);
+ ASSERT(f);
+ upb_fhandlers_setstartseq(f, &startseq);
+ upb_fhandlers_setendseq(f, &endseq);
+ upb_fhandlers_setstartsubmsg(f, &startsubmsg);
+ upb_fhandlers_setendsubmsg(f, &endsubmsg);
+ upb_fhandlers_setfval(f, upb_value_uint32(num));
+}
+
+// Populates "m" with the handlers the tests rely on: one non-repeated and one
+// repeated field per scalar/string type (see reg()), self-recursive
+// message/group fields (see reg_subm()), and a handler-less string field
+// numbered NOP_FIELD.
+void reghandlers(upb_mhandlers *m) {
+ // Register handlers for each type.
+ reg(m, UPB_TYPE(DOUBLE), &value_double);
+ reg(m, UPB_TYPE(FLOAT), &value_float);
+ reg(m, UPB_TYPE(INT64), &value_int64);
+ reg(m, UPB_TYPE(UINT64), &value_uint64);
+ reg(m, UPB_TYPE(INT32) , &value_int32);
+ reg(m, UPB_TYPE(FIXED64), &value_uint64);
+ reg(m, UPB_TYPE(FIXED32), &value_uint32);
+ reg(m, UPB_TYPE(BOOL), &value_bool);
+ reg(m, UPB_TYPE(STRING), &value_string);
+ reg(m, UPB_TYPE(BYTES), &value_string);
+ reg(m, UPB_TYPE(UINT32), &value_uint32);
+ reg(m, UPB_TYPE(ENUM), &value_int32);
+ reg(m, UPB_TYPE(SFIXED32), &value_int32);
+ reg(m, UPB_TYPE(SFIXED64), &value_int64);
+ reg(m, UPB_TYPE(SINT32), &value_int32);
+ reg(m, UPB_TYPE(SINT64), &value_int64);
+
+ // Register submessage/group handlers that are self-recursive
+ // to this type, eg: message M { optional M m = 1; }
+ reg_subm(m, UPB_TYPE(MESSAGE), UPB_TYPE(MESSAGE), false);
+ reg_subm(m, UPB_TYPE(GROUP), UPB_TYPE(GROUP), false);
+ reg_subm(m, rep_fn(UPB_TYPE(MESSAGE)), UPB_TYPE(MESSAGE), true);
+ reg_subm(m, rep_fn(UPB_TYPE(GROUP)), UPB_TYPE(GROUP), true);
+
+ // Register a no-op string field so we can pad the proto wherever we want.
+ upb_mhandlers_newfhandlers(m, NOP_FIELD, UPB_TYPE(STRING), false);
+}
+
+
+/* Custom bytesrc that can insert buffer seams in arbitrary places ************/
+
+// A byte source over an in-memory string that reports its data in up to
+// three pieces, split at offsets "seam1" and "seam2" (see
+// upb_seamsrc_avail()).
+typedef struct {
+ upb_bytesrc bytesrc;
+ const char *str; // The data; stored as a pointer, not copied.
+ size_t len, seam1, seam2; // Length of "str" and the two seam offsets.
+ upb_byteregion byteregion; // Set up by upb_seamsrc_init() to span 0..len.
+} upb_seamsrc;
+
+// Returns how many bytes can be read starting at "ofs" before hitting the
+// next seam, or the end of the data once both seams are behind "ofs".
+size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) {
+  size_t limit = src->len;
+  if (ofs < src->seam1) {
+    limit = src->seam1;
+  } else if (ofs < src->seam2) {
+    limit = src->seam2;
+  }
+  return limit - ofs;
+}
+
+// Fetch callback: reports through "*read" how many bytes are available at
+// "ofs" (up to the next seam). When "ofs" is exactly the end of the data it
+// sets EOF on the source's status and returns UPB_BYTE_EOF instead.
+upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
+  upb_seamsrc *src = _src;
+  // "ofs == len" is the legitimate EOF request handled just below, so only
+  // offsets past the end are a caller bug.
+  assert(ofs <= src->len);
+  if (ofs == src->len) {
+    upb_status_seteof(&src->bytesrc.status);
+    return UPB_BYTE_EOF;
}
- free((void*)desc);
+  *read = upb_seamsrc_avail(src, ofs);
+  return UPB_BYTE_OK;
+}
+
+// Copy callback: copies "len" bytes starting at offset "ofs" of the source
+// string into "dst". The range must lie within the data (checked by assert).
+void upb_seamsrc_copy(const void *_src, uint64_t ofs,
+ size_t len, char *dst) {
+ const upb_seamsrc *src = _src;
+ assert(ofs + len <= src->len);
+ memcpy(dst, src->str + ofs, len);
+}
+
+// Discard callback: does nothing; both arguments are ignored.
+void upb_seamsrc_discard(void *src, uint64_t ofs) {
+ (void)src;
+ (void)ofs;
+}
+
+// Getptr callback: returns a pointer to the data at "ofs" and stores in
+// "*len" the number of bytes available there before the next seam.
+const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
+ const upb_seamsrc *src = _s;
+ *len = upb_seamsrc_avail(src, ofs);
+ return src->str + ofs;
+}
- const upb_def *md = upb_symtab_lookup(symtab, argv[2]);
- if (!md) {
- fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
- return 1;
+// Initializes "s" to serve the "len" bytes at "str" (the pointer is stored,
+// not copied) with both seams at offset 0, and sets up its top-level
+// byteregion to span offsets 0..len. The byteregion's discard/fetch offsets
+// are set by upb_seamsrc_resetseams(), not here.
+void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) {
+ static upb_bytesrc_vtbl vtbl = {
+ &upb_seamsrc_fetch,
+ &upb_seamsrc_discard,
+ &upb_seamsrc_copy,
+ &upb_seamsrc_getptr,
+ };
+ upb_bytesrc_init(&s->bytesrc, &vtbl);
+ s->seam1 = 0;
+ s->seam2 = 0;
+ s->str = str;
+ s->len = len;
+ s->byteregion.bytesrc = &s->bytesrc;
+ s->byteregion.toplevel = true;
+ s->byteregion.start = 0;
+ s->byteregion.end = len;
+}
+
+// Moves the seams to "seam1" and "seam2" (seam1 <= seam2 is asserted) and
+// rewinds the byteregion's discard and fetch offsets to 0.
+void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) {
+ ASSERT(seam1 <= seam2);
+ s->seam1 = seam1;
+ s->seam2 = seam2;
+ s->byteregion.discard = 0;
+ s->byteregion.fetch = 0;
+}
+
+// Nothing to release: the seamsrc does not own the string it serves.
+void upb_seamsrc_uninit(upb_seamsrc *s) { (void)s; }
+
+// Returns the upb_bytesrc embedded in "s".
+upb_bytesrc *upb_seamsrc_bytesrc(upb_seamsrc *s) {
+ return &s->bytesrc;
+}
+
+// Returns the top-level upb_byteregion* for this seamsrc. Invalidated when
+// the seamsrc is reset.
+upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) {
+ return &s->byteregion;
+}
+
+
+/* Running of test cases ******************************************************/
+
+upb_decoderplan *plan;
+
+// Decodes "proto" with the global "plan" once for every pair of seam offsets
+// (i, j) with i <= j < proto->len, collecting the handler output in a fresh
+// buffer each time.
+//
+// If "expected_output" is non-NULL every decode must succeed, consume the
+// whole input and produce exactly that text; if it is NULL every decode must
+// end in UPB_ERROR. "proto" is freed before returning; "expected_output" is
+// not.
+void run_decoder(buffer *proto, buffer *expected_output) {
+ upb_seamsrc src;
+ upb_seamsrc_init(&src, proto->buf, proto->len);
+ upb_decoder d;
+ upb_decoder_init(&d);
+ upb_decoder_resetplan(&d, plan, 0);
+ for (size_t i = 0; i < proto->len; i++) {
+ for (size_t j = i; j < proto->len; j++) {
+ upb_seamsrc_resetseams(&src, i, j);
+ upb_byteregion *input = upb_seamsrc_allbytes(&src);
+ buffer *output = buffer_new(0);
+ upb_decoder_resetinput(&d, input, output);
+ // Keep calling the decoder for as long as it reports UPB_SUSPENDED.
+ upb_success_t success = UPB_SUSPENDED;
+ while (success == UPB_SUSPENDED)
+ success = upb_decoder_decode(&d);
+ ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK));
+ if (expected_output) {
+ ASSERT(success == UPB_OK);
+ // The input should be fully consumed.
+ ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input));
+ ASSERT(upb_byteregion_discardofs(input) ==
+ upb_byteregion_endofs(input));
+ if (!buffer_eql(output, expected_output)) {
+ fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
+ output->buf, expected_output->buf);
+ }
+ ASSERT(strcmp(output->buf, expected_output->buf) == 0);
+ } else {
+ ASSERT(success == UPB_ERROR);
+ }
+ buffer_free(output);
+ }
}
+ upb_seamsrc_uninit(&src);
+ upb_decoder_uninit(&d);
+ buffer_free(proto);
+}
+
+// Formats the expected handler output from "expected_fmt"/"args" and runs
+// run_decoder() on "proto" against it. "proto" is consumed by run_decoder().
+void assert_successful_parse_at_eof(buffer *proto, const char *expected_fmt,
+ va_list args) {
+ buffer *expected_text = buffer_new(0);
+ size_t size = expected_text->len;
+ expected_text->len += upb_vrprintf(&expected_text->buf, &size,
+ expected_text->len, expected_fmt, args);
+ run_decoder(proto, expected_text);
+ buffer_free(expected_text);
+}
+
+// Asserts that decoding "proto" fails for every seam placement; "proto" is
+// consumed by run_decoder().
+void assert_does_not_parse_at_eof(buffer *proto) {
+ run_decoder(proto, NULL);
+}
+
+// Asserts that "proto" decodes to the text described by "expected_fmt" and
+// its arguments. The check runs twice: on a copy of "proto" as-is, and on
+// "proto" itself with a 30-byte NOP_FIELD string appended. "proto" is
+// consumed.
+void assert_successful_parse(buffer *proto, const char *expected_fmt, ...) {
+ // The JIT is only used for data >=20 bytes from end-of-buffer, so
+ // repeat once with no-op padding data at the end of buffer.
+ va_list args, args2;
+ va_start(args, expected_fmt);
+ va_copy(args2, args);
+ assert_successful_parse_at_eof(buffer_dup(proto), expected_fmt, args);
+ assert_successful_parse_at_eof(
+ cat( proto,
+ tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer_new(30)),
+ NULL ),
+ expected_fmt, args2);
+ va_end(args);
+ va_end(args2);
+}
+
+// Asserts that "proto" is rejected both as-is (using a copy) and with a
+// 30-byte NOP_FIELD string appended. "proto" is consumed.
+void assert_does_not_parse(buffer *proto) {
+ // The JIT is only used for data >=20 bytes from end-of-buffer, so
+ // repeat once with no-op padding data at the end of buffer.
+ assert_does_not_parse_at_eof(buffer_dup(proto));
+ assert_does_not_parse_at_eof(
+ cat( proto,
+ tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim( buffer_new(30)),
+ NULL ));
+}
+
+
+/* The actual tests ***********************************************************/
+
+// Asserts that truncated input for a field of "type" is rejected: EOF right
+// after the tag, EOF inside the value, and EOF inside delimited or packed
+// data. The field numbers used are "type" itself, rep_fn(type) and
+// UNKNOWN_FIELD.
+void test_premature_eof_for_type(upb_fieldtype_t type) {
+ // Incomplete values for each wire type.
+ static const char *incompletes[] = {
+ "\x80", // UPB_WIRE_TYPE_VARINT
+ "abcdefg", // UPB_WIRE_TYPE_64BIT
+ "\x80", // UPB_WIRE_TYPE_DELIMITED (partial length)
+ NULL, // UPB_WIRE_TYPE_START_GROUP (no value required)
+ NULL, // UPB_WIRE_TYPE_END_GROUP (no value required)
+ "abc" // UPB_WIRE_TYPE_32BIT
+ };
+
+ uint32_t fieldnum = type;
+ uint32_t rep_fieldnum = rep_fn(type);
+ int wire_type = upb_types[type].native_wire_type;
+ // The table above is indexed by wire type.
+ const char *incomplete = incompletes[wire_type];
+
+ // EOF before a known non-repeated value.
+ assert_does_not_parse_at_eof(tag(fieldnum, wire_type));
+
+ // EOF before a known repeated value.
+ assert_does_not_parse_at_eof(tag(rep_fieldnum, wire_type));
+
+ // EOF before an unknown value.
+ assert_does_not_parse_at_eof(tag(UNKNOWN_FIELD, wire_type));
+
+ // EOF inside a known non-repeated value.
+ assert_does_not_parse_at_eof(
+ cat( tag(fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+
+ // EOF inside a known repeated value.
+ assert_does_not_parse_at_eof(
+ cat( tag(rep_fieldnum, wire_type), buffer_new3(incomplete), NULL ));
+
+ // EOF inside an unknown value.
+ assert_does_not_parse_at_eof(
+ cat( tag(UNKNOWN_FIELD, wire_type), buffer_new3(incomplete), NULL ));
+
+ if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
+ // EOF in the middle of delimited data for known non-repeated value.
+ assert_does_not_parse_at_eof(
+ cat( tag(fieldnum, wire_type), varint(1), NULL ));
+
+ // EOF in the middle of delimited data for known repeated value.
+ assert_does_not_parse_at_eof(
+ cat( tag(rep_fieldnum, wire_type), varint(1), NULL ));
- const upb_msgdef *m = upb_dyncast_msgdef_const(md);
- if (!m) {
- fprintf(stderr, "Def was not a msgdef.\n");
- return 1;
+ // EOF in the middle of delimited data for unknown value.
+ assert_does_not_parse_at_eof(
+ cat( tag(UNKNOWN_FIELD, wire_type), varint(1), NULL ));
+
+ if (type == UPB_TYPE(MESSAGE)) {
+ // Submessage ends in the middle of a value.
+ buffer *incomplete_submsg =
+ cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT),
+ buffer_new3(incompletes[UPB_WIRE_TYPE_VARINT]), NULL );
+ assert_does_not_parse(
+ cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
+ varint(incomplete_submsg->len),
+ incomplete_submsg, NULL ));
+ }
+ } else {
+ // Packed region ends in the middle of a value.
+ assert_does_not_parse(
+ cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
+ varint(strlen(incomplete)),
+ buffer_new3(incomplete), NULL ));
+
+ // EOF in the middle of packed region.
+ assert_does_not_parse_at_eof(
+ cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1), NULL ));
}
+}
- upb_stdio in, out;
- upb_stdio_init(&in);
- upb_stdio_init(&out);
- upb_stdio_reset(&in, stdin);
- upb_stdio_reset(&out, stdout);
+// "33" and "66" are just two random values that all numeric types can
+// represent.
+//
+// Checks that the encodings "enc33" and "enc66" of a field of "type" decode
+// to 33 and 66 as a non-repeated field, as a non-packed repeated field and as
+// a packed repeated field. Both encodings are freed before returning.
+void test_valid_data_for_type(upb_fieldtype_t type,
+ buffer *enc33, buffer *enc66) {
+ uint32_t fieldnum = type;
+ uint32_t rep_fieldnum = rep_fn(type);
+ int wire_type = upb_types[type].native_wire_type;
- upb_handlers *handlers = upb_handlers_new();
- upb_textprinter *p = upb_textprinter_new();
- upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
- upb_textprinter_reghandlers(handlers, m);
+ // Non-repeated
+ assert_successful_parse(
+ cat( tag(fieldnum, wire_type), buffer_dup(enc33),
+ tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
+ "%u:33; %u:66; ", fieldnum, fieldnum);
- upb_decoder d;
- upb_decoder_init(&d, handlers);
- upb_decoder_reset(&d, upb_stdio_allbytes(&in), p);
+ // Non-packed repeated.
+ assert_successful_parse(
+ cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
+ tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
+ "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+ // Packed repeated.
+ assert_successful_parse(
+ cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
+ delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
+ "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+ buffer_free(enc33);
+ buffer_free(enc66);
+}
+
+// Signed counterpart of test_valid_data_for_type(): "enc33" must decode to 33
+// and "enc66" to -66, as a non-repeated field, a non-packed repeated field
+// and a packed repeated field. Both encodings are freed before returning.
+void test_valid_data_for_signed_type(upb_fieldtype_t type,
+ buffer *enc33, buffer *enc66) {
+ uint32_t fieldnum = type;
+ uint32_t rep_fieldnum = rep_fn(type);
+ int wire_type = upb_types[type].native_wire_type;
+
+ // Non-repeated
+ assert_successful_parse(
+ cat( tag(fieldnum, wire_type), buffer_dup(enc33),
+ tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
+ "%u:33; %u:-66; ", fieldnum, fieldnum);
+
+ // Non-packed repeated.
+ assert_successful_parse(
+ cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
+ tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
+ "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+ // Packed repeated.
+ assert_successful_parse(
+ cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
+ delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
+ "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
+
+ buffer_free(enc33);
+ buffer_free(enc66);
+}
+
+// Test that invalid protobufs are properly detected (without crashing) and
+// have an error reported. Field numbers match registered handlers above.
+//
+// Besides premature EOF for every scalar and string type, this covers EOF
+// inside a tag or group, an unmatched end-group tag, out-of-range field
+// numbers, and the UPB_MAX_NESTING depth limit (one case over the limit and
+// one case just inside it).
+void test_invalid() {
+ test_premature_eof_for_type(UPB_TYPE(DOUBLE));
+ test_premature_eof_for_type(UPB_TYPE(FLOAT));
+ test_premature_eof_for_type(UPB_TYPE(INT64));
+ test_premature_eof_for_type(UPB_TYPE(UINT64));
+ test_premature_eof_for_type(UPB_TYPE(INT32));
+ test_premature_eof_for_type(UPB_TYPE(FIXED64));
+ test_premature_eof_for_type(UPB_TYPE(FIXED32));
+ test_premature_eof_for_type(UPB_TYPE(BOOL));
+ test_premature_eof_for_type(UPB_TYPE(STRING));
+ test_premature_eof_for_type(UPB_TYPE(BYTES));
+ test_premature_eof_for_type(UPB_TYPE(UINT32));
+ test_premature_eof_for_type(UPB_TYPE(ENUM));
+ test_premature_eof_for_type(UPB_TYPE(SFIXED32));
+ test_premature_eof_for_type(UPB_TYPE(SFIXED64));
+ test_premature_eof_for_type(UPB_TYPE(SINT32));
+ test_premature_eof_for_type(UPB_TYPE(SINT64));
+
+ // EOF inside a tag's varint.
+ assert_does_not_parse_at_eof( buffer_new3("\x80") );
+
+ // EOF inside a known group.
+ assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );
+
+ // EOF inside an unknown group.
+ assert_does_not_parse_at_eof( tag(UNKNOWN_FIELD, UPB_WIRE_TYPE_START_GROUP) );
- upb_status_clear(&status);
- upb_decoder_decode(&d, &status);
+ // End group that we are not currently in.
+ assert_does_not_parse( tag(4, UPB_WIRE_TYPE_END_GROUP) );
- if (!upb_ok(&status)) {
- fprintf(stderr, "Error parsing input: %s", upb_status_getstr(&status));
+ // Field number is 0.
+ assert_does_not_parse(
+ cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0), NULL ));
+
+ // Field number is too large.
+ assert_does_not_parse(
+ cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
+ varint(0), NULL ));
+
+ // Test exceeding the resource limit of stack depth.
+ buffer *buf = buffer_new3("");
+ for (int i = 0; i < UPB_MAX_NESTING; i++) {
+ buf = submsg(UPB_TYPE(MESSAGE), buf);
}
+ assert_does_not_parse(buf);
- upb_status_uninit(&status);
- upb_stdio_uninit(&in);
- upb_stdio_uninit(&out);
- upb_decoder_uninit(&d);
- upb_textprinter_free(p);
- upb_def_unref(UPB_UPCAST(m));
- upb_symtab_unref(symtab);
-
- // Prevent C library from holding buffers open, so Valgrind doesn't see
- // memory leaks.
- fclose(stdin);
- fclose(stdout);
+ // Staying within the stack limit should work properly.
+ buf = buffer_new3("");
+ buffer *textbuf = buffer_new3("");
+ int total = UPB_MAX_NESTING - 1;
+ for (int i = 0; i < total; i++) {
+ buf = submsg(UPB_TYPE(MESSAGE), buf);
+ buffer_appendf(textbuf, "%u:{ ", UPB_TYPE(MESSAGE));
+ }
+ for (int i = 0; i < total; i++) {
+ buffer_appendf(textbuf, "} ");
+ }
+ assert_successful_parse(buf, "%s", textbuf->buf);
+ buffer_free(textbuf);
+}
+
+// Checks that well-formed input decodes to the expected text: the values 33
+// and -66 for every signed numeric type, 33 and 66 for the unsigned ones, and
+// nested non-repeated and repeated submessages.
+void test_valid() {
+ test_valid_data_for_signed_type(UPB_TYPE(DOUBLE), dbl(33), dbl(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(FLOAT), flt(33), flt(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(INT64), varint(33), varint(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(INT32), varint(33), varint(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(ENUM), varint(33), varint(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(SFIXED32), uint32(33), uint32(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(SFIXED64), uint64(33), uint64(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(SINT32), zz32(33), zz32(-66));
+ test_valid_data_for_signed_type(UPB_TYPE(SINT64), zz64(33), zz64(-66));
+
+ test_valid_data_for_type(UPB_TYPE(UINT64), varint(33), varint(66));
+ test_valid_data_for_type(UPB_TYPE(UINT32), varint(33), varint(66));
+ test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66));
+ test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66));
+
+ // Submessage tests.
+ uint32_t msg_fn = UPB_TYPE(MESSAGE);
+ assert_successful_parse(
+ submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer_new3("")))),
+ "%u:{ %u:{ %u:{ } } } ", msg_fn, msg_fn, msg_fn);
+
+ uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE));
+ assert_successful_parse(
+ submsg(repm_fn, submsg(repm_fn, buffer_new3(""))),
+ "%u:[ %u:{ %u:[ %u:{ } ] } ] ", repm_fn, repm_fn, repm_fn, repm_fn);
+}
+
+// Runs the invalid-input tests followed by the valid-input tests against the
+// current global "plan".
+void run_tests() {
+ test_invalid();
+ test_valid();
+}
+
+// Builds one set of handlers and runs the whole suite twice against it: once
+// with a plan created with JIT disallowed and once with JIT allowed.
+int main() {
+ // Construct decoder plan.
+ upb_handlers *h = upb_handlers_new();
+ reghandlers(upb_handlers_newmhandlers(h));
+
+ // Test without JIT.
+ plan = upb_decoderplan_new(h, false);
+ run_tests();
+ upb_decoderplan_unref(plan);
+
+ // Test JIT.
+ plan = upb_decoderplan_new(h, true);
+ run_tests();
+ upb_decoderplan_unref(plan);
+
+ plan = NULL;
+ printf("All tests passed, %d assertions.\n", num_assertions);
+ upb_handlers_unref(h);
+ return 0;
}
diff --git a/tests/test_varint.c b/tests/test_varint.c
index 4c076b3..0fc93f0 100644
--- a/tests/test_varint.c
+++ b/tests/test_varint.c
@@ -8,12 +8,39 @@
#include "upb/pb/varint.h"
#include "upb_test.h"
+// Test that we can round-trip from int->varint->int.
+//
+// "num" is encoded with upb_vencode64() and, when it fits in 32 bits, also
+// with upb_vencode32(). "decoder" must return the same value and a pointer
+// just past the encoded bytes. Also checks that zig-zag encoding the zig-zag
+// decoding of "num" gives back "num".
+static void test_varint_for_num(upb_decoderet (*decoder)(const char*),
+ uint64_t num) {
+ char buf[16];
+ memset(buf, 0xff, sizeof(buf));
+ size_t bytes = upb_vencode64(num, buf);
+
+ if (num <= UINT32_MAX) {
+ char buf2[16];
+ memset(buf2, 0, sizeof(buf2));
+ // upb_vencode32() returns the encoded bytes packed into a uint64_t; copy
+ // them into a byte buffer for the decoder.
+ uint64_t encoded = upb_vencode32(num);
+ memcpy(&buf2, &encoded, 8);
+ upb_decoderet r = decoder(buf2);
+ ASSERT(r.val == num);
+ ASSERT(r.p == buf2 + upb_value_size(encoded));
+ ASSERT(upb_zzenc_32(upb_zzdec_32(num)) == num);
+ }
+
+ upb_decoderet r = decoder(buf);
+ ASSERT(r.val == num);
+ ASSERT(r.p == buf + bytes);
+ ASSERT(upb_zzenc_64(upb_zzdec_64(num)) == num);
+}
+
static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
#define TEST(bytes, expected_val) {\
- const char buf[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \
+ size_t n = sizeof(bytes) - 1; /* for NULL */ \
+ char buf[UPB_PB_VARINT_MAX_LEN]; \
+ memset(buf, 0xff, sizeof(buf)); \
+ memcpy(buf, bytes, n); \
upb_decoderet r = decoder(buf); \
ASSERT(r.val == expected_val); \
- ASSERT(r.p == buf + sizeof(buf) - 16); /* - 1 for NULL */ \
+ ASSERT(r.p == buf + n); \
}
TEST("\x00", 0ULL);
@@ -30,12 +57,19 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
#undef TEST
- char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
+ char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x01, 0x01};
const char *twelvebyte_buf = twelvebyte;
// A varint that terminates before hitting the end of the provided buffer,
// but in too many bytes (11 instead of 10).
upb_decoderet r = decoder(twelvebyte_buf);
ASSERT(r.p == NULL);
+
+
+ for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
+ test_varint_for_num(decoder, num);
+ }
+ test_varint_for_num(decoder, 0);
}
diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc
index 8d13f33..c43649c 100644
--- a/tests/test_vs_proto2.cc
+++ b/tests/test_vs_proto2.cc
@@ -7,15 +7,19 @@
* given proto type and input protobuf.
*/
+#define __STDC_LIMIT_MACROS // So we get UINT32_MAX
#include <assert.h>
#include <inttypes.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <google/protobuf/descriptor.h>
-#include "benchmarks/google_messages.pb.h"
+#include <google/protobuf/wire_format_lite.h>
+#include "upb/benchmarks/google_messages.pb.h"
#include "upb/def.h"
#include "upb/msg.h"
#include "upb/pb/glue.h"
+#include "upb/pb/varint.h"
#include "upb_test.h"
size_t string_size;
@@ -179,13 +183,13 @@ void compare(const google::protobuf::Message& proto2_msg,
void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
void *upb_msg, const upb_msgdef *upb_md,
- const char *str, size_t len)
+ const char *str, size_t len, bool allow_jit)
{
// Parse to both proto2 and upb.
ASSERT(proto2_msg->ParseFromArray(str, len));
upb_status status = UPB_STATUS_INIT;
upb_msg_clear(upb_msg, upb_md);
- upb_strtomsg(str, len, upb_msg, upb_md, &status);
+ upb_strtomsg(str, len, upb_msg, upb_md, allow_jit, &status);
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing protobuf: %s", upb_status_getstr(&status));
exit(1);
@@ -241,8 +245,10 @@ int main(int argc, char *argv[])
// Run twice to test proper object reuse.
MESSAGE_CIDENT proto2_msg;
void *upb_msg = upb_stdmsg_new(msgdef);
- parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
- parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
+ parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
+ parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
+ parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
+ parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
printf("All tests passed, %d assertions.\n", num_assertions);
upb_stdmsg_free(upb_msg, msgdef);
@@ -250,6 +256,17 @@ int main(int argc, char *argv[])
free((void*)str);
upb_symtab_unref(symtab);
upb_status_uninit(&status);
+
+ // Test Zig-Zag encoding/decoding.
+ for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
+ ASSERT(upb_zzenc_64(num) ==
+ google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
+ if (num < UINT32_MAX) {
+ ASSERT(upb_zzenc_32(num) ==
+ google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
+ }
+ }
+
google::protobuf::ShutdownProtobufLibrary();
return 0;
diff --git a/tests/tests.c b/tests/tests.c
index 83fb3ef..12ff4bb 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -39,9 +39,13 @@ static void test_upb_jit() {
upb_handlers *h = upb_handlers_new();
upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
upb_handlers_reghandlerset(h, upb_downcast_msgdef_const(def), &hset);
- upb_decoder d;
- upb_decoder_init(&d, h);
- upb_decoder_uninit(&d);
+ upb_decoderplan *p = upb_decoderplan_new(h, true);
+#ifdef UPB_USE_JIT_X64
+ ASSERT(upb_decoderplan_hasjitcode(p));
+#else
+ ASSERT(!upb_decoderplan_hasjitcode(p));
+#endif
+ upb_decoderplan_unref(p);
upb_symtab_unref(s);
upb_def_unref(def);
upb_handlers_unref(h);
diff --git a/upb/bytestream.c b/upb/bytestream.c
index 135f269..8feb678 100644
--- a/upb/bytestream.c
+++ b/upb/bytestream.c
@@ -25,7 +25,7 @@ upb_byteregion *upb_byteregion_new(const void *str) {
return upb_byteregion_newl(str, strlen(str));
}
-upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
+upb_byteregion *upb_byteregion_newl(const void *str, size_t len) {
upb_stringsrc *src = malloc(sizeof(*src));
upb_stringsrc_init(src);
char *ptr = malloc(len + 1);
@@ -37,7 +37,7 @@ upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
void upb_byteregion_free(upb_byteregion *r) {
if (!r) return;
- uint32_t len;
+ size_t len;
free((char*)upb_byteregion_getptr(r, 0, &len));
upb_stringsrc_uninit((upb_stringsrc*)r->bytesrc);
free(r->bytesrc);
@@ -64,16 +64,14 @@ void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src,
r->fetch = UPB_MIN(src->fetch, r->end);
}
-bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s) {
+upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r) {
uint64_t fetchable = upb_byteregion_remaining(r, r->fetch);
- if (fetchable == 0) {
- upb_status_seteof(s);
- return false;
- }
- uint64_t num = upb_bytesrc_fetch(r->bytesrc, r->fetch, s);
- if (num == 0) return false;
- r->fetch += UPB_MIN(num, fetchable);
- return true;
+ if (fetchable == 0) return UPB_BYTE_EOF;  // Nothing left to fetch in this region.
+ size_t fetched;
+ upb_bytesuccess_t ret = upb_bytesrc_fetch(r->bytesrc, r->fetch, &fetched);
+ if (ret != UPB_BYTE_OK) return ret;  // Propagate the bytesrc's EOF/WOULDBLOCK/ERROR code unchanged.
+ r->fetch += UPB_MIN(fetched, fetchable);  // Never advance by more than "fetchable".
+ return UPB_BYTE_OK;
}
@@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {
static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
upb_stdio_buf **reuse = NULL; // XXX
- uint32_t num_reused = 0, num_inuse = 0;
+ int num_reused = 0, num_inuse = 0;
// Could sweep only a subset of bufs if this was a hotspot.
- for (uint32_t i = 0; i < s->nbuf; i++) {
+ for (int i = 0; i < s->nbuf; i++) {
upb_stdio_buf *buf = s->bufs[i];
if (buf->refcount > 0) {
s->bufs[num_inuse++] = buf;
@@ -120,28 +118,37 @@ void upb_stdio_discard(void *src, uint64_t ofs) {
(void)ofs;
}
-uint32_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
+upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) {
(void)ofs;
upb_stdio *stdio = (upb_stdio*)src;
upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio);
- uint32_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
- buf->len = read;
- if(read < (uint32_t)BUF_SIZE) {
+retry:
+ *bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
+ buf->len = *bytes_read;
+ if (*bytes_read == 0) {  // A short but non-empty read is still a successful fetch; EOF/error is reported by the next call.
// Error or EOF.
- if(feof(stdio->file)) {
- upb_status_seteof(s);
- return read;
+ if (feof(stdio->file)) {
+ upb_status_seteof(&stdio->src.status);
+ return UPB_BYTE_EOF;
}
- if(ferror(stdio->file)) {
- upb_status_fromerrno(s);
- return 0;
+ if (ferror(stdio->file)) {
+#ifdef EINTR
+ // If we encounter a client who doesn't want to retry EINTR, we can easily
+ // add a boolean property of the stdio that controls this behavior.
+ if (errno == EINTR) {
+ clearerr(stdio->file);
+ goto retry;
+ }
+#endif
+ upb_status_fromerrno(&stdio->src.status);
+ return upb_errno_is_wouldblock() ? UPB_BYTE_WOULDBLOCK : UPB_BYTE_ERROR;
}
assert(false);
}
- return buf->ofs + buf->len;
+ return UPB_BYTE_OK;
}
-void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
+void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
ofs -= buf->ofs;
memcpy(dst, buf->data + ofs, BUF_SIZE - ofs);
@@ -149,14 +156,14 @@ void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
dst += (BUF_SIZE - ofs);
while (len > 0) {
++buf;
- uint32_t bytes = UPB_MIN(len, BUF_SIZE);
+ size_t bytes = UPB_MIN(len, BUF_SIZE);
memcpy(dst, buf->data, bytes);
len -= bytes;
dst += bytes;
}
}
-const char *upb_stdio_getptr(const void *src, uint64_t ofs, uint32_t *len) {
+const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
ofs -= buf->ofs;
*len = BUF_SIZE - ofs;
@@ -168,7 +175,7 @@ upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *s
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
- if(written < len) {
+ if (written < len) {
upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
}
@@ -191,7 +198,7 @@ void upb_stdio_init(upb_stdio *stdio) {
static upb_bytesrc_vtbl bytesrc_vtbl = {
&upb_stdio_fetch,
&upb_stdio_discard,
- &upb_stdio_read,
+ &upb_stdio_copy,
&upb_stdio_getptr,
};
upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
@@ -226,20 +233,25 @@ void upb_stdio_uninit(upb_stdio *stdio) {
stdio->file = NULL;
}
-upb_byteregion* upb_stdio_allbytes(upb_stdio *stdio) { return &stdio->byteregion; }
+upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
/* upb_stringsrc **************************************************************/
-uint32_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
+upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
upb_stringsrc *src = _src;
- upb_status_seteof(s);
- return src->len - ofs;
+ assert(ofs <= src->len);  // ofs == len is a legal position: it is the EOF case handled below.
+ if (ofs == src->len) {
+ upb_status_seteof(&src->bytesrc.status);
+ return UPB_BYTE_EOF;
+ }
+ *read = src->len - ofs;  // The rest of the string is available at once.
+ return UPB_BYTE_OK;
}
-void upb_stringsrc_read(const void *_src, uint64_t ofs,
- uint32_t len, char *dst) {
+void upb_stringsrc_copy(const void *_src, uint64_t ofs,
+ size_t len, char *dst) {
const upb_stringsrc *src = _src;
assert(ofs + len <= src->len);
memcpy(dst, src->str + ofs, len);
@@ -250,7 +262,7 @@ void upb_stringsrc_discard(void *src, uint64_t ofs) {
(void)ofs;
}
-const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, uint32_t *len) {
+const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
const upb_stringsrc *src = _s;
*len = src->len - ofs;
return src->str + ofs;
@@ -260,7 +272,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
static upb_bytesrc_vtbl vtbl = {
&upb_stringsrc_fetch,
&upb_stringsrc_discard,
- &upb_stringsrc_read,
+ &upb_stringsrc_copy,
&upb_stringsrc_getptr,
};
upb_bytesrc_init(&s->bytesrc, &vtbl);
@@ -269,7 +281,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
s->byteregion.toplevel = true;
}
-void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
s->str = str;
s->len = len;
s->byteregion.start = 0;
@@ -280,18 +292,13 @@ void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
-upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
- return &s->bytesrc;
-}
-
-
/* upb_stringsink *************************************************************/
void upb_stringsink_uninit(upb_stringsink *s) {
free(s->str);
}
-void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t size) {
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
free(s->str);
s->str = str;
s->len = 0;
diff --git a/upb/bytestream.h b/upb/bytestream.h
index 3b339f1..409ae80 100644
--- a/upb/bytestream.h
+++ b/upb/bytestream.h
@@ -63,11 +63,17 @@
// +------------------------
// | nondelimited region Z <-- won't return EOF until data source hits EOF.
// +------------------------
+//
+// TODO: if 64-bit math for stream offsets is a performance issue on
+// non-64-bit machines, we could introduce a upb_off_t typedef that can be
+// defined as a 32-bit type for applications that don't need to handle
+// streams longer than 4GB.
#ifndef UPB_BYTESTREAM_H
#define UPB_BYTESTREAM_H
+#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
@@ -79,6 +85,12 @@
extern "C" {
#endif
+typedef enum {
+ UPB_BYTE_OK = UPB_OK,
+ UPB_BYTE_WOULDBLOCK = UPB_SUSPENDED,
+ UPB_BYTE_ERROR = UPB_ERROR,
+ UPB_BYTE_EOF
+} upb_bytesuccess_t;
/* upb_bytesrc ****************************************************************/
@@ -90,10 +102,10 @@ extern "C" {
// upb_bytesrc is a virtual base class with implementations that get data from
// eg. a string, a cord, a file descriptor, a FILE*, etc.
-typedef uint32_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
+typedef upb_bytesuccess_t upb_bytesrc_fetch_func(void*, uint64_t, size_t*);
typedef void upb_bytesrc_discard_func(void*, uint64_t);
-typedef void upb_bytesrc_copy_func(const void*, uint64_t, uint32_t, char*);
-typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, uint32_t*);
+typedef void upb_bytesrc_copy_func(const void*, uint64_t, size_t, char*);
+typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, size_t*);
typedef struct _upb_bytesrc_vtbl {
upb_bytesrc_fetch_func *fetch;
upb_bytesrc_discard_func *discard;
@@ -102,21 +114,27 @@ typedef struct _upb_bytesrc_vtbl {
} upb_bytesrc_vtbl;
typedef struct {
- upb_bytesrc_vtbl *vtbl;
+ const upb_bytesrc_vtbl *vtbl;
+ upb_status status;
} upb_bytesrc;
-INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
+INLINE void upb_bytesrc_init(upb_bytesrc *src, const upb_bytesrc_vtbl *vtbl) {
src->vtbl = vtbl;
+ upb_status_init(&src->status);
+}
+
+INLINE void upb_bytesrc_uninit(upb_bytesrc *src) {
+ upb_status_uninit(&src->status);
}
-// Fetches at least one byte starting at ofs, returning the actual number of
-// bytes fetched (or 0 on EOF or error: see *s for details). Some bytesrc's
-// may set EOF on *s after a successful read if no further data is available,
-// but not all bytesrc's support this. It is valid for bytes to be fetched
-// multiple times, as long as the bytes have not been previously discarded.
-INLINE uint32_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
- upb_status *s) {
- return src->vtbl->fetch(src, ofs, s);
+// Fetches at least one byte starting at ofs, returning the success or failure
+// of the operation. If UPB_BYTE_OK is returned, *read indicates the number
+// of bytes successfully fetched; any error or EOF status will be reflected in
+// upb_bytesrc_status(). It is valid for bytes to be fetched multiple times,
+// as long as the bytes have not been previously discarded.
+INLINE upb_bytesuccess_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
+ size_t *read) {
+ return src->vtbl->fetch(src, ofs, read);
}
// Discards all data prior to ofs (except data that is pinned, if pinning
@@ -127,7 +145,7 @@ INLINE void upb_bytesrc_discard(upb_bytesrc *src, uint64_t ofs) {
// Copies "len" bytes of data from ofs to "dst", which must be at least "len"
// bytes long. The given region must not be discarded.
-INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
+INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, size_t len,
char *dst) {
src->vtbl->copy(src, ofs, len, dst);
}
@@ -138,7 +156,7 @@ INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
// part of the returned buffer is discarded, only the non-discarded bytes
// remain valid).
INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
- uint32_t *len) {
+ size_t *len) {
return src->vtbl->getptr(src, ofs, len);
}
@@ -148,14 +166,14 @@ INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
// // is guaranteed that the region will not be discarded (nor will the bytesrc
// // be destroyed) until the region is unpinned. However, not all bytesrc's
// // support pinning; a false return indicates that a pin was not possible.
-// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, uint32_t len) {
+// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, size_t len) {
// return src->vtbl->refregion(src, ofs, len);
// }
//
// // Releases some number of pinned bytes from the beginning of a pinned
// // region (which may be fewer than the total number of bytes pinned).
-// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, uint32_t len,
-// uint32_t bytes_to_release) {
+// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, size_t len,
+// size_t bytes_to_release) {
// src->vtbl->unpin(src, ofs, len);
// }
//
@@ -173,7 +191,7 @@ typedef struct _upb_byteregion {
uint64_t fetch;
uint64_t end; // UPB_NONDELIMITED if nondelimited.
upb_bytesrc *bytesrc;
- bool toplevel; // If true, discards hit the underlying byteregion.
+ bool toplevel; // If true, discards hit the underlying bytesrc.
} upb_byteregion;
// Initializes a byteregion. Its initial value will be empty. No methods may
@@ -225,14 +243,17 @@ void upb_byteregion_release(upb_byteregion *r);
// Attempts to fetch more data, extending the fetched range of this byteregion.
// Returns true if the fetched region was extended by at least one byte, false
// on EOF or error (see *s for details).
-bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s);
+upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r);
-// Fetches all remaining data for "r", returning false if the operation failed
-// (see "*s" for details). May only be used on delimited byteregions.
-INLINE bool upb_byteregion_fetchall(upb_byteregion *r, upb_status *s) {
+// Fetches all remaining data for "r". Returns UPB_BYTE_OK once EOF is reached,
+// or the failing status otherwise. May only be used on delimited byteregions.
+INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) {
assert(upb_byteregion_len(r) != UPB_NONDELIMITED);
- while (upb_byteregion_fetch(r, s)) ; // Empty body.
- return upb_eof(s);
+ upb_bytesuccess_t ret;
+ do {
+ ret = upb_byteregion_fetch(r);
+ } while (ret == UPB_BYTE_OK);
+ return ret == UPB_BYTE_EOF ? UPB_BYTE_OK : ret;
}
// Discards bytes from the byteregion up until ofs (which must be greater or
@@ -243,13 +264,14 @@ INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) {
assert(ofs >= upb_byteregion_discardofs(r));
assert(ofs <= upb_byteregion_endofs(r));
r->discard = ofs;
+ if (ofs > r->fetch) r->fetch = ofs;
if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs);
}
// Copies "len" bytes of data into "dst", starting at ofs. The specified
// region must be available.
INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs,
- uint32_t len, char *dst) {
+ size_t len, char *dst) {
assert(ofs >= upb_byteregion_discardofs(r));
assert(len <= upb_byteregion_available(r, ofs));
upb_bytesrc_copy(r->bytesrc, ofs, len, dst);
@@ -268,7 +290,7 @@ INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) {
// or when the bytes are discarded. If the byteregion is not currently pinned,
// the pointer is only valid for the lifetime of the parent byteregion.
INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
- uint64_t ofs, uint32_t *len) {
+ uint64_t ofs, size_t *len) {
assert(ofs >= upb_byteregion_discardofs(r));
const char *ret = upb_bytesrc_getptr(r->bytesrc, ofs, len);
*len = UPB_MIN(*len, upb_byteregion_available(r, ofs));
@@ -295,7 +317,7 @@ INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
// The string data in the returned region is guaranteed to be contiguous and
// NULL-terminated.
upb_byteregion *upb_byteregion_new(const void *str);
-upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len);
+upb_byteregion *upb_byteregion_newl(const void *str, size_t len);
// May *only* be called on a byteregion created with upb_byteregion_new[l]()!
void upb_byteregion_free(upb_byteregion *r);
@@ -399,7 +421,7 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {
typedef struct {
uint64_t ofs;
- uint32_t len;
+ size_t len;
uint32_t refcount;
char data[];
} upb_stdio_buf;
@@ -414,7 +436,6 @@ typedef struct {
bool should_close;
upb_stdio_buf **bufs;
uint32_t nbuf, szbuf;
- upb_byteregion byteregion;
} upb_stdio;
void upb_stdio_init(upb_stdio *stdio);
@@ -433,7 +454,7 @@ void upb_stdio_reset(upb_stdio *stdio, FILE *file);
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
upb_status *s);
-upb_byteregion *upb_stdio_allbytes(upb_stdio *stdio);
+upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
@@ -444,7 +465,7 @@ upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
typedef struct {
upb_bytesrc bytesrc;
const char *str;
- uint32_t len;
+ size_t len;
upb_byteregion byteregion;
} upb_stringsrc;
@@ -454,7 +475,11 @@ void upb_stringsrc_uninit(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string. The
// string data must be valid until the stringsrc is reset again or destroyed.
-void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len);
+void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
+
+INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
+ return &s->bytesrc;
+}
// Returns the top-level upb_byteregion* for this stringsrc. Invalidated when
// the stringsrc is reset.
@@ -468,7 +493,7 @@ INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) {
struct _upb_stringsink {
upb_bytesink bytesink;
char *str;
- uint32_t len, size;
+ size_t len, size;
};
typedef struct _upb_stringsink upb_stringsink;
@@ -478,12 +503,12 @@ void upb_stringsink_uninit(upb_stringsink *s);
// Resets the sink's string to "str", which the sink takes ownership of.
// "str" may be NULL, which will make the sink allocate a new string.
-void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t len);
+void upb_stringsink_reset(upb_stringsink *s, char *str, size_t len);
// Releases ownership of the returned string (which is "len" bytes long) and
// resets the internal string to be empty again (as if reset were called with
// NULL).
-const char *upb_stringsink_release(upb_stringsink *s, uint32_t *len);
+const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
// Returns the upb_bytesink* for this stringsrc. Invalidated by reset above.
upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);
diff --git a/upb/def.c b/upb/def.c
index 13418c6..246e9bb 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -334,7 +334,7 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
if (upb_byteregion_len(bytes) == 0) {
upb_value_setint32(&f->defaultval, e->defaultval);
} else {
- uint32_t len;
+ size_t len;
// ptr is guaranteed to be NULL-terminated because the byteregion was
// created with upb_byteregion_newl().
const char *ptr = upb_byteregion_getptr(bytes, 0, &len);
diff --git a/upb/handlers.c b/upb/handlers.c
index 0af09ef..d1b68ad 100644
--- a/upb/handlers.c
+++ b/upb/handlers.c
@@ -13,7 +13,7 @@
static upb_mhandlers *upb_mhandlers_new() {
upb_mhandlers *m = malloc(sizeof(*m));
- upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers));
+ upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent));
m->startmsg = NULL;
m->endmsg = NULL;
m->is_group = false;
@@ -26,21 +26,21 @@ static upb_mhandlers *upb_mhandlers_new() {
static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
upb_fieldtype_t type,
bool repeated) {
- uint32_t tag = n << 3 | upb_types[type].native_wire_type;
- upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, tag);
- if (f) abort();
- upb_fhandlers new_f = {false, type, repeated,
- repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0),
+ upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n);
+ // TODO: design/refine the API for changing the set of fields or modifying
+ // existing handlers.
+ if (e) return NULL;
+ upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0),
n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
#ifdef UPB_USE_JIT_X64
0, 0, 0,
#endif
NULL};
- upb_inttable_insert(&m->fieldtab, tag, &new_f);
- f = upb_inttable_lookup(&m->fieldtab, tag);
- assert(f);
- assert(f->type == type);
- return f;
+ upb_fhandlers *ptr = malloc(sizeof(*ptr));
+ memcpy(ptr, &new_f, sizeof(upb_fhandlers));
+ upb_itofhandlers_ent ent = {false, ptr};
+ upb_inttable_insert(&m->fieldtab, n, &ent);
+ return ptr;
}
upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
@@ -57,6 +57,7 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP));
assert(subm);
upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated);
+ if (!f) return NULL;
f->submsg = subm;
if (type == UPB_TYPE(GROUP))
_upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false);
@@ -82,6 +83,12 @@ void upb_handlers_unref(upb_handlers *h) {
if (upb_atomic_unref(&h->refcount)) {
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *mh = h->msgs[i];
+ for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab);
+ !upb_inttable_done(j);
+ j = upb_inttable_next(&mh->fieldtab, j)) {
+ upb_itofhandlers_ent *e = upb_inttable_iter_value(j);
+ free(e->f);
+ }
upb_inttable_free(&mh->fieldtab);
#ifdef UPB_USE_JIT_X64
free(mh->tablearray);
@@ -154,41 +161,24 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
/* upb_dispatcher *************************************************************/
-static upb_fhandlers toplevel_f = {
- false, UPB_TYPE(GROUP), false, false, UPB_ATOMIC_INIT(0), 0,
- -1, NULL, NULL, // submsg
-#ifdef NDEBUG
- {{0}},
-#else
- {{0}, -1},
-#endif
- NULL, NULL, NULL, NULL, NULL,
-#ifdef UPB_USE_JIT_X64
- 0, 0, 0,
-#endif
- NULL};
-
-void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
- upb_skip_handler *skip, upb_exit_handler *exit,
+void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
+ upb_exit_handler UPB_NORETURN *exit,
void *srcclosure) {
- d->handlers = h;
- upb_handlers_ref(h);
- for (int i = 0; i < h->msgs_len; i++) {
- upb_mhandlers *m = h->msgs[i];
- upb_inttable_compact(&m->fieldtab);
- }
- d->stack[0].f = &toplevel_f;
+ d->stack[0].f = NULL; // Should never be read.
d->limit = &d->stack[UPB_MAX_NESTING];
- d->skip = skip;
- d->exit = exit;
+ d->exitjmp = exit;
d->srcclosure = srcclosure;
d->top_is_implicit = false;
- upb_status_init(&d->status);
+ d->msgent = NULL;
+ d->top = NULL;
+ d->toplevel_msgent = NULL;
+ d->status = status;
}
-upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
- d->msgent = d->handlers->msgs[0];
- d->dispatch_table = &d->msgent->fieldtab;
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure,
+ upb_mhandlers *top) {
+ d->msgent = top;
+ d->toplevel_msgent = top;
d->top = d->stack;
d->top->closure = closure;
d->top->is_sequence = false;
@@ -197,46 +187,32 @@ upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
}
void upb_dispatcher_uninit(upb_dispatcher *d) {
- upb_handlers_unref(d->handlers);
- upb_status_uninit(&d->status);
}
void upb_dispatch_startmsg(upb_dispatcher *d) {
upb_flow_t flow = UPB_CONTINUE;
if (d->msgent->startmsg) d->msgent->startmsg(d->top->closure);
- if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+ if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
}
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
assert(d->top == d->stack);
- if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+ if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
// TODO: should we avoid this copy by passing client's status obj to cbs?
- upb_status_copy(status, &d->status);
-}
-
-void indent(upb_dispatcher *d) {
- for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " ");
-}
-
-void indentm1(upb_dispatcher *d) {
- for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " ");
+ upb_status_copy(status, d->status);
}
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_fhandlers *f) {
- //indent(d);
- //fprintf(stderr, "START SEQ: %d\n", f->number);
- if((d->top+1) >= d->limit) {
- upb_status_seterrliteral(&d->status, "Nesting too deep.");
- _upb_dispatcher_unwind(d, UPB_BREAK);
- return d->top; // Dummy.
+ if (d->top + 1 >= d->limit) {
+ upb_status_seterrliteral(d->status, "Nesting too deep.");
+ _upb_dispatcher_abortjmp(d);
}
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
if (sflow.flow != UPB_CONTINUE) {
- _upb_dispatcher_unwind(d, sflow.flow);
- return d->top; // Dummy.
+ _upb_dispatcher_abortjmp(d);
}
++d->top;
@@ -248,8 +224,6 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
}
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
- //indentm1(d);
- //fprintf(stderr, "END SEQ\n");
assert(d->top > d->stack);
assert(d->top->is_sequence);
upb_fhandlers *f = d->top->f;
@@ -257,30 +231,23 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
upb_flow_t flow = UPB_CONTINUE;
if (f->endseq) flow = f->endseq(d->top->closure, f->fval);
if (flow != UPB_CONTINUE) {
- printf("YO, UNWINDING!\n");
- _upb_dispatcher_unwind(d, flow);
- return d->top; // Dummy.
+ _upb_dispatcher_abortjmp(d);
}
- d->msgent = d->top->f->submsg ? d->top->f->submsg : d->handlers->msgs[0];
- d->dispatch_table = &d->msgent->fieldtab;
+ d->msgent = d->top->f ? d->top->f->submsg : d->toplevel_msgent;
return d->top;
}
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f) {
- //indent(d);
- //fprintf(stderr, "START SUBMSG: %d\n", f->number);
- if((d->top+1) >= d->limit) {
- upb_status_seterrliteral(&d->status, "Nesting too deep.");
- _upb_dispatcher_unwind(d, UPB_BREAK);
- return d->top; // Dummy.
+ if (d->top + 1 >= d->limit) {
+ upb_status_seterrliteral(d->status, "Nesting too deep.");
+ _upb_dispatcher_abortjmp(d);
}
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
if (sflow.flow != UPB_CONTINUE) {
- _upb_dispatcher_unwind(d, sflow.flow);
- return d->top; // Dummy.
+ _upb_dispatcher_abortjmp(d);
}
++d->top;
@@ -289,24 +256,20 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
d->top->is_packed = false;
d->top->closure = sflow.closure;
d->msgent = f->submsg;
- d->dispatch_table = &d->msgent->fieldtab;
upb_dispatch_startmsg(d);
return d->top;
}
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
- //indentm1(d);
- //fprintf(stderr, "END SUBMSG\n");
assert(d->top > d->stack);
assert(!d->top->is_sequence);
upb_fhandlers *f = d->top->f;
- if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
+ if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
d->msgent = d->top->f->msg;
- d->dispatch_table = &d->msgent->fieldtab;
--d->top;
upb_flow_t flow = UPB_CONTINUE;
if (f->endsubmsg) f->endsubmsg(d->top->closure, f->fval);
- if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+ if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
return d->top;
}
@@ -320,14 +283,7 @@ bool upb_dispatcher_islegalend(upb_dispatcher *d) {
return false;
}
-void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
- upb_dispatcher_frame *frame = d->top;
- while (1) {
- frame->f->submsg->endmsg(frame->closure, &d->status);
- frame->f->endsubmsg(frame->closure, frame->f->fval);
- --frame;
- if (frame < d->stack) { d->exit(d->srcclosure); return; }
- d->top = frame;
- if (flow == UPB_SKIPSUBMSG) return;
- }
+void _upb_dispatcher_abortjmp(upb_dispatcher *d) {
+ d->exitjmp(d->srcclosure);
+ assert(false); // Never returns.
}
diff --git a/upb/handlers.h b/upb/handlers.h
index e17a726..9ed02c1 100644
--- a/upb/handlers.h
+++ b/upb/handlers.h
@@ -132,13 +132,15 @@ typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval);
// A upb_fhandlers object represents the set of handlers associated with one
// specific message field.
+//
+// TODO: remove upb_decoder-specific fields from this, and instead have
+// upb_decoderplan make a deep copy of the whole graph with its own fields
+// added.
struct _upb_decoder;
struct _upb_mhandlers;
typedef struct _upb_fieldent {
- bool junk;
upb_fieldtype_t type;
bool repeated;
- bool is_repeated_primitive;
upb_atomic_t refcount;
uint32_t number;
int32_t valuehasbit;
@@ -158,6 +160,11 @@ typedef struct _upb_fieldent {
void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
} upb_fhandlers;
+typedef struct {
+ bool junk; // Stolen by table impl; see table.h for details.
+ upb_fhandlers *f;
+} upb_itofhandlers_ent;
+
// fhandlers are created as part of a upb_handlers instance, but can be ref'd
// and unref'd to prolong the life of the handlers.
void upb_fhandlers_ref(upb_fhandlers *m);
@@ -194,16 +201,18 @@ typedef struct _upb_mhandlers {
upb_inttable fieldtab; // Maps field number -> upb_fhandlers.
bool is_group;
#ifdef UPB_USE_JIT_X64
- uint32_t jit_startmsg_pclabel;
- uint32_t jit_endofbuf_pclabel;
- uint32_t jit_endofmsg_pclabel;
- uint32_t jit_dyndispatch_pclabel;
- uint32_t jit_unknownfield_pclabel;
- int32_t jit_parent_field_done_pclabel;
+ // Used inside the JIT to track labels (jmp targets) in the generated code.
+ uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message.
+ uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end?
+ uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message.
+ uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup.
+ uint32_t jit_unknownfield_pclabel; // Parsed an unknown field.
uint32_t max_field_number;
// Currently keyed on field number. Could also try keying it
// on encoded or decoded tag, or on encoded field number.
void **tablearray;
+ // Pointer to the JIT code for parsing this message.
+ void *jit_func;
#endif
} upb_mhandlers;
@@ -316,62 +325,47 @@ INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgd
typedef struct {
upb_fhandlers *f;
void *closure;
-
- // Members to use as the data source requires.
- void *srcclosure;
uint64_t end_ofs;
- uint16_t msgindex;
- uint16_t fieldindex;
-
bool is_sequence; // frame represents seq or submsg? (f might be both).
bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX
// (strings aren't pushed).
} upb_dispatcher_frame;
-// Called when some of the input needs to be skipped. All frames from the
-// current top to "bottom", inclusive, should be skipped.
-typedef void upb_skip_handler(void *, upb_dispatcher_frame *bottom);
typedef void upb_exit_handler(void *);
typedef struct {
upb_dispatcher_frame *top, *limit;
- upb_handlers *handlers;
-
// Msg and dispatch table for the current level.
upb_mhandlers *msgent;
- upb_inttable *dispatch_table;
- upb_skip_handler *skip;
- upb_exit_handler *exit;
+ upb_mhandlers *toplevel_msgent;
+ upb_exit_handler UPB_NORETURN *exitjmp;
void *srcclosure;
bool top_is_implicit;
// Stack.
- upb_status status;
+ upb_status *status;
upb_dispatcher_frame stack[UPB_MAX_NESTING];
} upb_dispatcher;
-void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
- upb_skip_handler *skip, upb_exit_handler *exit,
- void *closure);
-upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
+// Caller retains ownership of the status object.
+void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
+ upb_exit_handler UPB_NORETURN *exit, void *closure);
+upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure,
+ upb_mhandlers *top_msg);
void upb_dispatcher_uninit(upb_dispatcher *d);
// Tests whether the message could legally end here (either the stack is empty
// or the only open stack frame is implicit).
bool upb_dispatcher_islegalend(upb_dispatcher *d);
-// Looks up a field by number for the current message.
-INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {
- return (upb_fhandlers*)upb_inttable_fastlookup(
- d->dispatch_table, n, sizeof(upb_fhandlers));
-}
-
-void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow);
+// Aborts dispatching after a handler returns a flow other than UPB_CONTINUE
+// (see upb_dispatch_value()). Takes no flow argument and does not return.
+void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN;
INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) {
char *p = (char*)_p;
- if (hasbit >= 0) p[hasbit / 8] |= (1 << (hasbit % 8));
+ if (hasbit >= 0) p[(uint32_t)hasbit / 8] |= (1 << ((uint32_t)hasbit % 8));
}
// Dispatch functions -- call the user handler and handle errors.
@@ -380,11 +374,12 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
upb_flow_t flow = UPB_CONTINUE;
if (f->value) flow = f->value(d->top->closure, f->fval, val);
_upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
- if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
+ if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
}
void upb_dispatch_startmsg(upb_dispatcher *d);
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
-upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f);
+upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
+ upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);
diff --git a/upb/msg.c b/upb/msg.c
index 78309cf..77521e5 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -86,14 +86,16 @@ void upb_stdmsg_sethas(void *_m, upb_value fval) {
assert(_m != NULL);
char *m = _m;
const upb_fielddef *f = upb_value_getfielddef(fval);
- if (f->hasbit >= 0) m[f->hasbit / 8] |= (1 << (f->hasbit % 8));
+ if (f->hasbit >= 0)
+ m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8));
}
bool upb_stdmsg_has(const void *_m, upb_value fval) {
assert(_m != NULL);
const char *m = _m;
const upb_fielddef *f = upb_value_getfielddef(fval);
- return f->hasbit < 0 || (m[f->hasbit / 8] & (1 << (f->hasbit % 8)));
+ return f->hasbit < 0 ||
+ (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8)));
}
#define UPB_ACCESSORS(type, ctype) \
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index ae54e47..1b5fc17 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -13,14 +13,95 @@
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
+/* upb_decoderplan ************************************************************/
+
#ifdef UPB_USE_JIT_X64
-#define Dst_DECL upb_decoder *d
-#define Dst_REF (d->dynasm)
-#define Dst (d)
+// These defines are necessary for DynASM codegen.
+// See dynasm/dasm_proto.h for more info.
+#define Dst_DECL upb_decoderplan *plan
+#define Dst_REF (plan->dynasm)
+#define Dst (plan)
+
+// In debug mode, make DynASM do internal checks (must be defined before any
+// dasm header is included).
+#ifndef NDEBUG
+#define DASM_CHECKS
+#endif
+
#include "dynasm/dasm_proto.h"
#include "upb/pb/decoder_x64.h"
#endif
+typedef struct {
+ upb_fhandlers base;
+ void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
+#ifdef UPB_USE_JIT_X64
+ uint32_t jit_pclabel;
+ uint32_t jit_pclabel_notypecheck;
+#endif
+} upb_dplanfield;
+
+typedef struct {
+ upb_mhandlers base;
+#ifdef UPB_USE_JIT_X64
+ uint32_t jit_startmsg_pclabel;
+ uint32_t jit_endofbuf_pclabel;
+ uint32_t jit_endofmsg_pclabel;
+ uint32_t jit_dyndispatch_pclabel;
+ uint32_t jit_unknownfield_pclabel;
+ int32_t jit_parent_field_done_pclabel;
+ uint32_t max_field_number;
+ // Currently keyed on field number. Could also try keying it
+ // on encoded or decoded tag, or on encoded field number.
+ void **tablearray;
+#endif
+} upb_dplanmsg;
+
+static void *upb_decoderplan_fptrs[];
+
+void upb_decoderplan_initfhandlers(upb_fhandlers *f) {
+ f->decode = upb_decoderplan_fptrs[f->type];
+}
+
+upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
+ upb_decoderplan *p = malloc(sizeof(*p));
+ p->handlers = h;
+ upb_handlers_ref(h);
+ h->should_jit = allowjit;
+#ifdef UPB_USE_JIT_X64
+ p->jit_code = NULL;
+ if (allowjit) upb_decoderplan_makejit(p);
+#endif
+ // Set function pointers for each field's decode function.
+ for (int i = 0; i < h->msgs_len; i++) {
+ upb_mhandlers *m = h->msgs[i];
+ for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
+ !upb_inttable_done(i);
+ i = upb_inttable_next(&m->fieldtab, i)) {
+ upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
+ upb_fhandlers *f = e->f;
+ upb_decoderplan_initfhandlers(f);
+ }
+ }
+ return p;
+}
+
+void upb_decoderplan_unref(upb_decoderplan *p) {
+ // TODO: make truly refcounted.
+ upb_handlers_unref(p->handlers);
+#ifdef UPB_USE_JIT_X64
+ if (p->jit_code) upb_decoderplan_freejit(p);
+#endif
+ free(p);
+}
+
+bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
+ return p->jit_code != NULL;
+}
+
+
+/* upb_decoder ****************************************************************/
+
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But emperically on a Core i7, performance increases 30-50%
@@ -29,18 +110,17 @@
#define FORCEINLINE static __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
-static void upb_decoder_exit(upb_decoder *d) {
+UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) {
// Resumable decoder would back out to completed_ptr (and possibly get a
// previous buffer).
siglongjmp(d->exitjmp, 1);
}
-static void upb_decoder_exit2(void *_d) {
- upb_decoder *d = _d;
- upb_decoder_exit(d);
+UPB_NORETURN static void upb_decoder_exitjmp2(void *d) {
+ upb_decoder_exitjmp(d);
}
-static void upb_decoder_abort(upb_decoder *d, const char *msg) {
- upb_status_seterrliteral(d->status, msg);
- upb_decoder_exit(d);
+UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) {
+ upb_status_seterrliteral(&d->status, msg);
+ upb_decoder_exitjmp(d);
}
/* Buffering ******************************************************************/
@@ -50,8 +130,12 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) {
// the next one. When we've committed our progress we discard any previous
// buffers' regions.
-static uint32_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
-static void upb_decoder_advance(upb_decoder *d, uint32_t len) {
+static size_t upb_decoder_bufleft(upb_decoder *d) {
+ assert(d->end >= d->ptr);
+ return d->end - d->ptr;
+}
+
+static void upb_decoder_advance(upb_decoder *d, size_t len) {
assert(upb_decoder_bufleft(d) >= len);
d->ptr += len;
}
@@ -66,29 +150,49 @@ uint64_t upb_decoder_bufendofs(upb_decoder *d) {
static void upb_decoder_setmsgend(upb_decoder *d) {
upb_dispatcher_frame *f = d->dispatcher.top;
- uint32_t delimlen = f->end_ofs - d->bufstart_ofs;
- uint32_t buflen = d->end - d->buf;
+ size_t delimlen = f->end_ofs - d->bufstart_ofs;
+ size_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
d->top_is_packed = f->is_packed;
+ d->dispatch_table = &d->dispatcher.msgent->fieldtab;
}
-static bool upb_trypullbuf(upb_decoder *d) {
- assert(upb_decoder_bufleft(d) == 0);
- d->bufstart_ofs = upb_decoder_offset(d);
+static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) {
+ assert(ofs >= upb_decoder_offset(d));
+ if (ofs > upb_byteregion_endofs(d->input))
+ upb_decoder_abortjmp(d, "Unexpected EOF");
d->buf = NULL;
d->ptr = NULL;
d->end = NULL;
- if (upb_byteregion_available(d->input, upb_decoder_offset(d)) == 0 &&
- !upb_byteregion_fetch(d->input, d->status)) {
- if (upb_eof(d->status)) return false;
- upb_decoder_exit(d); // Non-EOF error.
+ d->delim_end = NULL;
+#ifdef UPB_USE_JIT_X64
+ d->jit_end = NULL;
+#endif
+ d->bufstart_ofs = ofs;
+}
+
+static bool upb_trypullbuf(upb_decoder *d) {
+ assert(upb_decoder_bufleft(d) == 0);
+ upb_decoder_skiptonewbuf(d, upb_decoder_offset(d));
+ if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) {
+ switch (upb_byteregion_fetch(d->input)) {
+ case UPB_BYTE_OK:
+ assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0);
+ break;
+ case UPB_BYTE_EOF: return false;
+ case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input");
+ // Decoder resuming is not yet supported.
+ case UPB_BYTE_WOULDBLOCK:
+ upb_decoder_abortjmp(d, "Input returned WOULDBLOCK");
+ }
}
- uint32_t len;
+ size_t len;
d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
assert(len > 0);
d->ptr = d->buf;
d->end = d->buf + len;
+ upb_decoder_setmsgend(d);
#ifdef UPB_USE_JIT_X64
// If we start parsing a value, we can parse up to 20 bytes without
// having to bounds-check anything (2 10-byte varints). Since the
@@ -96,27 +200,29 @@ static bool upb_trypullbuf(upb_decoder *d) {
// JIT bails if there are not 20 bytes available.
d->jit_end = d->end - 20;
#endif
- upb_decoder_setmsgend(d);
+ assert(upb_decoder_bufleft(d) > 0);
return true;
}
static void upb_pullbuf(upb_decoder *d) {
- if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
+ if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF");
}
-void upb_decoder_skipto(upb_decoder *d, uint64_t ofs) {
- if (ofs < upb_decoder_bufendofs(d)) {
+void upb_decoder_checkpoint(upb_decoder *d) {
+ upb_byteregion_discard(d->input, upb_decoder_offset(d));
+}
+
+void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
+ if (ofs <= upb_decoder_bufendofs(d)) {
upb_decoder_advance(d, ofs - upb_decoder_offset(d));
} else {
- d->buf = NULL;
- d->ptr = NULL;
- d->end = NULL;
- d->bufstart_ofs = ofs;
+ upb_decoder_skiptonewbuf(d, ofs);
}
+ upb_decoder_checkpoint(d);
}
-void upb_decoder_checkpoint(upb_decoder *d) {
- upb_byteregion_discard(d->input, upb_decoder_offset(d));
+void upb_decoder_discard(upb_decoder *d, size_t bytes) {
+ upb_decoder_discardto(d, upb_decoder_offset(d) + bytes);
}
@@ -126,15 +232,13 @@ NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
- const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
- if (upb_decoder_bufleft(d) == 0) {
- upb_pullbuf(d);
- ptr = d->ptr;
- }
- u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
+ if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
+ u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos;
+ upb_decoder_advance(d, 1);
}
- if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
+ if(bitpos == 70 && (byte & 0x80))
+ upb_decoder_abortjmp(d, "Unterminated varint");
return u64;
}
@@ -151,7 +255,7 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
u64 = upb_decode_varint_slow(d);
- if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
+ if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint");
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
@@ -174,7 +278,7 @@ FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 10) {
// Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
- if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
+ if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint");
upb_decoder_advance(d, r.p - d->ptr);
return r.val;
} else if (upb_decoder_bufleft(d) > 0) {
@@ -200,11 +304,12 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
} else {
// Slow case.
size_t read = 0;
- while (read < bytes) {
- size_t avail = upb_decoder_bufleft(d);
+ while (1) {
+ size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail);
read += avail;
+ if (read == bytes) break;
upb_pullbuf(d);
}
}
@@ -213,26 +318,28 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32;
upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
- return u32; // TODO: proper byte swapping
+ return u32; // TODO: proper byte swapping for big-endian machines.
}
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64;
upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
- return u64; // TODO: proper byte swapping
+ return u64; // TODO: proper byte swapping for big-endian machines.
}
INLINE upb_byteregion *upb_decode_string(upb_decoder *d) {
uint32_t strlen = upb_decode_varint32(d);
uint64_t offset = upb_decoder_offset(d);
+ if (offset + strlen > upb_byteregion_endofs(d->input))
+ upb_decoder_abortjmp(d, "Unexpected EOF");
upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen);
// Could make it an option on the callback whether we fetchall() first or not.
- upb_byteregion_fetchall(&d->str_byteregion, d->status);
- if (!upb_ok(d->status)) upb_decoder_exit(d);
- upb_decoder_skipto(d, offset + strlen);
+ if (upb_byteregion_fetchall(&d->str_byteregion) != UPB_BYTE_OK)
+ upb_decoder_abortjmp(d, "Couldn't fetchall() on string.");
+ upb_decoder_discardto(d, offset + strlen);
return &d->str_byteregion;
}
-INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
+INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
@@ -253,8 +360,6 @@ INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
-static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
-static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
T(INT32, varint, int32, int32_t)
T(INT64, varint, int64, int64_t)
@@ -271,9 +376,10 @@ T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, byteregion, upb_byteregion*)
+#undef T
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, UPB_NONDELIMITED);
+ upb_push_msg(d, f, UPB_NONDELIMITED);
}
static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
(void)f;
@@ -281,15 +387,30 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
- upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d));
+ uint32_t len = upb_decode_varint32(d);
+ upb_push_msg(d, f, upb_decoder_offset(d) + len);
}
+#define F(type) &upb_decode_ ## type
+static void *upb_decoderplan_fptrs[] = {
+ &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
+ F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
+ F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
+ F(SFIXED64), F(SINT32), F(SINT64)};
+#undef F
+
/* The main decoding loop *****************************************************/
static void upb_decoder_checkdelim(upb_decoder *d) {
+ // TODO: This doesn't work for the case that no buffer is currently loaded
+ // (ie. d->buf == NULL) because delim_end is NULL even if we are at
+ // end-of-delim. Need to add a test that exercises this by putting a buffer
+ // seam in the middle of the final delimited value in a proto that we skip
+ // for some reason (like because it's unknown and we have no unknown field
+ // handler).
while (d->delim_end != NULL && d->ptr >= d->delim_end) {
- if (d->ptr > d->delim_end) upb_decoder_abort(d, "Bad submessage end");
+ if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end");
if (d->dispatcher.top->is_sequence) {
upb_dispatch_endseq(&d->dispatcher);
} else {
@@ -299,33 +420,36 @@ static void upb_decoder_checkdelim(upb_decoder *d) {
}
}
-static void upb_decoder_enterjit(upb_decoder *d) {
- (void)d;
-#ifdef UPB_USE_JIT_X64
- if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
- // Decodes as many fields as possible, updating d->ptr appropriately,
- // before falling through to the slow(er) path.
- void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
- upb_jit_decode(d);
- }
-#endif
-}
-
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL;
uint8_t wire_type = tag & 0x7;
- upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
+ uint32_t fieldnum = tag >> 3;
+ upb_itofhandlers_ent *e = upb_inttable_fastlookup(
+ d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent));
+ upb_fhandlers *f = e ? e->f : NULL;
+
+ if (f) {
+ // Wire type check.
+ if (wire_type == upb_types[f->type].native_wire_type ||
+ (wire_type == UPB_WIRE_TYPE_DELIMITED &&
+ upb_types[f->type].is_numeric)) {
+ // Wire type is ok.
+ } else {
+ f = NULL;
+ }
+ }
// There are no explicit "startseq" or "endseq" markers in protobuf
// streams, so we have to infer them by noticing when a repeated field
// starts or ends.
- if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
+ upb_dispatcher_frame *fr = d->dispatcher.top;
+ if (fr->is_sequence && fr->f != f) {
upb_dispatch_endseq(&d->dispatcher);
upb_decoder_setmsgend(d);
}
- if (f && f->repeated && d->dispatcher.top->f != f) {
+ if (f && f->repeated && (!fr->is_sequence || fr->f != f)) {
uint64_t old_end = d->dispatcher.top->end_ofs;
upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
@@ -334,7 +458,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
fr->end_ofs = old_end;
} else {
// Packed primitive field.
- fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d);
+ uint32_t len = upb_decode_varint32(d);
+ fr->end_ofs = upb_decoder_offset(d) + len;
fr->is_packed = true;
}
upb_decoder_setmsgend(d);
@@ -343,14 +468,20 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
if (f) return f;
// Unknown field.
+ if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
+ upb_decoder_abortjmp(d, "Invalid field number");
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
- case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
- case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
+ case UPB_WIRE_TYPE_32BIT: upb_decoder_discard(d, 4); break;
+ case UPB_WIRE_TYPE_64BIT: upb_decoder_discard(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
- upb_decoder_advance(d, upb_decode_varint32(d)); break;
+ upb_decoder_discard(d, upb_decode_varint32(d)); break;
+ case UPB_WIRE_TYPE_START_GROUP:
+ upb_decoder_abortjmp(d, "Can't handle unknown groups yet");
+ case UPB_WIRE_TYPE_END_GROUP:
+ upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
default:
- upb_decoder_abort(d, "Invalid wire type");
+ upb_decoder_abortjmp(d, "Invalid wire type");
}
// TODO: deliver to unknown field callback.
upb_decoder_checkpoint(d);
@@ -358,16 +489,22 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
}
}
-void upb_decoder_decode(upb_decoder *d, upb_status *status) {
- if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; }
- d->status = status;
+upb_success_t upb_decoder_decode(upb_decoder *d) {
+ assert(d->input);
+ if (sigsetjmp(d->exitjmp, 0)) {
+ assert(!upb_ok(&d->status));
+ return UPB_ERROR;
+ }
upb_dispatch_startmsg(&d->dispatcher);
// Prime the buf so we can hit the JIT immediately.
upb_trypullbuf(d);
upb_fhandlers *f = d->dispatcher.top->f;
- while(1) { // Main loop: executed once per tag/field pair.
+ while(1) {
upb_decoder_checkdelim(d);
+#ifdef UPB_USE_JIT_X64
upb_decoder_enterjit(d);
+ upb_decoder_checkpoint(d);
+#endif
if (!d->top_is_packed) f = upb_decode_tag(d);
if (!f) {
// Sucessful EOF. We may need to dispatch a top-level implicit frame.
@@ -375,64 +512,46 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
assert(d->dispatcher.top->is_sequence);
upb_dispatch_endseq(&d->dispatcher);
}
- return;
+ return UPB_OK;
}
f->decode(d, f);
upb_decoder_checkpoint(d);
}
}
-static void upb_decoder_skip(void *_d, upb_dispatcher_frame *f) {
- upb_decoder *d = _d;
- if (f->end_ofs != UPB_NONDELIMITED) {
- upb_decoder_skipto(d, d->dispatcher.top->end_ofs);
- } else {
- // TODO: how to support skipping groups? Dispatcher could drop callbacks,
- // or it could be special-cased inside the decoder.
- }
+void upb_decoder_init(upb_decoder *d) {
+ upb_status_init(&d->status);
+ upb_dispatcher_init(&d->dispatcher, &d->status, &upb_decoder_exitjmp2, d);
+ d->plan = NULL;
+ d->input = NULL;
}
-void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
- upb_dispatcher_init(
- &d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
-#ifdef UPB_USE_JIT_X64
- d->jit_code = NULL;
- if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
-#endif
- // Set function pointers for each field's decode function.
- for (int i = 0; i < handlers->msgs_len; i++) {
- upb_mhandlers *m = handlers->msgs[i];
- for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- upb_fhandlers *f = upb_inttable_iter_value(i);
-#define F(type) &upb_decode_ ## type
- static void *fptrs[] = {&upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
- F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
- F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
- F(SFIXED64), F(SINT32), F(SINT64)};
- f->decode = fptrs[f->type];
- }
- }
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
+ assert(msg_offset >= 0);
+ assert(msg_offset < p->handlers->msgs_len);
+ d->plan = p;
+ d->msg_offset = msg_offset;
+ d->input = NULL;
}
-void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure) {
- upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
+void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
+ void *closure) {
+ assert(d->plan);
+ upb_dispatcher_frame *f =
+ upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
+ upb_status_clear(&d->status);
f->end_ofs = UPB_NONDELIMITED;
d->input = input;
- d->bufstart_ofs = upb_byteregion_startofs(input);
- d->buf = NULL;
- d->ptr = NULL;
- d->end = NULL; // Force a buffer pull.
- d->delim_end = NULL; // But don't let end-of-message get triggered.
d->str_byteregion.bytesrc = input->bytesrc;
-#ifdef UPB_USE_JIT_X64
- d->jit_end = NULL;
-#endif
+
+ // Protect against assert in skiptonewbuf().
+ d->bufstart_ofs = 0;
+ d->ptr = NULL;
+ d->buf = NULL;
+ upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input));
}
void upb_decoder_uninit(upb_decoder *d) {
-#ifdef UPB_USE_JIT_X64
- if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
-#endif
upb_dispatcher_uninit(&d->dispatcher);
+ upb_status_uninit(&d->status);
}
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index c35bec4..13e5774 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -21,15 +21,43 @@
extern "C" {
#endif
-/* upb_decoder *****************************************************************/
+/* upb_decoderplan ************************************************************/
+
+// A decoderplan contains whatever data structures and generated (JIT-ted) code
+// are necessary to decode protobuf data of a specific type to a specific set
+// of handlers. By generating the plan ahead of time, we avoid having to
+// redo this work every time we decode.
+//
+// A decoderplan is thread-safe, meaning that it can be used concurrently by
+// different upb_decoders in different threads. However, the upb_decoders are
+// *not* thread-safe.
+struct _upb_decoderplan;
+typedef struct _upb_decoderplan upb_decoderplan;
+
+// TODO: add parameter for a list of other decoder plans that we can share
+// generated code with.
+upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit);
+void upb_decoderplan_unref(upb_decoderplan *p);
+
+// Returns true if the plan contains JIT-ted code. This may not be the same as
+// the "allowjit" parameter to the constructor if support for JIT-ting was not
+// compiled in.
+bool upb_decoderplan_hasjitcode(upb_decoderplan *p);
+
+
+/* upb_decoder ****************************************************************/
struct dasm_State;
typedef struct _upb_decoder {
- upb_byteregion *input; // Input data (serialized).
- upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
- upb_status *status; // Where we will store any errors that occur.
- upb_byteregion str_byteregion; // For passing string data to callbacks.
+ upb_decoderplan *plan;
+ int msg_offset; // Which message from the plan is top-level.
+ upb_byteregion *input; // Input data (serialized), not owned.
+ upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
+ upb_status status; // Where we store errors that occur.
+ upb_byteregion str_byteregion; // For passing string data to callbacks.
+
+ upb_inttable *dispatch_table;
// Current input buffer and its stream offset.
const char *buf, *ptr, *end;
@@ -37,40 +65,64 @@ typedef struct _upb_decoder {
// End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end;
+ // True if the top stack frame represents a packed field.
bool top_is_packed;
#ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field.
const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end)
-
- // JIT-generated machine code (else NULL).
- char *jit_code;
- size_t jit_size;
- char *debug_info;
-
- struct dasm_State *dynasm;
#endif
// For exiting the decoder on error.
sigjmp_buf exitjmp;
} upb_decoder;
-// Initializes/uninitializes a decoder for calling into the given handlers
-// or to write into the given msgdef, given its accessors). Takes a ref
-// on the handlers.
-void upb_decoder_init(upb_decoder *d, upb_handlers *h);
+void upb_decoder_init(upb_decoder *d);
void upb_decoder_uninit(upb_decoder *d);
-// Resets the internal state of an already-allocated decoder. This puts it in a
-// state where it has not seen any data, and expects the next data to be from
-// the beginning of a new protobuf. Decoders must be reset before they can be
-// used. A decoder can be reset multiple times. "input" must live until the
-// decoder is reset again (or destroyed).
-void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure);
+// Resets the plan that the decoder will parse from. "msg_offset" indicates
+// which message from the plan will be used as the top-level message.
+//
+// This will also reset the decoder's input to be uninitialized --
+// upb_decoder_resetinput() must be called before parsing can occur. The plan
+// must live until the decoder is destroyed or reset to a different plan.
+//
+// Must be called before upb_decoder_resetinput() or upb_decoder_decode().
+void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset);
+
+// Resets the input of an already-allocated decoder. This puts it in a state
+// where it has not seen any data, and expects the next data to be from the
+// beginning of a new protobuf. Decoders must have their input reset before
+// they can be used. A decoder can have its input reset multiple times.
+// "input" must live until the decoder is destroyed or has its input reset
+// again. "c" is the closure that will be passed to the handlers.
+//
+// Must be called before upb_decoder_decode().
+void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c);
+
+// Decodes serialized data (calling handlers as the data is parsed), returning
+// the success of the operation (call upb_decoder_status() for details).
+upb_success_t upb_decoder_decode(upb_decoder *d);
+
+INLINE const upb_status *upb_decoder_status(upb_decoder *d) {
+ return &d->status;
+}
+
+// Implementation details
+
+struct _upb_decoderplan {
+ upb_handlers *handlers; // owns reference.
+
+#ifdef UPB_USE_JIT_X64
+ // JIT-generated machine code (else NULL).
+ char *jit_code;
+ size_t jit_size;
+ char *debug_info;
-// Decodes serialized data (calling handlers as the data is parsed) until error
-// or EOF (see *status for details).
-void upb_decoder_decode(upb_decoder *d, upb_status *status);
+ // This pointer is allocated by dasm_init() and freed by dasm_free().
+ struct dasm_State *dynasm;
+#endif
+};
#ifdef __cplusplus
} /* extern "C" */
diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc
index 75e5b6b..807191b 100644
--- a/upb/pb/decoder_x64.dasc
+++ b/upb/pb/decoder_x64.dasc
@@ -4,20 +4,15 @@
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
-|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
-|// generates code specialized to parsing the specific message and
-|// calling specific handlers.
+|// JIT compiler for upb_decoder on x86. Given a upb_decoderplan object (which
+|// contains an embedded set of upb_handlers), generates code specialized to
+|// parsing the specific message and calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
|// function) we must respect alignment rules. On OS X, this means aligning
|// the stack to 16 bytes.
-#define UPB_NONE -1
-#define UPB_MULTIPLE -2
-#define UPB_TOPLEVEL_ONE -3
-
#include <sys/mman.h>
-#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
#ifndef MAP_ANONYMOUS
@@ -73,15 +68,15 @@ gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
-void upb_reg_jit_gdb(upb_decoder *d) {
+void upb_reg_jit_gdb(upb_decoderplan *plan) {
// Create debug info.
size_t elf_len = sizeof(upb_jit_debug_elf_file);
- d->debug_info = malloc(elf_len);
- memcpy(d->debug_info, upb_jit_debug_elf_file, elf_len);
- uint64_t *p = (void*)d->debug_info;
- for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) {
- if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
- if (*p == 0x321) { *p = d->jit_size; }
+ plan->debug_info = malloc(elf_len);
+ memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);
+ uint64_t *p = (void*)plan->debug_info;
+ for (; (void*)(p+1) <= (void*)plan->debug_info + elf_len; ++p) {
+ if (*p == 0x12345678) { *p = (uintptr_t)plan->jit_code; }
+ if (*p == 0x321) { *p = plan->jit_size; }
}
// Register the JIT-ted code with GDB.
@@ -89,7 +84,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
e->next_entry = __jit_debug_descriptor.first_entry;
e->prev_entry = NULL;
if (e->next_entry) e->next_entry->prev_entry = e;
- e->symfile_addr = d->debug_info;
+ e->symfile_addr = plan->debug_info;
e->symfile_size = elf_len;
__jit_debug_descriptor.first_entry = e;
__jit_debug_descriptor.relevant_entry = e;
@@ -99,12 +94,17 @@ void upb_reg_jit_gdb(upb_decoder *d) {
#else
-void upb_reg_jit_gdb(upb_decoder *d) {
- (void)d;
+void upb_reg_jit_gdb(upb_decoderplan *plan) {
+ (void)plan;
}
#endif
+// Has to be a separate function, otherwise GCC will complain about
+// expressions like (&foo != NULL) because they will never evaluate
+// to false.
+static void upb_assert_notnull(void *addr) { assert(addr != NULL); }
+
|.arch x64
|.actionlist upb_jit_actionlist
|.globals UPB_JIT_GLOBAL_
@@ -126,7 +126,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
-|.define PTR, rbx
+|.define PTR, rbx // Writing this to DECODER->ptr commits our progress.
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type BYTEREGION,upb_byteregion, r14
@@ -134,6 +134,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.type STDARRAY, upb_stdarray
|
|.macro callp, addr
+|| upb_assert_notnull(addr);
|| if ((uintptr_t)addr < 0xffffffff) {
| call &addr
|| } else {
@@ -191,11 +192,12 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| decode_loaded_varint, 0
| mov ecx, edx
| shr ecx, 3
-| and edx, 0x7
+| and edx, 0x7 // For the type check that will happen later.
| cmp ecx, m->max_field_number // Bounds-check the field.
| ja ->exit_jit // In the future; could be unknown label
|| if ((uintptr_t)m->tablearray < 0xffffffff) {
-| mov rax, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
+| // TODO: support hybrid array/hash tables.
+| mov rax, qword [rcx*8 + m->tablearray]
|| } else {
| mov64 rax, (uintptr_t)m->tablearray
| mov rax, qword [rax + rcx*8]
@@ -217,8 +219,9 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
| cmp rax, qword DECODER->dispatcher.limit
| jae ->exit_jit // Frame stack overflow.
-| mov qword FRAME:rax->f, f
-| mov dword FRAME:rax->end_ofs, end_offset_
+| mov64 r8, (uintptr_t)f
+| mov qword FRAME:rax->f, r8
+| mov qword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, is_sequence_
| mov DECODER->dispatcher.top, rax
| mov FRAME, rax
@@ -294,7 +297,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
-| or byte [reg + (hasbit / 8)], (1 << (hasbit % 8))
+| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
|| }
|.endmacro
@@ -304,8 +307,9 @@ void upb_reg_jit_gdb(upb_decoder *d) {
#include "upb/msg.h"
// Decodes the next val into ARG3, advances PTR.
-static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
- uint8_t type, size_t tag_size) {
+static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
+ upb_mhandlers *m,
+ uint8_t type, size_t tag_size) {
// Decode the value into arg 3 for the callback.
switch (type) {
case UPB_TYPE(DOUBLE):
@@ -365,9 +369,9 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
// robust checks.
| mov ecx, dword [PTR + tag_size]
| decode_loaded_varint tag_size
- | mov rdi, DECODER->effective_end
+ | mov rdi, DECODER->end
| sub rdi, rax
- | cmp ARG3_64, rdi // if (len > d->effective_end - str)
+ | cmp ARG3_64, rdi // if (len > d->end - str)
| ja ->exit_jit // Can't deliver, whole string not in buf.
// Update PTR to point past end of string.
@@ -401,8 +405,8 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
#if 0
// These appear not to speed things up, but keeping around for
// further experimentation.
-static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
- upb_fhandlers *f) {
+static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
+ upb_fhandlers *f) {
| mov eax, STDARRAY:ARG1_64->len
| cmp eax, STDARRAY:ARG1_64->size
| jne >2
@@ -434,18 +438,19 @@ static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
}
#endif
-static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
+static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
+ upb_fhandlers *f) {
// Call callbacks.
if (upb_issubmsgtype(f->type)) {
if (f->type == UPB_TYPE(MESSAGE)) {
| mov rsi, PTR
| sub rsi, DECODER->buf
- | add esi, ARG3_32 // = (d->ptr - d->buf) + delim_len
+ | add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len
} else {
assert(f->type == UPB_TYPE(GROUP));
- | mov esi, UPB_NONDELIMITED
+ | mov rsi, UPB_NONDELIMITED
}
- | pushframe f, esi, false
+ | pushframe f, rsi, false
// Call startsubmsg handler (if any).
if (f->startsubmsg) {
@@ -456,15 +461,11 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| mov CLOSURE, rdx
}
| mov qword FRAME->closure, CLOSURE
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ | mov DECODER->ptr, PTR
const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
- if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
- | jmp =>sub_m->jit_startmsg_pclabel;
- } else {
- | call =>sub_m->jit_startmsg_pclabel;
- }
-
- |=>f->jit_submsg_done_pclabel:
+ | call =>sub_m->jit_startmsg_pclabel;
// Call endsubmsg handler (if any).
if (f->endsubmsg) {
@@ -474,6 +475,8 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| callp f->endsubmsg
}
| popframe upb_fhandlers_getmsg(f)
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ | mov DECODER->ptr, PTR
} else {
| mov ARG1_64, CLOSURE
// Test for callbacks we can specialize.
@@ -499,15 +502,15 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
f->value == &upb_stdmsg_setuint64_r ||
f->value == &upb_stdmsg_setptr_r ||
f->value == &upb_stdmsg_setdouble_r) {
- upb_decoder_jit_doappend(d, 8, f);
+ upb_decoderplan_jit_doappend(plan, 8, f);
} else if (f->value == &upb_stdmsg_setint32_r ||
f->value == &upb_stdmsg_setuint32_r ||
f->value == &upb_stdmsg_setfloat_r) {
- upb_decoder_jit_doappend(d, 4, f);
+ upb_decoderplan_jit_doappend(plan, 4, f);
} else if (f->value == &upb_stdmsg_setbool_r) {
- upb_decoder_jit_doappend(d, 1, f);
+ upb_decoderplan_jit_doappend(plan, 1, f);
#endif
- } else {
+ } else if (f->value) {
// Load closure and fval into arg registers.
||#ifndef NDEBUG
||// Since upb_value carries type information in debug mode
@@ -519,14 +522,15 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| callp f->value
}
| sethas CLOSURE, f->valuehasbit
+ // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
+ | mov DECODER->ptr, PTR
}
- // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
}
// PTR should point to the beginning of the tag.
-static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
- uint32_t next_tag, upb_mhandlers *m,
- upb_fhandlers *f, upb_fhandlers *next_f) {
+static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
+ uint64_t next_tag, upb_mhandlers *m,
+ upb_fhandlers *f, upb_fhandlers *next_f) {
// PC-label for the dispatch table.
// We check the wire type (which must be loaded in edx) because the
// table is keyed on field number, not type.
@@ -535,8 +539,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
| jne ->exit_jit // In the future: could be an unknown field or packed.
|=>f->jit_pclabel_notypecheck:
if (f->repeated) {
- | mov esi, FRAME->end_ofs
- | pushframe f, esi, true
+ | mov rsi, FRAME->end_ofs
+ | pushframe f, rsi, true
if (f->startseq) {
| mov ARG1_64, CLOSURE
| loadfval f
@@ -555,8 +559,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
return;
}
- upb_decoder_jit_decodefield(d, m, f->type, tag_size);
- upb_decoder_jit_callcb(d, f);
+ upb_decoderplan_jit_decodefield(plan, m, f->type, tag_size);
+ upb_decoderplan_jit_callcb(plan, f);
// Epilogue: load next tag, check for repeated field.
| check_eob m
@@ -586,13 +590,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
return *(uint32_t*)a - *(uint32_t*)b;
}
-static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
+static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
|=>m->jit_startmsg_pclabel:
+ // There was a call to get here, so we need to align the stack.
+ | sub rsp, 8
- if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
- // There was a call to get here, so we need to align the stack.
- | sub rsp, 8
- }
// Call startmsg handler (if any):
if (m->startmsg) {
// upb_flow_t startmsg(void *closure);
@@ -615,23 +617,30 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
int num_keys = upb_inttable_count(&m->fieldtab);
uint32_t *keys = malloc(num_keys * sizeof(*keys));
int idx = 0;
- for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
+ for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
+ !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
keys[idx++] = upb_inttable_iter_key(i);
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
upb_fhandlers *last_f = NULL;
- uint32_t last_tag = 0;
+ uint64_t last_encoded_tag = 0;
for(int i = 0; i < num_keys; i++) {
- uint32_t key = keys[i];
- upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
- uint32_t tag = upb_vencode32(key);
- if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
- last_tag = tag;
+ uint32_t fieldnum = keys[i];
+ upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
+ upb_fhandlers *f = e->f;
+ assert(f->number == fieldnum);
+ uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
+ uint64_t encoded_tag = upb_vencode32(tag);
+ // No tag should be greater than 5 bytes.
+ assert(encoded_tag <= 0xffffffffff);
+ if (last_f) upb_decoderplan_jit_field(
+ plan, last_encoded_tag, encoded_tag, m, last_f, f);
+ last_encoded_tag = encoded_tag;
last_f = f;
}
- upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
+ upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);
free(keys);
@@ -655,22 +664,29 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
| callp m->endmsg
}
- if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
- // Counter previous alignment.
- | add rsp, 8
- | ret
- } else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
- | jmp ->exit_jit
- } else {
- | jmp =>m->jit_parent_field_done_pclabel
+ if (m->is_group) {
+ // Advance past the "end group" tag.
+ // TODO: Handle UPB_BREAK
+ | mov DECODER->ptr, PTR
}
+ // Counter previous alignment.
+ | add rsp, 8
+ | ret
}
-static const char *dbgfmt =
- "JIT encountered unknown field! wt=%d, fn=%d\n";
-
-static void upb_decoder_jit(upb_decoder *d) {
+static void upb_decoderplan_jit(upb_decoderplan *plan) {
+ // The JIT prologue/epilogue trampoline that is generated in this function
+ // does not depend on the handlers, so it will never vary. Ideally we would
+ // put it in an object file and just link it into upb so we could have only a
+ // single copy of it instead of one copy for each decoderplan. But our
+ // options for doing that are undesirable: GCC inline assembly is
+ // complicated, not portable to other compilers, and comes with subtle
+ // caveats about incorrect things that the optimizer might do if you, e.g.,
+ // execute non-local jumps. Putting this code in a .s file would force us to
+ // calculate the structure offsets ourselves instead of symbolically
+ // (ie. [r15 + 0xcd] instead of DECODER->ptr). So we tolerate a bit of
+ // unnecessary duplication/redundancy.
| push rbp
| mov rbp, rsp
| push r15
@@ -686,18 +702,14 @@ static void upb_decoder_jit(upb_decoder *d) {
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr
- upb_handlers *h = d->dispatcher.handlers;
- if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
- | call =>h->msgs[0]->jit_startmsg_pclabel
- | jmp ->exit_jit
- }
-
// TODO: push return addresses for re-entry (will be necessary for multiple
// buffer support).
- for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
+ | call ARG2_64
|->exit_jit:
- | mov DECODER->ptr, PTR
+ // Restore stack pointer to where it was before any "call" instructions
+ // inside our generated code.
+ | lea rsp, [rbp - 48]
// Counter previous alignment.
| add rsp, 8
| pop rbx
@@ -707,122 +719,128 @@ static void upb_decoder_jit(upb_decoder *d) {
| pop r15
| leave
| ret
- |=>0:
- | mov rdi, stderr
- | mov rsi, dbgfmt
- | callp fprintf
- | callp abort
+
+ upb_handlers *h = plan->handlers;
+ for (int i = 0; i < h->msgs_len; i++)
+ upb_decoderplan_jit_msg(plan, h->msgs[i]);
}
-void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
- uint32_t *pclabel_count) {
+static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
+ uint32_t *pclabel_count) {
f->jit_pclabel = (*pclabel_count)++;
f->jit_pclabel_notypecheck = (*pclabel_count)++;
- f->jit_submsg_done_pclabel = (*pclabel_count)++;
}
-void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {
+static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
+ uint32_t *pclabel_count) {
m->jit_startmsg_pclabel = (*pclabel_count)++;
m->jit_endofbuf_pclabel = (*pclabel_count)++;
m->jit_endofmsg_pclabel = (*pclabel_count)++;
m->jit_dyndispatch_pclabel = (*pclabel_count)++;
m->jit_unknownfield_pclabel = (*pclabel_count)++;
- m->jit_parent_field_done_pclabel = UPB_NONE;
m->max_field_number = 0;
upb_inttable_iter i;
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
uint32_t key = upb_inttable_iter_key(i);
m->max_field_number = UPB_MAX(m->max_field_number, key);
- upb_fhandlers *f = upb_inttable_iter_value(i);
- upb_decoder_jit_assignfieldlabs(f, pclabel_count);
+ upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
+ upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
}
- // XXX: Won't work for large field numbers; will need to use a upb_table.
+ // TODO: support large field numbers by either using a hash table or
+ // generating code for a binary search. For now large field numbers
+ // will just fall back to the table decoder.
+ m->max_field_number = UPB_MIN(m->max_field_number, 16000);
m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
}
-// Second pass: for messages that have only one parent, link them to the field
-// from which they are called.
-void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
- upb_inttable_iter i;
- for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
- i = upb_inttable_next(&m->fieldtab, i)) {
- upb_fhandlers *f = upb_inttable_iter_value(i);
- if (upb_issubmsgtype(f->type)) {
- upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
- if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
- sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
- } else {
- sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE;
- }
- }
- }
-}
-
-void upb_decoder_makejit(upb_decoder *d) {
- d->debug_info = NULL;
+static void upb_decoderplan_makejit(upb_decoderplan *plan) {
+ plan->debug_info = NULL;
// Assign pclabels.
- uint32_t pclabel_count = 1;
- upb_handlers *h = d->dispatcher.handlers;
+ uint32_t pclabel_count = 0;
+ upb_handlers *h = plan->handlers;
for (int i = 0; i < h->msgs_len; i++)
- upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
- for (int i = 0; i < h->msgs_len; i++)
- upb_decoder_jit_assignmsglabs2(h->msgs[i]);
-
- if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
- h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
- }
+ upb_decoderplan_jit_assignmsglabs(h->msgs[i], &pclabel_count);
void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
- dasm_init(d, 1);
- dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
- dasm_growpc(d, pclabel_count);
- dasm_setup(d, upb_jit_actionlist);
+ dasm_init(plan, 1);
+ dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
+ dasm_growpc(plan, pclabel_count);
+ dasm_setup(plan, upb_jit_actionlist);
- upb_decoder_jit(d);
+ upb_decoderplan_jit(plan);
- dasm_link(d, &d->jit_size);
+ int dasm_status = dasm_link(plan, &plan->jit_size);
+ (void)dasm_status;
+ assert(dasm_status == DASM_S_OK);
- d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
- MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ plan->jit_code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
- upb_reg_jit_gdb(d);
+ upb_reg_jit_gdb(plan);
- dasm_encode(d, d->jit_code);
+ dasm_encode(plan, plan->jit_code);
// Create dispatch tables.
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *m = h->msgs[i];
+ m->jit_func =
+ plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
for (uint32_t j = 0; j <= m->max_field_number; j++) {
- upb_fhandlers *f = NULL;
- for (int k = 0; k < 8; k++) {
- f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
- if (f) break;
- }
+ upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
+ upb_fhandlers *f = e ? e->f : NULL;
if (f) {
- m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
+ m->tablearray[j] =
+ plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
} else {
- // Don't handle unknown fields yet.
- m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
+ // TODO: extend the JIT to handle unknown fields.
+ // For the moment we exit the JIT for any unknown field.
+ m->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
}
}
}
- dasm_free(d);
+ dasm_free(plan);
free(globals);
- mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);
+ mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ);
// View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
// Or: ndisasm -b 64 /tmp/machine-code
FILE *f = fopen("/tmp/machine-code", "wb");
- fwrite(d->jit_code, d->jit_size, 1, f);
+ fwrite(plan->jit_code, plan->jit_size, 1, f);
fclose(f);
}
-void upb_decoder_freejit(upb_decoder *d) {
- munmap(d->jit_code, d->jit_size);
- free(d->debug_info);
+static void upb_decoderplan_freejit(upb_decoderplan *plan) {
+ munmap(plan->jit_code, plan->jit_size);
+ free(plan->debug_info);
// TODO: unregister
}
+
+// Calls the plan's JIT-generated code for this decoder, but only when the
+// plan has JIT code, the dispatcher frame stack is at its base
+// (top == stack), and d->ptr is non-NULL and below d->jit_end. Otherwise
+// this function does nothing.
+static void upb_decoder_enterjit(upb_decoder *d) {
+ if (d->plan->jit_code &&
+ d->dispatcher.top == d->dispatcher.stack &&
+ d->ptr && d->ptr < d->jit_end) {
+#ifndef NDEBUG
+ // Debug builds only: bind known values to rbx and r12-r15 so the asserts
+ // after the call can detect a register the JIT code failed to restore.
+ register uint64_t rbx asm ("rbx") = 11;
+ register uint64_t r12 asm ("r12") = 12;
+ register uint64_t r13 asm ("r13") = 13;
+ register uint64_t r14 asm ("r14") = 14;
+ register uint64_t r15 asm ("r15") = 15;
+#endif
+ // Decodes as many fields as possible, updating d->ptr appropriately,
+ // before falling through to the slow(er) path.
+ // The second argument is the jit_func of the message at index
+ // d->msg_offset in the plan's handlers.
+ void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code;
+ upb_jit_decode(d, d->plan->handlers->msgs[d->msg_offset]->jit_func);
+ assert(d->ptr <= d->end);
+
+ // Test that callee-save registers were properly restored.
+ assert(rbx == 11);
+ assert(r12 == 12);
+ assert(r13 == 13);
+ assert(r14 == 14);
+ assert(r15 == 15);
+ }
+}
diff --git a/upb/pb/glue.c b/upb/pb/glue.c
index 3176355..4949fe3 100644
--- a/upb/pb/glue.c
+++ b/upb/pb/glue.c
@@ -12,8 +12,8 @@
#include "upb/pb/glue.h"
#include "upb/pb/textprinter.h"
-void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
- upb_status *status) {
+bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
+ bool allow_jit, upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
@@ -21,13 +21,21 @@ void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
upb_decoder d;
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, md);
- upb_decoder_init(&d, h);
+ upb_decoderplan *p = upb_decoderplan_new(h, allow_jit);
+ upb_decoder_init(&d);
upb_handlers_unref(h);
- upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg);
- upb_decoder_decode(&d, status);
+ upb_decoder_resetplan(&d, p, 0);
+ upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg);
+ upb_success_t ret = upb_decoder_decode(&d);
+ // stringsrc and the handlers registered by upb_accessors_reghandlers()
+ // should not suspend.
+ assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d)));
+ if (status) upb_status_copy(status, upb_decoder_status(&d));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
+ upb_decoderplan_unref(p);
+ return ret == UPB_OK;
}
void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
@@ -35,7 +43,7 @@ void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
size_t len;
char *data = upb_readfile(fname, &len);
if (!data) goto err;
- upb_strtomsg(data, len, msg, md, s);
+ upb_strtomsg(data, len, msg, md, false, s);
if (!upb_ok(s)) goto err;
return msg;
@@ -69,7 +77,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
}
#endif
-// TODO: read->load.
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_status *status) {
upb_stringsrc strsrc;
@@ -79,17 +86,21 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_handlers *h = upb_handlers_new();
upb_descreader_reghandlers(h);
+ upb_decoderplan *p = upb_decoderplan_new(h, false);
upb_decoder d;
- upb_decoder_init(&d, h);
+ upb_decoder_init(&d);
upb_handlers_unref(h);
upb_descreader r;
upb_descreader_init(&r);
- upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &r);
+ upb_decoder_resetplan(&d, p, 0);
+ upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r);
- upb_decoder_decode(&d, status);
+ upb_success_t ret = upb_decoder_decode(&d);
+ if (status) upb_status_copy(status, upb_decoder_status(&d));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
- if (!upb_ok(status)) {
+ upb_decoderplan_unref(p);
+ if (ret != UPB_OK) {
upb_descreader_uninit(&r);
return NULL;
}
diff --git a/upb/pb/glue.h b/upb/pb/glue.h
index 38e8d8e..ff8c85e 100644
--- a/upb/pb/glue.h
+++ b/upb/pb/glue.h
@@ -36,8 +36,8 @@ extern "C" {
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
-void upb_strtomsg(const char *str, size_t len, void *msg,
- const upb_msgdef *md, upb_status *s);
+bool upb_strtomsg(const char *str, size_t len, void *msg,
+ const upb_msgdef *md, bool allow_jit, upb_status *s);
// Parses the given file into a new message of the given type. Caller owns
// the returned message (or NULL if an error occurred).
diff --git a/upb/pb/varint.h b/upb/pb/varint.h
index 19977e9..815a7a1 100644
--- a/upb/pb/varint.h
+++ b/upb/pb/varint.h
@@ -19,6 +19,18 @@
extern "C" {
#endif
+// The maximum number of bytes that it takes to encode a 64-bit varint.
+// Note that with a better encoding this could be 9 (TODO: write up a
+// wiki document about this).
+#define UPB_PB_VARINT_MAX_LEN 10
+
+/* Zig-zag encoding/decoding **************************************************/
+
+// Zig-zag maps signed integers onto unsigned ones so that values of small
+// magnitude get small encodings: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
+// The decoders undo that mapping.
+INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
+INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
+// The left shift is performed on the unsigned representation: left-shifting
+// a negative (or overflowing) signed value is undefined behavior in C.
+INLINE uint32_t upb_zzenc_32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); }
+INLINE uint64_t upb_zzenc_64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); }
+
/* Decoding *******************************************************************/
// All decoding functions return this struct by value.
@@ -56,7 +68,7 @@ done:
INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
uint64_t val;
uint64_t b;
- upb_decoderet r = {(void*)0, 0};
+ upb_decoderet r = {NULL, 0};
b = *(p++); val = (b & 0x7f) ; if(!(b & 0x80)) goto done;
b = *(p++); val |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
@@ -124,17 +136,33 @@ INLINE int upb_value_size(uint64_t val) {
return val == 0 ? 1 : high_bit / 8 + 1;
}
+// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
+// bytes long), returning how many bytes were used.
+//
+// TODO: benchmark and optimize if necessary.
+INLINE size_t upb_vencode64(uint64_t val, char *buf) {
+ // Zero is handled up front: the loop below emits no bytes when val == 0.
+ if (val == 0) { buf[0] = 0; return 1; }
+ size_t i = 0;
+ while (val) {
+ // Emit the low 7 bits; set the high (0x80) bit when more bits remain.
+ uint8_t byte = val & 0x7f;
+ val >>= 7;
+ if (val) byte |= 0x80;
+ buf[i++] = byte;
+ }
+ return i;
+}
+
// Encodes a 32-bit varint, *not* sign-extended.
INLINE uint64_t upb_vencode32(uint32_t val) {
+ char buf[UPB_PB_VARINT_MAX_LEN];
+ size_t bytes = upb_vencode64(val, buf);
uint64_t ret = 0;
- for (int bitpos = 0; val; bitpos+=8, val >>=7) {
- if (bitpos > 0) ret |= (1 << (bitpos-1));
- ret |= (val & 0x7f) << bitpos;
- }
+ assert(bytes <= 5);
+ memcpy(&ret, buf, bytes);
+ assert(ret <= 0xffffffffff);
return ret;
}
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/upb/table.h b/upb/table.h
index 0786a1a..0c0a785 100644
--- a/upb/table.h
+++ b/upb/table.h
@@ -127,6 +127,8 @@ INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) {
// We have the caller specify the entry_size because fixing this as a literal
// (instead of reading table->entry_size) gives the compiler more ability to
// optimize.
+//
+// Note: All returned pointers are invalidated by inserts!
INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
size_t entry_size, size_t value_size) {
upb_inttable_value *arrval =
@@ -203,8 +205,11 @@ typedef struct {
} upb_inttable_iter;
upb_inttable_iter upb_inttable_begin(const upb_inttable *t);
-upb_inttable_iter upb_inttable_next(const upb_inttable *t, upb_inttable_iter iter);
-INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; }
+upb_inttable_iter upb_inttable_next(const upb_inttable *t,
+ upb_inttable_iter iter);
+INLINE bool upb_inttable_done(upb_inttable_iter iter) {
+ return iter.value == NULL;
+}
INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
return iter.key;
}
diff --git a/upb/upb.c b/upb/upb.c
index 5002e10..a3e07e4 100644
--- a/upb/upb.c
+++ b/upb/upb.c
@@ -15,29 +15,32 @@
#include "upb/bytestream.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(wire_type, ctype, inmemory_type) \
- {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
+#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \
+ {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \
+ #ctype, is_numeric},
const upb_type_info upb_types[] = {
- TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake)
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64) // UINT64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // INT32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64) // FIXED64
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32) // FIXED32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL) // BOOL
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // STRING
- TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE) // GROUP
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE) // MESSAGE
- TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // BYTES
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32) // UINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32) // ENUM
- TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32) // SFIXED32
- TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64) // SFIXED64
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // SINT32
- TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // SINT64
+ // END_GROUP is not real, but used to signify the pseudo-field that
+ // ends a group from within the group.
+ TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE, false) // ENDGROUP
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE, true) // DOUBLE
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT, true) // FLOAT
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // INT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64, true) // UINT64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // INT32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64, true) // FIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32, true) // FIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL, true) // BOOL
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // STRING
+ TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE, false) // GROUP
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE, false) // MESSAGE
+ TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // BYTES
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32, true) // UINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32, true) // ENUM
+ TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32, true) // SFIXED32
+ TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64, true) // SFIXED64
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // SINT32
+ TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // SINT64
};
#ifdef NDEBUG
@@ -66,13 +69,13 @@ void upb_status_seterrf(upb_status *s, const char *msg, ...) {
}
void upb_status_seterrliteral(upb_status *status, const char *msg) {
- status->code = UPB_ERROR;
+ status->error = true;
status->str = msg;
status->space = NULL;
}
void upb_status_copy(upb_status *to, const upb_status *from) {
- to->status = from->status;
+ to->error = from->error;
to->eof = from->eof;
to->code = from->code;
to->space = from->space;
@@ -92,15 +95,20 @@ const char *upb_status_getstr(const upb_status *_status) {
// Function is logically const but can modify internal state to materialize
// the string.
upb_status *status = (upb_status*)_status;
- if (status->str == NULL && status->space && status->space->code_to_string) {
- status->space->code_to_string(status->code, status->buf, status->bufsize);
- status->str = status->buf;
+ if (status->str == NULL && status->space) {
+ if (status->space->code_to_string) {
+ status->space->code_to_string(status->code, status->buf, status->bufsize);
+ status->str = status->buf;
+ } else {
+ upb_status_seterrf(status, "No message, error space=%s, code=%d\n",
+ status->space->name, status->code);
+ }
}
return status->str;
}
void upb_status_clear(upb_status *status) {
- status->status = UPB_OK;
+ status->error = false;
status->eof = false;
status->code = 0;
status->space = NULL;
@@ -114,19 +122,38 @@ void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) {
}
void upb_status_fromerrno(upb_status *status) {
- if (errno == 0) {
- status->status = UPB_OK;
- } else if (errno == EAGAIN || errno == EWOULDBLOCK) {
- status->status = UPB_WOULDBLOCK;
- } else {
- status->status = UPB_ERROR;
+ if (errno != 0 && !upb_errno_is_wouldblock()) {
+ status->error = true;
+ upb_status_setcode(status, &upb_posix_errorspace, errno);
+ }
+}
+
+// Returns true if errno currently equals EAGAIN or EWOULDBLOCK (whichever of
+// the two the platform defines), false otherwise.
+bool upb_errno_is_wouldblock() {
+ return
+#ifdef EAGAIN
+ errno == EAGAIN ||
+#endif
+#ifdef EWOULDBLOCK
+ errno == EWOULDBLOCK ||
+#endif
+ // Terminates the "||" chain so the expression is valid even when neither
+ // macro is defined.
+ false;
+}
+
+bool upb_posix_codetostr(int code, char *buf, size_t len) {
+ if (strerror_r(code, buf, len) == -1) {
+ if (errno == EINVAL) {
+ return snprintf(buf, len, "Invalid POSIX error number %d\n", code) >= len;
+ } else if (errno == ERANGE) {
+ return false;
+ }
+ assert(false);
}
- upb_status_setcode(status, &upb_posix_errorspace, errno);
+ return true;
}
-upb_errorspace upb_posix_errorspace = {"POSIX", NULL}; // TODO
+upb_errorspace upb_posix_errorspace = {"POSIX", &upb_posix_codetostr};
-int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args) {
// Try once without reallocating. We have to va_copy because we might have
// to call vsnprintf again.
@@ -141,7 +168,7 @@ int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
// Need to print again, because some characters were truncated. vsnprintf
// will not write the entire string unless you give it space to store the
// NULL terminator also.
- while (*size < (ofs + true_len + 1)) *size = UPB_MAX(*size * 2, 2);
+ *size = (ofs + true_len + 1);
char *newbuf = realloc(*buf, *size);
if (!newbuf) return -1;
vsnprintf(newbuf + ofs, true_len + 1, fmt, args);
diff --git a/upb/upb.h b/upb/upb.h
index e43418f..d11c7cb 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -10,10 +10,12 @@
#ifndef UPB_H_
#define UPB_H_
-#include <stdbool.h>
-#include <stdint.h>
#include <assert.h>
#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
#include "descriptor_const.h"
#include "atomic.h"
@@ -26,6 +28,12 @@ extern "C" {
#define INLINE static inline
#endif
+#ifdef __GNUC__
+#define UPB_NORETURN __attribute__((__noreturn__))
+#else
+#define UPB_NORETURN
+#endif
+
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
@@ -115,6 +123,7 @@ typedef struct {
uint8_t native_wire_type;
uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
const char *ctype;
+ bool is_numeric; // Only numeric types can be packed.
} upb_type_info;
// A static array of info about all of the field types, indexed by type number.
@@ -176,6 +185,7 @@ typedef struct {
return val.val.membername; \
} \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
+ memset(val, 0, sizeof(*val)); \
SET_TYPE(val->type, proto_type); \
val->val.membername = cval; \
} \
@@ -206,27 +216,31 @@ extern upb_value UPB_NO_VALUE;
/* upb_status *****************************************************************/
-enum {
+typedef enum {
UPB_OK, // The operation completed successfully.
- UPB_WOULDBLOCK, // Stream is nonblocking and the operation would block.
+ UPB_SUSPENDED, // The operation was suspended and may be resumed later.
UPB_ERROR, // An error occurred.
-};
+} upb_success_t;
typedef struct {
const char *name;
// Writes a NULL-terminated string to "buf" containing an error message for
// the given error code, returning false if the message was too large to fit.
- bool (*code_to_string)(int code, char *buf, uint32_t len);
+ bool (*code_to_string)(int code, char *buf, size_t len);
} upb_errorspace;
typedef struct {
- char status;
+ bool error;
bool eof;
- int code; // Can be set to a more specific code (defined by error space).
+
+ // Specific status code defined by some error space (optional).
+ int code;
upb_errorspace *space;
+
+ // Error message (optional).
const char *str; // NULL when no message is present. NULL-terminated.
char *buf; // Owned by the status.
- uint32_t bufsize;
+ size_t bufsize;
} upb_status;
#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0}
@@ -234,7 +248,7 @@ typedef struct {
void upb_status_init(upb_status *status);
void upb_status_uninit(upb_status *status);
-INLINE bool upb_ok(const upb_status *status) { return status->code == UPB_OK; }
+INLINE bool upb_ok(const upb_status *status) { return !status->error; }
INLINE bool upb_eof(const upb_status *status) { return status->eof; }
void upb_status_clear(upb_status *status);
@@ -248,6 +262,7 @@ void upb_status_copy(upb_status *to, const upb_status *from);
extern upb_errorspace upb_posix_errorspace;
void upb_status_fromerrno(upb_status *status);
+bool upb_errno_is_wouldblock();
// Like vasprintf (which allocates a string large enough for the result), but
// uses *buf (which can be NULL) as a starting point and reallocates it only if
@@ -255,7 +270,7 @@ void upb_status_fromerrno(upb_status *status);
// of the buffer. Starts writing at the given offset into the string; bytes
// preceding this offset are unaffected. Returns the new length of the string,
// or -1 on memory allocation failure.
-int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
+int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args);
#ifdef __cplusplus
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback