summaryrefslogtreecommitdiff
path: root/bindings/cpp/upb/bytestream.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'bindings/cpp/upb/bytestream.hpp')
-rw-r--r--bindings/cpp/upb/bytestream.hpp238
1 files changed, 238 insertions, 0 deletions
diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp
new file mode 100644
index 0000000..968d542
--- /dev/null
+++ b/bindings/cpp/upb/bytestream.hpp
@@ -0,0 +1,238 @@
+//
+// upb - a minimalist implementation of protocol buffers.
+//
+// Copyright (c) 2011 Google Inc. See LICENSE for details.
+// Author: Josh Haberman <jhaberman@gmail.com>
+//
+// This file defines three core interfaces:
+// - upb::ByteSink: for writing streams of data.
+// - upb::ByteSource: for reading streams of data.
+// - upb::ByteRegion: for reading from a specific region of a ByteSource;
+// should be used by decoders instead of using a ByteSource directly.
+//
+// These interfaces are used by streaming encoders and decoders: for example, a
+// protobuf parser gets its input from a upb::ByteRegion. They are virtual
+// base classes so concrete implementations can get the data from a fd, a
+// FILE*, a string, etc.
+//
+// A ByteRegion represents a region of data from a ByteSource.
+//
+// Parsers get data from this interface instead of a bytesrc because we often
+// want to parse only a specific region of the input. For example, if we parse
+// a string from our input but know that the string represents a protobuf, we
+// can pass its ByteRegion to an appropriate protobuf parser.
+//
+// Since the bytes may be coming from a file or network socket, bytes must be
+// fetched before they can be read (though in some cases this fetch may be a
+// no-op). "fetch" is the only operation on a byteregion that could fail or
+// block, because it is the only operation that actually performs I/O.
+//
+// Bytes can be discarded when they are no longer needed. Parsers should
+// always discard bytes they no longer need, both so the buffers can be freed
+// when possible and to give better visibility into what bytes the parser is
+// still using.
+//
+// start discard read fetch end
+// ofs ofs ofs ofs ofs
+// | |--->Discard() | |--->Fetch() |
+// V V V V V
+// +-------------+-------------------------+-----------------+-----------------+
+// | discarded | | | fetchable |
+// +-------------+-------------------------+-----------------+-----------------+
+// | <------------- loaded ------------------> |
+// | <- available -> |
+// | <---------- remaining ----------> |
+//
+// Note that the start offset may be something other than zero! A byteregion
+// is a view into an underlying bytesrc stream, and the region may start
+// somewhere other than the beginning of that stream.
+//
+// The region can be either delimited or nondelimited. A non-delimited region
+// will keep returning data until the underlying data source returns EOF. A
+// delimited region will return EOF at a predetermined offset.
+//
+// end
+// ofs
+// |
+// V
+// +-----------------------+
+// | delimited region | <-- hard EOF, even if data source has more data.
+// +-----------------------+
+//
+// +------------------------
+// | nondelimited region Z <-- won't return EOF until data source hits EOF.
+// +------------------------
+
+#ifndef UPB_BYTESTREAM_HPP
+#define UPB_BYTESTREAM_HPP
+
+#include "upb/bytestream.h"
+#include "upb/upb.hpp"
+
+namespace upb {
+
+typedef upb_bytesuccess_t ByteSuccess;
+
+// Implement this interface to vend bytes to ByteRegions which will be used by
+// a decoder.
+class ByteSourceBase : public upb_bytesrc {
+ public:
+ ByteSourceBase() { upb_bytesrc_init(this, vtable()); }
+ virtual ~ByteSourceBase() { upb_bytesrc_uninit(this); }
+
+ // Fetches at least one byte starting at ofs, setting *len to the actual
+ // number of bytes fetched (or 0 on EOF or error: see return value for
+ // details). It is valid for bytes to be fetched multiple times, as long as
+ // the bytes have not been previously discarded.
+ virtual ByteSuccess Fetch(uint64_t ofs, size_t* len) = 0;
+
+ // Discards all data prior to ofs (except data that is pinned, if pinning
+ // support is added -- see TODO below).
+ virtual void Discard(uint64_t ofs) = 0;
+
+ // Copies "len" bytes of data from ofs to "dst", which must be at least "len"
+ // bytes long. The given region must not be discarded.
+ virtual void Copy(uint64_t ofs, size_t len, char *dst) const = 0;
+
+ // Returns a pointer to the bytesrc's internal buffer, storing in *len how
+ // much data is available. The given offset must not be discarded. The
+ // returned buffer is valid for as long as its bytes are not discarded (in
+ // the case that part of the returned buffer is discarded, only the
+ // non-discarded bytes remain valid).
+ virtual const char *GetPtr(uint64_t ofs, size_t *len) const = 0;
+
+ // TODO: Add if/when there is a demonstrated need:
+ //
+ // // When the caller pins a region (which must not be already discarded), it
+ // // is guaranteed that the region will not be discarded (nor will the
+ // // bytesrc be destroyed) until the region is unpinned. However, not all
+ // // bytesrc's support pinning; a false return indicates that a pin was not
+ // // possible.
+ // virtual bool Pin(uint64_t ofs, size_t len);
+ //
+ // // Releases some number of pinned bytes from the beginning of a pinned
+ // // region (which may be fewer than the total number of bytes pinned).
+ // virtual void Unpin(uint64_t ofs, size_t len, size_t bytes_to_release);
+ //
+ // Adding pinning support would also involve adding a "pin_ofs" parameter to
+ // upb_bytesrc_fetch, so that the fetch can extend an already-pinned region.
+ private:
+ static upb_bytesrc_vtbl* vtable();
+ static upb_bytesuccess_t VFetch(void*, uint64_t, size_t*);
+ static void VDiscard(void*, uint64_t);
+ static void VCopy(const void*, uint64_t, size_t, char*);
+ static const char *VGetPtr(const void*, uint64_t, size_t*);
+};
+
+class ByteRegion : public upb_byteregion {
+ public:
+ static const uint64_t kNondelimited = UPB_NONDELIMITED;
+
+ ByteRegion() { upb_byteregion_init(this); }
+ ~ByteRegion() { upb_byteregion_uninit(this); }
+
+ // Accessors for the regions bounds -- the meaning of these is described in
+ // the diagram above.
+ uint64_t start_ofs() const { return upb_byteregion_startofs(this); }
+ uint64_t discard_ofs() const { return upb_byteregion_discardofs(this); }
+ uint64_t fetch_ofs() const { return upb_byteregion_fetchofs(this); }
+ uint64_t end_ofs() const { return upb_byteregion_endofs(this); }
+
+ // Returns how many bytes are fetched and available for reading starting from
+ // offset "offset".
+ uint64_t BytesAvailable(uint64_t offset) const {
+ return upb_byteregion_available(this, offset);
+ }
+
+ // Returns the total number of bytes remaining after offset "offset", or
+ // kNondelimited if the byteregion is non-delimited.
+ uint64_t BytesRemaining(uint64_t offset) const {
+ return upb_byteregion_remaining(this, offset);
+ }
+
+ uint64_t Length() const { return upb_byteregion_len(this); }
+
+ // Sets the value of this byteregion to be a subset of the given byteregion's
+ // data. The caller is responsible for releasing this region before the src
+ // region is released (unless the region is first pinned, if pinning support
+ // is added. see below).
+ void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len) {
+ upb_byteregion_reset(this, src, ofs, len);
+ }
+ void Release() { upb_byteregion_release(this); }
+
+ // Attempts to fetch more data, extending the fetched range of this
+ // byteregion. Returns true if the fetched region was extended by at least
+ // one byte, false on EOF or error (see *s for details).
+ ByteSuccess Fetch() { return upb_byteregion_fetch(this); }
+
+ // Fetches all remaining data, returning false if the operation failed (see
+ // *s for details). May only be used on delimited byteregions.
+ ByteSuccess FetchAll() { return upb_byteregion_fetchall(this); }
+
+ // Discards bytes from the byteregion up until ofs (which must be greater or
+ // equal to discard_ofs()). It is valid to discard bytes that have not been
+ // fetched (such bytes will never be fetched) but it is an error to discard
+ // past the end of a delimited byteregion.
+ void Discard(uint64_t ofs) { return upb_byteregion_discard(this, ofs); }
+
+ // Copies "len" bytes of data into "dst", starting at ofs. The specified
+ // region must be available.
+ void Copy(uint64_t ofs, size_t len, char *dst) const {
+ upb_byteregion_copy(this, ofs, len, dst);
+ }
+
+ // Copies all bytes from the byteregion into dst. Requires that the entire
+ // byteregion is fetched and that none has been discarded.
+ void CopyAll(char *dst) const {
+ upb_byteregion_copyall(this, dst);
+ }
+
+ // Returns a pointer to the internal buffer for the byteregion starting at
+ // offset "ofs." Stores the number of bytes available in this buffer in *len.
+ // The returned buffer is invalidated when the byteregion is reset or
+ // released, or when the bytes are discarded. If the byteregion is not
+ // currently pinned, the pointer is only valid for the lifetime of the parent
+ // byteregion.
+ const char *GetPtr(uint64_t ofs, size_t *len) const {
+ return upb_byteregion_getptr(this, ofs, len);
+ }
+
+ // Copies the contents of the byteregion into a newly-allocated,
+ // NULL-terminated string. Requires that the byteregion is fully fetched.
+ char *StrDup() const {
+ return upb_byteregion_strdup(this);
+ }
+
+ // TODO: add if/when there is a demonstrated need.
+ //
+ // // Pins this byteregion's bytes in memory, allowing it to outlive its
+ // // parent byteregion. Normally a byteregion may only be used while its
+ // // parent is still valid, but a pinned byteregion may continue to be used
+ // // until it is reset or released. A byteregion must be fully fetched to
+ // // be pinned (this implies that the byteregion must be delimited).
+ // //
+ // // In some cases this operation may cause the input data to be copied.
+ // //
+ // // void Pin();
+};
+
+class StringSource : public upb_stringsrc {
+ public:
+ StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
+ ~StringSource() { upb_stringsrc_uninit(this); }
+
+ void Reset(const char* data, size_t len) {
+ upb_stringsrc_reset(this, data, len);
+ }
+
+ ByteRegion* AllBytes() {
+ return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
+ }
+
+ upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
+};
+
+} // namespace upb
+
+#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback