From 7d3e2bd2c4cfd1296d1d6f996d7548de26540d41 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 15 Feb 2013 16:27:18 -0800 Subject: Sync with 8 months of Google-internal development. Many things have changed and been simplified. The memory-management story for upb_def and upb_handlers is much more robust; upb_def and upb_handlers should be fairly stable interfaces now. There is still much work to do for the runtime component (upb_sink). --- bindings/cpp/upb/bytestream.hpp | 276 ---------------------------------------- 1 file changed, 276 deletions(-) delete mode 100644 bindings/cpp/upb/bytestream.hpp (limited to 'bindings/cpp/upb/bytestream.hpp') diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp deleted file mode 100644 index 37d8157..0000000 --- a/bindings/cpp/upb/bytestream.hpp +++ /dev/null @@ -1,276 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. -// Author: Josh Haberman -// -// This file defines three core interfaces: -// - upb::ByteSink: for writing streams of data. -// - upb::ByteSource: for reading streams of data. -// - upb::ByteRegion: for reading from a specific region of a ByteSource; -// should be used by decoders instead of using a ByteSource directly. -// -// These interfaces are used by streaming encoders and decoders: for example, a -// protobuf parser gets its input from a upb::ByteRegion. They are virtual -// base classes so concrete implementations can get the data from a fd, a -// FILE*, a string, etc. -// -// A ByteRegion represents a region of data from a ByteSource. -// -// Parsers get data from this interface instead of a bytesrc because we often -// want to parse only a specific region of the input. For example, if we parse -// a string from our input but know that the string represents a protobuf, we -// can pass its ByteRegion to an appropriate protobuf parser. -// -// Since the bytes may be coming from a file or network socket, bytes must be -// fetched before they can be read (though in some cases this fetch may be a -// no-op). "fetch" is the only operation on a byteregion that could fail or -// block, because it is the only operation that actually performs I/O. -// -// Bytes can be discarded when they are no longer needed. Parsers should -// always discard bytes they no longer need, both so the buffers can be freed -// when possible and to give better visibility into what bytes the parser is -// still using. -// -// start discard read fetch end -// ofs ofs ofs ofs ofs -// | |--->Discard() | |--->Fetch() | -// V V V V V -// +-------------+-------------------------+-----------------+-----------------+ -// | discarded | | | fetchable | -// +-------------+-------------------------+-----------------+-----------------+ -// | <------------- loaded ------------------> | -// | <- available -> | -// | <---------- remaining ----------> | -// -// Note that the start offset may be something other than zero! A byteregion -// is a view into an underlying bytesrc stream, and the region may start -// somewhere other than the beginning of that stream. -// -// The region can be either delimited or nondelimited. A non-delimited region -// will keep returning data until the underlying data source returns EOF. A -// delimited region will return EOF at a predetermined offset. -// -// end -// ofs -// | -// V -// +-----------------------+ -// | delimited region | <-- hard EOF, even if data source has more data. -// +-----------------------+ -// -// +------------------------ -// | nondelimited region Z <-- won't return EOF until data source hits EOF. -// +------------------------ - -#ifndef UPB_BYTESTREAM_HPP -#define UPB_BYTESTREAM_HPP - -#include "upb/bytestream.h" -#include "upb/upb.hpp" -#include - -namespace upb { - -typedef upb_bytesuccess_t ByteSuccess; - -// Implement this interface to vend bytes to ByteRegions which will be used by -// a decoder. -class ByteSourceBase : public upb_bytesrc { - public: - ByteSourceBase() { upb_bytesrc_init(this, vtable()); } - virtual ~ByteSourceBase() { upb_bytesrc_uninit(this); } - - // Fetches at least one byte starting at ofs, setting *len to the actual - // number of bytes fetched (or 0 on EOF or error: see return value for - // details). It is valid for bytes to be fetched multiple times, as long as - // the bytes have not been previously discarded. - virtual ByteSuccess Fetch(uint64_t ofs, size_t* len) = 0; - - // Discards all data prior to ofs (except data that is pinned, if pinning - // support is added -- see TODO below). - virtual void Discard(uint64_t ofs) = 0; - - // Copies "len" bytes of data from ofs to "dst", which must be at least "len" - // bytes long. The given region must not be discarded. - virtual void Copy(uint64_t ofs, size_t len, char *dst) const = 0; - - // Returns a pointer to the bytesrc's internal buffer, storing in *len how - // much data is available. The given offset must not be discarded. The - // returned buffer is valid for as long as its bytes are not discarded (in - // the case that part of the returned buffer is discarded, only the - // non-discarded bytes remain valid). - virtual const char *GetPtr(uint64_t ofs, size_t *len) const = 0; - - // TODO: Add if/when there is a demonstrated need: - // - // // When the caller pins a region (which must not be already discarded), it - // // is guaranteed that the region will not be discarded (nor will the - // // bytesrc be destroyed) until the region is unpinned. However, not all - // // bytesrc's support pinning; a false return indicates that a pin was not - // // possible. - // virtual bool Pin(uint64_t ofs, size_t len); - // - // // Releases some number of pinned bytes from the beginning of a pinned - // // region (which may be fewer than the total number of bytes pinned). - // virtual void Unpin(uint64_t ofs, size_t len, size_t bytes_to_release); - // - // Adding pinning support would also involve adding a "pin_ofs" parameter to - // upb_bytesrc_fetch, so that the fetch can extend an already-pinned region. - private: - static upb_bytesrc_vtbl* vtable(); - static upb_bytesuccess_t VFetch(void*, uint64_t, size_t*); - static void VDiscard(void*, uint64_t); - static void VCopy(const void*, uint64_t, size_t, char*); - static const char *VGetPtr(const void*, uint64_t, size_t*); -}; - -class ByteRegion : public upb_byteregion { - public: - static const uint64_t kNondelimited = UPB_NONDELIMITED; - - ByteRegion() { upb_byteregion_init(this); } - ~ByteRegion() { upb_byteregion_uninit(this); } - - // Accessors for the regions bounds -- the meaning of these is described in - // the diagram above. - uint64_t start_ofs() const { return upb_byteregion_startofs(this); } - uint64_t discard_ofs() const { return upb_byteregion_discardofs(this); } - uint64_t fetch_ofs() const { return upb_byteregion_fetchofs(this); } - uint64_t end_ofs() const { return upb_byteregion_endofs(this); } - - // Returns how many bytes are fetched and available for reading starting from - // offset "offset". - uint64_t BytesAvailable(uint64_t offset) const { - return upb_byteregion_available(this, offset); - } - - // Returns the total number of bytes remaining after offset "offset", or - // kNondelimited if the byteregion is non-delimited. - uint64_t BytesRemaining(uint64_t offset) const { - return upb_byteregion_remaining(this, offset); - } - - uint64_t Length() const { return upb_byteregion_len(this); } - - // Sets the value of this byteregion to be a subset of the given byteregion's - // data. The caller is responsible for releasing this region before the src - // region is released (unless the region is first pinned, if pinning support - // is added. see below). - void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len) { - upb_byteregion_reset(this, src, ofs, len); - } - void Release() { upb_byteregion_release(this); } - - // Attempts to fetch more data, extending the fetched range of this - // byteregion. Returns true if the fetched region was extended by at least - // one byte, false on EOF or error (see *s for details). - ByteSuccess Fetch() { return upb_byteregion_fetch(this); } - - // Fetches all remaining data, returning false if the operation failed (see - // *s for details). May only be used on delimited byteregions. - ByteSuccess FetchAll() { return upb_byteregion_fetchall(this); } - - // Discards bytes from the byteregion up until ofs (which must be greater or - // equal to discard_ofs()). It is valid to discard bytes that have not been - // fetched (such bytes will never be fetched) but it is an error to discard - // past the end of a delimited byteregion. - void Discard(uint64_t ofs) { return upb_byteregion_discard(this, ofs); } - - // Copies "len" bytes of data into "dst", starting at ofs. The specified - // region must be available. - void Copy(uint64_t ofs, size_t len, char *dst) const { - upb_byteregion_copy(this, ofs, len, dst); - } - - // Copies all bytes from the byteregion into dst. Requires that the entire - // byteregion is fetched and that none has been discarded. - void CopyAll(char *dst) const { - upb_byteregion_copyall(this, dst); - } - - // Returns a pointer to the internal buffer for the byteregion starting at - // offset "ofs." Stores the number of bytes available in this buffer in *len. - // The returned buffer is invalidated when the byteregion is reset or - // released, or when the bytes are discarded. If the byteregion is not - // currently pinned, the pointer is only valid for the lifetime of the parent - // byteregion. - const char *GetPtr(uint64_t ofs, size_t *len) const { - return upb_byteregion_getptr(this, ofs, len); - } - - // Copies the contents of the byteregion into a newly-allocated, - // NULL-terminated string. Requires that the byteregion is fully fetched. - char *StrDup() const { - return upb_byteregion_strdup(this); - } - - template void AssignToString(T* str) { - uint64_t ofs = start_ofs(); - size_t len; - const char *ptr = GetPtr(ofs, &len); - // Emperically calling reserve() here is counterproductive and slows down - // benchmarks. If the parsing is happening in a tight loop that is reusing - // the string object, there is probably enough data reserved already and - // the reserve() call is extra overhead. - str->assign(ptr, len); - ofs += len; - while (ofs < end_ofs()) { - ptr = GetPtr(ofs, &len); - str->append(ptr, len); - ofs += len; - } - } - - // TODO: add if/when there is a demonstrated need. - // - // // Pins this byteregion's bytes in memory, allowing it to outlive its - // // parent byteregion. Normally a byteregion may only be used while its - // // parent is still valid, but a pinned byteregion may continue to be used - // // until it is reset or released. A byteregion must be fully fetched to - // // be pinned (this implies that the byteregion must be delimited). - // // - // // In some cases this operation may cause the input data to be copied. - // // - // // void Pin(); -}; - -class StringSource : public upb_stringsrc { - public: - StringSource() : upb_stringsrc() { upb_stringsrc_init(this); } - template explicit StringSource(const T& str) { - upb_stringsrc_init(this); - Reset(str); - } - StringSource(const char *data, size_t len) { - upb_stringsrc_init(this); - Reset(data, len); - } - ~StringSource() { upb_stringsrc_uninit(this); } - - void Reset(const char* data, size_t len) { - upb_stringsrc_reset(this, data, len); - } - - template void Reset(const T& str) { - Reset(str.c_str(), str.size()); - } - - ByteRegion* AllBytes() { - return static_cast(upb_stringsrc_allbytes(this)); - } - - upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); } -}; - -template <> inline ByteRegion* GetValue(Value v) { - return static_cast(upb_value_getbyteregion(v)); -} - -template <> inline Value MakeValue(ByteRegion* v) { - return upb_value_byteregion(v); -} - -} // namespace upb - -#endif -- cgit v1.2.3