diff options
Diffstat (limited to 'src/upb_bytestream.h')
-rw-r--r-- | src/upb_bytestream.h | 213 |
1 files changed, 144 insertions, 69 deletions
diff --git a/src/upb_bytestream.h b/src/upb_bytestream.h index e4b51fd..836abb0 100644 --- a/src/upb_bytestream.h +++ b/src/upb_bytestream.h @@ -1,120 +1,195 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2011 Google Inc. See LICENSE for details. * Author: Josh Haberman <jhaberman@gmail.com> * - * Defines the interfaces upb_bytesrc and upb_bytesink, which are abstractions - * of read()/write() with useful buffering/sharing semantics. + * This file contains upb_bytesrc and upb_bytesink, which are abstractions of + * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing + * semantics. They are virtual base classes so concrete implementations + * can get the data from a fd, a string, a cord, etc. + * + * Byte streams are NOT thread-safe! (Like f{read,write}_unlocked()) */ #ifndef UPB_BYTESTREAM_H #define UPB_BYTESTREAM_H #include <stdarg.h> +#include <stdlib.h> +#include <string.h> #include "upb.h" #ifdef __cplusplus extern "C" { #endif -/* upb_bytesrc ****************************************************************/ -// upb_bytesrc is a pull interface for streams of bytes, basically an -// abstraction of read()/fread(), but it avoids copies where possible. +/* upb_bytesrc ****************************************************************/ -typedef upb_strlen_t (*upb_bytesrc_read_fptr)( - upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); -typedef bool (*upb_bytesrc_getstr_fptr)( - upb_bytesrc *src, upb_string *str, upb_status *status); +// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as +// they become available, and to preserve some trailing amount of data. 
+typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*); +typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*); +typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*); +typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t); +typedef void upb_bytesrc_ref_func(void*); +typedef struct _upb_bytesrc_vtbl { + upb_bytesrc_fetch_func *fetch; + upb_bytesrc_read_func *read; + upb_bytesrc_getptr_func *getptr; + upb_bytesrc_refregion_func *refregion; + upb_bytesrc_refregion_func *unrefregion; + upb_bytesrc_ref_func *ref; + upb_bytesrc_ref_func *unref; +} upb_bytesrc_vtbl; typedef struct { - upb_bytesrc_read_fptr read; - upb_bytesrc_getstr_fptr getstr; -} upb_bytesrc_vtbl; + upb_bytesrc_vtbl *vtbl; +} upb_bytesrc; -struct _upb_bytesrc { - upb_bytesrc_vtbl *vtbl; -}; +INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) { + src->vtbl = vtbl; +} -INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { - s->vtbl = vtbl; +// Fetches at least minlen bytes starting at ofs, returning the actual number +// of bytes fetched (or 0 on error: see "s" for details). Gives caller a ref +// on the fetched region. It is safe to re-fetch existing regions but only if +// they are ref'd. "ofs" may not be greater than the end of the region that +// was previously fetched. +// NOTE(review): there is no "minlen" parameter in this signature -- confirm +// which minimum the first sentence refers to. +INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) { + return src->vtbl->fetch(src, ofs, s); } -// Reads up to "count" bytes into "buf", returning the total number of bytes -// read. If 0, indicates error and puts details in "status". -INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, - upb_strlen_t count, upb_status *status) { - return src->vtbl->read(src, buf, count, status); +// Copies "len" bytes of data from offset src_ofs to "dst", which must be at +// least "len" bytes long. The caller must own a ref on the given region. 
+INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len, + char *dst) { + src->vtbl->read(src, src_ofs, len, dst); } -// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure -// that "str" is created or just recycled. Returns "false" if no data was -// returned, either due to error or EOF (check status for details). +// Returns a pointer to the bytesrc's internal buffer, returning how much data +// was actually returned (which may be less than "len" if the given region is +// not contiguous). The caller must own refs on the entire region from [ofs, +// ofs+len]. The returned buffer is valid for as long as the region remains +// ref'd. // -// In comparison to upb_bytesrc_read(), this call can possibly alias existing -// string data (which avoids a copy). On the other hand, if the data was *not* -// already in an existing string, this copies it into a upb_string, and if the -// data needs to be put in a specific range of memory (because eg. you need to -// put it into a different kind of string object) then upb_bytesrc_get() could -// save you a copy. -INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, - upb_status *status) { - return src->vtbl->getstr(src, str, status); +// NOTE(review): "len" is passed by pointer, so it presumably carries the +// requested length in and the contiguous length out -- confirm against an +// implementation. +// +// TODO: is "len" really required here? +INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs, + size_t *len) { + return src->vtbl->getptr(src, ofs, len); +} + +// Gives the caller a ref on the given region. The caller must know that the +// given region is already ref'd. +INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) { + src->vtbl->refregion(src, ofs, len); +} + +// Releases a ref on the given region, which the caller must have previously +// ref'd. +INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) { + src->vtbl->unrefregion(src, ofs, len); +} + +// Attempts to ref the bytesrc itself, returning false if this bytesrc is +// not ref-able. 
+INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) { + if (src->vtbl->ref) { + src->vtbl->ref(src); + return true; + } else { + return false; + } +} + +// Unref's the bytesrc itself. May only be called when upb_bytesrc_tryref() +// has previously returned true. +INLINE void upb_bytesrc_unref(upb_bytesrc *src) { + assert(src->vtbl->unref); + src->vtbl->unref(src); +} + +/* upb_strref *****************************************************************/ + +// The structure we pass for a string. +typedef struct _upb_strref { + // Pointer to the string data. NULL if the string spans multiple input + // buffers (in which case upb_bytesrc_getptr() must be called to obtain + // the actual pointers). + const char *ptr; + + // Bytesrc from which this string data comes. This is only guaranteed to be + // alive from inside the callback; however if the handler knows more about + // its type and how to prolong its life, it may do so. + upb_bytesrc *bytesrc; + + // Offset in the bytesrc that represents the beginning of this string. + // NOTE(review): this is 32 bits wide while the upb_bytesrc functions take + // uint64_t offsets -- confirm that offsets past 4GB are not expected here. + uint32_t stream_offset; + + // Length of the string. + uint32_t len; + + // Possibly add optional members here like start_line, start_column, etc. +} upb_strref; + +// Copies the contents of the strref into a newly-allocated, NUL-terminated +// string. The result comes from malloc(), so the caller must free() it. 
+INLINE char *upb_strref_dup(struct _upb_strref *r) { + // Widen before adding 1 so that a len of UINT32_MAX cannot wrap the size to 0. + char *ret = (char*)malloc((size_t)r->len + 1); + // Allocation failed: return NULL rather than writing through a null pointer. + if (ret == NULL) return NULL; + upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret); + ret[r->len] = '\0'; + return ret; } /* upb_bytesink ***************************************************************/ -struct _upb_bytesink; -typedef struct _upb_bytesink upb_bytesink; -typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( - upb_bytesink *bytesink, upb_string *str, upb_status *status); -typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( - upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args); +typedef bool upb_bytesink_write_func(void*, const char*, size_t, upb_status*); +typedef int32_t upb_bytesink_vprintf_func( + void*, upb_status*, const char *fmt, va_list args); typedef struct { - upb_bytesink_putstr_fptr putstr; - upb_bytesink_vprintf_fptr vprintf; + upb_bytesink_write_func *write; + upb_bytesink_vprintf_func *vprintf; } upb_bytesink_vtbl; -struct _upb_bytesink { +typedef struct { upb_bytesink_vtbl *vtbl; -}; +} upb_bytesink; -INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { - s->vtbl = vtbl; +INLINE void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) { + sink->vtbl = vtbl; } +INLINE bool upb_bytesink_write(upb_bytesink *sink, const char *buf, size_t len, + upb_status *s) { + return sink->vtbl->write(sink, buf, len, s); +} -// TODO: Figure out how buffering should be handled. Should the caller buffer -// data and only call these functions when a buffer is full? Seems most -// efficient, but then buffering has to be configured in the caller, which -// could be anything, which makes it hard to have a standard interface for -// controlling buffering. -// -// The downside of having the bytesink buffer is efficiency: the caller is -// making more (virtual) function calls, and the caller can't arrange to have -// a big contiguous buffer. The bytesink can do this, but will have to copy -// to make the data contiguous. 
- -// Returns the number of bytes written. -INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, - const char *fmt, ...) { +INLINE bool upb_bytesink_writestr(upb_bytesink *sink, const char *str, + upb_status *s) { + return upb_bytesink_write(sink, str, strlen(str), s); +} + +// Returns the number of bytes written or -1 on error. +INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, + const char *fmt, ...) { va_list args; va_start(args, fmt); - upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); + // Signed, like the vtable's return type, so a -1 error is carried through as-is. + int32_t ret = sink->vtbl->vprintf(sink, status, fmt, args); va_end(args); return ret; } -// Puts the given string, returning true if the operation was successful, otherwise -// check "status" for details. Ownership of the string is *not* passed; if -// the callee wants a reference he must call upb_string_getref() on it. -INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, - upb_status *status) { - return sink->vtbl->putstr(sink, str, status); -} +// OPT: add getappendbuf() +// OPT: add writefrombytesrc() +// TODO: add flush() + + +/* upb_cbuf *******************************************************************/ + +// A circular buffer implementation for bytesrcs that do internal buffering. #ifdef __cplusplus } /* extern "C" */ |