summaryrefslogtreecommitdiff
path: root/upb/bytestream.h
blob: 0a744f62780709e3fb7a556ee2b3f4a02600cd4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * This file contains upb_bytesrc and upb_bytesink, which are abstractions of
 * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing
 * semantics.  They are virtual base classes so concrete implementations
 * can get the data from a fd, a string, a cord, etc.
 *
 * Byte streams are NOT thread-safe!  (Like f{read,write}_unlocked())
 * This may change (in particular, bytesrc objects may be better thread-safe).
 */

#ifndef UPB_BYTESTREAM_H
#define UPB_BYTESTREAM_H

#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "upb.h"

#ifdef __cplusplus
extern "C" {
#endif


/* upb_bytesrc ****************************************************************/

// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
// they become available, and to preserve some trailing amount of data, which
// is useful for lazy parsing (among other things).  If there is a submessage
// that we want to parse later we can take a reference on that region of the
// input buffer.  This will guarantee that the bytesrc keeps the submessage
// data around for later use, without requiring a copy out of the input
// buffers.
// Function types for the bytesrc virtual table.  The leading void* of each is
// the bytesrc object itself: the inline upb_bytesrc_*() wrappers in this
// header pass "src" in that position and forward their remaining arguments
// unchanged.
//
//   fetch:     (src, ofs, status), returns a byte count
//   read:      (src, src_ofs, len, dst)
//   getptr:    (src, ofs, len), returns a pointer to the data
//   refregion: (src, ofs, len); also the type of the "unrefregion" slot
//   ref:       (src); also the type of the "unref" slot
typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t);
typedef void upb_bytesrc_ref_func(void*);
// Table of function pointers through which every upb_bytesrc operation is
// dispatched.
typedef struct _upb_bytesrc_vtbl {
  upb_bytesrc_fetch_func     *fetch;
  upb_bytesrc_read_func      *read;
  upb_bytesrc_getptr_func    *getptr;
  upb_bytesrc_refregion_func *refregion;
  upb_bytesrc_refregion_func *unrefregion;
  // "ref" may be NULL for a bytesrc that is not ref-able; upb_bytesrc_tryref()
  // checks for this.  upb_bytesrc_unref() asserts that "unref" is non-NULL.
  upb_bytesrc_ref_func       *ref;
  upb_bytesrc_ref_func       *unref;
} upb_bytesrc_vtbl;

// Base object for all byte sources.  It holds only the table of function
// pointers that the upb_bytesrc_*() wrappers dispatch through.  The concrete
// sources declared in this header (upb_stdio, upb_stringsrc) embed a
// upb_bytesrc as a member.
typedef struct {
  upb_bytesrc_vtbl  *vtbl;
} upb_bytesrc;

// Installs "vtbl" as the virtual function table of "src".  The table is stored
// by pointer, not copied.
INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
  upb_bytesrc *self = src;
  self->vtbl = vtbl;
}

// Fetches at least one byte beginning at offset "ofs".  Returns the number of
// bytes actually fetched, or 0 on error (in which case "s" holds the details).
// On success the caller is given a ref on the fetched region.
//
// "ofs" may be greater than or equal to the end of the already-fetched region.
// It may also be less than the end of the already-fetched region, but only
// *if* at least one of the following is true:
//
// * the region is ref'd (this implies that the data is still in-memory)
// * the bytesrc is seekable (this implies that the data can be fetched again).
INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
  upb_bytesrc_fetch_func *fetch = src->vtbl->fetch;
  return fetch(src, ofs, s);
}

// Copies "len" bytes, starting at offset "src_ofs" of the bytesrc, into "dst".
// "dst" must have room for at least "len" bytes, and the caller must own a ref
// on the region being read.
INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
                             char *dst) {
  upb_bytesrc_read_func *read = src->vtbl->read;
  read(src, src_ofs, len, dst);
}

// Returns a pointer into the bytesrc's internal buffer for offset "ofs" and
// stores in *len how much data is available there.  The caller must own refs
// on the given region; the returned pointer stays valid for as long as the
// region remains ref'd.
//
// TODO: if more data is available than the caller has ref'd is it ok for the
// caller to read *len bytes?
INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
                                      size_t *len) {
  upb_bytesrc_getptr_func *getptr = src->vtbl->getptr;
  return getptr(src, ofs, len);
}

// Takes an additional ref, on behalf of the caller, on the "len" bytes starting
// at "ofs".  The caller must know that the region is already ref'd.  For
// example, inside a upb_handlers callback that receives a upb_strref the
// region is guaranteed to be ref'd, and this function lets that handler take
// a ref of its own.
INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
  upb_bytesrc_refregion_func *refregion = src->vtbl->refregion;
  refregion(src, ofs, len);
}

// Drops one ref on the "len" bytes starting at "ofs".  The caller must have
// ref'd this region previously.
INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
  upb_bytesrc_refregion_func *unrefregion = src->vtbl->unrefregion;
  unrefregion(src, ofs, len);
}

// Tries to take a ref on the bytesrc object itself.  Returns true if the ref
// was taken, or false if this bytesrc is not ref-able (its vtable has no
// "ref" function).
INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) {
  upb_bytesrc_ref_func *ref = src->vtbl->ref;
  if (!ref) return false;
  ref(src);
  return true;
}

// Releases a ref on the bytesrc object itself.  Only legal after a call to
// upb_bytesrc_tryref() on the same bytesrc has returned true.
INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
  upb_bytesrc_ref_func *unref;
  assert(src->vtbl->unref);
  unref = src->vtbl->unref;
  unref(src);
}


/* upb_strref *****************************************************************/

// The structure we pass to upb_handlers for a string value.  It describes the
// string either directly (via "ptr") or as a region of a bytesrc (via
// "bytesrc" + "stream_offset"); see upb_strref_read() for how the two cases
// are consumed.
typedef struct _upb_strref {
  // Pointer to the string data.  NULL if the string spans multiple input
  // buffers (in which case upb_bytesrc_getptr() must be called to obtain
  // the actual pointers).
  const char *ptr;

  // Total length of the string.
  uint32_t len;

  // Offset in the bytesrc that represents the beginning of this string.
  // NOTE(review): this is 32 bits wide while the upb_bytesrc_*() functions
  // take 64-bit offsets -- confirm that offsets past 4GB cannot occur here.
  uint32_t stream_offset;

  // Bytesrc from which this string data comes.  May be NULL if ptr is set.  If
  // non-NULL, the bytesrc is only guaranteed to be alive from inside the
  // callback; however if the handler knows more about its type and how to
  // prolong its life, it may do so.
  upb_bytesrc *bytesrc;

  // Possibly add optional members here like start_line, start_column, etc.
} upb_strref;

// Copies the contents of the strref into a newly-allocated, NUL-terminated
// string and returns it.
// NOTE(review): ownership of the result is not stated here; presumably the
// caller must free() it -- confirm against the implementation.
char *upb_strref_dup(struct _upb_strref *r);

// Copies the r->len bytes of string data described by "r" into "buf", which
// must be at least r->len bytes long.  No terminator is appended.  When the
// strref has no direct pointer, the data is read from its bytesrc instead
// (which must then be non-NULL).
INLINE void upb_strref_read(struct _upb_strref *r, char *buf) {
  if (!r->ptr) {
    // No contiguous pointer available: go through the bytesrc.
    assert(r->bytesrc);
    upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, buf);
    return;
  }
  memcpy(buf, r->ptr, r->len);
}


/* upb_bytesink ***************************************************************/

// A bytesink is an interface that allows the caller to push byte-wise data.
// It is very simple -- the only special capability is the ability to "rewind"
// the stream, which is really only a mechanism of having the bytesink ignore
// some subsequent calls.
// Function types for the bytesink virtual table.  The leading void* of each is
// the bytesink object itself; the inline wrappers in this header pass "sink"
// in that position.
//
//   write:   (sink, buf, len)
//   vprintf: (sink, fmt, args)
typedef int upb_bytesink_write_func(void*, const void*, int);
typedef int upb_bytesink_vprintf_func(void*, const char *fmt, va_list args);

// Table of function pointers through which bytesink output is dispatched.
typedef struct {
  upb_bytesink_write_func   *write;
  upb_bytesink_vprintf_func *vprintf;
} upb_bytesink_vtbl;

// Base object for all byte sinks.
typedef struct {
  // Virtual function table used by upb_bytesink_write()/printf().
  upb_bytesink_vtbl *vtbl;
  // NOTE(review): presumably holds the sink's most recent error -- confirm
  // against the implementation.
  upb_status status;
  // Value reported by upb_bytesink_getoffset().
  uint64_t offset;
} upb_bytesink;

// Set up / tear down the base upb_bytesink part of a sink.  Should be called
// by derived classes; "vtbl" is the derived class's table of virtual
// functions.
void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl);
void upb_bytesink_uninit(upb_bytesink *sink);

// Pushes the "len" bytes at "buf" to the sink via its "write" virtual function
// and returns whatever that function returns.
INLINE int upb_bytesink_write(upb_bytesink *s, const void *buf, int len) {
  upb_bytesink_write_func *write = s->vtbl->write;
  return write(s, buf, len);
}

// Writes the NUL-terminated string "str" (without its terminator) to the sink.
// Returns the result of the underlying upb_bytesink_write() call.
INLINE int upb_bytesink_writestr(upb_bytesink *sink, const char *str) {
  size_t nbytes = strlen(str);
  return upb_bytesink_write(sink, str, nbytes);
}

// Formats "fmt" and the arguments that follow it, printf-style, and pushes the
// result to the sink through its "vprintf" virtual function.
//
// Returns the number of bytes written or -1 on error.
INLINE int upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  // Keep the result in an int: the vprintf hook returns int and -1 signals an
  // error.  Storing it in an unsigned type and converting back would rely on
  // an implementation-defined conversion to recover the -1.
  int ret = sink->vtbl->vprintf(sink, fmt, args);
  va_end(args);
  return ret;
}

// Writes the single byte "ch" to the sink.  Returns the result of the
// underlying upb_bytesink_write() call.
INLINE int upb_bytesink_putc(upb_bytesink *sink, char ch) {
  const char byte = ch;
  return upb_bytesink_write(sink, &byte, 1);
}

// Writes "len" copies of the byte "ch" to the sink with a single
// upb_bytesink_write() call and returns that call's result.
//
// Returns -1 without writing anything if "len" is negative or if a temporary
// buffer for a large run cannot be allocated.
INLINE int upb_bytesink_putrepeated(upb_bytesink *sink, char ch, int len) {
  // A fixed-size buffer replaces the former variable-length array, whose size
  // was undefined for len <= 0 and unbounded (stack overflow risk) for large
  // len.  Runs that do not fit fall back to the heap.
  char stackbuf[256];
  char *buf = stackbuf;
  int ret;

  if (len < 0) return -1;
  if ((size_t)len > sizeof(stackbuf)) {
    buf = (char*)malloc((size_t)len);
    if (!buf) return -1;
  }

  memset(buf, ch, (size_t)len);
  ret = upb_bytesink_write(sink, buf, len);

  if (buf != stackbuf) free(buf);
  return ret;
}

// Returns the sink's current "offset" field, suitable for passing to
// upb_bytesink_rewind() later.
INLINE uint64_t upb_bytesink_getoffset(upb_bytesink *sink) {
  const upb_bytesink *s = sink;
  return s->offset;
}

// Rewinds the stream to the given offset.  This cannot actually "unput" any
// data; it exists so that an interrupted sequence of writes can be replayed
// from its beginning, in situations like:
//
// // If false is returned (because of error), call again later to resume.
// bool write_some_data(upb_bytesink *sink, int indent) {
//   uint64_t start_offset = upb_bytesink_getoffset(sink);
//   if (upb_bytesink_writestr(sink, "Some data") < 0) goto err;
//   if (upb_bytesink_putrepeated(sink, ' ', indent) < 0) goto err;
//   return true;
//  err:
//   upb_bytesink_rewind(sink, start_offset);
//   return false;
// }
//
// The writes issued after a rewind *must* be identical to the writes that
// were rewound past.
//
// Not implemented yet: the body currently ignores both arguments.
INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {
  // TODO
  (void)offset;
  (void)sink;
}

// OPT: add getappendbuf()
// OPT: add writefrombytesrc()
// TODO: add flush()


/* upb_stdio ******************************************************************/

// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but
// more portable.
//
// Specifically, stdio functions acquire locks on every operation (unless you
// use the f{read,write,...}_unlocked variants, which are not standard) and
// performs redundant buffering (unless you disable it with setvbuf(), but we
// can only do this on newly-opened filehandles).

// One buffer owned by a upb_stdio object: a fixed header followed by the
// buffer's bytes in a flexible array member.
// NOTE(review): field meanings below are inferred from their names -- confirm
// against the implementation.
typedef struct {
  uint64_t ofs;       // Presumably the stream offset of data[0].
  uint32_t len;       // Presumably the number of valid bytes in data[].
  uint32_t refcount;  // Presumably the number of outstanding region refs.
  char data[];
} upb_stdio_buf;

// We use a single object for both bytesrc and bytesink for simplicity.
// The object is still not thread-safe, and may only be used by one reader
// and one writer at a time.
typedef struct {
  // Embedded base objects for the reading and writing interfaces.
  upb_bytesrc src;
  upb_bytesink sink;
  // The stdio stream being read from / written to.
  FILE *file;
  // NOTE(review): presumably true when the file was opened by
  // upb_stdio_open() and must therefore be closed by this object -- confirm.
  bool should_close;
  // NOTE(review): presumably an array of buffer pointers with "nbuf" entries
  // in use and room for "szbuf" -- confirm against the implementation.
  upb_stdio_buf **bufs;
  uint32_t nbuf, szbuf;
} upb_stdio;

// Initializes a upb_stdio object.  A file must still be attached with
// upb_stdio_reset() or upb_stdio_open().
void upb_stdio_init(upb_stdio *stdio);
// Caller should call upb_stdio_flush prior to calling this to ensure that
// all data is flushed, otherwise data can be silently dropped if an error
// occurs flushing the remaining buffers.
// NOTE(review): upb_stdio_flush is not declared in this header -- confirm
// where (or whether) it exists.
void upb_stdio_uninit(upb_stdio *stdio);

// Resets the object to read/write to the given "file."  The caller is
// responsible for closing the file, which must outlive this object.
void upb_stdio_reset(upb_stdio *stdio, FILE *file);

// As an alternative to upb_stdio_reset(), initializes the object by opening a
// file, and will handle closing it.  This may result in more efficient I/O
// than the previous since we can call setvbuf() to disable buffering.
// "filename" and "mode" are presumably passed to fopen(), with failure
// reported through "s" -- confirm against the implementation.
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
                    upb_status *s);

// Accessors for the bytesrc and bytesink interfaces of this object.
upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);


/* upb_stringsrc **************************************************************/

// bytesrc/bytesink for a simple contiguous string.

// A bytesrc that vends the bytes of a single contiguous string.
struct _upb_stringsrc {
  // Embedded base object.
  upb_bytesrc bytesrc;
  // The string being vended and its length in bytes, as given to
  // upb_stringsrc_reset().
  const char *str;
  size_t len;
};
typedef struct _upb_stringsrc upb_stringsrc;

// Initialize/uninitialize a caller-allocated stringsrc.
void upb_stringsrc_init(upb_stringsrc *s);
void upb_stringsrc_uninit(upb_stringsrc *s);

// Resets the stringsrc to a state where it will vend the given string.  The
// stringsrc will take a reference on the string, so the caller need not ensure
// that it outlives the stringsrc.  A stringsrc can be reset multiple times.
// NOTE(review): "str" is a plain const char*, which cannot be reference
// counted; unless the implementation copies it, the caller probably does have
// to keep the string alive -- confirm against the implementation.
void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);

// Returns the upb_bytesrc* for this stringsrc.
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);


/* upb_stringsink *************************************************************/

// A bytesink that accumulates its output in an in-memory string.
struct _upb_stringsink {
  // Embedded base object.
  upb_bytesink bytesink;
  // The string owned by the sink (see upb_stringsink_reset()).
  char *str;
  // NOTE(review): presumably "len" is the number of bytes written so far and
  // "size" is the allocated capacity of "str" -- confirm.
  size_t len, size;
};
typedef struct _upb_stringsink upb_stringsink;

// Initialize/uninitialize a caller-allocated stringsink.
void upb_stringsink_init(upb_stringsink *s);
void upb_stringsink_uninit(upb_stringsink *s);

// Resets the sink's string to "str", which the sink takes ownership of.
// "str" may be NULL, which will make the sink allocate a new string.
void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size);

// Releases ownership of the returned string (which is "len" bytes long) and
// resets the internal string to be empty again (as if reset were called with
// NULL).
const char *upb_stringsink_release(upb_stringsink *s, size_t *len);

// Returns the upb_bytesink* for this stringsink.  Invalidated by reset above.
upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback