summaryrefslogtreecommitdiff
path: root/src/upb_data.h
blob: 6dc343b9cda9de83629bf0276a4076c880f16704 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 *
 * This file defines the in-memory format for messages, arrays, and strings
 * (which are the three dynamically-allocated structures that make up all
 * protobufs).
 *
 * The members of all structs should be considered private.  Access should
 * only happen through the provided functions.
 *
 * Unlike Google's protobuf, messages contain *pointers* to strings and arrays
 * instead of including them by value.  This makes unused strings and arrays
 * use less memory, and lets the strings and arrays have multiple possible
 * representations (for example, a string could be a slice).  It also gives
 * us more flexibility wrt refcounting.  The cost is that when a field *is*
 * being used, the net memory usage is one pointer more than if we had
 * included the thing directly. */

#ifndef UPB_DATA_H
#define UPB_DATA_H

#include <assert.h>
#include <string.h>
#include "upb.h"
#include "upb_atomic.h"
#include "upb_def.h"

struct upb_msgdef;
struct upb_fielddef;

/* upb_data *******************************************************************/

// The "base class" of strings, arrays, and messages.  Contains a few flags and
// possibly a reference count.  None of the functions for upb_data are public,
// but some of the constants are.

// typedef upb_atomic_refcount_t upb_data;

// The flags in upb_data.  Each flag is a distinct bit within the low three
// bits of the word, i.e. below the bits selected by REFCOUNT_MASK.
typedef enum {
  // Set if the object itself was allocated with malloc() and should be freed
  // with free().  This flag would be false if the object was allocated on the
  // stack or is data from the static segment of an object file.  Note that this
  // flag does not apply to the data being referenced by a string or array.
  //
  // If this flag is false, UPB_DATA_REFCOUNTED must be false also; there is
  // no sense refcounting something that does not need to be freed.
  UPB_DATA_HEAPALLOCATED = 1,

  // Set if the object is frozen against modification.  While an object is
  // frozen, it is suitable for concurrent readonly access.  Note that this
  // flag alone is not a sufficient mechanism for preventing any kind of writes
  // to the object's memory, because the object could still have a refcount.
  UPB_DATA_FROZEN = (1<<1),

  // Set if the object has an embedded refcount.
  UPB_DATA_REFCOUNTED = (1<<2)
} upb_data_flag;

// Layout of the upb_data word: the low REFCOUNT_SHIFT (3) bits hold the
// upb_data_flag bits, and the bits selected by REFCOUNT_MASK hold the
// reference count.  REFCOUNT_ONE is the amount that is added to (or
// subtracted from) the whole word to change the count by exactly one.
#define REFCOUNT_MASK 0xFFFFFFF8
#define REFCOUNT_SHIFT 3
#define REFCOUNT_ONE (1<<REFCOUNT_SHIFT)

// Returns true if "flag" is set in d's flag word.
//
// The word is read without synchronization.  The frozen flag is the only
// flag that can change during the life of a upb_data, and it may not change
// while the data has more than one owner, so a plain read is sufficient.
INLINE bool upb_data_hasflag(upb_data *d, upb_data_flag flag) {
  const bool is_set = d->v & flag;
  return is_set;
}

// INTERNAL-ONLY
// Turns on "flag" in d's flag word using a plain (non-atomic)
// read-modify-write; the refcount bits and the other flags are left as-is.
INLINE void upb_data_setflag(upb_data *d, upb_data_flag flag) {
  d->v = d->v | flag;
}

// Returns the reference count stored in the upper bits of d's word.
// A frozen object is read with upb_atomic_read(); an unfrozen object is read
// directly.
INLINE uint32_t upb_data_getrefcount(upb_data *d) {
  int word = upb_data_hasflag(d, UPB_DATA_FROZEN) ? upb_atomic_read(d)
                                                  : d->v;
  // Strip the flag bits, then move the count down to bit 0.
  return (word & REFCOUNT_MASK) >> REFCOUNT_SHIFT;
}

// Returns true if the given data has only one owner: either it carries no
// refcount at all, or its refcount is exactly 1.
INLINE bool upb_data_only(upb_data *data) {
  if(!upb_data_hasflag(data, UPB_DATA_REFCOUNTED)) return true;
  return upb_data_getrefcount(data) == 1;
}

// Specifies the type of ref that is requested based on the kind of access the
// caller needs to the object.
typedef enum {
  // Use when the client plans to perform read-only access to the object, and
  // only in one thread at a time.  This imposes the fewest requirements on
  // the object; it can be either frozen or not.  As a result, requesting a
  // reference of this type never performs a copy unless the object has no
  // refcount.
  //
  // A ref of this type can always be explicitly converted to frozen or
  // unfrozen later.
  UPB_REF_THREADUNSAFE_READONLY = 0,

  // Use when the client plans to perform read-only access, but from multiple
  // threads concurrently.  This will force the object to eagerly perform any
  // parsing that may have been lazily deferred, and will force a copy if the
  // object is not currently frozen.
  //
  // Asking for a reference of this type is equivalent to:
  //   x = getref(y, UPB_REF_THREADUNSAFE_READONLY);
  //   x = freeze(x);
  // ...except it is more efficient.
  UPB_REF_FROZEN = 1,

  // Use when the client plans to perform read/write access.  As a result, the
  // reference will not be thread-safe for concurrent reading *or* writing; the
  // object must be externally synchronized if it is being accessed from more
  // than one thread.  This will force a copy if the object is currently frozen.
  //
  // Asking for a reference of this type is equivalent to:
  //   x = getref(y, UPB_REF_THREADUNSAFE_READONLY);
  //   x = thaw(x);
  // ...except it is more efficient.
  UPB_REF_MUTABLE = 2
} upb_reftype;

// INTERNAL-ONLY FUNCTION:
// Tries to take one more reference of kind "reftype" on d.  Returns true if
// the count was incremented, or false if a reference of that kind cannot be
// taken on this object.
//
// The request is refused when:
//   - a frozen ref is requested but d is not frozen,
//   - a mutable ref is requested but d is frozen, or
//   - d is heap-allocated but carries no refcount.
//
// NOTE(review): an object without UPB_DATA_HEAPALLOCATED is not refused here,
// so its word is still incremented, whereas _upb_data_unref() never
// decrements such an object -- confirm that this asymmetry is intended.
INLINE bool _upb_data_incref(upb_data *d, upb_reftype reftype) {
  const bool frozen = upb_data_hasflag(d, UPB_DATA_FROZEN);

  if(reftype == UPB_REF_FROZEN && !frozen) return false;
  if(reftype == UPB_REF_MUTABLE && frozen) return false;
  if(upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED) &&
     !upb_data_hasflag(d, UPB_DATA_REFCOUNTED)) {
    return false;
  }

  // Atomic ops are only needed when the object is frozen.
  if(frozen) {
    upb_atomic_add(d, REFCOUNT_ONE);
  } else {
    d->v += REFCOUNT_ONE;
  }
  return true;
}

// INTERNAL-ONLY FUNCTION:
// Drops one reference on d.  Returns true if that was the last reference, in
// which case the caller should delete the object; returns false otherwise.
// Data that is not heap-allocated is left untouched and is never reported as
// deletable.
INLINE bool _upb_data_unref(upb_data *d) {
  // Non heap-allocated data never should be deleted.
  if(!upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED)) return false;

  // A heap-allocated object without a refcount should never be decref'd.
  // Its owner owns it exclusively and should free it directly.
  assert(upb_data_hasflag(d, UPB_DATA_REFCOUNTED));

  if(upb_data_hasflag(d, UPB_DATA_FROZEN)) {
    // Frozen: decrement atomically and inspect the value from *before* the
    // decrement; a count of exactly one means we just released the last ref.
    const int32_t before = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE);
    return (before & REFCOUNT_MASK) == REFCOUNT_ONE;
  }

  // Unfrozen: plain decrement, then check whether the count reached zero.
  d->v -= REFCOUNT_ONE;
  return (d->v & REFCOUNT_MASK) == 0;
}

/* upb_string *****************************************************************/

// Type used for string lengths (see the byte_len members below).
typedef uint32_t upb_strlen_t;

// We have several different representations for string, depending on whether
// it has a refcount (and likely in the future, depending on whether it is a
// slice of another string).  We could just have one representation with
// members that are sometimes unused, but this is wasteful in memory.  The
// flags that are always part of the first word tell us which representation
// to use.
//
// upb_string_common holds the members that are common to all representations.
typedef struct {
  upb_data base;
  upb_strlen_t byte_len;
  // We expect the data to be 8-bit clean (uint8_t), but char* is such an
  // ingrained convention that we follow it.
  char *ptr;
} upb_string_common;

// Used for a string without a refcount.
typedef struct {
  // First word, in the position that "base" occupies in the other
  // representations.  NOTE(review): presumably packs the allocated size
  // together with the upb_data flag bits -- confirm against the implementation.
  uint32_t byte_size_and_flags;
  upb_strlen_t byte_len;
  char *ptr;
} upb_norefcount_string;

// Used for a string with a refcount.
typedef struct {
  upb_data base;
  upb_strlen_t byte_len;
  char *ptr;
  // Extra member that only this representation has.  NOTE(review): presumably
  // the allocated size of the buffer at ptr -- confirm.
  uint32_t byte_size;
} upb_refcounted_string;

// All three representations overlaid; every one begins with a first word
// followed by byte_len and ptr, so "common" may be used to read those members.
union _upb_string {
  upb_norefcount_string norefcount;
  upb_string_common common;
  upb_refcounted_string refcounted;
};

// Returns a newly constructed, refcounted string which starts out empty.
// Caller owns one ref on it.  The returned string will not be frozen.
upb_string *upb_string_new(void);

// INTERNAL-ONLY:
// Frees the given string, along with any memory the string owned.
void _upb_string_free(upb_string *s);

// Returns a string to which caller owns a ref, and which contains the same
// contents as s.  The returned value may be a copy of s, if the requested
// flags were incompatible with s's.
// NOTE(review): ref_flags is presumably one of the upb_reftype values --
// confirm against the implementation.
upb_string *upb_string_getref(upb_string *s, int ref_flags);

// The caller releases a ref on s, which it must previously have owned a ref
// on.  If that was the last ref, the string is freed.
INLINE void upb_string_unref(upb_string *s) {
  const bool last_ref = _upb_data_unref(&s->common.base);
  if(last_ref) {
    _upb_string_free(s);
  }
}

// The string is resized to len bytes.  The string must not be frozen.
void upb_string_resize(upb_string *s, upb_strlen_t len);

// Resizes the string to byte_len (which may or may not trigger a
// reallocation) and returns its buffer, to which the caller may write.  The
// string must not be frozen.
INLINE char *upb_string_getrwbuf(upb_string *s, upb_strlen_t byte_len) {
  upb_string_resize(s, byte_len);
  // Read the pointer only after resizing, since the resize may reallocate.
  char *writable = s->common.ptr;
  return writable;
}

// Empties the string by requesting a writable buffer of length zero; the
// returned buffer itself is not needed.
INLINE void upb_string_clear(upb_string *s) {
  (void)upb_string_getrwbuf(s, 0);
}

// Returns a read-only view of the string's current contents.  upb_strlen(s)
// bytes are available through the returned pointer.
INLINE const char *upb_string_getrobuf(upb_string *s) {
  const char *contents = s->common.ptr;
  return contents;
}

// Returns the string's current length (its byte_len member).
INLINE upb_strlen_t upb_strlen(upb_string *s) {
  const upb_strlen_t length = s->common.byte_len;
  return length;
}

/* upb_string library functions ***********************************************/

// Named like their <string.h> counterparts, these are all safe against buffer
// overflow.  These only use the public upb_string interface.

// Returns true if s1 and s2 have identical contents.  More efficient than
// upb_strcmp if all you need is to test equality, because strings of
// different lengths are rejected without looking at their bytes.
INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
  const upb_strlen_t len = upb_strlen(s1);
  if(len != upb_strlen(s2)) return false;
  return memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
}

// Like strcmp(), but compares two upb_strings.
// NOTE(review): the sign convention of the result is presumably the same as
// strcmp()'s (<0, 0, >0) -- confirm against the implementation.
int upb_strcmp(upb_string *s1, upb_string *s2);

// Like upb_strcpy, but copies from a buffer and length: dest is resized to
// len and the first len bytes at src are copied into it.
INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
  char *out = upb_string_getrwbuf(dest, len);
  memcpy(out, src, len);
}

// Replaces the contents of "dest" with the contents of "src".
INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
  const char *from = upb_string_getrobuf(src);
  const upb_strlen_t count = upb_strlen(src);
  upb_strcpylen(dest, from, count);
}

// Like upb_strcpy, but copies from a NUL-terminated C string.  The
// terminating NUL itself is not copied.
INLINE void upb_strcpyc(upb_string *dest, const char *src) {
  // Measuring first means two passes over src, but the alternative would be
  // to repeatedly re-allocate dest, which seems worse.
  const size_t src_len = strlen(src);
  upb_strcpylen(dest, src, src_len);
}

// Returns a new string whose contents are a copy of s.
upb_string *upb_strdup(upb_string *s);

// Like upb_strdup(), but duplicates a NUL-terminated C string.
upb_string *upb_strdupc(const char *src);

// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);

// Returns a string that is a substring of the given string.  Currently this
// returns a copy, but in the future this may return an object that references
// the original string data instead of copying it.  Both now and in the future,
// the caller owns a ref on whatever is returned.
// NOTE(review): offset and len are presumably in bytes and must lie within
// s -- confirm against the implementation.
upb_string *upb_strslice(upb_string *s, int offset, int len);

// Reads an entire file into a newly-allocated string (caller owns one ref).
// NOTE(review): the failure behavior (e.g. an unreadable file) is not visible
// from this header -- confirm against the implementation.
upb_string *upb_strreadfile(const char *filename);

// Typedef for a read-only string that is allocated statically or on the stack.
// Initialize with one of the macros below.  You must not dynamically allocate
// this type.
//
// Both initializers set only UPB_DATA_FROZEN in the first word, followed by
// the length and the data pointer.  UPB_STRLIT computes the length with
// sizeof(str)-1, so its argument must be a string literal or char array, not
// a pointer.  Note that UPB_STRLIT expands to a doubly-braced initializer
// while UPB_STRLIT_LEN expands to a singly-braced one.
typedef upb_string upb_static_string;
#define UPB_STRLIT_LEN(str, len) {0 | UPB_DATA_FROZEN, len, str}
#define UPB_STRLIT(str) {{0 | UPB_DATA_FROZEN, sizeof(str)-1, str}}

// Allows using upb_strings in printf, ie:
//   upb_string str = UPB_STRLIT("Hello, World!\n");
//   printf("String is: " UPB_STRFMT, UPB_STRARG(&str));
// UPB_STRARG takes a *pointer* to the string and evaluates its argument twice.
#define UPB_STRARG(str) (str)->common.byte_len, (str)->common.ptr
#define UPB_STRFMT "%.*s"

/* upb_array ******************************************************************/

// Type used for array lengths and element indexes.
typedef uint32_t upb_arraylen_t;

// The comments attached to upb_string above also apply here.
//
// upb_array_common holds the members shared by every array representation.
typedef struct {
  upb_data base;
  upb_arraylen_t len;             // Current number of elements.
  union upb_value_ptr elements;   // Start of the element storage.
} upb_array_common;

// Representation without a refcount.
typedef struct {
  // First word, in the position that "base" occupies in the other
  // representations.  NOTE(review): presumably packs the allocated size
  // together with the upb_data flag bits -- confirm against the implementation.
  uint32_t size_and_flags;
  upb_arraylen_t len;
  union upb_value_ptr elements;
} upb_norefcount_array;

// Representation with a refcount.
typedef struct {
  upb_data base;
  upb_arraylen_t len;
  union upb_value_ptr elements;
  // Extra member that only this representation has.  NOTE(review): presumably
  // the allocated capacity, in elements -- confirm.
  upb_arraylen_t size;
} upb_refcounted_array;

// All three representations overlaid; "common" may be used to reach the
// members they share.
union _upb_array {
  upb_norefcount_array norefcount;
  upb_array_common common;
  upb_refcounted_array refcounted;
};

// UPB_DEFINE_MSG_ARRAY(type) defines "struct type_array" (also typedef'd as
// "type_array"), whose members mirror upb_array_common except that the
// elements are exposed as an array of "type *".  This type can be used either
// to perform read-only access on an array, or to statically define a
// non-reference-counted static array.
//
// The expansion already ends with a semicolon.  The last line of the macro
// deliberately carries no line-continuation backslash, so the definition does
// not depend on being followed by a blank line.
#define UPB_DEFINE_MSG_ARRAY(type) \
typedef struct type ## _array { \
  upb_data base; \
  upb_arraylen_t len; \
  type **elements; \
} type ## _array;

// Names the struct type that UPB_DEFINE_MSG_ARRAY(type) defines.
#define UPB_MSG_ARRAY(type) struct type ## _array

// Constructs a newly-allocated, reference-counted array which starts out
// empty.  Caller owns one ref on it, which is released with
// upb_array_unref().
upb_array *upb_array_new(void);

// Returns the current number of elements in the array (its len member,
// widened to size_t).
INLINE size_t upb_array_len(upb_array *a) {
  const size_t count = a->common.len;
  return count;
}

// INTERNAL-ONLY:
// Frees the given array and releases references on its members.  f is the
// field definition describing the array's element type.
void _upb_array_free(upb_array *a, struct upb_fielddef *f);

// INTERNAL-ONLY:
// Returns a pointer to element number "elem" of the array.  The element
// storage is addressed as bytes, and each element occupies
// upb_type_info[f->type].size bytes.  No bounds check is performed here.
INLINE union upb_value_ptr _upb_array_getptr(upb_array *a,
                                             struct upb_fielddef *f,
                                             upb_arraylen_t elem) {
  const size_t elem_size = upb_type_info[f->type].size;
  union upb_value_ptr slot;
  slot._void = &a->common.elements.uint8[elem * elem_size];
  return slot;
}

// Returns the value stored at index "elem" of the array, read according to
// f->type.  elem must be less than upb_array_len(a); this is checked with
// assert().
INLINE union upb_value upb_array_get(upb_array *a, struct upb_fielddef *f,
                                     upb_arraylen_t elem) {
  assert(elem < upb_array_len(a));
  union upb_value_ptr slot = _upb_array_getptr(a, f, elem);
  return upb_value_read(slot, f->type);
}

// The caller releases a ref on the given array, which it must previously have
// owned a ref on.  If that was the last ref, the array is freed; f is handed
// to _upb_array_free() for that purpose.
INLINE void upb_array_unref(upb_array *a, struct upb_fielddef *f) {
  const bool last_ref = _upb_data_unref(&a->common.base);
  if(last_ref) {
    _upb_array_free(a, f);
  }
}

// The declarations below are compiled out by "#if 0"; no definitions for them
// are visible in this header.
#if 0
// Returns an array to which caller owns a ref, and contains the same contents
// as src.  The returned value may be a copy of src, if the requested flags
// were incompatible with src's.
INLINE upb_array *upb_array_getref(upb_array *src, int ref_flags);

// Sets the given element in the array to val.  The current length of the array
// must be greater than elem.  If the field type is dynamic, the array will
// take a ref on val and release a ref on what was previously in the array.
INLINE void upb_array_set(upb_array *a, struct upb_fielddef *f, int elem,
                          union upb_value val);


// Note that array_append will attempt to take a reference on the given value,
// so to avoid a copy use append_default and get.
INLINE void upb_array_append(upb_array *a, struct upb_fielddef *f,
                             union upb_value val);
INLINE void upb_array_append_default(upb_array *a, struct upb_fielddef *f,
                             union upb_value val);
#endif

// Sets the array's length to zero.  Only the len member is written; the
// element storage is not touched and no references are released here.
INLINE void upb_array_truncate(upb_array *a) {
  upb_array_common *hdr = &a->common;
  hdr->len = 0;
}


/* upb_msg ********************************************************************/

// Note that some inline functions for upb_msg are defined in upb_def.h since
// they rely on the defs.

// In-memory message.  "data" is the start of a per-message byte region: the
// "is set" bit for each field lives at bit (field_index % 8) of byte
// (field_index / 8), and each field's value lives at the field's byte_offset
// (see upb_msg_has() and _upb_msg_getptr()).
struct _upb_msg {
  upb_data base;
  uint8_t data[4];  // We allocate the appropriate amount per message.
};

// Creates a new msg of the given type.
// NOTE(review): presumably the caller owns one ref on the result, as with
// upb_string_new() and upb_array_new() -- confirm against the implementation.
upb_msg *upb_msg_new(struct upb_msgdef *md);

// INTERNAL-ONLY:
// Frees the given message and releases references on members.  md is the
// definition of the message's type.
void _upb_msg_free(upb_msg *msg, struct upb_msgdef *md);

// INTERNAL-ONLY:
// Returns a pointer to the storage of field f inside msg, i.e. the address
// f->byte_offset bytes into the message's data region.
INLINE union upb_value_ptr _upb_msg_getptr(upb_msg *msg, struct upb_fielddef *f) {
  union upb_value_ptr slot;
  slot._void = &msg->data[f->byte_offset];
  return slot;
}

// Releases a reference on msg.  If that was the last reference, the message
// is freed; md is handed to _upb_msg_free() for that purpose.
INLINE void upb_msg_unref(upb_msg *msg, struct upb_msgdef *md) {
  const bool last_ref = _upb_data_unref(&msg->base);
  if(last_ref) {
    _upb_msg_free(msg, md);
  }
}

// Tests whether the given field is explicitly set, or whether it will return
// a default.  The answer is the field's bit in the message's set-flags: bit
// (field_index % 8) of byte (field_index / 8) of msg->data.
INLINE bool upb_msg_has(upb_msg *msg, struct upb_fielddef *f) {
  const uint8_t flags_byte = msg->data[f->field_index/8];
  const int bit = 1 << (f->field_index % 8);
  return (flags_byte & bit) != 0;
}

// Returns the value of the specified field: the stored value if the field is
// set, or f->default_value if it is not.  The caller does *not* own a ref.
INLINE union upb_value upb_msg_get(upb_msg *msg, struct upb_fielddef *f) {
  if(!upb_msg_has(msg, f)) return f->default_value;
  return upb_value_read(_upb_msg_getptr(msg, f), f->type);
}

// Sets the given field of msg to the given value.  The msg will take a ref on
// val, and will drop a ref on whatever was there before.
void upb_msg_set(upb_msg *msg, struct upb_fielddef *f, union upb_value val);

// Marks every field of msg as unset by zeroing the first
// md->set_flags_bytes bytes of the message's data.  Only those bytes are
// written; no references held by the message are released here.
INLINE void upb_msg_clear(upb_msg *msg, struct upb_msgdef *md) {
  const size_t flag_bytes = md->set_flags_bytes;
  memset(msg->data, 0, flag_bytes);
}

/* Parsing ********************************************************************/

// NOTE(review): the semantics of the parsing functions are not visible from
// this header; the descriptions below are inferred from the signatures and
// should be confirmed against the implementation.

// Presumably parses the serialized data in str into msg (of type md),
// reporting the outcome through status.
void upb_msg_parsestr(upb_msg *msg, struct upb_msgdef *md, upb_string *str,
                      struct upb_status *status);

// Allocates / frees a parser object for messages described by def.
struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def);
void upb_msgparser_free(struct upb_msgparser *mp);

// Presumably prepares mp to parse into the message m.
void upb_msgparser_reset(struct upb_msgparser *mp, upb_msg *m);

// Presumably feeds str to the parser; the size_t result is likely the number
// of bytes consumed, with errors reported through status.
size_t upb_msgparser_parse(struct upb_msgparser *mp, upb_string *str,
                           struct upb_status *status);

#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback