summaryrefslogtreecommitdiff
path: root/pbstruct.h
blob: db4d2422e8680e2110880664d14dd9e5bebbcaed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/*
 * pbstream - a stream-oriented implementation of protocol buffers.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 *
 * pbstruct is an efficient, compact, self-describing format for storing
 * protobufs in-memory.  In many ways it is a dynamic implementation of C
 * structures, which nonetheless is efficient because it does offset-based (as
 * opposed to name-based) access.  It can be used when representing a message in
 * memory is desired, but both the parsing and emitting modules can be used
 * without ever using pbstruct.
 *
 * There is one fundamental choice pbstruct is forced to make: whether to
 * include substructures by value or by reference.  Supporting both doesn't
 * seem worth the complexity.  The tradeoffs are:
 *
 * +value:
 *  - fewer malloc/free calls
 *  - better locality
 *  - one less deref to access data
 *  - simpler to delete/GC (since the data can't nest arbitrarily)
 * +ref:
 *  - you pay only sizeof(void*) for unused fields, not sizeof(struct)
 *  - you can keep substructs around while deleting the enclosing struct
 *  - copies of the surrounding struct are cheaper (including array reallocs)
 *
 * My view of these tradeoffs is that while by value semantics could yield
 * small performance gains in its best case, and is somewhat simpler, it has
 * much worse degenerate cases.  For example, consider a protobuf like that is
 * only meant to contain one of several possible large structures:
 *
 * message {
 *   optional LargeMessage1 m1 = 1;
 *   optional LargeMessage2 m2 = 2;
 *   optional LargeMessage3 m3 = 2;
 *   // ...
 * }
 *
 * This proto will take N times more memory if structures are by value than if
 * they are by reference.  To avoid such bad cases, submessages are by
 * reference.
 *
 * Ownership semantics:
 * - Each pbstruct absolutely owns any data allocated for its arrays or
 *   strings.  If you want a copy of this data that will outlive the struct,
 *   you must copy it out.
 * - Each pbstruct optionally owns any data allocated for substructs, based
 *   on whether you pass the free_substructs parameter to any of several
 *   functions.
 */

#ifndef PBSTRUCT_H_
#define PBSTRUCT_H_

#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

typedef enum pbstruct_type {
  PBSTRUCT_TYPE_DOUBLE = 1,
  PBSTRUCT_TYPE_FLOAT = 2,
  PBSTRUCT_TYPE_INT32 = 3,
  PBSTRUCT_TYPE_UINT32 = 4,
  PBSTRUCT_TYPE_INT64 = 5,
  PBSTRUCT_TYPE_UINT64 = 6,
  PBSTRUCT_TYPE_BOOL = 7,

  /* Main struct contains a pointer to a pbstruct. */
  PBSTRUCT_TYPE_SUBSTRUCT = 8,

  /* Main struct contains a pointer to a pbstruct_delimited. */
  PBSTRUCT_DYNAMIC = 16,
  PBSTRUCT_TYPE_BYTES     = PBSTRUCT_DYNAMIC | 1,
  PBSTRUCT_TYPE_STRING    = PBSTRUCT_DYNAMIC | 2,

  /* The main struct contains a pointer to a pbstruct_array, for which each
   * element is identical to the non-array form. */
  PBSTRUCT_ARRAY = PBSTRUCT_DYNAMIC | 32,  /* Not itself a real type. */
  PBSTRUCT_TYPE_DOUBLE_ARRAY    = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_DOUBLE,
  PBSTRUCT_TYPE_FLOAT_ARRAY     = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_FLOAT,
  PBSTRUCT_TYPE_INT32_ARRAY     = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_INT32,
  PBSTRUCT_TYPE_UINT32_ARRAY    = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_UINT32,
  PBSTRUCT_TYPE_INT64_ARRAY     = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_INT64,
  PBSTRUCT_TYPE_UINT64_ARRAY    = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_UINT64,
  PBSTRUCT_TYPE_BOOL_ARRAY      = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_BOOL,
  PBSTRUCT_TYPE_BYTES_ARRAY     = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_BYTES,
  PBSTRUCT_TYPE_STRING_ARRAY    = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_STRING,
  PBSTRUCT_TYPE_SUBSTRUCT_ARRAY = PBSTRUCT_ARRAY | PBSTRUCT_TYPE_SUBSTRUCT,
} pbstruct_type_t;

struct pbstruct_field {
  char *name;
  pbstruct_type_t type;
  ptrdiff_t byte_offset;
  ptrdiff_t isset_byte_offset;
  uint8_t isset_byte_mask;
};

struct pbstruct_dynamic {
  size_t len;   /* Number of present elements (bytes for string/bytes). */
  size_t size;  /* Total size in present elements. */
  uint8_t data[];
};

struct pbstruct_definition {
  char *name;
  size_t struct_size;
  int num_fields;
  int num_required_fields;  /* Required fields have the lowest set bytemasks. */
  struct pbstruct_field fields[];
};

struct pbstruct {
  struct pbstruct_definition *definition;
  uint8_t data[];  /* layout is described by definition. */
};

struct pballoc {
  void *(*realloc)(void *ptr, size_t size, void *user_data);
  void (*free)(void *ptr, void *user_data);
};

/* Initializes everything to unset, with no dynamic memory alocated. */
void pbstruct_init(struct pbstruct *s, struct pbstruct_definition *d);
pbstruct *pbstruct_new(struct pbstruct_defintion *d, struct pballoc *a);
/* Frees the struct itself and any dynamic string or array data, and substructs
 * also if free_substructs is true. */
void pbstruct_free(struct pbstruct *s, struct pballoc *a, bool free_substructs);

/* For setting, clearing, and testing the "set" flag for each field.
 * Clearing the "set" flag does not free any dynamically allocated
 * storage -- it will be reused again if the field becomes set again. */
bool pbstruct_is_set(struct pbstruct *s, struct pbstruct_field *f);
void pbstruct_unset(struct pbstruct *s, struct pbstruct_field *f);
void pbstruct_unset_all(struct pbstruct *s);
void pbstruct_set(struct pbstruct *s, struct pbstruct_field *f);

/* Resizes the given string field to len, triggering a realloc() if
 * necessary.  Never reallocs() the string to be shorter. */
void pbstruct_resize_str(struct pbstruct *s, struct pbstruct_field *f,
                         size_t len, struct pballoc *a);

/* Appends an element to the given array field, triggering a realloc() if
 * necessary.  Returns the offset of the new array element. */
size_t pbstruct_append_array(struct pbstruct *s, struct pbstruct_field *f,
                             struct pballoc *a);

/* Sets the size of the given array field to zero, but does not free the
 * array itself (call pbstruct_free_field() for that).  Does free any
 * strings that were in the array (for string arrays), and any substructs
 * if free_substructs is true.  Does not affect the "set" flag (if the
 * field was previously set, it will remain set, but as zero elements long). */
void pbstruct_clear_array(struct pbstruct *s, struct pbstruct_field *f,
                          struct pballoc *a, bool free_substructs);

/* Resizes a delimited field (string or bytes) string that is within an array.
 * The array must already be large enough that str_offset is a valid member. */
void pbstruct_resize_arraystr(struct pbstruct *s, struct pbstruct_field *f,
                              size_t str_offset, size_t len, struct pballoc *a);

/* Like pbstruct_append_array, but appends a string of the specified length. */
size_t pbstruct_append_arraystr(struct pbstruct *s, struct pbstruct_field *f,
                                size_t len, struct pballoc *a);

/* Clears all data associated with f (like pbstruct_clear_array()), but also
 * frees all underlying storage.  For string array fields, also frees the
 * individual strings.  Clears the "set" flag since it is an error for an
 * array field to be set if it has no dynamic data allocated. */
void pbstruct_free_field(struct pbstruct *s, struct pbstruct_field *f,
                         bool free_substructs);
void pbstruct_free_all_fields(struct pbstruct *s, bool free_substructs);

/* These do no existence checks, bounds checks, or type checks. */
#define DECLARE_GETTERS(ctype, name) \
  ctype pbstruct_get_ ## name(struct pbstruct *s, \
                              struct pbstruct_field *f); \
  ctype *pbstruct_get_ ## name ## _ptr(struct pbstruct *s, \
                                       struct pbstruct_field *f); \
  ctype pbstruct_get_ ## name ## _array(struct pbstruct *s, \
                                        struct pbstruct_field *f, \
                                        int i); \
  ctype *pbstruct_get_ ## name ## _array_ptr(struct pbstruct *s, \
                                             struct pbstruct_field *f, \
                                             int i); \


DECLARE_GETTERS(double,   double)
DECLARE_GETTERS(float,    float)
DECLARE_GETTERS(int32_t,  int32)
DECLARE_GETTERS(int64_t,  int64)
DECLARE_GETTERS(uint32_t, uint32)
DECLARE_GETTERS(uint64_t, uint64)
DECLARE_GETTERS(bool,     bool)
DECLARE_GETTERS(struct pbstruct*, substruct)
DECLARE_GETTERS(struct pbstruct_delimited*, bytes)
DECLARE_GETTERS(struct pbstruct_delimited*, string)

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* PBSTRUCT_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback