summaryrefslogtreecommitdiff
path: root/upb_msg.h
blob: e751c27c4336c22e8250ad2acb861d67720ab5e8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 *
 * upb_msg contains a full description of a message as defined in a .proto file.
 * It supports many features and operations for dealing with proto messages:
 * - reflection over .proto types at runtime (list fields, get names, etc).
 * - an in-memory byte-level format for efficiently storing and accessing msgs.
 * - serializing and deserializing from the in-memory format to a protobuf.
 * - optional memory management for handling strings, arrays, and submessages.
 *
 * The in-memory format is very much like a C struct that you can define at
 * run-time, but also supports reflection.  Like C structs it supports
 * offset-based access, as opposed to the much slower name-based lookup.  The
 * format represents both the values themselves and bits describing whether each
 * field is set or not.
 *
 * The upb compiler emits C structs that mimic this definition exactly, so that
 * you can access the same hunk of memory using either this run-time
 * reflection-supporting interface or a C struct that was generated by the upb
 * compiler.
 *
 * Like C structs the format depends on the endianness of the host machine, so
 * it is not suitable for exchanging across machines of differing endianness.
 * But there is no reason to do that -- the protobuf serialization format is
 * designed already for serialization/deserialization, and is more compact than
 * this format.  This format is designed to allow the fastest possible random
 * access of individual fields.
 *
 * Note that clients need not use the memory management facilities defined here.
 * They are for convenience only -- clients wishing to do their own memory
 * management may do so (allowing clients to perform advanced techniques like
 * reference-counting, garbage collection, and string references).  Different
 * clients can read each other's messages regardless of what memory management
 * scheme each is using.
 */

#ifndef UPB_MSG_H_
#define UPB_MSG_H_

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "upb.h"
#include "upb_table.h"
#include "upb_parse.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Forward declarations from descriptor.h. */
struct google_protobuf_DescriptorProto;
struct google_protobuf_FieldDescriptorProto;

/* Message definition. ********************************************************/

/* Structure that describes a single field in a message.  This structure is very
 * consciously designed to fit into 12/16 bytes (32/64 bit, respectively).
 * Copies of it are stored by value in the field lookup tables, so its size
 * matters; re-check the size before adding or reordering members.
 * NOTE(review): the 12/16 figure assumes upb_field_type_t and upb_label_t are
 * one byte each -- confirm against their definitions. */
struct upb_msg_field {
  /* Left unresolved by upb_msg_init(); the caller fills it in afterwards with
   * upb_msg_ref(). */
  union upb_symbol_ref ref;
  /* Byte offset of the field's value from the start of the message data; the
   * accessors compute (char*)msg + byte_offset. */
  uint32_t byte_offset;     /* Where to find the data. */
  /* Also used to index upb_msg.field_descriptors, and to locate this field's
   * "set" flag: byte field_index/8, bit field_index%8 of the message data. */
  uint16_t field_index;     /* Indexes upb_msg.fields. Also indicates set bit */
  upb_field_type_t type;    /* Copied from descriptor for cache-friendliness. */
  upb_label_t label;        /* Presumably optional/required/repeated -- confirm. */
};

/* Structure that describes a single .proto message type.  Set up with
 * upb_msg_init() and released with upb_msg_free(). */
struct upb_msg {
  /* Presumably the descriptor handed to upb_msg_init(); per that function's
   * contract it is not owned by the msg and must outlive it. */
  struct google_protobuf_DescriptorProto *descriptor;
  /* NOTE(review): presumably the byte size of one in-memory message of this
   * type -- confirm in upb_msg.c. */
  size_t size;
  uint32_t num_fields;
  /* Number of bytes at the start of the message data that hold the "set"
   * flags; upb_msg_clear() zeroes exactly this many bytes. */
  uint32_t set_flags_bytes;
  uint32_t num_required_fields;  /* Required fields occupy the lowest-numbered set bits. */
  /* Lookup tables behind upb_msg_fieldbynum() and upb_msg_fieldbyname().  Their
   * entries embed a struct upb_msg_field by value. */
  struct upb_inttable fields_by_num;
  struct upb_strtable fields_by_name;
  struct upb_msg_field *fields;
  /* Indexed by upb_msg_field.field_index (see upb_msg_field_descriptor()). */
  struct google_protobuf_FieldDescriptorProto **field_descriptors;
};

/* The num->field and name->field maps in upb_msg allow fast lookup of fields
 * by number or name.  These lookups are in the critical path of parsing and
 * field lookup, so they must be as fast as possible.  To make these more
 * cache-friendly, we put the data in the table by value. */

/* Entry type stored in upb_msg.fields_by_num.  upb_msg_fieldbynum() passes
 * sizeof this struct to upb_inttable_lookup() and returns a pointer to the
 * embedded field. */
struct upb_fieldsbynum_entry {
  struct upb_inttable_entry e;  /* Table bookkeeping; presumably must stay first -- confirm in upb_table.h. */
  struct upb_msg_field f;       /* The field description, stored by value. */
};

/* Entry type stored in upb_msg.fields_by_name.  upb_msg_fieldbyname() returns
 * a pointer to the embedded field. */
struct upb_fieldsbyname_entry {
  struct upb_strtable_entry e;  /* Table bookkeeping; presumably must stay first -- confirm in upb_table.h. */
  struct upb_msg_field f;       /* The field description, stored by value. */
};

/* Fetches the field descriptor that corresponds to f by indexing
 * m->field_descriptors with f->field_index.  No bounds check is performed, so
 * f must be a field of m. */
INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
    struct upb_msg_field *f, struct upb_msg *m) {
  struct google_protobuf_FieldDescriptorProto **descriptors =
      m->field_descriptors;
  return descriptors[f->field_index];
}

/* Initialize and free a upb_msg.  Caller retains ownership of d, but the msg
 * will contain references to it, so it must outlive the msg.  Note that init
 * does not resolve upb_msg_field.ref -- the caller should do that
 * post-initialization by calling upb_msg_ref() below.
 *
 * upb_msg_init() reports its outcome as a bool.
 * NOTE(review): presumably true means success -- confirm in upb_msg.c. */
bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d);
void upb_msg_free(struct upb_msg *m);

/* Clients use this function on a previously initialized upb_msg to resolve the
 * "ref" field in the upb_msg_field.  Since messages can refer to each other in
 * mutually-recursive ways, this step must be separated from initialization.
 *
 * m is the message type that owns the field, f is the field to resolve, and
 * ref is the value to store as its reference. */
void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref);

/* Field lookup by number or by name.  While these are written to be as fast as
 * possible, it will still be faster to cache the results of this lookup if
 * possible.  These return NULL if no such field is found.
 *
 * upb_msg_fieldbynum() searches m->fields_by_num for the given field number and
 * returns a pointer to the field stored inside the matching table entry. */
INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
                                                uint32_t number) {
  struct upb_fieldsbynum_entry *entry = upb_inttable_lookup(
      &m->fields_by_num, number, sizeof(*entry));
  if (entry == NULL) return NULL;
  return &entry->f;
}
/* Searches m->fields_by_name for the given name and returns a pointer to the
 * field stored inside the matching table entry, or NULL if there is none. */
INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
                                                 struct upb_string *name) {
  struct upb_fieldsbyname_entry *entry =
      upb_strtable_lookup(&m->fields_by_name, name);
  if (entry == NULL) return NULL;
  return &entry->f;
}

/* Arrays. ********************************************************************/

/* Represents an array (a repeated field) of any type.  The interpretation of
 * the data in the array depends on the type.  The typed array structs defined
 * below are overlays on this struct, so the member order here (element pointer
 * first, then length) must not change. */
struct upb_array {
  /* Pointer to the element storage, viewable as any supported element type.
   * All members are pointers; pick the one that matches the field's type. */
  union {
    double   *_double;
    float    *_float;
    int32_t  *int32;
    int64_t  *int64;
    uint32_t *uint32;
    uint64_t *uint64;
    bool     *_bool;
    struct upb_string **string;  /* Elements are pointers to strings. */
    void     **submsg;           /* Elements are pointers to submessages. */
    void     *_void;             /* Untyped view, used for byte-offset arithmetic. */
  } elements;
  uint32_t len;     /* Measured in elements. */
};

/* These are all overlays on upb_array, pointers between them can be cast.
 *
 * UPB_DEFINE_ARRAY_TYPE(name, type) defines "struct upb_<name>_array" with the
 * same two members as struct upb_array (an element pointer followed by a
 * uint32_t length), except that the element pointer is typed as "type *".
 * The expansion already ends in a semicolon, so invocations do not add one. */
#define UPB_DEFINE_ARRAY_TYPE(name, type) \
  struct upb_ ## name ## _array { \
    type *elements; \
    uint32_t len; \
  };

/* Returns a pointer to element n of arr, treating the storage as a packed
 * sequence of elements that are upb_type_info[type].size bytes each.  No
 * bounds check is made against arr->len and type is not validated. */
INLINE union upb_value_ptr upb_array_getelementptr(
    struct upb_array *arr, uint32_t n, upb_field_type_t type)
{
  /* Widen to size_t before multiplying so the byte offset cannot wrap in
   * 32-bit arithmetic, whatever the declared width of the size member. */
  size_t offset = (size_t)n * upb_type_info[type].size;
  union upb_value_ptr ptr = {
    ._void = ((char*)arr->elements._void + offset)
  };
  return ptr;
}

/* Typed array overlays, one per primitive element type plus strings.
 * NOTE(review): UPB_DEFINE_ARRAY_TYPE itself prepends "upb_" to its first
 * argument, so these expand to struct upb_upb_double_array,
 * struct upb_upb_float_array, and so on.  Confirm that the doubled prefix is
 * intended before relying on these names. */
UPB_DEFINE_ARRAY_TYPE(upb_double, double)
UPB_DEFINE_ARRAY_TYPE(upb_float,  float)
UPB_DEFINE_ARRAY_TYPE(upb_int32,  int32_t)
UPB_DEFINE_ARRAY_TYPE(upb_int64,  int64_t)
UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
UPB_DEFINE_ARRAY_TYPE(upb_bool,   bool)
UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)

/* UPB_MSG_ARRAY(msg_type) names the array type for submessages of msg_type,
 * i.e. "struct <msg_type>_array".  UPB_DEFINE_MSG_ARRAY(msg_type) defines that
 * struct with the same shape as struct upb_array, except that elements is an
 * array of msg_type pointers.  msg_type is used as a bare type name inside the
 * struct, so it must be usable without a "struct" keyword (presumably a
 * typedef -- confirm against the generated code).  The definition already ends
 * in a semicolon. */
#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
#define UPB_DEFINE_MSG_ARRAY(msg_type) \
  UPB_MSG_ARRAY(msg_type) { \
    msg_type **elements; \
    uint32_t len; \
  };

/* Accessors for primitive types.  ********************************************/

/* For each primitive type we define a set of three functions:
 *
 *  // For fetching out of a msg (s points to the raw msg data).
 *  int32_t *upb_msg_get_int32_ptr(void *s, struct upb_msg_field *f);
 *  int32_t upb_msg_get_int32(void *s, struct upb_msg_field *f);
 *  void upb_msg_set_int32(void *s, struct upb_msg_field *f, int32_t val);
 *
 * These do no existence checks, bounds checks, or type checks.  In particular
 * the setter only stores the value; it does not touch the field's "set" flag.
 *
 * Macro parameters:
 *   INLINE - specifier placed in front of each generated function (this macro
 *            parameter shadows the INLINE macro inside the expansion).
 *   name   - suffix used to form the three function names.
 *   ctype  - C type of the value stored at f->byte_offset. */

#define UPB_DEFINE_ACCESSORS(INLINE, name, ctype) \
  INLINE ctype *upb_msg_get_ ## name ## _ptr( \
      void *s, struct upb_msg_field *f) { \
    return (ctype*)((char*)s + f->byte_offset); \
  } \
  INLINE ctype upb_msg_get_ ## name( \
      void *s, struct upb_msg_field *f) { \
    return *upb_msg_get_ ## name ## _ptr(s, f); \
  } \
  INLINE void upb_msg_set_ ## name( \
      void *s, struct upb_msg_field *f, ctype val) { \
    *upb_msg_get_ ## name ## _ptr(s, f) = val; \
  }

/* Instantiate upb_msg_get_<name>_ptr(), upb_msg_get_<name>() and
 * upb_msg_set_<name>() for every supported storage type.  bytes and string
 * both store a struct upb_string pointer, submsg stores an untyped pointer,
 * and array stores a pointer to a struct upb_array. */
UPB_DEFINE_ACCESSORS(INLINE, double, double)
UPB_DEFINE_ACCESSORS(INLINE, float,  float)
UPB_DEFINE_ACCESSORS(INLINE, int32,  int32_t)
UPB_DEFINE_ACCESSORS(INLINE, int64,  int64_t)
UPB_DEFINE_ACCESSORS(INLINE, uint32, uint32_t)
UPB_DEFINE_ACCESSORS(INLINE, uint64, uint64_t)
UPB_DEFINE_ACCESSORS(INLINE, bool,   bool)
UPB_DEFINE_ACCESSORS(INLINE, bytes,  struct upb_string*)
UPB_DEFINE_ACCESSORS(INLINE, string, struct upb_string*)
UPB_DEFINE_ACCESSORS(INLINE, submsg, void*)
UPB_DEFINE_ACCESSORS(INLINE, array,  struct upb_array*)

INLINE union upb_value_ptr upb_msg_get_ptr(
    void *data, struct upb_msg_field *f) {
  union upb_value_ptr p = {._void = ((char*)data + f->byte_offset)};
  return p;
}

/* Memory management  *********************************************************/

/* Returns a pointer to message data for message type m.
 * NOTE(review): presumably allocates and clears a new in-memory message that
 * the caller then owns -- confirm allocation, failure value, and how it is
 * released in upb_msg.c. */
void *upb_msg_new(struct upb_msg *m);

/* State for parsing serialized data into an in-memory message.  Set up with
 * upb_msg_parse_init(), driven by upb_msg_parse(), and released with
 * upb_msg_parse_free(). */
struct upb_msg_parse_state {
  struct upb_parse_state s;  /* Underlying parser state (declared in upb_parse.h). */
  bool merge;   /* As passed to upb_msg_parse_init(); presumably merge into existing contents -- confirm. */
  bool byref;   /* As passed to upb_msg_parse_init(); presumably reference input data rather than copy -- confirm. */
  struct upb_msg *m;         /* Description of the message type being parsed. */
};

/* Parsing into the in-memory format.
 *
 * upb_msg_parse_init() prepares s for parsing into the message data at msg,
 * whose type is described by m; merge and byref select the parsing mode (see
 * struct upb_msg_parse_state).  upb_msg_parse_free() releases s.
 *
 * upb_msg_parse() consumes up to len bytes from data and reports a status.
 * NOTE(review): *read presumably receives the number of bytes consumed --
 * confirm in upb_msg.c. */
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
                        struct upb_msg *m, bool merge, bool byref);
void upb_msg_parse_free(struct upb_msg_parse_state *s);
upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
                           void *data, size_t len, size_t *read);

/* Convenience wrapper taking a message type, serialized input, and the byref
 * mode, returning message data.
 * NOTE(review): presumably returns NULL on failure and the caller owns the
 * result -- confirm in upb_msg.c. */
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref);

/* Note!  These two may not be used on a upb_string* that was initialized by
 * means other than these functions.
 * NOTE(review): the comment says "two" but four functions follow, and only
 * upb_msg_reuse_str() and upb_msg_reuse_strref() take a upb_string** --
 * confirm which functions the restriction covers. */
void upb_msg_reuse_str(struct upb_string **str, uint32_t len);
void upb_msg_reuse_array(struct upb_array **arr, uint32_t n, upb_field_type_t t);
void upb_msg_reuse_strref(struct upb_string **str);
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);

/* "Set" flag reading and writing.  *******************************************/

/* Byte index, within the message's "set" flags, of the byte holding the flag
 * for the field with the given index (eight flags per byte). */
INLINE size_t upb_isset_offset(uint32_t field_index) {
  const uint32_t byte_index = field_index >> 3;
  return byte_index;
}

/* Single-bit mask selecting, within its flag byte, the "set" flag for the
 * field with the given index (bit number is field_index modulo 8). */
INLINE uint8_t upb_isset_mask(uint32_t field_index) {
  const uint32_t bit = field_index & 7;
  return (uint8_t)(1 << bit);
}

/* Functions for reading and writing the "set" flags in the msg.  Note that
 * these do not perform memory management associated with any dynamic memory
 * these fields may be referencing. These *only* set and test the flags.
 *
 * upb_msg_set() marks field f as present in the message data at s.  The flag
 * bytes are accessed as uint8_t so that setting bit 7 does not depend on
 * whether plain char is signed. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
{
  uint8_t *flags = (uint8_t*)s;
  flags[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
}

/* Marks field f as absent in the message data at s by clearing its "set"
 * flag.  Only the flag changes: the field's value bytes and any memory they
 * reference are left alone.  The flag bytes are accessed as uint8_t so the
 * result does not depend on whether plain char is signed. */
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
{
  uint8_t *flags = (uint8_t*)s;
  flags[upb_isset_offset(f->field_index)] &= (uint8_t)~upb_isset_mask(f->field_index);
}

/* Reports whether the "set" flag for field f is on in the message data at s.
 * Only the flag is consulted; the field's value is not examined. */
INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f)
{
  const size_t byte_index = upb_isset_offset(f->field_index);
  const uint8_t mask = upb_isset_mask(f->field_index);
  return (((char*)s)[byte_index] & mask) != 0;
}

/* Returns true if the "set" flag of every required field of m is on in the
 * message data at s.  Required fields occupy the lowest m->num_required_fields
 * flag bits, so this checks exactly those bits and ignores the flags of any
 * other fields that share the last byte.  A message type with no required
 * fields always passes, without reading any flag byte. */
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
{
  const uint8_t *flags = (const uint8_t*)s;
  uint32_t remaining = m->num_required_fields;
  size_t i = 0;

  /* Bytes made up entirely of required-field flags must be all ones. */
  while(remaining >= 8) {
    if(flags[i++] != 0xFF) return false;
    remaining -= 8;
  }

  /* Nothing left over: do not touch the next byte, which may not exist. */
  if(remaining == 0) return true;

  /* The last byte is shared with non-required fields, so test only the low
   * "remaining" bits instead of comparing the whole byte. */
  const uint8_t mask = (uint8_t)((1u << remaining) - 1);
  return (flags[i] & mask) == mask;
}

/* Turns off every "set" flag in the message data at s by zeroing the first
 * m->set_flags_bytes bytes.  Field values stored after the flags are left
 * untouched. */
INLINE void upb_msg_clear(void *s, struct upb_msg *m)
{
  const size_t flag_bytes = m->set_flags_bytes;
  memset(s, 0, flag_bytes);
}

/* Serialization/Deserialization.  ********************************************/

/* Parses the string data in s according to the message description in m.
 * data points to the in-memory message to parse into; the outcome is reported
 * as a upb_status_t.
 * NOTE(review): the name suggests that fields already present in data are
 * kept and merged with the parsed ones -- confirm in upb_msg.c. */
upb_status_t upb_msg_merge(void *data, struct upb_msg *m, struct upb_string *s);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_MSG_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback