summaryrefslogtreecommitdiff
path: root/upb_msg.h
blob: 407daa1af61bec885c1676536e6ee29a6880c7eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 *
 * upb_msg contains a full description of a message as defined in a .proto file.
 * This allows for run-time reflection over .proto types, but also defines an
 * in-memory byte-level format for storing protobufs.
 *
 * The in-memory format is very much like a C struct that you can define at
 * run-time, but also supports reflection.  Like C structs it supports
 * offset-based access, as opposed to the much slower name-based lookup.  The
 * format represents both the values themselves and bits describing whether each
 * field is set or not.
 *
 * The upb compiler emits C structs that mimic this definition exactly, so that
 * you can access the same hunk of memory using either this run-time
 * reflection-supporting interface or a C struct that was generated by the upb
 * compiler.
 *
 * Like C structs the format depends on the endianness of the host machine, so
 * it is not suitable for exchanging across machines of differing endianness.
 * But there is no reason to do that -- the protobuf serialization format is
 * designed already for serialization/deserialization, and is more compact than
 * this format.  This format is designed to allow the fastest possible random
 * access of individual fields.
 *
 * Note that no memory management is defined, which should make it easier to
 * integrate this format with existing memory-management schemes.  Any memory
 * management semantics can be used with the format as defined here.
 */

#ifndef PBSTRUCT_H_
#define PBSTRUCT_H_

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "upb.h"
#include "upb_table.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Structure definition. ******************************************************/

/* Run-time description of one field of a message.  It tells the accessors
 * below where the field's value lives inside the in-memory struct and which
 * "set" bit belongs to it. */
struct upb_msg_field {
  /* Where to find the data: byte offset from the start of the struct, added
   * to the base pointer by the upb_msg_get_*_ptr() accessors. */
  uint32_t byte_offset;
  /* Indexes upb_msg.fields.  Also selects this field's "set" bit: byte
   * (field_index / 8), bit (field_index % 8) -- see upb_isset_offset() and
   * upb_isset_mask(). */
  uint32_t field_index:24;
  upb_field_type_t type;    /* Copied from descriptor for cache-friendliness. */
  /* Reference to the definition of the field's type.  upb_msg_init() does not
   * resolve this; that is left to the caller. */
  union {
    struct upb_msg *msg;    /* Set if type == MESSAGE */
    struct upb_enum *_enum; /* Set if type == ENUM */
  } ref;
};

/* Entry type stored in upb_msg.fields_by_num.  upb_msg_fieldbynum() looks up
 * one of these and hands back a pointer to the embedded field. */
struct upb_fieldsbynum_entry {
  struct upb_inttable_entry e;  /* Table bookkeeping (declared in upb_table.h). */
  struct upb_msg_field f;       /* The field description itself. */
};

/* Entry type stored in upb_msg.fields_by_name.  upb_msg_fieldbyname() looks up
 * one of these and hands back a pointer to the embedded field. */
struct upb_fieldsbyname_entry {
  struct upb_strtable_entry e;  /* Table bookkeeping (declared in upb_table.h). */
  struct upb_msg_field f;       /* The field description itself. */
};

/* Run-time description of a message type: its descriptor plus the lookup
 * tables and layout information needed to access instances of the message. */
struct upb_msg {
  /* Descriptor this msg was initialized from.  Owned by the caller of
   * upb_msg_init() and must outlive the msg. */
  struct google_protobuf_DescriptorProto *descriptor;
  size_t size;              /* NOTE(review): presumably the size in bytes of
                             * one in-memory instance -- confirm in upb_msg.c. */
  int num_fields;
  int set_flags_bytes;      /* Bytes of "set" flags at the start of the struct;
                             * upb_msg_clear() zeroes exactly this many. */
  int num_required_fields;  /* Required fields have the lowest set bitmasks. */
  struct upb_inttable fields_by_num;   /* Queried by upb_msg_fieldbynum(). */
  struct upb_strtable fields_by_name;  /* Queried by upb_msg_fieldbyname(). */
  struct upb_msg_field *fields;        /* Indexed by upb_msg_field.field_index. */
};

/* Initialize and free a upb_msg.
 *
 * upb_msg_init: the caller retains ownership of d, but the msg will contain
 * references to it, so d must outlive the msg.  Note that init does not
 * resolve upb_msg_field.ref -- that is left to the caller.
 * NOTE(review): the bool result presumably reports success; confirm against
 * the implementation.
 *
 * upb_msg_free: releases a msg previously set up with upb_msg_init. */
bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d);
void upb_msg_free(struct upb_msg *m);

/* Field lookup.  Although the lookups are written to be as fast as possible,
 * callers should still cache the result when they can.
 *
 * upb_msg_fieldbynum: finds the field of m whose field number is `number`.
 * Returns a pointer to the field description stored inside m's table, or NULL
 * if no such field is found. */
INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
                                                uint32_t number) {
  struct upb_fieldsbynum_entry *entry;
  entry = upb_inttable_lookup(&m->fields_by_num, number,
                              sizeof(struct upb_fieldsbynum_entry));
  if (!entry) return NULL;
  return &entry->f;
}
/* Finds the field of m whose name is `name`.  Returns a pointer to the field
 * description stored inside m's table, or NULL if no such field is found.
 * Cache the result when possible rather than repeating the lookup. */
INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
                                                 struct upb_string *name) {
  struct upb_fieldsbyname_entry *entry;
  entry = upb_strtable_lookup(&m->fields_by_name, name);
  if (!entry) return NULL;
  return &entry->f;
}

/* Variable-length data (strings and arrays).**********************************/

/* Represents an array (a repeated field) of any type.  The interpretation of
 * the data in the array depends on the type.  The upb_array_get_*() and
 * upb_array_set_*() accessors cast `data` to the element type and index it;
 * they do not consult `len`. */
struct upb_array {
  void *data;  /* Size of individual elements is based on type. */
  uint32_t len;     /* Measured in elements. */
};

/* A generic array of structs, using void* instead of specific types.  Has the
 * same member order and member types as struct upb_array (pointer first, then
 * a uint32_t length). */
struct upb_msg_array {
  void **elements;  /* One pointer per element. */
  uint32_t len;     /* Measured in elements. */
};

/* An array of strings.  Has the same member order and member types as
 * struct upb_array (pointer first, then a uint32_t length). */
struct upb_string_array {
  struct upb_string **elements;  /* One string pointer per element. */
  uint32_t len;                  /* Measured in elements. */
};

/* Specific arrays of all the primitive types.
 *
 * Each generated struct uses the same layout as the generic struct upb_array
 * (element pointer first, then a uint32_t element count), so the same memory
 * can be viewed either through the generic type or through the typed one. */
#define UPB_DEFINE_PRIMITIVE_ARRAY(type, name) \
  struct upb_ ## name ## _array { \
    type *elements; \
    uint32_t len; \
  };

UPB_DEFINE_PRIMITIVE_ARRAY(double,   double)
UPB_DEFINE_PRIMITIVE_ARRAY(float,    float)
UPB_DEFINE_PRIMITIVE_ARRAY(int32_t,  int32)
UPB_DEFINE_PRIMITIVE_ARRAY(int64_t,  int64)
UPB_DEFINE_PRIMITIVE_ARRAY(uint32_t, uint32)
UPB_DEFINE_PRIMITIVE_ARRAY(uint64_t, uint64)
UPB_DEFINE_PRIMITIVE_ARRAY(bool,     bool)
/* The helper is only needed for the instantiations above; do not leak it to
 * files that include this header. */
#undef UPB_DEFINE_PRIMITIVE_ARRAY

/* Names the array-of-struct type belonging to struct_type, e.g.
 * UPB_STRUCT_ARRAY(foo) is "struct foo_array". */
#define UPB_STRUCT_ARRAY(struct_type) struct struct_type ## _array

/* Defines the array type named by UPB_STRUCT_ARRAY(struct_type).  struct_type
 * must be usable as a type name on its own (it is used without a "struct"
 * keyword).  The layout matches struct upb_msg_array and struct upb_array:
 * element pointer first, then a uint32_t element count. */
#define UPB_DEFINE_STRUCT_ARRAY(struct_type) \
  UPB_STRUCT_ARRAY(struct_type) { \
    struct_type **elements; \
    uint32_t len; \
  };

/* Accessors for primitive types.  ********************************************/

/* For each primitive type we define a set of six functions:
 *
 *  // For accessing a field of a struct (s points to the raw struct data).
 *  int32_t *upb_msg_get_int32_ptr(void *s, struct upb_msg_field *f);
 *  int32_t upb_msg_get_int32(void *s, struct upb_msg_field *f);
 *  void upb_msg_set_int32(void *s, struct upb_msg_field *f, int32_t val);
 *
 *  // For accessing an element of an array.
 *  int32_t *upb_array_get_int32_ptr(struct upb_array *a, int n);
 *  int32_t upb_array_get_int32(struct upb_array *a, int n);
 *  void upb_array_set_int32(struct upb_array *a, int n, int32_t val);
 *
 * For fields that are themselves arrays (struct upb_array*) we provide only
 * the first three, because protobufs do not support arrays of arrays.
 *
 * These do no existence checks, bounds checks, or type checks. */

/* Generates the three struct-field accessors for one C type:
 *
 *   upb_msg_get_<name>_ptr(s, f)  address of the field's value inside s
 *   upb_msg_get_<name>(s, f)      the value stored there
 *   upb_msg_set_<name>(s, f, v)   stores v there
 *
 * s is the raw struct data and f describes the field; the value lives
 * f->byte_offset bytes past s.  The "set" flag of the field is neither read
 * nor written.  The INLINE parameter is the storage/inline specifier placed
 * in front of each generated function. */
#define UPB_DEFINE_ACCESSORS(ctype, name, INLINE) \
  INLINE ctype *upb_msg_get_ ## name ## _ptr( \
      void *s, struct upb_msg_field *f) { \
    char *base = (char*)s; \
    return (ctype*)(base + f->byte_offset); \
  } \
  INLINE ctype upb_msg_get_ ## name( \
      void *s, struct upb_msg_field *f) { \
    ctype *slot = upb_msg_get_ ## name ## _ptr(s, f); \
    return *slot; \
  } \
  INLINE void upb_msg_set_ ## name( \
      void *s, struct upb_msg_field *f, ctype val) { \
    ctype *slot = upb_msg_get_ ## name ## _ptr(s, f); \
    *slot = val; \
  }

/* Generates the three array-element accessors for one C type:
 *
 *   upb_array_get_<name>_ptr(a, n)  address of element n of a
 *   upb_array_get_<name>(a, n)      the value of element n
 *   upb_array_set_<name>(a, n, v)   stores v into element n
 *
 * a->data is treated as an array of ctype; n is not checked against a->len.
 * The INLINE parameter is the storage/inline specifier placed in front of
 * each generated function. */
#define UPB_DEFINE_ARRAY_ACCESSORS(ctype, name, INLINE) \
  INLINE ctype *upb_array_get_ ## name ## _ptr(struct upb_array *a, int n) { \
    ctype *elems = (ctype*)a->data; \
    return &elems[n]; \
  } \
  INLINE ctype upb_array_get_ ## name(struct upb_array *a, int n) { \
    ctype *slot = upb_array_get_ ## name ## _ptr(a, n); \
    return *slot; \
  } \
  INLINE void upb_array_set_ ## name(struct upb_array *a, int n, ctype val) { \
    ctype *slot = upb_array_get_ ## name ## _ptr(a, n); \
    *slot = val; \
  }

/* Generates both the struct-field accessors and the array-element accessors
 * (six functions in total) for one C type. */
#define UPB_DEFINE_ALL_ACCESSORS(ctype, name, INLINE) \
  UPB_DEFINE_ACCESSORS(ctype, name, INLINE) \
  UPB_DEFINE_ARRAY_ACCESSORS(ctype, name, INLINE)

/* Instantiate the accessors.  Scalars, strings/bytes (held as
 * struct upb_string*) and submessages (held as void*) get all six functions. */
UPB_DEFINE_ALL_ACCESSORS(double,   double, INLINE)
UPB_DEFINE_ALL_ACCESSORS(float,    float,  INLINE)
UPB_DEFINE_ALL_ACCESSORS(int32_t,  int32,  INLINE)
UPB_DEFINE_ALL_ACCESSORS(int64_t,  int64,  INLINE)
UPB_DEFINE_ALL_ACCESSORS(uint32_t, uint32, INLINE)
UPB_DEFINE_ALL_ACCESSORS(uint64_t, uint64, INLINE)
UPB_DEFINE_ALL_ACCESSORS(bool,     bool,   INLINE)
UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, bytes, INLINE)
UPB_DEFINE_ALL_ACCESSORS(struct upb_string*, string, INLINE)
UPB_DEFINE_ALL_ACCESSORS(void*, substruct, INLINE)
/* Array-typed fields only get the struct-field accessors: there are no arrays
 * of arrays, so no upb_array_*_array functions are generated. */
UPB_DEFINE_ACCESSORS(struct upb_array*, array, INLINE)

/* Returns the index of the byte, counted from the start of the struct, that
 * holds the "set" bit of the field with the given field_index (eight flags
 * are packed into each byte). */
INLINE size_t upb_isset_offset(uint32_t field_index) {
  const uint32_t byte_index = field_index >> 3;
  return byte_index;
}

/* Returns a one-bit mask selecting the "set" bit of the field with the given
 * field_index within its flag byte (see upb_isset_offset): bit number
 * field_index % 8, counting from the least significant bit. */
INLINE uint8_t upb_isset_mask(uint32_t field_index) {
  const uint32_t bit = field_index & 7u;
  return (uint8_t)(1u << bit);
}

/* Functions for reading and writing the "set" flags in the pbstruct.  These
 * *only* set and test the flags: any dynamic memory a field may reference is
 * not allocated, freed or otherwise managed here -- that is the client's
 * responsibility.
 *
 * upb_msg_set: marks field f as set in the struct data s. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
{
  char *flags = (char*)s;
  const uint32_t idx = f->field_index;
  flags[upb_isset_offset(idx)] |= upb_isset_mask(idx);
}

/* Marks field f as not set in the struct data s.  Only the flag bit is
 * cleared; the field's value and any memory it references are left alone. */
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
{
  char *flags = (char*)s;
  const uint32_t idx = f->field_index;
  const uint8_t bit = upb_isset_mask(idx);
  flags[upb_isset_offset(idx)] &= ~bit;
}

/* Returns true if field f is marked as set in the struct data s.  Only the
 * flag bit is consulted; the field's value is not examined. */
INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f)
{
  const char *flags = (const char*)s;
  const uint32_t idx = f->field_index;
  return (flags[upb_isset_offset(idx)] & upb_isset_mask(idx)) != 0;
}

/* Returns true if every required field of message type m is marked as set in
 * the struct data s.
 *
 * Required fields occupy the lowest m->num_required_fields "set" bits, so the
 * check walks the flag bytes from the start of s.  Bits above the required
 * ones belong to non-required fields and are ignored; a message with no
 * required fields always passes, without reading s. */
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
{
  const uint8_t *flags = (const uint8_t*)s;
  int remaining = m->num_required_fields;
  int i = 0;
  /* Bytes made up entirely of required-field bits must have every bit set. */
  while(remaining >= 8) {
    if(flags[i++] != 0xFF) return false;
    remaining -= 8;
  }
  /* The last, partially-required byte also carries the flags of optional
   * fields, so test only the low `remaining` bits instead of comparing the
   * whole byte. */
  if(remaining > 0) {
    const uint8_t required_mask = (uint8_t)((1u << remaining) - 1u);
    if((flags[i] & required_mask) != required_mask) return false;
  }
  return true;
}

/* Marks every field of the struct data s as not set by zeroing the first
 * m->set_flags_bytes bytes of s.  Nothing beyond those bytes is touched. */
INLINE void upb_msg_clear(void *s, struct upb_msg *m)
{
  const int flag_bytes = m->set_flags_bytes;
  memset(s, 0, flag_bytes);
}

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* PBSTRUCT_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback