summaryrefslogtreecommitdiff
path: root/src/upb_def.h
blob: 28cc2586a0563b9bd8c67ec4091ed869d167debc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009-2011 Joshua Haberman.  See LICENSE for details.
 *
 * Provides a mechanism for loading proto definitions from descriptors, and
 * data structures to represent those definitions.  These form the protobuf
 * schema, and are used extensively throughout upb:
 * - upb_msgdef: describes a "message" construct.
 * - upb_fielddef: describes a message field.
 * - upb_enumdef: describes an enum.
 * (TODO: definitions of extensions and services).
 *
 * Defs are obtained from a upb_symtab object.  A upb_symtab is empty when
 * constructed, and definitions can be added by supplying descriptors.
 *
 * Defs are immutable and reference-counted.  Symbol tables reference any defs
 * that are the "current" definitions.  If an extension is loaded that adds a
 * field to an existing message, a new msgdef is constructed that includes the
 * new field and the old msgdef is unref'd.  The old msgdef will still be ref'd
 * by messages (if any) that were constructed with that msgdef.
 *
 * This file contains routines for creating and manipulating the definitions
 * themselves.  To create and manipulate actual messages, see upb_msg.h.
 */

#ifndef UPB_DEF_H_
#define UPB_DEF_H_

#include "upb_atomic.h"
#include "upb_stream.h"
#include "upb_table.h"

#ifdef __cplusplus
extern "C" {
#endif

/* upb_def: base class for defs  **********************************************/

// The kinds of def that are supported.  Each public kind corresponds 1:1 with
// a kind of declaration in a .proto file.  The numeric values are written out
// explicitly; they are the same values implicit enumeration would assign.
typedef enum {
  UPB_DEF_MSG = 0,
  UPB_DEF_ENUM = 1,
  UPB_DEF_SVC = 2,
  UPB_DEF_EXT = 3,

  // Internal-only: placeholder for a def that hasn't been resolved yet.
  UPB_DEF_UNRESOLVED = 4,

  // Wildcard for specifying that defs of any type are requested from getdefs.
  UPB_DEF_ANY = -1
} upb_deftype;

// One-byte integer used to store a upb_deftype value.  This is more
// space-efficient than declaring a variable of the enum type directly.
typedef int8_t upb_deftype_t;

// Common header shared by all defs.  The concrete def structs in this file
// (upb_msgdef, upb_enumdef) embed it as their first member, named "base".
typedef struct {
  upb_string *fqname;  // Fully qualified.
  upb_atomic_refcount_t refcount;
  // Holds one of the upb_deftype values; checked by the dyncast/downcast
  // helpers at the bottom of this file.
  upb_deftype_t type;

  // The is_cyclic flag could go in upb_msgdef instead of here, because only
  // messages can be involved in cycles.  However, putting it here is free
  // from a space perspective because structure alignment will otherwise leave
  // three bytes empty after type.  It also makes ref and unref more
  // efficient, because we don't have to downcast to msgdef before checking the
  // is_cyclic flag.
  bool is_cyclic;
  uint16_t search_depth;  // Used during initialization dfs.
} upb_def;

// These must not be called directly!  They are the out-of-line helpers that
// the inline upb_def_ref() and upb_def_unref() wrappers invoke when needed.
void _upb_def_cyclic_ref(upb_def *def);
void _upb_def_reftozero(upb_def *def);

// Call to ref/deref a def.
// Takes a reference on "def" (which must not be NULL).  When upb_atomic_ref()
// reports true and the def is flagged as cyclic, the extra bookkeeping is
// delegated to _upb_def_cyclic_ref().
INLINE void upb_def_ref(upb_def *def) {
  if (!upb_atomic_ref(&def->refcount)) return;
  if (def->is_cyclic) {
    _upb_def_cyclic_ref(def);
  }
}
// Drops a reference on "def".  A NULL def is accepted and ignored.  When
// upb_atomic_unref() reports true, _upb_def_reftozero() is invoked.
INLINE void upb_def_unref(upb_def *def) {
  if (!def) return;
  if (upb_atomic_unref(&def->refcount)) {
    _upb_def_reftozero(def);
  }
}

/* upb_fielddef ***************************************************************/

// A upb_fielddef describes a single field in a message.  It isn't a full def
// in the sense that it derives from upb_def.  It cannot stand on its own; it
// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
// It is also reference-counted.
typedef struct _upb_fielddef {
  upb_value default_value;

  // NOTE(review): presumably the field's name as declared in the .proto --
  // confirm against the code that populates fielddefs.
  upb_string *name;

  // NOTE(review): presumably the msgdef that contains this field -- confirm.
  struct _upb_msgdef *msgdef;

  // For the case of an enum or a submessage, points to the def for that type.
  // upb_hasdef() reports whether a field's type implies that this is set.
  upb_def *def;

  upb_atomic_refcount_t refcount;
  uint32_t byte_offset;           // Where in a upb_msg to find the data.

  // These are set only when this fielddef is part of a msgdef.
  upb_field_number_t number;
  upb_field_count_t field_index;  // Indicates set bit.

  // Compared against UPB_TYPE(...) values by the predicates below.
  upb_fieldtype_t type;
  // Compared against UPB_LABEL(REPEATED) by upb_isarray().
  upb_label_t label;
  // True if we own a ref on "def" (above).  This is true unless this edge is
  // part of a cycle.
  bool owned;
  // NOTE(review): set_bit_mask / set_bit_offset presumably locate this field's
  // "is set" bit inside a message -- confirm against upb_msg.
  uint8_t set_bit_mask;
  uint16_t set_bit_offset;
} upb_fielddef;

// A variety of tests about the type of a field.
// True when the field's type is GROUP or MESSAGE.
INLINE bool upb_issubmsg(upb_fielddef *f) {
  if (f->type == UPB_TYPE(GROUP)) return true;
  return f->type == UPB_TYPE(MESSAGE);
}
// True when the field's type is STRING or BYTES.
INLINE bool upb_isstring(upb_fielddef *f) {
  if (f->type == UPB_TYPE(STRING)) return true;
  return f->type == UPB_TYPE(BYTES);
}
// True when the field carries the REPEATED label.
INLINE bool upb_isarray(upb_fielddef *f) {
  const bool repeated = (f->label == UPB_LABEL(REPEATED));
  return repeated;
}
// Does the type of this field imply that it should contain an associated def?
// That is the case for enum fields and for submessage (group/message) fields.
INLINE bool upb_hasdef(upb_fielddef *f) {
  if (upb_issubmsg(f)) return true;
  return f->type == UPB_TYPE(ENUM);
}

// Value type of the field as a whole: UPB_VALUETYPE_ARRAY for repeated
// fields, otherwise the field's own type.
INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) {
  if (!upb_isarray(f)) return f->type;
  return UPB_VALUETYPE_ARRAY;
}

// Value type of a single element of a repeated field.  Asserts (in builds
// where assert() is active) that the field really is an array.
INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) {
  upb_valuetype_t elem_type;
  assert(upb_isarray(f));
  elem_type = f->type;
  return elem_type;
}

// True when the field is an array, a string/bytes field, or a submessage.
INLINE bool upb_field_ismm(upb_fielddef *f) {
  if (upb_isarray(f)) return true;
  if (upb_isstring(f)) return true;
  return upb_issubmsg(f);
}

// Like upb_field_ismm(), but ignores the repeated label: true when the
// field's type is string/bytes or a submessage.
INLINE bool upb_elem_ismm(upb_fielddef *f) {
  if (upb_isstring(f)) return true;
  return upb_issubmsg(f);
}

/* upb_msgdef *****************************************************************/

// Structure that describes a single .proto message type.  "base" must stay
// the first member: the dyncast/downcast helpers in this file cast a upb_def*
// directly to a struct _upb_msgdef*, and UPB_UPCAST() takes &ptr->base.
typedef struct _upb_msgdef {
  upb_def base;
  upb_atomic_refcount_t cycle_refcount;
  uint32_t size;
  uint32_t set_flags_bytes;

  // Tables for looking up fields by number and name.  Entries are
  // upb_itof_ent and upb_ntof_ent respectively.
  upb_inttable itof;  // int to field
  upb_strtable ntof;  // name to field
} upb_msgdef;

// Hash table entries for looking up fields by name or number.  Each entry
// starts with the generic table entry header "e" and adds the fielddef that
// the key maps to.
typedef struct {
  upb_inttable_entry e;
  upb_fielddef *f;
} upb_itof_ent;
typedef struct {
  upb_strtable_entry e;
  upb_fielddef *f;
} upb_ntof_ent;

// Looks up a field by name or number.  While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible.  These return NULL if no such field is found.
// Looks up a field of "m" by field number.  Returns NULL when the number is
// not present in the message's itof table.
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) {
  upb_itof_ent *ent =
      (upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*ent));
  if (!ent) return NULL;
  return ent->f;
}

// Looks up a field of "m" by name.  Returns NULL when the name is not present
// in the message's ntof table.
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) {
  upb_ntof_ent *ent = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
  if (!ent) return NULL;
  return ent->f;
}

// Number of fields in "m", taken as the entry count of its name->field table.
INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) {
  upb_field_count_t num_fields = upb_strtable_count(&m->ntof);
  return num_fields;
}

// Iteration over the fields of a msgdef.  The order is undefined.
//   upb_msg_iter i;
//   for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
//     upb_fielddef *f = upb_msg_iter_field(i);
//     // ...
//   }
// An iterator is a pointer to an itof table entry; upb_msg_done() treats a
// NULL iterator as the end of iteration.
typedef upb_itof_ent *upb_msg_iter;

upb_msg_iter upb_msg_begin(upb_msgdef *m);
upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
// True once iteration is finished, i.e. when the iterator is NULL.
INLINE bool upb_msg_done(upb_msg_iter iter) {
  return !iter;
}

// Returns the fielddef the iterator currently points at.  The iterator must
// not be NULL (check upb_msg_done() first).
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
  upb_fielddef *field = iter->f;
  return field;
}

/* upb_enumdef ****************************************************************/

// Structure that describes a single enum type.  "base" must stay the first
// member, because the dyncast/downcast helpers in this file cast a upb_def*
// directly to a struct _upb_enumdef*.
typedef struct _upb_enumdef {
  upb_def base;
  upb_strtable ntoi;  // name to int; entries are upb_ntoi_ent.
  upb_inttable iton;  // int to name; entries are upb_iton_ent.
} upb_enumdef;

// Entry of the name -> number table of an enumdef.
// NOTE(review): "value" is uint32_t whereas upb_enumval_t (below) is int32_t;
// confirm that the signedness difference is intended.
typedef struct {
  upb_strtable_entry e;
  uint32_t value;
} upb_ntoi_ent;

// Entry of the number -> name table of an enumdef.  The number is the table
// key (e.key); "string" is the corresponding name.
typedef struct {
  upb_inttable_entry e;
  upb_string *string;
} upb_iton_ent;

// Type used for enum numbers in the lookup API below.
typedef int32_t upb_enumval_t;

// Lookups from name to integer and vice-versa.
// upb_enumdef_ntoi() takes the name and an out-parameter "num" for the number.
// NOTE(review): the bool result presumably reports whether the name was found
// -- confirm against the implementation.
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
// Caller does not own a ref on the returned string.
upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);

// Iteration over the name/value pairs of an enumdef.  The order is undefined.
//   upb_enum_iter i;
//   for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
//     upb_string *name = upb_enum_iter_name(i);
//     int32_t number = upb_enum_iter_number(i);
//     // ...
//   }
// An iterator is a pointer to an iton table entry; upb_enum_done() treats a
// NULL iterator as the end of iteration.
typedef upb_iton_ent *upb_enum_iter;

upb_enum_iter upb_enum_begin(upb_enumdef *e);
upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
// True once iteration is finished, i.e. when the iterator is NULL.
INLINE bool upb_enum_done(upb_enum_iter iter) {
  return !iter;
}

// Name of the enum value the iterator currently points at.  The iterator
// must not be NULL.
INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
  upb_string *name = iter->string;
  return name;
}
// Number of the enum value the iterator currently points at, read from the
// table entry's key.  The iterator must not be NULL.
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
  int32_t number = iter->e.key;
  return number;
}


/* upb_symtab *****************************************************************/

// A symbol table (upb_symtab) is where upb_defs live.  It is empty when first
// constructed.  Clients add definitions to the symtab by supplying
// unserialized or serialized descriptors (as defined in descriptor.proto).
struct _upb_symtab {
  // Manipulated through upb_symtab_ref() / upb_symtab_unref().
  upb_atomic_refcount_t refcount;
  upb_rwlock_t lock;       // Protects all members except the refcount.
  upb_strtable symtab;     // The symbol table.
  upb_msgdef *fds_msgdef;  // Msgdef for google.protobuf.FileDescriptorSet.
};
typedef struct _upb_symtab upb_symtab;

// Creates a new upb_symtab.  Symtabs are not freed explicitly; instead the
// caller calls upb_symtab_unref() when it is done with one.
upb_symtab *upb_symtab_new(void);
// Must not be called directly!  Invoked by upb_symtab_unref().
void _upb_symtab_free(upb_symtab *s);  // Must not be called directly!

// Takes a reference on the symtab.  The result of upb_atomic_ref() is not
// needed here and is discarded.
INLINE void upb_symtab_ref(upb_symtab *s) {
  (void)upb_atomic_ref(&s->refcount);
}
// Drops a reference on the symtab; when upb_atomic_unref() reports true the
// symtab is handed to _upb_symtab_free().  "s" must not be NULL.
INLINE void upb_symtab_unref(upb_symtab *s) {
  if (!upb_atomic_unref(&s->refcount)) return;
  _upb_symtab_free(s);
}

// Resolves the symbol "sym", as referenced from the scope "base", using the
// rules described in descriptor.proto, namely:
//
//    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
//    scoping rules are used to find the type (i.e. first the nested types
//    within this message are searched, then within the parent, on up to the
//    root namespace).
//
// If a def is found, the caller owns one ref on the returned def.  Otherwise
// returns NULL.
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);

// Finds an entry in the symbol table with this exact name.  If a def is found,
// the caller owns one ref on the returned def.  Otherwise returns NULL.
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);

// Gets an array of pointers to all currently active defs in this symtab.  The
// caller owns the returned array (which is of length *count) as well as a ref
// to each def inside.  If type is UPB_DEF_ANY then defs of all types are
// returned, otherwise only defs of the requested type are returned.
upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type);

// "desc" is a upb_src that will yield data from the
// google.protobuf.FileDescriptorSet message type.  It is not necessary that
// the upb_def for FileDescriptorSet came from this symtab, but it must be
// compatible with the official descriptor.proto, as published by Google.
//
// upb_symtab_addfds() adds all the definitions from the given
// FileDescriptorSet to the symtab.  status indicates whether the operation
// was successful or not, and carries the error message (if any).
//
// TODO: should this allow redefinition?  Either is possible, but which is
// more useful?  Maybe it should be an option.
void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status);

// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab.
// This is necessary for bootstrapping, since these are the upb_defs that
// specify other defs and allow them to be loaded.
void upb_symtab_add_descriptorproto(upb_symtab *s);

// Returns the upb_msgdef for google.protobuf.FileDescriptorSet, which the
// caller owns a ref on.  This is a convenience function that is equivalent to
// looking up the symbol called "google.protobuf.FileDescriptorSet" yourself,
// except that it will only return a def that was added by
// upb_symtab_add_descriptorproto().
upb_msgdef *upb_symtab_fds_def(upb_symtab *s);


/* upb_def casts **************************************************************/

// Dynamic casts, for determining if a def is of a particular type at runtime.
#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
  struct _upb_ ## lower;  /* Forward-declare. */ \
  INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
    if(def->type != UPB_DEF_ ## upper) return NULL; \
    return (struct _upb_ ## lower*)def; \
  }
UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
UPB_DYNAMIC_CAST_DEF(svcdef, SVC);
UPB_DYNAMIC_CAST_DEF(extdef, EXT);
UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DYNAMIC_CAST_DEF

// Downcasts, for when some wants to assert that a def is of a particular type.
// These are only checked if we are building debug.
#define UPB_DOWNCAST_DEF(lower, upper) \
  struct _upb_ ## lower;  /* Forward-declare. */ \
  INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
    assert(def->type == UPB_DEF_ ## upper); \
    return (struct _upb_ ## lower*)def; \
  }
UPB_DOWNCAST_DEF(msgdef, MSG);
UPB_DOWNCAST_DEF(enumdef, ENUM);
UPB_DOWNCAST_DEF(svcdef, SVC);
UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF

// Upcast: yields a pointer to the "base" member of the struct that ptr points
// to (e.g. the upb_def embedded in a upb_msgdef or upb_enumdef).
#define UPB_UPCAST(ptr) (&(ptr)->base)

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_DEF_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback