summaryrefslogtreecommitdiff
path: root/src/upb_def.h
blob: 9950c86326424b12ce20960974f5b52218581490 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009-2011 Joshua Haberman.  See LICENSE for details.
 *
 * Provides a mechanism for loading proto definitions from descriptors, and
 * data structures to represent those definitions.  These form the protobuf
 * schema, and are used extensively throughout upb:
 * - upb_msgdef: describes a "message" construct.
 * - upb_fielddef: describes a message field.
 * - upb_enumdef: describes an enum.
 * (TODO: definitions of extensions and services).
 *
 * Defs are obtained from a upb_symtab object.  A upb_symtab is empty when
 * constructed, and definitions can be added by supplying descriptors.
 *
 * Defs are immutable and reference-counted.  Symbol tables reference any defs
 * that are the "current" definitions.  If an extension is loaded that adds a
 * field to an existing message, a new msgdef is constructed that includes the
 * new field and the old msgdef is unref'd.  The old msgdef will still be ref'd
 * by messages (if any) that were constructed with that msgdef.
 *
 * This file contains routines for creating and manipulating the definitions
 * themselves.  To create and manipulate actual messages, see upb_msg.h.
 */

#ifndef UPB_DEF_H_
#define UPB_DEF_H_

#include "upb_atomic.h"
#include "upb_table.h"

#ifdef __cplusplus
extern "C" {
#endif

/* upb_def: base class for defs  **********************************************/

// All the different kinds of defs we support.  These correspond 1:1 with
// declarations in a .proto file.
// Discriminator stored in upb_def.type (as a upb_deftype_t).  The
// upb_dyncast_*/upb_downcast_* helpers at the bottom of this file compare
// def->type against these values.
typedef enum {
  UPB_DEF_MSG = 0,
  UPB_DEF_ENUM,
  UPB_DEF_SVC,
  UPB_DEF_EXT,
  // Internal-only, placeholder for a def that hasn't been resolved yet.
  UPB_DEF_UNRESOLVED,

  // For specifying that defs of any type are requested from getdefs.
  UPB_DEF_ANY = -1
} upb_deftype;

// This typedef is more space-efficient than declaring an enum var directly.
// It is a signed type, so UPB_DEF_ANY (-1) is representable.
typedef int8_t upb_deftype_t;

// Common header shared by defs.  upb_msgdef and upb_enumdef embed it as their
// first member ("base"), which is what UPB_UPCAST and the
// upb_dyncast_*/upb_downcast_* helpers in this file rely on.
typedef struct {
  upb_string *fqname;  // Fully qualified.
  upb_atomic_refcount_t refcount;  // Managed by upb_def_ref()/upb_def_unref().
  upb_deftype_t type;  // Holds one of the upb_deftype values.

  // The is_cyclic flag could go in upb_msgdef instead of here, because only
  // messages can be involved in cycles.  However, putting them here is free
  // from a space perspective because structure alignment will otherwise leave
  // three bytes empty after type.  It also makes ref and unref more
  // efficient, because we don't have to downcast to msgdef before checking the
  // is_cyclic flag.
  bool is_cyclic;
  uint16_t search_depth;  // Used during initialization dfs.
} upb_def;

// These must not be called directly!
// They are the out-of-line slow paths used by upb_def_ref() (for cyclic defs)
// and upb_def_unref() (when upb_atomic_unref() reports true) respectively.
void _upb_def_cyclic_ref(upb_def *def);
void _upb_def_reftozero(upb_def *def);

// Call to ref/deref a def.
INLINE void upb_def_ref(upb_def *def) {
  // The refcount is always bumped.  Extra work is only needed when
  // upb_atomic_ref() reports true *and* the def takes part in a cycle.
  // NOTE(review): "true" presumably signals a 0 -> 1 transition -- confirm in
  // upb_atomic.h.  def must be non-NULL (unlike upb_def_unref).
  if (!upb_atomic_ref(&def->refcount)) return;
  if (def->is_cyclic) _upb_def_cyclic_ref(def);
}
INLINE void upb_def_unref(upb_def *def) {
  // A NULL def is accepted and ignored.
  if (!def) return;
  // Hand off to the slow path only when upb_atomic_unref() reports true.
  if (upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}

// Yields a pointer to the "base" member of *ptr, e.g. turning a upb_msgdef*
// into the upb_def* expected by upb_def_ref()/upb_def_unref().
#define UPB_UPCAST(ptr) (&(ptr)->base)


/* upb_fielddef ***************************************************************/

// A upb_fielddef describes a single field in a message.  It isn't a full def
// in the sense that it derives from upb_def.  It cannot stand on its own; it
// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
// It is also reference-counted.
struct _upb_fielddef {
  // Compared against UPB_TYPE(...) constants by the predicates below.
  uint8_t type;
  // Compared against UPB_LABEL(...) constants (see upb_isarray).
  uint8_t label;
  // True if we own a ref on "def" (below).  This is true unless this edge is
  // part of a cycle.
  bool owned;
  // NOTE(review): set_bit_mask/set_bit_offset presumably locate this field's
  // "is set" bit inside a upb_msg -- confirm against upb_msg.h.
  uint8_t set_bit_mask;

  int32_t number;
  int16_t field_index;  // Indicates set bit.

  uint16_t set_bit_offset;
  uint32_t byte_offset;           // Where in a upb_msg to find the data.

  upb_value default_value;
  upb_string *name;
  struct _upb_msgdef *msgdef;

  // For the case of an enum or a submessage, points to the def for that type.
  // upb_hasdef() tells whether the field's type implies one.
  upb_def *def;
  upb_atomic_refcount_t refcount;
};

// A variety of tests about the type of a field.
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
  // Both MESSAGE and GROUP count as submessage types.
  if (type == UPB_TYPE(MESSAGE)) return true;
  return type == UPB_TYPE(GROUP);
}
INLINE bool upb_issubmsg(upb_fielddef *f) {
  // Field-level convenience wrapper around upb_issubmsgtype().
  upb_fieldtype_t field_type = f->type;
  return upb_issubmsgtype(field_type);
}
INLINE bool upb_isstring(upb_fielddef *f) {
  // BYTES fields are treated as strings here, just like STRING fields.
  if (f->type == UPB_TYPE(BYTES)) return true;
  return f->type == UPB_TYPE(STRING);
}
INLINE bool upb_isarray(upb_fielddef *f) {
  // A field is an array exactly when its label is REPEATED.
  const bool repeated = (f->label == UPB_LABEL(REPEATED));
  return repeated;
}
// Does the type of this field imply that it should contain an associated def?
INLINE bool upb_hasdef(upb_fielddef *f) {
  // Submessage (MESSAGE/GROUP) and ENUM fields are the ones with a def.
  if (upb_issubmsg(f)) return true;
  return f->type == UPB_TYPE(ENUM);
}

// Value type of the field as a whole: UPB_VALUETYPE_ARRAY for repeated fields,
// otherwise the field's own type.
INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) {
  if (!upb_isarray(f)) return f->type;
  return UPB_VALUETYPE_ARRAY;
}

// Value type of one element of a repeated field.  Calling this on a
// non-repeated field trips the assert in debug builds.
INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) {
  assert(upb_isarray(f));
  upb_valuetype_t elem_type = f->type;
  return elem_type;
}

// True for repeated fields, string/bytes fields and submessage fields.
// NOTE(review): "mm" presumably stands for "memory-managed" -- confirm.
INLINE bool upb_field_ismm(upb_fielddef *f) {
  if (upb_isarray(f)) return true;
  if (upb_isstring(f)) return true;
  return upb_issubmsg(f);
}

// Like upb_field_ismm(), but ignores the label: true for string/bytes and
// submessage types only.
INLINE bool upb_elem_ismm(upb_fielddef *f) {
  if (upb_isstring(f)) return true;
  return upb_issubmsg(f);
}

/* upb_msgdef *****************************************************************/

// Structure that describes a single .proto message type.
typedef struct _upb_msgdef {
  upb_def base;  // Reached through UPB_UPCAST().
  // NOTE(review): presumably a second refcount used only for defs that are
  // part of a cycle (see upb_def.is_cyclic) -- confirm in upb_def.c.
  upb_atomic_refcount_t cycle_refcount;
  // NOTE(review): size/set_flags_bytes presumably describe the in-memory
  // layout of a upb_msg of this type -- confirm against upb_msg.h.
  uint32_t size;
  uint32_t set_flags_bytes;

  // Tables for looking up fields by number and name.
  upb_inttable itof;  // int to field
  upb_strtable ntof;  // name to field

  // Immutable msg instance that has all default values set.
  // TODO: need a way of making this immutable!
  struct _upb_msg *default_message;
} upb_msgdef;

// Hash table entries for looking up fields by name or number.
// Entry of upb_msgdef.itof (keyed by field number); upb_msgdef_itofent()
// returns a pointer to one of these.
typedef struct {
  // NOTE(review): purpose not visible here; presumably reserves space that
  // the inttable implementation uses internally -- confirm in upb_table.h.
  bool junk;
  upb_fieldtype_t field_type;
  upb_wire_type_t native_wire_type;
  upb_fielddef *f;  // The field itself; what upb_msgdef_itof() returns.
} upb_itof_ent;
// Entry of upb_msgdef.ntof (keyed by field name); see upb_msgdef_ntof().
typedef struct {
  upb_strtable_entry e;
  upb_fielddef *f;  // The field itself; what upb_msgdef_ntof() returns.
} upb_ntof_ent;

// Drops one reference on a msgdef by forwarding to upb_def_unref() on its
// embedded base def.
INLINE void upb_msgdef_unref(upb_msgdef *md) {
  upb_def *base = UPB_UPCAST(md);
  upb_def_unref(base);
}
// Takes one reference on a msgdef by forwarding to upb_def_ref() on its
// embedded base def.
INLINE void upb_msgdef_ref(upb_msgdef *md) {
  upb_def *base = UPB_UPCAST(md);
  upb_def_ref(base);
}

// Looks up a field by name or number.  While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible.  These return NULL if no such field is found.
INLINE upb_itof_ent *upb_msgdef_itofent(upb_msgdef *m, uint32_t num) {
  // Look the field number up in the int-to-field table; the entry size is
  // passed so the table can address entries of this concrete type.
  upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_fastlookup(
      &m->itof, num, sizeof(upb_itof_ent));
  return ent;
}

// Field lookup by number; NULL when the message has no such field.
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) {
  upb_itof_ent *ent = upb_msgdef_itofent(m, num);
  if (!ent) return NULL;
  return ent->f;
}

// Field lookup by name; NULL when the message has no such field.
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) {
  upb_ntof_ent *ent = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
  if (!ent) return NULL;
  return ent->f;
}

// Number of fields in the message, taken from the size of the name-to-field
// table.
INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) {
  upb_field_count_t count = upb_strtable_count(&m->ntof);
  return count;
}

// Iteration over fields.  The order is undefined.
//   upb_msg_iter i;
//   for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
//     upb_fielddef *f = upb_msg_iter_field(i);
//     // ...
//   }
// A field iterator is just an iterator over the msgdef's int table.
typedef upb_inttable_iter upb_msg_iter;

// Start iteration over the fields of m / advance an existing iterator.  Test
// the result with upb_msg_done() before calling upb_msg_iter_field().
upb_msg_iter upb_msg_begin(upb_msgdef *m);
upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
// True once the field iterator has been exhausted.
INLINE bool upb_msg_done(upb_msg_iter iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}

// Field the iterator currently points at.  The iterator's value is
// dereferenced unconditionally, so only call this while !upb_msg_done(iter).
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
  return ((upb_itof_ent*)upb_inttable_iter_value(iter))->f;
}


/* upb_enumdef ****************************************************************/

// Numeric value of an enum member, as used by upb_enumdef_ntoi()/_iton().
typedef int32_t upb_enumval_t;

// Structure that describes a single .proto enum type.
typedef struct _upb_enumdef {
  upb_def base;       // Reached through UPB_UPCAST().
  upb_strtable ntoi;  // name to int
  upb_inttable iton;  // int to name
  upb_enumval_t default_value;  // The first value listed in the enum.
} upb_enumdef;

// Entry type for the name-to-int table (presumably upb_enumdef.ntoi).
typedef struct {
  upb_strtable_entry e;
  // NOTE(review): stored unsigned here although upb_enumval_t is int32_t --
  // confirm negative enum values round-trip as intended.
  uint32_t value;
} upb_ntoi_ent;

// Entry type for the int-to-name table; upb_enum_iter_name() reads "string"
// from the entry an enum iterator points at.
typedef struct {
  // NOTE(review): purpose not visible here; presumably reserves space that
  // the inttable implementation uses internally -- confirm in upb_table.h.
  bool junk;
  upb_string *string;
} upb_iton_ent;

// Lookups from name to integer and vice-versa.
// NOTE(review): the bool result presumably reports whether "name" was found,
// with *num written only on success -- confirm in upb_def.c.
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
// Caller does not own a ref on the returned string.
// NOTE(review): the result for an unknown number is not stated here
// (presumably NULL) -- confirm in upb_def.c.
upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);

// Iteration over name/value pairs.  The order is undefined.
//   upb_enum_iter i;
//   for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
//     // ...
//   }
// An enum iterator is just an iterator over an int table.
typedef upb_inttable_iter upb_enum_iter;

// Start iteration over the members of e / advance an existing iterator.  Test
// the result with upb_enum_done() before reading the name or number.
upb_enum_iter upb_enum_begin(upb_enumdef *e);
upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
// True once the enum iterator has been exhausted.
INLINE bool upb_enum_done(upb_enum_iter iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}

// Name of the enum member the iterator currently points at.  The iterator's
// value is dereferenced unconditionally, so only call this while
// !upb_enum_done(iter).
INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
  return ((upb_iton_ent*)upb_inttable_iter_value(iter))->string;
}
// Number of the enum member the iterator currently points at, i.e. the
// iterator's int-table key converted to int32_t.
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
  const int32_t number = (int32_t)upb_inttable_iter_key(iter);
  return number;
}


/* upb_symtab *****************************************************************/

// A SymbolTable is where upb_defs live.  It is empty when first constructed.
// Clients add definitions to the symtab by supplying descriptors (as defined
// in descriptor.proto) via the upb_stream interface.
struct _upb_symtab {
  // Managed by upb_symtab_ref()/upb_symtab_unref().
  upb_atomic_refcount_t refcount;
  upb_rwlock_t lock;       // Protects all members except the refcount.
  upb_strtable symtab;     // The symbol table.
  upb_msgdef *fds_msgdef;  // Msgdef for google.protobuf.FileDescriptorSet.
};
typedef struct _upb_symtab upb_symtab;

// Initializes a upb_symtab.  Symtabs are not freed explicitly, but unref'd
// when the caller is done with them.
// NOTE(review): the returned symtab presumably starts with one ref owned by
// the caller, to be released with upb_symtab_unref() -- confirm in upb_def.c.
upb_symtab *upb_symtab_new(void);
// Invoked by upb_symtab_unref() when upb_atomic_unref() reports true.
void _upb_symtab_free(upb_symtab *s);  // Must not be called directly!

// Takes one reference on the symtab.  s must be non-NULL (unlike
// upb_symtab_unref).
INLINE void upb_symtab_ref(upb_symtab *s) {
  (void)upb_atomic_ref(&s->refcount);
}
INLINE void upb_symtab_unref(upb_symtab *s) {
  // A NULL symtab is accepted and ignored.
  if (!s) return;
  // Free the symtab only when upb_atomic_unref() reports true.
  if (upb_atomic_unref(&s->refcount)) _upb_symtab_free(s);
}

// Resolves the given symbol using the rules described in descriptor.proto,
// namely:
//
//    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
//    scoping rules are used to find the type (i.e. first the nested types
//    within this message are searched, then within the parent, on up to the
//    root namespace).
//
// If a def is found, the caller owns one ref on the returned def.  Otherwise
// returns NULL.
// NOTE(review): "base" is presumably the fully-qualified name of the scope
// from which the relative lookup of "sym" starts -- confirm in upb_def.c.
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);

// Find an entry in the symbol table with this exact name.  If a def is found,
// the caller owns one ref on the returned def.  Otherwise returns NULL.
// Release the returned ref with upb_def_unref() when done.
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);

// Gets an array of pointers to all currently active defs in this symtab.  The
// caller owns the returned array (which is of length *count) as well as a ref
// to each symbol inside.  If type is UPB_DEF_ANY then defs of all types are
// returned, otherwise only defs of the required type are returned.
// NOTE(review): how the caller should release the array itself is not stated
// here (presumably free()) -- confirm in upb_def.c.
upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type);

// upb_defbuilder: For adding defs to the symtab.
// You allocate the defbuilder, which can handle a single descriptor.
// It will be freed automatically when the parse completes.
// Both structs are only forward-declared in this header, so they are opaque
// to code that includes only this file and can be handled by pointer only.
struct _upb_defbuilder;
typedef struct _upb_defbuilder upb_defbuilder;
struct _upb_handlers;

// Allocates a new defbuilder that will add defs to the given symtab.
// NOTE(review): whether the defbuilder takes its own ref on "s" is not
// visible here -- confirm in upb_def.c.
upb_defbuilder *upb_defbuilder_new(upb_symtab *s);

// Registers handlers that will operate on a defbuilder to add the defs
// to the defbuilder's symtab.  Will free itself when the parse finishes.
//
// TODO: should this allow redefinition?  Either is possible, but which is
// more useful?  Maybe it should be an option.
// NOTE(review): only the handlers object is passed; the defbuilder is
// presumably supplied separately as the handlers' closure -- confirm against
// the upb_handlers API.
void upb_defbuilder_reghandlers(struct _upb_handlers *h);


/* upb_def casts **************************************************************/

// Dynamic casts, for determining if a def is of a particular type at runtime.
// UPB_DYNAMIC_CAST_DEF(lower, upper) forward-declares struct _upb_<lower> and
// defines upb_dyncast_<lower>(def), which returns def cast to that struct when
// def->type == UPB_DEF_<upper> and NULL otherwise.  def itself must be
// non-NULL because def->type is read unconditionally.
// NOTE(review): the ';' after each invocation follows a complete function
// definition, leaving an empty file-scope declaration that strict ISO C
// (-pedantic) diagnoses -- consider dropping it.
#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
  struct _upb_ ## lower;  /* Forward-declare. */ \
  INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
    if(def->type != UPB_DEF_ ## upper) return NULL; \
    return (struct _upb_ ## lower*)def; \
  }
UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
UPB_DYNAMIC_CAST_DEF(svcdef, SVC);
UPB_DYNAMIC_CAST_DEF(extdef, EXT);
UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DYNAMIC_CAST_DEF

// Downcasts, for when someone wants to assert that a def is of a particular
// type.  These are only checked if we are building debug.
//
// UPB_DOWNCAST_DEF(lower, upper) forward-declares struct _upb_<lower> and
// defines upb_downcast_<lower>(def), which asserts that
// def->type == UPB_DEF_<upper> and then returns def cast to that struct.
// When assert() is compiled out the cast is unchecked.  def must be non-NULL.
// NOTE(review): the ';' after each invocation follows a complete function
// definition, leaving an empty file-scope declaration that strict ISO C
// (-pedantic) diagnoses -- consider dropping it.
#define UPB_DOWNCAST_DEF(lower, upper) \
  struct _upb_ ## lower;  /* Forward-declare. */ \
  INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
    assert(def->type == UPB_DEF_ ## upper); \
    return (struct _upb_ ## lower*)def; \
  }
UPB_DOWNCAST_DEF(msgdef, MSG);
UPB_DOWNCAST_DEF(enumdef, ENUM);
UPB_DOWNCAST_DEF(svcdef, SVC);
UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_DEF_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback