summaryrefslogtreecommitdiff
path: root/upb/def.h
blob: 4dc9c16a2a6f895d68383d9c5a2f6b16cc37078e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009-2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * Provides a mechanism for creating and linking proto definitions.
 * These form the protobuf schema, and are used extensively throughout upb:
 * - upb_msgdef: describes a "message" construct.
 * - upb_fielddef: describes a message field.
 * - upb_enumdef: describes an enum.
 * (TODO: definitions of services).
 *
 * These defs are mutable (and not thread-safe) when first created.
 * Once they are added to a defbuilder (and later its symtab) they become
 * immutable.
 *
 * TODO: consider making thread-safe even when first created by using mutexes
 * internally.  Would also have to change any methods returning pointers to
 * return copies instead.
 */

#ifndef UPB_DEF_H_
#define UPB_DEF_H_

#include "upb/atomic.h"
#include "upb/table.h"

#ifdef __cplusplus
extern "C" {
#endif

struct _upb_symtab;
typedef struct _upb_symtab upb_symtab;

// All the different kind of defs we support.  These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
  UPB_DEF_MSG = 1,
  UPB_DEF_ENUM,
  UPB_DEF_SERVICE,          // Not yet implemented.

  UPB_DEF_ANY = -1,         // Wildcard for upb_symtab_get*()
  UPB_DEF_UNRESOLVED = 99,  // Internal-only.
} upb_deftype_t;


/* upb_def: base class for defs  **********************************************/

typedef struct {
  char *fqname;     // Fully qualified.
  upb_symtab *symtab;     // Def is mutable iff symtab == NULL.
  upb_atomic_t refcount;  // Owns a ref on symtab iff (symtab && refcount > 0).
  upb_deftype_t type;
} upb_def;

// Call to ref/unref a def.  Can be used at any time, but is not thread-safe
// until the def is in a symtab.  While a def is in a symtab, everything
// reachable from that def (the symtab and all defs in the symtab) are
// guaranteed to be alive.
void upb_def_ref(upb_def *def);
void upb_def_unref(upb_def *def);
upb_def *upb_def_dup(upb_def *def);

// A def is mutable until it has been added to a symtab.
bool upb_def_ismutable(upb_def *def);
INLINE const char *upb_def_fqname(upb_def *def) { return def->fqname; }
bool upb_def_setfqname(upb_def *def, const char *fqname);  // Only if mutable.

#define UPB_UPCAST(ptr) (&(ptr)->base)


/* upb_fielddef ***************************************************************/

// A upb_fielddef describes a single field in a message.  It isn't a full def
// in the sense that it derives from upb_def.  It cannot stand on its own; it
// must be part of a upb_msgdef.  It is also reference-counted.
typedef struct _upb_fielddef {
  struct _upb_msgdef *msgdef;
  upb_def *def;  // if upb_hasdef(f)
  upb_atomic_t refcount;
  bool finalized;

  // The following fields may be modified until the def is finalized.
  uint8_t type;          // Use UPB_TYPE() constants.
  uint8_t label;         // Use UPB_LABEL() constants.
  int16_t hasbit;
  uint16_t offset;
  bool hasdefault;
  bool active;
  int32_t number;
  char *name;
  upb_value defaultval;  // Only meaningful for non-repeated scalars and strings.
  upb_value fval;
  struct _upb_accessor_vtbl *accessor;
  const void *default_ptr;
  const void *prototype;
} upb_fielddef;

upb_fielddef *upb_fielddef_new(void);
void upb_fielddef_ref(upb_fielddef *f);
void upb_fielddef_unref(upb_fielddef *f);
upb_fielddef *upb_fielddef_dup(upb_fielddef *f);

// A fielddef is mutable until its msgdef has been added to a symtab.
bool upb_fielddef_ismutable(upb_fielddef *f);

// Read accessors.  May be called any time.
INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
INLINE char *upb_fielddef_name(upb_fielddef *f) { return f->name; }
INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
INLINE struct _upb_msgdef *upb_fielddef_msgdef(upb_fielddef *f) {
  return f->msgdef;
}
INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(upb_fielddef *f) {
  return f->accessor;
}
INLINE const char *upb_fielddef_typename(upb_fielddef *f) {
  return f->def ? f->def->fqname : NULL;
}

// The enum or submessage def for this field, if any.  Only meaningful for
// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true).
// Since defs are not linked together until they are in a symtab, this
// will return NULL until the msgdef is in a symtab.
upb_def *upb_fielddef_subdef(upb_fielddef *f);

// Write accessors.  "Number" and "name" must be set before the fielddef is
// added to a msgdef.  For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future.
bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
bool upb_fielddef_setname(upb_fielddef *f, const char *name);

// These writers may be called at any time prior to being put in a symtab.
bool upb_fielddef_settype(upb_fielddef *f, uint8_t type);
bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
// The name of the message or enum this field is referring to.  Must be found
// at name resolution time (when upb_symtab_add() is called).
bool upb_fielddef_settypename(upb_fielddef *f, const char *name);

// A variety of tests about the type of a field.
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
  return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
}
INLINE bool upb_isstringtype(upb_fieldtype_t type) {
  return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
}
INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
  return !upb_issubmsgtype(type) && !upb_isstringtype(type);
}
INLINE bool upb_issubmsg(upb_fielddef *f) { return upb_issubmsgtype(f->type); }
INLINE bool upb_isstring(upb_fielddef *f) { return upb_isstringtype(f->type); }
INLINE bool upb_isseq(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }

// Does the type of this field imply that it should contain an associated def?
INLINE bool upb_hassubdef(upb_fielddef *f) {
  return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
}


/* upb_msgdef *****************************************************************/

// Structure that describes a single .proto message type.
typedef struct _upb_msgdef {
  upb_def base;

  // Tables for looking up fields by number and name.
  upb_inttable itof;  // int to field
  upb_strtable ntof;  // name to field

  // The following fields may be modified until finalized.
  uint16_t size;
  uint8_t hasbit_bytes;
  // The range of tag numbers used to store extensions.
  uint32_t extstart, extend;
} upb_msgdef;

// Hash table entries for looking up fields by name or number.
typedef struct {
  bool junk;
  upb_fielddef *f;
} upb_itof_ent;
typedef struct {
  upb_fielddef *f;
} upb_ntof_ent;

upb_msgdef *upb_msgdef_new(void);
INLINE void upb_msgdef_unref(upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }
INLINE void upb_msgdef_ref(upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }

// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
// the fields) but with any references to submessages broken and replaced with
// just the name of the submessage.  This can be put back into another symtab
// and the names will be re-resolved in the new context.
upb_msgdef *upb_msgdef_dup(upb_msgdef *m);

// Read accessors.  May be called at any time.
INLINE uint16_t upb_msgdef_size(upb_msgdef *m) { return m->size; }
INLINE uint8_t upb_msgdef_hasbit_bytes(upb_msgdef *m) {
  return m->hasbit_bytes;
}
INLINE uint32_t upb_msgdef_extstart(upb_msgdef *m) { return m->extstart; }
INLINE uint32_t upb_msgdef_extend(upb_msgdef *m) { return m->extend; }

// Write accessors.  May only be called before the msgdef is in a symtab.
void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end);

// Adds a fielddef to a msgdef.  Caller retains its ref on the fielddef.  May
// only be done before the msgdef is in a symtab (requires upb_def_ismutable(m)
// for the msgdef).  The fielddef's name and number must be set, and the
// message may not already contain any field with this name or number, and this
// fielddef may not be part of another message, otherwise false is returned and
// no action is performed.
bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef **f, int n);
INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
  return upb_msgdef_addfields(m, &f, 1);
}

// Sets the layout of all fields according to default rules:
// 1. Hasbits for required fields come first, then optional fields.
// 2. Values are laid out in a way that respects alignment rules.
// 3. The order is chosen to minimize memory usage.
// This should only be called once all fielddefs have been added.
// TODO: will likely want the ability to exclude strings/submessages/arrays.
// TODO: will likely want the ability to define a header size.
void upb_msgdef_layout(upb_msgdef *m);

// Looks up a field by name or number.  While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible.  These return NULL if no such field is found.
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
  upb_itof_ent *e = (upb_itof_ent*)
      upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
  return e ? e->f : NULL;
}

INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, const char *name) {
  upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
  return e ? e->f : NULL;
}

INLINE int upb_msgdef_numfields(upb_msgdef *m) {
  return upb_strtable_count(&m->ntof);
}

// Iteration over fields.  The order is undefined.
// TODO: the iteration should be in field order.
// Iterators are invalidated when a field is added or removed.
//   upb_msg_iter i;
//   for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
//     upb_fielddef *f = upb_msg_iter_field(i);
//     // ...
//   }
typedef upb_inttable_iter upb_msg_iter;

upb_msg_iter upb_msg_begin(upb_msgdef *m);
upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }

// Iterator accessor.
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
  upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
  return ent->f;
}


/* upb_enumdef ****************************************************************/

typedef struct _upb_enumdef {
  upb_def base;
  upb_strtable ntoi;
  upb_inttable iton;
  int32_t defaultval;
} upb_enumdef;

typedef struct {
  uint32_t value;
} upb_ntoi_ent;

typedef struct {
  bool junk;
  char *str;
} upb_iton_ent;

upb_enumdef *upb_enumdef_new(void);
INLINE void upb_enumdef_ref(upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }
INLINE void upb_enumdef_unref(upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }
upb_enumdef *upb_enumdef_dup(upb_enumdef *e);

INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }

// May only be set if upb_def_ismutable(e).
void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);

// Adds a value to the enumdef.  Requires that no existing val has this
// name or number (returns false and does not add if there is).  May only
// be called before the enumdef is in a symtab.
bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);

// Lookups from name to integer and vice-versa.
bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num);
bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num);
// Caller does not own the returned string.
const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);

// Iteration over name/value pairs.  The order is undefined.
// Adding an enum val invalidates any iterators.
//   upb_enum_iter i;
//   for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
//     // ...
//   }
typedef upb_inttable_iter upb_enum_iter;

upb_enum_iter upb_enum_begin(upb_enumdef *e);
upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }

// Iterator accessors.
INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
  upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
  return e->str;
}
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
  return upb_inttable_iter_key(iter);
}


/* upb_symtab *****************************************************************/

// A symtab (symbol table) is where upb_defs live.  It is empty when first
// constructed.  Clients add definitions to the symtab (or replace existing
// definitions) by calling upb_symtab_add().

upb_symtab *upb_symtab_new(void);
void upb_symtab_ref(upb_symtab *s);
void upb_symtab_unref(upb_symtab *s);

// Resolves the given symbol using the rules described in descriptor.proto,
// namely:
//
//    If the name starts with a '.', it is fully-qualified.  Otherwise, C++-like
//    scoping rules are used to find the type (i.e. first the nested types
//    within this message are searched, then within the parent, on up to the
//    root namespace).
//
// If a def is found, the caller owns one ref on the returned def.  Otherwise
// returns NULL.
// TODO: make return const
upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym);

// Find an entry in the symbol table with this exact name.  If a def is found,
// the caller owns one ref on the returned def.  Otherwise returns NULL.
// TODO: make return const
upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym);

// Gets an array of pointers to all currently active defs in this symtab.  The
// caller owns the returned array (which is of length *count) as well as a ref
// to each symbol inside.  If type is UPB_DEF_ANY then defs of all types are
// returned, otherwise only defs of the required type are returned.
// TODO: make return const
upb_def **upb_symtab_getdefs(upb_symtab *s, int *n, upb_deftype_t type);

// Adds the given defs to the symtab, resolving all symbols.  Only one def per
// name may be in the list, but defs can replace existing defs in the symtab.
// The entire operation either succeeds or fails.  If the operation fails, the
// symtab is unchanged, false is returned, and status indicates the error.  The
// caller retains its ref on all defs in all cases.
bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status);

// Frees defs that are no longer active in the symtab and are no longer
// reachable.  Such defs are not freed when they are replaced in the symtab
// if they are still reachable from defs that are still referenced.
void upb_symtab_gc(upb_symtab *s);


/* upb_def casts **************************************************************/

// Dynamic casts, for determining if a def is of a particular type at runtime.
#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
  struct _upb_ ## lower;  /* Forward-declare. */ \
  INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
    if(def->type != UPB_DEF_ ## upper) return NULL; \
    return (struct _upb_ ## lower*)def; \
  }
UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
UPB_DYNAMIC_CAST_DEF(svcdef, SERVICE);
UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DYNAMIC_CAST_DEF

// Downcasts, for when some wants to assert that a def is of a particular type.
// These are only checked if we are building debug.
#define UPB_DOWNCAST_DEF(lower, upper) \
  struct _upb_ ## lower;  /* Forward-declare. */ \
  INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
    assert(def->type == UPB_DEF_ ## upper); \
    return (struct _upb_ ## lower*)def; \
  }
UPB_DOWNCAST_DEF(msgdef, MSG);
UPB_DOWNCAST_DEF(enumdef, ENUM);
UPB_DOWNCAST_DEF(svcdef, SERVICE);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF


/* upb_deflist ****************************************************************/

// upb_deflist is an internal-only dynamic array for storing a growing list of
// upb_defs.
typedef struct {
  upb_def **defs;
  uint32_t len;
  uint32_t size;
} upb_deflist;

void upb_deflist_init(upb_deflist *l);
void upb_deflist_uninit(upb_deflist *l);
void upb_deflist_push(upb_deflist *l, upb_def *d);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_DEF_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback