summaryrefslogtreecommitdiff
path: root/upb/handlers.h
blob: 9ed02c114f8a1e7fff080acb0775fa85704e4e1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2010-2011 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * upb_handlers is a generic visitor-like interface for iterating over a stream
 * of protobuf data.  You can register function pointers that will be called
 * for each message and/or field as the data is being parsed or iterated over,
 * without having to know the source format that we are parsing from.  This
 * decouples the parsing logic from the processing logic.
 */

#ifndef UPB_HANDLERS_H
#define UPB_HANDLERS_H

#include <limits.h>
#include "upb/upb.h"
#include "upb/def.h"
#include "upb/bytestream.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Handlers protocol definition ***********************************************/

// A upb_handlers object represents a graph of handlers.  Each message can have
// a set of handlers as well as a set of fields which themselves have handlers.
// Fields that represent submessages or groups are linked to other message
// handlers, so the overall set of handlers can form a graph structure (which
// may be cyclic).
//
// The upb_mhandlers (message handlers) object can have the following handlers:
//
//   static upb_flow_t startmsg(void *closure) {
//     // Called when the message begins.  "closure" was supplied by our caller.
//     return UPB_CONTINUE;
//   }
//
//   static void endmsg(void *closure, upb_status *status) {
//     // Called when processing of this message ends, whether in success or
//     // failure.  "status" indicates the final status of processing, and can
//     // also be modified in-place to update the final status.
//     //
//     // Since this callback is guaranteed to always be called eventually, it
//     // can be used to free any resources that were allocated during processing.
//   }
//
//   TODO: unknown field handler.
//
// The upb_fhandlers (field handlers) object can have the following handlers:
//
//   static upb_flow_t value(void *closure, upb_value fval, upb_value val) {
//     // Called when the field's value is encountered.  "fval" contains
//     // whatever value was bound to this field at registration time
//     // (for upb_register_all(), this will be the field's upb_fielddef*).
//     return UPB_CONTINUE;
//   }
//
//   static upb_sflow_t startsubmsg(void *closure, upb_value fval) {
//     // Called when a submessage begins.  The second element of the return
//     // value is the closure for the submessage.
//     return UPB_CONTINUE_WITH(closure);
//   }
//
//   static upb_flow_t endsubmsg(void *closure, upb_value fval) {
//     // Called when a submessage ends.
//     return UPB_CONTINUE;
//   }
//
//   static upb_sflow_t startseq(void *closure, upb_value fval) {
//     // Called when a sequence (repeated field) begins.  The second element
//     // of the return value is the closure for the sequence.
//     return UPB_CONTINUE_WITH(closure);
//   }
//
//   static upb_flow_t endseq(void *closure, upb_value fval) {
//     // Called when a sequence ends.
//     return UPB_CONTINUE;
//   }
//
// All handlers except the endmsg handler return a value from this enum, to
// control whether parsing will continue or not.
typedef enum {
  // Data source should continue calling callbacks.  This is the only value
  // for which upb_dispatch_value() does not call _upb_dispatcher_abortjmp().
  UPB_CONTINUE = 0,

  // Halt processing permanently (in a non-resumable way).  The endmsg handlers
  // for any currently open messages will be called, which can supply a more
  // specific status message.  No further input data will be consumed.
  UPB_BREAK = -1,

  // Skips to the end of the current submessage (or if we are at the top
  // level, skips to the end of the entire message).  In other words, it is
  // like a UPB_BREAK that applies only to the current level.
  //
  // If you return UPB_SKIPSUBMSG from a startmsg handler, the endmsg handler
  // will be called to perform cleanup and return a status.  Returning
  // UPB_SKIPSUBMSG from a startsubmsg handler will *not* call the startmsg,
  // endmsg, or endsubmsg handlers.
  //
  // If UPB_SKIPSUBMSG is returned from the top-level message, no further input
  // data will be consumed.
  UPB_SKIPSUBMSG = -2,

  // TODO: Add UPB_SUSPEND, for resumable producers/consumers.
} upb_flow_t;

// Return type of the "start" field handlers (upb_startfield_handler, used for
// both startsubmsg and startseq).  Besides the flow-control code, a start
// handler also has to hand back the closure to use for the submessage or
// sequence that is beginning.
typedef struct {
  // Flow-control code, interpreted like the return value of other handlers.
  upb_flow_t flow;
  // Closure for the submessage/sequence being started.
  void *closure;
} upb_sflow_t;

// Packs a flow-control code and a closure into the upb_sflow_t value that
// start handlers return.
INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) {
  upb_sflow_t result;
  result.flow = flow;
  result.closure = closure;
  return result;
}
// Result for a start handler that wants processing to continue, with "c" as
// the closure for the submessage/sequence being started.
#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c)
// Result for a start handler that wants to halt: UPB_BREAK with a NULL closure.
#define UPB_SBREAK UPB_SFLOW(UPB_BREAK, NULL)

// Typedefs for all of the handler functions defined above.
typedef upb_flow_t (upb_startmsg_handler)(void *c);
typedef void (upb_endmsg_handler)(void *c, upb_status *status);
typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val);
typedef upb_sflow_t (upb_startfield_handler)(void *closure, upb_value fval);
typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval);


/* upb_fhandlers **************************************************************/

// A upb_fhandlers object represents the set of handlers associated with one
// specific message field.
//
// TODO: remove upb_decoder-specific fields from this, and instead have
// upb_decoderplan make a deep copy of the whole graph with its own fields
// added.
// Forward declarations; inside this struct both types are used only through
// pointers.
struct _upb_decoder;
struct _upb_mhandlers;
typedef struct _upb_fieldent {
  // Field type and repeated-ness (cf. the "type" and "repeated" arguments of
  // upb_mhandlers_newfhandlers()).
  upb_fieldtype_t type;
  bool repeated;
  // Reference count; presumably adjusted by upb_fhandlers_ref()/unref().
  upb_atomic_t refcount;
  // Field number -- presumably the key under which this entry is stored in
  // upb_mhandlers.fieldtab (TODO confirm).
  uint32_t number;
  // If >= 0, the index of the hasbit that upb_dispatch_value() sets in the
  // closure after the value handler runs; a negative value disables this.
  int32_t valuehasbit;
  // NOTE(review): presumably the message handlers this field belongs to --
  // confirm against the implementation.
  struct _upb_mhandlers *msg;
  struct _upb_mhandlers *submsg;  // Set iff upb_issubmsgtype(type) == true.
  // Bound value that is handed to this field's handlers as "fval".
  upb_value fval;
  // The handlers themselves.  upb_dispatch_value() skips "value" when it is
  // NULL.
  upb_value_handler *value;
  upb_startfield_handler *startsubmsg;
  upb_endfield_handler *endsubmsg;
  upb_startfield_handler *startseq;
  upb_endfield_handler *endseq;
#ifdef UPB_USE_JIT_X64
  // Only present in JIT builds.  NOTE(review): these look like labels in the
  // generated code (cf. the jit_*_pclabel members of upb_mhandlers); their
  // exact meaning is not visible in this header.
  uint32_t jit_pclabel;
  uint32_t jit_pclabel_notypecheck;
  uint32_t jit_submsg_done_pclabel;
#endif
  // Decoder-specific hook (see the TODO above about moving decoder-specific
  // fields out of this struct); its contract is defined by the decoder.
  void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
} upb_fhandlers;

// Table entry wrapping a upb_fhandlers pointer.  NOTE(review): presumably the
// value type stored in upb_mhandlers.fieldtab (field number -> upb_fhandlers);
// confirm against the table code.
typedef struct {
  bool junk;  // Stolen by table impl; see table.h for details.
  // The field handlers this entry refers to.
  upb_fhandlers *f;
} upb_itofhandlers_ent;

// fhandlers are created as part of a upb_handlers instance, but can be ref'd
// and unref'd to prolong the life of the handlers.  "f" is the field handlers
// object whose reference is taken or released.
void upb_fhandlers_ref(upb_fhandlers *f);
void upb_fhandlers_unref(upb_fhandlers *f);

// upb_fhandlers accessors.
//
// UPB_FHANDLERS_ACCESSORS(member, ctype) defines a setter/getter pair for the
// upb_fhandlers member "member", whose type is "ctype":
//   void  upb_fhandlers_set<member>(upb_fhandlers *f, ctype v);
//   ctype upb_fhandlers_get<member>(const upb_fhandlers *f);
#define UPB_FHANDLERS_ACCESSORS(member, ctype)                         \
  INLINE void upb_fhandlers_set ## member(upb_fhandlers *f, ctype v) { \
    f->member = v;                                                     \
  }                                                                    \
  INLINE ctype upb_fhandlers_get ## member(const upb_fhandlers *f) {   \
    return f->member;                                                  \
  }

// Bound value and the five handler slots.
UPB_FHANDLERS_ACCESSORS(fval, upb_value)
UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*)
UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*)
UPB_FHANDLERS_ACCESSORS(endsubmsg, upb_endfield_handler*)
UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*)
UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*)

// Links to message handlers.
UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*)
UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*)

// If set to >= 0, the hasbit will automatically be set after the corresponding
// callback is called (when a JIT is enabled, this can be significantly more
// efficient than setting the hasbit yourself inside the callback).  Could add
// this for seq and submsg also, but doesn't look like a win at the moment.
UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t)


/* upb_mhandlers **************************************************************/

// A upb_mhandlers object represents the set of handlers associated with a
// message in the graph of messages.

typedef struct _upb_mhandlers {
  // Reference count; presumably adjusted by upb_mhandlers_ref()/unref().
  upb_atomic_t refcount;
  // Message-level handlers (set through upb_mhandlers_setstartmsg() and
  // upb_mhandlers_setendmsg()).
  upb_startmsg_handler *startmsg;
  upb_endmsg_handler *endmsg;
  upb_inttable fieldtab;  // Maps field number -> upb_fhandlers.
  // NOTE(review): presumably true when this message is reached through a
  // GROUP field rather than a MESSAGE field -- confirm.
  bool is_group;
#ifdef UPB_USE_JIT_X64
  // Used inside the JIT to track labels (jmp targets) in the generated code.
  uint32_t jit_startmsg_pclabel;  // Starting a parse of this (sub-)message.
  uint32_t jit_endofbuf_pclabel;  // ptr hitend, but delim_end or jit_end?
  uint32_t jit_endofmsg_pclabel;  // Done parsing this (sub-)message.
  uint32_t jit_dyndispatch_pclabel;  // Dispatch by table lookup.
  uint32_t jit_unknownfield_pclabel;  // Parsed an unknown field.
  // NOTE(review): presumably the largest field number present, bounding
  // "tablearray" below -- confirm in the JIT.
  uint32_t max_field_number;
  // Currently keyed on field number.  Could also try keying it
  // on encoded or decoded tag, or on encoded field number.
  void **tablearray;
  // Pointer to the JIT code for parsing this message.
  void *jit_func;
#endif
} upb_mhandlers;

// mhandlers are created as part of a upb_handlers instance, but can be ref'd
// and unref'd to prolong the life of the handlers.
void upb_mhandlers_ref(upb_mhandlers *m);
void upb_mhandlers_unref(upb_mhandlers *m);

// Creates the handlers for a new field of message "m" with field number "n",
// the given type and the given repeated-ness.  There must not be an existing
// field with this number or abort() will be called.
// TODO: this should take a name also (the name would then have to be unique
// within the message as well).
upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
                                          upb_fieldtype_t type, bool repeated);
// Like the previous but for MESSAGE or GROUP fields; "subm" is the handlers
// object for the submessage.  For GROUP fields, the given submessage must not
// have any fields with this field number.
upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
                                               upb_fieldtype_t type,
                                               bool repeated,
                                               upb_mhandlers *subm);

// upb_mhandlers accessors.
//
// UPB_MHANDLERS_ACCESSORS(name, type) defines a setter/getter pair for the
// upb_mhandlers member "name", whose type is "type":
//   void upb_mhandlers_set<name>(upb_mhandlers *m, type v);
//   type upb_mhandlers_get<name>(const upb_mhandlers *m);
// The getter takes a const pointer (as the upb_fhandlers getters do), so it
// can be used on read-only handlers; non-const callers are unaffected.
#define UPB_MHANDLERS_ACCESSORS(name, type) \
  INLINE void upb_mhandlers_set ## name(upb_mhandlers *m, type v){m->name = v;} \
  INLINE type upb_mhandlers_get ## name(const upb_mhandlers *m) { return m->name; }
// No ';' after the invocations: the macro expands to complete function
// definitions, and a lone ';' at file scope is not valid ISO C.
UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*)
UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*)


/* upb_handlers ***************************************************************/

// A graph of handlers, stored as an array of per-message handlers.
struct _upb_handlers {
  // Reference count; presumably adjusted by upb_handlers_ref()/unref().
  upb_atomic_t refcount;
  upb_mhandlers **msgs;  // Array of mhandlers, [0]=toplevel.
  // NOTE(review): presumably the number of entries in use and the allocated
  // capacity of "msgs", respectively -- confirm in the implementation.
  int msgs_len, msgs_size;
  // NOTE(review): presumably tells the decoder whether to JIT-compile these
  // handlers; not used anywhere in this header.
  bool should_jit;
};
typedef struct _upb_handlers upb_handlers;

// Creates a new upb_handlers object.  NOTE(review): presumably returned
// holding one reference that is released with upb_handlers_unref() -- confirm.
upb_handlers *upb_handlers_new(void);
// Presumably increment / decrement the reference count of "h".
void upb_handlers_ref(upb_handlers *h);
void upb_handlers_unref(upb_handlers *h);

// Appends a new message to the graph of handlers and returns it.  This message
// can be obtained later at index upb_handlers_msgcount()-1.  All handlers will
// be initialized to no-op handlers.
upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h);
// Returns the message handlers at position "index" in the graph; index 0 is
// the top-level message.
upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index);

// Convenience function for registering handlers for all messages and
// fields in a msgdef and all its children.  For every registered message
// "msgreg_cb" will be called with the newly-created mhandlers, and likewise
// "fieldreg_cb" will be called with the newly-created fhandlers for every
// registered field.  "closure" is handed to both callbacks as their first
// argument.
//
// See upb_handlers_reghandlerset() below for an example.
typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, const upb_msgdef *m);
typedef void upb_onfieldreg(void *closure, upb_fhandlers *fh, const upb_fielddef *f);
upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
                                      upb_onmsgreg *msgreg_cb,
                                      upb_onfieldreg *fieldreg_cb,
                                      void *closure);

// Convenience function for registering a set of handlers for all messages and
// fields in a msgdef and its children, with the fval bound to the upb_fielddef.
// Any of the handlers may be NULL, in which case no callback will be set and
// the nop callback will be used.
//
// upb_handlerset is the bundle of handlers to install.
// upb_handlers_reghandlerset() passes it as the closure to upb_onmreg_hset()
// and upb_onfreg_hset(), which copy each non-NULL member into the mhandlers /
// fhandlers being registered.
typedef struct {
  // Message-level handlers, applied to every registered message.
  upb_startmsg_handler *startmsg;
  upb_endmsg_handler *endmsg;
  // Field-level handlers, applied to every registered field.
  upb_value_handler *value;
  upb_startfield_handler *startsubmsg;
  upb_endfield_handler *endsubmsg;
  upb_startfield_handler *startseq;
  upb_endfield_handler *endseq;
} upb_handlerset;

// upb_onmsgreg callback used by upb_handlers_reghandlerset().  "c" is the
// upb_handlerset; each of its message-level handlers that is non-NULL is
// installed on "mh".  The msgdef "m" is not needed.
INLINE void upb_onmreg_hset(void *c, upb_mhandlers *mh, const upb_msgdef *m) {
  upb_handlerset *handlers = (upb_handlerset*)c;
  (void)m;
  if (handlers->startmsg) {
    upb_mhandlers_setstartmsg(mh, handlers->startmsg);
  }
  if (handlers->endmsg) {
    upb_mhandlers_setendmsg(mh, handlers->endmsg);
  }
}
// upb_onfieldreg callback used by upb_handlers_reghandlerset().  "c" is the
// upb_handlerset; each of its field-level handlers that is non-NULL is
// installed on "fh", and the field's fval is bound to the fielddef "f".
INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) {
  upb_handlerset *handlers = (upb_handlerset*)c;
  upb_value bound;

  if (handlers->value) {
    upb_fhandlers_setvalue(fh, handlers->value);
  }
  if (handlers->startsubmsg) {
    upb_fhandlers_setstartsubmsg(fh, handlers->startsubmsg);
  }
  if (handlers->endsubmsg) {
    upb_fhandlers_setendsubmsg(fh, handlers->endsubmsg);
  }
  if (handlers->startseq) {
    upb_fhandlers_setstartseq(fh, handlers->startseq);
  }
  if (handlers->endseq) {
    upb_fhandlers_setendseq(fh, handlers->endseq);
  }

  // Every handler of this field receives the fielddef as its "fval".
  upb_value_setfielddef(&bound, f);
  upb_fhandlers_setfval(fh, bound);
}
// Registers the handlers in "hs" for msgdef "m" and its children by calling
// upb_handlers_regmsgdef() with upb_onmreg_hset / upb_onfreg_hset as the
// per-message and per-field callbacks and "hs" as their closure.  Returns
// whatever upb_handlers_regmsgdef() returns.
INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgdef *m,
                                                 upb_handlerset *hs) {
  upb_onmsgreg *on_msg = upb_onmreg_hset;
  upb_onfieldreg *on_field = upb_onfreg_hset;
  return upb_handlers_regmsgdef(h, m, on_msg, on_field, hs);
}


/* upb_dispatcher *************************************************************/

// WARNING: upb_dispatcher should be considered INTERNAL-ONLY.  The interface
// between it and upb_decoder is somewhat tightly coupled and may change.
//
// upb_dispatcher can be used by sources of data to invoke the appropriate
// handlers on a upb_handlers object.  Besides maintaining the runtime stack of
// closures and handlers, the dispatcher checks the return status of user
// callbacks and properly handles statuses other than UPB_CONTINUE, invoking
// "skip" or "exit" handlers on the underlying data source as appropriate.

// One entry of the dispatcher's runtime stack (see upb_dispatcher.stack).
typedef struct {
  // Field handlers associated with this frame.  NOTE(review): presumably the
  // field whose submessage or sequence this frame represents -- confirm.
  upb_fhandlers *f;
  // Closure passed to handlers while this frame is on top of the stack
  // (upb_dispatch_value() uses d->top->closure).
  void *closure;
  // NOTE(review): presumably the offset at which this frame ends; the
  // is_packed comment below suggests UINT64_MAX means "no known end".
  uint64_t end_ofs;
  bool is_sequence;   // frame represents seq or submsg? (f might be both).
  bool is_packed;     // !upb_issubmsg(f) && end_ofs != UINT64_MAX
                      // (strings aren't pushed).
} upb_dispatcher_frame;

// Function type of the "exit" handler supplied to upb_dispatcher_init(); it
// takes a single void* argument.  NOTE(review): presumably invoked with the
// dispatcher's srcclosure -- confirm in the implementation.
typedef void upb_exit_handler(void *);

// Runtime state of a dispatcher: the stack of open frames plus the handlers
// for the current level.
typedef struct {
  // "top" is the frame currently on top of the stack (its closure is what
  // upb_dispatch_value() passes to handlers).  NOTE(review): "limit" is
  // presumably the end of "stack", used for overflow checks -- confirm.
  upb_dispatcher_frame *top, *limit;

  // Msg and dispatch table for the current level.
  upb_mhandlers *msgent;
  // NOTE(review): presumably the "top_msg" given to upb_dispatcher_reset().
  upb_mhandlers *toplevel_msgent;
  // Exit handler; declared UPB_NORETURN.  NOTE(review): presumably the "exit"
  // and "closure" arguments of upb_dispatcher_init() are stored in "exitjmp"
  // and "srcclosure" -- confirm.
  upb_exit_handler UPB_NORETURN *exitjmp;
  void *srcclosure;
  // NOTE(review): presumably whether the top frame is "implicit" in the sense
  // used by upb_dispatcher_islegalend() -- confirm.
  bool top_is_implicit;

  // Status object; per upb_dispatcher_init() the caller retains ownership.
  upb_status *status;
  // Stack.
  upb_dispatcher_frame stack[UPB_MAX_NESTING];
} upb_dispatcher;

// Initializes "d".  Caller retains ownership of the status object.  "exit" is
// a non-returning exit handler and "closure" an opaque pointer associated with
// it (NOTE(review): presumably passed back to "exit" -- confirm).
void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
                         upb_exit_handler UPB_NORETURN *exit, void *closure);
// Resets "d" for a new top-level message with handlers "top_msg" and closure
// "topclosure".  NOTE(review): the returned frame is presumably the new top
// frame -- confirm.
upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure,
                                           upb_mhandlers *top_msg);
// Counterpart of upb_dispatcher_init().
void upb_dispatcher_uninit(upb_dispatcher *d);

// Tests whether the message could legally end here (either the stack is empty
// or the only open stack frame is implicit).
bool upb_dispatcher_islegalend(upb_dispatcher *d);

// Unwinds one or more stack frames based on the given flow constant that was
// just returned from a handler.  Calls end handlers as appropriate.
// NOTE(review): the signature takes no flow constant, so the sentence above
// may be stale.  The function is declared UPB_NORETURN; upb_dispatch_value()
// calls it whenever a handler returns anything other than UPB_CONTINUE.
void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN;

// Sets bit number "hasbit" in the bit array that starts at "_p": bit i lives
// in byte i / 8, at position i % 8 counting from the least-significant bit.
// A negative "hasbit" means "no hasbit" and leaves the memory untouched.
INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) {
  // Operate on unsigned bytes: OR-ing bit 7 into a plain char relies on an
  // implementation-defined conversion wherever char is signed.
  unsigned char *bytes = (unsigned char*)_p;
  if (hasbit < 0) return;
  uint32_t bit = (uint32_t)hasbit;
  bytes[bit / 8] |= (unsigned char)(1u << (bit % 8));
}

// Dispatch functions -- call the user handler and handle errors.
// Dispatches a field value: calls f's value handler (if one is set) with the
// closure of the top stack frame, then sets the field's hasbit in that
// closure.  The hasbit is set before the handler's result is examined; if the
// result is anything other than UPB_CONTINUE, control passes to
// _upb_dispatcher_abortjmp(), which does not return.
INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
                               upb_value val) {
  upb_flow_t result =
      f->value ? f->value(d->top->closure, f->fval, val) : UPB_CONTINUE;
  _upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
  if (result == UPB_CONTINUE) return;
  _upb_dispatcher_abortjmp(d);
}
// Out-of-line dispatch functions, one per remaining handler kind.
// Message start / end; "status" is the status object for the endmsg step.
void upb_dispatch_startmsg(upb_dispatcher *d);
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
// Submessage and sequence start / end for field "f".  NOTE(review): each
// returns a upb_dispatcher_frame*, presumably the frame on top of the stack
// after the call -- confirm in the implementation.
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
                                               upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback