summaryrefslogtreecommitdiff
path: root/src/upb_parse.h
blob: df260894cb2597b7a3f75184fd04c5f5de635b70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * upb_parse implements a high performance, callback-based, stream-oriented
 * parser (comparable to the SAX model in XML parsers).  For parsing protobufs
 * into in-memory messages (a more DOM-like model), see the routines in
 * upb_msg.h, which are layered on top of this parser.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 */

#ifndef UPB_PARSE_H_
#define UPB_PARSE_H_

#include <stdbool.h>
#include <stdint.h>
#include "upb.h"
#include "descriptor.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Event Callbacks. ***********************************************************/

// The tag callback is called immediately after a tag has been parsed.  The
// client should determine whether it wants to parse or skip the corresponding
// value.  If it wants to parse it, it must discover and return the correct
// .proto type (the tag only contains the wire type) and check that the wire
// type is appropriate for the .proto type (upb_check_type() in this header
// performs that comparison).  To skip the value (which means skipping all
// submessages, in the case of a submessage), the callback should return zero.
//
// The client can store a void* in *user_field_desc; this will be passed to
// the value callback or the string callback.
//
// Parameters:
//   udata:           the pointer that was supplied to upb_cbparser_reset().
//   tag:             the tag that was just parsed.
//   user_field_desc: out-parameter in which the client may store a per-field
//                    pointer of its own choosing.
// Returns the .proto field type to parse the value as, or zero to skip it.
typedef upb_field_type_t (*upb_tag_cb)(void *udata, struct upb_tag *tag,
                                       void **user_field_desc);

// The value callback is called when a regular value (i.e. not a string or
// submessage) is encountered which the client has opted to parse (by not
// returning 0 from the tag_cb).  The client must parse the value by calling
// upb_parse_value(), returning success or failure accordingly.
//
// Note that this callback can be called several times in a row for a single
// call to tag_cb in the case of packed arrays.
//
// Parameters:
//   udata:           the pointer that was supplied to upb_cbparser_reset().
//   buf, end:        start of the value's data and the end of the data that
//                    is currently available.
//   user_field_desc: the pointer the tag callback stored for this field.
//   outbuf:          presumably receives the position just past the parsed
//                    value, as upb_parse_value() sets it -- TODO confirm
//                    against the parser implementation.
typedef upb_status_t (*upb_value_cb)(void *udata, uint8_t *buf, uint8_t *end,
                                     void *user_field_desc, uint8_t **outbuf);

// The string callback is called when a string is parsed.  avail_len is the
// number of bytes that are currently available at str.  If the client is
// streaming and the current buffer ends in the middle of the string, this
// number could be less than total_len.
//
// Parameters:
//   udata:           the pointer that was supplied to upb_cbparser_reset().
//   str:             start of the string data available in the current buffer.
//   avail_len:       number of bytes readable at str right now.
//   total_len:       full length of the string.
//   user_field_desc: the pointer the tag callback stored for this field.
typedef void (*upb_str_cb)(void *udata, uint8_t *str, size_t avail_len,
                           size_t total_len, void *user_field_desc);

// The start and end callbacks are called when a submessage begins and ends,
// respectively.  Both receive the udata pointer that was supplied to
// upb_cbparser_reset().  The start callback additionally takes a
// user_field_desc argument (presumably the pointer the tag callback stored
// for the submessage field -- TODO confirm against the parser implementation).
typedef void (*upb_start_cb)(void *udata, void *user_field_desc);
typedef void (*upb_end_cb)(void *udata);

/* Callback parser interface. *************************************************/

// Allocates a new upb_cbparser.  The parser must be reset with
// upb_cbparser_reset() before it can be used.
struct upb_cbparser *upb_cbparser_new(void);
// Frees a upb_cbparser previously returned by upb_cbparser_new().
void upb_cbparser_free(struct upb_cbparser *p);

// Resets the internal state of an already-allocated parser.  Parsers must be
// reset before they can be used.  A parser can be reset multiple times.
//
// Parameters:
//   p:       the parser to reset.
//   udata:   opaque client pointer; it will be passed as the first argument
//            to every callback.
//   tagcb:   called after each tag is parsed (see upb_tag_cb).
//   valuecb: called for each non-string, non-submessage value the client
//            opted to parse (see upb_value_cb).
//   strcb:   called when a string is parsed (see upb_str_cb).
//   startcb: called when a submessage begins (see upb_start_cb).
//   endcb:   called when a submessage ends (see upb_end_cb).
void upb_cbparser_reset(struct upb_cbparser *p, void *udata,
                        upb_tag_cb tagcb,
                        upb_value_cb valuecb,
                        upb_str_cb strcb,
                        upb_start_cb startcb,
                        upb_end_cb endcb);


// Parses up to len bytes of protobuf data out of buf, calling the appropriate
// callbacks as values are parsed.
//
// The function returns a status indicating the success of the operation.  Data
// is parsed until no more data can be read from buf, or the callback returns an
// error like UPB_STATUS_USER_CANCELLED, or an error occurs.
//
// *read is set to the number of bytes consumed.  Note that this can be greater
// than len in the case that a string was recognized that spans beyond the end
// of the currently provided data.
//
// The buffer passed to the next call to upb_cbparser_parse must begin at the
// first byte after the consumed data (buf + *read), even in the case that
// *read > len.
//
// TODO: see if we can provide the following guarantee efficiently:
//   *read will always be >= len.
upb_status_t upb_cbparser_parse(struct upb_cbparser *p, void *buf, size_t len,
                                size_t *read);

extern upb_wire_type_t upb_expected_wire_types[];
// Reports whether wire type wt is the one expected on the wire for .proto
// field type ft, according to the upb_type_info table.
//
// NOTE(review): upb_expected_wire_types is declared above, but the lookup
// below goes through upb_type_info instead -- confirm which table is current.
INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
  // Packed arrays are not supported by this check yet.
  return wt == upb_type_info[ft].expected_wire_type;
}

/* Data-consuming functions (to be called from value cb). *********************/

// Parses and converts a value from the character data starting at buf (but not
// past end).  *outbuf will be set to one past the data that was read.  The
// caller must have previously checked that the wire type is appropriate for
// this field type.
//
// Parameters:
//   buf, end: start of the data to parse and the limit it must not read past.
//   ft:       the .proto field type to convert the value to.
//   v:        destination for the converted value.
//   outbuf:   receives the position one past the data that was read.
// Returns a status indicating whether the value was parsed.
upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
                             union upb_value_ptr v, uint8_t **outbuf);

// Parses a wire value with the given type (which must have been obtained from
// a tag that was just parsed) and sets *outbuf to one past the data that was
// read.
//
// Parameters:
//   buf, end: start of the data to parse and the end of the available data.
//   wt:       the wire type taken from the preceding tag.
//   wv:       destination for the parsed wire value.
//   outbuf:   receives the position one past the data that was read.
// Returns a status indicating whether the value was parsed.
upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
                                  union upb_wire_value *wv, uint8_t **outbuf);

/* Functions to read wire values. *********************************************/

// Most clients will not want to use these directly.

// Out-of-line varint decoder.  The inlined upb_get_v_uint64_t() below handles
// the common single-byte case itself and dispatches here for everything else.
// The parameters mirror those of upb_get_v_uint64_t().
upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
                                     uint8_t **outbuf);

// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT).
//
// Parameters:
//   buf, end: start of the data to read and the end of the available data.
//   val:      receives the decoded value.
//   outbuf:   receives the position of the first byte after the varint.
// Returns UPB_STATUS_OK on the fast path, otherwise whatever
// upb_get_v_uint64_t_full() returns.
INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val,
                                       uint8_t **outbuf)
{
  // We inline this common case (1-byte varints); if that fails we dispatch to
  // the full (non-inlined) version.  The buf < end guard keeps the fast path
  // from reading a byte that is not there when the buffer is empty; that case
  // is left to the full version to report.
  if(buf < end && (*buf & 0x80) == 0) {
    *val = *buf & 0x7f;
    *outbuf = buf + 1;
    return UPB_STATUS_OK;
  }
  return upb_get_v_uint64_t_full(buf, end, val, outbuf);
}

// Reads a varint for callers that only need its low 32 bits.  The varint is
// decoded as a full 64-bit value by upb_get_v_uint64_t() and then truncated.
// A failure status from the 64-bit read is propagated through UPB_CHECK.
INLINE upb_status_t upb_get_v_uint32_t(uint8_t *buf, uint8_t *end,
                                       uint32_t *val, uint8_t **outbuf)
{
  uint64_t wide;
  UPB_CHECK(upb_get_v_uint64_t(buf, end, &wide, outbuf));
  // Keep only the low 32 bits; anything above them is dropped.
  *val = (uint32_t)wide;
  return UPB_STATUS_OK;
}

// Reads a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
// Returns UPB_STATUS_NEED_MORE_DATA if fewer than four bytes lie between buf
// and end; otherwise stores the value in *val and sets *outbuf just past it.
INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end,
                                       uint32_t *val, uint8_t **outbuf)
{
  uint8_t *next = buf + sizeof(uint32_t);
  if(next > end) return UPB_STATUS_NEED_MORE_DATA;
#if UPB_UNALIGNED_READS_OK
  // Load all four bytes at once, straight from the buffer.
  *val = *(uint32_t*)buf;
#else
  // Assemble the value one byte at a time, least-significant byte first.
  *val = ((uint32_t)buf[0] << 0)  | ((uint32_t)buf[1] << 8) |
         ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
#endif
  *outbuf = next;
  return UPB_STATUS_OK;
}

// Reads a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
// Returns UPB_STATUS_NEED_MORE_DATA if fewer than eight bytes lie between buf
// and end; otherwise stores the value in *val and sets *outbuf just past it.
INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end,
                                       uint64_t *val, uint8_t **outbuf)
{
  uint8_t *next = buf + sizeof(uint64_t);
  if(next > end) return UPB_STATUS_NEED_MORE_DATA;
#if UPB_UNALIGNED_READS_OK
  // Load all eight bytes at once, straight from the buffer.
  *val = *(uint64_t*)buf;
#else
  // Assemble the value one byte at a time, least-significant byte first.
  *val = ((uint64_t)buf[0] << 0)  | ((uint64_t)buf[1] << 8)  |
         ((uint64_t)buf[2] << 16) | ((uint64_t)buf[3] << 24) |
         ((uint64_t)buf[4] << 32) | ((uint64_t)buf[5] << 40) |
         ((uint64_t)buf[6] << 48) | ((uint64_t)buf[7] << 56);
#endif
  *outbuf = next;
  return UPB_STATUS_OK;
}

// Skips over a varint without decoding it.  Bytes are consumed until one with
// a clear high bit is seen or the buffer runs out.
//   - More than 10 bytes consumed: UPB_ERROR_UNTERMINATED_VARINT.
//   - Buffer exhausted while the last byte still had its high bit set (and no
//     more than 10 bytes consumed): UPB_STATUS_NEED_MORE_DATA.
//   - Otherwise *outbuf is set just past the varint and UPB_STATUS_OK returned.
INLINE upb_status_t upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end,
                                        uint8_t **outbuf)
{
  uint8_t *const limit = buf + 10;
  uint8_t byte = 0x80;  // Pretend a continuation byte precedes the varint.
  while(buf < end && (byte & 0x80)) {
    byte = *buf;
    buf++;
  }
  if(buf > limit) return UPB_ERROR_UNTERMINATED_VARINT;
  // From here on buf <= limit holds.
  if(buf >= end && (byte & 0x80)) return UPB_STATUS_NEED_MORE_DATA;
  *outbuf = buf;
  return UPB_STATUS_OK;
}

// Skips over a fixed-length 32-bit value without reading it.  Returns
// UPB_STATUS_NEED_MORE_DATA if fewer than four bytes lie between buf and end;
// otherwise sets *outbuf just past the value.
INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end,
                                        uint8_t **outbuf)
{
  uint8_t *next = buf + sizeof(uint32_t);
  if(next > end) {
    return UPB_STATUS_NEED_MORE_DATA;
  }
  *outbuf = next;
  return UPB_STATUS_OK;
}

// Skips over a fixed-length 64-bit value without reading it.  Returns
// UPB_STATUS_NEED_MORE_DATA if fewer than eight bytes lie between buf and end;
// otherwise sets *outbuf just past the value.
INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end,
                                        uint8_t **outbuf)
{
  uint8_t *next = buf + sizeof(uint64_t);
  if(next > end) {
    return UPB_STATUS_NEED_MORE_DATA;
  }
  *outbuf = next;
  return UPB_STATUS_OK;
}


/* Functions to read .proto values. *******************************************/


// Zig-zag decoding, as used by sint32 and sint64.  The lowest bit of n selects
// the sign and the remaining bits carry the magnitude, so the inputs
// 0, 1, 2, 3, ... decode to 0, -1, 1, -2, ...
INLINE int32_t upb_zzdec_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  const uint32_t sign_mask = (uint32_t)-(int32_t)(n & 1);  // 0 or all ones.
  return (int32_t)(magnitude ^ sign_mask);
}
INLINE int64_t upb_zzdec_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  const uint64_t sign_mask = (uint64_t)-(int64_t)(n & 1);  // 0 or all ones.
  return (int64_t)(magnitude ^ sign_mask);
}

// Use macros to define a set of two functions for each .proto type:
//
//  // Reads and converts a .proto value from buf, placing it in d.
//  // "end" indicates the end of the current buffer (if the buffer does
//  // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned).
//  // On success, *outbuf will point to the first byte that was not consumed.
//  upb_status_t upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d,
//                             uint8_t **outbuf);
//
//  // Given an already read wire value s (source), convert it to a .proto
//  // value and return it.
//  int32_t upb_wvtov_INT32(uint32_t s);
//
// These are the most efficient functions to call if you want to decode a value
// for a known type.

// WVTOV expands to the signature of upb_wvtov_<type>: an INLINE function that
// takes a wire value s of type wire_t and returns a val_t.  It is used twice
// by T below: once followed by ';' as a prototype and once as the head of the
// definition whose '{ ... }' body follows the T(...) invocation.
#define WVTOV(type, wire_t, val_t) \
  INLINE val_t upb_wvtov_ ## type(wire_t s)

// GET defines upb_get_<type>.  It reads a wire value with
// upb_get_<v_or_f>_<wire_t>() (v = varint reader, f = fixed-length reader),
// converts it with upb_wvtov_<type>(), and stores the result in *d.  A failed
// read is handed to UPB_CHECK before any conversion happens.
// member_name is accepted but not referenced in the expansion.
#define GET(type, v_or_f, wire_t, val_t, member_name) \
  INLINE upb_status_t upb_get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, \
                                       uint8_t **outbuf) { \
    wire_t tmp; \
    UPB_CHECK(upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, outbuf)); \
    *d = upb_wvtov_ ## type(tmp); \
    return UPB_STATUS_OK; \
  }

// T ties the two together for one .proto type: it emits the upb_wvtov_<type>
// prototype, the upb_get_<type> definition, and finally the upb_wvtov_<type>
// signature, leaving the conversion body to be written right after T(...).
#define T(type, v_or_f, wire_t, val_t, member_name) \
  WVTOV(type, wire_t, val_t);  /* prototype for GET below */ \
  GET(type, v_or_f, wire_t, val_t, member_name) \
  WVTOV(type, wire_t, val_t)

// One row per .proto type.  Columns: type name, wire reader (v or f), wire
// type, value type, member name.  The braces hold the wire-to-value
// conversion of s.
// NOTE(review): the varint rows with a uint32_t wire type (INT32, UINT32,
// SINT32, BOOL, ENUM) go through upb_get_v_uint32_t(), which discards the
// high 32 bits of the varint before conversion -- confirm this is intended
// for BOOL in particular.
T(INT32,    v, uint32_t, int32_t,  int32)   { return (int32_t)s;      }
T(INT64,    v, uint64_t, int64_t,  int64)   { return (int64_t)s;      }
T(UINT32,   v, uint32_t, uint32_t, uint32)  { return s;               }
T(UINT64,   v, uint64_t, uint64_t, uint64)  { return s;               }
T(SINT32,   v, uint32_t, int32_t,  int32)   { return upb_zzdec_32(s); }
T(SINT64,   v, uint64_t, int64_t,  int64)   { return upb_zzdec_64(s); }
T(FIXED32,  f, uint32_t, uint32_t, uint32)  { return s;               }
T(FIXED64,  f, uint64_t, uint64_t, uint64)  { return s;               }
T(SFIXED32, f, uint32_t, int32_t,  int32)   { return (int32_t)s;      }
T(SFIXED64, f, uint64_t, int64_t,  int64)   { return (int64_t)s;      }
T(BOOL,     v, uint32_t, bool,     _bool)   { return (bool)s;         }
T(ENUM,     v, uint32_t, int32_t,  int32)   { return (int32_t)s;      }
T(DOUBLE,   f, uint64_t, double,   _double) {
  // Reinterpret the 64 wire bits as a double through the upb_value union.
  union upb_value v;
  v.uint64 = s;
  return v._double;
}
T(FLOAT,    f, uint32_t, float,    _float)  {
  // Reinterpret the 32 wire bits as a float through the upb_value union.
  union upb_value v;
  v.uint32 = s;
  return v._float;
}

// The helper macros are private to this header.
#undef WVTOV
#undef GET
#undef T

// Reads a tag varint from buf and splits it into *tag: the low three bits
// become the wire type and the remaining bits the field number.  *outbuf is
// set by the underlying varint read; a failed read is propagated through
// UPB_CHECK.
INLINE upb_status_t parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag,
                              uint8_t **outbuf)
{
  uint32_t raw;
  UPB_CHECK(upb_get_v_uint32_t(buf, end, &raw, outbuf));
  tag->field_number = raw >> 3;
  tag->wire_type    = (upb_wire_type_t)(raw & 0x07);
  return UPB_STATUS_OK;
}

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_PARSE_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback