summaryrefslogtreecommitdiff
path: root/pbstream.h
blob: e4046e0ca2dcb7ba98a718aba40a6968eb2c58ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/*
 * pbstream - a small and simple implementation of Protocol Buffers.
 *
 * Copyright (c) 2008 Joshua Haberman.  See LICENSE for details.
 */

#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include "dynarray.h"

/* Field types as they can be declared in a .proto file.  Enumerators are
 * numbered consecutively from zero; the values are written out explicitly so
 * they can be read off directly (e.g. when inspecting a value in a debugger). */
enum pbstream_type {
  /* Floating-point types. */
  PBSTREAM_TYPE_DOUBLE   = 0,
  PBSTREAM_TYPE_FLOAT    = 1,

  /* Integer types. */
  PBSTREAM_TYPE_INT32    = 2,
  PBSTREAM_TYPE_INT64    = 3,
  PBSTREAM_TYPE_UINT32   = 4,
  PBSTREAM_TYPE_UINT64   = 5,
  PBSTREAM_TYPE_SINT32   = 6,
  PBSTREAM_TYPE_SINT64   = 7,
  PBSTREAM_TYPE_FIXED32  = 8,
  PBSTREAM_TYPE_FIXED64  = 9,
  PBSTREAM_TYPE_SFIXED32 = 10,
  PBSTREAM_TYPE_SFIXED64 = 11,

  /* Boolean, text and raw bytes. */
  PBSTREAM_TYPE_BOOL     = 12,
  PBSTREAM_TYPE_STRING   = 13,
  PBSTREAM_TYPE_BYTES    = 14,

  /* Types that refer to another definition (an enum or a message). */
  PBSTREAM_TYPE_ENUM     = 15,
  PBSTREAM_TYPE_MESSAGE  = 16
};

/* Wire types: how a value is physically encoded in the serialized stream.
 * The numeric values are the ones used by the Protocol Buffers wire format,
 * so they must not be renumbered. */
enum pbstream_wire_type {
  /* Variable-length integer. */
  PBSTREAM_WIRE_TYPE_VARINT = 0,
  /* Fixed-width 64-bit value. */
  PBSTREAM_WIRE_TYPE_64BIT = 1,
  /* Length-delimited run of bytes. */
  PBSTREAM_WIRE_TYPE_STRING = 2,
  /* Group delimiters. */
  PBSTREAM_WIRE_TYPE_START_GROUP = 3,
  PBSTREAM_WIRE_TYPE_END_GROUP = 4,
  /* Fixed-width 32-bit value. */
  PBSTREAM_WIRE_TYPE_32BIT = 5
};

/* How many times a field may occur within one message.  Every field has
 * exactly one of these cardinalities. */
enum pbstream_cardinality {
  /* must appear 0 or 1 times */
  PBSTREAM_CARDINALITY_OPTIONAL = 0,
  /* must appear exactly 1 time */
  PBSTREAM_CARDINALITY_REQUIRED = 1,
  /* may appear 0 or more times */
  PBSTREAM_CARDINALITY_REPEATED = 2
};

/* The number that identifies a field within a message, e.g. the "1" in
 * "required int32 a = 1;". */
typedef int32_t pbstream_field_number_t;

/* A deserialized value as described in a .proto file.
 *
 * "type" is intended as the discriminator for the union "v".  The union has
 * no dedicated members for the sint*, fixed* and sfixed* types; presumably
 * those are stored in the int32/int64/uint32/uint64 member of matching width
 * and signedness (TODO confirm against the parser).  There is likewise no
 * member for PBSTREAM_TYPE_MESSAGE. */
struct pbstream_value {
  enum pbstream_type type;
  union {
    double _double;
    float  _float;
    int32_t int32;
    int64_t int64;
    uint32_t uint32;
    uint64_t uint64;
    bool _bool;
    struct {
      char *data;  /* This will be a pointer to the buffer of data the client provided. */
      int len;     /* Length of data (presumably in bytes). */
    } string;
    struct {
      char *data;  /* This will be a pointer to the buffer of data the client provided. */
      int len;     /* Length of data (presumably in bytes). */
    } bytes;
    int32_t _enum;
  } v;
};

/* A value as it is encoded on-the-wire, before (or without) being interpreted
 * according to a field definition.  "type" is intended as the discriminator
 * for the union "v"; there are no members for the group wire types. */
struct pbstream_wire_value {
  enum pbstream_wire_type type;
  union {
    uint64_t varint;
    uint64_t _64bit;
    struct {
      char *data;  /* This will be a pointer to the buffer of data the client provided. */
      int len;     /* Length of data (presumably in bytes). */
    } string;
    uint32_t _32bit;
  } v;
};

/* The definition of an enum.  For example:
 * enum Corpus {
 *   UNIVERSAL = 0;
 *   WEB = 1;
 *   IMAGES = 2;
 *   LOCAL = 3;
 *   NEWS = 4;
 * }
 */
struct pbstream_enum_descriptor {
  char *name;  /* Name of the enum ("Corpus" in the example above). */

  /* One name/number pair of the enum ("WEB" / 1 in the example above).
   * NOTE(review): declaring the struct inline also creates the single member
   * "value" below; it looks like a by-product of the inline definition rather
   * than something meant to be used -- confirm before relying on it. */
  struct enum_value {
    char *name;
    int value;
  } value;

  /* The list of values of this enum.  DEFINE_DYNARRAY is not defined in this
   * file (presumably it comes from dynarray.h). */
  DEFINE_DYNARRAY(values, struct enum_value);
};

/* The definition of a field as defined in a pbstream (within a message).
 * For example:
 *   required int32 a = 1;
 * In that example the cardinality is "required", the type is int32, the name
 * is "a" and the field number is 1.
 */
struct pbstream_field_descriptor {
  pbstream_field_number_t field_number;
  char *name;
  enum pbstream_type type;
  enum pbstream_cardinality cardinality;
  struct pbstream_value *default_value;  /* NULL if none */

  /* Index into the "seen" list for the message.  -1 for repeated fields (for
   * which we have no need to track whether it's been seen). */
  int seen_field_num;

  /* Extra type information.  Presumably "_enum" is meaningful when type is
   * PBSTREAM_TYPE_ENUM and "message" when type is PBSTREAM_TYPE_MESSAGE, and
   * neither for other types (TODO confirm against the code that fills this
   * in). */
  union extra_data {
    struct pbstream_enum_descriptor *_enum;
    struct pbstream_message_descriptor *message;
  } d;
};

/* A message as defined by the "message" construct in a .proto file.  A
 * descriptor holds the message's own fields plus the message and enum
 * definitions stored in its "messages" and "enums" lists.  DEFINE_DYNARRAY is
 * not defined in this file (presumably it comes from dynarray.h). */
struct pbstream_message_descriptor {
  char *name;  /* does not include package name or parent message names. */
  char *full_name;  /* NOTE(review): presumably the name qualified with package/parent names -- confirm. */
  int num_seen_fields;  /* How many fields we have to track "seen" information for. */
  DEFINE_DYNARRAY(fields, struct pbstream_field_descriptor);
  DEFINE_DYNARRAY(messages, struct pbstream_message_descriptor);
  DEFINE_DYNARRAY(enums, struct pbstream_enum_descriptor);
};

/* Callback for when a value is parsed that matches a field in the .proto file.
 *
 * field_descriptor: the definition of the field that matched.
 * value:            the parsed value, passed by value.
 * user_data:        opaque client pointer (presumably the one handed to
 *                   pbstream_init_parser -- TODO confirm). */
typedef void (*pbstream_value_callback_t)(
    struct pbstream_field_descriptor *field_descriptor,
    struct pbstream_value            value,
    void *user_data);

/* Callback for when a value is parsed for which no field was defined in the
 * .proto file.
 *
 * field_number: the field number found in the input.
 * wire_value:   the value in its on-the-wire form (passed by pointer, unlike
 *               the value callback, which receives its value by value).
 * user_data:    opaque client pointer (presumably the one handed to
 *               pbstream_init_parser -- TODO confirm). */
typedef void (*pbstream_unknown_value_callback_t)(
    pbstream_field_number_t field_number,
    struct pbstream_wire_value *wire_value,
    void *user_data);

/* Callback for when a nested message is beginning.
 *
 * message_descriptor: the definition of the message that is starting.
 * user_data:          opaque client pointer (presumably the one handed to
 *                     pbstream_init_parser -- TODO confirm). */
typedef void (*pbstream_begin_message_callback_t)(
    struct pbstream_message_descriptor *message_descriptor,
    void *user_data);

/* Callback for when a nested message is ending.  Only the opaque client
 * pointer is passed; the callback is not told which message ended. */
typedef void (*pbstream_end_message_callback_t)(void *user_data);

/* The kinds of error that can be reported through the error callback. */
enum pbstream_error {
  /* A varint did not terminate before hitting 64 bits. Fatal. */
  PBSTREAM_ERROR_UNTERMINATED_VARINT    = 0,
  /* A field marked "required" was not present. */
  PBSTREAM_ERROR_MISSING_REQUIRED_FIELD = 1,
  /* An optional or required field appeared more than once. */
  PBSTREAM_ERROR_DUPLICATE_FIELD        = 2,
  /* A field was encoded with the wrong wire type. */
  PBSTREAM_ERROR_MISMATCHED_TYPE        = 3,
  /* A submessage ended in the middle of data.  Indicates corruption. */
  PBSTREAM_ERROR_BAD_SUBMESSAGE_END     = 4
};
/* Callback invoked when an error is detected.
 *
 * error:       which kind of error occurred.
 * description: a static buffer which the client must not free.
 * offset:      the location in the input where the error was detected (this
 *              offset is relative to the beginning of the stream).
 * is_fatal:    if true, parsing cannot continue.
 *
 * NOTE(review): unlike the other callbacks, this one does not receive a
 * user_data pointer. */
typedef void (*pbstream_error_callback_t)(enum pbstream_error error, char *description,
                                          int offset, bool is_fatal);

/* The set of client callbacks used by the parser; one function pointer per
 * kind of event.  NOTE(review): this header does not say whether individual
 * members may be NULL -- check the implementation before leaving any unset. */
struct pbstream_callbacks {
  pbstream_value_callback_t          value_callback;
  pbstream_unknown_value_callback_t  unknown_value_callback;
  pbstream_begin_message_callback_t  begin_message_callback;
  pbstream_end_message_callback_t    end_message_callback;
  pbstream_error_callback_t          error_callback;
};

/* One entry of the parser's stack (see the "stack" member of
 * struct pbstream_parse_state).  A frame records which message definition it
 * refers to, where that message ends, and which of its fields have been
 * seen. */
struct pbstream_parse_stack_frame {
  struct pbstream_message_descriptor *message_descriptor;
  int end_offset;  /* We don't know this for the outermost frame, and set it to INT_MAX. */

  /* For every field except repeated ones we track whether we have seen it or
   * not.  This lets us detect three important conditions:
   * 1. the field has a default, but we did not see it anywhere (action: emit the default)
   * 2. the field is required, but we did not see it anywhere (action: error)
   * 3. the field is required or optional, but we saw it more than once (action: error)
   * A field's slot in this list is given by its descriptor's seen_field_num. */
  DEFINE_DYNARRAY(seen_fields, bool);
};

/* The stream parser keeps this as its state.  The client allocates it and
 * passes it to pbstream_init_parser() and then to every pbstream_parse()
 * call. */
struct pbstream_parse_state {
  struct pbstream_callbacks callbacks;  /* Stored by value, not as a pointer. */
  int offset;        /* NOTE(review): presumably the current position relative to the start of the stream -- confirm. */
  bool fatal_error;  /* NOTE(review): presumably set once an unrecoverable error has been hit -- confirm. */
  void *user_data;   /* Opaque client pointer. */
  DEFINE_DYNARRAY(stack, struct pbstream_parse_stack_frame);
};

/* Call this once before parsing to initialize the data structures.
 *
 * state:              the parser state to initialize.
 * message_descriptor: definition of the top-level message.  Can be NULL, in
 *                     which case all fields will be reported as unknown.
 * callbacks:          the client's callbacks.
 * user_data:          opaque client pointer. */
void pbstream_init_parser(struct pbstream_parse_state *state,
                          struct pbstream_message_descriptor *message_descriptor,
                          struct pbstream_callbacks *callbacks,
                          void *user_data);

/* Result of a call to pbstream_parse(). */
enum pbstream_status {
  PBSTREAM_STATUS_OK = 0,
  PBSTREAM_STATUS_INCOMPLETE = 1,  /* buffer ended in the middle of a field.  */
  PBSTREAM_STATUS_ERROR = 2,       /* fatal error in the file, cannot recover. */
};

/* Call this to parse as much of buf as possible, calling callbacks as
 * appropriate.  buf need not be a complete pbstream.  The number of bytes
 * consumed is reported through consumed_bytes; the return value is a
 * pbstream_status.  In subsequent calls, buf should point to the first byte
 * not consumed by previous calls.
 *
 * If need_more_bytes is non-zero when pbstream_parse() returns, this indicates
 * that the beginning of a string or sub-message was recognized, but not all
 * bytes of the string were in memory.  The string will not be successfully
 * parsed (and thus parsing of the pbstream cannot proceed) unless at least
 * need_more_bytes more bytes of data are available upon the next call to
 * pbstream_parse().  The caller may need to increase its buffer size. */
enum pbstream_status pbstream_parse(struct pbstream_parse_state *state,
                                    char *buf, int buf_len,
                                    int *consumed_bytes, int *need_more_bytes);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback