summaryrefslogtreecommitdiff
path: root/pbstream.h
blob: 3c551d2a5a1d2ff35bde0c74669be92b9c5095c6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
/*
 * pbstream - a stream-oriented implementation of protocol buffers.
 *
 * Copyright (c) 2008 Joshua Haberman.  See LICENSE for details.
 */

#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include "dynarray.h"

/* Every field type that can be written in a .proto file.  The enumerators
 * are numbered consecutively from zero; the values are spelled out so the
 * numbering is visible at a glance. */
typedef enum pbstream_type {
  PBSTREAM_TYPE_DOUBLE   = 0,
  PBSTREAM_TYPE_FLOAT    = 1,
  PBSTREAM_TYPE_INT32    = 2,
  PBSTREAM_TYPE_INT64    = 3,
  PBSTREAM_TYPE_UINT32   = 4,
  PBSTREAM_TYPE_UINT64   = 5,
  PBSTREAM_TYPE_SINT32   = 6,
  PBSTREAM_TYPE_SINT64   = 7,
  PBSTREAM_TYPE_FIXED32  = 8,
  PBSTREAM_TYPE_FIXED64  = 9,
  PBSTREAM_TYPE_SFIXED32 = 10,
  PBSTREAM_TYPE_SFIXED64 = 11,
  PBSTREAM_TYPE_BOOL     = 12,
  PBSTREAM_TYPE_STRING   = 13,
  PBSTREAM_TYPE_BYTES    = 14,
  PBSTREAM_TYPE_ENUM     = 15,
  PBSTREAM_TYPE_MESSAGE  = 16
} pbstream_type_t;

/* The ways a value can be encoded on-the-wire.  Each enumerator carries an
 * explicitly assigned numeric value. */
typedef enum pbstream_wire_type {
  /* Variable-length integer. */
  PBSTREAM_WIRE_TYPE_VARINT = 0,
  /* Fixed 64-bit quantity. */
  PBSTREAM_WIRE_TYPE_64BIT = 1,
  /* Length-delimited run of bytes. */
  PBSTREAM_WIRE_TYPE_DELIMITED = 2,
  /* Group start / end markers. */
  PBSTREAM_WIRE_TYPE_START_GROUP = 3,
  PBSTREAM_WIRE_TYPE_END_GROUP = 4,
  /* Fixed 32-bit quantity. */
  PBSTREAM_WIRE_TYPE_32BIT = 5
} pbstream_wire_type_t;

/* How many times a field is allowed to occur.  Every field has exactly one
 * of these cardinalities. */
typedef enum pbstream_cardinality {
  /* must appear 0 or 1 times */
  PBSTREAM_CARDINALITY_OPTIONAL = 0,
  /* must appear exactly 1 time */
  PBSTREAM_CARDINALITY_REQUIRED = 1,
  /* may appear 0 or more times */
  PBSTREAM_CARDINALITY_REPEATED = 2
} pbstream_cardinality_t;

/* Integer type used for field numbers (see the field_number members of
 * struct pbstream_tag and struct pbstream_field_descriptor). */
typedef int32_t pbstream_field_number_t;

/* A deserialized value as described in a .proto file.
 *
 * The payload lives in the union v; field_descriptor refers to a field
 * descriptor, presumably the one for the field this value was parsed from,
 * whose type would then select the live union member (NOTE(review): confirm
 * against the parser implementation). */
struct pbstream_field_descriptor;
struct pbstream_value {
  struct pbstream_field_descriptor *field_descriptor;
  union {
    double _double;
    float  _float;
    int32_t int32;
    int64_t int64;
    uint32_t uint32;
    uint64_t uint64;
    bool _bool;
    /* A value referenced by position rather than copied: an offset/length
     * pair into the stream. */
    struct pbstream_delimited {
      size_t offset;  /* relative to the beginning of the stream. */
      int len;
    } delimited;
    int32_t _enum;
  } v;
};

/* A field number paired with a wire type.
 * NOTE(review): presumably this is the decoded form of the key that precedes
 * each field on the wire -- confirm against the parser. */
struct pbstream_tag {
  pbstream_field_number_t field_number;
  pbstream_wire_type_t wire_type;
};

/* A value as it is encoded on-the-wire.
 *
 * type records the wire type; the union v holds the payload.  The union
 * member names mirror the wire-type names (varint, _64bit, delimited,
 * _32bit), so presumably type selects which member is valid -- TODO confirm.
 * There is no union member for the START_GROUP / END_GROUP wire types. */
struct pbstream_wire_value {
  pbstream_wire_type_t type;
  union {
    uint64_t varint;
    uint64_t _64bit;
    /* Delimited data is referenced by offset and length, not copied. */
    struct {
      size_t offset;  /* relative to the beginning of the stream. */
      int len;
    } delimited;
    uint32_t _32bit;
  } v;
};

/* The definition of an enum.  For example:
 * enum Corpus {
 *   UNIVERSAL = 0;
 *   WEB = 1;
 *   IMAGES = 2;
 *   LOCAL = 3;
 *   NEWS = 4;
 * }
 *
 * name is the enum's name; each struct enum_value pairs one enumerator name
 * with its integer value.
 *
 * NOTE(review): the declaration below both defines struct enum_value and
 * declares a single member called "value" of that type, in addition to the
 * "values" array.  The lone "value" member looks unintentional (the struct
 * definition alone would suffice) -- confirm nothing uses it before removing,
 * since removal changes the struct layout.
 *
 * DEFINE_DYNARRAY comes from dynarray.h (not shown here); presumably it
 * declares the storage for a growable array named "values" -- TODO confirm. */
struct pbstream_enum_descriptor {
  char *name;
  struct enum_value {
    char *name;
    int value;
  } value;
  DEFINE_DYNARRAY(values, struct enum_value);
};

/* The definition of a field (within a message).
 * For example:
 *   required int32 a = 1;
 *
 * In terms of that example: field_number would be 1, name "a", type
 * PBSTREAM_TYPE_INT32 and cardinality PBSTREAM_CARDINALITY_REQUIRED. */
struct pbstream_field_descriptor {
  pbstream_field_number_t field_number;
  char *name;
  pbstream_type_t type;
  pbstream_cardinality_t cardinality;
  struct pbstream_value *default_value;  /* NULL if none */

  /* Index into the "seen" list for the message.  -1 for repeated fields (for
   * which we have no need to track whether it's been seen). */
  int seen_field_num;

  /* Extra, type-specific descriptor.  Presumably _enum is meaningful when
   * type is PBSTREAM_TYPE_ENUM and message when type is
   * PBSTREAM_TYPE_MESSAGE -- TODO confirm; nothing in this header says what
   * the union holds for other types. */
  union extra_data {
    struct pbstream_enum_descriptor *_enum;
    struct pbstream_message_descriptor *message;
  } d;
};

/* A message as defined by the "message" construct in a .proto file.
 *
 * Holds the message's own fields plus two further lists of message and enum
 * descriptors (presumably the types declared nested inside this message --
 * TODO confirm).  DEFINE_DYNARRAY comes from dynarray.h (not shown here);
 * presumably each use declares the storage for one growable array. */
struct pbstream_message_descriptor {
  char *name;  /* does not include package name or parent message names */
  char *full_name;
  int num_seen_fields;  /* fields we have to track "seen" information for */
  DEFINE_DYNARRAY(fields, struct pbstream_field_descriptor);
  DEFINE_DYNARRAY(messages, struct pbstream_message_descriptor);
  DEFINE_DYNARRAY(enums, struct pbstream_enum_descriptor);
};

/* Status codes used by the parser, and the type of the error callback.
 *
 * The codes fall into three groups: success/incomplete, fatal errors (the
 * input is corrupt and parsing cannot continue), and nonfatal errors (the
 * input is invalid, but parsing can continue if desired).
 *
 * The error callback receives only the status code; it is given no
 * description string, offset or fatality flag. */
typedef enum pbstream_status {
  PBSTREAM_STATUS_OK = 0,

  /* buffer ended in the middle of a field */
  PBSTREAM_STATUS_INCOMPLETE = 1,

  /* FATAL ERRORS: these indicate corruption, and cannot be recovered. */

  /* A varint did not terminate before hitting 64 bits. */
  PBSTREAM_ERROR_UNTERMINATED_VARINT = 2,

  /* A submessage ended in the middle of data. */
  PBSTREAM_ERROR_BAD_SUBMESSAGE_END = 3,

  /* NONFATAL ERRORS: the input was invalid, but we can continue if desired. */

  /* A field marked "required" was not present. */
  PBSTREAM_ERROR_MISSING_REQUIRED_FIELD = 4,

  /* An optional or required field appeared more than once. */
  PBSTREAM_ERROR_DUPLICATE_FIELD = 5,

  /* A field was encoded with the wrong wire type. */
  PBSTREAM_ERROR_MISMATCHED_TYPE = 6
} pbstream_status_t;

/* Signature of the function invoked to report an error. */
typedef void (*pbstream_error_callback_t)(pbstream_status_t error);

/* The set of client-supplied callbacks.  Currently this is only the error
 * callback. */
struct pbstream_callbacks {
  pbstream_error_callback_t error_callback;
};

/* One entry of the parser's stack (see the "stack" member of
 * struct pbstream_parse_state); presumably one frame per message or
 * sub-message currently being parsed -- TODO confirm.
 *
 * NOTE(review): end_offset is an int while the parser's running offset
 * (pbstream_parse_state.offset) is a size_t -- check for truncation or
 * signed/unsigned comparison issues on large streams. */
struct pbstream_parse_stack_frame {
  struct pbstream_message_descriptor *message_descriptor;
  int end_offset;  /* unknown for the top frame, so we set to INT_MAX */

  /* Tracks whether we've seen non-repeated fields. */
  DEFINE_DYNARRAY(seen_fields, bool);
};

/* The stream parser's state.
 *
 * Holds a copy of the client's callbacks, an offset (presumably the current
 * position relative to the beginning of the stream -- TODO confirm), a flag
 * for ignoring nonfatal errors, an opaque user_data pointer, and the stack
 * of parse frames. */
struct pbstream_parse_state {
  struct pbstream_callbacks callbacks;
  size_t offset;
  bool ignore_nonfatal_errors;
  void *user_data;
  DEFINE_DYNARRAY(stack, struct pbstream_parse_stack_frame);
};

/* Call this once before parsing to initialize the data structures.
 *
 * state:              the parser state to initialize.
 * message_descriptor: describes the top-level message type; can be NULL, in
 *                     which case all fields will be reported as unknown.
 * callbacks:          the client's callbacks.
 * user_data:          opaque pointer for the client's own use (presumably
 *                     stored in state->user_data -- TODO confirm). */
void pbstream_init_parser(
    struct pbstream_parse_state *state,
    struct pbstream_message_descriptor *message_descriptor,
    struct pbstream_callbacks *callbacks,
    void *user_data);

/* Call this to parse as much of buf as possible, calling callbacks as
 * appropriate.  buf need not be a complete pbstream.  In subsequent calls,
 * buf should point to the first byte not consumed by previous calls.
 *
 * Returns a pbstream_status_t status code (see that enum for the meaning of
 * each value, e.g. PBSTREAM_STATUS_INCOMPLETE when the buffer ended in the
 * middle of a field).
 *
 * If the beginning of a string or sub-message is recognized but not all of
 * its bytes are in memory, it cannot be parsed (and thus parsing of the
 * pbstream cannot proceed) until enough data is available on a later call.
 * The caller may need to increase its buffer size.
 *
 * NOTE(review): this prototype has no output for the number of bytes
 * consumed or for how many more bytes are needed; presumably the caller
 * derives them from state->offset -- TODO confirm against the
 * implementation.  The meaning of buf_offset is not stated in this header;
 * presumably it is the stream offset of buf[0] -- TODO confirm. */

pbstream_status_t pbstream_parse(struct pbstream_parse_state *state,
                                 char *buf, int buf_len, int buf_offset);
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback