summaryrefslogtreecommitdiff
path: root/upb_parse.h
blob: 75752a0370e6d6b9eeb6074d9fbb08b8a887c46c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * This file contains parsing routines; both stream-oriented and tree-oriented
 * models are supported.
 *
 * Copyright (c) 2008 Joshua Haberman.  See LICENSE for details.
 */

#ifndef UPB_PARSE_H_
#define UPB_PARSE_H_

#include <stdint.h>
#include <stdbool.h>
#include "upb.h"

#ifdef __cplusplus
extern "C" {
#endif

/* A deserialized value as described in a .proto file, paired with a pointer
 * to a field definition (struct upb_field).  upb_parse_field() stores its
 * parsed result in one of these. */
struct upb_tagged_value {
  struct upb_field *field;  /* presumably the definition of the field this
                             * value was parsed for -- confirm */
  union upb_value v;        /* the value itself; union upb_value is not
                             * declared in this header */
};

/* A value as it is encoded on-the-wire, before it has been interpreted as
 * any particular .proto type.  The enum upb_status comments describe when
 * upb_parse_field() stores one of these (unknown values and values whose
 * wire type did not match the field). */
struct upb_tagged_wire_value {
  upb_wire_type_t type;     /* wire type of v; presumably selects which
                             * member of the union is valid -- confirm */
  union upb_wire_value v;   /* the raw, uninterpreted wire value */
};

/* Definition of a single field in a message, for the purposes of the parser's
 * fieldmap.  This deliberately omits most of the information that can be
 * specified about a field in a .proto file.  For example, we don't even know
 * the field's name.  We keep only the information necessary to parse the
 * field. */
struct upb_field {
  upb_field_number_t field_number;
  int32_t type;  /* google_protobuf_FieldDescriptorProto_Type */
  struct upb_fieldset *fieldset;  /* if type == MESSAGE; presumably the
                                   * fieldset of the submessage -- confirm.
                                   * Not documented here for other types. */
};

/* One entry of the parser's stack (see the stack member of
 * struct upb_parse_state). */
struct upb_parse_stack_frame {
  struct upb_fieldset *fieldset;  /* presumably the fieldset used to look up
                                   * fields while in this frame -- confirm */
  size_t end_offset;  /* unknown for the top frame, so we set to SIZE_MAX */
                      /* NOTE(review): "top frame" presumably means the
                       * outermost (top-level message) frame rather than the
                       * frame upb_parse_state.top points at -- confirm. */
};

/* The stream parser's state.  Initialize with upb_init_parser() before
 * passing it to upb_parse_field(). */
struct upb_parse_state {
  size_t offset;  /* presumably the number of input bytes consumed so far;
                   * compare the end_offset of each stack frame -- confirm */
  struct upb_parse_stack_frame stack[UPB_MAX_STACK];  /* fixed-capacity stack
                                                       * of frames, stored
                                                       * inline */
  /* Pointers to stack frames.  Presumably top is the frame currently being
   * parsed and limit marks the end of stack[] for overflow detection (see
   * UPB_ERROR_STACK_OVERFLOW) -- confirm against the implementation. */
  struct upb_parse_stack_frame *top, *limit;
};

/* Call this once before parsing to initialize the data structures in *state.
 * toplevel_fieldset can be NULL, in which case all fields will be reported as
 * unknown.
 *
 * NOTE(review): this comment originally said "message_type can be NULL", but
 * no parameter of that name exists in the signature; it is assumed to refer
 * to toplevel_fieldset -- confirm. */
void upb_init_parser(struct upb_parse_state *state,
                     struct upb_fieldset *toplevel_fieldset);

/* Result code returned by the parsing functions declared in this header
 * (upb_parse_field() and the low-level helpers).
 *
 *   == 0  success.
 *    > 0  unusual but nonfatal events.  These must nonetheless be handled
 *         differently from success since they do not return data in val.
 *    < 0  fatal errors that cannot be recovered.
 *
 * Every enumerator carries an explicit value, so the numeric codes do not
 * depend on declaration order. */
typedef enum upb_status {
  UPB_STATUS_OK = 0,

  /* No data is stored in val or wv. */
  UPB_STATUS_SUBMESSAGE_END = 1,

  /* NONFATAL ERRORS: the input was invalid, but we can continue if
   * desired. */

  /* A value was encountered that was not defined in the .proto file.  The
   * unknown value is stored in wv. */
  UPB_ERROR_UNKNOWN_VALUE = 2,

  /* A field was encoded with the wrong wire type.  The wire value is stored
   * in wv. */
  UPB_ERROR_MISMATCHED_TYPE = 3,

  /* FATAL ERRORS: these indicate corruption, and cannot be recovered. */

  /* A varint did not terminate before hitting 64 bits. */
  UPB_ERROR_UNTERMINATED_VARINT = -1,

  /* A submessage ended in the middle of data. */
  UPB_ERROR_BAD_SUBMESSAGE_END = -2,

  /* Encountered a "group" on the wire (deprecated and unsupported). */
  UPB_ERROR_GROUP = -3,

  /* Input was nested more than UPB_MAX_NESTING deep.
   * NOTE(review): the parser stack in this header is sized with
   * UPB_MAX_STACK -- confirm which constant is the actual limit. */
  UPB_ERROR_STACK_OVERFLOW = -4,

  /* The input data caused the pb's offset (a size_t) to overflow. */
  UPB_ERROR_OVERFLOW = -5
} upb_status_t;
/* Forward declaration.  struct upb_parse_state is already fully defined
 * earlier in this header, so this is redundant but harmless. */
struct upb_parse_state;

/* The main parsing function.  Parses the next value from buf, storing the
 * parsed value in val.  If val is of type UPB_TYPE_MESSAGE, then a
 * submessage was entered.
 *
 * Outputs, as described by the enum upb_status comments:
 *   - UPB_STATUS_SUBMESSAGE_END: no data is stored in val or wv.
 *   - UPB_ERROR_UNKNOWN_VALUE / UPB_ERROR_MISMATCHED_TYPE: the wire value is
 *     stored in wv.
 *   - fieldnum: presumably receives the field number of the parsed tag --
 *     confirm against the implementation.
 *
 * s must have been initialized with upb_init_parser().
 *
 * IMPORTANT NOTE: for efficiency, the parsing routines do not do bounds checks,
 * and may read as far as buf+10.  So the caller must ensure that buf is not
 * within 10 bytes of unmapped memory, or the program will segfault.  Clients
 * are encouraged to overallocate their buffers by ten bytes to compensate. */
upb_status_t upb_parse_field(struct upb_parse_state *s,
                             uint8_t *buf,
                             upb_field_number_t *fieldnum,
                             struct upb_tagged_value *val,
                             struct upb_tagged_wire_value *wv);

/* Low-level parsing functions. ***********************************************/

/* Parses a single tag from the character data starting at *buf, and updates
 * *buf to point one past the bytes that were consumed.  *buf will be
 * incremented by at most ten bytes.
 *
 * tag presumably receives the parsed tag (struct upb_tag is not declared in
 * this header) -- confirm.  Returns a upb_status_t; see enum upb_status.
 *
 * NOTE(review): unlike upb_parse_field() and upb_parse_wire_value(), this
 * public symbol has no upb_ prefix. */
upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag);

/* Parses a wire value with the given type (which must have been obtained from
 * a tag that was just parsed) and adds the number of bytes that were consumed
 * to *offset.  For delimited types, offset is advanced past the delimited
 * data.  The parsed value is saved in *wv (compare skip_wire_value(), which
 * discards it).
 *
 * NOTE(review): it is not visible from this header whether the value is read
 * starting at buf or at buf + *offset -- confirm against the implementation. */
upb_status_t upb_parse_wire_value(uint8_t *buf, size_t *offset,
                                  upb_wire_type_t wt,
                                  union upb_wire_value *wv);

/* Like upb_parse_wire_value(), but discards the wire value instead of saving
 * it: takes the same buf, offset and wt arguments and has no output
 * parameter for the value.
 *
 * NOTE(review): unlike upb_parse_wire_value(), this public symbol has no
 * upb_ prefix. */
upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
                             upb_wire_type_t wt);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* UPB_PARSE_H_ */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback