summaryrefslogtreecommitdiff
path: root/upb/pb/decoder.int.h
blob: 8c8710c066efec0e01af12e66caf4bd55433a868 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

#ifndef UPB_DECODER_INT_H_
#define UPB_DECODER_INT_H_

#include <stdlib.h>
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/sink.h"
#include "upb/pb/decoder.h"

// Opcode definitions.  The canonical meaning of each opcode is its
// implementation in the interpreter (the JIT is written to match this).
//
// All instructions have the opcode in the low byte.
// Instruction format for most instructions is:
//
// +-------------------+--------+
// |     arg (24)      | op (8) |
// +-------------------+--------+
//
// Exceptions are indicated below.  A few opcodes are multi-word.
//
// Numbering note: the OP_PARSE_* opcodes take their values directly from the
// matching UPB_DESCRIPTOR_TYPE_* constants (via the T() macro below), so the
// remaining opcodes are numbered around the values those constants occupy.
typedef enum {
  // Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
  // Arg for all of these is the upb selector for this field.
#define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type
  T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
  T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
#undef T
  // Message/sequence/submessage/string framing opcodes.
  // NOTE(review): the arg for the opcodes below that are not marked "No arg."
  // is not documented here; presumably a selector, as for the parse opcodes --
  // confirm against the interpreter.
  OP_STARTMSG       = 9,   // No arg.
  OP_ENDMSG         = 10,  // No arg.
  OP_STARTSEQ       = 11,
  OP_ENDSEQ         = 12,
  OP_STARTSUBMSG    = 14,  // 13 is taken by an OP_PARSE_* opcode (see above).
  OP_ENDSUBMSG      = 19,  // 15-18 are taken by OP_PARSE_* opcodes (see above).
  OP_STARTSTR       = 20,
  OP_STRING         = 21,
  OP_ENDSTR         = 22,

  // Frame/delimiter management opcodes.
  OP_PUSHTAGDELIM   = 23,  // No arg.
  OP_PUSHLENDELIM   = 24,  // No arg.
  OP_POP            = 25,  // No arg.
  OP_SETDELIM       = 26,  // No arg.
  OP_SETGROUPNUM    = 27,  // NOTE(review): arg presumably the group number
                           // (when it fits in 24 bits) -- confirm.
  OP_SETBIGGROUPNUM = 28,  // two words: | unused (24) | opc || groupnum (32) |

  // The arg for these opcodes is a local label reference.
  OP_CHECKDELIM     = 29,
  OP_CALL           = 30,
  OP_BRANCH         = 31,

  // Different opcodes depending on how many bytes expected.
  OP_TAG1           = 32,  // | expected tag (16) | jump target (8) | opc (8) |
  OP_TAG2           = 33,  // | expected tag (16) | jump target (8) | opc (8) |
  OP_TAGN           = 34,  // three words:
                           //   | unused (16) | jump target(8) | opc (8) |
                           //   |           expected tag 1 (32)          |
                           //   |           expected tag 2 (32)          |

  OP_SETDISPATCH    = 35,  // N words:
                           //   | unused (24)         | opc |
                           //   | upb_inttable* (32 or 64)  |
                           // (N depends on the pointer width, since the
                           // table pointer is stored in 32-bit code words.)

  OP_HALT           = 36,  // No arg.
} opcode;

// The numerically largest opcode (OP_HALT is the last and highest value in
// the opcode enum).  Must be updated if an opcode is added after OP_HALT.
#define OP_MAX OP_HALT

// Extracts the opcode from an instruction word.  Every instruction keeps its
// opcode in the low byte, so masking with 0xff is sufficient.
UPB_INLINE opcode getop(uint32_t instr) {
  return (opcode)(instr & 0xffu);
}

// Frame type descriptor for the protobuf decoder.
//
// This is a declaration only ("extern"): without it, every translation unit
// that includes this header would emit its own tentative definition of the
// object, which causes duplicate-symbol link errors when common symbols are
// disabled (-fno-common) and is ill-formed in C++.
// NOTE(review): the single initialized definition is expected to live in the
// decoder implementation file -- confirm it exists there.
extern const upb_frametype upb_pbdecoder_frametype;

// Decoder entry points; used as handlers.
//
// Each takes an untyped "closure" plus an untyped handler-data pointer.
// NOTE(review): the closure is presumably the upb_pbdecoder instance --
// confirm against the implementation.

// Start-of-stream entry point.  Receives a size hint for the upcoming data
// and returns an untyped pointer.
void *upb_pbdecoder_start(void *closure, const void *handler_data,
                          size_t size_hint);
// Data entry point: is handed "size" bytes starting at "buf".
// NOTE(review): the return value is presumably the number of bytes consumed
// -- confirm.
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
                            size_t size);
// End-of-stream entry point; returns a bool result.
bool upb_pbdecoder_end(void *closure, const void *handler_data);

// Decoder-internal functions that the JIT calls to handle fallback paths.
//
// NOTE(review): the int32_t-returning functions presumably report their
// outcome using the DECODE_* codes defined later in this header -- confirm
// against the implementation.

// Resume/suspend decoding around a user-supplied buffer ("buf", "size").
void *upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                           size_t size);
size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
// Handles a field that is not known, identified by its field number and
// wire type.
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, uint32_t fieldnum,
                                  uint8_t wire_type);
// Slow-path check of the next tag against "expected".
int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected);
// Slow-path varint decode; the decoded value is written through "u64".
int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64);
// Decode a 32-bit / 64-bit value; the result is written through the
// output pointer.
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32);
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64);
// Records the error message "msg" on the decoder.
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);

// Error messages that are shared between the bytecode and JIT decoders.
// Declared here only; the string itself is defined in an implementation file.
extern const char *kPbDecoderStackOverflow;

// Forward declaration of the decoder plan so the prototypes below can refer
// to it; the full struct definition appears near the end of this header.
typedef struct _upb_pbdecoderplan upb_pbdecoderplan;

// Access to decoderplan members needed by the decoder.

// Reports whether the plan has JIT code.
bool upb_pbdecoderplan_hasjitcode(const upb_pbdecoderplan *p);
// Returns a pointer into the plan's 32-bit code words.
// NOTE(review): presumably the start of the bytecode ("code") -- confirm.
uint32_t *upb_pbdecoderplan_codebase(const upb_pbdecoderplan *p);
// Returns a name for opcode "op".  Unlike its neighbors, this does not take a
// plan argument.
const char *upb_pbdecoder_getopname(unsigned int op);
// Returns the plan's JIT code as a string handler.  Declared unconditionally,
// although the plan's jit_code member only exists under UPB_USE_JIT_X64.
upb_string_handler *upb_pbdecoderplan_jitcode(const upb_pbdecoderplan *p);

// JIT entry point.
// These are declared unconditionally, even though the plan's JIT-related
// members only exist when UPB_USE_JIT_X64 is defined.
// NOTE(review): presumably _jit() generates machine code for "plan" and
// _freejit() releases it -- confirm against the JIT implementation.
void upb_pbdecoder_jit(upb_pbdecoderplan *plan);
void upb_pbdecoder_freejit(upb_pbdecoderplan *plan);


// A special label that means "do field dispatch for this message and branch to
// wherever that takes you."
// NOTE(review): presumably usable wherever an opcode takes a local label
// reference as its arg -- confirm against the compiler/interpreter.
#define LABEL_DISPATCH 0

// Negative result codes used by the decoder's internal helpers.
// DECODE_MISMATCH and DECODE_ENDGROUP deliberately share the value -2; they
// are told apart by which function produced them (see the per-line notes).
// The replacement lists are parenthesized so each expands as a single operand.
#define DECODE_OK (-1)
#define DECODE_MISMATCH (-2)  // Used only from checktag_slow().
#define DECODE_ENDGROUP (-2)  // Used only from checkunknown().

// One entry of the decoder's internal frame stack (see the "stack" member of
// struct upb_pbdecoder).  The JIT reads and writes these fields, so the
// member order and types should be treated as layout-sensitive.
typedef struct {
  // The absolute stream offset of the end-of-frame delimiter.
  // Non-delimited frames (groups and non-packed repeated fields) reuse the
  // delimiter of their parent, even though the frame may not end there.
  //
  // NOTE: the JIT stores a slightly different value here for non-top frames.
  // It stores the value relative to the end of the enclosed message.  But the
  // innermost frame is still stored the same way, which is important for
  // ensuring that calls from the JIT into C work correctly.
  uint64_t end_ofs;
  // NOTE(review): presumably the bytecode base for this frame's method --
  // confirm.
  uint32_t *base;
  // NOTE(review): presumably the group number set by OP_SETGROUPNUM /
  // OP_SETBIGGROUPNUM for this frame -- confirm.
  uint32_t groupnum;
  // The interpreter and the JIT each use a different member of this union.
  union {
    upb_inttable *dispatch;  // Not used by the JIT.
    void         *closure;   // Only used by the JIT.
  } u;
} upb_pbdecoder_frame;

// Complete state of one protobuf decoder: output sink, bytecode position,
// input-buffer bookkeeping, residual-byte buffer, error status, and the
// fixed-size frame and call stacks.
struct upb_pbdecoder {
  // Where we push parsed data (not owned).
  upb_sink *sink;

  // NOTE(review): presumably the number of entries currently in use in
  // "callstack" below -- confirm.
  size_t call_len;
  // NOTE(review): "pc" is presumably the current bytecode position and "last"
  // a previously executed/recorded one -- confirm.
  uint32_t *pc, *last;

  // Current input buffer and its stream offset.
  const char *buf, *ptr, *end, *checkpoint;

  // End of the delimited region, relative to ptr, or NULL if not in this buf.
  const char *delim_end;

  // End of the delimited region, relative to ptr, or end if not in this buf.
  const char *data_end;

  // Overall stream offset of "buf."
  uint64_t bufstart_ofs;

  // How many bytes past the end of the user buffer we want to skip.
  size_t skip;

  // Buffer for residual bytes not parsed from the previous buffer.
  // The maximum number of residual bytes we require is 12; a five-byte
  // unknown tag plus an eight-byte value, less one because the value
  // is only a partial value.
  char residual[12];
  // NOTE(review): presumably points one past the last valid byte stored in
  // "residual" -- confirm.
  char *residual_end;

  // Stores the user buffer passed to our decode function.
  const char *buf_param;
  size_t size_param;

#ifdef UPB_USE_JIT_X64
  // Used momentarily by the generated code to store a value while a user
  // function is called.
  uint32_t tmp_len;

  // NOTE(review): presumably the stack pointer saved by the generated code
  // -- confirm against the JIT.
  const void *saved_rsp;
#endif

  // Not documented here whether this is owned.
  // NOTE(review): presumably where errors are reported -- confirm.
  upb_status *status;

  // Our internal stack.
  // "stack" holds up to UPB_DECODER_MAX_NESTING frames; "callstack" holds
  // twice that many code pointers.
  // NOTE(review): "top" is presumably the current frame and "limit" the end
  // of the usable part of "stack" -- confirm.
  upb_pbdecoder_frame *top, *limit;
  upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING];
  uint32_t *callstack[UPB_DECODER_MAX_NESTING * 2];
};

// Data pertaining to a single decoding method/function.
// Each method contains code to parse a single message type.
// It may or may not be bound to a destination handlers object.
typedef struct {
  // While compiling, the base is relative in "ofs", after compiling it is
  // absolute in "ptr".
  // Only one of the two members is meaningful at a time, depending on whether
  // compilation has finished.
  union {
    uint32_t ofs;     // PC offset of method.
    const void *ptr;  // Pointer to bytecode or machine code for this method.
  } base;

  // Whether this method is native code or bytecode.
  // (Tells which kind of code "base.ptr" refers to.)
  bool native_code;

  // The message type that this method is parsing.
  const upb_msgdef *msg;

  // The destination handlers this method is bound to, or NULL if this method
  // can be bound to a destination handlers instance at runtime.
  //
  // If non-NULL, we own a ref.
  const upb_handlers *dest_handlers;

  // The dispatch table layout is:
  //   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
  //
  // If wt1 matches, jump to the 48-bit offset.  If wt2 matches, lookup
  // (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
  //
  // We need two wire types because of packed/non-packed compatibility.  A
  // primitive repeated field can use either wire type and be valid.  While we
  // could key the table on fieldnum+wiretype, the table would be 8x sparser.
  //
  // Storing two wire types in the primary value allows us to quickly rule out
  // the second wire type without needing to do a separate lookup (this case is
  // less common than an unknown field).
  //
  // The three parts together fill a 64-bit table value (48 + 8 + 8 bits).
  upb_inttable dispatch;
} upb_pbdecodermethod;

// A compiled decoder plan: the bytecode, the per-message decoding methods,
// and (when built with UPB_USE_JIT_X64) the JIT-generated machine code.
struct _upb_pbdecoderplan {
  // Pointer to bytecode.
  // NOTE(review): "code_end" presumably points one past the last code word
  // -- confirm.
  uint32_t *code, *code_end;

  // Maps upb_msgdef*/upb_handlers* -> upb_pbdecodermethod
  upb_inttable methods;

  // The method that starts parsing when we first call into the plan.
  // Ideally we will remove the idea that any of the methods in the plan
  // are special like this, so that any method can be the top-level one.
  upb_pbdecodermethod *topmethod;

#ifdef UPB_USE_JIT_X64
  // JIT-generated machine code (else NULL).
  upb_string_handler *jit_code;
  // NOTE(review): presumably the size in bytes of the jit_code region --
  // confirm.
  size_t jit_size;
  // NOTE(review): "debug_info" and "dl" are not documented here; presumably
  // debugging data for the generated code and a dynamic-library handle --
  // confirm against the JIT implementation.
  char *debug_info;
  void *dl;
#endif
};

#endif  // UPB_DECODER_INT_H_
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback