summaryrefslogtreecommitdiff
path: root/upb/pb/decoder.int.h
blob: 9b35b706daa5696e0b64379348872423aa382cd5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009-2014 Google Inc.  See LICENSE for details.
 * Author: Josh Haberman <jhaberman@gmail.com>
 *
 * Internal-only definitions for the decoder.
 */

#ifndef UPB_DECODER_INT_H_
#define UPB_DECODER_INT_H_

#include <stdlib.h>
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/sink.h"
#include "upb/pb/decoder.h"

// Opcode definitions.  The canonical meaning of each opcode is its
// implementation in the interpreter (the JIT is written to match this).
//
// All instructions have the opcode in the low byte.
// Instruction format for most instructions is:
//
// +-------------------+--------+
// |     arg (24)      | op (8) |
// +-------------------+--------+
//
// Exceptions are indicated below.  A few opcodes are multi-word.
typedef enum {
  // Opcodes 1-8, 13, 15-18: parse a value of the like-named descriptor type.
  // Each of these opcodes is defined to be numerically equal to its
  // UPB_DESCRIPTOR_TYPE_* counterpart.
  // Arg for all of these is the upb selector for this field.
#define DEFINE_PARSE_OP(type) OP_PARSE_##type = UPB_DESCRIPTOR_TYPE_##type
  DEFINE_PARSE_OP(DOUBLE),
  DEFINE_PARSE_OP(FLOAT),
  DEFINE_PARSE_OP(INT64),
  DEFINE_PARSE_OP(UINT64),
  DEFINE_PARSE_OP(INT32),
  DEFINE_PARSE_OP(FIXED64),
  DEFINE_PARSE_OP(FIXED32),
  DEFINE_PARSE_OP(BOOL),
  DEFINE_PARSE_OP(UINT32),
  DEFINE_PARSE_OP(SFIXED32),
  DEFINE_PARSE_OP(SFIXED64),
  DEFINE_PARSE_OP(SINT32),
  DEFINE_PARSE_OP(SINT64),
#undef DEFINE_PARSE_OP

  // Message / sequence / submessage / string framing.
  OP_STARTMSG       = 9,   // No arg.
  OP_ENDMSG         = 10,  // No arg.
  OP_STARTSEQ       = 11,
  OP_ENDSEQ         = 12,
  OP_STARTSUBMSG    = 14,
  OP_ENDSUBMSG      = 19,
  OP_STARTSTR       = 20,
  OP_STRING         = 21,
  OP_ENDSTR         = 22,

  // Delimiter handling and control flow.
  OP_PUSHTAGDELIM   = 23,  // No arg.
  OP_PUSHLENDELIM   = 24,  // No arg.
  OP_POP            = 25,  // No arg.
  OP_SETDELIM       = 26,  // No arg.
  OP_SETBIGGROUPNUM = 27,  // Two words: | unused (24) | opc || groupnum (32) |
  OP_CHECKDELIM     = 28,
  OP_CALL           = 29,
  OP_RET            = 30,
  OP_BRANCH         = 31,

  // Tag checks.  Which opcode is used depends on how many bytes the expected
  // tag occupies.
  OP_TAG1           = 32,  // | expected tag (16) | jump target (8) | opc (8) |
  OP_TAG2           = 33,  // | expected tag (16) | jump target (8) | opc (8) |
  OP_TAGN           = 34,  // Three words:
                           //   | unused (16) | jump target(8) | opc (8) |
                           //   |           expected tag 1 (32)          |
                           //   |           expected tag 2 (32)          |

  OP_SETDISPATCH    = 35,  // N words:
                           //   | unused (24)         | opc |
                           //   | upb_inttable* (32 or 64)  |

  OP_HALT           = 36,  // No arg.
} opcode;

// Upper bound on opcode values: OP_HALT is the final entry of the opcode enum.
#define OP_MAX OP_HALT

// Extracts the opcode from an instruction word.  Every instruction keeps its
// opcode in the low byte, so the remaining bits are simply masked away.
UPB_INLINE opcode getop(uint32_t instr) {
  return (opcode)(instr & 0xffu);
}

// Method group; represents a set of decoder methods that had their code
// emitted together, and must therefore be freed together.  Immutable once
// created.  It is possible we may want to expose this to users at some point.
//
// Overall ownership of Decoder objects looks like this:
//
//                +----------+
//                |          | <---> DecoderMethod
//                | method   |
// CodeCache ---> |  group   | <---> DecoderMethod
//                |          |
//                | (mgroup) | <---> DecoderMethod
//                +----------+
typedef struct {
  // Refcounting base object.
  // NOTE(review): presumably has to remain the first member so an mgroup can
  // be converted to/from a upb_refcounted pointer -- confirm.
  upb_refcounted base;

  // Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod.  We own refs on the
  // methods.
  upb_inttable methods;

  // When we add the ability to link to previously existing mgroups, we'll
  // need an array of mgroups we reference here, and own refs on them.

  // The bytecode for our methods, if any exists.  Owned by us.
  uint32_t *bytecode;
  // End of the bytecode buffer.
  // NOTE(review): presumably points one past the last instruction word --
  // confirm against the bytecode compiler.
  uint32_t *bytecode_end;

#ifdef UPB_USE_JIT_X64
  // The members below exist only when the x64 JIT is compiled in.

  // JIT-generated machine code, if any.
  upb_string_handlerfunc *jit_code;
  // The size of the jit_code (required to munmap()).
  size_t jit_size;
  // NOTE(review): debug_info and dl are undocumented here.  dl looks like a
  // dlopen()-style handle and debug_info like associated debugging data for
  // the generated code -- confirm against the JIT implementation.
  char *debug_info;
  void *dl;
#endif
} mgroup;

// Decoder entry points; used as handlers.
//
// Each takes an opaque closure followed by an opaque handler-data pointer
// (named pc, hd or handler_data below).
// NOTE(review): judging by the names, startbc/startjit begin a decode with the
// bytecode interpreter and the JIT-compiled code respectively, decode is
// handed `size` bytes at `buf` and returns a byte count, and end finishes the
// stream -- confirm against decoder.c before relying on this.
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
                            size_t size, const upb_bufhandle *handle);
bool upb_pbdecoder_end(void *closure, const void *handler_data);

// Decoder-internal functions that the JIT calls to handle fallback paths.
//
// Per the note near the end of this header, functions in decoder.c that return
// int32_t use the DECODE_* return scheme: a negative value is one of the
// DECODE_* codes, anything else is a value the decode function should return
// immediately.
// NOTE(review): the decode_* helpers presumably store the decoded value
// through their output pointer on success -- confirm against decoder.c.
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle);
size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type);
int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected);
int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64);
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32);
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64);
// Records an error message on the decoder.
// NOTE(review): ownership/lifetime requirements for msg are not visible here;
// presumably a string with static storage duration -- confirm.
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);

// Error messages that are shared between the bytecode and JIT decoders.
// Declared here only; the definition lives in another translation unit.
extern const char *kPbDecoderStackOverflow;

// Access to decoderplan members needed by the decoder.
// NOTE(review): no "decoderplan" type appears in this header, so the line
// above may be stale.  Judging by its name and signature, getopname maps an
// opcode value to a printable name -- confirm.
const char *upb_pbdecoder_getopname(unsigned int op);

// JIT codegen entry point.
// NOTE(review): upb_pbdecoder_jit presumably fills in the JIT-related members
// of the group and upb_pbdecoder_freejit releases them -- confirm against the
// JIT implementation.
void upb_pbdecoder_jit(mgroup *group);
void upb_pbdecoder_freejit(mgroup *group);

// A special label that means "do field dispatch for this message and branch to
// wherever that takes you."
#define LABEL_DISPATCH 0

// A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
// RET) for branching to when we find an appropriate ENDGROUP tag.
// NOTE(review): shares the value 0 with LABEL_DISPATCH, but the two are used
// in different contexts (a label vs. a dispatch-table key).
#define DISPATCH_ENDMSG 0

// It's important to use this invalid wire type instead of 0 (which is a valid
// wire type).  The value fits in the 8-bit wt1/wt2 fields of a packed dispatch
// table entry.
#define NO_WIRE_TYPE 0xff

// The dispatch table layout is:
//   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
//
// If wt1 matches, jump to the 48-bit offset.  If wt2 matches, look up
// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
//
// We need two wire types because of packed/non-packed compatibility.  A
// primitive repeated field can use either wire type and be valid.  While we
// could key the table on fieldnum+wiretype, the table would be 8x sparser.
//
// Storing two wire types in the primary value allows us to quickly rule out
// the second wire type without needing to do a separate lookup (this case is
// less common than an unknown field).
// Builds a dispatch table value from its three components:
//   bits 0-7   wt1
//   bits 8-15  wt2
//   bits 16-63 ofs (any bits of ofs above the low 48 are shifted out)
UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
                                               uint8_t wt2) {
  const uint64_t wire_types = ((uint64_t)wt2 << 8) | (uint64_t)wt1;
  return (ofs << 16) | wire_types;
}

// Splits a dispatch table value into its components and stores them through
// the three output pointers: the low byte goes to *wt1, the next byte to *wt2,
// and the remaining upper 48 bits to *ofs.
UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
                                             uint8_t *wt1, uint8_t *wt2) {
  const uint8_t primary_wt = (uint8_t)(dispatch & 0xff);
  const uint8_t secondary_wt = (uint8_t)((dispatch >> 8) & 0xff);
  const uint64_t offset = dispatch >> 16;

  *wt1 = primary_wt;
  *wt2 = secondary_wt;
  *ofs = offset;
}

// All of the functions in decoder.c that return int32_t return values according
// to the following scheme:
//   1. negative values indicate a return code from the following list.
//   2. non-negative values indicate that error or end of buffer was hit, and
//      that the decode function should immediately return the given value
//      (the decoder state has already been suspended and is ready to be
//      resumed).
#define DECODE_OK -1
#define DECODE_MISMATCH -2  // Used only from checktag_slow().
#define DECODE_ENDGROUP -3  // Used only from checkunknown().

// Evaluates x exactly once.  If the result is non-negative (case 2 above) it
// is returned from the enclosing function; otherwise execution continues.
//
// The temporary has a deliberately unusual name so that the argument may
// mention a caller variable named "ret" without being captured by the macro's
// own declaration (a temporary named "ret" would expand CHECK_RETURN(ret) to
// "int32_t ret = ret", which reads an uninitialized value).
//
// The expansion is a brace block rather than do { } while (0), so it must not
// be used as the unbraced body of an "if" that has an "else".
#define CHECK_RETURN(x) \
  { int32_t check_return_ret_ = (x); \
    if (check_return_ret_ >= 0) return check_return_ret_; }

#endif  // UPB_DECODER_INT_H_
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback