1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
#### OPTIMIZATIONS:
# Where possible, we want parse trees that look like
# (((A + B) + C) + D), i.e., left-associative, because that avoids long chains
# of completions. Put another way, ambiguity is then resolved as early in the
# left-to-right parse as possible.
# KEYWORDS: the literal words that the rules below use as terminals
# (e.g. `if`, `struct`, `sizeof`). Words that are not listed here, such as
# `int` or `default`, are not keywords in this grammar.
KEYWORDS list
switch volatile case while do else const for if
struct union typedef void return break continue
sizeof
# IDENT: an identifier -- a letter or underscore followed by any number of
# letters, digits or underscores.
# NOTE(review): every KEYWORDS entry also matches this pattern; presumably the
# lexer gives KEYWORDS priority -- confirm.
IDENT regex
[a-zA-Z_][0-9a-zA-Z_]*
# INT: an integer literal -- either `0x` followed by hex digits, or a run of
# decimal digits -- followed by any number of u/U/l/L suffix letters.
# NOTE(review): the decimal branch and the suffix are both starred, so the
# pattern can also match the empty string or a bare suffix such as `u`;
# confirm the lexer never selects such a match.
INT regex
((0x[0-9a-fA-F]*)|([0-9]*))([uUlL])*
# STRING: a double-quoted literal. Between the quotes, each item is either a
# character other than backslash and double quote, or a backslash followed by
# one character (so an escaped quote does not end the literal).
# https://stackoverflow.com/questions/2039795/regular-expression-for-a-string-literal-in-flex-lex
STRING regex
["]([^\\"]|\\.)*["]
# CHAR: a single-quoted literal. The body is consumed two characters at a
# time -- either a backslash followed by a quote, or two non-quote characters --
# with an optional single non-quote character before the closing quote.
# The pattern does not limit how many characters appear between the quotes.
CHAR regex
[']([\\][']|[^'][^'])*[^']?[']
# OP: punctuation and operator tokens: separators, arithmetic/bitwise
# operators, their compound-assignment and comparison forms, logical and
# increment/decrement operators, relational operators, plain assignment, and
# member access.
# NOTE(review): `;` and `,` are in this list, so the generic `EXPR OP EXPR`
# rule can also join expressions across them -- confirm this is intended.
# `~` is not listed.
OP list
; ,
- + ! % * & / << >> ^ |
-= += != %= *= &= /= <<= == >>= ^= |=
&& || ++ --
< <= > >= =
. ->
# TERNARY: the `:` and `?` tokens, kept separate from OP. `?` appears in the
# conditional-expression rules of EXPR; `:` appears there and in LABEL and CASE.
TERNARY list
: ?
# PARENS: the opening and closing tokens of the three bracket kinds:
# round, curly and square.
PARENS list
( ) { } [ ]
############### ERROR RECOVERY
# These rules match either a single token, or a pair of balanced parentheses
# NONPAREN: any single token that is not a bracket, i.e. one token from every
# token class except PARENS.
NONPAREN nonterm
KEYWORDS
IDENT
INT
STRING
CHAR
TERNARY
OP
# ERROR_INNER: one or more ERROR items in a row (left-recursive). ERROR uses it
# for the contents of a bracket pair.
# NOTE(review): `.poison` presumably flags this nonterminal as error-recovery
# output for the parser generator -- confirm against its documentation.
ERROR_INNER nonterm .poison
ERROR
ERROR_INNER ERROR
# ERROR: one unit of unparsed input -- either a matched bracket pair (empty, or
# wrapping an ERROR_INNER sequence) or a single NONPAREN token. Brackets are
# therefore only ever consumed in balanced pairs.
# NOTE(review): `.poison` presumably flags this nonterminal as error-recovery
# output for the parser generator -- confirm against its documentation.
ERROR nonterm .poison
( ERROR_INNER )
{ ERROR_INNER }
[ ERROR_INNER ]
( )
{ }
[ ]
NONPAREN
############### TYPE PARSING
# A PRIMITIVE_TYPE is the core object that takes up space after dereferencing,
# calling, etc. A normal variable declaration is PRIMITIVE_TYPE (expression)
#
# Alternatives: a struct/union tag, optionally followed by its body; a `const`
# or `volatile` qualifier applied to another PRIMITIVE_TYPE; `void`; or a bare
# IDENT. Built-in names such as `int` are not keywords in this grammar, so they
# (like typedef names) are matched by the IDENT alternative.
PRIMITIVE_TYPE nonterm
struct IDENT
union IDENT
struct IDENT AGGREGATE_DECLARATION
union IDENT AGGREGATE_DECLARATION
const PRIMITIVE_TYPE
volatile PRIMITIVE_TYPE
void
IDENT
# A TYPE_EXPRESSION is basically an lvalue expression.
# It is the declarator part of a declaration: the declared name (IDENT),
# optionally wrapped in array suffixes (with or without a size), leading `*`,
# leading `const`, grouping parentheses, and parameter lists (empty or ARGS).
TYPE_EXPRESSION nonterm
IDENT
TYPE_EXPRESSION [ ]
TYPE_EXPRESSION [ EXPR ]
* TYPE_EXPRESSION
const TYPE_EXPRESSION
( TYPE_EXPRESSION )
TYPE_EXPRESSION ( )
TYPE_EXPRESSION ( ARGS )
# DECLARATION: a base type followed by exactly one declarator, with no
# initializer and no terminating `;`. Used by ARGS for named parameters and by
# FUNCTION for the function header.
DECLARATION nonterm
PRIMITIVE_TYPE TYPE_EXPRESSION
# An ANONYMOUS_TYPE has no name
# It starts from a PRIMITIVE_TYPE and adds the same kinds of decoration as
# TYPE_EXPRESSION, except that `*` (optionally preceded by `const`) follows the
# type instead of preceding a name. It is used by the cast and `sizeof` rules
# of EXPR and for unnamed parameters in ARGS.
ANONYMOUS_TYPE nonterm
PRIMITIVE_TYPE
ANONYMOUS_TYPE [ ]
ANONYMOUS_TYPE [ EXPR ]
ANONYMOUS_TYPE *
ANONYMOUS_TYPE const *
const ANONYMOUS_TYPE
( ANONYMOUS_TYPE )
ANONYMOUS_TYPE ( )
ANONYMOUS_TYPE ( ARGS )
############### TOP LEVEL
# TOP_LEVEL: a non-empty, left-recursive sequence of top-level items:
# typedefs, struct and union definitions, function definitions and declaration
# statements. Input that matches none of these is consumed as ERROR.
# `.start` presumably marks this as the grammar's start symbol -- confirm
# against the parser generator.
# UNIONDECL is accepted next to STRUCTDECL so that `union tag { ... };` is a
# regular top-level item instead of only being matchable as ERROR.
TOP_LEVEL nonterm .start
TOP_LEVEL TYPEDEF
TOP_LEVEL STRUCTDECL
TOP_LEVEL UNIONDECL
TOP_LEVEL FUNCTION
TOP_LEVEL DECLARATION_STATEMENT
TYPEDEF
STRUCTDECL
UNIONDECL
FUNCTION
DECLARATION_STATEMENT
TOP_LEVEL ERROR
ERROR
# ARGS: a non-empty, comma-separated parameter list (left-recursive). Each
# entry is either an unnamed ANONYMOUS_TYPE or a named DECLARATION.
ARGS nonterm
ANONYMOUS_TYPE
ARGS , ANONYMOUS_TYPE
DECLARATION
ARGS , DECLARATION
# CALL_ARGS: a non-empty, comma-separated list of argument expressions
# (left-recursive); used by the call rule of EXPR.
CALL_ARGS nonterm
CALL_ARGS , EXPR
EXPR
# OLD_ARGS: a non-empty, comma-separated list of bare parameter names, as in
# old-style (K&R) function definitions where the parameter types are declared
# separately after the parameter list.
OLD_ARGS nonterm
OLD_ARGS , IDENT
IDENT
# OLD_ARG_DECLS: one or more declaration statements; FUNCTION places them
# between an OLD_ARGS parameter list and the function body.
OLD_ARG_DECLS nonterm
OLD_ARG_DECLS DECLARATION_STATEMENT
DECLARATION_STATEMENT
# FUNCTION: a function definition ending in a braced body (TRUE_BLOCK).
# The header is a DECLARATION followed by an empty parameter list, an ARGS
# list, or an old-style OLD_ARGS list with its OLD_ARG_DECLS. The last
# alternative is an old-style definition with no return type, only a name.
FUNCTION nonterm
DECLARATION ( ) TRUE_BLOCK
DECLARATION ( ARGS ) TRUE_BLOCK
DECLARATION ( OLD_ARGS ) OLD_ARG_DECLS TRUE_BLOCK
IDENT ( OLD_ARGS ) OLD_ARG_DECLS TRUE_BLOCK
# AGGREGATE_DECLARATION: the braced body of a struct or union. The contents
# are parsed with the general STMTS rule (there is no dedicated member rule);
# an empty body is also accepted.
AGGREGATE_DECLARATION nonterm
{ STMTS }
{ }
# TYPEDEF: `typedef`, a base type, one declarator naming the new type, and `;`.
TYPEDEF nonterm
typedef PRIMITIVE_TYPE TYPE_EXPRESSION ;
# STRUCTDECL: a tagged struct definition with a body, terminated by `;` and
# declaring no variable.
STRUCTDECL nonterm
struct IDENT AGGREGATE_DECLARATION ;
# UNIONDECL: the union counterpart of STRUCTDECL -- a tagged union definition
# with a body, terminated by `;` and declaring no variable.
UNIONDECL nonterm
union IDENT AGGREGATE_DECLARATION ;
# EXPR: any expression. Alternatives, in order:
#   - literals and identifiers;
#   - postfix and prefix `--` / `++`;
#   - unary `-`, `+`, `&`, `*`;
#   - cast: a parenthesised ANONYMOUS_TYPE followed by an expression;
#   - call, with no arguments or with CALL_ARGS;
#   - binary operation, through the single generic `EXPR OP EXPR` rule
#     (no operator precedence is encoded in the grammar);
#   - conditional `a ? b : c`, and the form with the middle operand omitted
#     `a ? : c` (as in the GNU C extension);
#   - indexing, logical not, parenthesised expression;
#   - `sizeof` applied to an expression or to an ANONYMOUS_TYPE;
#   - a braced INITIALIZER_LIST;
#   - two adjacent expressions (this covers e.g. adjacent string literals).
EXPR nonterm
INT
STRING
CHAR
IDENT
EXPR --
EXPR ++
-- EXPR
++ EXPR
- EXPR
+ EXPR
& EXPR
* EXPR
( ANONYMOUS_TYPE ) EXPR
EXPR ( )
EXPR ( CALL_ARGS )
EXPR OP EXPR
EXPR ? EXPR : EXPR
EXPR ? : EXPR
EXPR [ EXPR ]
! EXPR
( EXPR )
sizeof EXPR
sizeof ANONYMOUS_TYPE
INITIALIZER_LIST
EXPR EXPR
# INITIALIZER_LIST: a braced initializer, either empty or wrapping an
# INNER_INITIALIZER_LIST. It is itself an EXPR alternative, so initializer
# lists can nest.
INITIALIZER_LIST nonterm
{ INNER_INITIALIZER_LIST }
{ }
# INNER_INITIALIZER_LIST: the comma-separated contents of an initializer
# (left-recursive). The last alternative accepts a trailing comma.
INNER_INITIALIZER_LIST nonterm
EXPR
INNER_INITIALIZER_LIST , EXPR
INNER_INITIALIZER_LIST ,
# IF: an `if` statement with a parenthesised condition and a BLOCK body,
# optionally followed by `else` and another BLOCK.
IF nonterm
if ( EXPR ) BLOCK
if ( EXPR ) BLOCK else BLOCK
# WHILE: a `while` loop with a parenthesised condition and a BLOCK body.
WHILE nonterm
while ( EXPR ) BLOCK
# DO: a `do ... while (cond)` loop. The closing `;` is not part of this rule;
# it is left to be matched separately (e.g. by the empty-statement alternative
# of STMT).
DO nonterm
do BLOCK while ( EXPR )
# FOR: a `for` loop. The eight alternatives enumerate every combination of the
# three header clauses (init ; condition ; step) being present or absent.
# Each clause, when present, is an EXPR.
FOR nonterm
for ( ; ; ) BLOCK
for ( ; ; EXPR ) BLOCK
for ( ; EXPR ; ) BLOCK
for ( ; EXPR ; EXPR ) BLOCK
for ( EXPR ; ; ) BLOCK
for ( EXPR ; ; EXPR ) BLOCK
for ( EXPR ; EXPR ; ) BLOCK
for ( EXPR ; EXPR ; EXPR ) BLOCK
# SWITCH: a `switch` statement with a parenthesised expression and a BLOCK
# body.
SWITCH nonterm
switch ( EXPR ) BLOCK
# DECLARATION_CHAIN: a non-empty, comma-separated list of declarators
# (left-recursive), each optionally followed by `= EXPR` as an initializer.
DECLARATION_CHAIN nonterm
DECLARATION_CHAIN , TYPE_EXPRESSION
TYPE_EXPRESSION
DECLARATION_CHAIN , TYPE_EXPRESSION = EXPR
TYPE_EXPRESSION = EXPR
# DECLARATION_STATEMENT: a base type, one or more declarators
# (DECLARATION_CHAIN) and a terminating `;`. Used at top level, as a
# statement, and for old-style parameter declarations.
DECLARATION_STATEMENT nonterm
PRIMITIVE_TYPE DECLARATION_CHAIN ;
# RETURN: a `return` statement, with or without a value.
RETURN nonterm
return EXPR ;
return ;
# BREAK: the `break ;` statement.
BREAK nonterm
break ;
# CONTINUE: the `continue ;` statement.
CONTINUE nonterm
continue ;
# LABEL: an identifier, `:`, and the statement it labels. Because `default` is
# not in KEYWORDS, `default: stmt` is also matched by this rule.
LABEL nonterm
IDENT : STMT
# CASE: `case`, an expression, `:`, and the statement that follows it.
CASE nonterm
case EXPR : STMT
# STMT: a single statement -- a braced block, a labelled or `case` statement,
# a jump (`break`, `continue`, `return`), a control-flow construct, a
# declaration, an expression followed by `;`, or the empty statement `;`.
STMT nonterm
TRUE_BLOCK
LABEL
CASE
BREAK
CONTINUE
RETURN
IF
WHILE
DO
FOR
SWITCH
DECLARATION_STATEMENT
EXPR ;
;
# STMTS: a non-empty, left-recursive sequence of statements. ERROR items may
# appear anywhere in the sequence, so unparseable input inside a block is
# consumed as ERROR alongside the statements around it.
STMTS nonterm
STMTS STMT
STMT
STMTS ERROR
ERROR
# TRUE_BLOCK: a brace-delimited block, either empty or containing STMTS.
# FUNCTION requires this form for function bodies.
TRUE_BLOCK nonterm
{ }
{ STMTS }
# BLOCK: the body of a control-flow construct -- either a braced TRUE_BLOCK or
# a single STMT. Note that STMT already includes TRUE_BLOCK as an alternative.
BLOCK nonterm
TRUE_BLOCK
STMT
|