summary | refs | log | tree | commit | diff
path: root/grammars
diff options
context:
space:
mode:
author: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 16:41:13 -0800
committer: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 16:41:13 -0800
commit: 26a42b4a7ba077659f791208a2a7989bfdfb3663 (patch)
tree: 02069692c4d629d3108bbed43c4eb6eddfd2fbc7 /grammars
parent: e133f250761c67b4465181f41909e78c272901d3 (diff)
playing with the C grammar
Diffstat (limited to 'grammars')
-rw-r--r-- grammars/c/disambiguate.c 33
-rw-r--r-- grammars/c/grammar.txt 76
2 files changed, 102 insertions, 7 deletions
diff --git a/grammars/c/disambiguate.c b/grammars/c/disambiguate.c
index a2d69d1..403d65f 100644
--- a/grammars/c/disambiguate.c
+++ b/grammars/c/disambiguate.c
@@ -1,14 +1,37 @@
+// Tokens recorded by alert_parse(); is_typename() compares their strings
+// against a candidate token's string. Fixed capacity of 1024 entries.
+struct token *TYPE_NAMES[1024];
+// Number of entries of TYPE_NAMES currently in use.
+size_t N_TYPE_NAMES;
+
+/*
+ * Parse hook: when `state` is a TYPEDEF production, record the first IDENT
+ * token found at or after find_token(state, 2) in TYPE_NAMES, so that
+ * is_typename() will match that token's string later.
+ *
+ * Does nothing for any other production. If TYPE_NAMES is already full the
+ * name is dropped instead of being written past the end of the array.
+ *
+ * NOTE(review): the scan relies on the token sequence being terminated by a
+ * DONE_SYMBOL token -- confirm against find_token().
+ */
+void alert_parse(struct state *state) {
+    if (PRODUCTION_ID_TO_SYMBOL[state->production_id] != SYMBOL_TYPEDEF)
+        return;
+    for (struct token *t = find_token(state, 2); t->symbol != DONE_SYMBOL; t++) {
+        if (t->symbol != SYMBOL_IDENT)
+            continue;
+        // TYPE_NAMES is a fixed-size table; never index beyond its capacity.
+        if (N_TYPE_NAMES < sizeof(TYPE_NAMES) / sizeof(TYPE_NAMES[0]))
+            TYPE_NAMES[N_TYPE_NAMES++] = t;
+        break; // only the first IDENT is considered
+    }
+}
+
+/*
+ * Returns 1 when `token`'s string is "int" or is equal to the string of any
+ * token stored in TYPE_NAMES[0 .. N_TYPE_NAMES-1]; returns 0 otherwise.
+ */
+int is_typename(struct token *token) {
+    // The built-in name is checked first; the table is only scanned on a miss.
+    int found = (strcmp("int", token->string) == 0);
+    for (size_t idx = 0; !found && idx < N_TYPE_NAMES; idx++)
+        found = (strcmp(TYPE_NAMES[idx]->string, token->string) == 0);
+    return found;
+}
+
int disambiguator(struct state *old, struct state *new) {
// printf("Old tree:\n");
// print_parse_tree(old, 4);
// printf("New tree:\n");
// print_parse_tree(new, 4);
- if (old->start_idx != new->start_idx) {
- // printf("\t\tIGNORING "); print_parse_tree2(old);
- // printf("\t\tVS: "); print_parse_tree2(new);
- return 2;
- }
+ if (old->production_id == PRODUCTION_DECL_STMT)
+ if (!is_typename(find_token(old->reasons[0], 0)))
+ return 1;
+ if (new->production_id == PRODUCTION_DECL_STMT)
+ if (!is_typename(find_token(new->reasons[0], 0)))
+ return 0;
// Prefer the earlier parsings in the grammar when two entirely different
// productions are taken.
diff --git a/grammars/c/grammar.txt b/grammars/c/grammar.txt
index 7959318..ffe85c3 100644
--- a/grammars/c/grammar.txt
+++ b/grammars/c/grammar.txt
@@ -1,5 +1,6 @@
KEYWORDS list
switch volatile case while do else const for if
+ struct union typedef void
IDENT regex
[a-zA-Z_][0-9a-zA-Z_]*
@@ -16,6 +17,63 @@ OP list
< <= > >= =
. -> ? :
+############### TYPE PARSING
+# A PRIMITIVE_TYPE is the core object that takes up space after dereferencing,
+# calling, etc. A normal variable declaration is PRIMITIVE_TYPE (expression)
+PRIMITIVE_TYPE nonterm
+ struct IDENT
+ union IDENT
+ struct IDENT AGGREGATE_DECLARATION
+ union IDENT AGGREGATE_DECLARATION
+ const PRIMITIVE_TYPE
+ volatile PRIMITIVE_TYPE
+ void
+ IDENT
+
+# A TYPE_EXPRESSION is basically an lvalue expression.
+TYPE_EXPRESSION nonterm
+ IDENT
+ TYPE_EXPRESSION [ ]
+ TYPE_EXPRESSION [ INT ]
+ * TYPE_EXPRESSION
+ ( TYPE_EXPRESSION )
+ TYPE_EXPRESSION ( )
+ TYPE_EXPRESSION ( ARGS )
+
+DECLARATION nonterm
+ PRIMITIVE_TYPE TYPE_EXPRESSION
+
+# An ANONYMOUS_TYPE has no name
+ANONYMOUS_TYPE nonterm
+ PRIMITIVE_TYPE
+ ANONYMOUS_TYPE [ ]
+ ANONYMOUS_TYPE [ INT ]
+ * ANONYMOUS_TYPE
+ ( ANONYMOUS_TYPE )
+ ANONYMOUS_TYPE ( )
+ ANONYMOUS_TYPE ( ARGS )
+
+############### TOP LEVEL
+TOP_LEVEL nonterm .start
+ TYPEDEF
+ FUNCTION
+
+ARGS nonterm
+ ANONYMOUS_TYPE
+ ANONYMOUS_TYPE , ARGS
+ DECLARATION
+ DECLARATION , ARGS
+
+FUNCTION nonterm
+ DECLARATION ( ) TRUE_BLOCK
+ DECLARATION ( ARGS ) TRUE_BLOCK
+
+AGGREGATE_DECLARATION nonterm
+ { STMTS }
+
+TYPEDEF nonterm
+ typedef PRIMITIVE_TYPE TYPE_EXPRESSION ;
+
EXPR nonterm
INT
IDENT
@@ -40,19 +98,33 @@ FOR nonterm
SWITCH nonterm
switch ( EXPR ) BLOCK
+DECLARATION_CHAIN nonterm
+ TYPE_EXPRESSION
+ TYPE_EXPRESSION , DECLARATION_CHAIN
+ TYPE_EXPRESSION = EXPR
+ TYPE_EXPRESSION = EXPR , DECLARATION_CHAIN
+
+DECLARATION_STATEMENT nonterm
+ PRIMITIVE_TYPE DECLARATION_CHAIN ;
+
STMT nonterm
IF
WHILE
DO
FOR
SWITCH
+ # NOTE: it auto-prefers declarations right now
+ DECLARATION_STATEMENT .name DECL_STMT
EXPR ;
-STMTS nonterm .start
+STMTS nonterm
STMT
STMT STMTS
-BLOCK nonterm
+TRUE_BLOCK nonterm
{ }
{ STMTS }
+
+BLOCK nonterm
+ TRUE_BLOCK
STMT
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback