diff options
author | Matthew Sotoudeh <matthew@masot.net> | 2024-02-19 16:41:13 -0800 |
---|---|---|
committer | Matthew Sotoudeh <matthew@masot.net> | 2024-02-19 16:41:13 -0800 |
commit | 26a42b4a7ba077659f791208a2a7989bfdfb3663 (patch) | |
tree | 02069692c4d629d3108bbed43c4eb6eddfd2fbc7 /grammars | |
parent | e133f250761c67b4465181f41909e78c272901d3 (diff) |
playing with the C grammar
Diffstat (limited to 'grammars')
-rw-r--r-- | grammars/c/disambiguate.c | 33 | ||||
-rw-r--r-- | grammars/c/grammar.txt | 76 |
2 files changed, 102 insertions, 7 deletions
diff --git a/grammars/c/disambiguate.c b/grammars/c/disambiguate.c index a2d69d1..403d65f 100644 --- a/grammars/c/disambiguate.c +++ b/grammars/c/disambiguate.c @@ -1,14 +1,37 @@ +struct token *TYPE_NAMES[1024]; +size_t N_TYPE_NAMES; + +void alert_parse(struct state *state) { + if (PRODUCTION_ID_TO_SYMBOL[state->production_id] == SYMBOL_TYPEDEF) { + for (struct token *t = find_token(state, 2); t->symbol != DONE_SYMBOL; t++) { + if (t->symbol == SYMBOL_IDENT) { + TYPE_NAMES[N_TYPE_NAMES++] = t; + break; + } + } + } +} + +int is_typename(struct token *token) { + if (!strcmp("int", token->string)) return 1; + for (size_t i = 0; i < N_TYPE_NAMES; i++) + if (!strcmp(TYPE_NAMES[i]->string, token->string)) + return 1; + return 0; +} + int disambiguator(struct state *old, struct state *new) { // printf("Old tree:\n"); // print_parse_tree(old, 4); // printf("New tree:\n"); // print_parse_tree(new, 4); - if (old->start_idx != new->start_idx) { - // printf("\t\tIGNORING "); print_parse_tree2(old); - // printf("\t\tVS: "); print_parse_tree2(new); - return 2; - } + if (old->production_id == PRODUCTION_DECL_STMT) + if (!is_typename(find_token(old->reasons[0], 0))) + return 1; + if (new->production_id == PRODUCTION_DECL_STMT) + if (!is_typename(find_token(new->reasons[0], 0))) + return 0; // Prefer the earlier parsings in the grammar when two entirely different // productions are taken. diff --git a/grammars/c/grammar.txt b/grammars/c/grammar.txt index 7959318..ffe85c3 100644 --- a/grammars/c/grammar.txt +++ b/grammars/c/grammar.txt @@ -1,5 +1,6 @@ KEYWORDS list switch volatile case while do else const for if + struct union typedef void IDENT regex [a-zA-Z_][0-9a-zA-Z_]* @@ -16,6 +17,63 @@ OP list < <= > >= = . -> ? : +############### TYPE PARSING +# A PRIMITIVE_TYPE is the core object that takes up space after dereferencing, +# calling, etc. A normal variable declaration is PRIMITIVE_TYPE (expression) +PRIMITIVE_TYPE nonterm + struct IDENT + union IDENT + struct IDENT AGGREGATE_DECLARATION + union IDENT AGGREGATE_DECLARATION + const PRIMITIVE_TYPE + volatile PRIMITIVE_TYPE + void + IDENT + +# A TYPE_EXPRESSION is basically an lvalue expression. +TYPE_EXPRESSION nonterm + IDENT + TYPE_EXPRESSION [ ] + TYPE_EXPRESSION [ INT ] + * TYPE_EXPRESSION + ( TYPE_EXPRESSION ) + TYPE_EXPRESSION ( ) + TYPE_EXPRESSION ( ARGS ) + +DECLARATION nonterm + PRIMITIVE_TYPE TYPE_EXPRESSION + +# An ANONYMOUS_TYPE has no name +ANONYMOUS_TYPE nonterm + PRIMITIVE_TYPE + ANONYMOUS_TYPE [ ] + ANONYMOUS_TYPE [ INT ] + * ANONYMOUS_TYPE + ( ANONYMOUS_TYPE ) + ANONYMOUS_TYPE ( ) + ANONYMOUS_TYPE ( ARGS ) + +############### TOP LEVEL +TOP_LEVEL nonterm .start + TYPEDEF + FUNCTION + +ARGS nonterm + ANONYMOUS_TYPE + ANONYMOUS_TYPE , ARGS + DECLARATION + DECLARATION , ARGS + +FUNCTION nonterm + DECLARATION ( ) TRUE_BLOCK + DECLARATION ( ARGS ) TRUE_BLOCK + +AGGREGATE_DECLARATION nonterm + { STMTS } + +TYPEDEF nonterm + typedef PRIMITIVE_TYPE TYPE_EXPRESSION ; + EXPR nonterm INT IDENT @@ -40,19 +98,33 @@ FOR nonterm SWITCH nonterm switch ( EXPR ) BLOCK +DECLARATION_CHAIN nonterm + TYPE_EXPRESSION + TYPE_EXPRESSION , DECLARATION_CHAIN + TYPE_EXPRESSION = EXPR + TYPE_EXPRESSION = EXPR , DECLARATION_CHAIN + +DECLARATION_STATEMENT nonterm + PRIMITIVE_TYPE DECLARATION_CHAIN ; + STMT nonterm IF WHILE DO FOR SWITCH + # NOTE: it auto-prefers declarations right now + DECLARATION_STATEMENT .name DECL_STMT EXPR ; -STMTS nonterm .start +STMTS nonterm STMT STMT STMTS -BLOCK nonterm +TRUE_BLOCK nonterm { } { STMTS } + +BLOCK nonterm + TRUE_BLOCK STMT |