diff options
author | Matthew Sotoudeh <matthew@masot.net> | 2024-02-19 03:42:42 -0800 |
---|---|---|
committer | Matthew Sotoudeh <matthew@masot.net> | 2024-02-19 03:42:42 -0800 |
commit | e133f250761c67b4465181f41909e78c272901d3 (patch) | |
tree | fc75f55dd2a3f4dddc589a8c259f887e90ef3a1e | |
parent | ffc6388571004b17e3a3dee2511ec99076ee803a (diff) |
Parse trees, dangling else, etc.
-rw-r--r-- | earlpy/earlpy.py | 25 | ||||
-rw-r--r-- | examples/expr.c | 1 | ||||
-rw-r--r-- | examples/simple.c | 8 | ||||
-rw-r--r-- | examples/test.py | 16 | ||||
-rw-r--r-- | grammars/c/disambiguate.c | 11 | ||||
-rw-r--r-- | grammars/c/grammar.txt | 43 |
6 files changed, 86 insertions, 18 deletions
diff --git a/earlpy/earlpy.py b/earlpy/earlpy.py index d48c1be..7fbf0f0 100644 --- a/earlpy/earlpy.py +++ b/earlpy/earlpy.py @@ -73,17 +73,23 @@ class Parser: for token in tokens] # production id nodes = [t[0] for t in struct.iter_unpack("Q", res.stdout[8+(8*3*n_tokens):])] + # print(nodes) # REPARSE the nodes root = Node(self.productions[nodes[0]][1], self.productions[nodes[0]][0]) - nodes = nodes[1:] + nodes.pop(0) stack = [root] while stack: node = stack[-1] + # print(len(stack)) + # if isinstance(node, tuple): + # print("\t", node) + # else: + # print("\t", node.symbol.name, [s.name for s in node.production]) + if (isinstance(node, tuple) - or len(node.production) == len(node.contents)): - # COMPLETE! + or len(node.production) == len(node.contents)): stack.pop() if stack: stack[-1].contents.append(node) else: @@ -235,8 +241,12 @@ class Parser: rule = [self.name_to_symbol[x] for x in rule] self.productions.append((rule, symbol)) prods = ', '.join(map(str, range(start_idx, len(self.productions)))) - put(", {" + prods + ", 0}") + if prods: + put(", {" + prods + ", 0}") + else: + put(", {0}") else: + self.productions.append(([], symbol)) put(", {0}") put(" };") putl(f"#define N_PRODUCTIONS {len(self.productions)}") @@ -244,8 +254,11 @@ class Parser: putl("symbol_id_t PRODUCTION_ID_TO_PRODUCTION[N_PRODUCTIONS][MAX_PRODUCTION_LEN] = { {0}") for i, (production, _) in enumerate(self.productions): if i == 0: continue - production = ', '.join([symbol.name for symbol in production]) - put(", {" + production + ", 0}") + production = ', '.join(str(symbol.id) for symbol in production) + if production: + put(", {" + production + ", 0}") + else: + put(", {0}") put(" };") putl("symbol_id_t PRODUCTION_ID_TO_SYMBOL[N_PRODUCTIONS] = { 0") diff --git a/examples/expr.c b/examples/expr.c new file mode 100644 index 0000000..68c68b0 --- /dev/null +++ b/examples/expr.c @@ -0,0 +1 @@ +x + 5 == 10 diff --git a/examples/simple.c b/examples/simple.c index a8110a7..4d3139c 100644 --- a/examples/simple.c +++ b/examples/simple.c @@ -1,3 +1,5 @@ -if (x + 5 == 10) { - x += 2; -} +if (1) + if (1) + 1; + else + 1; diff --git a/examples/test.py b/examples/test.py index bf8c6d2..e5783c5 100644 --- a/examples/test.py +++ b/examples/test.py @@ -1,4 +1,14 @@ import earlpy -p = earlpy.Parser("grammars/expression") -node = p.parse_string("1 + 1 + 2 + 3") -print(node.pprint()) + +if False: + p = earlpy.Parser("grammars/expression") + node = p.parse_string("1 + 1 + 2 + 3") + print(node.pprint()) +elif True: + p = earlpy.Parser("grammars/c") + node = p.parse_file("examples/simple.c") + print(node.pprint()) +else: + p = earlpy.Parser("grammars/c") + node = p.parse_file("examples/expr.c") + print(node.pprint()) diff --git a/grammars/c/disambiguate.c b/grammars/c/disambiguate.c index 9a8bf08..a2d69d1 100644 --- a/grammars/c/disambiguate.c +++ b/grammars/c/disambiguate.c @@ -1,8 +1,8 @@ int disambiguator(struct state *old, struct state *new) { - // printf("Old tree: "); - // print_parse_tree2(old); - // printf("New tree: "); - // print_parse_tree2(new); + // printf("Old tree:\n"); + // print_parse_tree(old, 4); + // printf("New tree:\n"); + // print_parse_tree(new, 4); if (old->start_idx != new->start_idx) { // printf("\t\tIGNORING "); print_parse_tree2(old); @@ -44,6 +44,7 @@ int disambiguator(struct state *old, struct state *new) { } } } - printf("TOTALLY UNKNOWN!\n"); + + fprintf(stderr, "TOTALLY UNKNOWN!\n"); return 2; } diff --git a/grammars/c/grammar.txt b/grammars/c/grammar.txt index 486f319..7959318 100644 --- a/grammars/c/grammar.txt +++ b/grammars/c/grammar.txt @@ -7,7 +7,7 @@ IDENT regex INT regex [0-9]+ -OPS list +OP list ( ) { } [ ] ; , - + ! % * & / << >> ^ | @@ -15,3 +15,44 @@ OPS list && || ++ -- < <= > >= = . -> ? : + +EXPR nonterm + INT + IDENT + EXPR -- + EXPR ++ + EXPR OP EXPR + EXPR ? EXPR : EXPR + +IF nonterm + if ( EXPR ) BLOCK + if ( EXPR ) BLOCK else BLOCK + +WHILE nonterm + while ( EXPR ) BLOCK + +DO nonterm + do BLOCK while ( EXPR ) + +FOR nonterm + for ( EXPR ; EXPR ; EXPR ) BLOCK + +SWITCH nonterm + switch ( EXPR ) BLOCK + +STMT nonterm + IF + WHILE + DO + FOR + SWITCH + EXPR ; + +STMTS nonterm .start + STMT + STMT STMTS + +BLOCK nonterm + { } + { STMTS } + STMT |