summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 03:42:42 -0800
committer: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 03:42:42 -0800
commit: e133f250761c67b4465181f41909e78c272901d3 (patch)
tree: fc75f55dd2a3f4dddc589a8c259f887e90ef3a1e
parent: ffc6388571004b17e3a3dee2511ec99076ee803a (diff)
Parse trees, dangling else, etc.
-rw-r--r-- earlpy/earlpy.py | 25
-rw-r--r-- examples/expr.c | 1
-rw-r--r-- examples/simple.c | 8
-rw-r--r-- examples/test.py | 16
-rw-r--r-- grammars/c/disambiguate.c | 11
-rw-r--r-- grammars/c/grammar.txt | 43
6 files changed, 86 insertions, 18 deletions
diff --git a/earlpy/earlpy.py b/earlpy/earlpy.py
index d48c1be..7fbf0f0 100644
--- a/earlpy/earlpy.py
+++ b/earlpy/earlpy.py
@@ -73,17 +73,23 @@ class Parser:
for token in tokens]
# production id
nodes = [t[0] for t in struct.iter_unpack("Q", res.stdout[8+(8*3*n_tokens):])]
+ # print(nodes)
# REPARSE the nodes
root = Node(self.productions[nodes[0]][1],
self.productions[nodes[0]][0])
- nodes = nodes[1:]
+ nodes.pop(0)
stack = [root]
while stack:
node = stack[-1]
+ # print(len(stack))
+ # if isinstance(node, tuple):
+ # print("\t", node)
+ # else:
+ # print("\t", node.symbol.name, [s.name for s in node.production])
+
if (isinstance(node, tuple)
- or len(node.production) == len(node.contents)):
- # COMPLETE!
+ or len(node.production) == len(node.contents)):
stack.pop()
if stack: stack[-1].contents.append(node)
else:
@@ -235,8 +241,12 @@ class Parser:
rule = [self.name_to_symbol[x] for x in rule]
self.productions.append((rule, symbol))
prods = ', '.join(map(str, range(start_idx, len(self.productions))))
- put(", {" + prods + ", 0}")
+ if prods:
+ put(", {" + prods + ", 0}")
+ else:
+ put(", {0}")
else:
+ self.productions.append(([], symbol))
put(", {0}")
put(" };")
putl(f"#define N_PRODUCTIONS {len(self.productions)}")
@@ -244,8 +254,11 @@ class Parser:
putl("symbol_id_t PRODUCTION_ID_TO_PRODUCTION[N_PRODUCTIONS][MAX_PRODUCTION_LEN] = { {0}")
for i, (production, _) in enumerate(self.productions):
if i == 0: continue
- production = ', '.join([symbol.name for symbol in production])
- put(", {" + production + ", 0}")
+ production = ', '.join(str(symbol.id) for symbol in production)
+ if production:
+ put(", {" + production + ", 0}")
+ else:
+ put(", {0}")
put(" };")
putl("symbol_id_t PRODUCTION_ID_TO_SYMBOL[N_PRODUCTIONS] = { 0")
diff --git a/examples/expr.c b/examples/expr.c
new file mode 100644
index 0000000..68c68b0
--- /dev/null
+++ b/examples/expr.c
@@ -0,0 +1 @@
+x + 5 == 10
diff --git a/examples/simple.c b/examples/simple.c
index a8110a7..4d3139c 100644
--- a/examples/simple.c
+++ b/examples/simple.c
@@ -1,3 +1,5 @@
-if (x + 5 == 10) {
- x += 2;
-}
+if (1)
+ if (1)
+ 1;
+ else
+ 1;
diff --git a/examples/test.py b/examples/test.py
index bf8c6d2..e5783c5 100644
--- a/examples/test.py
+++ b/examples/test.py
@@ -1,4 +1,14 @@
import earlpy
-p = earlpy.Parser("grammars/expression")
-node = p.parse_string("1 + 1 + 2 + 3")
-print(node.pprint())
+
+if False:
+ p = earlpy.Parser("grammars/expression")
+ node = p.parse_string("1 + 1 + 2 + 3")
+ print(node.pprint())
+elif True:
+ p = earlpy.Parser("grammars/c")
+ node = p.parse_file("examples/simple.c")
+ print(node.pprint())
+else:
+ p = earlpy.Parser("grammars/c")
+ node = p.parse_file("examples/expr.c")
+ print(node.pprint())
diff --git a/grammars/c/disambiguate.c b/grammars/c/disambiguate.c
index 9a8bf08..a2d69d1 100644
--- a/grammars/c/disambiguate.c
+++ b/grammars/c/disambiguate.c
@@ -1,8 +1,8 @@
int disambiguator(struct state *old, struct state *new) {
- // printf("Old tree: ");
- // print_parse_tree2(old);
- // printf("New tree: ");
- // print_parse_tree2(new);
+ // printf("Old tree:\n");
+ // print_parse_tree(old, 4);
+ // printf("New tree:\n");
+ // print_parse_tree(new, 4);
if (old->start_idx != new->start_idx) {
// printf("\t\tIGNORING "); print_parse_tree2(old);
@@ -44,6 +44,7 @@ int disambiguator(struct state *old, struct state *new) {
}
}
}
- printf("TOTALLY UNKNOWN!\n");
+
+ fprintf(stderr, "TOTALLY UNKNOWN!\n");
return 2;
}
diff --git a/grammars/c/grammar.txt b/grammars/c/grammar.txt
index 486f319..7959318 100644
--- a/grammars/c/grammar.txt
+++ b/grammars/c/grammar.txt
@@ -7,7 +7,7 @@ IDENT regex
INT regex
[0-9]+
-OPS list
+OP list
( ) { } [ ]
; ,
- + ! % * & / << >> ^ |
@@ -15,3 +15,44 @@ OPS list
&& || ++ --
< <= > >= =
. -> ? :
+
+EXPR nonterm
+ INT
+ IDENT
+ EXPR --
+ EXPR ++
+ EXPR OP EXPR
+ EXPR ? EXPR : EXPR
+
+IF nonterm
+ if ( EXPR ) BLOCK
+ if ( EXPR ) BLOCK else BLOCK
+
+WHILE nonterm
+ while ( EXPR ) BLOCK
+
+DO nonterm
+ do BLOCK while ( EXPR )
+
+FOR nonterm
+ for ( EXPR ; EXPR ; EXPR ) BLOCK
+
+SWITCH nonterm
+ switch ( EXPR ) BLOCK
+
+STMT nonterm
+ IF
+ WHILE
+ DO
+ FOR
+ SWITCH
+ EXPR ;
+
+STMTS nonterm .start
+ STMT
+ STMT STMTS
+
+BLOCK nonterm
+ { }
+ { STMTS }
+ STMT
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback