Parse trees, dangling else, etc.

author: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 03:42:42 -0800
committer: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 03:42:42 -0800
commit: e133f250761c67b4465181f41909e78c272901d3 (patch)
tree: fc75f55dd2a3f4dddc589a8c259f887e90ef3a1e
parent: ffc6388571004b17e3a3dee2511ec99076ee803a (diff)
6 files changed, 86 insertions, 18 deletions
diff --git a/earlpy/earlpy.py b/earlpy/earlpy.py
index d48c1be..7fbf0f0 100644
--- a/earlpy/earlpy.py
+++ b/earlpy/earlpy.py
@@ -73,17 +73,23 @@ class Parser:
                   for token in tokens]
         # production id
         nodes = [t[0] for t in struct.iter_unpack("Q", res.stdout[8+(8*3*n_tokens):])]
+        # print(nodes)
 
         # REPARSE the nodes
         root = Node(self.productions[nodes[0]][1],
                     self.productions[nodes[0]][0])
-        nodes = nodes[1:]
+        nodes.pop(0)
         stack = [root]
         while stack:
             node = stack[-1]
+            # print(len(stack))
+            # if isinstance(node, tuple):
+            #     print("\t", node)
+            # else:
+            #     print("\t", node.symbol.name, [s.name for s in node.production])
+
             if (isinstance(node, tuple)
-                or len(node.production) == len(node.contents)):
-                # COMPLETE!
+                    or len(node.production) == len(node.contents)):
                 stack.pop()
                 if stack: stack[-1].contents.append(node)
             else:
@@ -235,8 +241,12 @@ class Parser:
                     rule = [self.name_to_symbol[x] for x in rule]
                     self.productions.append((rule, symbol))
                 prods = ', '.join(map(str, range(start_idx, len(self.productions))))
-                put(", {" + prods + ", 0}")
+                if prods:
+                    put(", {" + prods + ", 0}")
+                else:
+                    put(", {0}")
             else:
+                self.productions.append(([], symbol))
                 put(", {0}")
         put(" };")
         putl(f"#define N_PRODUCTIONS {len(self.productions)}")
@@ -244,8 +254,11 @@ class Parser:
         putl("symbol_id_t PRODUCTION_ID_TO_PRODUCTION[N_PRODUCTIONS][MAX_PRODUCTION_LEN] = { {0}")
         for i, (production, _) in enumerate(self.productions):
             if i == 0: continue
-            production = ', '.join([symbol.name for symbol in production])
-            put(", {" + production + ", 0}")
+            production = ', '.join(str(symbol.id) for symbol in production)
+            if production:
+                put(", {" + production + ", 0}")
+            else:
+                put(", {0}")
         put(" };")
 
         putl("symbol_id_t PRODUCTION_ID_TO_SYMBOL[N_PRODUCTIONS] = { 0")
diff --git a/examples/expr.c b/examples/expr.c
new file mode 100644
index 0000000..68c68b0
--- /dev/null
+++ b/examples/expr.c
@@ -0,0 +1 @@
+x + 5 == 10
diff --git a/examples/simple.c b/examples/simple.c
index a8110a7..4d3139c 100644
--- a/examples/simple.c
+++ b/examples/simple.c
@@ -1,3 +1,5 @@
-if (x + 5 == 10) {
-    x += 2;
-}
+if (1)
+    if (1)
+        1;
+    else
+        1;
diff --git a/examples/test.py b/examples/test.py
index bf8c6d2..e5783c5 100644
--- a/examples/test.py
+++ b/examples/test.py
@@ -1,4 +1,14 @@
 import earlpy
-p = earlpy.Parser("grammars/expression")
-node = p.parse_string("1 + 1 + 2 + 3")
-print(node.pprint())
+
+if False:
+    p = earlpy.Parser("grammars/expression")
+    node = p.parse_string("1 + 1 + 2 + 3")
+    print(node.pprint())
+elif True:
+    p = earlpy.Parser("grammars/c")
+    node = p.parse_file("examples/simple.c")
+    print(node.pprint())
+else:
+    p = earlpy.Parser("grammars/c")
+    node = p.parse_file("examples/expr.c")
+    print(node.pprint())
diff --git a/grammars/c/disambiguate.c b/grammars/c/disambiguate.c
index 9a8bf08..a2d69d1 100644
--- a/grammars/c/disambiguate.c
+++ b/grammars/c/disambiguate.c
@@ -1,8 +1,8 @@
 int disambiguator(struct state *old, struct state *new) {
-    // printf("Old tree: ");
-    // print_parse_tree2(old);
-    // printf("New tree: ");
-    // print_parse_tree2(new);
+    // printf("Old tree:\n");
+    // print_parse_tree(old, 4);
+    // printf("New tree:\n");
+    // print_parse_tree(new, 4);
 
     if (old->start_idx != new->start_idx) {
         // printf("\t\tIGNORING "); print_parse_tree2(old);
@@ -44,6 +44,7 @@ int disambiguator(struct state *old, struct state *new) {
             }
         }
     }
-    printf("TOTALLY UNKNOWN!\n");
+
+    fprintf(stderr, "TOTALLY UNKNOWN!\n");
     return 2;
 }
diff --git a/grammars/c/grammar.txt b/grammars/c/grammar.txt
index 486f319..7959318 100644
--- a/grammars/c/grammar.txt
+++ b/grammars/c/grammar.txt
@@ -7,7 +7,7 @@ IDENT regex
 INT regex
     [0-9]+
 
-OPS list
+OP list
     ( ) { } [ ] 
     ; ,
     - + ! % * & / << >> ^ |
@@ -15,3 +15,44 @@ OPS list
     && || ++ --
     < <= > >= =
     . -> ? :
+
+EXPR nonterm
+    INT
+    IDENT
+    EXPR --
+    EXPR ++
+    EXPR OP EXPR
+    EXPR ? EXPR : EXPR
+
+IF nonterm
+    if ( EXPR ) BLOCK
+    if ( EXPR ) BLOCK else BLOCK
+
+WHILE nonterm
+    while ( EXPR ) BLOCK
+
+DO nonterm
+    do BLOCK while ( EXPR )
+
+FOR nonterm
+    for ( EXPR ; EXPR ; EXPR ) BLOCK
+
+SWITCH nonterm
+    switch ( EXPR ) BLOCK
+
+STMT nonterm
+    IF
+    WHILE
+    DO
+    FOR
+    SWITCH
+    EXPR ;
+
+STMTS nonterm .start
+    STMT
+    STMT STMTS
+
+BLOCK nonterm
+    { }
+    { STMTS }
+    STMT
author: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 03:42:42 -0800
committer: Matthew Sotoudeh <matthew@masot.net> 2024-02-19 03:42:42 -0800
commit: e133f250761c67b4465181f41909e78c272901d3 (patch)
tree: fc75f55dd2a3f4dddc589a8c259f887e90ef3a1e
parent: ffc6388571004b17e3a3dee2511ec99076ee803a (diff)