From 26a42b4a7ba077659f791208a2a7989bfdfb3663 Mon Sep 17 00:00:00 2001
From: Matthew Sotoudeh <matthew@masot.net>
Date: Mon, 19 Feb 2024 16:41:13 -0800
Subject: playing with the C grammar

---
 earlpy/earlpy.py | 52 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 17 deletions(-)

(limited to 'earlpy/earlpy.py')

diff --git a/earlpy/earlpy.py b/earlpy/earlpy.py
index 7fbf0f0..3b0deab 100644
--- a/earlpy/earlpy.py
+++ b/earlpy/earlpy.py
@@ -13,7 +13,8 @@ class Parser:
         parser_dir = parser_dir
         files = sorted([f"{parser_dir}/grammar.txt",
                         *glob(f"{parser_dir}/*.c"),
-                        f"{DIR}/parser.c"])
+                        f"{DIR}/parser.c",
+                        __file__])
         if f"{parser_dir}/parser.c" in files:
             files.remove(f"{parser_dir}/parser.c")
         hashes = ' '.join(
@@ -110,17 +111,10 @@ class Parser:
         ordered_symbols = []
         last_symbol = None
         for line in grammar:
-            if line[0] in ' \t':
-                if last_symbol.kind == "list":
-                    last_symbol.contents.extend(line.split())
-                elif last_symbol.kind == "regex":
-                    assert not last_symbol.contents
-                    last_symbol.contents = line.strip()
-                elif last_symbol.kind == "nonterm":
-                    last_symbol.contents.append(line.split())
-                else: raise NotImplementedError
-            elif line.strip().startswith("#"):
+            if line.strip().startswith("#"):
                 continue
+            elif line[0] in ' \t':
+                last_symbol.process_subline(line.strip())
             elif line.strip():
                 last_symbol = Symbol(line)
                 self.name_to_symbol[last_symbol.name] = last_symbol
@@ -179,6 +173,7 @@ class Parser:
 
             symbol.kind = "nonterm"
             symbol.contents = new_rule
+            symbol.production_names = [None for _ in new_rule]
             symbol.is_pseudo_node = True
             new_ordered_symbols.append(symbol)
         ordered_symbols = new_ordered_symbols
@@ -232,27 +227,31 @@ class Parser:
 
         putl("prod_id_t SYMBOL_ID_TO_PRODUCTION_IDS[N_SYMBOLS][MAX_N_PRODUCTIONS] = { {0}")
         # [(production, Symbol), ...]
-        self.productions = [([], None)]
+        self.productions = [([], None, None)]
         for symbol in ordered_symbols:
             if symbol.kind == "nonterm":
                 start_idx = len(self.productions)
                 assert isinstance(symbol.contents[0], list)
-                for rule in symbol.contents:
+                for i, rule in enumerate(symbol.contents):
                     rule = [self.name_to_symbol[x] for x in rule]
-                    self.productions.append((rule, symbol))
+                    self.productions.append((rule, symbol, symbol.production_names[i]))
                 prods = ', '.join(map(str, range(start_idx, len(self.productions))))
                 if prods:
                     put(", {" + prods + ", 0}")
                 else:
                     put(", {0}")
             else:
-                self.productions.append(([], symbol))
+                self.productions.append(([], symbol, None))
                 put(", {0}")
         put(" };")
         putl(f"#define N_PRODUCTIONS {len(self.productions)}")
 
+        for i, (_, _, name) in enumerate(self.productions):
+            if name:
+                putl(f"#define PRODUCTION_{name} {i}")
+
         putl("symbol_id_t PRODUCTION_ID_TO_PRODUCTION[N_PRODUCTIONS][MAX_PRODUCTION_LEN] = { {0}")
-        for i, (production, _) in enumerate(self.productions):
+        for i, (production, _, _) in enumerate(self.productions):
             if i == 0: continue
             production = ', '.join(str(symbol.id) for symbol in production)
             if production:
@@ -262,7 +261,7 @@ class Parser:
         put(" };")
 
         putl("symbol_id_t PRODUCTION_ID_TO_SYMBOL[N_PRODUCTIONS] = { 0")
-        for i, (_, symbol) in enumerate(self.productions):
+        for i, (_, symbol, _) in enumerate(self.productions):
             if i != 0: put(f", {symbol.id}")
         put(" };")
 
@@ -294,9 +293,28 @@ class Symbol:
         self.kind = parts[1]
         self.is_start = ".start" in parts[2:]
         self.contents = []
+        self.production_names = []
         self.id = None
         self.is_pseudo_node = False
 
+    def process_subline(self, line):  # absorb one indented grammar line (already stripped by the caller) into this symbol
+        if self.kind == "list":  # list symbols: every whitespace-separated token is added to contents
+            self.contents.extend(line.split())
+        elif self.kind == "regex":  # regex symbols: contents becomes the pattern text itself
+            assert not self.contents  # a second sub-line for the same regex symbol is rejected
+            self.contents = line.strip()  # replaces the initial empty list with a str
+        elif self.kind == "nonterm":  # nonterminals: each sub-line is one production
+            self.contents.append(line.split())  # production = list of whitespace-separated tokens
+            self.production_names.append(None)  # kept parallel to contents; None means unnamed
+            for i, part in enumerate(self.contents[-1]):
+                if part.startswith("."):  # a '.'-prefixed token starts the option list
+                    args = self.contents[-1][i:]  # this token and everything after it
+                    self.contents[-1] = self.contents[-1][:i]  # rebinds the entry; the loop still walks the old list
+                    for arg, value in zip(args[::2], args[1::2]):  # (key, value) pairs; an unpaired trailing token is dropped
+                        if arg == ".name":  # the only key handled here; other keys are ignored
+                            self.production_names[-1] = value
+        else: raise NotImplementedError  # any other symbol kind cannot take sub-lines
+
 class Node:
     def __init__(self, symbol, production):
         self.symbol = symbol
-- 
cgit v1.2.3