summary | refs | log | tree | commit | diff
path: root/earlpy/earlpy.py
diff options
context:
space:
mode:
author: Matthew Sotoudeh <matthew@masot.net>, 2024-02-19 16:41:13 -0800
committer: Matthew Sotoudeh <matthew@masot.net>, 2024-02-19 16:41:13 -0800
commit: 26a42b4a7ba077659f791208a2a7989bfdfb3663 (patch)
tree: 02069692c4d629d3108bbed43c4eb6eddfd2fbc7 /earlpy/earlpy.py
parent: e133f250761c67b4465181f41909e78c272901d3 (diff)
playing with the C grammar
Diffstat (limited to 'earlpy/earlpy.py')
-rw-r--r-- earlpy/earlpy.py 52
1 files changed, 35 insertions, 17 deletions
diff --git a/earlpy/earlpy.py b/earlpy/earlpy.py
index 7fbf0f0..3b0deab 100644
--- a/earlpy/earlpy.py
+++ b/earlpy/earlpy.py
@@ -13,7 +13,8 @@ class Parser:
parser_dir = parser_dir
files = sorted([f"{parser_dir}/grammar.txt",
*glob(f"{parser_dir}/*.c"),
- f"{DIR}/parser.c"])
+ f"{DIR}/parser.c",
+ __file__])
if f"{parser_dir}/parser.c" in files:
files.remove(f"{parser_dir}/parser.c")
hashes = ' '.join(
@@ -110,17 +111,10 @@ class Parser:
ordered_symbols = []
last_symbol = None
for line in grammar:
- if line[0] in ' \t':
- if last_symbol.kind == "list":
- last_symbol.contents.extend(line.split())
- elif last_symbol.kind == "regex":
- assert not last_symbol.contents
- last_symbol.contents = line.strip()
- elif last_symbol.kind == "nonterm":
- last_symbol.contents.append(line.split())
- else: raise NotImplementedError
- elif line.strip().startswith("#"):
+ if line.strip().startswith("#"):
continue
+ elif line[0] in ' \t':
+ last_symbol.process_subline(line.strip())
elif line.strip():
last_symbol = Symbol(line)
self.name_to_symbol[last_symbol.name] = last_symbol
@@ -179,6 +173,7 @@ class Parser:
symbol.kind = "nonterm"
symbol.contents = new_rule
+ symbol.production_names = [None for _ in new_rule]
symbol.is_pseudo_node = True
new_ordered_symbols.append(symbol)
ordered_symbols = new_ordered_symbols
@@ -232,27 +227,31 @@ class Parser:
putl("prod_id_t SYMBOL_ID_TO_PRODUCTION_IDS[N_SYMBOLS][MAX_N_PRODUCTIONS] = { {0}")
# [(production, Symbol), ...]
- self.productions = [([], None)]
+ self.productions = [([], None, None)]
for symbol in ordered_symbols:
if symbol.kind == "nonterm":
start_idx = len(self.productions)
assert isinstance(symbol.contents[0], list)
- for rule in symbol.contents:
+ for i, rule in enumerate(symbol.contents):
rule = [self.name_to_symbol[x] for x in rule]
- self.productions.append((rule, symbol))
+ self.productions.append((rule, symbol, symbol.production_names[i]))
prods = ', '.join(map(str, range(start_idx, len(self.productions))))
if prods:
put(", {" + prods + ", 0}")
else:
put(", {0}")
else:
- self.productions.append(([], symbol))
+ self.productions.append(([], symbol, None))
put(", {0}")
put(" };")
putl(f"#define N_PRODUCTIONS {len(self.productions)}")
+ for i, (_, _, name) in enumerate(self.productions):
+ if name:
+ putl(f"#define PRODUCTION_{name} {i}")
+
putl("symbol_id_t PRODUCTION_ID_TO_PRODUCTION[N_PRODUCTIONS][MAX_PRODUCTION_LEN] = { {0}")
- for i, (production, _) in enumerate(self.productions):
+ for i, (production, _, _) in enumerate(self.productions):
if i == 0: continue
production = ', '.join(str(symbol.id) for symbol in production)
if production:
@@ -262,7 +261,7 @@ class Parser:
put(" };")
putl("symbol_id_t PRODUCTION_ID_TO_SYMBOL[N_PRODUCTIONS] = { 0")
- for i, (_, symbol) in enumerate(self.productions):
+ for i, (_, symbol, _) in enumerate(self.productions):
if i != 0: put(f", {symbol.id}")
put(" };")
@@ -294,9 +293,28 @@ class Symbol:
self.kind = parts[1]
self.is_start = ".start" in parts[2:]
self.contents = []
+ self.production_names = []
self.id = None
self.is_pseudo_node = False
+ def process_subline(self, line):
+ if self.kind == "list":
+ self.contents.extend(line.split())
+ elif self.kind == "regex":
+ assert not self.contents
+ self.contents = line.strip()
+ elif self.kind == "nonterm":
+ self.contents.append(line.split())
+ self.production_names.append(None)
+ for i, part in enumerate(self.contents[-1]):
+ if part.startswith("."):
+ args = self.contents[-1][i:]
+ self.contents[-1] = self.contents[-1][:i]
+ for arg, value in zip(args[::2], args[1::2]):
+ if arg == ".name":
+ self.production_names[-1] = value
+ else: raise NotImplementedError
+
class Node:
def __init__(self, symbol, production):
self.symbol = symbol
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback