summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Sotoudeh <matthew@masot.net>2024-03-20 14:11:51 -0700
committerMatthew Sotoudeh <matthew@masot.net>2024-03-20 14:11:51 -0700
commit90fa8c8f84ffa25b26733edc196e66ec329e7924 (patch)
treebac0edc3829f609d199640ad13dad221749b8d9f
parent22bf0d39d5fb4c8ca532639ed6f84cdc8ad0673f (diff)
minimizing dfas
-rw-r--r--minimize.py15
-rw-r--r--nfa.py107
2 files changed, 120 insertions, 2 deletions
diff --git a/minimize.py b/minimize.py
new file mode 100644
index 0000000..5cb1b18
--- /dev/null
+++ b/minimize.py
@@ -0,0 +1,15 @@
+import sys
+from parsers import *
+
+arg1 = sys.argv[1]
+if arg1.endswith(".nfa"):
+ arg1 = parse_nfa(arg1)
+elif arg1.endswith(".regex"):
+ arg1 = parse_regex(arg1)
+else: raise NotImplementedError
+
+dfa = arg1.to_min_dfa()
+dfa.check_true_dfa()
+assert dfa.equivalent_to(arg1)
+dfa.to_gv("min.gv")
+print(len(dfa.states), "states")
diff --git a/nfa.py b/nfa.py
index 73e3733..8dfe6f1 100644
--- a/nfa.py
+++ b/nfa.py
@@ -1,3 +1,4 @@
+import copy
import itertools
from collections import defaultdict
@@ -41,8 +42,8 @@ class NFA:
return frozenset(result)
def equivalent_to(self, other):
- self = self.no_epsilons()
- other = other.no_epsilons()
+ self = self.copy().no_epsilons()
+ other = other.copy().no_epsilons()
def is_ok(pair):
self_set, other_set = pair
@@ -78,6 +79,7 @@ class NFA:
last_step_pairs = next_step_pairs
def no_epsilons(self):
+ self = self.copy()
epsilon_reachable = {
k: {k} | trans[EPSILON]
for k, trans in self.transitions.items()
@@ -115,8 +117,109 @@ class NFA:
new.accepting = new_accepting
return new
+ ### for fun
+ def to_dfa(self):
+ self = self.no_epsilons()
+
+ initial_set = self.init_set()
+ set_to_prefix = {initial_set: EPSILON}
+ last_step_sets = [initial_set]
+ while True:
+ next_step_sets = []
+ for state_set in last_step_sets:
+ prefix = set_to_prefix[state_set]
+ for c in ALPHABET:
+ next_prefix = prefix + c
+ next_set = self.transition_set(state_set, c)
+ if next_set in set_to_prefix:
+ continue
+ set_to_prefix[next_set] = next_prefix
+ next_step_sets.append(next_set)
+ if not next_step_sets:
+ break
+ last_step_sets = next_step_sets
+
+ prefix_to_set = {p: s for s, p in set_to_prefix.items()}
+
+ dfa = NFA()
+ for prefix, state_set in prefix_to_set.items():
+ dfa.add_state(prefix, prefix == EPSILON, self.is_accepted(state_set))
+ for prefix, state_set in prefix_to_set.items():
+ for c in ALPHABET:
+ next_state_set = self.transition_set(state_set, c)
+ next_prefix = set_to_prefix[next_state_set]
+ dfa.transitions[prefix][c] = {next_prefix}
+ for table in dfa.transitions.values():
+ if EPSILON in table:
+ table.pop(EPSILON)
+ assert dfa.equivalent_to(self)
+ dfa.check_true_dfa()
+ return dfa
+
+ def to_min_dfa(self):
+ self = self.to_dfa()
+ self.check_true_dfa()
+ fixedpoint = False
+ while not fixedpoint:
+ fixedpoint = True
+ for a, b in itertools.combinations(list(self.transitions.keys()), 2):
+ if a < b: a, b = b, a
+ attempt = self.merge_states(a, b)
+ if attempt.equivalent_to(self)[0]:
+ self = attempt
+ fixedpoint = False
+ return self
+
+ def merge_states(self, kill, keep):
+ self = self.copy()
+ if kill in self.start_set:
+ self.start_set.remove(kill)
+ self.start_set.add(keep)
+ if kill in self.accepting:
+ self.accepting.remove(kill)
+ self.accepting.add(keep)
+ for _, table in self.transitions.items():
+ for _, dsts in table.items():
+ if kill in dsts:
+ dsts.remove(kill)
+ dsts.add(keep)
+ if kill in self.transitions:
+ self.transitions.pop(kill)
+ return self
+
+ def check_true_dfa(self):
+ for state, transitions in self.transitions.items():
+ assert len(transitions) == len(ALPHABET)
+ for k, vs in transitions.items():
+ assert len(vs) == 1
+ assert min(vs) in self.transitions
+ assert len(self.start_set) == 1
+ assert min(self.start_set) in self.transitions
+
+ def copy(self):
+ return copy.deepcopy(self)
+
+ def is_accepted(self, state_set):
+ return set(state_set & self.accepting)
+
def pprint(self):
for state, transitions in self.transitions.items():
print(state, state in self.start_set, state in self.accepting)
for k, vs in transitions.items():
print("\t", k, "->", vs)
+
+ def to_gv(self, path):
+ def rename(state):
+ if state == '': return "ε"
+ return state
+ with open(path, "w") as f:
+ f.write("digraph mygraph {\n")
+ for state, transitions in self.transitions.items():
+ if state in self.accepting:
+ f.write(f"node [shape = doublecircle]; {rename(state)};")
+ else:
+ f.write(f"node [shape = circle]; {rename(state)};")
+ for k, vs in transitions.items():
+ for v in vs:
+ f.write(f"{rename(state)} -> {rename(v)} [label=\"{k}\"]\n")
+ f.write("}\n")
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback