diff options
author | Matthew Sotoudeh <matthew@masot.net> | 2024-03-20 14:11:51 -0700 |
---|---|---|
committer | Matthew Sotoudeh <matthew@masot.net> | 2024-03-20 14:11:51 -0700 |
commit | 90fa8c8f84ffa25b26733edc196e66ec329e7924 (patch) | |
tree | bac0edc3829f609d199640ad13dad221749b8d9f | |
parent | 22bf0d39d5fb4c8ca532639ed6f84cdc8ad0673f (diff) |
minimizing dfas
-rw-r--r-- | minimize.py | 15 | ||||
-rw-r--r-- | nfa.py | 107 |
2 files changed, 120 insertions, 2 deletions
diff --git a/minimize.py b/minimize.py new file mode 100644 index 0000000..5cb1b18 --- /dev/null +++ b/minimize.py @@ -0,0 +1,15 @@ +import sys +from parsers import * + +arg1 = sys.argv[1] +if arg1.endswith(".nfa"): + arg1 = parse_nfa(arg1) +elif arg1.endswith(".regex"): + arg1 = parse_regex(arg1) +else: raise NotImplementedError + +dfa = arg1.to_min_dfa() +dfa.check_true_dfa() +assert dfa.equivalent_to(arg1) +dfa.to_gv("min.gv") +print(len(dfa.states), "states") @@ -1,3 +1,4 @@ +import copy import itertools from collections import defaultdict @@ -41,8 +42,8 @@ class NFA: return frozenset(result) def equivalent_to(self, other): - self = self.no_epsilons() - other = other.no_epsilons() + self = self.copy().no_epsilons() + other = other.copy().no_epsilons() def is_ok(pair): self_set, other_set = pair @@ -78,6 +79,7 @@ class NFA: last_step_pairs = next_step_pairs def no_epsilons(self): + self = self.copy() epsilon_reachable = { k: {k} | trans[EPSILON] for k, trans in self.transitions.items() @@ -115,8 +117,109 @@ class NFA: new.accepting = new_accepting return new + ### for fun + def to_dfa(self): + self = self.no_epsilons() + + initial_set = self.init_set() + set_to_prefix = {initial_set: EPSILON} + last_step_sets = [initial_set] + while True: + next_step_sets = [] + for state_set in last_step_sets: + prefix = set_to_prefix[state_set] + for c in ALPHABET: + next_prefix = prefix + c + next_set = self.transition_set(state_set, c) + if next_set in set_to_prefix: + continue + set_to_prefix[next_set] = next_prefix + next_step_sets.append(next_set) + if not next_step_sets: + break + last_step_sets = next_step_sets + + prefix_to_set = {p: s for s, p in set_to_prefix.items()} + + dfa = NFA() + for prefix, state_set in prefix_to_set.items(): + dfa.add_state(prefix, prefix == EPSILON, self.is_accepted(state_set)) + for prefix, state_set in prefix_to_set.items(): + for c in ALPHABET: + next_state_set = self.transition_set(state_set, c) + next_prefix = set_to_prefix[next_state_set] + dfa.transitions[prefix][c] = {next_prefix} + for table in dfa.transitions.values(): + if EPSILON in table: + table.pop(EPSILON) + assert dfa.equivalent_to(self) + dfa.check_true_dfa() + return dfa + + def to_min_dfa(self): + self = self.to_dfa() + self.check_true_dfa() + fixedpoint = False + while not fixedpoint: + fixedpoint = True + for a, b in itertools.combinations(list(self.transitions.keys()), 2): + if a < b: a, b = b, a + attempt = self.merge_states(a, b) + if attempt.equivalent_to(self)[0]: + self = attempt + fixedpoint = False + return self + + def merge_states(self, kill, keep): + self = self.copy() + if kill in self.start_set: + self.start_set.remove(kill) + self.start_set.add(keep) + if kill in self.accepting: + self.accepting.remove(kill) + self.accepting.add(keep) + for _, table in self.transitions.items(): + for _, dsts in table.items(): + if kill in dsts: + dsts.remove(kill) + dsts.add(keep) + if kill in self.transitions: + self.transitions.pop(kill) + return self + + def check_true_dfa(self): + for state, transitions in self.transitions.items(): + assert len(transitions) == len(ALPHABET) + for k, vs in transitions.items(): + assert len(vs) == 1 + assert min(vs) in self.transitions + assert len(self.start_set) == 1 + assert min(self.start_set) in self.transitions + + def copy(self): + return copy.deepcopy(self) + + def is_accepted(self, state_set): + return set(state_set & self.accepting) + def pprint(self): for state, transitions in self.transitions.items(): print(state, state in self.start_set, state in self.accepting) for k, vs in transitions.items(): print("\t", k, "->", vs) + + def to_gv(self, path): + def rename(state): + if state == '': return "ε" + return state + with open(path, "w") as f: + f.write("digraph mygraph {\n") + for state, transitions in self.transitions.items(): + if state in self.accepting: + f.write(f"node [shape = doublecircle]; {rename(state)};") + else: + f.write(f"node [shape = circle]; {rename(state)};") + for k, vs in transitions.items(): + for v in vs: + f.write(f"{rename(state)} -> {rename(v)} [label=\"{k}\"]\n") + f.write("}\n") |