summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Sotoudeh <matthew@masot.net>2024-03-19 11:35:48 -0700
committerMatthew Sotoudeh <matthew@masot.net>2024-03-19 11:35:48 -0700
commitfc8c6ea0b748f208f8f4bec5c51eed4381f57341 (patch)
tree5a5c9e418e1f004427402c551549b5e6ca0c2236
parent5214650e55e52e652f256316e2937274495fd35b (diff)
try to get the regex precedence correct
-rw-r--r--README.txt33
-rw-r--r--examples/precedence_correct.regex1
-rw-r--r--parsers.py6
3 files changed, 39 insertions, 1 deletions
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..c9ebee5
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,33 @@
+===== REGEQ
+Equivalence checker for NFAs/DFAs/RegEx
+
+To run:
+
+ python3 checker.py foo.[regex|nfa] bar.[regex|nfa]
+
+The alphabet is hard-coded to { 'c', 'h', 'p' }
+
+===== NFA Description
+See examples/correct.nfa for an example. Indentation matters. Filename must end
+in '.nfa'. Each chunk looks like:
+
+ top2
+ c top2
+ eps top3
+
+which means there is a state 'top2' that can transition to top2 upon seeing a
+'c', and also has an epsilon transition to top3.
+
+===== RegEx Description
+See examples/correct.regex for an example. Indentation does not matter.
+Filename must end in '.regex'. Should look like:
+
+ ((S*)c(S*)c)|((S*)h(S*)h)|((S*)p(S*)p)
+
+The following special symbols can be used, beyond the standard '()*|':
+
+ - `S` stands for \Sigma, which is shorthand for (c|h|p)
+ - `E` stands for \epsilon, the empty string
+
+I have *tried* to match the precdence from the slides, but not certain. When in
+doubt, please do use parentheses to disambiguate!
diff --git a/examples/precedence_correct.regex b/examples/precedence_correct.regex
new file mode 100644
index 0000000..84b3274
--- /dev/null
+++ b/examples/precedence_correct.regex
@@ -0,0 +1 @@
+S*cS*c|S*hS*h|S*pS*p
diff --git a/parsers.py b/parsers.py
index 65c2614..8427137 100644
--- a/parsers.py
+++ b/parsers.py
@@ -98,7 +98,11 @@ def _regex_to_ast(tokens):
i = tokens.index('|')
return ('OR', _regex_to_ast(tokens[:i]), _regex_to_ast(tokens[i+1:]))
if tokens[-1] == '*':
- return ('KLEENE', _regex_to_ast(tokens[:-1]))
+ if len(tokens) == 2:
+ return ('KLEENE', _regex_to_ast(tokens[:-1]))
+ else:
+ return ('CONCAT', _regex_to_ast(tokens[:-2]),
+ ('KLEENE', _regex_to_ast(tokens[-2:-1])))
return ('CONCAT', _regex_to_ast(tokens[:-1]), _regex_to_ast(tokens[-1:]))
def match_parens(tokens):
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback