diff options
author | Matthew Sotoudeh <matthew@masot.net> | 2024-03-19 12:48:52 -0700 |
---|---|---|
committer | Matthew Sotoudeh <matthew@masot.net> | 2024-03-19 12:48:52 -0700 |
commit | bcd0394801eee9bc1cdd1535cd25c5b98a20023c (patch) | |
tree | 5b818fd82ff438e96b80888bb06f15654d4c2191 | |
parent | 252ed6aa49514427dcc579a318ddc19fe0680401 (diff) |
support ? (the zero-or-one regex operator)
-rw-r--r-- | parsers.py | 24 |
1 file changed, 13 insertions, 11 deletions
```diff
@@ -97,19 +97,21 @@ def _regex_to_ast(tokens):
     if '|' in tokens:
         i = tokens.index('|')
         return ('OR', _regex_to_ast(tokens[:i]), _regex_to_ast(tokens[i+1:]))
-    if tokens[-1] == '*':
-        if len(tokens) == 2:
-            return ('KLEENE', _regex_to_ast(tokens[:-1]))
-        else:
-            return ('CONCAT', _regex_to_ast(tokens[:-2]),
-                    ('KLEENE', _regex_to_ast(tokens[-2:-1])))
-    if tokens[-1] == '+':
+
+    if len(tokens) == 2 and tokens[-1] in ("*", "+", "?"):
         arg_ast = _regex_to_ast(tokens[:-1])
-        if len(tokens) == 2:
+        if tokens[-1] == '*':
+            return ('KLEENE', arg_ast)
+        elif tokens[-1] == '+':
             return ('CONCAT', arg_ast, ('KLEENE', arg_ast))
+        elif tokens[-1] == "?":
+            return ('OR', _regex_to_ast([EPSILON]), arg_ast)
         else:
-            return ('CONCAT', _regex_to_ast(tokens[:-2]),
-                    ('CONCAT', arg_ast, ('KLEENE', _regex_to_ast(tokens[-2:-1]))))
+            print(tokens)
+            raise NotImplementedError
+
+    if tokens[-1] in ("*", "?", "+"):
+        return ('CONCAT', _regex_to_ast(tokens[:-2]), _regex_to_ast(tokens[-2:]))
     return ('CONCAT', _regex_to_ast(tokens[:-1]), _regex_to_ast(tokens[-1:]))
 
 def match_parens(tokens):
@@ -131,7 +133,7 @@ def lex_regex(string):
     while string:
         if string[0].isspace():
             string = string[1:]
-        elif string[0] in ("(", ")", "|", "*", "+") + ALPHABET:
+        elif string[0] in ("(", ")", "|", "*", "+", "?") + ALPHABET:
             tokens.append(string[0])
             string = string[1:]
         elif string[0] == "E":
```