I an writing a simple command interpreter for a project to allow the user to interact with a virtual world. In the commands the user can refer to objects by any number of different names, and the objects can appear and disappear. Also, to be nice to the user I am allowing some typos.
To figure out which object the user is referring to, I am using the Python regex module to do fuzzy matching. However, when there is a typo, the matched group no longer has the same name as the dictionary I am using to look up the actual object by its name, and the lookup fails.
Pared down test case:
# test.py
from dataclasses import dataclass, field
import regex
@dataclass
class Command:
phrase: str
arguments: list[str]
argtypes: list[type]
pattern: str = field(init=False)
def __post_init__(self):
assert (len(set(self.arguments)) == len(self.argtypes))
def fp(match):
if (n := match.group(0)) in self.arguments:
return rf"(?:(?P<{n}>\b\L<{n}>\b)){{e<=5}}"
return rf"(?:\b{regex.escape(n)}\b){{e<=3}}"
munged = regex.sub(r"\b(\w+)\b", fp, self.phrase)
munged = regex.sub(r"\s+", r"\\s+", munged)
self.pattern = munged
def match(self, string: str,
candidate_captures: list) -> tuple[list[str] | None, int | None]:
"""Match self on the string, return the matched objects and the
number of errors, or None, None if no match"""
# assemble the candidates dict
options = {x: [] for x in self.arguments}
unmap = {}
for c in candidate_captures:
for a, t in zip(self.arguments, self.argtypes):
if isinstance(c, t):
s = str(c)
options[a].append(s)
unmap[s] = c
match: regex.Match = regex.search(self.pattern, string,
**options, flags=regex.BESTMATCH)
if match:
return ([unmap[match.group(g)] for g in self.arguments], #####<<<<<<<<
sum(match.fuzzy_counts))
return None, None
>>> from test import Command
>>> x = Command("bar X foo Y", ["X", "Y"], [int, float])
>>> x
Command(phrase='bar X foo Y', arguments=['X', 'Y'], argtypes=[<class 'int'>, <class 'float'>], pattern='(?:\\bbar\\b){e<=3}\\s+(?:(?P<X>\\b\\L<X>\\b)){e<=5}\\s+(?:\\bfoo\\b){e<=3}\\s+(?:(?P<Y>\\b\\L<Y>\\b)){e<=5}')
>>> x.match("baar 12 foo 345", [12, 34.5, 17, 65.9])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "test.py", line 39, in match
return ([unmap[match.group(g)] for g in self.arguments], #####<<<<<<<<
~~~~~^^^^^^^^^^^^^^^^
KeyError: '345'
>>> # this is because it fuzzymatched "34.5"
>>> # how do I get the "34.5" out again?
What I am looking to do is, on the marked line that errors, to get the original, unfuzzied text, instead of the actual fuzzy text. I can't figure out how to do this.
remodule?