I wanted something more convenient for working with nested dictionaries that come from YAML files, so I wrote this DictIndexer. Its __getitem__ and __setitem__ accept a path instead of a single key. Additionally, the getter can also take a tuple of path and default value. The setter, on the other hand, is able to create new branches, but only when it's a clean branch, i.e. a branch assigned to a non-existing key. It won't overwrite another value.
Implementation
class DictIndexer:
def __init__(self, source: Dict[str, Any], set_creates_branches: bool = True):
self.source = source
self.set_creates_branches = set_creates_branches
def __getitem__(self, path: str | Tuple[str, Any]) -> Any | None:
path, default = path if isinstance(path, tuple) else (path, None)
item = self.source
for key in DictIndexer._split(path):
item = item.get(key, None)
if item is None:
return default
else:
return item
def __setitem__(self, path: str, value: Any):
class Missing:
pass
names = DictIndexer._split(path)
prev = self.source
item = self.source
for key in names[:-1]:
item = item.get(key, Missing())
match item:
case dict():
prev = item
case Missing():
if self.set_creates_branches:
prev[key] = {}
else:
raise KeyError(f"Cannot create branch '{key}. Disabled")
case _:
raise KeyError(f"Cannot overwrite '{key}.")
else:
item[names[-1]] = value
@staticmethod
def _split(path: str) -> List[str]:
return re.split(r"[/\\]", path)
Tests
With pytest I made sure that the following features work:
def test_can_get_item_by_path():
    """A slash-separated path resolves to the nested leaf value."""
    nested = {"foo": {"bar": "baz"}}
    indexer = DictIndexer(nested)
    assert indexer["foo/bar"] == "baz"
def test_gets_null_when_last_name_does_not_exist():
    """Looking up a missing final key yields None rather than raising."""
    nested = {"foo": {"bar": "baz"}}
    found = DictIndexer(nested)["foo/baz"]
    assert found is None
def test_gets_null_or_default_when_middle_name_does_not_exist():
    """A missing intermediate key yields None, or the supplied default."""
    nested = {"foo": {"bar": {"baz": "baz"}}}
    without_default = DictIndexer(nested)["foo/baz/baz"]
    assert without_default is None
    with_default = DictIndexer(nested)[("foo/baz/baz", "qux")]
    assert with_default == "qux"
def test_can_set_item():
    """Assigning to an existing path replaces the stored leaf value."""
    nested = {"foo": {"bar": {"baz": "baz"}}}
    DictIndexer(nested)["foo/bar/baz"] = "qux"
    stored = DictIndexer(nested)["foo/bar/baz"]
    assert stored == "qux"
def test_can_set_item_on_new_branches():
    """Assignment adds new leaves and creates missing intermediate dictionaries."""
    data = {
        "foo": {
            "bar": {
                "baz": {
                    "foo": "bar"
                }
            }
        }
    }
    # New leaf inside a dictionary that already exists.
    DictIndexer(data)["foo/bar/baz/qux"] = "qux"
    assert DictIndexer(data)["foo/bar/baz/qux"] == "qux"
    # Leaf below keys that do not exist yet: every missing level must be
    # created as a dictionary, otherwise no branch is actually exercised.
    DictIndexer(data)["foo/new/branch/leaf"] = "value"
    assert data["foo"]["new"] == {"branch": {"leaf": "value"}}
    assert DictIndexer(data)["foo/new/branch/leaf"] == "value"
    # Pre-existing siblings are left untouched.
    assert DictIndexer(data)["foo/bar/baz/foo"] == "bar"
def test_does_not_create_new_branches_when_disabled():
    """With branch creation off, a missing intermediate key raises KeyError."""
    nested = {"foo": {"bar": {}}}
    indexer = DictIndexer(nested, set_creates_branches=False)
    with pytest.raises(KeyError):
        indexer["foo/bar/baz/qux"] = "qux"
def test_does_not_overwrite_value_with_dict():
    """A scalar on the path is never replaced by a new branch."""
    nested = {"foo": {"bar": {"baz": "baz"}}}
    indexer = DictIndexer(nested)
    with pytest.raises(KeyError):
        indexer["foo/bar/baz/qux"] = "qux"
Example
I currently use it to customize the logger configuration:
def _initialize_logging():
    """Load the "wiretap" logging config, patch in runtime values, and apply it."""
    from logging import config as logging_config

    settings = DictIndexer(assets.cfg("wiretap"))

    # Entries that cannot live in the static config file are filled in here.
    settings["formatters/auto/()"] = wiretap.MultiFormatter

    version_tag = f"v{app.VERSION}-{os.environ['mode'][0]}"
    settings["formatters/auto/./values"] = [version_tag]

    log_file = assets.home("log", f"{app.ID}.log")
    settings["handlers/file/filename"] = log_file

    insert_statement = assets.sql("wiretap", "insert log.sql")
    settings["handlers/sqlserver/insert"] = insert_statement

    logging_config.dictConfig(settings.source)
What do you think? What would you improve?