I am trying to create a multi-level nested dictionary from a pandas DataFrame. In the example below, I want to retrieve, for every postal code, the sum of salaries for each combination of sex and age.
The output must be a nested dictionary keyed by postal code, in the form shown under the "Expected output" comment.
from typing import NamedTuple, Sequence, Tuple
import pandas as pd
# Sample records: name, age, postal code, sex, salary.
data = [
    ["tom", 22, "ab 11", "M", 5555],
    ["Rob", 22, "ab 11", "M", 9999],
    ["nick", 33, "ab 22", "M", 3333],
    ["juli", 18, "ab 11", "F", 2222],
]
people = pd.DataFrame(data, columns=["Name", "Age", "PostalCode", "Sex", "Salary"])

# Total salary for every (PostalCode, Sex, Age) combination. The result is a
# Series indexed by a three-level MultiIndex; the vectorised .sum() replaces
# the slower .apply(sum), which calls Python's builtin once per group.
salary_totals = people.groupby(["PostalCode", "Sex", "Age"])["Salary"].sum()

# Calling .to_dict() directly on the Series would give a *flat* dict keyed by
# (postal_code, sex, age) tuples. Instead, peel the postal code off each key
# and use it as the outer key, leaving (sex, age) as the inner key.
d = {}
for (postal_code, sex, age), total in salary_totals.items():
    d.setdefault(postal_code, {})[(sex, age)] = total

print(d)
# Expected output
print({"ab 11": {("M", 22): 15554, ("F", 18): 2222}, "ab 22": {("M", 33): 3333}})