I don't think you can do better than looping through the rows of the dataframe; that is, I don't see a way to vectorize this process. Also, if the number of levels can vary within the same dataframe, then the `update` function should be modified to handle NaN entries (e.g. by additionally checking `not pd.isna(row[1])` before descending to the next level; `np.isnan` would raise a `TypeError` on string entries).
That said, I believe that the following script should be satisfactory.
import pandas as pd
# Accumulator for the top-level nodes of the tree that update() builds.
ls = []

# Example data: one entry per state, columns ordered from the coarsest level
# (region) to the finest (state).
region = ['A', 'A', 'A', 'B', 'B', 'B']
sub_region = ['1', '2', '2', '3', '3', '4']
state = ['a', 'b', 'c', 'd', 'e', 'f']
df = pd.DataFrame(
    {
        "region": region,
        "sub_region": sub_region,
        "state": state,
    }
)
def update(row, ls):
    """Insert one root-to-leaf path into a nested ``name``/``children`` tree.

    Args:
        row: Iterable of level names ordered from the outermost level to the
            innermost one (a list, a tuple, or a pandas Series all work,
            because the values are consumed by plain iteration rather than
            by integer indexing).
        ls: List of top-level node dicts; it is modified in place.

    Each node is a dict with a ``'name'`` key. A ``'children'`` list is added
    to a node only once something is actually inserted below it, so leaves
    carry no ``'children'`` key. An empty ``row`` leaves ``ls`` untouched.
    """
    siblings = ls
    parent = None
    for name in row:
        if parent is not None:
            # Create the child list lazily so that leaves stay key-free.
            siblings = parent.setdefault('children', [])
        for node in siblings:
            if node['name'] == name:
                break
        else:
            # No sibling with this name yet: start a new branch here.
            node = {'name': name}
            siblings.append(node)
        parent = node
# Feed every dataframe row into the tree, then show the result.
# itertuples(index=False, name=None) yields ordinary tuples of the column
# values, so update() receives a purely positional sequence. iterrows() would
# instead hand over Series objects labelled by column name, on which integer
# indexing is deprecated in recent pandas releases.
for r in df.itertuples(index=False, name=None):
    update(r, ls)
print(ls)
The resulting list ls:
[{'name': 'A',
'children': [{'name': '1', 'children': [{'name': 'a'}]},
{'name': '2', 'children': [{'name': 'b'}, {'name': 'c'}]}]},
{'name': 'B',
'children': [{'name': '3', 'children': [{'name': 'd'}, {'name': 'e'}]},
{'name': '4', 'children': [{'name': 'f'}]}]}]
Here's a version in which leaf nodes also carry an empty `'children': []` entry in their dict, which I find a bit more natural.
import pandas as pd
# Accumulator for the top-level nodes of the tree that update() builds.
ls = []

# Example data: one entry per state, columns ordered from the coarsest level
# (region) to the finest (state).
region = ['A', 'A', 'A', 'B', 'B', 'B']
sub_region = ['1', '2', '2', '3', '3', '4']
state = ['a', 'b', 'c', 'd', 'e', 'f']
df = pd.DataFrame(
    {
        "region": region,
        "sub_region": sub_region,
        "state": state,
    }
)
def update(row, ls):
    """Insert one root-to-leaf path into a nested ``name``/``children`` tree.

    Args:
        row: Iterable of level names ordered from the outermost level to the
            innermost one (a list, a tuple, or a pandas Series all work,
            because the values are consumed by plain iteration rather than
            by integer indexing).
        ls: List of top-level node dicts; it is modified in place.

    Every node is a dict of the form ``{'name': ..., 'children': [...]}``;
    leaves simply have an empty ``'children'`` list. An empty ``row`` leaves
    ``ls`` untouched.
    """
    siblings = ls
    for name in row:
        for node in siblings:
            if node['name'] == name:
                break
        else:
            # No sibling with this name yet: start a new branch here.
            node = {'name': name, 'children': []}
            siblings.append(node)
        # Descend: the next level is inserted among this node's children.
        siblings = node['children']
# Feed every dataframe row into the tree, then show the result.
# itertuples(index=False, name=None) yields ordinary tuples of the column
# values, so update() receives a purely positional sequence. iterrows() would
# instead hand over Series objects labelled by column name, on which integer
# indexing is deprecated in recent pandas releases.
for r in df.itertuples(index=False, name=None):
    update(r, ls)
print(ls)
The resulting list ls:
[{'name': 'A',
'children': [{'name': '1', 'children': [{'name': 'a', 'children': []}]},
{'name': '2',
'children': [{'name': 'b', 'children': []},
{'name': 'c', 'children': []}]}]},
{'name': 'B',
'children': [{'name': '3',
'children': [{'name': 'd', 'children': []},
{'name': 'e', 'children': []}]},
{'name': '4', 'children': [{'name': 'f', 'children': []}]}]}]