12

My question is essentially the opposite of this one:

Create a Pandas DataFrame from deeply nested JSON

I'm wondering if it's possible to do the reverse. Given a table like:

     Library  Level           School Major  2013 Total
200  MS_AVERY  UGRAD  GENERAL STUDIES  GEST        5079
201  MS_AVERY  UGRAD  GENERAL STUDIES  HIST           5
202  MS_AVERY  UGRAD  GENERAL STUDIES  MELC           2
203  MS_AVERY  UGRAD  GENERAL STUDIES  PHIL          10
204  MS_AVERY  UGRAD  GENERAL STUDIES  PHYS           1
205  MS_AVERY  UGRAD  GENERAL STUDIES  POLS          53

Is it possible to generate a nested dict (or JSON) like:

dict:

{'MS_AVERY': 
    { 'UGRAD' :
        {'GENERAL STUDIES' : {'GEST' : 5079,
                              'MELC' : 2,

 ...
1

4 Answers 4

9

It is not hard to write a function that builds the recursive dictionary from your DataFrame object:

def fdrec(df):
    """Fold the rows of *df* into a nested dictionary.

    Every column except the last one supplies a key, one nesting level
    per column, read left to right.  The last column supplies the value
    stored under the innermost key.  When two rows share the same full
    key path, the value of the first such row is kept.

    Returns the outermost dictionary.
    """
    rows = df.values
    # Position of the innermost key column (the one right before the value).
    leaf_depth = rows.shape[1] - 2
    nested = {}
    for row in rows:
        node = nested
        for depth, key in enumerate(row[:-1]):
            if depth == leaf_depth:
                # setdefault leaves an already stored value untouched.
                node.setdefault(key, row[-1])
            else:
                node = node.setdefault(key, {})
    return nested

which will produce:

{'MS_AVERY':
    {'UGRAD':
        {'GENERAL STUDIES': {'PHYS': 1L, 
                             'POLS': 53L,
                             'PHIL': 10L,
                             'HIST': 5L,
                             'MELC': 2L,
                             'GEST': 5079L}}}}
Sign up to request clarification or add additional context in comments.

2 Comments

Thank you for the response, saullo. I was wondering if there was a built-in function that would do this, but this works great!
This is a lovely function, but for JSON there must be double quotes around all values.
1

Here's a solution I came up with while working on this question:

def rollup_to_dict_core(x, values, columns, d_columns=None):
    """Recursively roll the DataFrame *x* up into a nested dictionary.

    Parameters:
        x: the DataFrame (or group of rows) to convert.
        values: list of column names holding the leaf values.  With one
            name the leaf is that column's value; with several, the leaf
            is a ``{column: value}`` dict.
        columns: list of column names used as nesting keys, outermost
            first.  Must contain at least one name.
        d_columns: optional list of extra columns grouped together with
            ``columns[0]`` at this level only (recursive calls do not
            receive it).  Their values are stored next to the nested
            dict, which is keyed by ``columns[1]``.

    Returns:
        A dict keyed by the values of ``columns[0]``.
    """
    if d_columns is None:
        d_columns = []

    if len(columns) == 1:
        # Innermost level: map the last key column straight to the value(s).
        if len(values) == 1:
            return x.set_index(columns)[values[0]].to_dict()
        else:
            return x.set_index(columns)[values].to_dict(orient='index')
    else:
        # One nested dict per group, built from the remaining key columns.
        res = x.groupby([columns[0]] + d_columns).apply(lambda y: rollup_to_dict_core(y, values, columns[1:]))
        if len(d_columns) == 0:
            return res.to_dict()
        else:
            res.name = columns[1]
            # reset_index only treats a list/tuple as "several levels"; a
            # bare range object would be looked up as one level name and fail.
            res = res.reset_index(level=list(range(1, len(d_columns) + 1)))
            return res.to_dict(orient='index')

def rollup_to_dict(x, values, d_columns=None):
    """Roll the DataFrame *x* up into a nested dictionary.

    Every column of *x* that is listed in neither *values* nor
    *d_columns* becomes a nesting key, in the frame's column order.
    The actual work is delegated to ``rollup_to_dict_core``.
    """
    extra = [] if d_columns is None else d_columns
    key_columns = [
        name for name in x.columns
        if not (name in values or name in extra)
    ]
    return rollup_to_dict_core(x, values, key_columns, extra)

>>> pprint(rollup_to_dict(df, ['2013 Total']))
{'MS_AVERY': {'UGRAD': {'GENERAL STUDIES': {'GEST': 5079,
                                            'HIST': 5,
                                            'MELC': 2,
                                            'PHIL': 10,
                                            'PHYS': 1,
                                            'POLS': 53}}}}

Comments

0
import json

key = ['Library', 'Level', 'School']
# One list of {'Major': ..., '2013 Total': ...} records per
# (Library, Level, School) group, in order of first appearance.
series = (df.groupby(key, sort=False)[['Major', '2013 Total']]
            .apply(lambda x: x.to_dict('records'))
         )

# build the recursive dictionary, one branch per group
# (every group is processed, not only the first one)
d = {}
for index, values in series.items():
    node = d
    for level in index:
        node = node.setdefault(level, {})
    # build: {Major: Total}
    node.update({row['Major']: row['2013 Total'] for row in values})
print(json.dumps(d, indent=2))

It will produce:

{
  "MS_AVERY": {
    "UGRAD": {
      "GENERAL STUDIES": {
        "GEST": 5079,
        "HIST": 5,
        "MELC": 2,
        "PHIL": 10,
        "PHYS": 1,
        "POLS": 53
      }
    }
  }
}

Comments

0

Here is a generic way to generate the format below, which might be what someone else is looking for. Desired format:

{ "data": 
   [
        {
            "NAME": [1, 2, 3]
        },
        {
            "NAME": [1, 2, 3]
        },
    ]
}

To get that:

import json

# One {column name: list of values} object per column, in column order.
# DataFrame.items() is used because iteritems() was removed in pandas 2.0;
# tolist() turns numpy scalars into plain Python values json can serialize.
column_objects = [
    {str(columnName): columnData.tolist()}
    for (columnName, columnData) in df.items()
]
# Let json.dumps do the quoting/escaping; this also yields a valid
# '{"data": []}' when the frame has no columns.
jsonstr = json.dumps({"data": column_objects})
jsonobject = json.loads(jsonstr)
jsonobject

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.