
I have a JSON object that looks like this:

output = {'A': {'code': 'Ok',
  'tracepoints': [None,
   None,
   {'alternatives_count': 0,
    'location': [-122.419189, 37.753805],
    'distance': 28.078003,
    'hint': '5Qg7hUqpFQA2AAAAOgAAAAwAAAAPAAAAiVMWQq2VIEIAuABB7FgoQTYAAAA6AAAADAAAAA8AAAD4RAAACwi0-M0TQALvB7T4yRRAAgEAXwX5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 0},
   {'alternatives_count': 0,
    'location': [-122.417866, 37.75389],
    'distance': 26.825184,
    'hint': 'K8w6BRinFYAdAAAACwAAAA0AAAAAAAAAIxmmQTSs6kCiuRFBAAAAAB0AAAALAAAADQAAAAAAAAD4RAAANg20-CIUQAJNDbT4MRNAAgIAnxD5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 1},
   {'alternatives_count': 0,
    'location': [-122.416896, 37.75395],
    'distance': 16.583412,
    'hint': 'Jcw6BSzMOoUqAAAAQwAAABAAAAANAAAA0i_uQb3SOEKKPC9BG1EaQSoAAABDAAAAEAAAAA0AAAD4RAAAABG0-F4UQALyELT48xRAAgEAnxD5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 2},
   {'alternatives_count': 7,
    'location': [-122.415502, 37.754028],
    'distance': 10.013916,
    'hint': 'Jsw6hbN6kQBmAAAACAAAABAAAAANAAAAQOKOQg89nkCKPC9BEMcOQWYAAAAIAAAAEAAAAA0AAAD4RAAAcha0-KwUQAJ6FrT4UhRAAgEAbwX5Wu6N',
    'name': '23rd Street',
    'matchings_index': 0,
    'waypoint_index': 3}],
  'matchings': [{'duration': 50.6,
    'distance': 325.2,
    'weight': 50.6,
    'geometry': 'y{h_gAh~znhF}@k[OmFMoFcAea@IeD[uMAYKsDMsDAe@}@u_@g@aTMwFMwFwAqq@',
    'confidence': 0.374625,
    'weight_name': 'routability',
    'legs': [{'steps': [],
      'weight': 18.8,
      'distance': 116.7,
      'annotation': {'nodes': [1974590926,
        4763953263,
        65359046,
        4763953265,
        5443374298,
        2007343352]},
      'summary': '',
      'duration': 18.8},
     {'steps': [],
      'weight': 12.2,
      'distance': 85.6,
      'annotation': {'nodes': [5443374298,
        2007343352,
        4763953266,
        65359043,
        4763953269,
        2007343354,
        4763953270]},
      'summary': '',
      'duration': 12.2},
     {'steps': [],
      'weight': 19.6,
      'distance': 122.9,
      'annotation': {'nodes': [2007343354,
        4763953270,
        65334199,
        4763953274,
        2007343347]},
      'summary': '',
      'duration': 19.6}]}]},
 'B': {'code': 'Ok',
  'tracepoints': [{'alternatives_count': 0,
    'location': [-122.387971, 37.727587],
    'distance': 11.53267,
    'hint': 'xHWRAEJ2kYALAAAArQAAAA4AAAAsAAAAnpH1QDVG8EJWgBdBa2v0QQsAAACtAAAADgAAACwAAAD4RAAA_YG0-GOtPwJKgrT4t60_AgIA3wf5Wu6N',
    'name': 'Underwood Avenue',
    'matchings_index': 0,
    'waypoint_index': 0},
   {'alternatives_count': 0,
    'location': [-122.388563, 37.727175],
    'distance': 13.565054,
    'hint': 'w3WRgBuxOgVPAAAACAAAABMAAAASAAAA7ONaQo4CrUDv7U1BJdFAQU8AAAAIAAAAEwAAABIAAAD4RAAArX-0-MerPwIsgLT4gqs_AgIAbw35Wu6N',
    'name': 'Jennings Street',
    'matchings_index': 0,
    'waypoint_index': 1},
   {'alternatives_count': 1,
    'location': [-122.388478, 37.725984],
    'distance': 9.601917,
    'hint': 't3WRABexOoWcAAAAbAAAABEAAAALAAAAdujYQqu4lUJXHD1B9-ruQJwAAABsAAAAEQAAAAsAAAD4RAAAAoC0-CCnPwJCgLT4Zqc_AgIAHxP5Wu6N',
    'name': 'Wallace Avenue',
    'matchings_index': 0,
    'waypoint_index': 2}],
  'matchings': [{'duration': 50,
    'distance': 270.4,
    'weight': 50,
    'geometry': 'euu}fAd_~lhFoAlCMTuAvCvC|Bh@`@hXbUnAdADBhDzCzClCXVzZnW\\X~CnC~@qBLWnWej@',
    'confidence': 1e-06,
    'weight_name': 'routability',
    'legs': [{'steps': [],
      'weight': 17.8,
      'distance': 84.8,
      'annotation': {'nodes': [5443147626,
        6360865540,
        6360865536,
        65307580,
        6360865535,
        6360865539,
        6360865531]},
      'summary': '',
      'duration': 17.8},
     {'steps': [],
      'weight': 32.2,
      'distance': 185.6,
      'annotation': {'nodes': [6360865539,
        6360865531,
        6360865525,
        65343521,
        6360865527,
        6360865529,
        6360865523,
        6360865520,
        65321110,
        6360865519,
        6360865522,
        6376329343]},
      'summary': '',
      'duration': 32.2}]}]},
 'C': {'code': 'Ok',
  'tracepoints': [None,
   None,
   {'alternatives_count': 0,
    'location': [-122.443682, 37.713254],
    'distance': 6.968076,
    'hint': 'QXo6hUR6OgUAAAAANQAAAAAAAAAkAAAAAAAAAOCMMUEAAAAA_Z1yQQAAAAAbAAAAAAAAACQAAAD4RAAAXqiz-GZ1PwKiqLP4hnU_AgAAzxL5Wu6N',
    'name': '',
    'matchings_index': 0,
    'waypoint_index': 0},
   {'alternatives_count': 0,
    'location': [-122.442428, 37.714335],
    'distance': 16.488956,
    'hint': 'E3o6BVRukYAJAAAAIgAAAGgAAAAUAAAA2RnSQL_5uUEPjI9CBTlaQQkAAAAiAAAAaAAAABQAAAD4RAAARK2z-J95PwKTrLP4b3k_AgEAXxX5Wu6N',
    'name': 'Allison Street',
    'matchings_index': 0,
    'waypoint_index': 1},
   {'alternatives_count': 1,
    'location': [-122.441751, 37.712761],
    'distance': 17.311636,
    'hint': 'Fno6hRl6OgWZAAAANwAAAAAAAAAKAAAAH4vUQgKXFkIAAAAAXtbYQJkAAAA3AAAAAAAAAAoAAAD4RAAA6a-z-HlzPwKjsLP4q3M_AgAAHwr5Wu6N',
    'name': 'Allison Street',
    'matchings_index': 0,
    'waypoint_index': 2}],
  'matchings': [{'duration': 64.1,
    'distance': 420.1,
    'weight': 66.7,
    'geometry': 'kuy|fAbyjphFcBxEmE`FqJkKiBqBuP}Qgc@ie@eAiAcB}ArA_Eb@mAjKkDnBo@fe@mOrw@kW',
    'confidence': 7.3e-05,
    'weight_name': 'routability',
    'legs': [{'steps': [],
      'weight': 40.1,
      'distance': 235.2,
      'annotation': {'nodes': [5440513673,
        5440513674,
        5440513675,
        65363070,
        1229920760,
        65307726,
        6906452420,
        1229920717,
        65361047,
        1229920749,
        554163599,
        3978809925]},
      'summary': '',
      'duration': 37.5},
     {'steps': [],
      'weight': 26.6,
      'distance': 184.9,
      'annotation': {'nodes': [554163599, 3978809925, 65345518, 8256268328]},
      'summary': '',
      'duration': 26.6}]}]}}

I would like to extract the values under the 'nodes' key for each user (A, B and C) and store them in a pandas DataFrame together with the corresponding user, like below:

    value        user
    1974590926  A
    4763953263  A
    65359046    A
    4763953265  A
    5443374298  A
    2007343352  A
    5443374298  A
    2007343352  A
    4763953266  A
    65359043    A
    4763953269  A
    2007343354  A
    4763953270  A
    2007343354  A
    4763953270  A
    65334199    A
    4763953274  A
    2007343347  A
    5443147626  B
    6360865540  B
    6360865536  B
    65307580    B
    6360865535  B
    6360865539  B
    6360865531  B
    6360865539  B
    6360865531  B
    6360865525  B
    65343521    B
    6360865527  B
    6360865529  B
    6360865523  B
    6360865520  B
    65321110    B
    6360865519  B
    6360865522  B
    6376329343  B
    5440513673  C
    5440513674  C
    5440513675  C
    65363070    C
    1229920760  C
    65307726    C
    6906452420  C
    1229920717  C
    65361047    C
    1229920749  C
    554163599   C
    3978809925  C
    554163599   C
    3978809925  C
    65345518    C
    8256268328  C

With the code below I am able to extract and store only the nodes belonging to user C in a pandas DataFrame. However, I struggle to add the user column and to include the other users' nodes with their corresponding user. Any ideas?

import pandas as pd

user = 'C'                                   # hard-coded to a single user for now
values_df = pd.DataFrame(columns=['value'])

for leg in output[user]['matchings'][0]['legs']:
    result = leg['annotation']['nodes']
    values_temp = pd.DataFrame(result, columns=['value'])
    values_df = values_df.append(values_temp, ignore_index=True)
values_df['value'] = values_df['value'].astype(int)
values_df

    value
0   5440513673
1   5440513674
2   5440513675
3   65363070
4   1229920760
5   65307726
6   6906452420
7   1229920717
8   65361047
9   1229920749
10  554163599
11  3978809925
12  554163599
13  3978809925
14  65345518
15  8256268328
4 Comments
  • What is the output of your current code?
  • I've updated my question with the output of my current code.
  • output is your JSON object? And have you set user = 'C'?
  • output is indeed my JSON object. The values A, B and C in the JSON object are the user keys.

3 Answers


You can use json_normalize() with record_path and then concat() the users:

dfs = []
for user in output.keys():
    df = pd.json_normalize(output, record_path=[user, 'matchings', 'legs', 'annotation', 'nodes'])
    df['user'] = user
    dfs.append(df)
nodes_df = pd.concat(dfs).rename(columns={0: 'node'})

#        node  user
#  1974590926     A
#  4763953263     A
#    65359046     A
#         ...   ...
#  3978809925     C
#    65345518     C
#  8256268328     C
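
Note that pd.concat(dfs) keeps each per-user frame's own 0-based index, so the row labels repeat for every user. If you prefer a single running index, a small variation (not part of the original answer) is to pass ignore_index=True:

nodes_df = pd.concat(dfs, ignore_index=True).rename(columns={0: 'node'})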

If there are some users with missing matchings, you can check if 'matchings' in output[user]:

dfs = []
for user in output.keys():
    if 'matchings' in output[user]:
        df = pd.json_normalize(output, record_path=[user, 'matchings', 'legs', 'annotation', 'nodes'])
        df['user'] = user
        dfs.append(df)
nodes_df = pd.concat(dfs).rename(columns={0: 'node'})

If the output keys are like ('2018-02-03', 'A') and you're iterating them as trip, you need to access its date and user as trip[0] and trip[1]:

dfs = []
for trip in output.keys():
    if 'matchings' in output[trip]:
        df = pd.json_normalize(output, record_path=[trip, 'matchings', 'legs', 'annotation', 'nodes'])
        df['date'] = trip[0]
        df['user'] = trip[1]
        dfs.append(df)
nodes_df = pd.concat(dfs).rename(columns={0: 'node'})

3 Comments

I noticed that when a user, say C, does not have a 'matchings' key, for example 'C': {"message": "Could not find a matching segment for any coordinate.", "code": "NoSegment"}, the code above throws a KeyError: 'matchings'. How can I prevent this?
@sampeterson It seems pandas isn't able to handle this directly. I updated the answer with a workaround that tests if 'matchings' in output[user].
Last question: if my JSON object also had a date key besides the user key, like {('2018-02-03', 'A'): {'code': 'Ok', 'tracepoints': [None ..., how do I add the date as an extra date column? I was hoping I could run dfs = []; for trip in output.keys(): if 'matchings' in output[trip]: df = pd.json_normalize(output, record_path=[date, user, 'matchings', 'legs', 'annotation', 'nodes']); df['date'] = date; df['user'] = user; dfs.append(df); nodes_df = pd.concat(dfs).rename(columns={0: 'node'}), but it gives me an error.
1

We want to collect all the node values nested under 'legs' for every user.

If you want the simplest way, plain nested for loops will do:

nodes = []
users = []

# walk every user, every leg, and every node, remembering which user each node belongs to
for user in output.keys():
    for leg in output[user]['matchings'][0]['legs']:
        for node in leg['annotation']['nodes']:
            nodes.append(node)
            users.append(user)

df = pd.DataFrame({'nodes': nodes, 'user': users})
print(df)
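
The same idea fits in a single list comprehension that collects (node, user) pairs before building the frame; a minimal sketch, assuming output is the dict from the question (the names pairs and blob are just illustrative):

import pandas as pd

# one (node, user) tuple per node, in document order
pairs = [(node, user)
         for user, blob in output.items()
         for leg in blob['matchings'][0]['legs']
         for node in leg['annotation']['nodes']]

df = pd.DataFrame(pairs, columns=['node', 'user'])
print(df)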


0

You could use the jmespath module to extract the data before recombining it into a DataFrame; this should give some speed-up, since the iteration happens within the dictionary itself.

The jmespath syntax in a nutshell: use a dot to access a key, and [] to step into (and project over) the items of a list.
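
For instance, on a stripped-down toy dict (a hypothetical illustration, not the data from the question), the kind of expression used below behaves like this:

import jmespath

# toy structure mirroring the 'matchings' -> 'legs' -> 'annotation' -> 'nodes' nesting
toy = {'A': {'matchings': [{'legs': [{'annotation': {'nodes': [1, 2]}},
                                     {'annotation': {'nodes': [3]}}]}]}}

print(jmespath.search("A.matchings[].legs[].annotation.nodes", toy))
# [[1, 2], [3]]  -> one nodes list per leg, which is why chain.from_iterable is needed below

Applied to the full output: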

# pip install jmespath
import pandas as pd
import jmespath
from itertools import chain

# one compiled jmespath expression per user key
query = {letter: jmespath.compile(f"{letter}.matchings[].legs[].annotation.nodes")
         for letter in ("A", "B", "C")}

# run each expression against output and flatten the per-leg node lists into one 'node' column
result = {letter: pd.DataFrame(chain.from_iterable(expression.search(output)),
                               columns=['node'])
          for letter, expression in query.items()}

result = pd.concat(result).droplevel(-1).rename_axis(index='user').reset_index()

result.head(15)
 
   user        node
0     A  1974590926
1     A  4763953263
2     A    65359046
3     A  4763953265
4     A  5443374298
5     A  2007343352
6     A  5443374298
7     A  2007343352
8     A  4763953266
9     A    65359043
10    A  4763953269
11    A  2007343354
12    A  4763953270
13    A  2007343354
14    A  4763953270

