1

I am bringing filtered JSON data from Elastic as a nested list with multiple levels. I am interesting in converting the nested list into 2d in Rstudio for data analysis.

I have tried mutliple tips online but none have worked (plytr,rbind, unlist, tidyjson). I am desperate for help

Example of the structure:

> str(RAW2)
List of 10
 $ :List of 5
  ..$ _index: chr "xml-search-2017.06.12"
  ..$ _type : chr "xml"
  ..$ _id   : chr "76595087100_1"
  ..$ _score: num 13.5
  ..$ fields:List of 3
  .. ..$ check_in_date :List of 1
  .. .. ..$ : chr "2017-06-20T00:00:00.000Z"
  .. ..$ check_out_date:List of 1
  .. .. ..$ : chr "2017-06-23T00:00:00.000Z"
  .. ..$ ts_start      :List of 1
  .. .. ..$ : chr "2017-06-12T20:11:07.348Z"
 $ :List of 5
  ..$ _index: chr "xml-search-2017.06.12"
  ..$ _type : chr "xml"
  ..$ _id   : chr "76595087300_1"
  ..$ _score: num 13.5
  ..$ fields:List of 3
  .. ..$ check_in_date :List of 1
  .. .. ..$ : chr "2017-06-20T00:00:00.000Z"
  .. ..$ check_out_date:List of 1
  .. .. ..$ : chr "2017-06-23T00:00:00.000Z"
  .. ..$ ts_start      :List of 1
  .. .. ..$ : chr "2017-06-12T20:11:07.060Z"

Thank you

list(structure(list(`_index` = "xml-search-2017.06.12", `_type` = "xml", 
    `_id` = "76595087100_1", `_score` = 13.457847, fields = structure(list(
        check_in_date = list("2017-06-20T00:00:00.000Z"), check_out_date = list(
            "2017-06-23T00:00:00.000Z"), ts_start = list("2017-06-12T20:11:07.348Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76595087300_1", `_score` = 13.457847, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-23T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:07.060Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76594844800_1", `_score` = 13.455816, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-22T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:03.445Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76597264600_1", `_score` = 13.455816, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
        check_out_date = list("2017-06-16T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:13:15.005Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76599762900_1", `_score` = 13.455723, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
        check_out_date = list("2017-06-28T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:14:37.454Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76595493900_1", `_score` = 13.455723, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-23T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:07.348Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76597065400_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
        check_out_date = list("2017-06-16T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:13:14.994Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76594777600_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-22T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:03.440Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76572404700_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
        check_out_date = list("2017-06-14T00:00:00.000Z"), ts_start = list(
            "2017-06-12T19:53:56.580Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76598481000_1", `_score` = 12.763965, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
        check_out_date = list("2017-06-28T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:14:37.452Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")))

Subset 2:

list(structure(list(`_index` = "xml-search-2017.06.12", `_type` = "xml", 
    `_id` = "76452356700_1", `_score` = 2.390721, fields = structure(list(
        check_in_date = list("2017-06-28T00:00:00.000Z"), check_out_date = list(
            "2017-07-02T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:21.311Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452363400_3", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-30T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:21.235Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452375900_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-01T00:00:00.000Z"), 
        check_out_date = list("2017-09-03T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:30.092Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452377300_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
        check_out_date = list("2017-06-24T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:31.633Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452383100_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Rodeway Inn South Miami", 
        "East Miami", "Holiday Inn Port Of Miami", "Mayfair Hotel & Spa", 
        "Intercontinental Miami", "Marriott Biscayne Bay", "Eb Hotel Miami", 
        "Doubletree Grand Biscayne Bay", "Hotel Beaux Arts Miami", 
        "Cambria Suites Miami Airport", "Epic Miami, A Kimpton Hotel", 
        "The Ritz-Carlton Coconut Grove", "Quality Inn Miami Airport", 
        "Hilton Miami Downtown", "Conrad Miami", "Miccosukee Resort & Gaming", 
        "Courtyard Downtown", "Jw Marriott Marquis Miami", "Miami Marriott Dadeland", 
        "Courtyard Miami Coral Gables"), check_in_date = list(
        "2017-08-26T00:00:00.000Z"), check_out_date = list("2017-08-29T00:00:00.000Z"), 
        ts_start = list("2017-06-12T18:19:30.198Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452394200_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Rosen Plaza"), 
        check_in_date = list("2017-06-24T00:00:00.000Z"), check_out_date = list(
            "2017-06-27T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:31.672Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452409700_2", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Jw Marriott Hotel Mumbai Sahar"), 
        check_in_date = list("2018-03-12T00:00:00.000Z"), check_out_date = list(
            "2018-03-16T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:46.007Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452420500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Holiday Inn Express Troisdorf"), 
        check_in_date = list("2017-06-25T00:00:00.000Z"), check_out_date = list(
            "2017-06-28T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:40.676Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452421700_2", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-23T00:00:00.000Z"), 
        check_out_date = list("2017-06-26T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:40.932Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452424300_2", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Aston Balikpapan"), 
        check_in_date = list("2017-07-05T00:00:00.000Z"), check_out_date = list(
            "2017-07-06T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:42.293Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425100_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("H10 Duque De Loule"), 
        check_in_date = list("2017-07-28T00:00:00.000Z"), check_out_date = list(
            "2017-07-30T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:42.594Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425500_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-25T00:00:00.000Z"), 
        check_out_date = list("2017-06-26T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:42.719Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425600_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
        check_out_date = list("2017-06-19T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:42.748Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452427300_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
        check_out_date = list("2017-06-20T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:43.154Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452428100_10", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-11T00:00:00.000Z"), 
        check_out_date = list("2017-09-24T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:43.345Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452428800_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Ibis Istanbul City West"), 
        check_in_date = list("2017-06-30T00:00:00.000Z"), check_out_date = list(
            "2017-07-01T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:43.761Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452431500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("The Rani Hotel And Spa"), 
        check_in_date = list("2017-07-07T00:00:00.000Z"), check_out_date = list(
            "2017-07-14T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:45.460Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452431700_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
        check_out_date = list("2017-06-20T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:45.642Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452434500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Cloitre St Louis", 
        "De L'Horloge", "Hotel D'Europe", "Danieli", "Bristol", 
        "Kyriad Palais Des Papes", "Regina"), check_in_date = list(
        "2017-06-24T00:00:00.000Z"), check_out_date = list("2017-06-25T00:00:00.000Z"), 
        ts_start = list("2017-06-12T18:19:47.037Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452434700_2", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-08T00:00:00.000Z"), 
        check_out_date = list("2017-09-20T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:47.086Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452447400_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-25T00:00:00.000Z"), 
        check_out_date = list("2017-09-30T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:51.056Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452451400_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-07-01T00:00:00.000Z"), 
        check_out_date = list("2017-07-04T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:40.306Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452452500_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-18T00:00:00.000Z"), 
        check_out_date = list("2017-06-19T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:52.461Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452452800_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Best Western Grand West'S Villas"), 
        check_in_date = list("2017-07-12T00:00:00.000Z"), check_out_date = list(
            "2017-07-14T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:40.753Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452453600_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Athens Status Suites", 
        "Ambrosia Hotel & Suites", "A For Athens", "Athenswas", 
        "Kimon Athens Hotel", "Chic", "Athinais", "Novus City Hotel"), 
        check_in_date = list("2017-08-12T00:00:00.000Z"), check_out_date = list(
            "2017-08-25T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:54.035Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")))
4
  • Please go through this link and edit accordingly Commented Jun 17, 2017 at 13:24
  • @Sotos, Done, thank you! Commented Jun 17, 2017 at 13:32
  • Please make your example copy/pasteable (see ?dput). Commented Jun 17, 2017 at 13:40
  • @RomanLuštrik have added the dput output now Commented Jun 17, 2017 at 13:46

2 Answers 2

3

It's not totally clean yet, but here's an option:

library(tidyverse)

df <- raw2 %>% map_df(flatten) %>% unnest()

df
#> # A tibble: 10 x 7
#>                 `_index` `_type`         `_id` `_score`
#>                    <chr>   <chr>         <chr>    <dbl>
#>  1 xml-search-2017.06.12     xml 76595087100_1 13.45785
#>  2 xml-search-2017.06.12     xml 76595087300_1 13.45785
#>  3 xml-search-2017.06.12     xml 76594844800_1 13.45582
#>  4 xml-search-2017.06.12     xml 76597264600_1 13.45582
#>  5 xml-search-2017.06.12     xml 76599762900_1 13.45572
#>  6 xml-search-2017.06.12     xml 76595493900_1 13.45572
#>  7 xml-search-2017.06.12     xml 76597065400_1 13.16903
#>  8 xml-search-2017.06.12     xml 76594777600_1 13.16903
#>  9 xml-search-2017.06.12     xml 76572404700_1 13.16903
#> 10 xml-search-2017.06.12     xml 76598481000_1 12.76397
#> # ... with 3 more variables: check_in_date <chr>, check_out_date <chr>,
#> #   ts_start <chr>
Sign up to request clarification or add additional context in comments.

5 Comments

Those map* functions of purrr seem very handy
Yeah, I love map_df. Now that I've seen it, it seems like a huge omission that the base versions can't simplify directly to a data.frame.
@alistaire that looks very promising! worked for the example I provided, but when trying similar for a different sub-set I get an error. Have attached the new subset on the main text. Could it be because some records have larger number of fields? (ie, some have x number of hotel names)
Yeah, the new variable is both coming and going (and so can be NULL) and is of variable length. Maybe raw3 %>% map(flatten) %>% map_df(as_data_frame) %>% mutate(hotel_name = map(hotel_name, ~.x %||% NA)) %>% unnest()
@alistaire, that is right, the data contains hotel searches and each search could have 0:n hotels. (same for other fields). I tried with the code above you advised but I'm getting error. "Error in bind_rows_(x, .id) : Can not automatically convert from character to numeric in column "value"." My objective is to aggregate hotels and run analysis on the number of searches for each. Can you think of a way to extract those 0:n hotels within each record and aggregate? Really appreciate your help!
1

A base R version could be,

do.call(rbind, lapply(l1, function(i) as.data.frame(t(unlist(i)))))

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.