2

I am querying a research publication database. Articles have different numbers of authors (ranging from 1 to more than 20). My goal is to create an edge list of co-authors for social network analysis using igraph. Below is a snippet of the JSON:

{
"format": "linked-data-api",
"version": "0.2",
"result": {
"_about": "http://network.csiro.au:9500/standalone/publications.json?_pageSize=5&_page=1",
"definition": "http://network.csiro.au:9500/standalone/meta/publications.json",
"extendedMetadataVersion": "http://network.csiro.au:9500/standalone/publications.json?_pageSize=5&_page=1&_metadata=all",
"first": "http://network.csiro.au:9500/standalone/publications.json?_page=0",
"isPartOf": {
"_about": "http://network.csiro.au:9500/standalone/publications.json",
"definition": "http://network.csiro.au:9500/standalone/meta/publications.json",
"hasPart": "http://network.csiro.au:9500/standalone/publications.json?_pageSize=5&_page=1",
"type": [
"http://purl.org/linked-data/api/vocab#ListEndpoint"
]
},
"items": [
{
"_about": "http://network.csiro.au/data/pub_EP1312922",
"access": "Public",
"author": {
"_about": "http://network.csiro.au/data/aimee.slangen",
"hasName": {
"_about": "http://network.csiro.au/data/aimee.slangen_name",
"firstName": "Aimee",
"lastName": "Slangen",
"title": "Ms"
}
},
"authorSeq": {
"_about": "http://network.csiro.au/data/pub_EP1312922_author_0",
"author": {
"_about": "http://network.csiro.au/data/aimee.slangen",
"hasName": {
"_about": "http://network.csiro.au/data/aimee.slangen_name",
"firstName": "Aimee",
"lastName": "Slangen",
"title": "Ms"
}
},
"sequenceNumber": 0
},
"classification": {
"_about": "http://network.csiro.au/data/classification_code_040104",
"name": "Climate Change Processes"
},
"classificationLevel": "http://network.csiro.au/data/unclassified",
"journalTitle": "Journal of Geophysical Research-Oceans",
"keyword": " ",
"outcome": "Approved",
"pages": "156-164",
"project": "http://network.csiro.au/data/project_PD00003609",
"publicationVolume": "119",
"publishedDate": "9-Jan-2014",
"publisher": "American Geophysical Union",
"title": "Regional Differences of Relative Sea Level Changes in the Northwest Atlantic: Historical Trends and Future Projections",
"wbscode": "R-03426-01-003",
"yearOfPublication": "2014"
},
{
"_about": "http://network.csiro.au/data/pub_EP112347",
"access": "Public",
"author": {
"_about": "http://network.csiro.au/data/roland.pitcher",
"hasName": {
"_about": "http://network.csiro.au/data/roland.pitcher_name",
"firstName": "Roland",
"lastName": "Pitcher",
"title": "Dr"
}
},
"authorSeq": {
"_about": "http://network.csiro.au/data/pub_EP112347_author_0",
"author": {
"_about": "http://network.csiro.au/data/roland.pitcher",
"hasName": {
"_about": "http://network.csiro.au/data/roland.pitcher_name",
"firstName": "Roland",
"lastName": "Pitcher",
"title": "Dr"
}
},
"sequenceNumber": 0
},
"classification": {
"_about": "http://network.csiro.au/data/classification_code_050209",
"name": "Natural Resource Management"
},
"classificationLevel": "http://network.csiro.au/data/unclassified",
"keyword": " ",
"outcome": "Approved",
"project": "http://network.csiro.au/data/project_PD00000752",
"publisher": "Queensland Department of Environment and Resource Management",
"title": "Understanding and Managing the Effects of Trawling on the Seabed in the Great Barrier Reef",
"wbscode": "R-00654-03-003",
"yearOfPublication": " "
},
{
"_about": "http://network.csiro.au/data/pub_EP148991",
"access": "CSIRO Only",
"author": {
"_about": "http://network.csiro.au/data/rob.bramley",
"hasName": {
"_about": "http://network.csiro.au/data/rob.bramley_name",
"firstName": "Rob",
"lastName": "Bramley",
"title": "Dr"
}
},
"authorSeq": {
"_about": "http://network.csiro.au/data/pub_EP148991_author_0",
"author": {
"_about": "http://network.csiro.au/data/rob.bramley",
"hasName": {
"_about": "http://network.csiro.au/data/rob.bramley_name",
"firstName": "Rob",
"lastName": "Bramley",
"title": "Dr"
}
},
"sequenceNumber": 0
},
"classification": {
"_about": "http://network.csiro.au/data/classification_code_070107",
"name": "Farming Systems Research"
},
"classificationLevel": "http://network.csiro.au/data/unclassified",
"keyword": " ",
"outcome": "Approved",
"pages": "26 + appendices",
"project": "http://network.csiro.au/data/project_PD00002886",
"publishedDate": "17-Sep-2014",
"publisher": "SRA",
"title": "A collaborative approach to Precision Agriculture RDE for the Australian Sugar Industry",
"wbscode": "R-02709-01",
"yearOfPublication": "2014"
},
{
"_about": "http://network.csiro.au/data/pub_EP151976",
"access": "Public",
"author": {
"_about": "http://network.csiro.au/data/paul.krummel",
"hasName": {
"_about": "http://network.csiro.au/data/paul.krummel_name",
"firstName": "Paul",
"lastName": "Krummel",
"title": "Mr"
}
},
"authorSeq": {
"_about": "http://network.csiro.au/data/pub_EP151976_author_0",
"author": {
"_about": "http://network.csiro.au/data/paul.krummel",
"hasName": {
"_about": "http://network.csiro.au/data/paul.krummel_name",
"firstName": "Paul",
"lastName": "Krummel",
"title": "Mr"
}
},
"sequenceNumber": 0
},
"classification": [
{
"_about": "http://network.csiro.au/data/classification_code_040104",
"name": "Climate Change Processes"
},
{
"_about": "http://network.csiro.au/data/classification_code_040199",
"name": "Atmospheric Sciences not elsewhere classified"
}
],
"classificationLevel": "http://network.csiro.au/data/unclassified",
"journalTitle": "Atmospheric Chemistry and Physics",
"keyword": [
"CH4",
"OH",
"hydroxyl radical",
"methane"
],
"outcome": "Approved",
"pages": "7943\u20137956",
"project": "http://network.csiro.au/data/project_PD00009165",
"publicationVolume": "16",
"publishedDate": "30-Jun-2016",
"publisher": "Copernicus GmbH",
"title": "Role of OH variability in the stalling of the global atmospheric CH4 growth rate from 1999 to 2006",
"wbscode": "R-07848; R-06420; R-07768",
"yearOfPublication": "2016"
},
{
"_about": "http://network.csiro.au/data/pub_EP152677",
"access": "CSIRO Only",
"author": [
{
"_about": "http://network.csiro.au/data/andrew.george",
"hasName": {
"_about": "http://network.csiro.au/data/andrew.george_name",
"firstName": "Andrew",
"lastName": "George",
"title": "Dr"
}
},
{
"_about": "http://network.csiro.au/data/sigrid.lehnert",
"hasName": {
"_about": "http://network.csiro.au/data/sigrid.lehnert_name",
"firstName": "Sigrid",
"lastName": "Lehnert",
"title": "Dr"
}
},
{
"_about": "http://network.csiro.au/data/toni.reverter-gomez",
"hasName": {
"_about": "http://network.csiro.au/data/toni.reverter-gomez_name",
"firstName": "Toni",
"lastName": "Reverter-Gomez",
"title": "Dr"
}
},
{
"_about": "http://network.csiro.au/data/yutao.li",
"hasName": {
"_about": "http://network.csiro.au/data/yutao.li_name",
"firstName": "Yutao",
"lastName": "Li",
"title": "Dr"
}
}
],
"authorSeq": [
{
"_about": "http://network.csiro.au/data/pub_EP152677_author_0",
"author": {
"_about": "http://network.csiro.au/data/yutao.li",
"hasName": {
"_about": "http://network.csiro.au/data/yutao.li_name",
"firstName": "Yutao",
"lastName": "Li",
"title": "Dr"
}
},
"sequenceNumber": 0
},
{
"_about": "http://network.csiro.au/data/pub_EP152677_author_1",
"author": {
"_about": "http://network.csiro.au/data/andrew.george",
"hasName": {
"_about": "http://network.csiro.au/data/andrew.george_name",
"firstName": "Andrew",
"lastName": "George",
"title": "Dr"
}
},
"sequenceNumber": 1
},
{
"_about": "http://network.csiro.au/data/pub_EP152677_author_2",
"author": {
"_about": "http://network.csiro.au/data/sigrid.lehnert",
"hasName": {
"_about": "http://network.csiro.au/data/sigrid.lehnert_name",
"firstName": "Sigrid",
"lastName": "Lehnert",
"title": "Dr"
}
},
"sequenceNumber": 2
},
{
"_about": "http://network.csiro.au/data/pub_EP152677_author_3",
"author": {
"_about": "http://network.csiro.au/data/toni.reverter-gomez",
"hasName": {
"_about": "http://network.csiro.au/data/toni.reverter-gomez_name",
"firstName": "Toni",
"lastName": "Reverter-Gomez",
"title": "Dr"
}
},
"sequenceNumber": 3
}
],
"classification": {
"_about": "http://network.csiro.au/data/classification_code_070201",
"name": "Animal Breeding"
},
"classificationLevel": "http://network.csiro.au/data/unclassified",
"conferenceDate": "28th-30th September 2015",
"conferenceLocation": "Lorne, Victoria",
"conferenceName": "21st AAABG",
"keyword": " ",
"outcome": "Approved",
"pages": "433-436",
"project": "http://network.csiro.au/data/project_PD00005603",
"publicationVolume": "21",
"publishedDate": "25-Sep-2015",
"publisher": "Association for the Advancement of Animal Breeding and Genetics",
"title": "Using Random Forests to Identify SNP Associated With Leg Defect in Broiler Chicken: Impact of Correcting For Population Structures",
"wbscode": "R-05156",
"yearOfPublication": "2015"
}
],
"itemsPerPage": 5,
"next": "http://network.csiro.au:9500/standalone/publications.json?_page=2",
"page": 1,
"prev": "http://network.csiro.au:9500/standalone/publications.json?_page=0",
"startIndex": 6,
"totalResults": 47023,
"type": [
"http://purl.org/linked-data/api/vocab#Page"
]
}
}

I am reading the data in as follows:

library(jsonlite)
library(tidyjson)
# Download one page (5 records) of publications and parse it into R objects.
pubs <- jsonlite::fromJSON(
  txt = "http://network.csiro.au:9500/standalone/publications.json?_page=1&_pageSize=5"
)

When trying to extract meaningful data using tidyjson, I get this error:

# `pubs` is the already-parsed R list returned by fromJSON(), not JSON text,
# so as.tbl_json() has no method to dispatch on and fails with the error
# shown below.
pubs %>%
  as.tbl_json %>%
  enter_object("items")

Error in UseMethod("as.tbl_json") : 
  no applicable method for 'as.tbl_json' applied to an object of class "list"

I am not an expert in R or JSON so would appreciate some guidance. Using the above example, I want to create for each publication an edge list of co-authors like this:

_about                                    yearOfPublication from            to
http://network.url.com/data/pub_EP16079   2011              Colin Jackson   Holly Trueman
http://network.url.com/data/pub_EP16079   2011              Colin Jackson   Tara Sutherland
http://network.url.com/data/pub_EP16079   2011              Colin Jackson   Trevor Rapson
http://network.url.com/data/pub_EP16079   2011              Holly Trueman   Tara Sutherland
http://network.url.com/data/pub_EP16079   2011              Holly Trueman   Trevor Rapson
http://network.url.com/data/pub_EP16079   2011              Tara Sutherland Trevor Rapson

I hope someone can help me! Thanks in advance.

3
  • I can display this in my browser using JSON formatter. The format is linked-data-api, which may be a customised version. Commented Nov 30, 2016 at 1:11
  • OK. The issue is the document is too big to post here. I copied the 2Mb file into JSONlint and it is valid. The snippet I provided is incomplete. I am still trying to figure out how to limit the fromJSON call. Commented Nov 30, 2016 at 2:38
  • 1
    I added valid JSON now. Extracted five records. Commented Nov 30, 2016 at 4:29

1 Answer 1

2

This is a bit of a tricky example. See this issue for discussion on how to improve how tidyjson handles objects that are sometimes arrays.

While not the cleanest solution, I think this does get the job done - you could probably functionalize some of these groups of steps to optimize code-reuse.

The basic aim is to parse enough of the document to reach the authors, then use a separate workflow for publications whose "author" is a single object and for those where it is an array. The arrays require tidyr::expand to complete the combinations of all authors within a publication (since those combinations are not represented in the data).

# Build a co-author table from the publications JSON.
# Reads the raw JSON text from "ex.json"; the result is one row per
# (publication, from-author, to-author) combination.
json <- paste(readLines("ex.json"), collapse = " ")

library(dplyr)
library(tidyjson)
library(tidyr)

## Parse down to each publication's "author" value. Some publications hold a
## single author object, others an array of author objects, so record the
## JSON type of each value to route it to the matching workflow below.
prep <- json %>%
  enter_object("result") %>%
  enter_object("items") %>%
  gather_array() %>%
  spread_values(
    about = jstring("_about"),
    yearOfPublication = jstring("yearOfPublication")
  ) %>%
  enter_object("author") %>%
  json_types()

## Single-author publications ("author" is an object): the only combination
## available is the author paired with themselves.
authorobj <- prep %>%
  filter(as.character(type) == "object") %>%
  spread_values(
    authorFirst = jstring("hasName", "firstName"),
    authorLast = jstring("hasName", "lastName")
  ) %>%
  mutate(from = paste(authorFirst, authorLast), to = from) %>%
  select(-authorFirst, -authorLast) %>%
  tbl_df()

## Multi-author publications ("author" is an array): one row per author,
## giving the 'from' side of each pair.
authorarr <- prep %>%
  filter(as.character(type) == "array") %>%
  gather_array("authorid") %>%
  spread_values(
    authorFirst = jstring("hasName", "firstName"),
    authorLast = jstring("hasName", "lastName")
  ) %>%
  mutate(from = paste(authorFirst, authorLast)) %>%
  select(-authorFirst, -authorLast)

## Use tidyr::expand to complete the from/to combinations per publication.
## `from` must be the bare column name so it is evaluated inside each
## array.index group; referring to authorarr$from would pull in the authors
## of every publication and pair people who never wrote together.
authorarr <- authorarr %>%
  tbl_df() %>%
  left_join(
    authorarr %>%
      group_by(array.index) %>%
      expand(from = from, to = from) %>%
      ungroup(),
    by = c("array.index", "from")
  )

## Stack both workflows (select only a few columns for display).
## The result keeps self-pairs (from == to) and both directions of each
## pair; append filter(from < to) to keep one row per undirected edge.
dplyr::bind_rows(authorobj, authorarr) %>%
  select(array.index, from, to)
#> # A tibble: 20 x 3
#>    array.index                from                  to
#>          <int>               <chr>               <chr>
#>  1           1       Aimee Slangen       Aimee Slangen
#>  2           2      Roland Pitcher      Roland Pitcher
#>  3           3         Rob Bramley         Rob Bramley
#>  4           4        Paul Krummel        Paul Krummel
#>  5           5       Andrew George       Andrew George
#>  6           5       Andrew George      Sigrid Lehnert
#>  7           5       Andrew George Toni Reverter-Gomez
#>  8           5       Andrew George            Yutao Li
#>  9           5      Sigrid Lehnert       Andrew George
#> 10           5      Sigrid Lehnert      Sigrid Lehnert
#> 11           5      Sigrid Lehnert Toni Reverter-Gomez
#> 12           5      Sigrid Lehnert            Yutao Li
#> 13           5 Toni Reverter-Gomez       Andrew George
#> 14           5 Toni Reverter-Gomez      Sigrid Lehnert
#> 15           5 Toni Reverter-Gomez Toni Reverter-Gomez
#> 16           5 Toni Reverter-Gomez            Yutao Li
#> 17           5            Yutao Li       Andrew George
#> 18           5            Yutao Li      Sigrid Lehnert
#> 19           5            Yutao Li Toni Reverter-Gomez
#> 20           5            Yutao Li            Yutao Li
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.