If you inspect the website and the network calls it makes, you can recover the API structure. For example, https://iris.who.int/server/api/discover/search/objects?query=Verbatim&scope=&page=0&size=100
finds 1511 objects containing "Verbatim". The response also gives you the uuid and handle of each item. The page size is capped at 100, so we need to iterate over all 16 pages and stitch together the embedded search results. Here, I do that with purrr::map_dfr.
library(httr)
library(jsonlite)
library(purrr)
# Build the search URL for a given zero-based page number, so the endpoint
# and its query string are written down in exactly one place.
page_url <- function(page) {
  paste0(
    "https://iris.who.int/server/api/discover/search/objects",
    "?query=Verbatim&scope=&page=", page, "&size=100"
  )
}

# Download one page of search results and parse the JSON body.
# stop_for_status() turns an HTTP error (4xx/5xx) into an R error instead of
# letting fromJSON() choke on an error page.
fetch_page <- function(page) {
  resp <- GET(page_url(page))
  stop_for_status(resp)
  fromJSON(content(resp, "text", encoding = "UTF-8"))
}

# The first page tells us how many pages exist in total.
url <- page_url(0)
first_page <- fetch_page(0)
total_pages <- first_page[["_embedded"]][["searchResult"]][["page"]][["totalPages"]]

# seq_len() yields an empty sequence when there are no pages, whereas
# 0:(total_pages - 1) would count down to -1 and request invalid pages.
results <- map_dfr(seq_len(total_pages) - 1L, \(page) {
  # Page 0 is already in memory; do not download it a second time.
  data <- if (page == 0L) first_page else fetch_page(page)
  data$`_embedded`$searchResult$`_embedded`$objects
})
You can then subset the nested data.frame results to get your required data:
# Pull the three identifying columns out of the nested search results.
# local() keeps the intermediate handle out of the global environment.
res <- local({
  items <- results[["_embedded"]][["indexableObject"]]
  data.frame(
    uuid = items[["uuid"]],
    handle = items[["handle"]],
    name = items[["name"]]
  )
})
and you will get
> print(head(res))
uuid handle name
1 2f845ffc-483e-48c9-aec6-0cb93205c2e5 10665/368861 Verbatim records of plenary meetings
2 baac73ab-592f-41ea-afc5-e41274fc63fa 10665/369332 Verbatim records of plenary meetings
3 5bc09449-c72d-4b9b-8335-1977a7a27208 10665/370105 Verbatim records of plenary meetings
4 cf808d52-7896-492b-8cf0-0caa7479c3c9 10665/370817 Verbatim records of plenary meetings
5 6f11447e-62ab-4116-addc-a85cf0346287 10665/371906 Verbatim records of plenary meetings
6 80857c32-7512-4026-b0e7-c8cdd09a4558 10665/75779 Verbatim records of plenary meetings
You can then build the item links directly using the uuid: a unique identifier for each item.
For example:
https://iris.who.int/items/2f845ffc-483e-48c9-aec6-0cb93205c2e5
Fetching the related PDFs
I spent some time to write some code that
- fetches the objects containing "Verbatim" in parallel using
`req_perform_parallel()` and stores them in `results`.
In this request we can ask for the embedded bundles by adding &embed=bundles:
https://iris.who.int/server/api/discover/search/objects?query=Verbatim&scope=&page=0&size=100&embed=bundles
- then we get the actual bitstreams and fetch their uuids and pdf_names. I could not find a way to get these within one API call.
- Join back the pdf data to main data
library(httr2)
library(purrr)
# Search endpoint shared by every paged request below.
search_url <- "https://iris.who.int/server/api/discover/search/objects"

# Get total pages from first request
url <- "https://iris.who.int/server/api/discover/search/objects?query=Verbatim&scope=&page=0&size=100"
first_page <- request(url) |> req_perform() |> resp_body_json()
total_pages <- first_page[["_embedded"]][["searchResult"]][["page"]][["totalPages"]]

# One request per result page, asking the API to embed each item's bundles.
# seq_len() is used so that an empty result set issues no requests rather
# than asking for pages 0 and -1, which is what 0:(total_pages - 1) would do.
page_requests <- map(seq_len(total_pages) - 1L, \(page) {
  request(search_url) |>
    req_url_query(query = "Verbatim", page = page, size = 100, embed = "bundles")
})

# Flatten one search hit into a one-row data frame.
# pluck() returns `.default` when any level of the path is missing, so an item
# without bundles (or without a name/handle) produces NA instead of a
# "subscript out of bounds" or "differing number of rows" error.
# NOTE(review): only the first embedded bundle is used, as in the original;
# confirm that this is always the bundle holding the PDFs.
item_row <- function(obj) {
  item <- obj[["_embedded"]][["indexableObject"]]
  data.frame(
    uuid = pluck(item, "uuid", .default = NA_character_),
    handle = pluck(item, "handle", .default = NA_character_),
    name = pluck(item, "name", .default = NA_character_),
    bundles_href = pluck(
      item, "_embedded", "bundles", "_embedded", "bundles", 1,
      "_links", "bitstreams", "href",
      .default = NA_character_
    )
  )
}

# Perform all page requests in parallel, then bind every hit into one table.
# `n` is a running row id used later to join the PDFs back to their item.
results <- page_requests |>
  req_perform_parallel() |>
  map(resp_body_json) |>
  map(\(body) body[["_embedded"]][["searchResult"]][["_embedded"]][["objects"]]) |>
  map_dfr(\(objects) map_dfr(objects, item_row)) |>
  dplyr::mutate(n = dplyr::row_number())

# Only items that expose a bitstreams link can be queried for files.
with_files <- results[!is.na(results$bundles_href), ]

# Fetch the bitstream listing of every remaining item in parallel. Responses
# come back in request order, so each one is paired with its item's `n`
# (not its position in the filtered list) to keep the join key correct.
pdfs <- map(with_files$bundles_href, request) |>
  req_perform_parallel() |>
  map2_dfr(with_files$n, \(resp, item_n) {
    bitstreams <- resp_body_json(resp)[["_embedded"]][["bitstreams"]]
    map_dfr(bitstreams, \(pdf) {
      data.frame(
        n = item_n, # for joining with main data
        pdf_uuid = pdf$id,
        pdf_name = pdf$name,
        pdf_url = paste0("https://iris.who.int/server/api/core/bitstreams/", pdf$id, "/content")
      )
    })
  }) |>
  dplyr::left_join(results, by = "n")
# Example: save the first PDF under its original file name.
# mode = "wb" writes the file in binary mode.
download.file(
  url = pdfs[["pdf_url"]][1],
  destfile = pdfs[["pdf_name"]][1],
  mode = "wb"
)
giving
> print(head(pdfs))
n pdf_uuid pdf_name pdf_url
1 1 ed7ca87b-38a3-4e40-9ce6-539451d96c53 fctc-cop9-vr-en.pdf https://iris.who.int/server/api/core/bitstreams/ed7ca87b-38a3-4e40-9ce6-539451d96c53/content
2 2 09fafbae-9b3d-4a05-8a9f-a77c9f595c83 fctc-mop2-vr-en.pdf https://iris.who.int/server/api/core/bitstreams/09fafbae-9b3d-4a05-8a9f-a77c9f595c83/content
3 3 bf3e7907-835c-4bda-9ebf-8dab43b2d8fc fctc-mop1-vr-en.pdf https://iris.who.int/server/api/core/bitstreams/bf3e7907-835c-4bda-9ebf-8dab43b2d8fc/content
4 4 288210d7-b052-4e6d-849e-0bb664d5c781 fctc-cop8-vr-en.pdf https://iris.who.int/server/api/core/bitstreams/288210d7-b052-4e6d-849e-0bb664d5c781/content
5 5 f2f177ce-2857-465a-9d43-d1801f9402f2 fctc-cop7-vr-en.pdf https://iris.who.int/server/api/core/bitstreams/f2f177ce-2857-465a-9d43-d1801f9402f2/content
6 6 c8e9c472-fe63-460e-b2b7-1eff2b0b4c2e FCTC_COP4_REC2_en.pdf https://iris.who.int/server/api/core/bitstreams/c8e9c472-fe63-460e-b2b7-1eff2b0b4c2e/content
uuid handle name
1 2f845ffc-483e-48c9-aec6-0cb93205c2e5 10665/368861 Verbatim records of plenary meetings
2 baac73ab-592f-41ea-afc5-e41274fc63fa 10665/369332 Verbatim records of plenary meetings
3 5bc09449-c72d-4b9b-8335-1977a7a27208 10665/370105 Verbatim records of plenary meetings
4 cf808d52-7896-492b-8cf0-0caa7479c3c9 10665/370817 Verbatim records of plenary meetings
5 6f11447e-62ab-4116-addc-a85cf0346287 10665/371906 Verbatim records of plenary meetings
6 80857c32-7512-4026-b0e7-c8cdd09a4558 10665/75779 Verbatim records of plenary meetings
bundles_href
1 https://iris.who.int/server/api/core/items/2f845ffc-483e-48c9-aec6-0cb93205c2e5/bundles
2 https://iris.who.int/server/api/core/items/baac73ab-592f-41ea-afc5-e41274fc63fa/bundles
3 https://iris.who.int/server/api/core/items/5bc09449-c72d-4b9b-8335-1977a7a27208/bundles
4 https://iris.who.int/server/api/core/items/cf808d52-7896-492b-8cf0-0caa7479c3c9/bundles
5 https://iris.who.int/server/api/core/items/6f11447e-62ab-4116-addc-a85cf0346287/bundles
6 https://iris.who.int/server/api/core/items/80857c32-7512-4026-b0e7-c8cdd09a4558/bundles