From ff5f5577a4f1b1d06767e58bbd711b205423a882 Mon Sep 17 00:00:00 2001 From: Ernest Guevarra Date: Fri, 5 Apr 2024 20:14:46 +0100 Subject: [PATCH] add structuring functions --- DESCRIPTION | 4 ++ NAMESPACE | 10 +++-- R/aurora.R | 6 ++- R/ora_identify.R | 16 +++++-- R/ora_list_metadata_formats.R | 14 ++++-- R/ora_structure.R | 80 ++++++++++++++++++++++++++++------- README.Rmd | 6 +-- README.md | 31 +++++++------- inst/CITATION | 14 +++--- man/ora_identify.Rd | 14 +++--- man/ora_list_meta_formats.Rd | 10 +++-- man/ora_structure.Rd | 10 +++-- pkgdown/_pkgdown.yml | 1 + 13 files changed, 154 insertions(+), 62 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ef7d6b8..6ef2df8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,11 @@ Description: The Oxford University Research Archive (ORA) . License: GPL (>= 3) Imports: + dplyr, httr2, + rlang, + tibble, + tidyr, xml2 Encoding: UTF-8 Language: en-GB diff --git a/NAMESPACE b/NAMESPACE index 183fce8..c44b089 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,10 +6,14 @@ export(ora_list_records) export(ora_list_records_id) export(ora_list_sets) export(ora_structure_identify) +export(ora_structure_meta_formats) +importFrom(dplyr,bind_cols) +importFrom(dplyr,bind_rows) importFrom(httr2,req_perform) importFrom(httr2,req_url_query) importFrom(httr2,request) importFrom(httr2,resp_body_xml) -importFrom(xml2,xml_attrs) -importFrom(xml2,xml_children) -importFrom(xml2,xml_contents) +importFrom(rlang,.data) +importFrom(tibble,tibble) +importFrom(tidyr,unnest) +importFrom(xml2,as_list) diff --git a/R/aurora.R b/R/aurora.R index befa707..a165bd8 100644 --- a/R/aurora.R +++ b/R/aurora.R @@ -15,6 +15,10 @@ #' @name aurora #' @keywords internal #' @importFrom httr2 request req_url_query req_perform resp_body_xml -#' @importFrom xml2 xml_children xml_contents xml_attrs +#' @importFrom xml2 as_list +#' @importFrom dplyr bind_rows bind_cols +#' @importFrom tidyr unnest +#' @importFrom rlang .data +#' @importFrom tibble tibble #' 
"_PACKAGE" diff --git a/R/ora_identify.R b/R/ora_identify.R index a0f8ea9..df4e89e 100644 --- a/R/ora_identify.R +++ b/R/ora_identify.R @@ -1,10 +1,13 @@ #' -#' Identify +#' Retrieve information about Oxford University OAI repository #' #' @param base_url The OAI-PMH endpoint base URL. Set to -#' https://ora.ox.ac.uk/oai2 +#' https://ora.ox.ac.uk/oai2. +#' @param tabular Logical. Should response be structured into a table? +#' Default is TRUE. #' -#' @returns XML OAI response +#' @returns A data.frame (default) or an XML document (if `tabular = FALSE`) +#' with identifying information on the OAI repository. #' #' @examples #' ora_identify() @@ -12,7 +15,8 @@ #' @export #' -ora_identify <- function(base_url = "https://ora.ox.ac.uk/oai2") { +ora_identify <- function(base_url = "https://ora.ox.ac.uk/oai2", + tabular = TRUE) { ## Make base request ---- req <- httr2::request(base_url) @@ -25,6 +29,10 @@ ora_identify <- function(base_url = "https://ora.ox.ac.uk/oai2") { httr2::req_perform() |> httr2::resp_body_xml() + ## Structure resp? ---- + if (tabular) + resp <- ora_structure_identify(resp) + ## Return resp ---- resp } diff --git a/R/ora_list_metadata_formats.R b/R/ora_list_metadata_formats.R index 83029d8..25ba5aa 100644 --- a/R/ora_list_metadata_formats.R +++ b/R/ora_list_metadata_formats.R @@ -2,9 +2,12 @@ #' List metadata formats #' #' @param base_url The OAI-PMH endpoint base URL. Set to -#' https://ora.ox.ac.uk/oai2 +#' https://ora.ox.ac.uk/oai2. +#' @param tabular Logical. Should response be structured into a table? +#' Default is TRUE. #' -#' @returns XML OAI response +#' @returns A data.frame (default) or an XML document (if `tabular = FALSE`) +#' with identifying information on the OAI metadata formats. 
#' #' @examples #' ora_list_meta_formats() @@ -12,7 +15,8 @@ #' @export #' -ora_list_meta_formats <- function(base_url = "https://ora.ox.ac.uk/oai2") { +ora_list_meta_formats <- function(base_url = "https://ora.ox.ac.uk/oai2", + tabular = TRUE) { ## Make base request ---- req <- httr2::request(base_url) @@ -25,6 +29,10 @@ ora_list_meta_formats <- function(base_url = "https://ora.ox.ac.uk/oai2") { httr2::req_perform() |> httr2::resp_body_xml() + ## Structure resp ---- + if (tabular) + resp <- ora_structure_meta_formats(resp) + ## Return resp ---- resp } diff --git a/R/ora_structure.R b/R/ora_structure.R index 1164e35..4009bcb 100644 --- a/R/ora_structure.R +++ b/R/ora_structure.R @@ -1,28 +1,78 @@ #' #' Structure ORA XML outputs #' -#' @param ora_xml XML outputs +#' @param ora_xml XML outputs. #' -#' @return A structured output +#' @return A tibble outputs. #' #' @examples -#' #ora_identify() |> ora_identify_structure() +#' ora_identify() +#' ora_list_meta_formats() #' #' @rdname ora_structure #' @export #' ora_structure_identify <- function(ora_xml) { - field_names <- ora_xml |> - xml2::xml_children() |> - xml2::xml_children() |> - xml2::xml_name() - - values <- ora_xml |> - xml2::xml_children() |> - xml2::xml_attrs(".//verb") - xml2::xml_children() |> - xml2::xml_contents() |> - xml2::xml_children() |> - xml2::xml_contents() + xml_list <- xml2::as_list(ora_xml) + + responseDate <- xml_list |> + unlist(recursive = FALSE) |> + (\(x) x[[1]][[1]])() + + request <- xml_list[[1]][2] |> + (\(x) paste0(x$request, "?verb=", attributes(x$request)))() + + description <- xml_list |> + unlist(recursive = FALSE) |> + (\(x) x[[3]][8])() |> + unlist() |> + dplyr::bind_rows() + + names(description) <- names(description) |> + tools::file_ext() + + tibble::tibble( + responseDate, + request, + xml_list |> + unlist(recursive = FALSE) |> + (\(x) x[[3]][1:7] |> unlist() |> dplyr::bind_rows())(), + description + ) +} + +#' +#' @rdname ora_structure +#' @export +#' + 
+ora_structure_meta_formats <- function(ora_xml) { + xml_list <- ora_xml |> + xml2::as_list() |> + unlist(recursive = FALSE) + + xml_list[1:2] <- xml_list[1:2] |> + unlist(recursive = FALSE) + + xml_list[2] <- paste0( + xml_list[2], + "?verb=", + ora_xml |> + xml2::as_list() |> + (\(x) attributes(x[[1]][2][[1]]))() + ) + + names(xml_list) <- names(xml_list) |> + tools::file_ext() + + xml_list[[3]] <- xml_list[[3]] |> + dplyr::bind_rows() |> + tidyr::unnest( + cols = c( + .data$metadataPrefix, .data$schema, .data$metadataNamespace + ) + ) + + dplyr::bind_cols(xml_list) } diff --git a/README.Rmd b/README.Rmd index 96558d3..654e05a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -23,13 +23,13 @@ knitr::opts_chunk$set( [![CodeFactor](https://www.codefactor.io/repository/github/oxfordihtm/aurora/badge)](https://www.codefactor.io/repository/github/oxfordihtm/aurora) -The [Oxford University Research Archive (ORA)](https://ora.ox.ac.uk/) is an Open Access platform and is the institutional repository for the University of Oxford. ORA serves as a permanent and secure online archive of research materials produced by members of the University of Oxford. ORA supports and participates in the Open Archives Initiative (OAI). ORA is a registered OAI-PMH data-provider and provides metadata for all public records which is updated as soon as each record is published or updated. ORA also provides API access to its resources. This package provides low level functions to interface with the [ORA API](https://ora.ox.ac.uk/api). +The [Oxford University Research Archive (ORA)](https://ora.ox.ac.uk/) is an Open Access platform and is the institutional repository for the [University of Oxford](https://ox.ac.uk). ORA serves as a permanent and secure online archive of research materials produced by members of the University of Oxford. ORA supports and participates in the Open Archives Initiative (OAI). 
ORA is a registered OAI-PMH data-provider and provides metadata for all public records which is updated as soon as each record is published or updated. ORA also provides API access to its resources. This package provides low level functions to interface with the [ORA API](https://ora.ox.ac.uk/api). ## What does `aurora` do? Please note that `aurora` is still highly experimental and is undergoing a lot of development. Hence, any functionalities described below and in the rest of the package documentation have a high likelihood of changing interface or approach as we aim for a stable working version. -Currently, the package provides functions that wrap each of the verb endpoints provided by OAI 2.0: +Currently, the package provides functions that wrap each of the verb request endpoints provided by [OAI 2.0](https://www.openarchives.org/OAI/openarchivesprotocol.html): * `Identify` * `ListRecords` @@ -37,7 +37,7 @@ Currently, the package provides functions that wrap each of the verb endpoints p * `ListMetadataFormats` * `ListIdentifiers` -From here, the plan is to develope functions that structure these XML OAI responses into useful tabular formats useful for analysis. +From here, the plan is to develop functions that structure these XML OAI responses into tabular formats useful for analysis. ## Installation diff --git a/README.md b/README.md index 5c37329..6d5ae6c 100644 --- a/README.md +++ b/README.md @@ -18,12 +18,12 @@ coverage](https://codecov.io/gh/OxfordIHTM/ourora/branch/main/graph/badge.svg)]( The [Oxford University Research Archive (ORA)](https://ora.ox.ac.uk/) is an Open Access platform and is the institutional repository for the -University of Oxford. ORA serves as a permanent and secure online -archive of research materials produced by members of the University of -Oxford. ORA supports and participates in the Open Archives Initiative -(OAI). 
ORA is a registered OAI-PMH data-provider and provides metadata -for all public records which is updated as soon as each record is -published or updated. ORA also provides API access to its resources. +[University of Oxford](https://ox.ac.uk). ORA serves as a permanent and +secure online archive of research materials produced by members of the +University of Oxford. ORA supports and participates in the Open Archives +Initiative (OAI). ORA is a registered OAI-PMH data-provider and provides +metadata for all public records which is updated as soon as each record +is published or updated. ORA also provides API access to its resources. This package provides low level functions to interface with the [ORA API](https://ora.ox.ac.uk/api). @@ -35,7 +35,8 @@ the rest of the package documentation have a high likelihood of changing interface or approach as we aim for a stable working version. Currently, the package provides functions that wrap each of the verb -endpoints provided by OAI 2.0: +request endpoints provided by [OAI +2.0](https://www.openarchives.org/OAI/openarchivesprotocol.html): - `Identify` - `ListRecords` @@ -44,7 +45,7 @@ endpoints provided by OAI 2.0: - `ListIdentifiers` From here, the plan is to develope functions that structure these XML -OAI responses into useful tabular formats useful for analysis. +OAI responses into tabular formats useful for analysis. ## Installation @@ -67,20 +68,20 @@ citation provided by a call to the `citation()` function as follows: ``` r citation("aurora") -#> To cite codigo in publications use: +#> To cite aurora in publications use: #> -#> Anita Makori and Ernest Guevarra (2024). codigo: Interface to the -#> International Classification of Diseases (ICD) API. R package version -#> 0.0.9000. URL https://oxford-ihtm.io/codigo/ +#> Ernest Guevarra (2024). aurora: Interface to the Oxford University +#> Research Archive (ORA) API. R package version 0.0.9000. 
URL +#> https://oxford-ihtm.io/aurora/ #> #> A BibTeX entry for LaTeX users is #> #> @Manual{, -#> title = {codigo: Interface to the International Classification of Diseases (ICD) API}, -#> author = {{Anita Makori} and {Ernest Guevarra}}, +#> title = {aurora: Interface to the Oxford University Research Archive (ORA) API}, +#> author = {{Ernest Guevarra}}, #> year = {2024}, #> note = {R package version 0.0.9000}, -#> url = {https://oxford-ihtm.io/codigo/}, +#> url = {https://oxford-ihtm.io/aurora/}, #> } ``` diff --git a/inst/CITATION b/inst/CITATION index ef44b8c..1054be6 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,16 +1,16 @@ -citHeader("To cite codigo in publications use:") +citHeader("To cite aurora in publications use:") citEntry( entry = "Manual", - title = "codigo: Interface to the International Classification of Diseases (ICD) API", - author = c(person("Anita Makori"), person("Ernest Guevarra")), + title = "aurora: Interface to the Oxford University Research Archive (ORA) API", + author = person("Ernest Guevarra"), year = "2024", note = "R package version 0.0.9000", - url = "https://oxford-ihtm.io/codigo/", + url = "https://oxford-ihtm.io/aurora/", textVersion = paste( - paste("Anita Makori and Ernest Guevarra (2024).", - "codigo: Interface to the International Classification of Diseases (ICD) API.", + paste("Ernest Guevarra (2024).", + "aurora: Interface to the Oxford University Research Archive (ORA) API.", "R package version 0.0.9000.", - "URL https://oxford-ihtm.io/codigo/") + "URL https://oxford-ihtm.io/aurora/") ) ) diff --git a/man/ora_identify.Rd b/man/ora_identify.Rd index aa67805..215d62c 100644 --- a/man/ora_identify.Rd +++ b/man/ora_identify.Rd @@ -2,19 +2,23 @@ % Please edit documentation in R/ora_identify.R \name{ora_identify} \alias{ora_identify} -\title{Identify} +\title{Retrieve information about Oxford University OAI repository} \usage{ -ora_identify(base_url = "https://ora.ox.ac.uk/oai2") +ora_identify(base_url = 
"https://ora.ox.ac.uk/oai2", tabular = TRUE) } \arguments{ \item{base_url}{The OAI-PMH endpoint base URL. Set to -https://ora.ox.ac.uk/oai2} +https://ora.ox.ac.uk/oai2.} + +\item{tabular}{Logical. Should response be structured into a table? +Default is TRUE.} } \value{ -XML OAI response +A data.frame (default) or an XML document (if \code{tabular = FALSE}) +with identifying information on the OAI repository. } \description{ -Identify +Retrieve information about Oxford University OAI repository } \examples{ ora_identify() diff --git a/man/ora_list_meta_formats.Rd b/man/ora_list_meta_formats.Rd index e690035..b9ed7b9 100644 --- a/man/ora_list_meta_formats.Rd +++ b/man/ora_list_meta_formats.Rd @@ -4,14 +4,18 @@ \alias{ora_list_meta_formats} \title{List metadata formats} \usage{ -ora_list_meta_formats(base_url = "https://ora.ox.ac.uk/oai2") +ora_list_meta_formats(base_url = "https://ora.ox.ac.uk/oai2", tabular = TRUE) } \arguments{ \item{base_url}{The OAI-PMH endpoint base URL. Set to -https://ora.ox.ac.uk/oai2} +https://ora.ox.ac.uk/oai2.} + +\item{tabular}{Logical. Should response be structured into a table? +Default is TRUE.} } \value{ -XML OAI response +A data.frame (default) or an XML document (if \code{tabular = FALSE}) +with identifying information on the OAI metadata formats. } \description{ List metadata formats diff --git a/man/ora_structure.Rd b/man/ora_structure.Rd index da86da4..7177e64 100644 --- a/man/ora_structure.Rd +++ b/man/ora_structure.Rd @@ -2,20 +2,24 @@ % Please edit documentation in R/ora_structure.R \name{ora_structure_identify} \alias{ora_structure_identify} +\alias{ora_structure_meta_formats} \title{Structure ORA XML outputs} \usage{ ora_structure_identify(ora_xml) + +ora_structure_meta_formats(ora_xml) } \arguments{ -\item{ora_xml}{XML outputs} +\item{ora_xml}{XML outputs.} } \value{ -A structured output +A tibble outputs. 
} \description{ Structure ORA XML outputs } \examples{ -#ora_identify() |> ora_identify_structure() +ora_identify() +ora_list_meta_formats() } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 5c64d22..023ad4c 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -55,4 +55,5 @@ reference: - title: Structure contents: - ora_structure_identify + - ora_structure_meta_formats