Skip to content

Commit

Permalink
add structuring functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestguevarra committed Apr 5, 2024
1 parent da22111 commit ff5f557
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 62 deletions.
4 changes: 4 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ Description: The Oxford University Research Archive (ORA) <https://ora.ox.ac.uk/
functions to interface with the ORA API <https://ora.ox.ac.uk/api>.
License: GPL (>= 3)
Imports:
dplyr,
httr2,
rlang,
tibble,
tidyr,
xml2
Encoding: UTF-8
Language: en-GB
Expand Down
10 changes: 7 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@ export(ora_list_records)
export(ora_list_records_id)
export(ora_list_sets)
export(ora_structure_identify)
export(ora_structure_meta_formats)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(httr2,req_perform)
importFrom(httr2,req_url_query)
importFrom(httr2,request)
importFrom(httr2,resp_body_xml)
importFrom(xml2,xml_attrs)
importFrom(xml2,xml_children)
importFrom(xml2,xml_contents)
importFrom(rlang,.data)
importFrom(tibble,tibble)
importFrom(tidyr,unnest)
importFrom(xml2,as_list)
6 changes: 5 additions & 1 deletion R/aurora.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
#' @name aurora
#' @keywords internal
#' @importFrom httr2 request req_url_query req_perform resp_body_xml
#' @importFrom xml2 xml_children xml_contents xml_attrs
#' @importFrom xml2 as_list
#' @importFrom dplyr bind_rows bind_cols
#' @importFrom tidyr unnest
#' @importFrom rlang .data
#' @importFrom tibble tibble
#'
"_PACKAGE"
16 changes: 12 additions & 4 deletions R/ora_identify.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
#'
#' Identify
#' Retrieve information about Oxford University OAI repository
#'
#' @param base_url The OAI-PMH endpoint base URL. Set to
#' https://ora.ox.ac.uk/oai2
#' https://ora.ox.ac.uk/oai2.
#' @param tabular Logical. Should response be structured into a table?
#' Default is TRUE.
#'
#' @returns XML OAI response
#' @returns A data.frame (default) or an XML document (if `tabular = FALSE`)
#' with identifying information on the OAI repository.
#'
#' @examples
#' ora_identify()
#'
#' @export
#'

ora_identify <- function(base_url = "https://ora.ox.ac.uk/oai2") {
ora_identify <- function(base_url = "https://ora.ox.ac.uk/oai2",
tabular = TRUE) {
## Make base request ----
req <- httr2::request(base_url)

Expand All @@ -25,6 +29,10 @@ ora_identify <- function(base_url = "https://ora.ox.ac.uk/oai2") {
httr2::req_perform() |>
httr2::resp_body_xml()

## Structure resp? ----
if (tabular)
resp <- ora_structure_identify(resp)

## Return resp ----
resp
}
14 changes: 11 additions & 3 deletions R/ora_list_metadata_formats.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@
#' List metadata formats
#'
#' @param base_url The OAI-PMH endpoint base URL. Set to
#' https://ora.ox.ac.uk/oai2
#' https://ora.ox.ac.uk/oai2.
#' @param tabular Logical. Should response be structured into a table?
#' Default is TRUE.
#'
#' @returns XML OAI response
#' @returns A data.frame (default) or an XML document (if `tabular = FALSE`)
#' with identifying information on the OAI metadata formats.
#'
#' @examples
#' ora_list_meta_formats()
#'
#' @export
#'

ora_list_meta_formats <- function(base_url = "https://ora.ox.ac.uk/oai2") {
ora_list_meta_formats <- function(base_url = "https://ora.ox.ac.uk/oai2",
tabular = TRUE) {
## Make base request ----
req <- httr2::request(base_url)

Expand All @@ -25,6 +29,10 @@ ora_list_meta_formats <- function(base_url = "https://ora.ox.ac.uk/oai2") {
httr2::req_perform() |>
httr2::resp_body_xml()

## Structure resp ----
if (tabular)
resp <- ora_structure_meta_formats(resp)

## Return resp ----
resp
}
80 changes: 65 additions & 15 deletions R/ora_structure.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,78 @@
#'
#' Structure ORA XML outputs
#'
#' @param ora_xml XML outputs
#' @param ora_xml XML outputs.
#'
#' @return A structured output
#' @return A tibble of structured outputs.
#'
#' @examples
#' #ora_identify() |> ora_identify_structure()
#' ora_identify()
#' ora_list_meta_formats()
#'
#' @rdname ora_structure
#' @export
#'

ora_structure_identify <- function(ora_xml) {
## Structure the XML response passed in by `ora_identify()` into a tibble.
##
## NOTE(review): the `field_names` and `values` pipelines below look like
## the removed side of the diff (the previous implementation) interleaved
## with the new code. Neither result is used further down, and the
## pipeline starting with a bare `xml2::xml_children()` has no input.
## Confirm against the actual file before relying on them.
field_names <- ora_xml |>
xml2::xml_children() |>
xml2::xml_children() |>
xml2::xml_name()

values <- ora_xml |>
xml2::xml_children() |>
xml2::xml_attrs(".//verb")
xml2::xml_children() |>
xml2::xml_contents() |>
xml2::xml_children() |>
xml2::xml_contents()
## Convert the XML document into a nested R list.
xml_list <- xml2::as_list(ora_xml)

## Flatten one level, then take the first value of the first entry
## (presumably the response date, given the variable name -- confirm
## against the response layout).
responseDate <- xml_list |>
unlist(recursive = FALSE) |>
(\(x) x[[1]][[1]])()

## Build a request string from the `request` element's value, the literal
## "?verb=", and the attributes attached to that element.
request <- xml_list[[1]][2] |>
(\(x) paste0(x$request, "?verb=", attributes(x$request)))()

## Take the 8th item of the third flattened entry, unlist it to a named
## vector and bind it into a tibble.
## NOTE(review): the position (8) is hard-coded; this assumes a fixed
## element order in the response -- TODO confirm.
description <- xml_list |>
unlist(recursive = FALSE) |>
(\(x) x[[3]][8])() |>
unlist() |>
dplyr::bind_rows()

## Keep only the part of each name after the last dot, dropping the
## parent prefixes that `unlist()` prepends.
names(description) <- names(description) |>
tools::file_ext()

## Combine the scalar fields, items 1 to 7 of the third flattened entry
## (also position-based) and the description columns into one tibble.
tibble::tibble(
responseDate,
request,
xml_list |>
unlist(recursive = FALSE) |>
(\(x) x[[3]][1:7] |> unlist() |> dplyr::bind_rows())(),
description
)
}

#'
#' @rdname ora_structure
#' @export
#'

ora_structure_meta_formats <- function(ora_xml) {
  ## Structure the XML response passed in by `ora_list_meta_formats()` into
  ## a single table.
  ##
  ## ora_xml: an XML document that `xml2::as_list()` can convert.
  ## Returns the column-wise binding (`dplyr::bind_cols()`) of the first two
  ## top-level fields and the unnested table built from the third entry.

  ## Convert the XML to a nested list once and reuse it below ----
  xml_nested <- xml2::as_list(ora_xml)

  ## Flatten the root node so its children become top-level entries ----
  xml_list <- unlist(xml_nested, recursive = FALSE)

  ## Collapse the first two entries to their values ----
  xml_list[1:2] <- unlist(xml_list[1:2], recursive = FALSE)

  ## Append the attributes of the root's second child as a "?verb=" query
  ## string (presumably the `request` element's `verb` -- confirm) ----
  xml_list[2] <- paste0(
    xml_list[2],
    "?verb=",
    attributes(xml_nested[[1]][2][[1]])
  )

  ## Keep only the part of each name after the last dot, dropping the
  ## parent prefix that `unlist()` prepends ----
  names(xml_list) <- tools::file_ext(names(xml_list))

  ## Bind the third entry's items into rows and unnest its list-columns.
  ## Columns are selected by name: `.data$col` inside a tidyselect argument
  ## such as `cols` is deprecated since tidyselect 1.2.0 and warns ----
  xml_list[[3]] <- xml_list[[3]] |>
    dplyr::bind_rows() |>
    tidyr::unnest(
      cols = c("metadataPrefix", "schema", "metadataNamespace")
    )

  ## Bind all entries side by side into one table ----
  dplyr::bind_cols(xml_list)
}
6 changes: 3 additions & 3 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,21 @@ knitr::opts_chunk$set(
[![CodeFactor](https://www.codefactor.io/repository/github/oxfordihtm/aurora/badge)](https://www.codefactor.io/repository/github/oxfordihtm/aurora)
<!-- badges: end -->

The [Oxford University Research Archive (ORA)](https://ora.ox.ac.uk/) is an Open Access platform and is the institutional repository for the University of Oxford. ORA serves as a permanent and secure online archive of research materials produced by members of the University of Oxford. ORA supports and participates in the Open Archives Initiative (OAI). ORA is a registered OAI-PMH data-provider and provides metadata for all public records which is updated as soon as each record is published or updated. ORA also provides API access to its resources. This package provides low level functions to interface with the [ORA API](https://ora.ox.ac.uk/api).
The [Oxford University Research Archive (ORA)](https://ora.ox.ac.uk/) is an Open Access platform and is the institutional repository for the [University of Oxford](https://ox.ac.uk). ORA serves as a permanent and secure online archive of research materials produced by members of the University of Oxford. ORA supports and participates in the Open Archives Initiative (OAI). ORA is a registered OAI-PMH data-provider and provides metadata for all public records which is updated as soon as each record is published or updated. ORA also provides API access to its resources. This package provides low level functions to interface with the [ORA API](https://ora.ox.ac.uk/api).

## What does `aurora` do?

Please note that `aurora` is still highly experimental and is undergoing a lot of development. Hence, any functionalities described below and in the rest of the package documentation have a high likelihood of changing interface or approach as we aim for a stable working version.

Currently, the package provides functions that wrap each of the verb endpoints provided by OAI 2.0:
Currently, the package provides functions that wrap each of the verb request endpoints provided by [OAI 2.0](https://www.openarchives.org/OAI/openarchivesprotocol.html):

* `Identify`
* `ListRecords`
* `ListSets`
* `ListMetadataFormats`
* `ListIdentifiers`

From here, the plan is to develope functions that structure these XML OAI responses into useful tabular formats useful for analysis.
From here, the plan is to develop functions that structure these XML OAI responses into tabular formats useful for analysis.

## Installation

Expand Down
31 changes: 16 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ coverage](https://codecov.io/gh/OxfordIHTM/ourora/branch/main/graph/badge.svg)](

The [Oxford University Research Archive (ORA)](https://ora.ox.ac.uk/) is
an Open Access platform and is the institutional repository for the
University of Oxford. ORA serves as a permanent and secure online
archive of research materials produced by members of the University of
Oxford. ORA supports and participates in the Open Archives Initiative
(OAI). ORA is a registered OAI-PMH data-provider and provides metadata
for all public records which is updated as soon as each record is
published or updated. ORA also provides API access to its resources.
[University of Oxford](https://ox.ac.uk). ORA serves as a permanent and
secure online archive of research materials produced by members of the
University of Oxford. ORA supports and participates in the Open Archives
Initiative (OAI). ORA is a registered OAI-PMH data-provider and provides
metadata for all public records which is updated as soon as each record
is published or updated. ORA also provides API access to its resources.
This package provides low level functions to interface with the [ORA
API](https://ora.ox.ac.uk/api).

Expand All @@ -35,7 +35,8 @@ the rest of the package documentation have a high likelihood of changing
interface or approach as we aim for a stable working version.

Currently, the package provides functions that wrap each of the verb
endpoints provided by OAI 2.0:
request endpoints provided by [OAI
2.0](https://www.openarchives.org/OAI/openarchivesprotocol.html):

- `Identify`
- `ListRecords`
Expand All @@ -44,7 +45,7 @@ endpoints provided by OAI 2.0:
- `ListIdentifiers`

From here, the plan is to develop functions that structure these XML
OAI responses into useful tabular formats useful for analysis.
OAI responses into tabular formats useful for analysis.

## Installation

Expand All @@ -67,20 +68,20 @@ citation provided by a call to the `citation()` function as follows:

``` r
citation("aurora")
#> To cite codigo in publications use:
#> To cite aurora in publications use:
#>
#> Anita Makori and Ernest Guevarra (2024). codigo: Interface to the
#> International Classification of Diseases (ICD) API. R package version
#> 0.0.9000. URL https://oxford-ihtm.io/codigo/
#> Ernest Guevarra (2024). aurora: Interface to the Oxford University
#> Research Archive (ORA) API. R package version 0.0.9000. URL
#> https://oxford-ihtm.io/aurora/
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Manual{,
#> title = {codigo: Interface to the International Classification of Diseases (ICD) API},
#> author = {{Anita Makori} and {Ernest Guevarra}},
#> title = {aurora: Interface to the Oxford University Research Archive (ORA) API},
#> author = {{Ernest Guevarra}},
#> year = {2024},
#> note = {R package version 0.0.9000},
#> url = {https://oxford-ihtm.io/codigo/},
#> url = {https://oxford-ihtm.io/aurora/},
#> }
```

Expand Down
14 changes: 7 additions & 7 deletions inst/CITATION
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
citHeader("To cite codigo in publications use:")
citHeader("To cite aurora in publications use:")

citEntry(
entry = "Manual",
title = "codigo: Interface to the International Classification of Diseases (ICD) API",
author = c(person("Anita Makori"), person("Ernest Guevarra")),
title = "aurora: Interface to the Oxford University Research Archive (ORA) API",
author = person("Ernest Guevarra"),
year = "2024",
note = "R package version 0.0.9000",
url = "https://oxford-ihtm.io/codigo/",
url = "https://oxford-ihtm.io/aurora/",
textVersion = paste(
paste("Anita Makori and Ernest Guevarra (2024).",
"codigo: Interface to the International Classification of Diseases (ICD) API.",
paste("Ernest Guevarra (2024).",
"aurora: Interface to the Oxford University Research Archive (ORA) API.",
"R package version 0.0.9000.",
"URL https://oxford-ihtm.io/codigo/")
"URL https://oxford-ihtm.io/aurora/")
)
)
14 changes: 9 additions & 5 deletions man/ora_identify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions man/ora_list_meta_formats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ff5f557

Please sign in to comment.