diff --git a/R/cod_check_death_date.R b/R/cod_check_death_date.R deleted file mode 100644 index 7150dfa..0000000 --- a/R/cod_check_death_date.R +++ /dev/null @@ -1,6 +0,0 @@ -#' -#' Check date of death based on CoDEdit rules -#' -#' @param dod Date of death -#' -#' diff --git a/R/cod_check_dod.R b/R/cod_check_dod.R index 6e30eab..81c66a1 100644 --- a/R/cod_check_dod.R +++ b/R/cod_check_dod.R @@ -15,19 +15,33 @@ #' cod_check_dod <- function(dod) { - dod_check <- ifelse(is.na(dod), 1L, 0L) - + ## Check if dod is not a year value ---- dod_check <- ifelse( nchar(dod) > 4 | stringr::str_detect(dod, pattern = "[a-zA-Z]"), 1L, 0L ) + ## Check if dod is missing ---- + dod_check <- ifelse(is.na(dod), 2L, dod_check) + ## Create dod_check note vector ---- dod_check_note <- vector(mode = "character", length = length(dod)) dod_check_note[dod_check == 0] <- "No issues with date of death value" dod_check_note[dod_check == 1] <- "Date of death value is not in year format" + dod_check_note[dod_check == 2] <- "Missing date of death value" - tibble::tibble(dod_check, dod_check_note) + ## Return output ---- + tibble::tibble(dod_check, dod_check_note) |> + dplyr::mutate( + dod_check_note = factor( + x = dod_check_note, + levels = c( + "No issues with date of death value", + "Date of death value is not in year format", + "Missing date of death value" + ) + ) + ) } diff --git a/R/cod_check_sex.R b/R/cod_check_sex.R index c0488eb..65ad9f1 100644 --- a/R/cod_check_sex.R +++ b/R/cod_check_sex.R @@ -47,5 +47,16 @@ cod_check_sex <- function(sex_value, sex_code = c(1, 2)) { ## Return check ---- - tibble::tibble(sex_check, sex_check_note) + tibble::tibble(sex_check, sex_check_note) |> + dplyr::mutate( + sex_check_note = factor( + x = sex_check_note, + levels = c( + "No issues with sex value", + "Sex value is not an integer", + "Sex value is not any of the expected values", + "Missing sex value" + ) + ) + ) } diff --git a/README.Rmd b/README.Rmd index d07eb36..379d7d3 100644 --- a/README.Rmd +++ 
b/README.Rmd @@ -16,7 +16,7 @@ knitr::opts_chunk$set( # codeditr: Implementing Cause-of-Death Data Checks Based on the WHO CoDEdit Tool -[![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) +[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![R-CMD-check](https://github.com/OxfordIHTM/codeditr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OxfordIHTM/codeditr/actions/workflows/R-CMD-check.yaml) [![test-coverage](https://github.com/OxfordIHTM/codeditr/actions/workflows/test-coverage.yaml/badge.svg)](https://github.com/OxfordIHTM/codeditr/actions/workflows/test-coverage.yaml) @@ -24,13 +24,29 @@ knitr::opts_chunk$set( [![CodeFactor](https://www.codefactor.io/repository/github/OxfordIHTM/codeditr/badge)](https://www.codefactor.io/repository/github/OxfordIHTM/codeditr) -The [World Health Organization](https://www.who.int/)'s [CoDEdit electronic tool](https://www.who.int/standards/classifications/classification-of-diseases/services/codedit-tool) is intended to help producers of cause-of-death statistics in strengthening their capacity to perform routine checks on their data. This package ports the original tool built using Microsoft Access into R so as to leverage the utility and function of the original tool into a usable application program interface that can be used for building more universal tools or for creating programmatic scientific workflows aimed at routine, automated, and large-scale monitoring of cause-of-death data. 
+The [World Health Organization](https://www.who.int/)'s [CoDEdit electronic tool](https://www.who.int/standards/classifications/classification-of-diseases/services/codedit-tool) is intended to help producers of cause-of-death statistics in strengthening their capacity to perform routine checks on their data. This package ports the original tool built using Microsoft Access into R. The aim is to leverage the utility and function of the original tool into a usable application program interface (API) that can be used for building more universal tools or for creating programmatic scientific workflows aimed at routine, automated, and large-scale monitoring of cause-of-death data. ## What does `codeditr` do? The `codeditr` package provides functions for implementing data quality checks on cause-of-death records. It is built upon the same heuristics and algorithms that the WHO CoDEdit v2.0 electronic tool uses to evaluate quality of cause-of-death data produced and used by country level statisticians in charge of mortality reporting. -Through this package, end users will have the ability to programmatically create scientific workflows for research on cause-of-death data and their quality or build more open tools or applications for routine monitoring of cause-of-death data. +Through this package, end users will have the ability to programmatically create scientific workflows for routine monitoring and evaluation and/or research on cause-of-death data and their quality. They can also build more open tools or applications for routine monitoring of cause-of-death data without having to rely on the proprietary Microsoft Access software. + +Currently, the `codeditr` package supports the following use cases: + +1. 
Cause-of-death dataset preparation for use in the CoDEdit tool + +To be able to use WHO's CoDEdit tool built on Microsoft Access, the user can either enter their cause-of-death data into the tool itself using a spreadsheet style input system with very specific input fields or through uploading of a Microsoft `.xlsx` file that is structured in a specific way required by the tool. + +The `codeditr` package has a set of functions that support checking that specific required variables are formatted to be compatible with the CoDEdit tool and structuring these variables into a dataset that is consistent with what is required for uploading to the CoDEdit tool. + +This use case is for those who would still prefer to use WHO's CoDEdit tool using Microsoft Access (either for continuity purposes or for consistency with organisational policy) but would like to have the functionality of converting their existing cause-of-death dataset into a CoDEdit tool-compatible format and structure. + +2. Cause-of-death dataset checking to identify data quality issues + +The `codeditr` package has a set of functions that perform all the checks that the CoDEdit tool performs in addition to other general data quality checks. These functions produce output similar to that of the CoDEdit tool. + +This use case is for those who prefer not to use Microsoft Access (either because they don't already own a copy of this software or because their purpose for data quality checks is for large-scale datasets) and would like a completely programmatic approach to performing routine and potentially large-scale cause-of-death data quality checks. ## Installation @@ -45,13 +61,27 @@ install.packages( then load `codeditr` -```{r gh-installation2, eval = FALSE} +```{r gh-installation2, eval = TRUE} # load package library(codeditr) ``` ## Usage +### Support to usage of CoDEdit tool + +1. 
Perform checks on existing input data for CoDEdit tool + +Using the `icd10_example` dataset which is a dataset already formatted into a compatible structure required by the CoDEdit tool, we can perform a check on this dataset to see possible issues in its formatting and structure before using with the CoDEdit tool. + +```{r use-case-1} +cod_check_codedit_input(icd10_example) +``` + + + +### CoDEdit tool replacement workflow + ## Citation If you find the `codeditr` package useful please cite using the suggested citation provided by a call to the `citation()` function as follows: diff --git a/README.md b/README.md index 84db387..9a2f0dd 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ -[![Project Status: WIP – Initial development is in progress, but there -has not yet been a stable, usable release suitable for the -public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) +[![Project Status: Active – The project has reached a stable, usable +state and is being actively +developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![R-CMD-check](https://github.com/OxfordIHTM/codeditr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/OxfordIHTM/codeditr/actions/workflows/R-CMD-check.yaml) @@ -22,12 +22,12 @@ electronic tool](https://www.who.int/standards/classifications/classification-of-diseases/services/codedit-tool) is intended to help producers of cause-of-death statistics in strengthening their capacity to perform routine checks on their data. 
-This package ports the original tool built using Microsoft Access into R -so as to leverage the utility and function of the original tool into a -usable application program interface that can be used for building more -universal tools or for creating programmatic scientific workflows aimed -at routine, automated, and large-scale monitoring of cause-of-death -data. +This package ports the original tool built using Microsoft Access into +R. The aim is to leverage the utility and function of the original tool +into a usable application program interface (API) that can be used for +building more universal tools or for creating programmatic scientific +workflows aimed at routine, automated, and large-scale monitoring of +cause-of-death data. ## What does `codeditr` do? @@ -38,9 +38,45 @@ evaluate quality of cause-of-death data produced and used by country level statisticians in charge of mortality reporting. Through this package, end users will have the ability to -programmatically create scientific workflows for research on -cause-of-death data and their quality or build more open tools or -applications for routine monitoring of cause-of-death data. +programmatically create scientific workflows for routine monitoring and +evaluation and/or research on cause-of-death data and their quality. +They can also build more open tools or applications for routine +monitoring of cause-of-death data without having to rely on the +proprietary Microsoft Access software. + +Currently, the `codeditr` package supports the following use cases: + +1. Cause-of-death dataset preparation for use in the CoDEdit tool + +To be able to use WHO’s CoDEdit tool built on Microsoft Access, the user +can either enter their cause-of-death data into the tool itself using a +spreadsheet style input system with very specific input fields or +through uploading of a Microsoft `.xlsx` file that is structured in a +specific way required by the tool. 
+ +The `codeditr` package has a set of functions that support checking +that specific required variables are formatted to be compatible with the +CoDEdit tool and structuring these variables into a dataset that is +consistent with what is required for uploading to the CoDEdit tool. + +This use case is for those who would still prefer to use WHO’s CoDEdit +tool using Microsoft Access (either for continuity purposes or for +consistency with organisational policy) but would like to have the +functionality of converting their existing cause-of-death dataset into a +CoDEdit tool-compatible format and structure. + +2. Cause-of-death dataset checking to identify data quality issues + +The `codeditr` package has a set of functions that perform all the +checks that the CoDEdit tool performs in addition to other general data +quality checks. These functions produce output similar to that of the +CoDEdit tool. + +This use case is for those who prefer not to use Microsoft Access +(either because they don’t already own a copy of this software or because +their purpose for data quality checks is for large-scale datasets) and +would like a completely programmatic approach to performing routine and +potentially large-scale cause-of-death data quality checks. ## Installation @@ -63,6 +99,36 @@ library(codeditr) ## Usage +### Support to usage of CoDEdit tool + +1. Perform checks on existing input data for CoDEdit tool + +Using the `icd10_example` dataset which is a dataset already formatted +into a compatible structure required by the CoDEdit tool, we can perform +a check on this dataset to see possible issues in its formatting and +structure before using with the CoDEdit tool. 
+ +``` r +cod_check_codedit_input(icd10_example) +#> # A tibble: 3,613 × 8 +#> sex_check sex_check_note age_check age_check_note code_check code_check_note +#> +#> 1 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 2 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 3 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 4 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 5 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 6 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 7 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 8 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 9 0 No issues with… 0 No issues wit… 0 Cause of death… +#> 10 0 No issues with… 0 No issues wit… 0 Cause of death… +#> # ℹ 3,603 more rows +#> # ℹ 2 more variables: dod_check , dod_check_note +``` + +### CoDEdit tool replacement workflow + ## Citation If you find the `codeditr` package useful please cite using the diff --git a/inst/WORDLIST b/inst/WORDLIST index 55242a2..7d4a355 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,5 +1,6 @@ CMD CoDEdit +CodEdit CodeFactor Codecov DD @@ -11,7 +12,8 @@ Lifecycle ORCID Udoh Umanah -WIP +WHO's +WHO’s YYYY codedit dod diff --git a/tests/testthat/test-cod_check_dod.R b/tests/testthat/test-cod_check_dod.R index 3c49acb..8d70608 100644 --- a/tests/testthat/test-cod_check_dod.R +++ b/tests/testthat/test-cod_check_dod.R @@ -8,7 +8,14 @@ testthat::test_that( cod_check_dod("2024"), tibble::tibble( dod_check = 0L, - dod_check_note = "No issues with date of death value" + dod_check_note = factor( + x = "No issues with date of death value", + levels = c( + "No issues with date of death value", + "Date of death value is not in year format", + "Missing date of death value" + ) + ) ) ) @@ -16,7 +23,29 @@ testthat::test_that( cod_check_dod("2024-06-01"), tibble::tibble( dod_check = 1L, - dod_check_note = "Date of death value is not in year format" + dod_check_note = factor( + x = "Date of death value is not in 
year format", + levels = c( + "No issues with date of death value", + "Date of death value is not in year format", + "Missing date of death value" + ) + ) + ) + ) + + expect_identical( + cod_check_dod(NA_character_), + tibble::tibble( + dod_check = 2L, + dod_check_note = factor( + x = "Missing date of death value", + levels = c( + "No issues with date of death value", + "Date of death value is not in year format", + "Missing date of death value" + ) + ) ) ) }) diff --git a/tests/testthat/test-cod_check_sex.R b/tests/testthat/test-cod_check_sex.R index 3246fd6..d4e2e8a 100644 --- a/tests/testthat/test-cod_check_sex.R +++ b/tests/testthat/test-cod_check_sex.R @@ -10,7 +10,15 @@ sex_value3 <- c("M", "M", "M", "F", "F", "M", NA_character_) sex_code3 <- c("M", "F") sex_check <- c(rep(0L, 6), 3L) -sex_check_note <- c(rep("No issues with sex value", 6), "Missing sex value") +sex_check_note <- factor( + x = c(rep("No issues with sex value", 6), "Missing sex value"), + levels = c( + "No issues with sex value", + "Sex value is not an integer", + "Sex value is not any of the expected values", + "Missing sex value" + ) +) testthat::test_that(