From c72f5a01acab727b40974e92b9f12d6520905fe4 Mon Sep 17 00:00:00 2001 From: edward-burn <9583964+edward-burn@users.noreply.github.com> Date: Mon, 18 Dec 2023 10:06:57 +0000 Subject: [PATCH] updates --- .gitignore | 1 + DESCRIPTION | 7 +- NAMESPACE | 2 + R/cohortTiming.R | 4 + R/generateMatchedCohortSet.R | 6 +- R/requireDateRange.R | 162 +++++++++++++ R/requireDemographics.R | 79 ------- README.Rmd | 78 +++++-- README.md | 221 ++++++++++++------ man/requireInDateRange.Rd | 27 +++ man/trimToDateRange.Rd | 26 +++ .../testthat/test-generateMatchedCohortSet.R | 31 ++- tests/testthat/test-requireDateRange.R | 60 +++++ tests/testthat/test-requireDemographics.R | 23 -- vignettes/.gitignore | 2 + vignettes/a01_building_concept_cohorts.Rmd | 30 +++ .../a02_applying_cohort_restrictions.Rmd | 19 ++ vignettes/a03_age_sex_matching.Rmd | 19 ++ vignettes/a04_cohort_timing.Rmd | 19 ++ 19 files changed, 616 insertions(+), 200 deletions(-) create mode 100644 R/cohortTiming.R create mode 100644 R/requireDateRange.R create mode 100644 man/requireInDateRange.Rd create mode 100644 man/trimToDateRange.Rd create mode 100644 tests/testthat/test-requireDateRange.R create mode 100644 vignettes/.gitignore create mode 100644 vignettes/a01_building_concept_cohorts.Rmd create mode 100644 vignettes/a02_applying_cohort_restrictions.Rmd create mode 100644 vignettes/a03_age_sex_matching.Rmd create mode 100644 vignettes/a04_cohort_timing.Rmd diff --git a/.gitignore b/.gitignore index 565f2b6a..f47ffaba 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .Rdata .httr-oauth .DS_Store +inst/doc diff --git a/DESCRIPTION b/DESCRIPTION index c2a5fc72..3c9a4773 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,12 @@ Suggests: DBI, DrugUtilisation, duckdb, + knitr, + rmarkdown, testthat (>= 3.0.0), - tibble + tibble, + stringr, + IncidencePrevalence Config/testthat/edition: 3 Config/testthat/parallel: true +VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 3b040513..c84a6a8c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,10 +8,12 @@ export(requireAge) export(requireCohortIntersectFlag) export(requireDemographics) export(requireFutureObservation) +export(requireInDateRange) export(requirePriorObservation) export(requireSex) export(restrictToFirstEntry) export(splitOverlap) +export(trimToDateRange) importFrom(magrittr,"%>%") importFrom(rlang,":=") importFrom(rlang,.data) diff --git a/R/cohortTiming.R b/R/cohortTiming.R new file mode 100644 index 00000000..df0db1e0 --- /dev/null +++ b/R/cohortTiming.R @@ -0,0 +1,4 @@ +cohortTiming <- function(){ + + # output as summarised result +} diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R index 7576c2e7..4a6ecde9 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/generateMatchedCohortSet.R @@ -141,8 +141,8 @@ randomPrefix <- function(n = 5) { getNumberOfCohorts <- function(cdm, targetCohortName){ # Read number of cohorts n <- cdm[[targetCohortName]] %>% - dplyr::summarise(v = max(.data$cohort_definition_id)) %>% - dplyr::pull(.data$v) # number of different cohorts + dplyr::summarise(v = max(.data$cohort_definition_id, na.rm = TRUE)) %>% + dplyr::pull("v") # number of different cohorts if(is.na(n)){# Empty table, number of cohorts is 0 n <- 0 @@ -343,7 +343,7 @@ infiniteMatching <- function(cdm, name, targetCohortId){ cdm[[name]] %>% dplyr::filter(.data$cohort_definition_id %in% .env$targetCohortId) %>% dplyr::group_by(.data$cohort_definition_id, .data$group_id) %>% - dplyr::mutate(max_cases = max(.data$pair_id)) %>% + dplyr::mutate(max_cases = max(.data$pair_id, na.rm = TRUE)) %>% dplyr::ungroup() %>% dplyr::select("group_id", "target_definition_id", "max_cases"), by = c("group_id", "target_definition_id") diff --git a/R/requireDateRange.R b/R/requireDateRange.R new file mode 100644 index 00000000..c4c2c2b2 --- /dev/null +++ b/R/requireDateRange.R @@ -0,0 +1,162 @@ +#' Require that an index date is within a date range +#' +#' @param cohort A cohort table in a cdm reference +#' @param indexDate Variable in cohort that contains the index date of interest +#' @param dateRange A window of time during which the index date must have +#' been observed +#' +#' @return The cohort table with any cohort entries outside of the date range +#' dropped +#' @export +#' +#' @examples +requireInDateRange <- function(cohort, + indexDate = "cohort_start_date", + dateRange = as.Date(c(NA, NA))) { + + checkCohort(cohort) + checkDateVariable(cohort = cohort, dateVar = indexDate) + checkDateRange(dateRange) + + cohort <- cohort %>% + dplyr::filter(.data[[indexDate]] >= !!dateRange[1] & + .data[[indexDate]] <= !!dateRange[2]) %>% + CDMConnector::recordCohortAttrition(reason = paste0( + indexDate, + " between ", dateRange[1], " and ", dateRange[2] + )) + + cohort + +} + +#' Trim cohort dates to be within a date range +#' +#' @param cohort A cohort table in a cdm reference +#' @param startDate Variable with earliest date +#' @param endDate Variable with latest date +#' @param dateRange A window of time during which the index date must have +#' been observed +#' +#' @return +#' @export +#' +#' @examples +trimToDateRange <- function(cohort, + startDate = "cohort_start_date", + endDate = "cohort_end_date", + dateRange = as.Date(c(NA, NA))) { + + checkCohort(cohort) + checkDateVariable(cohort = cohort, dateVar = startDate) + checkDateVariable(cohort = cohort, dateVar = endDate) + checkDateRange(dateRange) + + # + # # validate inputs + # if (!isTRUE(inherits(cdm, "cdm_reference"))) { + # cli::cli_abort("cohort must be part of a cdm reference") + # } + # if (!"GeneratedCohortSet" %in% class(cohort) || + # !all(c( + # "cohort_definition_id", "subject_id", + # "cohort_start_date", "cohort_end_date" + # ) %in% + # colnames(cohort))) { + # cli::cli_abort("cohort must be a GeneratedCohortSet") + # } + # + # if (!indexDate %in% colnames(cohort)) { + # cli::cli_abort(paste0(indexDate, " must be a date column in the cohort table")) + # } + # + # if (!endDateName %in% colnames(cohort)) { + # cli::cli_abort(paste0(endDateName, " must be a date column in the cohort table")) + # } + + cohort <- trimStartDate(cohort = cohort, + startDate = startDate, + endDate = endDate, + minDate = dateRange[1] + ) %>% + CDMConnector::recordCohortAttrition(reason = paste0( + startDate, + " >= ", dateRange[1])) + + cohort <- trimEndDate( + cohort = cohort, + startDate = startDate, + endDate = endDate, + maxDate = dateRange[2] + ) %>% + CDMConnector::recordCohortAttrition(reason = paste0( + endDate, + " <= ", dateRange[2] + )) +} + +trimStartDate <- function(cohort, + startDate, + endDate, + minDate) { + + if (!is.na(startDate)) { + cohort <- cohort %>% + dplyr::mutate(!!startDate := dplyr::if_else( + .data[[startDate]] <= !!minDate, + as.Date(minDate), .data[[startDate]] + )) %>% + dplyr::filter(.data[[startDate]] <= .data[[endDate]]) + } + return(cohort) +} +trimEndDate <- function( + cohort, + startDate, + endDate, + maxDate) { + + if (!is.na(endDate)) { + cohort <- cohort %>% + dplyr::mutate(!!endDate := dplyr::if_else( + .data[[endDate]] >= !!maxDate, + as.Date(maxDate), .data[[endDate]] + )) %>% + dplyr::filter(.data[[startDate]] <= .data[[endDate]]) + } + return(cohort) +} + + +checkCohort <- function(cohort){ + if (!"GeneratedCohortSet" %in% class(cohort) || + !all(c( + "cohort_definition_id", "subject_id", + "cohort_start_date", "cohort_end_date" + ) %in% + colnames(cohort))) { + cli::cli_abort("cohort must be a GeneratedCohortSet") + } +} + +checkDateVariable <- function(cohort, dateVar){ + if (!dateVar %in% colnames(cohort)) { + cli::cli_abort(paste0(dateVar, " must be a date column in the cohort table")) + } +} + +checkDateRange<-function(dateRange){ + if(!"Date" %in% class(dateRange)){ + cli::cli_abort("dateRange is not a date") + } + if(length(dateRange) != 2){ + cli::cli_abort("dateRange must be length two") + } + if(dateRange[1]>dateRange[2]){ + cli::cli_abort("First date in dateRange cannot be after second") + } + return(invisible(dateRange)) +} + + + diff --git a/R/requireDemographics.R b/R/requireDemographics.R index f9ddfae6..d98a2654 100644 --- a/R/requireDemographics.R +++ b/R/requireDemographics.R @@ -280,82 +280,3 @@ demographicsFilter <- function(cohort, } -requireDateRange <- function(cohort, - indexDateName = "cohort_start_date", - endDateName = "cohort_end_date", - cohortDateRange = as.Date(c(NA, NA))) { - cdm <- attr(cohort, "cdm_reference") - - # validate inputs - if (!isTRUE(inherits(cdm, "cdm_reference"))) { - cli::cli_abort("cohort must be part of a cdm reference") - } - if (!"GeneratedCohortSet" %in% class(cohort) || - !all(c( - "cohort_definition_id", "subject_id", - "cohort_start_date", "cohort_end_date" - ) %in% - colnames(cohort))) { - cli::cli_abort("cohort must be a GeneratedCohortSet") - } - - if (!indexDateName %in% colnames(cohort)) { - cli::cli_abort(paste0(indexDateName, " must be a date column in the cohort table")) - } - - if (!endDateName %in% colnames(cohort)) { - cli::cli_abort(paste0(endDateName, " must be a date column in the cohort table")) - } - - - cohort <- trimStartDate(cohort, - cdm, - indexDateName, - endDateName, - startDate = cohortDateRange[1] - ) - cohort <- cohort %>% - CDMConnector::recordCohortAttrition(reason = paste0( - indexDateName, - " >= ", cohortDateRange[1] - )) - - cohort <- trimEndDate( - cohort, - cdm, - indexDateName, - endDateName, - cohortDateRange[2] - ) - - cohort <- cohort %>% - CDMConnector::recordCohortAttrition(reason = paste0( - indexDateName, - " <= ", cohortDateRange[2] - )) -} - - -trimStartDate <- function(cohort, cdm, indexDateName, endDateName, startDate) { - if (!is.na(startDate)) { - cohort <- cohort %>% - dplyr::mutate(!!indexDateName := dplyr::if_else( - .data[[indexDateName]] <= !!startDate, - as.Date(startDate), .data[[indexDateName]] - )) %>% - dplyr::filter(.data[[indexDateName]] <= .data[[endDateName]]) - } - return(cohort) -} - -trimEndDate <- function(cohort, cdm, indexDateName, endDateName, endDate) { - if (!is.na(endDate)) { - cohort <- cohort %>% - dplyr::mutate(!!endDateName := dplyr::if_else( - .data[[endDateName]] >= !!endDate, - as.Date(endDate), .data[[endDateName]] - )) %>% - dplyr::filter(.data[[indexDateName]] <= .data[[endDateName]]) - } - return(cohort) -} diff --git a/README.Rmd b/README.Rmd index 4607590d..c7aad010 100644 --- a/README.Rmd +++ b/README.Rmd @@ -22,7 +22,7 @@ knitr::opts_chunk$set( [![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -The goal of CohortConstructor is to help on the creation and manipulation of cohorts in the OMOP Common Data Model. +The goal of CohortConstructor is to help on the creation and manipulation of cohorts in the OMOP Common Data Model. The package provides functions to support cohort building pipelines and additional functions to support cohort evaluation. ## Installation @@ -33,11 +33,13 @@ You can install the development version of CohortConstructor from [GitHub](https devtools::install_github("oxford-pharmacoepi/CohortConstructor") ``` -## Example usage +## Creating and manipulating cohorts ``` {r} library(CDMConnector) library(PatientProfiles) +library(DrugUtilisation) +library(dplyr) library(CohortConstructor) con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir()) @@ -46,22 +48,52 @@ cdm <- cdm_from_con(con, cdm_schema = "main", ``` ### Generating concept based cohorts +We'll start by generating a set of drug cohorts, using generateDrugUtilisationCohortSet from the DrugUtilisation R package. Here we make two cohorts, one for diclofenac and another for acetaminophen, combining records with a gap of 7 days or less. ``` {r} -cdm <- generate_concept_cohort_set(cdm = cdm, +cdm <- generateDrugUtilisationCohortSet(cdm = cdm, name = "medications", - concept_set = list("diclofenac" = 1124300, - "acetaminophen" = 1127433)) -cohort_count(cdm$medications) -cohort_attrition(cdm$medications) + conceptSet = list("diclofenac" = 1124300, + "acetaminophen" = 1127433), + gapEra = 7) +``` + +We can see that our starting cohorts, before we add any additional restrictions, have the following counts +``` {r} +cohort_set(cdm$medications) %>% glimpse() +cohort_count(cdm$medications) %>% glimpse() +``` + +### Require in date range +We can require that individuals' cohort start date fall within a certain date range. +``` {r} +cdm$medications <- cdm$medications %>% + requireInDateRange(indexDate = "cohort_start_date", + dateRange = as.Date(c("2000-01-01", "2020-01-01"))) +``` + +Now that we've applied these date restrictions, we can see how many people and records have been excluded +``` {r} +cohort_count(cdm$medications) %>% glimpse() +cohort_attrition(cdm$medications) %>% glimpse() +cohort_attrition(cdm$medications) %>% + filter(reason == "cohort_start_date between 2000-01-01 and 2020-01-01") %>% + glimpse() ``` ### Applying demographic requirements +We can also add restrictions on age (on cohort start date) and sex. ``` {r} -cdm$medications %>% - requireDemographics(ageRange = list(c(40, 65)), +cdm$medications <- cdm$medications %>% + requireDemographics(indexDate = "cohort_start_date", + ageRange = list(c(40, 65)), sex = "Female") -cohort_count(cdm$medications) -cohort_attrition(cdm$medications) +``` + +Again we can see how many individuals we've lost after applying this criteria. +``` {r} +cohort_attrition(cdm$medications) %>% + filter(reason == "Demographic requirements") %>% + glimpse() ``` ### Require presence in another cohort @@ -74,14 +106,27 @@ cdm <- generate_concept_cohort_set(cdm = cdm, cdm$medications <- cdm$medications %>% requireCohortIntersectFlag(targetCohortTable = "gibleed", window = c(-Inf, 0)) - -cohort_count(cdm$medications) -cohort_attrition(cdm$medications) ``` +``` {r} +cohort_attrition(cdm$medications) %>% + filter(reason == "In cohort gibleed between -Inf and 0 days relative to cohort_start_date") %>% + glimpse() +``` ### Combining cohorts +Currently we have two separate cohorts. One of users of diclofenac, the other of users of acetaminophen. + +Let's say we want to create a cohort of people taking **either** diclofenac or acetaminophen. We could create this cohort like so: +```{r} + +``` + +Alternatively, we might want to create a cohort of people taking **both** diclofenac and acetaminophen. For this we can create this combination cohort like so: + +Both diclofenac and acetaminophen + Generate a combination cohort. ```{r} @@ -91,8 +136,11 @@ cdm <- generateCombinationCohortSet(cdm = cdm, cohortSet(cdm$combinations) cohortCount(cdm$combinations) +``` - +```{r} cdmDisconnect(cdm) ``` + +## Evaluating cohorts diff --git a/README.md b/README.md index bf1313ba..bf14e598 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ status](https://www.r-pkg.org/badges/version/CohortConstructor)](https://CRAN.R- The goal of CohortConstructor is to help on the creation and -manipulation of cohorts in the OMOP Common Data Model. +manipulation of cohorts in the OMOP Common Data Model. The package +provides functions to support cohort building pipelines and additional +functions to support cohort evaluation. ## Installation @@ -25,13 +27,23 @@ You can install the development version of CohortConstructor from devtools::install_github("oxford-pharmacoepi/CohortConstructor") ``` -## Example usage +## Creating and manipulating cohorts ``` r library(CDMConnector) -#> Warning: package 'CDMConnector' was built under R version 4.2.3 library(PatientProfiles) -#> Warning: package 'PatientProfiles' was built under R version 4.2.3 +library(DrugUtilisation) +#> Warning: package 'DrugUtilisation' was built under R version 4.2.3 +library(dplyr) +#> Warning: package 'dplyr' was built under R version 4.2.3 +#> +#> Attaching package: 'dplyr' +#> The following objects are masked from 'package:stats': +#> +#> filter, lag +#> The following objects are masked from 'package:base': +#> +#> intersect, setdiff, setequal, union library(CohortConstructor) con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir()) @@ -41,62 +53,117 @@ cdm <- cdm_from_con(con, cdm_schema = "main", ### Generating concept based cohorts +We’ll start by generating a set of drug cohorts, using +generateDrugUtilisationCohortSet from the DrugUtilisation R package. +Here we make two cohorts, one for diclofenac and another for +acetaminophen, combining records with a gap of 7 days or less. + ``` r -cdm <- generate_concept_cohort_set(cdm = cdm, +cdm <- generateDrugUtilisationCohortSet(cdm = cdm, name = "medications", - concept_set = list("diclofenac" = 1124300, - "acetaminophen" = 1127433)) -cohort_count(cdm$medications) -#> # A tibble: 2 × 3 -#> cohort_definition_id number_records number_subjects -#> -#> 1 1 830 830 -#> 2 2 2580 2580 -cohort_attrition(cdm$medications) -#> # A tibble: 2 × 7 -#> cohort_definition_id number_records number_subjects reason_id reason -#> -#> 1 1 830 830 1 Qualifying init… -#> 2 2 2580 2580 1 Qualifying init… -#> # ℹ 2 more variables: excluded_records , excluded_subjects + conceptSet = list("diclofenac" = 1124300, + "acetaminophen" = 1127433), + gapEra = 7) +``` + +We can see that our starting cohorts, before we add any additional +restrictions, have the following counts + +``` r +cohort_set(cdm$medications) %>% glimpse() +#> Rows: 2 +#> Columns: 11 +#> $ cohort_definition_id 1, 2 +#> $ cohort_name "diclofenac", "acetaminophen" +#> $ duration_range_min "1", "1" +#> $ duration_range_max "Inf", "Inf" +#> $ impute_duration "none", "none" +#> $ gap_era "7", "7" +#> $ prior_use_washout "0", "0" +#> $ prior_observation "0", "0" +#> $ cohort_date_range_start NA, NA +#> $ cohort_date_range_end NA, NA +#> $ limit "all", "all" +cohort_count(cdm$medications) %>% glimpse() +#> Rows: 2 +#> Columns: 3 +#> $ cohort_definition_id 2, 1 +#> $ number_records 9363, 830 +#> $ number_subjects 2580, 830 +``` + +### Require in date range + +We can require that individuals’ cohort start date fall within a certain +date range. + +``` r +cdm$medications <- cdm$medications %>% + requireInDateRange(indexDate = "cohort_start_date", + dateRange = as.Date(c("2000-01-01", "2020-01-01"))) +``` + +Now that we’ve applied these date restrictions, we can see how many +people and records have been excluded + +``` r +cohort_count(cdm$medications) %>% glimpse() +#> Rows: 2 +#> Columns: 3 +#> $ cohort_definition_id 2, 1 +#> $ number_records 2750, 397 +#> $ number_subjects 1737, 397 +cohort_attrition(cdm$medications) %>% glimpse() +#> Rows: 16 +#> Columns: 7 +#> $ cohort_definition_id 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1 +#> $ number_records 9365, 9365, 9363, 9363, 9363, 9363, 9363, 850, 85… +#> $ number_subjects 2580, 2580, 2580, 2580, 2580, 2580, 2580, 850, 85… +#> $ reason_id 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8, 8 +#> $ reason "Qualifying initial records", "Duration imputatio… +#> $ excluded_records 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 6613, … +#> $ excluded_subjects 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 843, 4… +cohort_attrition(cdm$medications) %>% + filter(reason == "cohort_start_date between 2000-01-01 and 2020-01-01") %>% + glimpse() +#> Rows: 2 +#> Columns: 7 +#> $ cohort_definition_id 2, 1 +#> $ number_records 2750, 397 +#> $ number_subjects 1737, 397 +#> $ reason_id 8, 8 +#> $ reason "cohort_start_date between 2000-01-01 and 2020-01… +#> $ excluded_records 6613, 433 +#> $ excluded_subjects 843, 433 ``` ### Applying demographic requirements +We can also add restrictions on age (on cohort start date) and sex. + ``` r -cdm$medications %>% - requireDemographics(ageRange = list(c(40, 65)), +cdm$medications <- cdm$medications %>% + requireDemographics(indexDate = "cohort_start_date", + ageRange = list(c(40, 65)), sex = "Female") -#> # Source: SQL [?? x 4] -#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpIngDmK\file4f3841e26e9e.duckdb] -#> cohort_definition_id subject_id cohort_start_date cohort_end_date -#> -#> 1 1 18 2009-03-21 2018-11-07 -#> 2 1 893 1993-09-26 2019-05-06 -#> 3 1 2396 1961-08-30 2001-02-28 -#> 4 1 3159 2000-01-26 2018-10-18 -#> 5 1 3376 1994-05-06 2019-06-28 -#> 6 1 4071 1998-08-07 2018-12-27 -#> 7 1 4636 1986-10-26 2018-10-23 -#> 8 1 4690 2001-03-20 2018-10-14 -#> 9 1 4701 2011-07-10 2018-12-22 -#> 10 2 3951 1997-12-11 2019-04-13 -#> # ℹ more rows -cohort_count(cdm$medications) -#> # A tibble: 2 × 3 -#> cohort_definition_id number_records number_subjects -#> -#> 1 1 156 156 -#> 2 2 76 76 -cohort_attrition(cdm$medications) -#> # A tibble: 4 × 7 -#> cohort_definition_id number_records number_subjects reason_id reason -#> -#> 1 1 830 830 1 Qualifying init… -#> 2 2 2580 2580 1 Qualifying init… -#> 3 2 76 76 2 Demographic req… -#> 4 1 156 156 2 Demographic req… -#> # ℹ 2 more variables: excluded_records , excluded_subjects +``` + +Again we can see how many individuals we’ve lost after applying this +criteria. + +``` r +cohort_attrition(cdm$medications) %>% + filter(reason == "Demographic requirements") %>% + glimpse() +#> Rows: 2 +#> Columns: 7 +#> $ cohort_definition_id 2, 1 +#> $ number_records 787, 75 +#> $ number_subjects 551, 75 +#> $ reason_id 9, 9 +#> $ reason "Demographic requirements", "Demographic requirem… +#> $ excluded_records 1963, 322 +#> $ excluded_subjects 1186, 322 ``` ### Require presence in another cohort @@ -113,28 +180,37 @@ cdm <- generate_concept_cohort_set(cdm = cdm, cdm$medications <- cdm$medications %>% requireCohortIntersectFlag(targetCohortTable = "gibleed", window = c(-Inf, 0)) +``` -cohort_count(cdm$medications) -#> # A tibble: 2 × 3 -#> cohort_definition_id number_records number_subjects -#> -#> 1 2 36 36 -#> 2 1 0 0 -cohort_attrition(cdm$medications) -#> # A tibble: 6 × 7 -#> cohort_definition_id number_records number_subjects reason_id reason -#> -#> 1 1 830 830 1 Qualifying init… -#> 2 2 2580 2580 1 Qualifying init… -#> 3 2 76 76 2 Demographic req… -#> 4 1 156 156 2 Demographic req… -#> 5 2 36 36 3 In cohort gible… -#> 6 1 0 0 3 In cohort gible… -#> # ℹ 2 more variables: excluded_records , excluded_subjects +``` r +cohort_attrition(cdm$medications) %>% + filter(reason == "In cohort gibleed between -Inf and 0 days relative to cohort_start_date") %>% + glimpse() +#> Rows: 2 +#> Columns: 7 +#> $ cohort_definition_id 2, 1 +#> $ number_records 136, 0 +#> $ number_subjects 89, 0 +#> $ reason_id 10, 10 +#> $ reason "In cohort gibleed between -Inf and 0 days relati… +#> $ excluded_records 651, 75 +#> $ excluded_subjects 462, 75 ``` ### Combining cohorts +Currently we have two separate cohorts. One of users of diclofenac, the +other of users of acetaminophen. + +Let’s say we want to create a cohort of people taking **either** +diclofenac or acetaminophen. We could create this cohort like so: + +Alternatively, we might want to create a cohort of people taking +**both** diclofenac and acetaminophen. For this we can create this +combination cohort like so: + +Both diclofenac and acetaminophen + Generate a combination cohort. ``` r @@ -153,10 +229,13 @@ cohortCount(cdm$combinations) #> # A tibble: 3 × 3 #> cohort_definition_id number_records number_subjects #> -#> 1 2 36 36 +#> 1 2 136 89 #> 2 1 0 0 #> 3 3 0 0 +``` - +``` r cdmDisconnect(cdm) ``` + +## Evaluating cohorts diff --git a/man/requireInDateRange.Rd b/man/requireInDateRange.Rd new file mode 100644 index 00000000..d03705cf --- /dev/null +++ b/man/requireInDateRange.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireDateRange.R +\name{requireInDateRange} +\alias{requireInDateRange} +\title{Require that an index date is within a date range} +\usage{ +requireInDateRange( + cohort, + indexDate = "cohort_start_date", + dateRange = as.Date(c(NA, NA)) +) +} +\arguments{ +\item{cohort}{A cohort table in a cdm reference} + +\item{indexDate}{Variable in cohort that contains the index date of interest} + +\item{dateRange}{A window of time during which the index date must have +been observed} +} +\value{ +The cohort table with any cohort entries outside of the date range +dropped +} +\description{ +Require that an index date is within a date range +} diff --git a/man/trimToDateRange.Rd b/man/trimToDateRange.Rd new file mode 100644 index 00000000..59cb6f35 --- /dev/null +++ b/man/trimToDateRange.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/requireDateRange.R +\name{trimToDateRange} +\alias{trimToDateRange} +\title{Trim cohort dates to be within a date range} +\usage{ +trimToDateRange( + cohort, + startDate = "cohort_start_date", + endDate = "cohort_end_date", + dateRange = as.Date(c(NA, NA)) +) +} +\arguments{ +\item{cohort}{A cohort table in a cdm reference} + +\item{startDate}{Variable with earliest date} + +\item{endDate}{Variable with latest date} + +\item{dateRange}{A window of time during which the index date must have +been observed} +} +\description{ +Trim cohort dates to be within a date range +} diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R index 87c20da4..704857b9 100644 --- a/tests/testthat/test-generateMatchedCohortSet.R +++ b/tests/testthat/test-generateMatchedCohortSet.R @@ -128,16 +128,31 @@ test_that("check that we obtain expected result when ratio is 1", { matchSex = TRUE, matchYearOfBirth = TRUE, ratio = 1) - expect_true( - length(CDMConnector::cohort_count(matched_cohorts[["new_cohort"]]) %>% - dplyr::select("number_records") %>% - dplyr::pull() %>% - unique()) == 3 - ) - # Everybody has a matched + expect_true(nrow(CDMConnector::cohort_count(matched_cohorts$new_cohort) %>% + dplyr::left_join(CDMConnector::cohort_set(matched_cohorts$new_cohort), + by = "cohort_definition_id") %>% + dplyr::filter(stringr::str_detect(cohort_name, "c1")) %>% + dplyr::select("number_records") %>% + dplyr::distinct()) == 1) + expect_true(nrow(CDMConnector::cohort_count(matched_cohorts$new_cohort) %>% + dplyr::left_join(CDMConnector::cohort_set(matched_cohorts$new_cohort), + by = "cohort_definition_id") %>% + dplyr::filter(stringr::str_detect(cohort_name, "c2")) %>% + dplyr::select("number_records") %>% + dplyr::distinct()) == 1) + expect_true(nrow(CDMConnector::cohort_count(matched_cohorts$new_cohort) %>% + dplyr::left_join(CDMConnector::cohort_set(matched_cohorts$new_cohort), + by = "cohort_definition_id") %>% + dplyr::filter(stringr::str_detect(cohort_name, "c3")) %>% + dplyr::select("number_records") %>% + dplyr::distinct()) == 1) + + + + # Everybody has a match n <- matched_cohorts[["new_cohort"]] %>% - dplyr::summarise(n = max(.data$cohort_definition_id)/2) %>% + dplyr::summarise(n = max(.data$cohort_definition_id, na.rm = TRUE)/2) %>% dplyr::pull() cohorts <- matched_cohorts[["new_cohort"]] %>% diff --git a/tests/testthat/test-requireDateRange.R b/tests/testthat/test-requireDateRange.R new file mode 100644 index 00000000..f9b2a95e --- /dev/null +++ b/tests/testthat/test-requireDateRange.R @@ -0,0 +1,60 @@ +test_that("requireDateRange", { + # one person, one observation periods + cohortTable <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1), + subject_id = c("1", "2", "3"), + cohort_start_date = as.Date(c("2010-06-06", "2011-06-06", "2012-06-08")), + cohort_end_date = as.Date(c("2013-06-06", "2013-06-06", "2013-02-01")) + ) + + cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohortTable) + cdm$cohort1 <- cdm$cohort1 %>% + requireInDateRange(dateRange = as.Date(c("2010-01-01", "2011-01-01"))) + + expect_true(cdm$cohort1 %>% + dplyr::pull("subject_id") == 1L) + + # expect error + expect_error(requireInDateRange(cohort = "a")) + expect_error(cdm$cohort1 %>% + requireInDateRange(dateRange = as.Date(c("2010-01-01")))) + expect_error(cdm$cohort1 %>% + requireInDateRange(dateRange = as.Date(c("2010-01-01", "2010-01-01", + "2009-01-01")))) + expect_error(cdm$cohort1 %>% + requireInDateRange(dateRange = c("a", "b"))) + + CDMConnector::cdm_disconnect(cdm) +}) + +test_that("trim cohort dates", { + # one person, one observation periods + cohortTable <- dplyr::tibble( + cohort_definition_id = c(1, 1, 1), + subject_id = c("1", "2", "3"), + cohort_start_date = as.Date(c("2010-06-06", "2011-06-06", "2012-06-08")), + cohort_end_date = as.Date(c("2013-06-06", "2011-09-06", "2013-02-01")) + ) + + cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohortTable) + cdm$cohort1 <- cdm$cohort1 %>% + trimToDateRange(dateRange = as.Date(c("2011-01-01", "2012-01-01"))) + + expect_equal(sort(cdm$cohort1 %>% + dplyr::pull("subject_id")), c("1", "2")) + expect_true(cdm$cohort1 %>% + dplyr::filter(subject_id == "1") %>% + dplyr::pull("cohort_start_date") == as.Date("2011-01-01")) + expect_true(cdm$cohort1 %>% + dplyr::filter(subject_id == "1") %>% + dplyr::pull("cohort_end_date") == as.Date("2012-01-01")) + expect_true(cdm$cohort1 %>% + dplyr::filter(subject_id == "2") %>% + dplyr::pull("cohort_start_date") == as.Date("2011-06-06")) + expect_true(cdm$cohort1 %>% + dplyr::filter(subject_id == "2") %>% + dplyr::pull("cohort_end_date") == as.Date("2011-09-06")) + +CDMConnector::cdm_disconnect(cdm) + + } ) diff --git a/tests/testthat/test-requireDemographics.R b/tests/testthat/test-requireDemographics.R index b0c58665..71c0efc5 100644 --- a/tests/testthat/test-requireDemographics.R +++ b/tests/testthat/test-requireDemographics.R @@ -142,29 +142,6 @@ test_that("ignore existing cohort extra variables", { }) - -test_that("requireDateRange", { - # one person, one observation periods - - cohortTable <- dplyr::tibble( - cohort_definition_id = c(1, 1, 1), - subject_id = c("1", "2", "3"), - cohort_start_date = as.Date(c("2010-06-06", "2010-06-06", "2010-06-08")), - cohort_end_date = as.Date(c("2013-06-06", "2013-06-06", "2013-02-01")) - ) - - cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohortTable) - cdm$cohort1 <- cdm$cohort1 %>% - requireDateRange(cohortDateRange = as.Date(c("2010-06-06", "2013-02-01"))) - - expect_true(all(cdm$cohort1 %>% dplyr::pull(cohort_start_date) == - as.Date(c("2010-06-06", "2010-06-06", "2010-06-08")))) - - CDMConnector::cdm_disconnect(cdm) -}) - - - test_that("external columns kept after requireDemographics", { cdm <- PatientProfiles::mockPatientProfiles( patient_size = 100, diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 00000000..097b2416 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/a01_building_concept_cohorts.Rmd b/vignettes/a01_building_concept_cohorts.Rmd new file mode 100644 index 00000000..7d5bcdc5 --- /dev/null +++ b/vignettes/a01_building_concept_cohorts.Rmd @@ -0,0 +1,30 @@ +--- +title: "a01_building_concept_cohorts" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{a01_building_concept_cohorts} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(CDMConnector) +library(IncidencePrevalence) +library(DrugUtilisation) +library(CohortConstructor) +``` + +## Condition concept based cohort creation + +## Drug concept based cohort creation + +## Demographic based cohort creation + +## Combining cohorts diff --git a/vignettes/a02_applying_cohort_restrictions.Rmd b/vignettes/a02_applying_cohort_restrictions.Rmd new file mode 100644 index 00000000..2bd24072 --- /dev/null +++ b/vignettes/a02_applying_cohort_restrictions.Rmd @@ -0,0 +1,19 @@ +--- +title: "a02_applying_cohort_restrictions" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{a02_applying_cohort_restrictions} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(CohortConstructor) +``` diff --git a/vignettes/a03_age_sex_matching.Rmd b/vignettes/a03_age_sex_matching.Rmd new file mode 100644 index 00000000..6fc177f8 --- /dev/null +++ b/vignettes/a03_age_sex_matching.Rmd @@ -0,0 +1,19 @@ +--- +title: "a03_age_sex_matching" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{a03_age_sex_matching} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(CohortConstructor) +``` diff --git a/vignettes/a04_cohort_timing.Rmd b/vignettes/a04_cohort_timing.Rmd new file mode 100644 index 00000000..ae8f6f5b --- /dev/null +++ b/vignettes/a04_cohort_timing.Rmd @@ -0,0 +1,19 @@ +--- +title: "a04_cohort_timing" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{a04_cohort_timing} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(CohortConstructor) +```