From 0dba7fafc0588e29a6ac88af557871acc09e3ecc Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Mon, 8 Apr 2024 13:03:34 +0200 Subject: [PATCH 1/4] omock tests --- R/generateIntersectCohorts.R | 2 +- R/requireCohortIntersectFlag.R | 20 ++- R/requireDateRange.R | 20 ++- .../testthat/test-generateIntersectCohorts.R | 153 +++++++++--------- .../test-requireCohortIntersectFlag.R | 141 ++++++++-------- tests/testthat/test-requireDateRange.R | 8 + 6 files changed, 184 insertions(+), 160 deletions(-) diff --git a/R/generateIntersectCohorts.R b/R/generateIntersectCohorts.R index 2293fa6d..1e813cdc 100644 --- a/R/generateIntersectCohorts.R +++ b/R/generateIntersectCohorts.R @@ -140,7 +140,7 @@ generateIntersectCohortSet <- function(cdm, dplyr::group_by(.data$cohort_definition_id, .data$cohort_name) %>% dplyr::mutate(dplyr::across( dplyr::everything(), - ~ dplyr::if_else(dplyr::n_distinct(.x) == 1, 1, as.numeric(NA)) + ~ dplyr::if_else(dplyr::n_distinct(.x) == 1, 1, 0) )) %>% dplyr::ungroup() %>% dplyr::distinct() diff --git a/R/requireCohortIntersectFlag.R b/R/requireCohortIntersectFlag.R index 93ccb108..4dd28e3e 100644 --- a/R/requireCohortIntersectFlag.R +++ b/R/requireCohortIntersectFlag.R @@ -1,19 +1,17 @@ - - #' Require cohort subjects are present in another cohort #' -#' @param x Cohort table -#' @param targetCohortTable name of the cohort that we want to check for overlap -#' @param targetCohortId vector of cohort definition ids to include +#' @param x Cohort table. +#' @param targetCohortTable Name of the cohort that we want to check for overlap. +#' @param targetCohortId Vector of cohort definition ids to include. #' @param indexDate Variable in x that contains the date to compute the #' intersection. -#' @param targetStartDate date of reference in cohort table, either for start -#' (in overlap) or on its own (for incidence) -#' @param targetEndDate date of reference in cohort table, either for end -#' (overlap) or NULL (if incidence) -#' @param window window to consider events over +#' @param targetStartDate Date of reference in cohort table, either for start +#' (in overlap) or on its own (for incidence). +#' @param targetEndDate Date of reference in cohort table, either for end +#' (overlap) or NULL (if incidence). +#' @param window Window to consider events over. #' @param negate If set as TRUE, criteria will be applied as exclusion -#' rather than inclusion (i.e. require absence in another cohort) +#' rather than inclusion (i.e. require absence in another cohort). #' #' @return Cohort table with only those in the other cohort kept #' @export diff --git a/R/requireDateRange.R b/R/requireDateRange.R index 3452e3a0..bde669ae 100644 --- a/R/requireDateRange.R +++ b/R/requireDateRange.R @@ -1,9 +1,12 @@ #' Require that an index date is within a date range #' -#' @param cohort A cohort table in a cdm reference -#' @param indexDate Variable in cohort that contains the index date of interest +#' @param cohort A cohort table in a cdm reference. +#' @param cohortId Vector of cohort definition ids to include. If NULL, all +#' cohort definition ids will be used. #' @param dateRange A window of time during which the index date must have -#' been observed +#' been observed. +#' @param indexDate Variable in cohort that contains the index date of interest +#' @param name Name of the new cohort with the restriction. #' #' @return The cohort table with any cohort entries outside of the date range #' dropped @@ -17,16 +20,19 @@ #' requireInDateRange(indexDate = "cohort_start_date", #' dateRange = as.Date(c("2010-01-01", "2019-01-01"))) requireInDateRange <- function(cohort, - indexDate = "cohort_start_date", - dateRange = as.Date(c(NA, NA))) { + cohortId = NULL, + dateRange = as.Date(c(NA, NA)), + indexDate = "cohort_start_date", + name = omopgenerics::tableName(cohort)) { checkCohort(cohort) checkDateVariable(cohort = cohort, dateVar = indexDate) checkDateRange(dateRange) - cohort <- cohort %>% + cohort <- cohort |> dplyr::filter(.data[[indexDate]] >= !!dateRange[1] & - .data[[indexDate]] <= !!dateRange[2]) %>% + .data[[indexDate]] <= !!dateRange[2]) |> + dplyr::compute(name = name, temporary = FALSE) |> CDMConnector::recordCohortAttrition(reason = paste0( indexDate, " between ", dateRange[1], " & ", dateRange[2] diff --git a/tests/testthat/test-generateIntersectCohorts.R b/tests/testthat/test-generateIntersectCohorts.R index 7788c38f..5cfebd1a 100644 --- a/tests/testthat/test-generateIntersectCohorts.R +++ b/tests/testthat/test-generateIntersectCohorts.R @@ -124,38 +124,13 @@ test_that("splitOverlap", { }) test_that("generateIntersectCohortSet", { - cohort <- dplyr::tibble( - cohort_definition_id = c(1, 2, 3, 1, 2, 3, 1, 2), - subject_id = c(1, 1, 1, 2, 3, 3, 4, 4), - cohort_start_date = as.Date(c( - "2020-03-01", "2020-04-01", "2020-01-01", "2020-02-01", "2020-03-01", - "2020-04-01", "2020-02-01", "2020-06-01" - )), - cohort_end_date = as.Date(c( - "2020-05-01", "2020-06-01", "2020-05-01", "2020-05-01", "2020-05-01", - "2020-07-01", "2020-02-04", "2020-06-08" - )) - ) - person <- dplyr::tibble( - person_id = c(1, 2, 3, 4), - gender_concept_id = c(8507, 8532, 8507, 8532), - year_of_birth = 2000, - month_of_birth = 1, - day_of_birth = 1, - race_concept_id = NA_character_, - ethnicity_concept_id = NA_character_ - - ) - observation_period <- dplyr::tibble( - observation_period_id = 1:4, - person_id = 1:4, - observation_period_start_date = as.Date("2010-01-01"), - observation_period_end_date = as.Date("2020-12-31"), - period_type_concept_id = 32880 - ) - cdm <- PatientProfiles::mockPatientProfiles( - observation_period = observation_period, person = person, cohort1 = cohort - ) + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4) |> + omock::mockObservationPeriod() |> + omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2) + cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), + cdm = cdm_local, + schema = "main") # mutually exclusive expect_no_error(cdm <- generateIntersectCohortSet( @@ -167,7 +142,15 @@ test_that("generateIntersectCohortSet", { expect_true(all( CDMConnector::cohortCount(cdm$cohort2) %>% dplyr::arrange(.data$cohort_definition_id) %>% - dplyr::pull("number_records") == c(2, 3, 0, 2, 1, 1, 1) + dplyr::pull("number_records") == c(1, 4, 5) + )) + expect_true(nrow(omopgenerics::settings(cdm$cohort2)) == 3) + expect_true(all( + cdm$cohort2 %>% + dplyr::arrange(.data$cohort_start_date) %>% + dplyr::pull("cohort_start_date") == + c("2000-06-23", "2001-03-30", "2001-07-16", "2001-12-04", "2003-06-15", + "2005-11-24", "2015-03-05", "2015-03-25", "2015-04-15", "1997-10-22") )) # not mutually exclusive @@ -176,11 +159,18 @@ test_that("generateIntersectCohortSet", { mutuallyExclusive = FALSE )) expect_true(all(CDMConnector::settings(cdm$cohort3)$mutually_exclusive == FALSE)) - expect_true(cdm$cohort3 %>% dplyr::tally() %>% dplyr::pull() == 13) + expect_true(cdm$cohort3 %>% dplyr::tally() %>% dplyr::pull() == 7) expect_true(all( CDMConnector::cohortCount(cdm$cohort3) %>% dplyr::arrange(.data$cohort_definition_id) %>% - dplyr::pull("number_records") == c(3, 3, 1, 2, 1, 2, 1) + dplyr::pull("number_records") == c(3, 2, 2) + )) + expect_true(nrow(omopgenerics::settings(cdm$cohort3)) == 3) + expect_true(all( + cdm$cohort3 %>% + dplyr::arrange(.data$cohort_start_date) %>% + dplyr::pull("cohort_start_date") == + c("2001-03-30", "2015-03-25", "2015-03-05", "2015-03-25", "1997-10-22", "2001-03-30", "2000-06-23") )) # not enough cohorts provided @@ -197,66 +187,77 @@ test_that("generateIntersectCohortSet", { }) test_that("only return comb", { - cohort <- dplyr::tibble( - cohort_definition_id = c(1, 2, 3), - subject_id = c(1, 1, 1), - cohort_start_date = as.Date(c( - "2020-03-01", "2020-01-01", "2020-03-01" - )), - cohort_end_date = as.Date(c( - "2020-05-01", "2020-05-02", "2020-04-01" - )) - ) - person <- dplyr::tibble( - person_id = c(1, 2, 3, 4), - gender_concept_id = c(8507, 8532, 8507, 8532), - year_of_birth = 2000, - month_of_birth = 1, - day_of_birth = 1, - race_concept_id = NA_character_, - ethnicity_concept_id = NA_character_ - ) - observation_period <- dplyr::tibble( - observation_period_id = 1:4, - person_id = 1:4, - observation_period_start_date = as.Date("2010-01-01"), - observation_period_end_date = as.Date("2022-12-31"), - period_type_concept_id = 32880 - ) - cdm <- PatientProfiles::mockPatientProfiles( - observation_period = observation_period, person = person, cohort1 = cohort - ) + # combination null + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4) |> + omock::mockObservationPeriod() |> + omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2, seed = 2) + cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), + cdm = cdm_local, + schema = "main") cdm <- generateIntersectCohortSet( cdm = cdm, name = "cohort2", targetCohortName = "cohort1", mutuallyExclusive = FALSE, returnOnlyComb = TRUE ) + expect_true(is.null(cdm$cohort2)) + # not null combination + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4) |> + omock::mockObservationPeriod() |> + omock::mockCohort(tableName = c("cohort1"), numberCohorts = 3) + cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), + cdm = cdm_local, + schema = "main") + cdm <- generateIntersectCohortSet( + cdm = cdm, name = "cohort3", targetCohortName = "cohort1", + mutuallyExclusive = FALSE, returnOnlyComb = TRUE + ) expect_equal( - cdm$cohort2 |> + cdm$cohort3 |> dplyr::collect() %>% - dplyr::arrange(cohort_start_date) %>% - dplyr::pull(cohort_start_date), - as.Date(c("2020-03-01", "2020-03-01", "2020-03-01", "2020-03-01")) + dplyr::arrange(.data$cohort_start_date) %>% + dplyr::pull(.data$cohort_start_date), + as.Date(c("1997-10-22", "2001-03-30", "2015-03-05", "2015-03-25", "2015-03-25", "2015-03-25")) ) - expect_equal( - cdm$cohort2 |> + cdm$cohort3 |> dplyr::collect() %>% dplyr::arrange(cohort_end_date) %>% dplyr::pull(cohort_end_date), - as.Date(c("2020-04-01", "2020-04-01", "2020-04-01", "2020-05-01")) + as.Date(c("1999-05-28", "2005-11-23", "2015-04-14", "2015-04-14", "2015-04-14", "2015-07-06")) ) + expect_true(nrow(omopgenerics::settings(cdm$cohort3)) == 4) + expect_true(all(omopgenerics::settings(cdm$cohort3)$cohort_1 == c(1, 1, 1, 0))) + expect_true(all(omopgenerics::settings(cdm$cohort3)$cohort_2 == c(1, 1, 0, 1))) + expect_true(all(omopgenerics::settings(cdm$cohort3)$cohort_3 == c(0, 1, 1, 1))) + expect_false(any(omopgenerics::settings(cdm$cohort3)$mutually_exclusive)) cdm <- generateIntersectCohortSet( - cdm = cdm, name = "cohort3", targetCohortName = "cohort1", + cdm = cdm, name = "cohort4", targetCohortName = "cohort1", mutuallyExclusive = TRUE, returnOnlyComb = TRUE ) expect_equal( - cdm$cohort3 %>% - cohortCount() %>% - dplyr::pull(number_records), - c(1, 1, 0, 0) + cdm$cohort4 |> + dplyr::collect() %>% + dplyr::arrange(.data$cohort_start_date) %>% + dplyr::pull(.data$cohort_start_date), + as.Date(c("1997-10-22", "2001-03-30", "2001-07-16", "2001-12-04", "2003-06-15", + "2015-03-05", "2015-03-25", "2015-04-15")) + ) + expect_equal( + cdm$cohort4 |> + dplyr::collect() %>% + dplyr::arrange(cohort_end_date) %>% + dplyr::pull(cohort_end_date), + as.Date(c("1999-05-28", "2001-07-15", "2001-12-03", "2003-06-14", "2005-11-23", + "2015-03-24", "2015-04-14", "2015-07-06")) ) + expect_true(nrow(omopgenerics::settings(cdm$cohort4)) == 4) + expect_true(all(omopgenerics::settings(cdm$cohort4)$cohort_1 == c(1, 1, 1, 0))) + expect_true(all(omopgenerics::settings(cdm$cohort4)$cohort_2 == c(1, 1, 0, 1))) + expect_true(all(omopgenerics::settings(cdm$cohort4)$cohort_3 == c(0, 1, 1, 1))) + expect_true(all(omopgenerics::settings(cdm$cohort4)$mutually_exclusive)) }) diff --git a/tests/testthat/test-requireCohortIntersectFlag.R b/tests/testthat/test-requireCohortIntersectFlag.R index 44d14025..7cffccb9 100644 --- a/tests/testthat/test-requireCohortIntersectFlag.R +++ b/tests/testthat/test-requireCohortIntersectFlag.R @@ -1,6 +1,12 @@ test_that("requiring presence in another cohort", { - cdm <- PatientProfiles::mockPatientProfiles(patient_size = 100, - drug_exposure_size = 100) + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4) |> + omock::mockObservationPeriod() |> + omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2) |> + omock::mockCohort(tableName = c("cohort2"), numberCohorts = 2, seed = 2) + cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), + cdm = cdm_local, + schema = "main") cdm$cohort3 <- requireCohortIntersectFlag(x = cdm$cohort1, targetCohortTable = "cohort2", @@ -8,79 +14,84 @@ test_that("requiring presence in another cohort", { window = c(-Inf, Inf)) |> dplyr::compute(name = "cohort3", temporary = FALSE) - expect_true(all(cdm$cohort3 %>% - dplyr::distinct(subject_id) %>% - dplyr::pull() %in% - intersect(cdm$cohort1 %>% - dplyr::distinct(subject_id) %>% - dplyr::pull(), - cdm$cohort2 %>% - dplyr::filter(cohort_definition_id == 1) %>% - dplyr::distinct(subject_id) %>% - dplyr::pull()))) + expect_true(all(cdm$cohort3 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull() %in% + intersect(cdm$cohort1 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull(), + cdm$cohort2 %>% + dplyr::filter(cohort_definition_id == 1) %>% + dplyr::distinct(subject_id) %>% + dplyr::pull()))) - cdm$cohort4 <- requireCohortIntersectFlag(x = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 2, - window = c(-Inf, Inf)) |> - dplyr::compute(name = "cohort4", temporary = FALSE) - expect_true(all(cdm$cohort4 %>% - dplyr::distinct(subject_id) %>% - dplyr::pull() %in% - intersect(cdm$cohort1 %>% - dplyr::distinct(subject_id) %>% - dplyr::pull(), - cdm$cohort2 %>% - dplyr::filter(cohort_definition_id == 2) %>% - dplyr::distinct(subject_id) %>% - dplyr::pull()))) + cdm$cohort4 <- requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 2, + window = c(-Inf, Inf)) |> + dplyr::compute(name = "cohort4", temporary = FALSE) + expect_true(all(cdm$cohort4 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull() %in% + intersect(cdm$cohort1 %>% + dplyr::distinct(subject_id) %>% + dplyr::pull(), + cdm$cohort2 %>% + dplyr::filter(cohort_definition_id == 2) %>% + dplyr::distinct(subject_id) %>% + dplyr::pull()))) - # expected errors - # only support one target id at the moment - expect_error(requireCohortIntersectFlag(x = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = c(1,2), - window = c(-Inf, Inf))) + # expected errors + # only support one target id at the moment + expect_error(requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = c(1,2), + window = c(-Inf, Inf))) - expect_error(requireCohortIntersectFlag(x = cdm$cohort1, - targetCohortTable = "cohort22", # does not exist - targetCohortId = 1, - window = c(-Inf, Inf))) - expect_error(requireCohortIntersectFlag(x = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 10, # does not exist - window = c(-Inf, Inf))) + expect_error(requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort22", # does not exist + targetCohortId = 1, + window = c(-Inf, Inf))) + expect_error(requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 10, # does not exist + window = c(-Inf, Inf))) - CDMConnector::cdm_disconnect(cdm) + CDMConnector::cdm_disconnect(cdm) - }) +}) test_that("requiring absence in another cohort", { -cdm <- PatientProfiles::mockPatientProfiles(patient_size = 100, - drug_exposure_size = 100) - -cdm$cohort3_inclusion <- requireCohortIntersectFlag(x = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 1, - window = c(-Inf, Inf)) |> - dplyr::compute(name = "cohort3_inclusion", temporary = FALSE) -cdm$cohort3_exclusion <- requireCohortIntersectFlag(x = cdm$cohort1, - targetCohortTable = "cohort2", - targetCohortId = 1, - window = c(-Inf, Inf), - negate = TRUE) |> - dplyr::compute(name = "cohort3_exclusion", temporary = FALSE) + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4) |> + omock::mockObservationPeriod() |> + omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2) |> + omock::mockCohort(tableName = c("cohort2"), numberCohorts = 2, seed = 2) + cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), + cdm = cdm_local, + schema = "main") -in_both <- intersect(cdm$cohort3_inclusion %>% - dplyr::pull("subject_id") %>% - unique(), - cdm$cohort3_exclusion %>% - dplyr::pull("subject_id") %>% - unique()) -expect_true(length(in_both) == 0) + cdm$cohort3_inclusion <- requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 1, + window = c(-Inf, Inf)) |> + dplyr::compute(name = "cohort3_inclusion", temporary = FALSE) + cdm$cohort3_exclusion <- requireCohortIntersectFlag(x = cdm$cohort1, + targetCohortTable = "cohort2", + targetCohortId = 1, + window = c(-Inf, Inf), + negate = TRUE) |> + dplyr::compute(name = "cohort3_exclusion", temporary = FALSE) -CDMConnector::cdm_disconnect(cdm) + in_both <- intersect(cdm$cohort3_inclusion %>% + dplyr::pull("subject_id") %>% + unique(), + cdm$cohort3_exclusion %>% + dplyr::pull("subject_id") %>% + unique()) + expect_true(length(in_both) == 0) + CDMConnector::cdm_disconnect(cdm) }) diff --git a/tests/testthat/test-requireDateRange.R b/tests/testthat/test-requireDateRange.R index 5d3934d1..7767d080 100644 --- a/tests/testthat/test-requireDateRange.R +++ b/tests/testthat/test-requireDateRange.R @@ -1,4 +1,12 @@ test_that("requireDateRange", { + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4) |> + omock::mockObservationPeriod() |> + omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2) |> + omock::mockCohort(tableName = c("cohort2"), numberCohorts = 2, seed = 2) + cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), + cdm = cdm_local, + schema = "main") # one person, one observation periods cohortTable <- dplyr::tibble( cohort_definition_id = c(1, 1, 1), From 83a5465514aeea8274bd070212f06a4d1c04651b Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Tue, 9 Apr 2024 00:04:53 +0200 Subject: [PATCH 2/4] id + name + omock test --- R/generateIntersectCohorts.R | 34 +-- R/requireDateRange.R | 210 +++++++++--------- R/validateFunctions.R | 15 +- man/generateIntersectCohortSet.Rd | 4 + man/requireCohortIntersectFlag.Rd | 18 +- man/requireInDateRange.Rd | 15 +- man/trimToDateRange.Rd | 19 +- .../testthat/test-generateIntersectCohorts.R | 32 +-- tests/testthat/test-requireDateRange.R | 122 +++++----- 9 files changed, 260 insertions(+), 209 deletions(-) diff --git a/R/generateIntersectCohorts.R b/R/generateIntersectCohorts.R index 1e813cdc..d4521647 100644 --- a/R/generateIntersectCohorts.R +++ b/R/generateIntersectCohorts.R @@ -7,6 +7,8 @@ #' combinations. #' @param targetCohortId Ids to combine of the target cohort. If NULL all #' cohort present in the table will be used. +#' @param gap Number of days between two subsequent cohort entries to be merged +#' in a single cohort record. #' @param mutuallyExclusive Whether the generated cohorts are mutually #' exclusive or not. #' @param returnOnlyComb Whether to only get the combination cohort back @@ -36,6 +38,7 @@ generateIntersectCohortSet <- function(cdm, name, targetCohortName, targetCohortId = NULL, + gap = 0, mutuallyExclusive = FALSE, returnOnlyComb = FALSE) { # initial checks @@ -105,18 +108,18 @@ generateIntersectCohortSet <- function(cdm, } if (returnOnlyComb) { - toEliminate <- cohSet %>% - dplyr::rowwise() %>% - dplyr::mutate( - sum = sum(dplyr::c_across(-dplyr::all_of(c("cohort_definition_id", "cohort_name"))), - na.rm = TRUE) - ) %>% - dplyr::filter(.data$sum == 1) %>% - dplyr::pull("cohort_definition_id") - cohSet <- cohSet |> - dplyr::filter(!.data$cohort_definition_id %in% .env$toEliminate) %>% - dplyr::group_by(.data$cohort_name) %>% - dplyr::mutate(cohort_definition_id = dplyr::cur_group_id()) + toEliminate <- cohSet %>% + dplyr::rowwise() %>% + dplyr::mutate( + sum = sum(dplyr::c_across(-dplyr::all_of(c("cohort_definition_id", "cohort_name"))), + na.rm = TRUE) + ) %>% + dplyr::filter(.data$sum == 1) %>% + dplyr::pull("cohort_definition_id") + cohSet <- cohSet |> + dplyr::filter(!.data$cohort_definition_id %in% .env$toEliminate) %>% + dplyr::group_by(.data$cohort_name) %>% + dplyr::mutate(cohort_definition_id = dplyr::cur_group_id()) } # add cohort definition id @@ -134,8 +137,6 @@ generateIntersectCohortSet <- function(cdm, if (!mutuallyExclusive) { - cohort <- joinOverlap(x = cohort, gap = 1) %>% - dplyr::compute(name = name, temporary = FALSE) cohSet <- cohSet %>% dplyr::group_by(.data$cohort_definition_id, .data$cohort_name) %>% dplyr::mutate(dplyr::across( @@ -146,6 +147,11 @@ generateIntersectCohortSet <- function(cdm, dplyr::distinct() } + if (cohort |> dplyr::tally() |> dplyr::pull("n") > 0) { + cohort <- joinOverlap(x = cohort, gap = gap) %>% + dplyr::compute(name = name, temporary = FALSE) + } + # TODO # create attrition diff --git a/R/requireDateRange.R b/R/requireDateRange.R index bde669ae..0e41dac4 100644 --- a/R/requireDateRange.R +++ b/R/requireDateRange.R @@ -25,30 +25,52 @@ requireInDateRange <- function(cohort, indexDate = "cohort_start_date", name = omopgenerics::tableName(cohort)) { - checkCohort(cohort) - checkDateVariable(cohort = cohort, dateVar = indexDate) - checkDateRange(dateRange) - - cohort <- cohort |> - dplyr::filter(.data[[indexDate]] >= !!dateRange[1] & - .data[[indexDate]] <= !!dateRange[2]) |> - dplyr::compute(name = name, temporary = FALSE) |> - CDMConnector::recordCohortAttrition(reason = paste0( - indexDate, - " between ", dateRange[1], " & ", dateRange[2] - )) + # checks + assertCharacter(name) + validateCohortTable(cohort) + cdm <- omopgenerics::cdmReference(cohort) + validateCDM(cdm) + validateIndexDate(indexDate, cohort) + ids <- omopgenerics::settings(cohort)$cohort_definition_id + cohortId <- validateCohortId(cohortId, ids) + validateDateRange(dateRange) + + noRequirementsIds <- ids[!ids %in% cohortId] + + if (all(ids %in% cohortId)) { + cohort <- cohort |> + dplyr::filter(.data[[indexDate]] >= !!dateRange[1] & + .data[[indexDate]] <= !!dateRange[2]) |> + dplyr::compute(name = name, temporary = FALSE) |> + CDMConnector::recordCohortAttrition( + reason = paste0(indexDate, " between ", dateRange[1], " & ", dateRange[2] + )) + } else { + cohort <- cohort |> + dplyr::filter((.data[[indexDate]] >= !!dateRange[1] & + .data[[indexDate]] <= !!dateRange[2]) | + .data$cohort_definition_id %in% noRequirementsIds) |> + dplyr::compute(name = name, temporary = FALSE) |> + CDMConnector::recordCohortAttrition( + reason = paste0(indexDate, " between ", dateRange[1], " & ", dateRange[2]), + cohortId = cohortId + ) + } cohort - } #' Trim cohort dates to be within a date range #' -#' @param cohort A cohort table in a cdm reference -#' @param startDate Variable with earliest date -#' @param endDate Variable with latest date +#' @param cohort A cohort table in a cdm reference. +#' @param cohortId Vector of cohort definition ids to include. If NULL, all +#' cohort definition ids will be used. #' @param dateRange A window of time during which the index date must have -#' been observed +#' been observed. +#' @param startDate Variable with earliest date. +#' @param endDate Variable with latest date. +#' @param name Name of the new cohort with the restriction. +#' #' #' @return The cohort table with record timings updated to only be within the #' date range. Any records with all time outside of the range will have @@ -65,56 +87,68 @@ requireInDateRange <- function(cohort, #' dateRange = as.Date(c("2015-01-01", #' "2015-12-31"))) trimToDateRange <- function(cohort, + cohortId = NULL, + dateRange = as.Date(c(NA, NA)), startDate = "cohort_start_date", endDate = "cohort_end_date", - dateRange = as.Date(c(NA, NA))) { - - checkCohort(cohort) - checkDateVariable(cohort = cohort, dateVar = startDate) - checkDateVariable(cohort = cohort, dateVar = endDate) - checkDateRange(dateRange) - - # - # # validate inputs - # if (!isTRUE(inherits(cdm, "cdm_reference"))) { - # cli::cli_abort("cohort must be part of a cdm reference") - # } - # if (!"GeneratedCohortSet" %in% class(cohort) || - # !all(c( - # "cohort_definition_id", "subject_id", - # "cohort_start_date", "cohort_end_date" - # ) %in% - # colnames(cohort))) { - # cli::cli_abort("cohort must be a GeneratedCohortSet") - # } - # - # if (!indexDate %in% colnames(cohort)) { - # cli::cli_abort(paste0(indexDate, " must be a date column in the cohort table")) - # } - # - # if (!endDateName %in% colnames(cohort)) { - # cli::cli_abort(paste0(endDateName, " must be a date column in the cohort table")) - # } - - cohort <- trimStartDate(cohort = cohort, - startDate = startDate, - endDate = endDate, - minDate = dateRange[1] - ) %>% - CDMConnector::recordCohortAttrition(reason = paste0( - startDate, - " >= ", dateRange[1])) - - cohort <- trimEndDate( - cohort = cohort, - startDate = startDate, - endDate = endDate, - maxDate = dateRange[2] - ) %>% - CDMConnector::recordCohortAttrition(reason = paste0( - endDate, - " <= ", dateRange[2] - )) + name = omopgenerics::tableName(cohort)) { + + # checks + assertCharacter(name) + validateCohortTable(cohort) + cdm <- omopgenerics::cdmReference(cohort) + validateCDM(cdm) + validateIndexDate(startDate, cohort) + validateIndexDate(endDate, cohort) + ids <- omopgenerics::settings(cohort)$cohort_definition_id + cohortId <- validateCohortId(cohortId, ids) + validateDateRange(dateRange) + + noRequirementsIds <- ids[!ids %in% cohortId] + + if (all(ids %in% cohortId)) { + cohort <- trimStartDate(cohort = cohort, + startDate = startDate, + endDate = endDate, + minDate = dateRange[1] + ) %>% + dplyr::compute(name = name, temporary = FALSE) %>% + CDMConnector::recordCohortAttrition( + reason = paste0(startDate, " >= ", dateRange[1]) + ) %>% + trimEndDate( + startDate = startDate, + endDate = endDate, + maxDate = dateRange[2] + ) %>% + dplyr::compute(name = name, temporary = FALSE) %>% + CDMConnector::recordCohortAttrition( + reason = paste0(endDate, " <= ", dateRange[2]) + ) + } else { + cohort <- cohort %>% + trimStartDate(noRequirementsIds = noRequirementsIds, + startDate = startDate, + endDate = endDate, + minDate = dateRange[1] + ) %>% + dplyr::compute(name = name, temporary = FALSE) %>% + CDMConnector::recordCohortAttrition( + reason = paste0(startDate, " >= ", dateRange[1]), + cohortId = cohortId + ) %>% + trimEndDate( + noRequirementsIds = noRequirementsIds, + startDate = startDate, + endDate = endDate, + maxDate = dateRange[2] + ) %>% + dplyr::compute(name = name, temporary = FALSE) %>% + CDMConnector::recordCohortAttrition( + reason = paste0(endDate, " <= ", dateRange[2]), + cohortId = cohortId + ) + } cohort } @@ -122,7 +156,8 @@ trimToDateRange <- function(cohort, trimStartDate <- function(cohort, startDate, endDate, - minDate) { + minDate, + noRequirementsIds = NULL) { if (!is.na(startDate)) { cohort <- cohort %>% @@ -130,7 +165,8 @@ trimStartDate <- function(cohort, .data[[startDate]] <= !!minDate, as.Date(minDate), .data[[startDate]] )) %>% - dplyr::filter(.data[[startDate]] <= .data[[endDate]]) + dplyr::filter(.data[[startDate]] <= .data[[endDate]] | + .data$cohort_definition_id %in% noRequirementsIds) } return(cohort) } @@ -138,7 +174,8 @@ trimEndDate <- function( cohort, startDate, endDate, - maxDate) { + maxDate, + noRequirementsIds = NULL) { if (!is.na(endDate)) { cohort <- cohort %>% @@ -146,41 +183,8 @@ trimEndDate <- function( .data[[endDate]] >= !!maxDate, as.Date(maxDate), .data[[endDate]] )) %>% - dplyr::filter(.data[[startDate]] <= .data[[endDate]]) + dplyr::filter(.data[[startDate]] <= .data[[endDate]] | + .data$cohort_definition_id %in% noRequirementsIds) } return(cohort) } - - -checkCohort <- function(cohort){ - if (!"GeneratedCohortSet" %in% class(cohort) || - !all(c( - "cohort_definition_id", "subject_id", - "cohort_start_date", "cohort_end_date" - ) %in% - colnames(cohort))) { - cli::cli_abort("cohort must be a GeneratedCohortSet") - } -} - -checkDateVariable <- function(cohort, dateVar){ - if (!dateVar %in% colnames(cohort)) { - cli::cli_abort(paste0(dateVar, " must be a date column in the cohort table")) - } -} - -checkDateRange<-function(dateRange){ - if(!"Date" %in% class(dateRange)){ - cli::cli_abort("dateRange is not a date") - } - if(length(dateRange) != 2){ - cli::cli_abort("dateRange must be length two") - } - if(dateRange[1]>dateRange[2]){ - cli::cli_abort("First date in dateRange cannot be after second") - } - return(invisible(dateRange)) -} - - - diff --git a/R/validateFunctions.R b/R/validateFunctions.R index e503b9ee..91482050 100644 --- a/R/validateFunctions.R +++ b/R/validateFunctions.R @@ -16,7 +16,7 @@ validateCohortTable <- function(cohort) { validateIndexDate <- function(indexDate, cohort) { assertCharacter(indexDate) if(!indexDate %in% colnames(cohort)){ - cli::cli_abort("indexDate must be a date column in the cohort table") + cli::cli_abort(paste0(substitute(indexDate), " must be a date column in the cohort table")) } } @@ -33,3 +33,16 @@ validateCohortId <- function(cohortId, ids) { } return(cohortId) } + +validateDateRange<-function(dateRange){ + if(!"Date" %in% class(dateRange)){ + cli::cli_abort("dateRange is not a date") + } + if(length(dateRange) != 2){ + cli::cli_abort("dateRange must be length two") + } + if(dateRange[1]>dateRange[2]){ + cli::cli_abort("First date in dateRange cannot be after second") + } + return(invisible(dateRange)) +} diff --git a/man/generateIntersectCohortSet.Rd b/man/generateIntersectCohortSet.Rd index 9264272e..09a1592f 100644 --- a/man/generateIntersectCohortSet.Rd +++ b/man/generateIntersectCohortSet.Rd @@ -10,6 +10,7 @@ generateIntersectCohortSet( name, targetCohortName, targetCohortId = NULL, + gap = 0, mutuallyExclusive = FALSE, returnOnlyComb = FALSE ) @@ -25,6 +26,9 @@ combinations.} \item{targetCohortId}{Ids to combine of the target cohort. If NULL all cohort present in the table will be used.} +\item{gap}{Number of days between two subsequent cohort entries to be merged +in a single cohort record.} + \item{mutuallyExclusive}{Whether the generated cohorts are mutually exclusive or not.} diff --git a/man/requireCohortIntersectFlag.Rd b/man/requireCohortIntersectFlag.Rd index 4d84c502..c39efe41 100644 --- a/man/requireCohortIntersectFlag.Rd +++ b/man/requireCohortIntersectFlag.Rd @@ -16,25 +16,25 @@ requireCohortIntersectFlag( ) } \arguments{ -\item{x}{Cohort table} +\item{x}{Cohort table.} -\item{targetCohortTable}{name of the cohort that we want to check for overlap} +\item{targetCohortTable}{Name of the cohort that we want to check for overlap.} -\item{targetCohortId}{vector of cohort definition ids to include} +\item{targetCohortId}{Vector of cohort definition ids to include.} \item{indexDate}{Variable in x that contains the date to compute the intersection.} -\item{targetStartDate}{date of reference in cohort table, either for start -(in overlap) or on its own (for incidence)} +\item{targetStartDate}{Date of reference in cohort table, either for start +(in overlap) or on its own (for incidence).} -\item{targetEndDate}{date of reference in cohort table, either for end -(overlap) or NULL (if incidence)} +\item{targetEndDate}{Date of reference in cohort table, either for end +(overlap) or NULL (if incidence).} -\item{window}{window to consider events over} +\item{window}{Window to consider events over.} \item{negate}{If set as TRUE, criteria will be applied as exclusion -rather than inclusion (i.e. require absence in another cohort)} +rather than inclusion (i.e. require absence in another cohort).} } \value{ Cohort table with only those in the other cohort kept diff --git a/man/requireInDateRange.Rd b/man/requireInDateRange.Rd index 0adcac70..7a0d0385 100644 --- a/man/requireInDateRange.Rd +++ b/man/requireInDateRange.Rd @@ -6,17 +6,24 @@ \usage{ requireInDateRange( cohort, + cohortId = NULL, + dateRange = as.Date(c(NA, NA)), indexDate = "cohort_start_date", - dateRange = as.Date(c(NA, NA)) + name = omopgenerics::tableName(cohort) ) } \arguments{ -\item{cohort}{A cohort table in a cdm reference} +\item{cohort}{A cohort table in a cdm reference.} -\item{indexDate}{Variable in cohort that contains the index date of interest} +\item{cohortId}{Vector of cohort definition ids to include. If NULL, all +cohort definition ids will be used.} \item{dateRange}{A window of time during which the index date must have -been observed} +been observed.} + +\item{indexDate}{Variable in cohort that contains the index date of interest} + +\item{name}{Name of the new cohort with the restriction.} } \value{ The cohort table with any cohort entries outside of the date range diff --git a/man/trimToDateRange.Rd b/man/trimToDateRange.Rd index ae32991d..d763cdea 100644 --- a/man/trimToDateRange.Rd +++ b/man/trimToDateRange.Rd @@ -6,20 +6,27 @@ \usage{ trimToDateRange( cohort, + cohortId = NULL, + dateRange = as.Date(c(NA, NA)), startDate = "cohort_start_date", endDate = "cohort_end_date", - dateRange = as.Date(c(NA, NA)) + name = omopgenerics::tableName(cohort) ) } \arguments{ -\item{cohort}{A cohort table in a cdm reference} +\item{cohort}{A cohort table in a cdm reference.} -\item{startDate}{Variable with earliest date} - -\item{endDate}{Variable with latest date} +\item{cohortId}{Vector of cohort definition ids to include. If NULL, all +cohort definition ids will be used.} \item{dateRange}{A window of time during which the index date must have -been observed} +been observed.} + +\item{startDate}{Variable with earliest date.} + +\item{endDate}{Variable with latest date.} + +\item{name}{Name of the new cohort with the restriction.} } \value{ The cohort table with record timings updated to only be within the diff --git a/tests/testthat/test-generateIntersectCohorts.R b/tests/testthat/test-generateIntersectCohorts.R index 5cfebd1a..27bfdb17 100644 --- a/tests/testthat/test-generateIntersectCohorts.R +++ b/tests/testthat/test-generateIntersectCohorts.R @@ -147,16 +147,16 @@ test_that("generateIntersectCohortSet", { expect_true(nrow(omopgenerics::settings(cdm$cohort2)) == 3) expect_true(all( cdm$cohort2 %>% - dplyr::arrange(.data$cohort_start_date) %>% - dplyr::pull("cohort_start_date") == - c("2000-06-23", "2001-03-30", "2001-07-16", "2001-12-04", "2003-06-15", - "2005-11-24", "2015-03-05", "2015-03-25", "2015-04-15", "1997-10-22") + dplyr::pull("cohort_start_date") %>% + sort() == + c("1997-10-22", "2000-06-23", "2001-03-30" ,"2001-07-16", "2001-12-04", + "2003-06-15", "2005-11-24", "2015-03-05", "2015-03-25", "2015-04-15") )) - # not mutually exclusive + # not mutually exclusive and gap expect_no_error(cdm <- generateIntersectCohortSet( cdm = cdm, name = "cohort3", targetCohortName = "cohort1", - mutuallyExclusive = FALSE + mutuallyExclusive = FALSE, gap = 1 )) expect_true(all(CDMConnector::settings(cdm$cohort3)$mutually_exclusive == FALSE)) expect_true(cdm$cohort3 %>% dplyr::tally() %>% dplyr::pull() == 7) @@ -168,9 +168,9 @@ test_that("generateIntersectCohortSet", { expect_true(nrow(omopgenerics::settings(cdm$cohort3)) == 3) expect_true(all( cdm$cohort3 %>% - dplyr::arrange(.data$cohort_start_date) %>% - dplyr::pull("cohort_start_date") == - c("2001-03-30", "2015-03-25", "2015-03-05", "2015-03-25", "1997-10-22", "2001-03-30", "2000-06-23") + dplyr::pull("cohort_start_date") %>% + sort() == + c("1997-10-22", "2000-06-23", "2001-03-30", "2001-03-30", "2015-03-05", "2015-03-25", "2015-03-25") )) # not enough cohorts provided @@ -192,6 +192,8 @@ test_that("only return comb", { omock::mockPerson(n = 4) |> omock::mockObservationPeriod() |> omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2, seed = 2) + cdm_local$cohort1 <- cdm_local$cohort1 |> + dplyr::filter(cohort_end_date != as.Date("2015-04-17")) cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), cdm = cdm_local, schema = "main") @@ -200,7 +202,7 @@ test_that("only return comb", { cdm = cdm, name = "cohort2", targetCohortName = "cohort1", mutuallyExclusive = FALSE, returnOnlyComb = TRUE ) - expect_true(is.null(cdm$cohort2)) + expect_true(nrow(dplyr::collect(cdm$cohort2)) == 0) # not null combination cdm_local <- omock::mockCdmReference() |> @@ -212,7 +214,7 @@ test_that("only return comb", { schema = "main") cdm <- generateIntersectCohortSet( cdm = cdm, name = "cohort3", targetCohortName = "cohort1", - mutuallyExclusive = FALSE, returnOnlyComb = TRUE + mutuallyExclusive = FALSE, returnOnlyComb = TRUE, gap = 1 ) expect_equal( cdm$cohort3 |> @@ -236,7 +238,7 @@ test_that("only return comb", { cdm <- generateIntersectCohortSet( cdm = cdm, name = "cohort4", targetCohortName = "cohort1", - mutuallyExclusive = TRUE, returnOnlyComb = TRUE + mutuallyExclusive = TRUE, returnOnlyComb = TRUE, gap = 1 ) expect_equal( @@ -244,16 +246,14 @@ test_that("only return comb", { dplyr::collect() %>% dplyr::arrange(.data$cohort_start_date) %>% dplyr::pull(.data$cohort_start_date), - as.Date(c("1997-10-22", "2001-03-30", "2001-07-16", "2001-12-04", "2003-06-15", - "2015-03-05", "2015-03-25", "2015-04-15")) + as.Date(c("1997-10-22", "2001-03-30", "2015-03-05", "2015-03-25", "2015-04-15")) ) expect_equal( cdm$cohort4 |> dplyr::collect() %>% dplyr::arrange(cohort_end_date) %>% dplyr::pull(cohort_end_date), - as.Date(c("1999-05-28", "2001-07-15", "2001-12-03", "2003-06-14", "2005-11-23", - "2015-03-24", "2015-04-14", "2015-07-06")) + as.Date(c("1999-05-28", "2005-11-23", "2015-03-24", "2015-04-14", "2015-07-06")) ) expect_true(nrow(omopgenerics::settings(cdm$cohort4)) == 4) expect_true(all(omopgenerics::settings(cdm$cohort4)$cohort_1 == c(1, 1, 1, 0))) diff --git a/tests/testthat/test-requireDateRange.R b/tests/testthat/test-requireDateRange.R index 7767d080..9da8fbd9 100644 --- a/tests/testthat/test-requireDateRange.R +++ b/tests/testthat/test-requireDateRange.R @@ -1,5 +1,5 @@ test_that("requireDateRange", { - cdm_local <- omock::mockCdmReference() |> + cdm_local <- omock::mockCdmReference() |> omock::mockPerson(n = 4) |> omock::mockObservationPeriod() |> omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2) |> @@ -7,30 +7,41 @@ test_that("requireDateRange", { cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), cdm = cdm_local, schema = "main") - # one person, one observation periods - cohortTable <- dplyr::tibble( - cohort_definition_id = c(1, 1, 1), - subject_id = c("1", "2", "3"), - cohort_start_date = as.Date(c("2010-06-06", "2011-06-06", "2012-06-08")), - cohort_end_date = as.Date(c("2013-06-06", "2013-06-06", "2013-02-01")) - ) - observation_period <- dplyr::tibble( - observation_period_id = 1:3, - person_id = 1:3, - observation_period_start_date = as.Date("2005-01-01"), - observation_period_end_date = as.Date("2022-12-31"), - period_type_concept_id = 32880 - ) - - cdm <- PatientProfiles::mockPatientProfiles( - cohort1 = cohortTable, - observation_period = observation_period - ) cdm$cohort1 <- cdm$cohort1 %>% requireInDateRange(dateRange = as.Date(c("2010-01-01", "2011-01-01"))) + expect_true(all(cohortCount(cdm$cohort1)$number_records == c(0, 0))) + expect_true(all(cohortCount(cdm$cohort1)$number_subjects == c(0, 0))) + cdm$cohort1 <- cdm$cohort2 %>% + requireInDateRange(dateRange = as.Date(c("2010-01-01", "2020-01-01")), + name = "cohort1") expect_true(cdm$cohort1 %>% - dplyr::pull("subject_id") == 1L) + dplyr::pull("subject_id") |> unique() == 3L) + expect_true(all(cdm$cohort1 %>% + dplyr::arrange(.data$cohort_start_date) %>% + dplyr::pull("cohort_start_date") == + c("2015-04-14", "2015-02-02", "2015-02-08", "2015-02-23"))) + + # index date + cdm$cohort3 <- cdm$cohort2 %>% + dplyr::mutate(new_index_date = as.Date("2000-03-30")) %>% + requireInDateRange(dateRange = as.Date(c("2000-01-01", "2001-01-01")), + name = "cohort3", + indexDate = "new_index_date") + expect_equal(cdm$cohort3 |> dplyr::pull("cohort_start_date"), + cdm$cohort2 |> dplyr::pull("cohort_start_date")) + + # 1 cohort id + cdm$cohort4 <- cdm$cohort2 %>% + requireInDateRange(dateRange = as.Date(c("2000-01-01", "2001-01-01")), + cohortId = 1, + name = "cohort4") + expect_true(all(attrition(cdm$cohort4)$reason == + c("Initial qualifying events", + "cohort_start_date between 2000-01-01 & 2001-01-01", + "Initial qualifying events"))) + expect_true(all(cohortCount(cdm$cohort4)$number_records == c(1,4))) + expect_true(all(cohortCount(cdm$cohort4)$number_subjects == c(1,2))) # expect error expect_error(requireInDateRange(cohort = "a")) @@ -46,44 +57,43 @@ test_that("requireDateRange", { }) test_that("trim cohort dates", { - # one person, one observation periods - cohortTable <- dplyr::tibble( - cohort_definition_id = c(1, 1, 1), - subject_id = c("1", "2", "3"), - cohort_start_date = as.Date(c("2010-06-06", "2011-06-06", "2012-06-08")), - cohort_end_date = as.Date(c("2013-06-06", "2011-09-06", "2013-02-01")) - ) - observation_period <- dplyr::tibble( - observation_period_id = 1:3, - person_id = 1:3, - observation_period_start_date = as.Date("2005-01-01"), - observation_period_end_date = as.Date("2022-12-31"), - period_type_concept_id = 32880 - ) - - cdm <- PatientProfiles::mockPatientProfiles( - cohort1 = cohortTable, - observation_period = observation_period - ) + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(n = 4) |> + omock::mockObservationPeriod() |> + omock::mockCohort(tableName = c("cohort1"), numberCohorts = 2) |> + omock::mockCohort(tableName = c("cohort2"), numberCohorts = 2, seed = 2) + cdm <- CDMConnector::copy_cdm_to(con = DBI::dbConnect(duckdb::duckdb(), ":memory:"), + cdm = cdm_local, + schema = "main") cdm$cohort1 <- cdm$cohort1 %>% - trimToDateRange(dateRange = as.Date(c("2011-01-01", "2012-01-01"))) + trimToDateRange(dateRange = as.Date(c("2001-01-01", "2005-01-01"))) expect_equal(sort(cdm$cohort1 %>% - dplyr::pull("subject_id")), c("1", "2")) - expect_true(cdm$cohort1 %>% - dplyr::filter(subject_id == "1") %>% - dplyr::pull("cohort_start_date") == as.Date("2011-01-01")) - expect_true(cdm$cohort1 %>% - dplyr::filter(subject_id == "1") %>% - dplyr::pull("cohort_end_date") == as.Date("2012-01-01")) - expect_true(cdm$cohort1 %>% - dplyr::filter(subject_id == "2") %>% - dplyr::pull("cohort_start_date") == as.Date("2011-06-06")) - expect_true(cdm$cohort1 %>% - dplyr::filter(subject_id == "2") %>% - dplyr::pull("cohort_end_date") == as.Date("2011-09-06")) + dplyr::pull("subject_id")), c(1, 1, 1, 1, 1)) + expect_true(all(cdm$cohort1 %>% + dplyr::pull("cohort_start_date") == + c("2001-03-30", "2003-06-15", "2001-01-01", "2001-07-16", "2001-12-04"))) + expect_true(all(cdm$cohort1 %>% + dplyr::pull("cohort_end_date") == + c("2003-06-14", "2005-01-01", "2001-07-15", "2001-12-03", "2005-01-01"))) -CDMConnector::cdm_disconnect(cdm) + # cohort id + cdm$cohort3 <- cdm$cohort2 %>% + trimToDateRange(dateRange = as.Date(c("2001-01-01", "2005-01-01")), + cohortId = 1, + name = "cohort3") + expect_true(omopgenerics::cohortCount(cdm$cohort3)$number_records[1] == 1) + expect_equal(sort(cdm$cohort3 %>% + dplyr::pull("subject_id")), c(1, 3, 3, 3, 4)) + expect_equal(sort(omopgenerics::attrition(cdm$cohort3)$reason[ + omopgenerics::attrition(cdm$cohort3)$cohort_definition_id == 1]), + c("cohort_end_date <= 2005-01-01", "cohort_start_date >= 2001-01-01", "Initial qualifying events") + ) + expect_equal(sort(omopgenerics::attrition(cdm$cohort3)$reason[ + omopgenerics::attrition(cdm$cohort3)$cohort_definition_id == 2]), + "Initial qualifying events" + ) - } ) +CDMConnector::cdm_disconnect(cdm) +}) From 8f98482a4110c94e3e51a4eb9a3cccfc3c7dc923 Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Tue, 9 Apr 2024 00:27:13 +0200 Subject: [PATCH 3/4] checks --- tests/testthat/test-requireDateRange.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-requireDateRange.R b/tests/testthat/test-requireDateRange.R index 9da8fbd9..000f64be 100644 --- a/tests/testthat/test-requireDateRange.R +++ b/tests/testthat/test-requireDateRange.R @@ -86,12 +86,12 @@ test_that("trim cohort dates", { expect_true(omopgenerics::cohortCount(cdm$cohort3)$number_records[1] == 1) expect_equal(sort(cdm$cohort3 %>% dplyr::pull("subject_id")), c(1, 3, 3, 3, 4)) - expect_equal(sort(omopgenerics::attrition(cdm$cohort3)$reason[ - omopgenerics::attrition(cdm$cohort3)$cohort_definition_id == 1]), - c("cohort_end_date <= 2005-01-01", "cohort_start_date >= 2001-01-01", "Initial qualifying events") + expect_equal(omopgenerics::attrition(cdm$cohort3)$reason[ + omopgenerics::attrition(cdm$cohort3)$cohort_definition_id == 1], + c("Initial qualifying events", "cohort_start_date >= 2001-01-01", "cohort_end_date <= 2005-01-01") ) - expect_equal(sort(omopgenerics::attrition(cdm$cohort3)$reason[ - omopgenerics::attrition(cdm$cohort3)$cohort_definition_id == 2]), + expect_equal(omopgenerics::attrition(cdm$cohort3)$reason[ + omopgenerics::attrition(cdm$cohort3)$cohort_definition_id == 2], "Initial qualifying events" ) From bf7ee675ce935d37ef28a0e3bd7f84c1ee2c6b84 Mon Sep 17 00:00:00 2001 From: nmercadeb Date: Tue, 9 Apr 2024 20:20:12 +0200 Subject: [PATCH 4/4] revision --- R/validateFunctions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/validateFunctions.R b/R/validateFunctions.R index 91482050..be440ad2 100644 --- a/R/validateFunctions.R +++ b/R/validateFunctions.R @@ -16,7 +16,7 @@ validateCohortTable <- function(cohort) { validateIndexDate <- function(indexDate, cohort) { assertCharacter(indexDate) if(!indexDate %in% colnames(cohort)){ - cli::cli_abort(paste0(substitute(indexDate), " must be a date column in the cohort table")) + cli::cli_abort("{indexDate} must be a date column in the cohort table") } }