From 47c551fad3204a5624c9ac35203397953e2a7128 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Mon, 8 Apr 2024 22:38:59 +0100 Subject: [PATCH 1/4] getNumberOfCohorts() wrong --- R/generateMatchedCohortSet.R | 10 +++--- .../testthat/test-generateMatchedCohortSet.R | 36 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R index 701c6e86..df1bf24a 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/generateMatchedCohortSet.R @@ -172,9 +172,9 @@ randomPrefix <- function(n = 5) { getNumberOfCohorts <- function(cdm, targetCohortName){ # Read number of cohorts - n <- cdm[[targetCohortName]] %>% + n <- settings(cdm[[targetCohortName]]) %>% dplyr::summarise(v = max(.data$cohort_definition_id, na.rm = TRUE)) %>% - dplyr::pull("v") # number of different cohorts + dplyr::pull("v") if(is.na(n)){# Empty table, number of cohorts is 0 n <- 0 @@ -256,7 +256,9 @@ getNewCohort <- function(cdm, name, targetCohortName, targetCohortId, n){ dplyr::slice(rep(1:dplyr::n(), times = 2)) %>% dplyr::group_by(.data$cohort_definition_id) %>% dplyr::mutate( - cohort_name = dplyr::if_else(dplyr::row_number() == 2, paste0(.data$cohort_name,"_matched"), .data$cohort_name), + cohort_name = dplyr::if_else(dplyr::row_number() == 2, paste0(.data$cohort_name,"_matched"), .data$cohort_name) + ) %>% + dplyr::mutate( cohort_definition_id = dplyr::if_else(dplyr::row_number() == 2, .data$cohort_definition_id+.env$n, .data$cohort_definition_id) ) %>% dplyr::ungroup() @@ -435,7 +437,6 @@ checkObservationPeriod <- function(cdm, name, targetCohortId, n){ return(cdm) } - checkRatio <- function(cdm, name, ratio, targetCohortId, n){ if (ratio == Inf) { cdm[[name]] <- cdm[[name]] %>% @@ -455,7 +456,6 @@ checkRatio <- function(cdm, name, ratio, targetCohortId, n){ return(cdm) } - checkCohortSetRef <- function(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n){ cohort_set_ref <- cdm[[name]] %>% omopgenerics::settings() %>% diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R index 45aff0ed..a2fce902 100644 --- a/tests/testthat/test-generateMatchedCohortSet.R +++ b/tests/testthat/test-generateMatchedCohortSet.R @@ -200,26 +200,26 @@ test_that("test exactMatchingCohort works if there are no subjects", { test_that("test exactMatchingCohort works if one of the cohorts does not have any people", { - # followback <- 180 - # cdm <- DrugUtilisation::generateConceptCohortSet( - # cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), - # conceptSet = list(c_1 = 317009, c_2 = 8505), - # name = "cases", - # end = "observation_period_end_date", - # requiredObservation = c(followback,followback), - # overwrite = TRUE - # ) + followback <- 180 + cdm <- DrugUtilisation::generateConceptCohortSet( + cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), + conceptSet = list(c_1 = 317009, c_2 = 8505), + name = "cases", + end = "observation_period_end_date", + requiredObservation = c(followback,followback), + overwrite = TRUE + ) ### generates overlapping cohorts --> issue CohortConstructor #53 - # expect_no_error( - # generateMatchedCohortSet(cdm, - # name = "new_cohort", - # targetCohortName = "cases", - # targetCohortId = NULL, - # matchSex = TRUE, - # matchYearOfBirth = TRUE, - # ratio = 1) - # ) + expect_no_error( + cdm <- generateMatchedCohortSet(cdm, + name = "new_cohort", + targetCohortName = "cases", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 1) + ) }) From ab3041dea62af7bfcaed1ca2090b55870137e8bf Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Mon, 8 Apr 2024 22:43:39 +0100 Subject: [PATCH 2/4] strange date format solved as.Date(!!CDMConnector::dateadd("cohort_start_date", "future_observation")) --- R/generateMatchedCohortSet.R | 2 +- tests/testthat/test-generateMatchedCohortSet.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R index df1bf24a..8ade9b26 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/generateMatchedCohortSet.R @@ -425,7 +425,7 @@ checkObservationPeriod <- function(cdm, name, targetCohortId, n){ dplyr::mutate(cohort_end_date = dplyr::if_else( .data$cohort_definition_id %in% .env$targetCohortId, .data$cohort_end_date, - !!CDMConnector::dateadd("cohort_start_date", "future_observation") + as.Date(!!CDMConnector::dateadd("cohort_start_date", "future_observation")) )) %>% dplyr::select(-"future_observation") %>% dplyr::group_by(.data$target_definition_id, .data$group_id, .data$pair_id) %>% diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R index a2fce902..80d4964d 100644 --- a/tests/testthat/test-generateMatchedCohortSet.R +++ b/tests/testthat/test-generateMatchedCohortSet.R @@ -210,7 +210,6 @@ test_that("test exactMatchingCohort works if one of the cohorts does not have an overwrite = TRUE ) - ### generates overlapping cohorts --> issue CohortConstructor #53 expect_no_error( cdm <- generateMatchedCohortSet(cdm, name = "new_cohort", From 7919c66ece4a1eeaadee4d7de6e1e303e92048ef Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Mon, 8 Apr 2024 22:59:34 +0100 Subject: [PATCH 3/4] adding informative messages --- R/generateMatchedCohortSet.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R index 8ade9b26..a442741c 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/generateMatchedCohortSet.R @@ -37,6 +37,8 @@ generateMatchedCohortSet <- function(cdm, matchSex = TRUE, matchYearOfBirth = TRUE, ratio = 1){ + cli::cli_inform("Starting matching") + # validate initial input validateInput( cdm = cdm, name = name, targetCohortName = targetCohortName, @@ -58,6 +60,7 @@ generateMatchedCohortSet <- function(cdm, } else { # get target cohort id targetCohortId <- getTargetCohortId(cdm, targetCohortId, targetCohortName) + cli::cli_inform(c("*" = paste0(length(targetCohortId), " cohorts to be matched."))) # Create the cohort name with cases and controls of the targetCohortId cdm <- getNewCohort(cdm, name, targetCohortName, targetCohortId, n) @@ -67,19 +70,25 @@ generateMatchedCohortSet <- function(cdm, # get matched tables matchCols <- getMatchCols(matchSex, matchYearOfBirth) + for(i in matchCols){ + cli::cli_inform(c("*" = paste0("Matching by ", i))) + } if(!is.null(matchCols)){ # Exclude individuals without any match cdm <- excludeNoMatchedIndividuals(cdm, name, matchCols, n) + cli::cli_inform(c("*" = "Not matched individuals excluded")) # Match as ratio was infinite cdm <- infiniteMatching(cdm, name, targetCohortId) # Delete controls that are not in observation cdm <- checkObservationPeriod(cdm, name, targetCohortId, n) + cli::cli_inform(c("*" = "Removing pairs that were not in observation at index date")) # Check ratio cdm <- checkRatio(cdm, name, ratio, targetCohortId, n) + cli::cli_inform(c("*" = "Adjusting ratio")) # Check cohort set ref cdm <- checkCohortSetRef(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n) @@ -92,6 +101,7 @@ generateMatchedCohortSet <- function(cdm, } } # Return + cli::cli_inform(c("v" = "Done")) return(cdm) } From 8465201d48b251f98a61f901a6fc07ec318071e3 Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Mon, 8 Apr 2024 23:05:50 +0100 Subject: [PATCH 4/4] empty cohort --- R/generateMatchedCohortSet.R | 2 -- tests/testthat/test-generateMatchedCohortSet.R | 13 +++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R index a442741c..c00e44df 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/generateMatchedCohortSet.R @@ -96,8 +96,6 @@ generateMatchedCohortSet <- function(cdm, # Rename cohort definition ids cdm <- renameCohortDefinitionIds(cdm, name) - } else { - # TO DO } } # Return diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R index 80d4964d..65887193 100644 --- a/tests/testthat/test-generateMatchedCohortSet.R +++ b/tests/testthat/test-generateMatchedCohortSet.R @@ -189,13 +189,12 @@ test_that("test exactMatchingCohort works if there are no subjects", { overwrite = TRUE ) cdm$cases <- cdm$cases %>% dplyr::filter(subject_id == 0) - expect_no_error( - generateMatchedCohortSet( - cdm, - name = "new_cohort", - targetCohortName = "cases", - ) + cdm <- generateMatchedCohortSet( + cdm, + name = "new_cohort", + targetCohortName = "cases", ) + expect_true(cdm$new_cohort %>% dplyr::tally() %>% dplyr::pull(n) == 0) }) @@ -221,8 +220,6 @@ test_that("test exactMatchingCohort works if one of the cohorts does not have an ) }) - - test_that("test exactMatchingCohort with a ratio bigger than 1", { followback <- 180 cdm <- DrugUtilisation::generateConceptCohortSet(