diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R index 701c6e86..c00e44df 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/generateMatchedCohortSet.R @@ -37,6 +37,8 @@ generateMatchedCohortSet <- function(cdm, matchSex = TRUE, matchYearOfBirth = TRUE, ratio = 1){ + cli::cli_inform("Starting matching") + # validate initial input validateInput( cdm = cdm, name = name, targetCohortName = targetCohortName, @@ -58,6 +60,7 @@ generateMatchedCohortSet <- function(cdm, } else { # get target cohort id targetCohortId <- getTargetCohortId(cdm, targetCohortId, targetCohortName) + cli::cli_inform(c("*" = paste0(length(targetCohortId), " cohorts to be matched."))) # Create the cohort name with cases and controls of the targetCohortId cdm <- getNewCohort(cdm, name, targetCohortName, targetCohortId, n) @@ -67,19 +70,25 @@ generateMatchedCohortSet <- function(cdm, # get matched tables matchCols <- getMatchCols(matchSex, matchYearOfBirth) + for(i in matchCols){ + cli::cli_inform(c("*" = paste0("Matching by ", i))) + } if(!is.null(matchCols)){ # Exclude individuals without any match cdm <- excludeNoMatchedIndividuals(cdm, name, matchCols, n) + cli::cli_inform(c("*" = "Not matched individuals excluded")) # Match as ratio was infinite cdm <- infiniteMatching(cdm, name, targetCohortId) # Delete controls that are not in observation cdm <- checkObservationPeriod(cdm, name, targetCohortId, n) + cli::cli_inform(c("*" = "Removing pairs that were not in observation at index date")) # Check ratio cdm <- checkRatio(cdm, name, ratio, targetCohortId, n) + cli::cli_inform(c("*" = "Adjusting ratio")) # Check cohort set ref cdm <- checkCohortSetRef(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n) @@ -87,11 +96,10 @@ generateMatchedCohortSet <- function(cdm, # Rename cohort definition ids cdm <- renameCohortDefinitionIds(cdm, name) - } else { - # TO DO } } # Return + cli::cli_inform(c("v" = "Done")) return(cdm) } @@ -172,9 +180,9 @@ randomPrefix <- function(n = 5) { getNumberOfCohorts <- function(cdm, targetCohortName){ # Read number of cohorts - n <- cdm[[targetCohortName]] %>% + n <- settings(cdm[[targetCohortName]]) %>% dplyr::summarise(v = max(.data$cohort_definition_id, na.rm = TRUE)) %>% - dplyr::pull("v") # number of different cohorts + dplyr::pull("v") if(is.na(n)){# Empty table, number of cohorts is 0 n <- 0 @@ -256,7 +264,9 @@ getNewCohort <- function(cdm, name, targetCohortName, targetCohortId, n){ dplyr::slice(rep(1:dplyr::n(), times = 2)) %>% dplyr::group_by(.data$cohort_definition_id) %>% dplyr::mutate( - cohort_name = dplyr::if_else(dplyr::row_number() == 2, paste0(.data$cohort_name,"_matched"), .data$cohort_name), + cohort_name = dplyr::if_else(dplyr::row_number() == 2, paste0(.data$cohort_name,"_matched"), .data$cohort_name) + ) %>% + dplyr::mutate( cohort_definition_id = dplyr::if_else(dplyr::row_number() == 2, .data$cohort_definition_id+.env$n, .data$cohort_definition_id) ) %>% dplyr::ungroup() @@ -423,7 +433,7 @@ checkObservationPeriod <- function(cdm, name, targetCohortId, n){ dplyr::mutate(cohort_end_date = dplyr::if_else( .data$cohort_definition_id %in% .env$targetCohortId, .data$cohort_end_date, - !!CDMConnector::dateadd("cohort_start_date", "future_observation") + as.Date(!!CDMConnector::dateadd("cohort_start_date", "future_observation")) )) %>% dplyr::select(-"future_observation") %>% dplyr::group_by(.data$target_definition_id, .data$group_id, .data$pair_id) %>% @@ -435,7 +445,6 @@ checkObservationPeriod <- function(cdm, name, targetCohortId, n){ return(cdm) } - checkRatio <- function(cdm, name, ratio, targetCohortId, n){ if (ratio == Inf) { cdm[[name]] <- cdm[[name]] %>% @@ -455,7 +464,6 @@ checkRatio <- function(cdm, name, ratio, targetCohortId, n){ return(cdm) } - checkCohortSetRef <- function(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n){ cohort_set_ref <- cdm[[name]] %>% omopgenerics::settings() %>% diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R index 45aff0ed..65887193 100644 --- a/tests/testthat/test-generateMatchedCohortSet.R +++ b/tests/testthat/test-generateMatchedCohortSet.R @@ -189,40 +189,36 @@ test_that("test exactMatchingCohort works if there are no subjects", { overwrite = TRUE ) cdm$cases <- cdm$cases %>% dplyr::filter(subject_id == 0) - expect_no_error( - generateMatchedCohortSet( - cdm, - name = "new_cohort", - targetCohortName = "cases", - ) + cdm <- generateMatchedCohortSet( + cdm, + name = "new_cohort", + targetCohortName = "cases", ) + expect_true(cdm$new_cohort %>% dplyr::tally() %>% dplyr::pull(n) == 0) }) test_that("test exactMatchingCohort works if one of the cohorts does not have any people", { - # followback <- 180 - # cdm <- DrugUtilisation::generateConceptCohortSet( - # cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), - # conceptSet = list(c_1 = 317009, c_2 = 8505), - # name = "cases", - # end = "observation_period_end_date", - # requiredObservation = c(followback,followback), - # overwrite = TRUE - # ) - - ### generates overlapping cohorts --> issue CohortConstructor #53 - # expect_no_error( - # generateMatchedCohortSet(cdm, - # name = "new_cohort", - # targetCohortName = "cases", - # targetCohortId = NULL, - # matchSex = TRUE, - # matchYearOfBirth = TRUE, - # ratio = 1) - # ) -}) - + followback <- 180 + cdm <- DrugUtilisation::generateConceptCohortSet( + cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), + conceptSet = list(c_1 = 317009, c_2 = 8505), + name = "cases", + end = "observation_period_end_date", + requiredObservation = c(followback,followback), + overwrite = TRUE + ) + expect_no_error( + cdm <- generateMatchedCohortSet(cdm, + name = "new_cohort", + targetCohortName = "cases", + targetCohortId = NULL, + matchSex = TRUE, + matchYearOfBirth = TRUE, + ratio = 1) + ) +}) test_that("test exactMatchingCohort with a ratio bigger than 1", { followback <- 180