diff --git a/NAMESPACE b/NAMESPACE index 6a414e04..0a6fd942 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(attrition) export(cohortCodelist) export(cohortCount) +export(collapseCohort) export(conceptCohort) export(getIdentifier) export(intersectCohort) @@ -18,12 +19,14 @@ export(requireSex) export(restrictToFirstEntry) export(settings) export(splitOverlap) +export(tableName) export(trimToDateRange) importFrom(magrittr,"%>%") importFrom(omopgenerics,attrition) importFrom(omopgenerics,cohortCodelist) importFrom(omopgenerics,cohortCount) importFrom(omopgenerics,settings) +importFrom(omopgenerics,tableName) importFrom(rlang,":=") importFrom(rlang,.data) importFrom(rlang,.env) diff --git a/R/collapseCohort.R b/R/collapseCohort.R new file mode 100644 index 00000000..7fa75413 --- /dev/null +++ b/R/collapseCohort.R @@ -0,0 +1,105 @@ +#' Collapse a cohort using a certain gap to join records. +#' +#' @param cohort A cohort_table object. +#' @param cohortId The cohort definition ids to subset, if NULL all cohort +#' definition ids are used. +#' @param gap number of days to join consecutive records. +#' @param name Name of the resultant cohort. +#' +#' @export +#' +#' @return A cohort_table object. +#' +collapseCohort <- function(cohort, + cohortId = NULL, + gap = 0, + name = tableName(cohort)) { + # input validation + cohort <- validateCohortTable(cohort) + cohortId <- validateCohortId(cohortId, settings(cohort)$cohort_definition_id) + gap <- validateGap(gap) + + # warning + extraColumns <- colnames(cohort) + extraColumns <- extraColumns[!extraColumns %in% c( + "cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date" + )] + if (length(extraColumns) > 0) { + cli::cli_inform(c( + "!" = "Extra columns are not supported in this function, the following + columns will be dropped: {paste0(extraColumns, collapse = ', ')}" + )) + } + + # restrict to cohort ids of interest + cohort <- cohort |> + dplyr::select( + "cohort_definition_id", "subject_id", "cohort_start_date", + "cohort_end_date" + ) |> + dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) + + if (gap > 0) { + cl <- class(cohort) + oldAttributes <- keepAttributes(cohort, cl) + cohort <- cohort |> collapseGap(gap = gap) + # due to issue: https://github.com/darwin-eu-dev/omopgenerics/issues/256 + cohort <- restoreClass(cohort, cl) + cohort <- restoreAttributes(cohort, oldAttributes) + } + + cohort <- cohort |> + dplyr::select( + "cohort_definition_id", "subject_id", "cohort_start_date", + "cohort_end_date" + ) |> + dplyr::compute(name = name, temporary = FALSE) |> + omopgenerics::recordCohortAttrition( + reason = paste0("Collapse cohort with gap = ", gap, " days.") + ) + + return(cohort) +} + +keepAttributes <- function(x, cl) { + xx <- list( + tbl_source = attr(x, "tbl_source"), + tbl_name = attr(x, "tbl_name"), + cdm_reference = attr(x, "cdm_reference") + ) + if ("cohort_table" %in% cl) { + xx[["cohort_set"]] <- attr(x, "cohort_set") + xx[["cohort_attrition"]] <- attr(x, "cohort_attrition") + } + return(xx) +} +keepClass <- function(x) { + removeClass(x = x, value = c( + "cdm_table", "omop_table", "achilles_table", "cohort_table" + )) +} +restoreAttributes <- function(x, at) { + for (nm in names(at)) { + if (!nm %in% names(attributes(x))) { + attr(x, nm) <- at[[nm]] + } + } + return(x) +} +restoreClass <- function(x, cl) { + x <- addClass(x, "cdm_table") + if ("cohort_table" %in% cl & + "cohort_definition_id" %in% colnames(x)) { + x <- addClass(x, "cohort_table") + } + return(x) +} +addClass <- function(x, value) { + if (any(value %in% class(x))) x <- removeClass(x, value) + base::class(x) <- c(value, base::class(x)) + return(x) +} +removeClass <- function(x, value) { + base::class(x) <- base::class(x)[!(base::class(x) %in% value)] + return(x) +} diff --git a/R/conceptCohort.R b/R/conceptCohort.R index 144ed1cf..79e0692a 100644 --- a/R/conceptCohort.R +++ b/R/conceptCohort.R @@ -160,7 +160,13 @@ collapseGap <- function(cohort, gap) { "cohort_definition_id", "subject_id", "date" = "cohort_end_date" ) |> dplyr::mutate("date_id" = 1) - start |> + if (gap > 0) { + end <- end %>% + dplyr::mutate("date" = as.Date(!!CDMConnector::dateadd( + date = "date", number = gap, interval = "day" + ))) + } + x <- start |> dplyr::union_all(end) |> dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |> dplyr::arrange(.data$date, .data$date_id) |> @@ -182,4 +188,11 @@ collapseGap <- function(cohort, gap) { ) |> tidyr::pivot_wider(names_from = "name", values_from = "date") |> dplyr::select(-"era_id") + if (gap > 0) { + x <- x %>% + dplyr::mutate("cohort_end_date" = as.Date(!!CDMConnector::dateadd( + date = "cohort_end_date", number = -gap, interval = "day" + ))) + } + return(x) } diff --git a/R/reexports-omopgenerics.R b/R/reexports-omopgenerics.R index 8a66bc16..656637f3 100644 --- a/R/reexports-omopgenerics.R +++ b/R/reexports-omopgenerics.R @@ -13,3 +13,7 @@ omopgenerics::attrition #' @importFrom omopgenerics cohortCodelist #' @export omopgenerics::cohortCodelist + +#' @importFrom omopgenerics tableName +#' @export +omopgenerics::tableName diff --git a/R/validateFunctions.R b/R/validateFunctions.R index 03d8f14a..0d9722f3 100644 --- a/R/validateFunctions.R +++ b/R/validateFunctions.R @@ -19,6 +19,7 @@ validateCohortTable <- function(cohort) { colnames(cohort))){ cli::cli_abort("cohort must be a `cohort_table`") } + return(invisible(cohort)) } validateIndexDate <- function(indexDate, cohort) { @@ -74,3 +75,7 @@ validateName <- function(name) { validateConceptSet <- function(conceptSet) { omopgenerics::newCodelist(conceptSet) } + +validateGap <- function(gap) { + assertNumeric(gap, integerish = TRUE, min = 0) +} diff --git a/man/collapseCohort.Rd b/man/collapseCohort.Rd new file mode 100644 index 00000000..07af3065 --- /dev/null +++ b/man/collapseCohort.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/collapseCohort.R +\name{collapseCohort} +\alias{collapseCohort} +\title{Collapse a cohort using a certain gap to join records.} +\usage{ +collapseCohort(cohort, cohortId = NULL, gap = 0, name = tableName(cohort)) +} +\arguments{ +\item{cohort}{A cohort_table object.} + +\item{cohortId}{The cohort definition ids to subset, if NULL all cohort +definition ids are used.} + +\item{gap}{number of days to join consecutive records.} + +\item{name}{Name of the resultant cohort.} +} +\value{ +A cohort_table object. +} +\description{ +Collapse a cohort using a certain gap to join records. +} diff --git a/man/reexports.Rd b/man/reexports.Rd index ca3e6d4b..af92e22c 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -7,6 +7,7 @@ \alias{settings} \alias{attrition} \alias{cohortCodelist} +\alias{tableName} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -14,6 +15,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{omopgenerics}{\code{\link[omopgenerics]{attrition}}, \code{\link[omopgenerics]{cohortCodelist}}, \code{\link[omopgenerics]{cohortCount}}, \code{\link[omopgenerics]{settings}}} + \item{omopgenerics}{\code{\link[omopgenerics]{attrition}}, \code{\link[omopgenerics]{cohortCodelist}}, \code{\link[omopgenerics]{cohortCount}}, \code{\link[omopgenerics]{settings}}, \code{\link[omopgenerics]{tableName}}} }} diff --git a/tests/testthat/test-collapseCohort.R b/tests/testthat/test-collapseCohort.R new file mode 100644 index 00000000..e9dc5d4f --- /dev/null +++ b/tests/testthat/test-collapseCohort.R @@ -0,0 +1,85 @@ +test_that("simple example", { + cdm <- omock::mockCdmReference() |> + omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble( + "cohort_definition_id" = 1, + "subject_id" = c(1, 2, 3), + "cohort_start_date" = as.Date("2020-01-01"), + "cohort_end_date" = as.Date("2029-12-31") + ))) + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "concept", table = dplyr::tibble( + "concept_id" = 1, + "concept_name" = "my concept", + "domain_id" = "drUg", + "vocabulary_id" = NA, + "concept_class_id" = NA, + "concept_code" = NA, + "valid_start_date" = NA, + "valid_end_date" = NA + ) + ) + cdm <- omopgenerics::insertTable( + cdm = cdm, name = "drug_exposure", table = dplyr::tibble( + "drug_exposure_id" = 1:11, + "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1), + "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1), + "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803), + "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1800, 1801, 1802, 1803), + "drug_type_concept_id" = 1 + ) |> + dplyr::mutate( + "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"), + "drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01") + ) + ) + + cdm <- CDMConnector::copyCdmTo(con = DBI::dbConnect(duckdb::duckdb()), cdm = cdm, schema = "main") + + expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort")) + + expect_no_error(sameCohort <- cohort |> collapseCohort(gap = 0, name = "new_cohort")) + expect_identical(settings(sameCohort), settings(cohort)) + expect_identical(cohortCount(sameCohort), cohortCount(cohort)) + expect_identical( + attrition(sameCohort), + attrition(cohort) |> + dplyr::union_all(dplyr::tibble( + "cohort_definition_id" = 1L, + "number_records" = 7L, + "number_subjects" = 2L, + "reason_id" = 2L, + "reason" = "Collapse cohort with gap = 0 days.", + "excluded_records" = 0L, + "excluded_subjects" = 0L + )) + ) + expect_true(tableName(sameCohort) == "new_cohort") + expect_identical( + omopgenerics::tableSource(sameCohort), omopgenerics::tableSource(cohort) + ) + + expect_no_error(newCohort <- cohort |> collapseCohort(gap = 1, name = "my_cohort")) + expect_identical(settings(newCohort), settings(cohort)) + expect_identical(cohortCount(newCohort), dplyr::tibble( + "cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L + )) + expect_identical( + attrition(newCohort), + attrition(cohort) |> + dplyr::union_all(dplyr::tibble( + "cohort_definition_id" = 1L, + "number_records" = 4L, + "number_subjects" = 2L, + "reason_id" = 2L, + "reason" = "Collapse cohort with gap = 1 days.", + "excluded_records" = 3L, + "excluded_subjects" = 0L + )) + ) + expect_true(tableName(newCohort) == "my_cohort") + expect_identical( + omopgenerics::tableSource(newCohort), omopgenerics::tableSource(cohort) + ) + +}) +