Merge pull request #84 from oxford-pharmacoepi/collapse

collapse gap
OHDSI · Apr 11, 2024 · 44df1fa · 44df1fa
2 parents f3ab854 + a0643fe
commit 44df1fa
Show file tree

Hide file tree

Showing 8 changed files with 242 additions and 2 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,6 +3,7 @@
 export(attrition)
 export(cohortCodelist)
 export(cohortCount)
+export(collapseCohort)
 export(conceptCohort)
 export(getIdentifier)
 export(intersectCohort)
@@ -18,12 +19,14 @@ export(requireSex)
 export(restrictToFirstEntry)
 export(settings)
 export(splitOverlap)
+export(tableName)
 export(trimToDateRange)
 importFrom(magrittr,"%>%")
 importFrom(omopgenerics,attrition)
 importFrom(omopgenerics,cohortCodelist)
 importFrom(omopgenerics,cohortCount)
 importFrom(omopgenerics,settings)
+importFrom(omopgenerics,tableName)
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,.env)

diff --git a/R/collapseCohort.R b/R/collapseCohort.R
@@ -0,0 +1,105 @@
+#' Collapse a cohort using a certain gap to join records.
+#'
+#' @param cohort A cohort_table object.
+#' @param cohortId The cohort definition ids to subset, if NULL all cohort
+#' definition ids are used.
+#' @param gap number of days to join consecutive records.
+#' @param name Name of the resultant cohort.
+#'
+#' @export
+#'
+#' @return A cohort_table object.
+#'
+collapseCohort <- function(cohort,
+                           cohortId = NULL,
+                           gap = 0,
+                           name = tableName(cohort)) {
+  # input validation
+  cohort <- validateCohortTable(cohort)
+  cohortId <- validateCohortId(cohortId, settings(cohort)$cohort_definition_id)
+  gap <- validateGap(gap)
+
+  # warning
+  extraColumns <- colnames(cohort)
+  extraColumns <- extraColumns[!extraColumns %in% c(
+    "cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date"
+  )]
+  if (length(extraColumns) > 0) {
+    cli::cli_inform(c(
+      "!" = "Extra columns are not supported in this function, the following
+      columns will be dropped: {paste0(extraColumns, collapse = ', ')}"
+    ))
+  }
+
+  # restrict to cohort ids of interest
+  cohort <- cohort |>
+    dplyr::select(
+      "cohort_definition_id", "subject_id", "cohort_start_date",
+      "cohort_end_date"
+    ) |>
+    dplyr::filter(.data$cohort_definition_id %in% .env$cohortId)
+
+  if (gap > 0) {
+    cl <- class(cohort)
+    oldAttributes <- keepAttributes(cohort, cl)
+    cohort <- cohort |> collapseGap(gap = gap)
+    # due to issue: https://github.com/darwin-eu-dev/omopgenerics/issues/256
+    cohort <- restoreClass(cohort, cl)
+    cohort <- restoreAttributes(cohort, oldAttributes)
+  }
+
+  cohort <- cohort |>
+    dplyr::select(
+      "cohort_definition_id", "subject_id", "cohort_start_date",
+      "cohort_end_date"
+    ) |>
+    dplyr::compute(name = name, temporary = FALSE) |>
+    omopgenerics::recordCohortAttrition(
+      reason = paste0("Collapse cohort with gap = ", gap, " days.")
+    )
+
+  return(cohort)
+}
+
+keepAttributes <- function(x, cl) {
+  xx <- list(
+    tbl_source = attr(x, "tbl_source"),
+    tbl_name = attr(x, "tbl_name"),
+    cdm_reference = attr(x, "cdm_reference")
+  )
+  if ("cohort_table" %in% cl) {
+    xx[["cohort_set"]] <- attr(x, "cohort_set")
+    xx[["cohort_attrition"]] <- attr(x, "cohort_attrition")
+  }
+  return(xx)
+}
+keepClass <- function(x) {
+  removeClass(x = x, value = c(
+    "cdm_table", "omop_table", "achilles_table", "cohort_table"
+  ))
+}
+restoreAttributes <- function(x, at) {
+  for (nm in names(at)) {
+    if (!nm %in% names(attributes(x))) {
+      attr(x, nm) <- at[[nm]]
+    }
+  }
+  return(x)
+}
+restoreClass <- function(x, cl) {
+  x <- addClass(x, "cdm_table")
+  if ("cohort_table" %in% cl &
+      "cohort_definition_id" %in% colnames(x)) {
+    x <- addClass(x, "cohort_table")
+  }
+  return(x)
+}
+addClass <- function(x, value) {
+  if (any(value %in% class(x))) x <- removeClass(x, value)
+  base::class(x) <- c(value, base::class(x))
+  return(x)
+}
+removeClass <- function(x, value) {
+  base::class(x) <- base::class(x)[!(base::class(x) %in% value)]
+  return(x)
+}
diff --git a/R/conceptCohort.R b/R/conceptCohort.R
@@ -160,7 +160,13 @@ collapseGap <- function(cohort, gap) {
       "cohort_definition_id", "subject_id", "date" = "cohort_end_date"
     ) |>
     dplyr::mutate("date_id" = 1)
-  start |>
+  if (gap > 0) {
+    end <- end %>%
+      dplyr::mutate("date" = as.Date(!!CDMConnector::dateadd(
+        date = "date", number = gap, interval = "day"
+      )))
+  }
+  x <- start |>
     dplyr::union_all(end) |>
     dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
     dplyr::arrange(.data$date, .data$date_id) |>
@@ -182,4 +188,11 @@ collapseGap <- function(cohort, gap) {
     ) |>
     tidyr::pivot_wider(names_from = "name", values_from = "date") |>
     dplyr::select(-"era_id")
+  if (gap > 0) {
+    x <- x %>%
+      dplyr::mutate("cohort_end_date" = as.Date(!!CDMConnector::dateadd(
+        date = "cohort_end_date", number = -gap, interval = "day"
+      )))
+  }
+  return(x)
 }
diff --git a/R/reexports-omopgenerics.R b/R/reexports-omopgenerics.R
@@ -13,3 +13,7 @@ omopgenerics::attrition
 #' @importFrom omopgenerics cohortCodelist
 #' @export
 omopgenerics::cohortCodelist
+
+#' @importFrom omopgenerics tableName
+#' @export
+omopgenerics::tableName
diff --git a/R/validateFunctions.R b/R/validateFunctions.R
@@ -19,6 +19,7 @@ validateCohortTable <- function(cohort) {
           colnames(cohort))){
     cli::cli_abort("cohort must be a `cohort_table`")
   }
+  return(invisible(cohort))
 }
 
 validateIndexDate <- function(indexDate, cohort) {
@@ -74,3 +75,7 @@ validateName <- function(name) {
 validateConceptSet <- function(conceptSet) {
   omopgenerics::newCodelist(conceptSet)
 }
+
+validateGap <- function(gap) {
+  assertNumeric(gap, integerish = TRUE, min = 0)
+}
diff --git a/man/collapseCohort.Rd b/man/collapseCohort.Rd
diff --git a/man/reexports.Rd b/man/reexports.Rd
diff --git a/tests/testthat/test-collapseCohort.R b/tests/testthat/test-collapseCohort.R
@@ -0,0 +1,85 @@
+test_that("simple example", {
+  cdm <- omock::mockCdmReference() |>
+    omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble(
+      "cohort_definition_id" = 1,
+      "subject_id" = c(1, 2, 3),
+      "cohort_start_date" = as.Date("2020-01-01"),
+      "cohort_end_date" = as.Date("2029-12-31")
+    )))
+  cdm <- omopgenerics::insertTable(
+    cdm = cdm, name = "concept", table = dplyr::tibble(
+      "concept_id" = 1,
+      "concept_name" = "my concept",
+      "domain_id" = "drUg",
+      "vocabulary_id" = NA,
+      "concept_class_id" = NA,
+      "concept_code" = NA,
+      "valid_start_date" = NA,
+      "valid_end_date" = NA
+    )
+  )
+  cdm <- omopgenerics::insertTable(
+    cdm = cdm, name = "drug_exposure", table = dplyr::tibble(
+      "drug_exposure_id" = 1:11,
+      "person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1),
+      "drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1),
+      "drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803),
+      "drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1800, 1801, 1802, 1803),
+      "drug_type_concept_id" = 1
+    ) |>
+      dplyr::mutate(
+        "drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"),
+        "drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01")
+      )
+  )
+
+  cdm <- CDMConnector::copyCdmTo(con = DBI::dbConnect(duckdb::duckdb()), cdm = cdm, schema = "main")
+
+  expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort"))
+
+  expect_no_error(sameCohort <- cohort |> collapseCohort(gap = 0, name = "new_cohort"))
+  expect_identical(settings(sameCohort), settings(cohort))
+  expect_identical(cohortCount(sameCohort), cohortCount(cohort))
+  expect_identical(
+    attrition(sameCohort),
+    attrition(cohort) |>
+      dplyr::union_all(dplyr::tibble(
+        "cohort_definition_id" = 1L,
+        "number_records" = 7L,
+        "number_subjects" = 2L,
+        "reason_id" = 2L,
+        "reason" = "Collapse cohort with gap = 0 days.",
+        "excluded_records" = 0L,
+        "excluded_subjects" = 0L
+      ))
+  )
+  expect_true(tableName(sameCohort) == "new_cohort")
+  expect_identical(
+    omopgenerics::tableSource(sameCohort), omopgenerics::tableSource(cohort)
+  )
+
+  expect_no_error(newCohort <- cohort |> collapseCohort(gap = 1, name = "my_cohort"))
+  expect_identical(settings(newCohort), settings(cohort))
+  expect_identical(cohortCount(newCohort), dplyr::tibble(
+    "cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
+  ))
+  expect_identical(
+    attrition(newCohort),
+    attrition(cohort) |>
+      dplyr::union_all(dplyr::tibble(
+        "cohort_definition_id" = 1L,
+        "number_records" = 4L,
+        "number_subjects" = 2L,
+        "reason_id" = 2L,
+        "reason" = "Collapse cohort with gap = 1 days.",
+        "excluded_records" = 3L,
+        "excluded_subjects" = 0L
+      ))
+  )
+  expect_true(tableName(newCohort) == "my_cohort")
+  expect_identical(
+    omopgenerics::tableSource(newCohort), omopgenerics::tableSource(cohort)
+  )
+
+})
+