Skip to content

Commit

Permalink
Merge pull request #84 from oxford-pharmacoepi/collapse
Browse files Browse the repository at this point in the history
collapse gap
  • Loading branch information
edward-burn authored Apr 11, 2024
2 parents f3ab854 + a0643fe commit 44df1fa
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 2 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(attrition)
export(cohortCodelist)
export(cohortCount)
export(collapseCohort)
export(conceptCohort)
export(getIdentifier)
export(intersectCohort)
Expand All @@ -18,12 +19,14 @@ export(requireSex)
export(restrictToFirstEntry)
export(settings)
export(splitOverlap)
export(tableName)
export(trimToDateRange)
importFrom(magrittr,"%>%")
importFrom(omopgenerics,attrition)
importFrom(omopgenerics,cohortCodelist)
importFrom(omopgenerics,cohortCount)
importFrom(omopgenerics,settings)
importFrom(omopgenerics,tableName)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
Expand Down
105 changes: 105 additions & 0 deletions R/collapseCohort.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' Collapse a cohort using a certain gap to join records.
#'
#' @param cohort A cohort_table object.
#' @param cohortId The cohort definition ids to subset, if NULL all cohort
#' definition ids are used.
#' @param gap number of days to join consecutive records.
#' @param name Name of the resultant cohort.
#'
#' @export
#'
#' @return A cohort_table object.
#'
collapseCohort <- function(cohort,
cohortId = NULL,
gap = 0,
name = tableName(cohort)) {
# input validation
cohort <- validateCohortTable(cohort)
cohortId <- validateCohortId(cohortId, settings(cohort)$cohort_definition_id)
gap <- validateGap(gap)

# warning
extraColumns <- colnames(cohort)
extraColumns <- extraColumns[!extraColumns %in% c(
"cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date"
)]
if (length(extraColumns) > 0) {
cli::cli_inform(c(
"!" = "Extra columns are not supported in this function, the following
columns will be dropped: {paste0(extraColumns, collapse = ', ')}"
))
}

# restrict to cohort ids of interest
cohort <- cohort |>
dplyr::select(
"cohort_definition_id", "subject_id", "cohort_start_date",
"cohort_end_date"
) |>
dplyr::filter(.data$cohort_definition_id %in% .env$cohortId)

if (gap > 0) {
cl <- class(cohort)
oldAttributes <- keepAttributes(cohort, cl)
cohort <- cohort |> collapseGap(gap = gap)
# due to issue: https://github.com/darwin-eu-dev/omopgenerics/issues/256
cohort <- restoreClass(cohort, cl)
cohort <- restoreAttributes(cohort, oldAttributes)
}

cohort <- cohort |>
dplyr::select(
"cohort_definition_id", "subject_id", "cohort_start_date",
"cohort_end_date"
) |>
dplyr::compute(name = name, temporary = FALSE) |>
omopgenerics::recordCohortAttrition(
reason = paste0("Collapse cohort with gap = ", gap, " days.")
)

return(cohort)
}

keepAttributes <- function(x, cl) {
xx <- list(
tbl_source = attr(x, "tbl_source"),
tbl_name = attr(x, "tbl_name"),
cdm_reference = attr(x, "cdm_reference")
)
if ("cohort_table" %in% cl) {
xx[["cohort_set"]] <- attr(x, "cohort_set")
xx[["cohort_attrition"]] <- attr(x, "cohort_attrition")
}
return(xx)
}
keepClass <- function(x) {
removeClass(x = x, value = c(
"cdm_table", "omop_table", "achilles_table", "cohort_table"
))
}
restoreAttributes <- function(x, at) {
for (nm in names(at)) {
if (!nm %in% names(attributes(x))) {
attr(x, nm) <- at[[nm]]
}
}
return(x)
}
restoreClass <- function(x, cl) {
x <- addClass(x, "cdm_table")
if ("cohort_table" %in% cl &
"cohort_definition_id" %in% colnames(x)) {
x <- addClass(x, "cohort_table")
}
return(x)
}
addClass <- function(x, value) {
if (any(value %in% class(x))) x <- removeClass(x, value)
base::class(x) <- c(value, base::class(x))
return(x)
}
removeClass <- function(x, value) {
base::class(x) <- base::class(x)[!(base::class(x) %in% value)]
return(x)
}
15 changes: 14 additions & 1 deletion R/conceptCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,13 @@ collapseGap <- function(cohort, gap) {
"cohort_definition_id", "subject_id", "date" = "cohort_end_date"
) |>
dplyr::mutate("date_id" = 1)
start |>
if (gap > 0) {
end <- end %>%
dplyr::mutate("date" = as.Date(!!CDMConnector::dateadd(
date = "date", number = gap, interval = "day"
)))
}
x <- start |>
dplyr::union_all(end) |>
dplyr::group_by(.data$cohort_definition_id, .data$subject_id) |>
dplyr::arrange(.data$date, .data$date_id) |>
Expand All @@ -182,4 +188,11 @@ collapseGap <- function(cohort, gap) {
) |>
tidyr::pivot_wider(names_from = "name", values_from = "date") |>
dplyr::select(-"era_id")
if (gap > 0) {
x <- x %>%
dplyr::mutate("cohort_end_date" = as.Date(!!CDMConnector::dateadd(
date = "cohort_end_date", number = -gap, interval = "day"
)))
}
return(x)
}
4 changes: 4 additions & 0 deletions R/reexports-omopgenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ omopgenerics::attrition
#' @importFrom omopgenerics cohortCodelist
#' @export
omopgenerics::cohortCodelist

#' @importFrom omopgenerics tableName
#' @export
omopgenerics::tableName
5 changes: 5 additions & 0 deletions R/validateFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ validateCohortTable <- function(cohort) {
colnames(cohort))){
cli::cli_abort("cohort must be a `cohort_table`")
}
return(invisible(cohort))
}

validateIndexDate <- function(indexDate, cohort) {
Expand Down Expand Up @@ -74,3 +75,7 @@ validateName <- function(name) {
validateConceptSet <- function(conceptSet) {
omopgenerics::newCodelist(conceptSet)
}

validateGap <- function(gap) {
assertNumeric(gap, integerish = TRUE, min = 0)
}
24 changes: 24 additions & 0 deletions man/collapseCohort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

85 changes: 85 additions & 0 deletions tests/testthat/test-collapseCohort.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
test_that("simple example", {
cdm <- omock::mockCdmReference() |>
omock::mockCdmFromTable(cohortTable = list("cohort" = dplyr::tibble(
"cohort_definition_id" = 1,
"subject_id" = c(1, 2, 3),
"cohort_start_date" = as.Date("2020-01-01"),
"cohort_end_date" = as.Date("2029-12-31")
)))
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "concept", table = dplyr::tibble(
"concept_id" = 1,
"concept_name" = "my concept",
"domain_id" = "drUg",
"vocabulary_id" = NA,
"concept_class_id" = NA,
"concept_code" = NA,
"valid_start_date" = NA,
"valid_end_date" = NA
)
)
cdm <- omopgenerics::insertTable(
cdm = cdm, name = "drug_exposure", table = dplyr::tibble(
"drug_exposure_id" = 1:11,
"person_id" = c(1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1),
"drug_concept_id" = c(1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1),
"drug_exposure_start_date" = c(0, 300, 1500, 750, 10, 800, 150, 1800, 1801, 1802, 1803),
"drug_exposure_end_date" = c(400, 800, 1600, 1550, 2000, 1000, 600, 1800, 1801, 1802, 1803),
"drug_type_concept_id" = 1
) |>
dplyr::mutate(
"drug_exposure_start_date" = as.Date(.data$drug_exposure_start_date, origin = "2020-01-01"),
"drug_exposure_end_date" = as.Date(.data$drug_exposure_end_date, origin = "2020-01-01")
)
)

cdm <- CDMConnector::copyCdmTo(con = DBI::dbConnect(duckdb::duckdb()), cdm = cdm, schema = "main")

expect_no_error(cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort"))

expect_no_error(sameCohort <- cohort |> collapseCohort(gap = 0, name = "new_cohort"))
expect_identical(settings(sameCohort), settings(cohort))
expect_identical(cohortCount(sameCohort), cohortCount(cohort))
expect_identical(
attrition(sameCohort),
attrition(cohort) |>
dplyr::union_all(dplyr::tibble(
"cohort_definition_id" = 1L,
"number_records" = 7L,
"number_subjects" = 2L,
"reason_id" = 2L,
"reason" = "Collapse cohort with gap = 0 days.",
"excluded_records" = 0L,
"excluded_subjects" = 0L
))
)
expect_true(tableName(sameCohort) == "new_cohort")
expect_identical(
omopgenerics::tableSource(sameCohort), omopgenerics::tableSource(cohort)
)

expect_no_error(newCohort <- cohort |> collapseCohort(gap = 1, name = "my_cohort"))
expect_identical(settings(newCohort), settings(cohort))
expect_identical(cohortCount(newCohort), dplyr::tibble(
"cohort_definition_id" = 1L, "number_records" = 4L, "number_subjects" = 2L
))
expect_identical(
attrition(newCohort),
attrition(cohort) |>
dplyr::union_all(dplyr::tibble(
"cohort_definition_id" = 1L,
"number_records" = 4L,
"number_subjects" = 2L,
"reason_id" = 2L,
"reason" = "Collapse cohort with gap = 1 days.",
"excluded_records" = 3L,
"excluded_subjects" = 0L
))
)
expect_true(tableName(newCohort) == "my_cohort")
expect_identical(
omopgenerics::tableSource(newCohort), omopgenerics::tableSource(cohort)
)

})

0 comments on commit 44df1fa

Please sign in to comment.