Skip to content

Commit

Permalink
Merge pull request #25 from oxford-pharmacoepi/pkg_structure
Browse files Browse the repository at this point in the history
updates
  • Loading branch information
edward-burn authored Dec 18, 2023
2 parents a4e583a + c72f5a0 commit f3233f3
Show file tree
Hide file tree
Showing 19 changed files with 616 additions and 200 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
.Rdata
.httr-oauth
.DS_Store
inst/doc
7 changes: 6 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ Suggests:
DBI,
DrugUtilisation,
duckdb,
knitr,
rmarkdown,
testthat (>= 3.0.0),
tibble
tibble,
stringr,
IncidencePrevalence
Config/testthat/edition: 3
Config/testthat/parallel: true
VignetteBuilder: knitr
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ export(requireAge)
export(requireCohortIntersectFlag)
export(requireDemographics)
export(requireFutureObservation)
export(requireInDateRange)
export(requirePriorObservation)
export(requireSex)
export(restrictToFirstEntry)
export(splitOverlap)
export(trimToDateRange)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,.data)
Expand Down
4 changes: 4 additions & 0 deletions R/cohortTiming.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
cohortTiming <- function(){

# output as summarised result
}
6 changes: 3 additions & 3 deletions R/generateMatchedCohortSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ randomPrefix <- function(n = 5) {
getNumberOfCohorts <- function(cdm, targetCohortName){
# Read number of cohorts
n <- cdm[[targetCohortName]] %>%
dplyr::summarise(v = max(.data$cohort_definition_id)) %>%
dplyr::pull(.data$v) # number of different cohorts
dplyr::summarise(v = max(.data$cohort_definition_id, na.rm = TRUE)) %>%
dplyr::pull("v") # number of different cohorts

if(is.na(n)){# Empty table, number of cohorts is 0
n <- 0
Expand Down Expand Up @@ -343,7 +343,7 @@ infiniteMatching <- function(cdm, name, targetCohortId){
cdm[[name]] %>%
dplyr::filter(.data$cohort_definition_id %in% .env$targetCohortId) %>%
dplyr::group_by(.data$cohort_definition_id, .data$group_id) %>%
dplyr::mutate(max_cases = max(.data$pair_id)) %>%
dplyr::mutate(max_cases = max(.data$pair_id, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
dplyr::select("group_id", "target_definition_id", "max_cases"),
by = c("group_id", "target_definition_id")
Expand Down
162 changes: 162 additions & 0 deletions R/requireDateRange.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#' Require that an index date is within a date range
#'
#' @param cohort A cohort table in a cdm reference
#' @param indexDate Variable in cohort that contains the index date of interest
#' @param dateRange A window of time during which the index date must have
#' been observed
#'
#' @return The cohort table with any cohort entries outside of the date range
#' dropped
#' @export
#'
#' @examples
requireInDateRange <- function(cohort,
indexDate = "cohort_start_date",
dateRange = as.Date(c(NA, NA))) {

checkCohort(cohort)
checkDateVariable(cohort = cohort, dateVar = indexDate)
checkDateRange(dateRange)

cohort <- cohort %>%
dplyr::filter(.data[[indexDate]] >= !!dateRange[1] &
.data[[indexDate]] <= !!dateRange[2]) %>%
CDMConnector::recordCohortAttrition(reason = paste0(
indexDate,
" between ", dateRange[1], " and ", dateRange[2]
))

cohort

}

#' Trim cohort dates to be within a date range
#'
#' @param cohort A cohort table in a cdm reference
#' @param startDate Variable with earliest date
#' @param endDate Variable with latest date
#' @param dateRange A window of time during which the index date must have
#' been observed
#'
#' @return
#' @export
#'
#' @examples
trimToDateRange <- function(cohort,
startDate = "cohort_start_date",
endDate = "cohort_end_date",
dateRange = as.Date(c(NA, NA))) {

checkCohort(cohort)
checkDateVariable(cohort = cohort, dateVar = startDate)
checkDateVariable(cohort = cohort, dateVar = endDate)
checkDateRange(dateRange)

#
# # validate inputs
# if (!isTRUE(inherits(cdm, "cdm_reference"))) {
# cli::cli_abort("cohort must be part of a cdm reference")
# }
# if (!"GeneratedCohortSet" %in% class(cohort) ||
# !all(c(
# "cohort_definition_id", "subject_id",
# "cohort_start_date", "cohort_end_date"
# ) %in%
# colnames(cohort))) {
# cli::cli_abort("cohort must be a GeneratedCohortSet")
# }
#
# if (!indexDate %in% colnames(cohort)) {
# cli::cli_abort(paste0(indexDate, " must be a date column in the cohort table"))
# }
#
# if (!endDateName %in% colnames(cohort)) {
# cli::cli_abort(paste0(endDateName, " must be a date column in the cohort table"))
# }

cohort <- trimStartDate(cohort = cohort,
startDate = startDate,
endDate = endDate,
minDate = dateRange[1]
) %>%
CDMConnector::recordCohortAttrition(reason = paste0(
startDate,
" >= ", dateRange[1]))

cohort <- trimEndDate(
cohort = cohort,
startDate = startDate,
endDate = endDate,
maxDate = dateRange[2]
) %>%
CDMConnector::recordCohortAttrition(reason = paste0(
endDate,
" <= ", dateRange[2]
))
}

trimStartDate <- function(cohort,
startDate,
endDate,
minDate) {

if (!is.na(startDate)) {
cohort <- cohort %>%
dplyr::mutate(!!startDate := dplyr::if_else(
.data[[startDate]] <= !!minDate,
as.Date(minDate), .data[[startDate]]
)) %>%
dplyr::filter(.data[[startDate]] <= .data[[endDate]])
}
return(cohort)
}
trimEndDate <- function(
cohort,
startDate,
endDate,
maxDate) {

if (!is.na(endDate)) {
cohort <- cohort %>%
dplyr::mutate(!!endDate := dplyr::if_else(
.data[[endDate]] >= !!maxDate,
as.Date(maxDate), .data[[endDate]]
)) %>%
dplyr::filter(.data[[startDate]] <= .data[[endDate]])
}
return(cohort)
}


checkCohort <- function(cohort){
if (!"GeneratedCohortSet" %in% class(cohort) ||
!all(c(
"cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date"
) %in%
colnames(cohort))) {
cli::cli_abort("cohort must be a GeneratedCohortSet")
}
}

checkDateVariable <- function(cohort, dateVar){
if (!dateVar %in% colnames(cohort)) {
cli::cli_abort(paste0(dateVar, " must be a date column in the cohort table"))
}
}

checkDateRange<-function(dateRange){
if(!"Date" %in% class(dateRange)){
cli::cli_abort("dateRange is not a date")
}
if(length(dateRange) != 2){
cli::cli_abort("dateRange must be length two")
}
if(dateRange[1]>dateRange[2]){
cli::cli_abort("First date in dateRange cannot be after second")
}
return(invisible(dateRange))
}



79 changes: 0 additions & 79 deletions R/requireDemographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,82 +280,3 @@ demographicsFilter <- function(cohort,
}


requireDateRange <- function(cohort,
indexDateName = "cohort_start_date",
endDateName = "cohort_end_date",
cohortDateRange = as.Date(c(NA, NA))) {
cdm <- attr(cohort, "cdm_reference")

# validate inputs
if (!isTRUE(inherits(cdm, "cdm_reference"))) {
cli::cli_abort("cohort must be part of a cdm reference")
}
if (!"GeneratedCohortSet" %in% class(cohort) ||
!all(c(
"cohort_definition_id", "subject_id",
"cohort_start_date", "cohort_end_date"
) %in%
colnames(cohort))) {
cli::cli_abort("cohort must be a GeneratedCohortSet")
}

if (!indexDateName %in% colnames(cohort)) {
cli::cli_abort(paste0(indexDateName, " must be a date column in the cohort table"))
}

if (!endDateName %in% colnames(cohort)) {
cli::cli_abort(paste0(endDateName, " must be a date column in the cohort table"))
}


cohort <- trimStartDate(cohort,
cdm,
indexDateName,
endDateName,
startDate = cohortDateRange[1]
)
cohort <- cohort %>%
CDMConnector::recordCohortAttrition(reason = paste0(
indexDateName,
" >= ", cohortDateRange[1]
))

cohort <- trimEndDate(
cohort,
cdm,
indexDateName,
endDateName,
cohortDateRange[2]
)

cohort <- cohort %>%
CDMConnector::recordCohortAttrition(reason = paste0(
indexDateName,
" <= ", cohortDateRange[2]
))
}


trimStartDate <- function(cohort, cdm, indexDateName, endDateName, startDate) {
if (!is.na(startDate)) {
cohort <- cohort %>%
dplyr::mutate(!!indexDateName := dplyr::if_else(
.data[[indexDateName]] <= !!startDate,
as.Date(startDate), .data[[indexDateName]]
)) %>%
dplyr::filter(.data[[indexDateName]] <= .data[[endDateName]])
}
return(cohort)
}

trimEndDate <- function(cohort, cdm, indexDateName, endDateName, endDate) {
if (!is.na(endDate)) {
cohort <- cohort %>%
dplyr::mutate(!!endDateName := dplyr::if_else(
.data[[endDateName]] >= !!endDate,
as.Date(endDate), .data[[endDateName]]
)) %>%
dplyr::filter(.data[[indexDateName]] <= .data[[endDateName]])
}
return(cohort)
}
Loading

0 comments on commit f3233f3

Please sign in to comment.