From c72f5a01acab727b40974e92b9f12d6520905fe4 Mon Sep 17 00:00:00 2001
From: edward-burn <9583964+edward-burn@users.noreply.github.com>
Date: Mon, 18 Dec 2023 10:06:57 +0000
Subject: [PATCH] updates

---
 .gitignore                                    |   1 +
 DESCRIPTION                                   |   7 +-
 NAMESPACE                                     |   2 +
 R/cohortTiming.R                              |   4 +
 R/generateMatchedCohortSet.R                  |   6 +-
 R/requireDateRange.R                          | 162 +++++++++++++
 R/requireDemographics.R                       |  79 -------
 README.Rmd                                    |  78 +++++--
 README.md                                     | 221 ++++++++++++------
 man/requireInDateRange.Rd                     |  27 +++
 man/trimToDateRange.Rd                        |  26 +++
 .../testthat/test-generateMatchedCohortSet.R  |  31 ++-
 tests/testthat/test-requireDateRange.R        |  60 +++++
 tests/testthat/test-requireDemographics.R     |  23 --
 vignettes/.gitignore                          |   2 +
 vignettes/a01_building_concept_cohorts.Rmd    |  30 +++
 .../a02_applying_cohort_restrictions.Rmd      |  19 ++
 vignettes/a03_age_sex_matching.Rmd            |  19 ++
 vignettes/a04_cohort_timing.Rmd               |  19 ++
 19 files changed, 616 insertions(+), 200 deletions(-)
 create mode 100644 R/cohortTiming.R
 create mode 100644 R/requireDateRange.R
 create mode 100644 man/requireInDateRange.Rd
 create mode 100644 man/trimToDateRange.Rd
 create mode 100644 tests/testthat/test-requireDateRange.R
 create mode 100644 vignettes/.gitignore
 create mode 100644 vignettes/a01_building_concept_cohorts.Rmd
 create mode 100644 vignettes/a02_applying_cohort_restrictions.Rmd
 create mode 100644 vignettes/a03_age_sex_matching.Rmd
 create mode 100644 vignettes/a04_cohort_timing.Rmd

diff --git a/.gitignore b/.gitignore
index 565f2b6a..f47ffaba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
 .Rdata
 .httr-oauth
 .DS_Store
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
index c2a5fc72..3c9a4773 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -28,7 +28,12 @@ Suggests:
     DBI,
     DrugUtilisation,
     duckdb,
+    knitr,
+    rmarkdown,
     testthat (>= 3.0.0),
-    tibble
+    tibble,
+    stringr,
+    IncidencePrevalence
 Config/testthat/edition: 3
 Config/testthat/parallel: true
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 3b040513..c84a6a8c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,10 +8,12 @@ export(requireAge)
 export(requireCohortIntersectFlag)
 export(requireDemographics)
 export(requireFutureObservation)
+export(requireInDateRange)
 export(requirePriorObservation)
 export(requireSex)
 export(restrictToFirstEntry)
 export(splitOverlap)
+export(trimToDateRange)
 importFrom(magrittr,"%>%")
 importFrom(rlang,":=")
 importFrom(rlang,.data)
diff --git a/R/cohortTiming.R b/R/cohortTiming.R
new file mode 100644
index 00000000..df0db1e0
--- /dev/null
+++ b/R/cohortTiming.R
@@ -0,0 +1,4 @@
+cohortTiming <- function(){
+
+  # output as summarised result
+}
diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R
index 7576c2e7..4a6ecde9 100644
--- a/R/generateMatchedCohortSet.R
+++ b/R/generateMatchedCohortSet.R
@@ -141,8 +141,8 @@ randomPrefix <- function(n = 5) {
 getNumberOfCohorts <- function(cdm, targetCohortName){
   # Read number of cohorts
   n <- cdm[[targetCohortName]] %>%
-    dplyr::summarise(v = max(.data$cohort_definition_id)) %>%
-    dplyr::pull(.data$v) # number of different cohorts
+    dplyr::summarise(v = max(.data$cohort_definition_id, na.rm = TRUE)) %>%
+    dplyr::pull("v") # number of different cohorts
 
   if(is.na(n)){# Empty table, number of cohorts is 0
     n <- 0
@@ -343,7 +343,7 @@ infiniteMatching <- function(cdm, name, targetCohortId){
       cdm[[name]] %>%
         dplyr::filter(.data$cohort_definition_id %in% .env$targetCohortId) %>%
         dplyr::group_by(.data$cohort_definition_id, .data$group_id) %>%
-        dplyr::mutate(max_cases = max(.data$pair_id)) %>%
+        dplyr::mutate(max_cases = max(.data$pair_id, na.rm = TRUE)) %>%
         dplyr::ungroup() %>%
         dplyr::select("group_id", "target_definition_id", "max_cases"),
       by = c("group_id", "target_definition_id")
diff --git a/R/requireDateRange.R b/R/requireDateRange.R
new file mode 100644
index 00000000..c4c2c2b2
--- /dev/null
+++ b/R/requireDateRange.R
@@ -0,0 +1,162 @@
+#' Require that an index date is within a date range
+#'
+#' @param cohort A cohort table in a cdm reference
+#' @param indexDate Variable in cohort that contains the index date of interest
+#' @param dateRange A window of time during which the index date must have
+#' been observed
+#'
+#' @return The cohort table with any cohort entries outside of the date range
+#' dropped
+#' @export
+#'
+#' @examples
+requireInDateRange <- function(cohort,
+                             indexDate = "cohort_start_date",
+                             dateRange = as.Date(c(NA, NA))) {
+
+  checkCohort(cohort)
+  checkDateVariable(cohort = cohort, dateVar = indexDate)
+  checkDateRange(dateRange)
+
+  cohort <- cohort %>%
+    dplyr::filter(.data[[indexDate]] >= !!dateRange[1] &
+                    .data[[indexDate]] <= !!dateRange[2]) %>%
+    CDMConnector::recordCohortAttrition(reason = paste0(
+      indexDate,
+      " between ", dateRange[1], " and ", dateRange[2]
+    ))
+
+  cohort
+
+}
+
+#' Trim cohort dates to be within a date range
+#'
+#' @param cohort A cohort table in a cdm reference
+#' @param startDate Variable with earliest date
+#' @param endDate Variable with latest date
+#' @param dateRange A window of time during which the index date must have
+#' been observed
+#'
+#' @return
+#' @export
+#'
+#' @examples
+trimToDateRange <- function(cohort,
+                            startDate = "cohort_start_date",
+                            endDate = "cohort_end_date",
+                            dateRange = as.Date(c(NA, NA))) {
+
+  checkCohort(cohort)
+  checkDateVariable(cohort = cohort, dateVar = startDate)
+  checkDateVariable(cohort = cohort, dateVar = endDate)
+  checkDateRange(dateRange)
+
+  #
+  #   # validate inputs
+  #   if (!isTRUE(inherits(cdm, "cdm_reference"))) {
+  #     cli::cli_abort("cohort must be part of a cdm reference")
+  #   }
+  #   if (!"GeneratedCohortSet" %in% class(cohort) ||
+  #     !all(c(
+  #       "cohort_definition_id", "subject_id",
+  #       "cohort_start_date", "cohort_end_date"
+  #     ) %in%
+  #       colnames(cohort))) {
+  #     cli::cli_abort("cohort must be a GeneratedCohortSet")
+  #   }
+  #
+  #   if (!indexDate %in% colnames(cohort)) {
+  #     cli::cli_abort(paste0(indexDate, " must be a date column in the cohort table"))
+  #   }
+  #
+  #   if (!endDateName %in% colnames(cohort)) {
+  #     cli::cli_abort(paste0(endDateName, " must be a date column in the cohort table"))
+  #   }
+
+  cohort <- trimStartDate(cohort = cohort,
+                          startDate = startDate,
+                          endDate = endDate,
+                          minDate = dateRange[1]
+  ) %>%
+    CDMConnector::recordCohortAttrition(reason = paste0(
+      startDate,
+      " >= ", dateRange[1]))
+
+  cohort <- trimEndDate(
+    cohort = cohort,
+    startDate = startDate,
+    endDate = endDate,
+    maxDate = dateRange[2]
+  ) %>%
+    CDMConnector::recordCohortAttrition(reason = paste0(
+      endDate,
+      " <= ", dateRange[2]
+    ))
+}
+
+trimStartDate <- function(cohort,
+                          startDate,
+                          endDate,
+                          minDate) {
+
+  if (!is.na(startDate)) {
+    cohort <- cohort %>%
+      dplyr::mutate(!!startDate := dplyr::if_else(
+        .data[[startDate]] <= !!minDate,
+        as.Date(minDate), .data[[startDate]]
+      )) %>%
+      dplyr::filter(.data[[startDate]] <= .data[[endDate]])
+  }
+  return(cohort)
+}
+trimEndDate <- function(
+    cohort,
+    startDate,
+    endDate,
+    maxDate) {
+
+  if (!is.na(endDate)) {
+    cohort <- cohort %>%
+      dplyr::mutate(!!endDate := dplyr::if_else(
+        .data[[endDate]] >= !!maxDate,
+        as.Date(maxDate), .data[[endDate]]
+      )) %>%
+      dplyr::filter(.data[[startDate]] <= .data[[endDate]])
+  }
+  return(cohort)
+}
+
+
+checkCohort <- function(cohort){
+  if (!"GeneratedCohortSet" %in% class(cohort) ||
+      !all(c(
+        "cohort_definition_id", "subject_id",
+        "cohort_start_date", "cohort_end_date"
+      ) %in%
+      colnames(cohort))) {
+    cli::cli_abort("cohort must be a GeneratedCohortSet")
+  }
+}
+
+checkDateVariable <- function(cohort, dateVar){
+  if (!dateVar %in% colnames(cohort)) {
+    cli::cli_abort(paste0(dateVar, " must be a date column in the cohort table"))
+  }
+}
+
+checkDateRange<-function(dateRange){
+  if(!"Date" %in% class(dateRange)){
+    cli::cli_abort("dateRange is not a date")
+  }
+  if(length(dateRange) != 2){
+    cli::cli_abort("dateRange must be length two")
+  }
+  if(dateRange[1]>dateRange[2]){
+    cli::cli_abort("First date in dateRange cannot be after second")
+  }
+  return(invisible(dateRange))
+}
+
+
+
diff --git a/R/requireDemographics.R b/R/requireDemographics.R
index f9ddfae6..d98a2654 100644
--- a/R/requireDemographics.R
+++ b/R/requireDemographics.R
@@ -280,82 +280,3 @@ demographicsFilter <- function(cohort,
 }
 
 
-requireDateRange <- function(cohort,
-                             indexDateName = "cohort_start_date",
-                             endDateName = "cohort_end_date",
-                             cohortDateRange = as.Date(c(NA, NA))) {
-  cdm <- attr(cohort, "cdm_reference")
-
-  # validate inputs
-  if (!isTRUE(inherits(cdm, "cdm_reference"))) {
-    cli::cli_abort("cohort must be part of a cdm reference")
-  }
-  if (!"GeneratedCohortSet" %in% class(cohort) ||
-    !all(c(
-      "cohort_definition_id", "subject_id",
-      "cohort_start_date", "cohort_end_date"
-    ) %in%
-      colnames(cohort))) {
-    cli::cli_abort("cohort must be a GeneratedCohortSet")
-  }
-
-  if (!indexDateName %in% colnames(cohort)) {
-    cli::cli_abort(paste0(indexDateName, " must be a date column in the cohort table"))
-  }
-
-  if (!endDateName %in% colnames(cohort)) {
-    cli::cli_abort(paste0(endDateName, " must be a date column in the cohort table"))
-  }
-
-
-  cohort <- trimStartDate(cohort,
-    cdm,
-    indexDateName,
-    endDateName,
-    startDate = cohortDateRange[1]
-  )
-  cohort <- cohort %>%
-    CDMConnector::recordCohortAttrition(reason = paste0(
-      indexDateName,
-      " >= ", cohortDateRange[1]
-    ))
-
-  cohort <- trimEndDate(
-    cohort,
-    cdm,
-    indexDateName,
-    endDateName,
-    cohortDateRange[2]
-  )
-
-  cohort <- cohort %>%
-    CDMConnector::recordCohortAttrition(reason = paste0(
-      indexDateName,
-      " <= ", cohortDateRange[2]
-    ))
-}
-
-
-trimStartDate <- function(cohort, cdm, indexDateName, endDateName, startDate) {
-  if (!is.na(startDate)) {
-    cohort <- cohort %>%
-      dplyr::mutate(!!indexDateName := dplyr::if_else(
-        .data[[indexDateName]] <= !!startDate,
-        as.Date(startDate), .data[[indexDateName]]
-      )) %>%
-      dplyr::filter(.data[[indexDateName]] <= .data[[endDateName]])
-  }
-  return(cohort)
-}
-
-trimEndDate <- function(cohort, cdm, indexDateName, endDateName, endDate) {
-  if (!is.na(endDate)) {
-    cohort <- cohort %>%
-      dplyr::mutate(!!endDateName := dplyr::if_else(
-        .data[[endDateName]] >= !!endDate,
-        as.Date(endDate), .data[[endDateName]]
-      )) %>%
-      dplyr::filter(.data[[indexDateName]] <= .data[[endDateName]])
-  }
-  return(cohort)
-}
diff --git a/README.Rmd b/README.Rmd
index 4607590d..c7aad010 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -22,7 +22,7 @@ knitr::opts_chunk$set(
 [![Lifecycle:Experimental](https://img.shields.io/badge/Lifecycle-Experimental-339999)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
 <!-- badges: end -->
 
-The goal of CohortConstructor is to help on the creation and manipulation of cohorts in the OMOP Common Data Model.
+The goal of CohortConstructor is to help on the creation and manipulation of cohorts in the OMOP Common Data Model. The package provides functions to support cohort building pipelines and additional functions to support cohort evaluation.
 
 ## Installation
 
@@ -33,11 +33,13 @@ You can install the development version of CohortConstructor from [GitHub](https
 devtools::install_github("oxford-pharmacoepi/CohortConstructor")
 ```
 
-## Example usage
+## Creating and manipulating cohorts
 
 ``` {r}
 library(CDMConnector)
 library(PatientProfiles)
+library(DrugUtilisation)
+library(dplyr)
 library(CohortConstructor)
 
 con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
@@ -46,22 +48,52 @@ cdm <- cdm_from_con(con, cdm_schema = "main",
 ```
 
 ### Generating concept based cohorts
+We'll start by generating a set of drug cohorts, using generateDrugUtilisationCohortSet from the DrugUtilisation R package. Here we make two cohorts, one for diclofenac and another for acetaminophen, combining records with a gap of 7 days or less.
 ``` {r}
-cdm <- generate_concept_cohort_set(cdm = cdm, 
+cdm <- generateDrugUtilisationCohortSet(cdm = cdm,  
                             name = "medications",
-                            concept_set = list("diclofenac" = 1124300,
-                                               "acetaminophen" = 1127433))
-cohort_count(cdm$medications)
-cohort_attrition(cdm$medications)
+                            conceptSet = list("diclofenac" = 1124300,
+                                           "acetaminophen" = 1127433),
+                            gapEra = 7)
+```
+
+We can see that our starting cohorts, before we add any additional restrictions, have the following counts
+``` {r}
+cohort_set(cdm$medications) %>% glimpse()
+cohort_count(cdm$medications) %>% glimpse()
+```
+
+### Require in date range
+We can require that individuals' cohort start date fall within a certain date range.
+``` {r}
+cdm$medications <- cdm$medications %>% 
+  requireInDateRange(indexDate = "cohort_start_date",
+                     dateRange = as.Date(c("2000-01-01", "2020-01-01")))
+```
+
+Now that we've applied these date restrictions, we can see how many people and records have been excluded
+``` {r}
+cohort_count(cdm$medications) %>% glimpse()
+cohort_attrition(cdm$medications) %>% glimpse()
+cohort_attrition(cdm$medications) %>% 
+  filter(reason == "cohort_start_date between 2000-01-01 and 2020-01-01") %>% 
+  glimpse()
 ```
 
 ### Applying demographic requirements
+We can also add restrictions on age (on cohort start date) and sex.
 ``` {r}
-cdm$medications %>% 
-  requireDemographics(ageRange = list(c(40, 65)),
+cdm$medications <- cdm$medications %>% 
+  requireDemographics(indexDate = "cohort_start_date",
+                      ageRange = list(c(40, 65)),
                       sex = "Female")
-cohort_count(cdm$medications)
-cohort_attrition(cdm$medications)
+```
+
+Again we can see how many individuals we've lost after applying this criteria.
+``` {r}
+cohort_attrition(cdm$medications) %>% 
+  filter(reason == "Demographic requirements") %>% 
+  glimpse()
 ```
 
 ### Require presence in another cohort
@@ -74,14 +106,27 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
 cdm$medications <- cdm$medications %>% 
   requireCohortIntersectFlag(targetCohortTable = "gibleed",
                              window = c(-Inf, 0))
-
-cohort_count(cdm$medications)
-cohort_attrition(cdm$medications)
 ```
 
+``` {r}
+cohort_attrition(cdm$medications) %>% 
+  filter(reason == "In cohort gibleed between -Inf and 0 days relative to cohort_start_date") %>% 
+  glimpse()
+```
 
 ### Combining cohorts
 
+Currently we have two separate cohorts. One of users of diclofenac, the other of users of acetaminophen. 
+
+Let's say we want to create a cohort of people taking **either** diclofenac or acetaminophen. We could create this cohort like so:
+```{r}
+
+```
+
+Alternatively, we might want to create a cohort of people taking **both** diclofenac and acetaminophen. For this we can create this combination cohort like so:
+
+Both diclofenac and acetaminophen
+
 Generate a combination cohort.
 
 ```{r}
@@ -91,8 +136,11 @@ cdm <- generateCombinationCohortSet(cdm = cdm,
 
 cohortSet(cdm$combinations)
 cohortCount(cdm$combinations)
+```
 
-
+```{r}
 cdmDisconnect(cdm)
 ```
 
+
+## Evaluating cohorts
diff --git a/README.md b/README.md
index bf1313ba..bf14e598 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,9 @@ status](https://www.r-pkg.org/badges/version/CohortConstructor)](https://CRAN.R-
 <!-- badges: end -->
 
 The goal of CohortConstructor is to help on the creation and
-manipulation of cohorts in the OMOP Common Data Model.
+manipulation of cohorts in the OMOP Common Data Model. The package
+provides functions to support cohort building pipelines and additional
+functions to support cohort evaluation.
 
 ## Installation
 
@@ -25,13 +27,23 @@ You can install the development version of CohortConstructor from
 devtools::install_github("oxford-pharmacoepi/CohortConstructor")
 ```
 
-## Example usage
+## Creating and manipulating cohorts
 
 ``` r
 library(CDMConnector)
-#> Warning: package 'CDMConnector' was built under R version 4.2.3
 library(PatientProfiles)
-#> Warning: package 'PatientProfiles' was built under R version 4.2.3
+library(DrugUtilisation)
+#> Warning: package 'DrugUtilisation' was built under R version 4.2.3
+library(dplyr)
+#> Warning: package 'dplyr' was built under R version 4.2.3
+#> 
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#> 
+#>     filter, lag
+#> The following objects are masked from 'package:base':
+#> 
+#>     intersect, setdiff, setequal, union
 library(CohortConstructor)
 
 con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
@@ -41,62 +53,117 @@ cdm <- cdm_from_con(con, cdm_schema = "main",
 
 ### Generating concept based cohorts
 
+We’ll start by generating a set of drug cohorts, using
+generateDrugUtilisationCohortSet from the DrugUtilisation R package.
+Here we make two cohorts, one for diclofenac and another for
+acetaminophen, combining records with a gap of 7 days or less.
+
 ``` r
-cdm <- generate_concept_cohort_set(cdm = cdm, 
+cdm <- generateDrugUtilisationCohortSet(cdm = cdm,  
                             name = "medications",
-                            concept_set = list("diclofenac" = 1124300,
-                                               "acetaminophen" = 1127433))
-cohort_count(cdm$medications)
-#> # A tibble: 2 × 3
-#>   cohort_definition_id number_records number_subjects
-#>                  <int>          <dbl>           <dbl>
-#> 1                    1            830             830
-#> 2                    2           2580            2580
-cohort_attrition(cdm$medications)
-#> # A tibble: 2 × 7
-#>   cohort_definition_id number_records number_subjects reason_id reason          
-#>                  <int>          <dbl>           <dbl>     <dbl> <chr>           
-#> 1                    1            830             830         1 Qualifying init…
-#> 2                    2           2580            2580         1 Qualifying init…
-#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
+                            conceptSet = list("diclofenac" = 1124300,
+                                           "acetaminophen" = 1127433),
+                            gapEra = 7)
+```
+
+We can see that our starting cohorts, before we add any additional
+restrictions, have the following counts
+
+``` r
+cohort_set(cdm$medications) %>% glimpse()
+#> Rows: 2
+#> Columns: 11
+#> $ cohort_definition_id    <int> 1, 2
+#> $ cohort_name             <chr> "diclofenac", "acetaminophen"
+#> $ duration_range_min      <chr> "1", "1"
+#> $ duration_range_max      <chr> "Inf", "Inf"
+#> $ impute_duration         <chr> "none", "none"
+#> $ gap_era                 <chr> "7", "7"
+#> $ prior_use_washout       <chr> "0", "0"
+#> $ prior_observation       <chr> "0", "0"
+#> $ cohort_date_range_start <chr> NA, NA
+#> $ cohort_date_range_end   <chr> NA, NA
+#> $ limit                   <chr> "all", "all"
+cohort_count(cdm$medications) %>% glimpse()
+#> Rows: 2
+#> Columns: 3
+#> $ cohort_definition_id <int> 2, 1
+#> $ number_records       <dbl> 9363, 830
+#> $ number_subjects      <dbl> 2580, 830
+```
+
+### Require in date range
+
+We can require that individuals’ cohort start date fall within a certain
+date range.
+
+``` r
+cdm$medications <- cdm$medications %>% 
+  requireInDateRange(indexDate = "cohort_start_date",
+                     dateRange = as.Date(c("2000-01-01", "2020-01-01")))
+```
+
+Now that we’ve applied these date restrictions, we can see how many
+people and records have been excluded
+
+``` r
+cohort_count(cdm$medications) %>% glimpse()
+#> Rows: 2
+#> Columns: 3
+#> $ cohort_definition_id <int> 2, 1
+#> $ number_records       <dbl> 2750, 397
+#> $ number_subjects      <dbl> 1737, 397
+cohort_attrition(cdm$medications) %>% glimpse()
+#> Rows: 16
+#> Columns: 7
+#> $ cohort_definition_id <int> 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1
+#> $ number_records       <dbl> 9365, 9365, 9363, 9363, 9363, 9363, 9363, 850, 85…
+#> $ number_subjects      <dbl> 2580, 2580, 2580, 2580, 2580, 2580, 2580, 850, 85…
+#> $ reason_id            <dbl> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8, 8
+#> $ reason               <chr> "Qualifying initial records", "Duration imputatio…
+#> $ excluded_records     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 6613, …
+#> $ excluded_subjects    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 843, 4…
+cohort_attrition(cdm$medications) %>% 
+  filter(reason == "cohort_start_date between 2000-01-01 and 2020-01-01") %>% 
+  glimpse()
+#> Rows: 2
+#> Columns: 7
+#> $ cohort_definition_id <int> 2, 1
+#> $ number_records       <dbl> 2750, 397
+#> $ number_subjects      <dbl> 1737, 397
+#> $ reason_id            <dbl> 8, 8
+#> $ reason               <chr> "cohort_start_date between 2000-01-01 and 2020-01…
+#> $ excluded_records     <dbl> 6613, 433
+#> $ excluded_subjects    <dbl> 843, 433
 ```
 
 ### Applying demographic requirements
 
+We can also add restrictions on age (on cohort start date) and sex.
+
 ``` r
-cdm$medications %>% 
-  requireDemographics(ageRange = list(c(40, 65)),
+cdm$medications <- cdm$medications %>% 
+  requireDemographics(indexDate = "cohort_start_date",
+                      ageRange = list(c(40, 65)),
                       sex = "Female")
-#> # Source:   SQL [?? x 4]
-#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpIngDmK\file4f3841e26e9e.duckdb]
-#>    cohort_definition_id subject_id cohort_start_date cohort_end_date
-#>                   <int>      <dbl> <date>            <date>         
-#>  1                    1         18 2009-03-21        2018-11-07     
-#>  2                    1        893 1993-09-26        2019-05-06     
-#>  3                    1       2396 1961-08-30        2001-02-28     
-#>  4                    1       3159 2000-01-26        2018-10-18     
-#>  5                    1       3376 1994-05-06        2019-06-28     
-#>  6                    1       4071 1998-08-07        2018-12-27     
-#>  7                    1       4636 1986-10-26        2018-10-23     
-#>  8                    1       4690 2001-03-20        2018-10-14     
-#>  9                    1       4701 2011-07-10        2018-12-22     
-#> 10                    2       3951 1997-12-11        2019-04-13     
-#> # ℹ more rows
-cohort_count(cdm$medications)
-#> # A tibble: 2 × 3
-#>   cohort_definition_id number_records number_subjects
-#>                  <int>          <dbl>           <dbl>
-#> 1                    1            156             156
-#> 2                    2             76              76
-cohort_attrition(cdm$medications)
-#> # A tibble: 4 × 7
-#>   cohort_definition_id number_records number_subjects reason_id reason          
-#>                  <int>          <dbl>           <dbl>     <dbl> <chr>           
-#> 1                    1            830             830         1 Qualifying init…
-#> 2                    2           2580            2580         1 Qualifying init…
-#> 3                    2             76              76         2 Demographic req…
-#> 4                    1            156             156         2 Demographic req…
-#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
+```
+
+Again we can see how many individuals we’ve lost after applying this
+criteria.
+
+``` r
+cohort_attrition(cdm$medications) %>% 
+  filter(reason == "Demographic requirements") %>% 
+  glimpse()
+#> Rows: 2
+#> Columns: 7
+#> $ cohort_definition_id <int> 2, 1
+#> $ number_records       <dbl> 787, 75
+#> $ number_subjects      <dbl> 551, 75
+#> $ reason_id            <dbl> 9, 9
+#> $ reason               <chr> "Demographic requirements", "Demographic requirem…
+#> $ excluded_records     <dbl> 1963, 322
+#> $ excluded_subjects    <dbl> 1186, 322
 ```
 
 ### Require presence in another cohort
@@ -113,28 +180,37 @@ cdm <- generate_concept_cohort_set(cdm = cdm,
 cdm$medications <- cdm$medications %>% 
   requireCohortIntersectFlag(targetCohortTable = "gibleed",
                              window = c(-Inf, 0))
+```
 
-cohort_count(cdm$medications)
-#> # A tibble: 2 × 3
-#>   cohort_definition_id number_records number_subjects
-#>                  <int>          <dbl>           <dbl>
-#> 1                    2             36              36
-#> 2                    1              0               0
-cohort_attrition(cdm$medications)
-#> # A tibble: 6 × 7
-#>   cohort_definition_id number_records number_subjects reason_id reason          
-#>                  <int>          <dbl>           <dbl>     <dbl> <chr>           
-#> 1                    1            830             830         1 Qualifying init…
-#> 2                    2           2580            2580         1 Qualifying init…
-#> 3                    2             76              76         2 Demographic req…
-#> 4                    1            156             156         2 Demographic req…
-#> 5                    2             36              36         3 In cohort gible…
-#> 6                    1              0               0         3 In cohort gible…
-#> # ℹ 2 more variables: excluded_records <dbl>, excluded_subjects <dbl>
+``` r
+cohort_attrition(cdm$medications) %>% 
+  filter(reason == "In cohort gibleed between -Inf and 0 days relative to cohort_start_date") %>% 
+  glimpse()
+#> Rows: 2
+#> Columns: 7
+#> $ cohort_definition_id <int> 2, 1
+#> $ number_records       <dbl> 136, 0
+#> $ number_subjects      <dbl> 89, 0
+#> $ reason_id            <dbl> 10, 10
+#> $ reason               <chr> "In cohort gibleed between -Inf and 0 days relati…
+#> $ excluded_records     <dbl> 651, 75
+#> $ excluded_subjects    <dbl> 462, 75
 ```
 
 ### Combining cohorts
 
+Currently we have two separate cohorts. One of users of diclofenac, the
+other of users of acetaminophen.
+
+Let’s say we want to create a cohort of people taking **either**
+diclofenac or acetaminophen. We could create this cohort like so:
+
+Alternatively, we might want to create a cohort of people taking
+**both** diclofenac and acetaminophen. For this we can create this
+combination cohort like so:
+
+Both diclofenac and acetaminophen
+
 Generate a combination cohort.
 
 ``` r
@@ -153,10 +229,13 @@ cohortCount(cdm$combinations)
 #> # A tibble: 3 × 3
 #>   cohort_definition_id number_records number_subjects
 #>                  <int>          <dbl>           <dbl>
-#> 1                    2             36              36
+#> 1                    2            136              89
 #> 2                    1              0               0
 #> 3                    3              0               0
+```
 
-
+``` r
 cdmDisconnect(cdm)
 ```
+
+## Evaluating cohorts
diff --git a/man/requireInDateRange.Rd b/man/requireInDateRange.Rd
new file mode 100644
index 00000000..d03705cf
--- /dev/null
+++ b/man/requireInDateRange.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/requireDateRange.R
+\name{requireInDateRange}
+\alias{requireInDateRange}
+\title{Require that an index date is within a date range}
+\usage{
+requireInDateRange(
+  cohort,
+  indexDate = "cohort_start_date",
+  dateRange = as.Date(c(NA, NA))
+)
+}
+\arguments{
+\item{cohort}{A cohort table in a cdm reference}
+
+\item{indexDate}{Variable in cohort that contains the index date of interest}
+
+\item{dateRange}{A window of time during which the index date must have
+been observed}
+}
+\value{
+The cohort table with any cohort entries outside of the date range
+dropped
+}
+\description{
+Require that an index date is within a date range
+}
diff --git a/man/trimToDateRange.Rd b/man/trimToDateRange.Rd
new file mode 100644
index 00000000..59cb6f35
--- /dev/null
+++ b/man/trimToDateRange.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/requireDateRange.R
+\name{trimToDateRange}
+\alias{trimToDateRange}
+\title{Trim cohort dates to be within a date range}
+\usage{
+trimToDateRange(
+  cohort,
+  startDate = "cohort_start_date",
+  endDate = "cohort_end_date",
+  dateRange = as.Date(c(NA, NA))
+)
+}
+\arguments{
+\item{cohort}{A cohort table in a cdm reference}
+
+\item{startDate}{Variable with earliest date}
+
+\item{endDate}{Variable with latest date}
+
+\item{dateRange}{A window of time during which the index date must have
+been observed}
+}
+\description{
+Trim cohort dates to be within a date range
+}
diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R
index 87c20da4..704857b9 100644
--- a/tests/testthat/test-generateMatchedCohortSet.R
+++ b/tests/testthat/test-generateMatchedCohortSet.R
@@ -128,16 +128,31 @@ test_that("check that we obtain expected result when ratio is 1", {
                                               matchSex = TRUE,
                                               matchYearOfBirth = TRUE,
                                               ratio = 1)
-  expect_true(
-    length(CDMConnector::cohort_count(matched_cohorts[["new_cohort"]]) %>%
-             dplyr::select("number_records") %>%
-             dplyr::pull() %>%
-             unique()) == 3
-  )
 
-  # Everybody has a matched
+  expect_true(nrow(CDMConnector::cohort_count(matched_cohorts$new_cohort) %>%
+    dplyr::left_join(CDMConnector::cohort_set(matched_cohorts$new_cohort),
+                     by = "cohort_definition_id") %>%
+    dplyr::filter(stringr::str_detect(cohort_name, "c1"))  %>%
+      dplyr::select("number_records") %>%
+      dplyr::distinct()) == 1)
+  expect_true(nrow(CDMConnector::cohort_count(matched_cohorts$new_cohort) %>%
+                     dplyr::left_join(CDMConnector::cohort_set(matched_cohorts$new_cohort),
+                                      by = "cohort_definition_id") %>%
+                     dplyr::filter(stringr::str_detect(cohort_name, "c2"))  %>%
+                     dplyr::select("number_records") %>%
+                     dplyr::distinct()) == 1)
+  expect_true(nrow(CDMConnector::cohort_count(matched_cohorts$new_cohort) %>%
+                     dplyr::left_join(CDMConnector::cohort_set(matched_cohorts$new_cohort),
+                                      by = "cohort_definition_id") %>%
+                     dplyr::filter(stringr::str_detect(cohort_name, "c3"))  %>%
+                     dplyr::select("number_records") %>%
+                     dplyr::distinct()) == 1)
+
+
+
+  # Everybody has a match
   n <- matched_cohorts[["new_cohort"]] %>%
-    dplyr::summarise(n = max(.data$cohort_definition_id)/2) %>%
+    dplyr::summarise(n = max(.data$cohort_definition_id, na.rm = TRUE)/2) %>%
     dplyr::pull()
 
   cohorts <- matched_cohorts[["new_cohort"]] %>%
diff --git a/tests/testthat/test-requireDateRange.R b/tests/testthat/test-requireDateRange.R
new file mode 100644
index 00000000..f9b2a95e
--- /dev/null
+++ b/tests/testthat/test-requireDateRange.R
@@ -0,0 +1,60 @@
+test_that("requireDateRange", {
+  # one person, one observation periods
+  cohortTable <- dplyr::tibble(
+    cohort_definition_id = c(1, 1, 1),
+    subject_id = c("1", "2", "3"),
+    cohort_start_date = as.Date(c("2010-06-06", "2011-06-06", "2012-06-08")),
+    cohort_end_date = as.Date(c("2013-06-06", "2013-06-06", "2013-02-01"))
+  )
+
+  cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohortTable)
+  cdm$cohort1 <- cdm$cohort1 %>%
+    requireInDateRange(dateRange = as.Date(c("2010-01-01", "2011-01-01")))
+
+  expect_true(cdm$cohort1 %>%
+    dplyr::pull("subject_id") == 1L)
+
+  # expect error
+  expect_error(requireInDateRange(cohort = "a"))
+  expect_error(cdm$cohort1 %>%
+    requireInDateRange(dateRange = as.Date(c("2010-01-01"))))
+  expect_error(cdm$cohort1 %>%
+    requireInDateRange(dateRange = as.Date(c("2010-01-01", "2010-01-01",
+                                             "2009-01-01"))))
+  expect_error(cdm$cohort1 %>%
+    requireInDateRange(dateRange = c("a", "b")))
+
+  CDMConnector::cdm_disconnect(cdm)
+})
+
+test_that("trim cohort dates", {
+  # one person, one observation periods
+  cohortTable <- dplyr::tibble(
+    cohort_definition_id = c(1, 1, 1),
+    subject_id = c("1", "2", "3"),
+    cohort_start_date = as.Date(c("2010-06-06", "2011-06-06", "2012-06-08")),
+    cohort_end_date = as.Date(c("2013-06-06", "2011-09-06", "2013-02-01"))
+  )
+
+  cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohortTable)
+  cdm$cohort1 <- cdm$cohort1 %>%
+    trimToDateRange(dateRange = as.Date(c("2011-01-01", "2012-01-01")))
+
+  expect_equal(sort(cdm$cohort1 %>%
+    dplyr::pull("subject_id")), c("1", "2"))
+  expect_true(cdm$cohort1 %>%
+    dplyr::filter(subject_id == "1") %>%
+    dplyr::pull("cohort_start_date") == as.Date("2011-01-01"))
+  expect_true(cdm$cohort1 %>%
+                dplyr::filter(subject_id == "1") %>%
+                dplyr::pull("cohort_end_date") == as.Date("2012-01-01"))
+  expect_true(cdm$cohort1 %>%
+                dplyr::filter(subject_id == "2") %>%
+                dplyr::pull("cohort_start_date") == as.Date("2011-06-06"))
+  expect_true(cdm$cohort1 %>%
+                dplyr::filter(subject_id == "2") %>%
+                dplyr::pull("cohort_end_date") == as.Date("2011-09-06"))
+
+CDMConnector::cdm_disconnect(cdm)
+
+  } )
diff --git a/tests/testthat/test-requireDemographics.R b/tests/testthat/test-requireDemographics.R
index b0c58665..71c0efc5 100644
--- a/tests/testthat/test-requireDemographics.R
+++ b/tests/testthat/test-requireDemographics.R
@@ -142,29 +142,6 @@ test_that("ignore existing cohort extra variables", {
 })
 
 
-
-test_that("requireDateRange", {
-  # one person, one observation periods
-
-  cohortTable <- dplyr::tibble(
-    cohort_definition_id = c(1, 1, 1),
-    subject_id = c("1", "2", "3"),
-    cohort_start_date = as.Date(c("2010-06-06", "2010-06-06", "2010-06-08")),
-    cohort_end_date = as.Date(c("2013-06-06", "2013-06-06", "2013-02-01"))
-  )
-
-  cdm <- PatientProfiles::mockPatientProfiles(cohort1 = cohortTable)
-  cdm$cohort1 <- cdm$cohort1 %>%
-    requireDateRange(cohortDateRange = as.Date(c("2010-06-06", "2013-02-01")))
-
-  expect_true(all(cdm$cohort1 %>% dplyr::pull(cohort_start_date) ==
-    as.Date(c("2010-06-06", "2010-06-06", "2010-06-08"))))
-
-  CDMConnector::cdm_disconnect(cdm)
-})
-
-
-
 test_that("external columns kept after requireDemographics", {
   cdm <- PatientProfiles::mockPatientProfiles(
     patient_size = 100,
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 00000000..097b2416
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/a01_building_concept_cohorts.Rmd b/vignettes/a01_building_concept_cohorts.Rmd
new file mode 100644
index 00000000..7d5bcdc5
--- /dev/null
+++ b/vignettes/a01_building_concept_cohorts.Rmd
@@ -0,0 +1,30 @@
+---
+title: "a01_building_concept_cohorts"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{a01_building_concept_cohorts}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(CDMConnector)
+library(IncidencePrevalence)
+library(DrugUtilisation)
+library(CohortConstructor)
+```
+
+## Condition concept based cohort creation
+
+## Drug concept based cohort creation
+
+## Demographic based cohort creation
+
+## Combining cohorts
diff --git a/vignettes/a02_applying_cohort_restrictions.Rmd b/vignettes/a02_applying_cohort_restrictions.Rmd
new file mode 100644
index 00000000..2bd24072
--- /dev/null
+++ b/vignettes/a02_applying_cohort_restrictions.Rmd
@@ -0,0 +1,19 @@
+---
+title: "a02_applying_cohort_restrictions"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{a02_applying_cohort_restrictions}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(CohortConstructor)
+```
diff --git a/vignettes/a03_age_sex_matching.Rmd b/vignettes/a03_age_sex_matching.Rmd
new file mode 100644
index 00000000..6fc177f8
--- /dev/null
+++ b/vignettes/a03_age_sex_matching.Rmd
@@ -0,0 +1,19 @@
+---
+title: "a03_age_sex_matching"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{a03_age_sex_matching}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(CohortConstructor)
+```
diff --git a/vignettes/a04_cohort_timing.Rmd b/vignettes/a04_cohort_timing.Rmd
new file mode 100644
index 00000000..ae8f6f5b
--- /dev/null
+++ b/vignettes/a04_cohort_timing.Rmd
@@ -0,0 +1,19 @@
+---
+title: "a04_cohort_timing"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{a04_cohort_timing}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(CohortConstructor)
+```