From eb76cf940453c1b88428da512aaaa2c712cbc9e6 Mon Sep 17 00:00:00 2001
From: Marta Alcalde-Herraiz <marta.alcalde@estudiantat.upc.edu>
Date: Mon, 8 Jan 2024 15:37:21 +0000
Subject: [PATCH 1/5] generateMatchedCohortSet vignette

---
 R/generateMatchedCohortSet.R       |  6 +--
 vignettes/a03_age_sex_matching.Rmd | 62 ++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R
index 4a6ecde9..bcf99efa 100644
--- a/R/generateMatchedCohortSet.R
+++ b/R/generateMatchedCohortSet.R
@@ -63,7 +63,7 @@ generateMatchedCohortSet <- function(cdm,
     cdm <- checkRatio(cdm, name, ratio, targetCohortId, n)
 
     # Check cohort set ref
-    cdm <- checkCohortSetRef(cdm, name, matchSex, matchYearOfBirth, targetCohortId, n)
+    cdm <- checkCohortSetRef(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n)
   }
   # Return
   return(cdm)
@@ -416,10 +416,10 @@ checkRatio <- function(cdm, name, ratio, targetCohortId, n){
 }
 
 
-checkCohortSetRef <- function(cdm, name, matchSex, matchYearOfBirth, targetCohortId, n){
+checkCohortSetRef <- function(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n){
   cohort_set_ref <- cdm[[name]] %>%
     CDMConnector::cohort_set() %>%
-    dplyr::mutate(target_cohort_name  = .env$name) %>%
+    dplyr::mutate(target_cohort_name  = .env$targetCohortName) %>%
     dplyr::mutate(match_sex           = .env$matchSex) %>%
     dplyr::mutate(match_year_of_birth = .env$matchYearOfBirth) %>%
     dplyr::mutate(match_status        = dplyr::if_else(.data$cohort_definition_id %in% .env$targetCohortId, "target", "matched")) %>%
diff --git a/vignettes/a03_age_sex_matching.Rmd b/vignettes/a03_age_sex_matching.Rmd
index 6fc177f8..a0bae04a 100644
--- a/vignettes/a03_age_sex_matching.Rmd
+++ b/vignettes/a03_age_sex_matching.Rmd
@@ -14,6 +14,68 @@ knitr::opts_chunk$set(
 )
 ```
 
+# Introduction
+CohortConstructor packages includes a function to obtain an age and sex matched cohort, the `generateMatchedCohortSet()` function. In this vignette, we will explore the functionalities of the function and provide valuable examples for its usage.
+
+## Create mock data
+We will first use `mockDrugUtilisation()` functin from DrugUtilisation package to create mock data.
+
 ```{r setup}
 library(CohortConstructor)
+library(dplyr)
+library(DrugUtilisation)
+
+cdm <- mockDrugUtilisation(numberIndividual  = 200)
+```
+
+As we will use `cohort1` to explore `generateMatchedCohortSet()`, let us first use `cohort_attrition()` from CDMConnector package to explore the this cohort:
+
+```{r setup}
+CDMConnector::cohort_set(cdm$cohort1)
 ```
+Notice that there are three cohorts within this tibble, with id's going from 1 to 3. 
+
+# Use `generateMatchedCohortSet()` to create an age-sex matched cohort
+Let us first see an example of how this function works. For its usage, we need to provide a `cdm` object, the `targetCohortName`, which is the name of the table containing the cohort of interest, and the `name` of the new table that will be created containing the cohort and the matching cohort. We will also use the argument `targetCohortId` to specify that we only want a matching cohort for `cohort_definition_id = 1`.
+
+```{r setup}
+cdm <- generateMatchedCohortSet(cdm  = cdm,
+                           name = "matched_cohort1",
+                           targetCohortName = "cohort1",
+                           targetCohortId = 1)
+
+CDMConnector::cohort_set(cdm$matched_cohort1)
+```
+Notice that in the generated tibble, there are two cohorts: `cohort_definition_id = 1` (original cohort), and `cohort_definition_id = 4` (matched cohort). `target_cohort_name` column indicates which is the original cohort. `match_sex` and `match_year_of_birth` adopt boolean values (`TRUE`/`FALSE`) indicating if we have matched for sex and age, or not. `match_status` indicate if it is the original cohort (`target`) or if it is the matched cohort (`matched`). `target_cohort_id` indicates which is the cohort_id of the original cohort.
+
+`matchSex` is a boolean parameter (`TRUE`/`FALSE`) indicating if we want to match by sex (`TRUE`) or we do not want (`FALSE`).
+
+`matchYear` is another boolean parameter (`TRUE`/`FALSE`) indicating if we want to match by age (`TRUE`) or we do not want (`FALSE`).
+
+The default matching ratio is 1:1. Use `cohort_counts()` from CDMConnector to check if the matching has been done as desired.
+
+```{r setup}
+CDMConnector::cohort_count(cdm$matched_cohort1)
+```
+To have more information about the exclusion criteria applied to perform the matching, use `cohort_attrition()` from CDMConnector package:
+```{r setup}
+# Original cohort
+CDMConnector::cohort_attrition(cdm$matched_cohort1) %>% filter(cohort_definition_id == 1)
+
+# Matched cohort
+CDMConnector::cohort_attrition(cdm$matched_cohort1) %>% filter(cohort_definition_id == 4)
+```
+Briefly, from the original cohort, we exclude first those individuals that do not have a match, and then individuals that their matching pair is not in observation during the assigned `cohort_start_date`. From the matched cohort, we start from the whole database and we first exclude individuals that are in the original cohort. Afterwards, we exclude individuals that do not have a match, then individuals that are not in observation during the assigned `cohort_start_date`, and finally we remove the as many individuals as required to fulfill the ratio.
+
+You can modify the `ratio` parameter to tailor your matched cohort.
+
+```{r setup}
+cdm <- generateMatchedCohortSet(cdm  = cdm,
+                           name = "matched_cohort1",
+                           targetCohortName = "cohort1",
+                           targetCohortId = 1,
+                           ratio = Inf)
+
+CDMConnector::cohort_count(cdm$matched_cohort1)
+```
+

From 67c0b0f0b395138a79ad00d8af48cceb4eab34c4 Mon Sep 17 00:00:00 2001
From: Marta Alcalde-Herraiz <marta.alcalde@estudiantat.upc.edu>
Date: Mon, 8 Jan 2024 15:50:23 +0000
Subject: [PATCH 2/5] Update 03_age_sex_matching

---
 vignettes/a03_age_sex_matching.Rmd | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vignettes/a03_age_sex_matching.Rmd b/vignettes/a03_age_sex_matching.Rmd
index a0bae04a..c1bf5c6a 100644
--- a/vignettes/a03_age_sex_matching.Rmd
+++ b/vignettes/a03_age_sex_matching.Rmd
@@ -20,7 +20,7 @@ CohortConstructor packages includes a function to obtain an age and sex matched
 ## Create mock data
 We will first use `mockDrugUtilisation()` functin from DrugUtilisation package to create mock data.
 
-```{r setup}
+```{r, include = FALSE}
 library(CohortConstructor)
 library(dplyr)
 library(DrugUtilisation)
@@ -30,7 +30,7 @@ cdm <- mockDrugUtilisation(numberIndividual  = 200)
 
 As we will use `cohort1` to explore `generateMatchedCohortSet()`, let us first use `cohort_attrition()` from CDMConnector package to explore the this cohort:
 
-```{r setup}
+```{r, include = FALSE}
 CDMConnector::cohort_set(cdm$cohort1)
 ```
 Notice that there are three cohorts within this tibble, with id's going from 1 to 3. 
@@ -38,7 +38,7 @@ Notice that there are three cohorts within this tibble, with id's going from 1 t
 # Use `generateMatchedCohortSet()` to create an age-sex matched cohort
 Let us first see an example of how this function works. For its usage, we need to provide a `cdm` object, the `targetCohortName`, which is the name of the table containing the cohort of interest, and the `name` of the new table that will be created containing the cohort and the matching cohort. We will also use the argument `targetCohortId` to specify that we only want a matching cohort for `cohort_definition_id = 1`.
 
-```{r setup}
+```{r, include = FALSE}
 cdm <- generateMatchedCohortSet(cdm  = cdm,
                            name = "matched_cohort1",
                            targetCohortName = "cohort1",
@@ -54,11 +54,11 @@ Notice that in the generated tibble, there are two cohorts: `cohort_definition_i
 
 The default matching ratio is 1:1. Use `cohort_counts()` from CDMConnector to check if the matching has been done as desired.
 
-```{r setup}
+```{r, include = FALSE}
 CDMConnector::cohort_count(cdm$matched_cohort1)
 ```
 To have more information about the exclusion criteria applied to perform the matching, use `cohort_attrition()` from CDMConnector package:
-```{r setup}
+```{r, include = FALSE}
 # Original cohort
 CDMConnector::cohort_attrition(cdm$matched_cohort1) %>% filter(cohort_definition_id == 1)
 
@@ -69,7 +69,7 @@ Briefly, from the original cohort, we exclude first those individuals that do no
 
 You can modify the `ratio` parameter to tailor your matched cohort.
 
-```{r setup}
+```{r, include = FALSE}
 cdm <- generateMatchedCohortSet(cdm  = cdm,
                            name = "matched_cohort1",
                            targetCohortName = "cohort1",

From b7f6de277df3aa61f4b432c4b6cb602da66ae199 Mon Sep 17 00:00:00 2001
From: Marta Alcalde-Herraiz <marta.alcalde@estudiantat.upc.edu>
Date: Fri, 12 Jan 2024 16:12:47 +0000
Subject: [PATCH 3/5] Update

- Add an error message for ratio < 1
- Update matching vignette
---
 .Rhistory                          | 553 ++++++++++++++++++++++++++---
 R/generateMatchedCohortSet.R       |   7 +
 vignettes/a03_age_sex_matching.Rmd |  80 +++--
 3 files changed, 571 insertions(+), 69 deletions(-)

diff --git a/.Rhistory b/.Rhistory
index 08afce68..1d3b7143 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,41 +1,512 @@
-x <- c(1, 2, 3, 4, 5)
-expand.grid(c)
-expand.grid(x)
-expand.grid(rep(list(x), length(x)))
-5*5*5*5*5
-lapply(names, c(0, 1))
-lapply(names, function(x){c(0, 1)})
-names
-names <- c("cohort1", "cohort2", "cohort3", "cohort3")
-names <- c("cohort1", "cohort2", "cohort3", "cohort4")
-lapply(names, function(x){c(0, 1)})
-lapply(names, function(x){c(0, 1)}) |>
-expand.grid()
-lapply(names, function(x){c(0, 1)}) |>
-expand.grid() |>
-rlang::set_names(name)
-lapply(names, function(x){c(0, 1)}) |>
-expand.grid() |>
-rlang::set_names(names)
-lapply(names, function(x){c(0, 1)}) |>
-expand.grid() |>
-rlang::set_names(names) |>
-dplyr::as_tibble()
-lapply(names, function(x){c(0, 1)}) |>
-expand.grid() |>
-rlang::set_names(names) |>
-dplyr::as_tibble() %>%
-dplyr::mutate("cohort_definition_id" = dplyr::row_number())
-lapply(names, function(x){c(0, 1)}) |>
-expand.grid() |>
-rlang::set_names(names) |>
-dplyr::as_tibble() |>
-dplyr::mutate("cohort_definition_id" = dplyr::row_number())
-getCohortSetMutuallyEclusive <- function(names) {
-lapply(names, function(x){c(0, 1)}) |>
-expand.grid() |>
-rlang::set_names(names) |>
-dplyr::as_tibble() |>
-dplyr::mutate("cohort_definition_id" = dplyr::row_number())
-}
-getCohortSetMutuallyEclusive(names)
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = NULL,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a
+a[["new_cohort"]]
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 1,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a
+a[["new_cohort"]]
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 1,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a[["new_cohort"]]
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 2,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a[["new_cohort"]]
+name = "new_cohort"
+targetCohortName = "cases"
+targetCohortId = 2
+matchSex = TRUE
+matchYearOfBirth = TRUE
+ratio = 4
+# validate initial input
+validateInput(
+cdm = cdm, name = name, targetCohortName = targetCohortName,
+targetCohortId = targetCohortId, matchSex = matchSex,
+matchYearOfBirth = matchYearOfBirth, ratio = ratio
+)
+# get the number of cohorts
+n <- getNumberOfCohorts(cdm, targetCohortName)
+n
+# get target cohort id
+targetCohortId <- getTargetCohortId(cdm, targetCohortId, targetCohortName)
+targetCohortId
+# Create the cohort name with cases and controls of the targetCohortId
+cdm <- getNewCohort(cdm, name, targetCohortName, targetCohortId, n)
+cdm
+cdm[["new_cohort"]]
+cdm[["new_cohort"]] %>% print(n = 10)
+cdm[["new_cohort"]] %>% print(n = 15)
+cdm[["cases"]]
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8507,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2012-01-04","2014-12-15","2004-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2012-01-05","2014-12-16","2004-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10),
+)
+)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+cdm[["cases"]]
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2012-01-04","2014-12-15","2004-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2012-01-05","2014-12-16","2004-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10),
+)
+)
+cdmMock
+cdmMock[["condition_occurrence"]]
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2012-01-04","2014-12-15","2004-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2012-01-05","2014-12-16","2004-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10),
+)
+)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+cdm[["cases"]]
+cdm[["observation_period"]]
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10),
+)
+)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+cdm[["cases"]]
+generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 2,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 2,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+cdm[["cases"]]
+a[["new_cohort"]]
+cdm[["observation_period"]]
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10)),
+observation_period = tibble::tibble("observation_period_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"observation_period_start_date" = rep(as.Date(1950-01-01),10),
+"observation_period_end_date"   = rep(as.Date(2023-12-05),10),
+"period_type_concept_id"        = 44814724)
+)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 2,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10)),
+observation_period = tibble::tibble("observation_period_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"observation_period_start_date" = rep(as.Date(1950-01-01),10),
+"observation_period_end_date"   = rep(as.Date(2023-12-05),10),
+"period_type_concept_id"        = 44814724)
+)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 2,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+cdm[["cases"]]
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10)),
+observation_period = tibble::tibble("observation_period_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"observation_period_start_date" = rep(as.Date(1950-01-01),10),
+"observation_period_end_date"   = rep(as.Date(2021-12-05),10),
+"period_type_concept_id"        = 44814724)
+)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+cdm[["cases"]]
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10)),
+observation_period = tibble::tibble("observation_period_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"observation_period_start_date" = rep(as.Date(1984-01-01),10),
+"observation_period_end_date"   = rep(as.Date(2021-12-05),10),
+"period_type_concept_id"        = 44814724)
+)
+rep(as.Date(1984-01-01),10)
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10)),
+observation_period = tibble::tibble("observation_period_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"observation_period_start_date" = as.Date(rep("1984-01-01",10)),
+"observation_period_end_date"   = as.Date(rep("2021-01-01",10)),
+"period_type_concept_id"        = 44814724)
+)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+cdm[["cases"]]
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = 2,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a[["new_cohort"]]
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = NULL,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a[["cases"]]
+a[["new_cohort"]]
+a[["new_cohort"]] %>% print(n = 20)
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+dplyr::summarise(subject_unique)
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+dplyr::summarise(subject_id)
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+dplyr::summarise(subject_id) %>% unique()
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+dplyr::summarise(subject_id) %>%
+distinct()
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+dplyr::summarise(subject_id) %>%
+dplyr::distinct()
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+dplyr::summarise(subject_id) %>%
+dplyr::distinct() %>% dplyr::pull()
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+dplyr::summarise(subject_id) %>%
+dplyr::distinct() %>% dplyr::pull() %>% length() == 10
+expect_true(a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(2,4)) %>%
+dplyr::summarise(subject_id) %>%
+dplyr::distinct() %>% dplyr::pull() %>% length() == 10)
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(2,4)) %>%
+dplyr::summarise(subject_id) %>%
+dplyr::distinct() %>% dplyr::pull() %>% length()
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(2,4))
+a[["new_cohort"]] %>%
+dplyr::filter(cohort_definition_id %in% c(2,4)) %>%
+dplyr::summarise(cohort_start_date) %>%
+dplyr::distinct() %>% dplyr::pull() %>% length() == 2
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5)
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::arrange() %>% dplyr::pull()
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::arrange(desc()) %>% dplyr::pull()
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date)
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull()
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::arrange()
+as.Date(c("2017-01-01","2019-01-01","2015-01-01"))
+arrange(as.Date(c("2017-01-01","2019-01-01","2015-01-01")))
+dplyr::arrange(as.Date(c("2017-01-01","2019-01-01","2015-01-01")))
+order(as.Date(c("2017-01-01","2019-01-01","2015-01-01")))
+as.Date(c("2017-01-01","2019-01-01","2015-01-01"))
+as.Date(c("2017-01-01","2019-01-01","2015-01-01")) %>% order()
+as.Date(c("2017-01-01","2019-01-01","2015-01-01")) %>% mdy
+as.Date(c("2017-01-01","2019-01-01","2015-01-01")) %>% lubridate::mdy()
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull()
+a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")
+expect_true(a[["new_cohort"]] %>%
+dplyr::filter(subject_id %in% c(seq(5,10,1)) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09"))
+})
+expect_true(a[["new_cohort"]] %>%
+dplyr::filter(subject_id %in% c(seq(5,10,1))) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09"))
+expect_true(a[["new_cohort"]] %>%
+dplyr::filter(subject_id %in% seq(5,10,1)) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09"))
+expect_true(a[["new_cohort"]] %>%
+dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09"))
+outc %>% dplyr::distinct()
+outc <- a[["new_cohort"]] %>%
+dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")
+outc %>% dplyr::distinct()
+outc == c(TRUE,TRUE)
+dplyr::distinct(outc)
+unique(outc)
+expect_true(unique(outc) == TRUE)
+devtools::load_all(".")
+devtools::check()
+usethis::use_package(tibble,"Suggests")
+usethis::use_package(tibble,"Suggests")
+usethis::use_package("tibble","Suggests")
+devtools::check()
+devtools::load_all(".")
+devtools::check()
+devtools::load_all(".")
+devtools::check()
+install.packages("DrugUtilisation")
+install.packages("DrugUtilisation")
+devtools::load_all(".")
+devtools::check()
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date(1980,04,01),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10)),
+observation_period = tibble::tibble("observation_period_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"observation_period_start_date" = as.Date(rep("1984-01-01",10)),
+"observation_period_end_date"   = as.Date(rep("2021-01-01",10)),
+"period_type_concept_id"        = 44814724)
+)
+help(as.Date)
+help(rep)
+help(seq)
+cdm <- DrugUtilisation::generateConceptCohortSet(
+cdm = cdmMock,
+conceptSet = list(c1 = 317009, c2 = 4266367),
+name = "cases",
+end  = "observation_period_end_date",
+requiredObservation = c(0,0),
+overwrite = TRUE
+)
+generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = NULL,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+a <- generateMatchedCohortSet(cdm,
+name = "new_cohort",
+targetCohortName = "cases",
+targetCohortId = NULL,
+matchSex = TRUE,
+matchYearOfBirth = TRUE,
+ratio = 4)
+rep(as.Date(1980,04,01),10)
+# Generate mock data
+cdmMock <- DrugUtilisation::mockDrugUtilisation(
+numberIndividuals = 10,
+person = tibble::tibble("person_id" = seq(1,10,1),
+"gender_concept_id" = rep(8532,10),
+"year_of_birth" = rep(1980, 10),
+"day_of_birth"  = rep(1, 10),
+"birth_date_time" = rep(as.Date("1980,04,01"),10),
+"month_of_birth"  = rep(4, 10)),
+condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)),
+"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")),
+"condition_end_date"   = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")),
+"condition_type_concept_id" = rep(32020,10)),
+observation_period = tibble::tibble("observation_period_id" = seq(1,10,1),
+"person_id" = seq(1,10,1),
+"observation_period_start_date" = as.Date(rep("1984-01-01",10)),
+"observation_period_end_date"   = as.Date(rep("2021-01-01",10)),
+"period_type_concept_id"        = 44814724)
+)
+as.Date(rep("1980,04,01",10))
+as.Date(rep("1980,04,01",10))
+as.Date(rep("1980-04-01",10))
+devtools::load_all(".")
+devtools::check()
+library(CohortConstructor)
+library(CohortConstructor)
diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R
index bcf99efa..d0d12f5e 100644
--- a/R/generateMatchedCohortSet.R
+++ b/R/generateMatchedCohortSet.R
@@ -125,6 +125,13 @@ validateInput <- function(cdm,
       errorMessage$push(glue::glue("- {name} table does not containg '{targetCohortId}' as a cohort_definition_id"))
     }
   }
+  # Check if ratio is > 0
+  ratio_check <- ratio > 0
+  checkmate::assertTRUE(ratio_check, add = errorMessage)
+  if(!isTRUE(ratio_check)){
+    errorMessage$push(glue::glue("- ratio parameter must be > 0 "))
+  }
+
   checkmate::reportAssertions(collection = errorMessage)
   return(invisible(TRUE))
 }
diff --git a/vignettes/a03_age_sex_matching.Rmd b/vignettes/a03_age_sex_matching.Rmd
index c1bf5c6a..26efe14b 100644
--- a/vignettes/a03_age_sex_matching.Rmd
+++ b/vignettes/a03_age_sex_matching.Rmd
@@ -1,8 +1,8 @@
 ---
-title: "a03_age_sex_matching"
+title: "Generate a matched age and sex cohort"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{a03_age_sex_matching}
+  %\VignetteIndexEntry{Generate a matched age and sex cohort}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
@@ -15,30 +15,30 @@ knitr::opts_chunk$set(
 ```
 
 # Introduction
-CohortConstructor packages includes a function to obtain an age and sex matched cohort, the `generateMatchedCohortSet()` function. In this vignette, we will explore the functionalities of the function and provide valuable examples for its usage.
+CohortConstructor packages includes a function to obtain an age and sex matched cohort, the `generateMatchedCohortSet()` function. In this vignette, we will explore the usage of this function.
 
 ## Create mock data
-We will first use `mockDrugUtilisation()` functin from DrugUtilisation package to create mock data.
+We will first use `mockDrugUtilisation()` function from DrugUtilisation package to create mock data.
 
-```{r, include = FALSE}
+```{r setup, message = FALSE, warning = FALSE}
 library(CohortConstructor)
 library(dplyr)
 library(DrugUtilisation)
 
-cdm <- mockDrugUtilisation(numberIndividual  = 200)
+cdm <- mockDrugUtilisation(numberIndividual = 1000)
 ```
 
-As we will use `cohort1` to explore `generateMatchedCohortSet()`, let us first use `cohort_attrition()` from CDMConnector package to explore the this cohort:
+As we will use `cohort1` to explore `generateMatchedCohortSet()`, let us first use `cohort_attrition()` from CDMConnector package to explore this cohort:
 
-```{r, include = FALSE}
+```{r}
 CDMConnector::cohort_set(cdm$cohort1)
 ```
 Notice that there are three cohorts within this tibble, with id's going from 1 to 3. 
 
-# Use `generateMatchedCohortSet()` to create an age-sex matched cohort
-Let us first see an example of how this function works. For its usage, we need to provide a `cdm` object, the `targetCohortName`, which is the name of the table containing the cohort of interest, and the `name` of the new table that will be created containing the cohort and the matching cohort. We will also use the argument `targetCohortId` to specify that we only want a matching cohort for `cohort_definition_id = 1`.
+# Use generateMatchedCohortSet() to create an age-sex matched cohort
+Let us first see an example of how this function works. For its usage, we need to provide a `cdm` object, the `targetCohortName`, which is the name of the table containing the cohort of interest, and the `name` of the new generated tibble containing the cohort and the matched cohort. We will also use the argument `targetCohortId` to specify that we only want a matched cohort for `cohort_definition_id = 1`.
 
-```{r, include = FALSE}
+```{r}
 cdm <- generateMatchedCohortSet(cdm  = cdm,
                            name = "matched_cohort1",
                            targetCohortName = "cohort1",
@@ -46,36 +46,60 @@ cdm <- generateMatchedCohortSet(cdm  = cdm,
 
 CDMConnector::cohort_set(cdm$matched_cohort1)
 ```
-Notice that in the generated tibble, there are two cohorts: `cohort_definition_id = 1` (original cohort), and `cohort_definition_id = 4` (matched cohort). `target_cohort_name` column indicates which is the original cohort. `match_sex` and `match_year_of_birth` adopt boolean values (`TRUE`/`FALSE`) indicating if we have matched for sex and age, or not. `match_status` indicate if it is the original cohort (`target`) or if it is the matched cohort (`matched`). `target_cohort_id` indicates which is the cohort_id of the original cohort.
-
-`matchSex` is a boolean parameter (`TRUE`/`FALSE`) indicating if we want to match by sex (`TRUE`) or we do not want (`FALSE`).
+Notice that in the generated tibble, there are two cohorts: `cohort_definition_id = 1` (original cohort), and `cohort_definition_id = 4` (matched cohort). *target_cohort_name* column indicates which is the original cohort. *match_sex* and *match_year_of_birth* adopt boolean values (`TRUE`/`FALSE`) indicating if we have matched for sex and age, or not. *match_status* indicate if it is the original cohort (`target`) or if it is the matched cohort (`matched`). *target_cohort_id* indicates which is the cohort_id of the original cohort.
 
-`matchYear` is another boolean parameter (`TRUE`/`FALSE`) indicating if we want to match by age (`TRUE`) or we do not want (`FALSE`).
-
-The default matching ratio is 1:1. Use `cohort_counts()` from CDMConnector to check if the matching has been done as desired.
-
-```{r, include = FALSE}
-CDMConnector::cohort_count(cdm$matched_cohort1)
-```
-To have more information about the exclusion criteria applied to perform the matching, use `cohort_attrition()` from CDMConnector package:
-```{r, include = FALSE}
+Check the exclusion criteria applied to generate the new cohorts by using `cohort_attrition()` from CDMConnector package:
+```{r}
 # Original cohort
 CDMConnector::cohort_attrition(cdm$matched_cohort1) %>% filter(cohort_definition_id == 1)
 
 # Matched cohort
 CDMConnector::cohort_attrition(cdm$matched_cohort1) %>% filter(cohort_definition_id == 4)
 ```
-Briefly, from the original cohort, we exclude first those individuals that do not have a match, and then individuals that their matching pair is not in observation during the assigned `cohort_start_date`. From the matched cohort, we start from the whole database and we first exclude individuals that are in the original cohort. Afterwards, we exclude individuals that do not have a match, then individuals that are not in observation during the assigned `cohort_start_date`, and finally we remove the as many individuals as required to fulfill the ratio.
+Briefly, from the original cohort, we exclude first those individuals that do not have a match, and then individuals that their matching pair is not in observation during the assigned *cohort_start_date*. From the matched cohort, we start from the whole database and we first exclude individuals that are in the original cohort. Afterwards, we exclude individuals that do not have a match, then individuals that are not in observation during the assigned *cohort_start_date*, and finally we remove as many individuals as required to fulfill the ratio.
 
-You can modify the `ratio` parameter to tailor your matched cohort.
+Notice that matching pairs are randomly assigned, so it is probable that every time you execute this function, the generated cohorts change. Use `set.seed()` to avoid this. 
 
-```{r, include = FALSE}
+## matchSex parameter
+`matchSex` is a boolean parameter (`TRUE`/`FALSE`) indicating if we want to match by sex (`TRUE`) or we do not want to (`FALSE`). 
+
+## matchYear parameter
+`matchYear` is another boolean parameter (`TRUE`/`FALSE`) indicating if we want to match by age (`TRUE`) or we do not want (`FALSE`).
+
+Notice that if `matchSex = FALSE` and `matchYear = FALSE`, we will obtain an unmatched comparator cohort.
+
+## ratio parameter
+The default matching ratio is 1:1 (`ratio = 1`). Use `cohort_counts()` from CDMConnector to check if the matching has been done as desired.
+
+```{r}
+CDMConnector::cohort_count(cdm$matched_cohort1)
+```
+
+You can modify the `ratio` parameter to tailor your matched cohort. `ratio` can adopt values from 1 to Inf.
+
+```{r}
 cdm <- generateMatchedCohortSet(cdm  = cdm,
-                           name = "matched_cohort1",
+                           name = "matched_cohort2",
                            targetCohortName = "cohort1",
                            targetCohortId = 1,
                            ratio = Inf)
 
-CDMConnector::cohort_count(cdm$matched_cohort1)
+CDMConnector::cohort_count(cdm$matched_cohort2)
+```
+
+## Generate matched cohorts simultaneously across multiple cohorts
+All these functionalities can be implemented across multiple cohorts simultaneously. Specify in `targetCohortId` parameter which are the cohorts of interest. If set to NULL, all the cohorts present in `targetCohortName` will be matched. 
+
+```{r}
+cdm <- generateMatchedCohortSet(cdm  = cdm,
+                           name = "matched_cohort3",
+                           targetCohortName = "cohort1",
+                           targetCohortId = c(1,3),
+                           ratio = 2)
+
+CDMConnector::cohort_set(cdm$matched_cohort3) %>% arrange(cohort_definition_id)
+
+CDMConnector::cohort_count(cdm$matched_cohort3) %>% arrange(cohort_definition_id)
 ```
 
+Notice that each cohort has their own (and independent of other cohorts) matched cohort. 

From 534e12c61aa19ed3ea9b35ee4593db9cd038a75e Mon Sep 17 00:00:00 2001
From: Marta Alcalde-Herraiz <marta.alcalde@estudiantat.upc.edu>
Date: Fri, 12 Jan 2024 17:10:34 +0000
Subject: [PATCH 4/5] Update generateMatchedCohortSet.R

---
 R/generateMatchedCohortSet.R | 66 +++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R
index d0d12f5e..538b3afa 100644
--- a/R/generateMatchedCohortSet.R
+++ b/R/generateMatchedCohortSet.R
@@ -16,7 +16,16 @@
 #'
 #' @export
 #'
-#'
+#' @examples
+#' library(DrugUtilisation)
+#' library(CohortConstructor)
+#' cdm <- mockDrugUtilisation(numberIndividuals = 100)
+#' cdm$cohort1 %>%
+#'   requireCohortIntersectFlag(targetCohortTable = "cohort2",
+#'                              targetCohortId = 1,
+#'                              indexDate = "cohort_start_date",
+#'                              window = c(-Inf, 0))
+
 generateMatchedCohortSet <- function(cdm,
                                      name,
                                      targetCohortName,
@@ -64,6 +73,9 @@ generateMatchedCohortSet <- function(cdm,
 
     # Check cohort set ref
     cdm <- checkCohortSetRef(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n)
+
+    # Rename cohort definition ids
+    cdm <- renameCohortDefinitionIds(cdm)
   }
   # Return
   return(cdm)
@@ -440,3 +452,55 @@ checkCohortSetRef <- function(cdm, name, targetCohortName, matchSex, matchYearOf
 
   return(cdm)
 }
+
+renameCohortDefinitionIds <- function(cdm, name){
+  new_cohort_set <- cdm[[name]] %>%
+    CDMConnector::cohort_set() %>%
+    dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>%
+    arrange(cohort_definition_id_new) %>%
+    dplyr::mutate(cohort_definition_id_new = dplyr::row_number())
+
+  new_cohort_attrition <- cdm[[name]] %>%
+    CDMConnector::cohort_attrition() %>%
+    inner_join(
+      new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"),
+      by = "cohort_definition_id"
+    ) %>%
+    dplyr::select(-"cohort_definition_id") %>%
+    dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
+    dplyr::relocate(cohort_definition_id)
+
+  new_cohort_count <- cdm[[name]] %>%
+    CDMConnector::cohort_count() %>%
+    inner_join(
+      new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"),
+      by = "cohort_definition_id"
+    ) %>%
+    dplyr::select(-"cohort_definition_id") %>%
+    dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
+    dplyr::relocate(cohort_definition_id)
+
+  new_cohort <- cdm[[name]] %>%
+    inner_join(
+      new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"),
+      by = "cohort_definition_id",
+      copy = TRUE
+    ) %>%
+    dplyr::select(-"cohort_definition_id") %>%
+    dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
+    dplyr::relocate(cohort_definition_id) %>%
+    CDMConnector::compute_query() %>%
+    CDMConnector::compute_query(name = name, temporary = FALSE, schema = attr(cdm, "write_schema"), overwrite = TRUE)
+
+  new_cohort_set <- new_cohort_set %>%
+    dplyr::select(-"cohort_definition_id") %>%
+    dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
+    dplyr::relocate(cohort_definition_id)
+
+  cdm[[name]] <- CDMConnector::new_generated_cohort_set(
+    cohort_ref = new_cohort,
+    cohort_attrition_ref =  new_cohort_attrition ,
+    cohort_set_ref = new_cohort_set,
+    cohort_count_ref = new_cohort_count,
+    overwrite = TRUE)
+}

From 3346e99f6e91c3781e0e6f3113a3485eda62ced6 Mon Sep 17 00:00:00 2001
From: Marta Alcalde-Herraiz <marta.alcalde@estudiantat.upc.edu>
Date: Fri, 12 Jan 2024 18:20:48 +0000
Subject: [PATCH 5/5] Update generateMatchedCohortSet()

---
 R/generateMatchedCohortSet.R                  | 38 +++++++++++--------
 man/generateMatchedCohortSet.Rd               | 15 ++++++++
 .../testthat/test-generateMatchedCohortSet.R  |  8 ++--
 vignettes/a03_age_sex_matching.Rmd            |  1 -
 4 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R
index 538b3afa..ad7d768a 100644
--- a/R/generateMatchedCohortSet.R
+++ b/R/generateMatchedCohortSet.R
@@ -19,13 +19,17 @@
 #' @examples
 #' library(DrugUtilisation)
 #' library(CohortConstructor)
+#' library(dplyr)
 #' cdm <- mockDrugUtilisation(numberIndividuals = 100)
-#' cdm$cohort1 %>%
-#'   requireCohortIntersectFlag(targetCohortTable = "cohort2",
-#'                              targetCohortId = 1,
-#'                              indexDate = "cohort_start_date",
-#'                              window = c(-Inf, 0))
-
+#' cdm <- cdm %>%
+#'   generateMatchedCohortSet(name = "new_matched_cohort",
+#'                            targetCohortName = "cohort1",
+#'                            targetCohortId = c(1,2),
+#'                            matchSex = TRUE,
+#'                            matchYearOfBirth = TRUE,
+#'                            ratio = 2)
+#' cdm$new_matched_cohort
+#'
 generateMatchedCohortSet <- function(cdm,
                                      name,
                                      targetCohortName,
@@ -75,7 +79,7 @@ generateMatchedCohortSet <- function(cdm,
     cdm <- checkCohortSetRef(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n)
 
     # Rename cohort definition ids
-    cdm <- renameCohortDefinitionIds(cdm)
+    cdm <- renameCohortDefinitionIds(cdm, name)
   }
   # Return
   return(cdm)
@@ -456,46 +460,46 @@ checkCohortSetRef <- function(cdm, name, targetCohortName, matchSex, matchYearOf
 renameCohortDefinitionIds <- function(cdm, name){
   new_cohort_set <- cdm[[name]] %>%
     CDMConnector::cohort_set() %>%
-    dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>%
-    arrange(cohort_definition_id_new) %>%
+    dplyr::mutate(cohort_definition_id_new = .data$target_cohort_id) %>%
+    dplyr::arrange(.data$cohort_definition_id_new) %>%
     dplyr::mutate(cohort_definition_id_new = dplyr::row_number())
 
   new_cohort_attrition <- cdm[[name]] %>%
     CDMConnector::cohort_attrition() %>%
-    inner_join(
+    dplyr::inner_join(
       new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"),
       by = "cohort_definition_id"
     ) %>%
     dplyr::select(-"cohort_definition_id") %>%
     dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
-    dplyr::relocate(cohort_definition_id)
+    dplyr::relocate(.data$cohort_definition_id)
 
   new_cohort_count <- cdm[[name]] %>%
     CDMConnector::cohort_count() %>%
-    inner_join(
+    dplyr::inner_join(
       new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"),
       by = "cohort_definition_id"
     ) %>%
     dplyr::select(-"cohort_definition_id") %>%
     dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
-    dplyr::relocate(cohort_definition_id)
+    dplyr::relocate(.data$cohort_definition_id)
 
   new_cohort <- cdm[[name]] %>%
-    inner_join(
+    dplyr::inner_join(
       new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"),
       by = "cohort_definition_id",
       copy = TRUE
     ) %>%
     dplyr::select(-"cohort_definition_id") %>%
     dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
-    dplyr::relocate(cohort_definition_id) %>%
+    dplyr::relocate(.data$cohort_definition_id) %>%
     CDMConnector::compute_query() %>%
     CDMConnector::compute_query(name = name, temporary = FALSE, schema = attr(cdm, "write_schema"), overwrite = TRUE)
 
   new_cohort_set <- new_cohort_set %>%
     dplyr::select(-"cohort_definition_id") %>%
     dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>%
-    dplyr::relocate(cohort_definition_id)
+    dplyr::relocate(.data$cohort_definition_id)
 
   cdm[[name]] <- CDMConnector::new_generated_cohort_set(
     cohort_ref = new_cohort,
@@ -503,4 +507,6 @@ renameCohortDefinitionIds <- function(cdm, name){
     cohort_set_ref = new_cohort_set,
     cohort_count_ref = new_cohort_count,
     overwrite = TRUE)
+
+  return(cdm)
 }
diff --git a/man/generateMatchedCohortSet.Rd b/man/generateMatchedCohortSet.Rd
index baa9aac7..39a61b7a 100644
--- a/man/generateMatchedCohortSet.Rd
+++ b/man/generateMatchedCohortSet.Rd
@@ -41,3 +41,18 @@ Generate a new cohort matched cohort from a preexisting target cohort. The
 new cohort will contain individuals not included in the target cohort with
 same year of birth (matchYearOfBirth = TRUE) and same sex (matchSex = TRUE).
 }
+\examples{
+library(DrugUtilisation)
+library(CohortConstructor)
+library(dplyr)
+cdm <- mockDrugUtilisation(numberIndividuals = 100)
+cdm <- cdm \%>\%
+  generateMatchedCohortSet(name = "new_matched_cohort",
+                           targetCohortName = "cohort1",
+                           targetCohortId = c(1,2),
+                           matchSex = TRUE,
+                           matchYearOfBirth = TRUE,
+                           ratio = 2)
+cdm$new_matched_cohort
+
+}
diff --git a/tests/testthat/test-generateMatchedCohortSet.R b/tests/testthat/test-generateMatchedCohortSet.R
index 704857b9..47d2f248 100644
--- a/tests/testthat/test-generateMatchedCohortSet.R
+++ b/tests/testthat/test-generateMatchedCohortSet.R
@@ -291,19 +291,19 @@ test_that("test exactMatchingCohort with a ratio bigger than 1", {
                                 ratio = 4)
 
   expect_true(a[["new_cohort"]] %>%
-                dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+                dplyr::filter(cohort_definition_id %in% c(1,2)) %>%
                 dplyr::summarise(subject_id) %>%
                 dplyr::distinct() %>% dplyr::pull() %>% length() == 10)
   expect_true(a[["new_cohort"]] %>%
-                dplyr::filter(cohort_definition_id %in% c(2,4)) %>%
+                dplyr::filter(cohort_definition_id %in% c(3,4)) %>%
                 dplyr::summarise(subject_id) %>%
                 dplyr::distinct() %>% dplyr::pull() %>% length() == 10)
   expect_true(a[["new_cohort"]] %>%
-                dplyr::filter(cohort_definition_id %in% c(1,3)) %>%
+                dplyr::filter(cohort_definition_id %in% c(1,2)) %>%
                 dplyr::summarise(cohort_start_date) %>%
                 dplyr::distinct() %>% dplyr::pull() %>% length() == 2)
   expect_true(a[["new_cohort"]] %>%
-                dplyr::filter(cohort_definition_id %in% c(2,4)) %>%
+                dplyr::filter(cohort_definition_id %in% c(3,4)) %>%
                 dplyr::summarise(cohort_start_date) %>%
                 dplyr::distinct() %>% dplyr::pull() %>% length() == 2)
 
diff --git a/vignettes/a03_age_sex_matching.Rmd b/vignettes/a03_age_sex_matching.Rmd
index 26efe14b..a49591b9 100644
--- a/vignettes/a03_age_sex_matching.Rmd
+++ b/vignettes/a03_age_sex_matching.Rmd
@@ -33,7 +33,6 @@ As we will use `cohort1` to explore `generateMatchedCohortSet()`, let us first u
 ```{r}
 CDMConnector::cohort_set(cdm$cohort1)
 ```
-Notice that there are three cohorts within this tibble, with id's going from 1 to 3. 
 
 # Use generateMatchedCohortSet() to create an age-sex matched cohort
 Let us first see an example of how this function works. For its usage, we need to provide a `cdm` object, the `targetCohortName`, which is the name of the table containing the cohort of interest, and the `name` of the new generated tibble containing the cohort and the matched cohort. We will also use the argument `targetCohortId` to specify that we only want a matched cohort for `cohort_definition_id = 1`.