From a034b71e0e31451d3aee71071b0cfff980f16f4e Mon Sep 17 00:00:00 2001 From: Evanette Burrows Date: Fri, 23 Feb 2024 11:12:00 -0500 Subject: [PATCH] Update ETL-Synthea for development updates (#185) * Address Issue 162 improve version support (#174) * Update functions for supported OMOP versions * Update formatting for lintr issues (#175) * Update package to split intermediate steps and event table loading (#176) * Update package to support synthea 3.1.0 and 3.2.0 (#178) * Add support for loading synthea v3.1 and v3.2 * Add synthea version to source description * Address lint errors * Merge branch 'main' into develop * Add location and caresite support #177 (#181) * Add scripts and vignettes for location and care_site support * Remove redundant join from allvisittable query (#182) --- DESCRIPTION | 9 +- NAMESPACE | 3 +- R/CreateCDMIndexAndConstraintScripts.r | 181 +++------- R/CreateCDMIndices.r | 32 +- R/CreateCDMTables.r | 58 ++-- R/CreateMapAndRollupTables.r | 66 ++++ R/CreateSyntheaTables.r | 19 +- R/CreateVocabMapTables.r | 5 +- R/DropMapAndRollupTables.R | 2 +- R/DropSyntheaTables.r | 41 +-- R/LoadEventTables.r | 105 +++--- R/LoadSyntheaTables.r | 64 ++-- R/LoadVocabFromCsv.r | 4 +- R/backupCDM.r | 10 +- R/createExtraIndices.R | 6 +- R/createPrunedTables.r | 12 +- R/exportToSQLite.r | 8 +- R/getEventConceptId.r | 12 +- R/pruneCDM.r | 4 +- R/restoreCDMTables.r | 10 +- README.md | 20 +- docs/404.html | 24 +- docs/articles/Condition_occurrence.html | 26 +- docs/articles/Cost.html | 26 +- docs/articles/Device_exposure.html | 29 +- docs/articles/Drug_exposure.html | 26 +- docs/articles/Measurement.html | 26 +- docs/articles/Payer_plan_period.html | 32 +- docs/articles/care_site.html | 211 ++++++++++++ docs/articles/death.html | 24 +- docs/articles/index.html | 46 ++- docs/articles/location.html | 230 ++++++++++++ docs/articles/observation.html | 24 +- docs/articles/observation_period.html | 24 +- docs/articles/person.html | 24 +- 
docs/articles/procedure_occurrence.html | 24 +- docs/articles/provider.html | 24 +- docs/articles/visit_detail.html | 24 +- docs/articles/visit_occurrence.html | 24 +- docs/authors.html | 36 +- docs/index.html | 33 +- docs/pkgdown.yml | 16 +- .../CreateCDMIndexAndConstraintScripts.html | 30 +- docs/reference/CreateCDMIndices.html | 26 +- docs/reference/CreateCDMTables.html | 24 +- docs/reference/CreateMapAndRollupTables.html | 199 +++++++++++ docs/reference/CreateSyntheaTables.html | 26 +- docs/reference/CreateVisitRollupTables.html | 24 +- docs/reference/CreateVocabMapTables.html | 24 +- docs/reference/DropEventTables.html | 24 +- docs/reference/DropMapAndRollupTables.html | 26 +- docs/reference/DropSyntheaTables.html | 24 +- docs/reference/DropVocabTables.html | 24 +- docs/reference/ETLSyntheaBuilder-package.html | 24 +- docs/reference/LoadEventTables.html | 34 +- docs/reference/LoadSyntheaTables.html | 26 +- docs/reference/LoadVocabFromCsv.html | 24 +- docs/reference/LoadVocabFromSchema.html | 24 +- docs/reference/TruncateEventTables.html | 24 +- docs/reference/TruncateVocabTables.html | 24 +- docs/reference/backupCDM.html | 28 +- docs/reference/createExtraIndices.html | 32 +- docs/reference/createPrunedTables.html | 28 +- docs/reference/exportToSQLite.html | 24 +- docs/reference/getEventConceptId.html | 28 +- docs/reference/index.html | 30 +- docs/reference/pruneCDM.html | 28 +- docs/reference/restoreCDMTables.html | 28 +- docs/sitemap.xml | 9 + extras/codeToRun.R | 4 + .../cdm_version/v531/AllVisitTable.sql | 6 +- .../cdm_version/v531/create_states_map.sql | 57 +++ .../cdm_version/v531/insert_care_site.sql | 21 ++ .../cdm_version/v531/insert_location.sql | 27 ++ .../v531/insert_payer_plan_period.sql | 7 +- .../cdm_version/v531/insert_person.sql | 9 +- .../v531/insert_visit_occurrence.sql | 26 +- .../cdm_version/v540/AllVisitTable.sql | 6 +- .../cdm_version/v540/create_states_map.sql | 57 +++ .../cdm_version/v540/insert_care_site.sql | 21 ++ 
.../cdm_version/v540/insert_location.sql | 27 ++ .../v540/insert_payer_plan_period.sql | 5 + .../cdm_version/v540/insert_person.sql | 112 +++--- inst/sql/sql_server/extra_indices.sql | 2 +- .../v310/create_synthea_tables.sql | 326 ++++++++++++++++++ .../v320/create_synthea_tables.sql | 326 ++++++++++++++++++ man/CreateCDMIndexAndConstraintScripts.Rd | 6 +- man/CreateCDMIndices.Rd | 2 +- man/CreateMapAndRollupTables.Rd | 58 ++++ man/CreateSyntheaTables.Rd | 2 +- man/DropMapAndRollupTables.Rd | 2 +- man/LoadEventTables.Rd | 10 +- man/LoadSyntheaTables.Rd | 2 +- man/backupCDM.Rd | 4 +- man/createExtraIndices.Rd | 10 +- man/createPrunedTables.Rd | 4 +- man/getEventConceptId.Rd | 4 +- man/pruneCDM.Rd | 4 +- man/restoreCDMTables.Rd | 4 +- vignettes/Cost.Rmd | 2 +- vignettes/Device_exposure.Rmd | 2 +- vignettes/Payer_plan_period.Rmd | 6 +- vignettes/care_site.Rmd | 23 ++ vignettes/index.md | 2 + vignettes/location.Rmd | 25 ++ vignettes/observation.Rmd | 2 +- vignettes/provider.Rmd | 2 +- 107 files changed, 2825 insertions(+), 880 deletions(-) create mode 100644 R/CreateMapAndRollupTables.r create mode 100644 docs/articles/care_site.html create mode 100644 docs/articles/location.html create mode 100644 docs/reference/CreateMapAndRollupTables.html create mode 100644 inst/sql/sql_server/cdm_version/v531/create_states_map.sql create mode 100644 inst/sql/sql_server/cdm_version/v531/insert_care_site.sql create mode 100644 inst/sql/sql_server/cdm_version/v531/insert_location.sql create mode 100644 inst/sql/sql_server/cdm_version/v540/create_states_map.sql create mode 100644 inst/sql/sql_server/cdm_version/v540/insert_care_site.sql create mode 100644 inst/sql/sql_server/cdm_version/v540/insert_location.sql create mode 100644 inst/sql/sql_server/synthea_version/v310/create_synthea_tables.sql create mode 100644 inst/sql/sql_server/synthea_version/v320/create_synthea_tables.sql create mode 100644 man/CreateMapAndRollupTables.Rd create mode 100644 vignettes/care_site.Rmd create mode 
100644 vignettes/location.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 0d4db8e..620d0bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,13 @@ Package: ETLSyntheaBuilder Type: Package Title: A Builder for Converting the Synthea Data to the OMOP CDM -Version: 1.0 +Version: 2.0 mDate: 2021-12-04 Author: Anthony Molinaro [aut, cre], Clair Blacketer [aut], - Frank DeFalco [aut] -Maintainer: Anthony Molinaro + Frank DeFalco [aut], + Evanette Burrows [aut] +Maintainer: Evanette Burrows Description: ETL and Builder to convert Synthea Data to the OMOP CDM. Staring with csv files for an OMOP Vocabulary and csv files for Synthea, this package creates database tables from these csv files and maps them @@ -27,7 +28,7 @@ Remotes: OHDSI/CommonDataModel Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 Suggests: rmarkdown, knitr, diff --git a/NAMESPACE b/NAMESPACE index 60eaaf8..92bb720 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,8 @@ export(CreateCDMIndexAndConstraintScripts) export(CreateCDMIndices) export(CreateCDMTables) +export(CreateExtraIndices) +export(CreateMapAndRollupTables) export(CreateSyntheaTables) export(CreateVisitRollupTables) export(CreateVocabMapTables) @@ -17,7 +19,6 @@ export(LoadVocabFromSchema) export(TruncateEventTables) export(TruncateVocabTables) export(backupCDM) -export(createExtraIndices) export(createPrunedTables) export(exportToSQLite) export(getEventConceptId) diff --git a/R/CreateCDMIndexAndConstraintScripts.r b/R/CreateCDMIndexAndConstraintScripts.r index 737d259..00496bc 100644 --- a/R/CreateCDMIndexAndConstraintScripts.r +++ b/R/CreateCDMIndexAndConstraintScripts.r @@ -6,11 +6,11 @@ #' function \code{createConnectionDetails} in the #' \code{DatabaseConnector} package. #' @param cdmSchema The name of the CDM database schema. Requires read and write permissions to this database. 
On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. -#' @param cdmVersion Your CDM version. Currently "5.3.1" and "6.0.0" are supported. +#' @param cdmVersion Your CDM version. Currently "5.3.1" and "5.4.0" are supported. #' @param githubTag An optional github tag from which to pull the DDL script. -#' Currently "v5.3.1", "v5.3.1_fixes", "v6.0.0", and "v6.0.0_fixes" are supported. The default is NULL. +#' Currently "v5.3.1" and "v5.4.0" are supported. The default is NULL. #' #' @details This function creates SQL scripts for the indices and constraints on tables in a CDM by referring to the #' correct SQL DDL script in the OHDSI CommonDataModel repo. The database platform is @@ -29,13 +29,13 @@ CreateCDMIndexAndConstraintScripts <- { supportedDbs <- c("oracle", "postgresql", "pdw", "netezza", "sql server") - supportedVersions <- c("5.3.1", "6.0.0") + supportedVersions <- c("5.3.1", "5.4.0") supportedTags <- - c("v5.3.1", "v6.0.0", "v5.3.1_fixes", "v6.0.0_fixes") + c("v5.3.1", "v5.4.0") rdbms <- tolower(connectionDetails$dbms) if (!(cdmVersion %in% supportedVersions)) { - stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"6.0.0\"") + stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"5.4.0\"") } if (!(rdbms %in% supportedDbs)) { @@ -46,17 +46,17 @@ CreateCDMIndexAndConstraintScripts <- if (!is.null(githubTag) && !(githubTag %in% supportedTags)) { stop( - "Unrecognized github tag. Supported values are \"v5.3.1\", \"v6.0.0\", \"v5.3.1_fixes\", and \"v6.0.0_fixes\"" + "Unrecognized github tag. 
Supported values are \"v5.3.1\" and \"v5.4.0\"" ) } if (cdmVersion == "5.3.1" && - !(githubTag %in% c("v5.3.1", "v5.3.1_fixes"))) { + !(githubTag %in% c("v5.3.1"))) { stop("cdmVersion and githubTag mismatch.") } - if (cdmVersion == "6.0.0" && - !(githubTag %in% c("v6.0.0", "v6.0.0_fixes"))) { + if (cdmVersion == "5.4.0" && + !(githubTag %in% c("v5.4.0"))) { stop("cdmVersion and githubTag mismatch.") } @@ -74,34 +74,16 @@ CreateCDMIndexAndConstraintScripts <- httr::GET( "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1/Oracle/OMOP%20CDM%20oracle%20constraints.txt" ) - } else if (githubTag == "v5.3.1_fixes") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/Oracle/OMOP%20CDM%20oracle%20v5_3_1%20primary%20keys.sql" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/Oracle/OMOP%20CDM%20oracle%20v5_3_1%20constraints.sql" - ) } - } else if (cdmVersion == "6.0.0") { - if (is.null(githubTag) || githubTag == "v6.0.0") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/Oracle/OMOP%20CDM%20oracle%20pk%20indexes.txt" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/Oracle/OMOP%20CDM%20oracle%20constraints.txt" - ) - } else if (githubTag == "v6.0.0_fixes") { + } else if (cdmVersion == "5.4.0") { + if (is.null(githubTag) || githubTag == "v5.4.0") { webResponseInd <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/Oracle/OMOP%20CDM%20oracle%20pk%20indexes.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/oracle/OMOPCDM_oracle_5.4_indices.sql" ) webResponseCon <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/Oracle/OMOP%20CDM%20oracle%20constraints.txt" + 
"https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/oracle/OMOPCDM_oracle_5.4_constraints.sql" ) } } @@ -116,34 +98,16 @@ CreateCDMIndexAndConstraintScripts <- httr::GET( "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1/PostgreSQL/OMOP%20CDM%20postgresql%20constraints.txt" ) - } else if (githubTag == "v5.3.1_fixes") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/PostgreSQL/OMOP%20CDM%20postgresql%20v5_3_1%20primary%20keys.sql" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/PostgreSQL/OMOP%20CDM%20postgresql%20v5_3_1%20constraints.sql" - ) } - } else if (cdmVersion == "6.0.0") { - if (is.null(githubTag) || githubTag == "v6.0.0") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/PostgreSQL/OMOP%20CDM%20postgresql%20pk%20indexes.txt" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/PostgreSQL/OMOP%20CDM%20postgresql%20constraints.txt" - ) - } else if (githubTag == "v6.0.0_fixes") { + } else if (cdmVersion == "5.4.0") { + if (is.null(githubTag) || githubTag == "v5.4.0") { webResponseInd <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/PostgreSQL/OMOP%20CDM%20postgresql%20pk%20indexes.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/postgresql/OMOPCDM_postgresql_5.4_indices.sql" ) webResponseCon <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/PostgreSQL/OMOP%20CDM%20postgresql%20constraints.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/postgresql/OMOPCDM_postgresql_5.4_constraints.sql" ) } } @@ -158,34 +122,16 @@ CreateCDMIndexAndConstraintScripts <- httr::GET( 
"https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1/ParallelDataWarehouse/OMOP%20CDM%20pdw%20constraints.txt" ) - } else if (githubTag == "v5.3.1_fixes") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/ParallelDataWarehouse/OMOP%20CDM%20pdw%20v5_3_1%20indices.sql" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/ParallelDataWarehouse/OMOP%20CDM%20pdw%20v5_3_1%20constraints.sql" - ) } - } else if (cdmVersion == "6.0.0") { - if (is.null(githubTag) || githubTag == "v6.0.0") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/ParallelDataWarehouse/OMOP%20CDM%20pdw%20pk%20indexes.txt" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/ParallelDataWarehouse/OMOP%20CDM%20pdw%20constraints.txt" - ) - } else if (githubTag == "v6.0.0_fixes") { + } else if (cdmVersion == "5.4.0") { + if (is.null(githubTag) || githubTag == "v5.4.0") { webResponseInd <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/ParallelDataWarehouse/OMOP%20CDM%20pdw%20pk%20indexes.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/pdw/OMOPCDM_pdw_5.4_indices.sql" ) webResponseCon <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/ParallelDataWarehouse/OMOP%20CDM%20pdw%20constraints.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/pdw/OMOPCDM_pdw_5.4_constraints.sql" ) } } @@ -196,22 +142,12 @@ CreateCDMIndexAndConstraintScripts <- httr::GET( "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1/Netezza/OMOP%20CDM%20netezza%20primary%20keys.txt" ) - } else if (githubTag == "v5.3.1_fixes") { - webResponseInd <- - httr::GET( - 
"https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/Netezza/OMOP%20CDM%20netezza%20v5_3_1%20primary%20keys.sql" - ) } - } else if (cdmVersion == "6.0.0") { - if (is.null(githubTag) || githubTag == "v6.0.0") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/Netezza/OMOP%20CDM%20netezza%20primary%20keys.txt" - ) - } else if (githubTag == "v6.0.0_fixes") { + } else if (cdmVersion == "5.4.0") { + if (is.null(githubTag) || githubTag == "v5.4.0") { webResponseInd <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/Netezza/OMOP%20CDM%20netezza%20primary%20keys.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/netezza/OMOPCDM_netezza_5.4_primary_keys.sql" ) } } @@ -226,34 +162,16 @@ CreateCDMIndexAndConstraintScripts <- httr::GET( "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1/Sql%20Server/OMOP%20CDM%20sql%20server%20constraints.txt" ) - } else if (githubTag == "v5.3.1_fixes") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/Sql%20Server/OMOP%20CDM%20sql%20server%20v5_3_1%20primary%20keys.sql" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.3.1_fixes/Sql%20Server/OMOP%20CDM%20sql%20server%20v5_3_1%20constraints.sql" - ) } - } else if (cdmVersion == "6.0.0") { - if (is.null(githubTag) || githubTag == "v6.0.0") { - webResponseInd <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/Sql%20Server/OMOP%20CDM%20sql%20server%20pk%20indexes.txt" - ) - webResponseCon <- - httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0.0/Sql%20Server/OMOP%20CDM%20sql%20server%20constraints.txt" - ) - } else if (githubTag == "v6.0.0_fixes") { + } else if (cdmVersion == "5.4.0") { + if (is.null(githubTag) || githubTag == "v5.4.0") { webResponseInd <- httr::GET( - 
"https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/Sql%20Server/OMOP%20CDM%20sql%20server%20pk%20indexes.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/sql_server/OMOPCDM_sql_server_5.4_indices.sql" ) webResponseCon <- httr::GET( - "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v6.0_fixes/Sql%20Server/OMOP%20CDM%20sql%20server%20constraints.txt" + "https://raw.githubusercontent.com/OHDSI/CommonDataModel/v5.4.0/inst/ddl/5.4/sql_server/OMOPCDM_sql_server_5.4_primary_keys.sql" ) } } @@ -267,30 +185,23 @@ CreateCDMIndexAndConstraintScripts <- indexDDL <- toupper(indexDDL) constraintDDL <- toupper(constraintDDL) - if (githubTag == "v5.3.1_fixes") { - indexDDL <- - SqlRender::render(sql = indexDDL, CDMDATABASESCHEMA = cdmSchema) - constraintDDL <- - SqlRender::render(sql = constraintDDL, CDMDATABASESCHEMA = cdmSchema) - } else { - indexDDL <- gsub(" ON ", " ON @CDM_SCHEMA.", indexDDL) - indexDDL <- - gsub("ALTER TABLE", "ALTER TABLE @CDM_SCHEMA.", indexDDL) - indexDDL <- gsub("@CDM_SCHEMA. ", "@CDM_SCHEMA.", indexDDL) - indexDDL <- - SqlRender::render(indexDDL, CDM_SCHEMA = cdmSchema) - indexDDL <- - SqlRender::translate(indexDDL, targetDialect = rdbms) - - constraintDDL <- - gsub("ALTER TABLE", "ALTER TABLE @CDM_SCHEMA.", constraintDDL) - constraintDDL <- - gsub("@CDM_SCHEMA. ", "@CDM_SCHEMA.", constraintDDL) - constraintDDL <- - SqlRender::render(constraintDDL, CDM_SCHEMA = cdmSchema) - constraintDDL <- - SqlRender::translate(constraintDDL, targetDialect = rdbms) - } + indexDDL <- gsub(" ON ", " ON @CDM_SCHEMA.", indexDDL) + indexDDL <- + gsub("ALTER TABLE", "ALTER TABLE @CDM_SCHEMA.", indexDDL) + indexDDL <- gsub("@CDM_SCHEMA. 
", "@CDM_SCHEMA.", indexDDL) + indexDDL <- + SqlRender::render(indexDDL, CDM_SCHEMA = cdmSchema) + indexDDL <- + SqlRender::translate(indexDDL, targetDialect = rdbms) + + constraintDDL <- + gsub("ALTER TABLE", "ALTER TABLE @CDM_SCHEMA.", constraintDDL) + constraintDDL <- + gsub("@CDM_SCHEMA. ", "@CDM_SCHEMA.", constraintDDL) + constraintDDL <- + SqlRender::render(constraintDDL, CDM_SCHEMA = cdmSchema) + constraintDDL <- + SqlRender::translate(constraintDDL, targetDialect = rdbms) # Save the translated sql ddl to be run at a later time if (!dir.exists("output")) { diff --git a/R/CreateCDMIndices.r b/R/CreateCDMIndices.r index 53eda5c..7b53f2a 100644 --- a/R/CreateCDMIndices.r +++ b/R/CreateCDMIndices.r @@ -6,7 +6,7 @@ #' function \code{createConnectionDetails} in the #' \code{DatabaseConnector} package. #' @param cdmSchema The name of the CDM database schema. Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. #' @param cdmVersion Your CDM version. Currently "5.3" and "5.4" are supported. #' @param outputFolder Location of the SQL scripts if sqlOnly = TRUE. Default is NULL. 
@@ -25,23 +25,23 @@ CreateCDMIndices <- cdmVersion, outputFolder = NULL, sqlOnly = FALSE) -{ + { if (!sqlOnly) { - - print("Creating Indices on CDM Tables....") + print("Creating Indices on CDM Tables....") - indexSQLFile <- CommonDataModel::writeIndex( - targetDialect = connectionDetails$dbms, - cdmVersion = cdmVersion, - cdmDatabaseSchema = cdmSchema, - outputfolder = tempdir()) + indexSQLFile <- CommonDataModel::writeIndex( + targetDialect = connectionDetails$dbms, + cdmVersion = cdmVersion, + cdmDatabaseSchema = cdmSchema, + outputfolder = tempdir() + ) + + indexDDL <- SqlRender::readSql(paste0(tempdir(), "/", indexSQLFile)) + conn <- DatabaseConnector::connect(connectionDetails) + DatabaseConnector::executeSql(conn, indexDDL) + DatabaseConnector::disconnect(conn) + print("Index Creation Complete.") - indexDDL <- SqlRender::readSql(paste0(tempdir(),"/",indexSQLFile)) - conn <- DatabaseConnector::connect(connectionDetails) - DatabaseConnector::executeSql(conn,indexDDL) - DatabaseConnector::disconnect(conn) - print("Index Creation Complete.") - } else { if (is.null(outputFolder)) { stop("Must specify an outputFolder location when using sqlOnly = TRUE") @@ -56,4 +56,4 @@ CreateCDMIndices <- cdmDatabaseSchema = cdmSchema ) } -} + } diff --git a/R/CreateCDMTables.r b/R/CreateCDMTables.r index f12631f..36e8ce3 100644 --- a/R/CreateCDMTables.r +++ b/R/CreateCDMTables.r @@ -26,41 +26,41 @@ CreateCDMTables <- cdmSchema, cdmVersion, outputFolder = NULL, - createIndices = FALSE, + createIndices = FALSE, sqlOnly = FALSE) { if (!sqlOnly) { - - print("Creating CDM Tables....") - - CommonDataModel::executeDdl( - connectionDetails = connectionDetails, - cdmVersion = cdmVersion, - cdmDatabaseSchema = cdmSchema, - executeDdl = TRUE, - executePrimaryKey = TRUE, - executeForeignKey = FALSE - ) # False for now due to bug: https://github.com/OHDSI/CommonDataModel/issues/452 + print("Creating CDM Tables....") - print("CDM Tables Created.") - - if (createIndices) { - - print("Creating 
Indices on CDM Tables....") + CommonDataModel::executeDdl( + connectionDetails = connectionDetails, + cdmVersion = cdmVersion, + cdmDatabaseSchema = cdmSchema, + executeDdl = TRUE, + executePrimaryKey = TRUE, + executeForeignKey = FALSE + ) # False for now due to bug: https://github.com/OHDSI/CommonDataModel/issues/452 + + print("CDM Tables Created.") + + if (createIndices) { + print("Creating Indices on CDM Tables....") - indexSQLFile <- CommonDataModel::writeIndex( - targetDialect = connectionDetails$dbms, - cdmVersion = cdmVersion, - cdmDatabaseSchema = cdmSchema, - outputfolder = tempdir()) + indexSQLFile <- CommonDataModel::writeIndex( + targetDialect = connectionDetails$dbms, + cdmVersion = cdmVersion, + cdmDatabaseSchema = cdmSchema, + outputfolder = tempdir() + ) + + indexDDL <- + SqlRender::readSql(paste0(tempdir(), "/", indexSQLFile)) + conn <- DatabaseConnector::connect(connectionDetails) + DatabaseConnector::executeSql(conn, indexDDL) + DatabaseConnector::disconnect(conn) + print("Index Creation Complete.") + } - indexDDL <- SqlRender::readSql(paste0(tempdir(),"/",indexSQLFile)) - conn <- DatabaseConnector::connect(connectionDetails) - DatabaseConnector::executeSql(conn,indexDDL) - DatabaseConnector::disconnect(conn) - print("Index Creation Complete.") - } - } else { if (is.null(outputFolder)) { stop("Must specify an outputFolder location when using sqlOnly = TRUE") diff --git a/R/CreateMapAndRollupTables.r b/R/CreateMapAndRollupTables.r new file mode 100644 index 0000000..5bf6261 --- /dev/null +++ b/R/CreateMapAndRollupTables.r @@ -0,0 +1,66 @@ +#' @title Create Vocab Mapping and Visit Rollup Tables. +#' +#' @description This function creates the vocabulary mapping and visit roll-up intermediate tables created +#' by the \cr\code{CreateVocabMapTables()} and \cr\code{CreateVisitRollupTables()} scripts.
+#' +#' @details This function assumes \cr\code{CreateCDMTables()}, \cr\code{CreateSyntheaTables()}, \cr\code{LoadSyntheaTables()}, +#' and \cr\code{LoadVocabTables()} have all been run. +#' +#' @param connectionDetails An R object of type\cr\code{connectionDetails} created using the +#' function \code{createConnectionDetails} in the +#' \code{DatabaseConnector} package. +#' @param cdmSchema The name of the database schema that will contain the CDM. +#' Requires read and write permissions to this database. On SQL +#' Server, this should specify both the database and the schema, +#' so for example 'cdm_instance.dbo'. +#' @param syntheaSchema The name of the database schema that contains the Synthea +#' instance. Requires read and write permissions to this database. On SQL +#' Server, this should specify both the database and the schema, +#' so for example 'cdm_instance.dbo'. +#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4" are supported. +#' @param syntheaVersion The version of Synthea used to generate the csv files. +#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported. +#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database. +#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea. +#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI. +#' @param cdmSourceDescription The description of the source data. Default is generic Synthea description. +#' @param sqlOnly A boolean that determines whether or not to perform the load or generate SQL scripts. Default is FALSE.
+#' +#'@export + + +CreateMapAndRollupTables <- function(connectionDetails, + cdmSchema, + syntheaSchema, + cdmVersion, + syntheaVersion = "2.7.0", + cdmSourceName = "Synthea synthetic health database", + cdmSourceAbbreviation = "Synthea", + cdmHolder = "OHDSI", + cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.", + sqlOnly = FALSE) +{ + supportedCDMVersions <- c("5.3", "5.4") + + if (!(cdmVersion %in% supportedCDMVersions)) { + stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".") + } + + supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0") + + if (!(syntheaVersion %in% supportedSyntheaVersions)) + stop( + "Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported." + ) + + # Create Vocabulary mapping tables + CreateVocabMapTables(connectionDetails, cdmSchema, cdmVersion, sqlOnly) + + # Perform visit rollup logic and create auxiliary tables + CreateVisitRollupTables(connectionDetails, + cdmSchema, + syntheaSchema, + cdmVersion, + sqlOnly) + +} diff --git a/R/CreateSyntheaTables.r b/R/CreateSyntheaTables.r index 173c740..43a8954 100644 --- a/R/CreateSyntheaTables.r +++ b/R/CreateSyntheaTables.r @@ -10,25 +10,32 @@ #' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. #' @param syntheaVersion The version of Synthea used to generate the csv files. -#' Currently "2.7.0" and "3.0.0" are is supported. +#' Currently "2.7.0", "3.0.0", "3.1.0", and "3.2.0" are supported.
#' #'@export CreateSyntheaTables <- - function (connectionDetails, - syntheaSchema, - syntheaVersion = "2.7.0") + function(connectionDetails, + syntheaSchema, + syntheaVersion = "2.7.0") { if (syntheaVersion == "2.7.0") sqlFilePath <- "synthea_version/v270" else if (syntheaVersion == "3.0.0") sqlFilePath <- "synthea_version/v300" + else if (syntheaVersion == "3.1.0") + sqlFilePath <- "synthea_version/v310" + else if (syntheaVersion == "3.2.0") + sqlFilePath <- "synthea_version/v320" else - stop("Invalid synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.") + stop( + "Invalid synthea version specified. Currently \"2.7.0\", \"3.0.0\", \"3.1.0\" and \"3.2.0\" are supported." + ) - sqlFilename <- paste0(sqlFilePath, "/", "create_synthea_tables.sql") + sqlFilename <- + paste0(sqlFilePath, "/", "create_synthea_tables.sql") translatedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = sqlFilename, diff --git a/R/CreateVocabMapTables.r b/R/CreateVocabMapTables.r index 60a653c..4a42f92 100644 --- a/R/CreateVocabMapTables.r +++ b/R/CreateVocabMapTables.r @@ -9,7 +9,7 @@ #' \code{DatabaseConnector} package. #' @param cdmSchema The name of the database schema that will contain the Vocab mapping #' tables. Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. #' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4" are supported. #' @param sqlOnly A boolean that determines whether or not to perform the load or generate SQL scripts. Default is FALSE. 
@@ -33,7 +33,8 @@ CreateVocabMapTables <- queries <- c( "create_source_to_standard_vocab_map.sql", - "create_source_to_source_vocab_map.sql" + "create_source_to_source_vocab_map.sql", + "create_states_map.sql" ) if (!sqlOnly) { diff --git a/R/DropMapAndRollupTables.R b/R/DropMapAndRollupTables.R index c49d65a..8e891a7 100644 --- a/R/DropMapAndRollupTables.R +++ b/R/DropMapAndRollupTables.R @@ -9,7 +9,7 @@ #' \code{DatabaseConnector} package. #' @param cdmSchema The name of the database schema that will contain the Vocab mapping #' tables. Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. #' #'@export diff --git a/R/DropSyntheaTables.r b/R/DropSyntheaTables.r index 4c81280..067b28b 100644 --- a/R/DropSyntheaTables.r +++ b/R/DropSyntheaTables.r @@ -19,8 +19,8 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema) syntheaTables <- c( "ALLERGIES", "CAREPLANS", - "CLAIMS", - "CLAIMS_TRANSACTIONS", + "CLAIMS", + "CLAIMS_TRANSACTIONS", "CONDITIONS", "DEVICES", "ENCOUNTERS", @@ -30,33 +30,34 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema) "OBSERVATIONS", "ORGANIZATIONS", "PATIENTS", - "PAYERS", - "PAYER_TRANSITIONS", + "PAYERS", + "PAYER_TRANSITIONS", "PROCEDURES", "PROVIDERS", - "SUPPLIES" + "SUPPLIES" ) conn <- DatabaseConnector::connect(connectionDetails) allTables <- DatabaseConnector::getTableNames(conn, syntheaSchema) tablesToDrop <- allTables[which(allTables %in% syntheaTables)] - + if (length(tablesToDrop) > 0) { - writeLines("Dropping Synthea tables...") - sql <- - paste( - "drop table @synthea_schema.", - tablesToDrop, - ";", - collapse = "\n", - sep = "" - ) - sql <- SqlRender::render(sql, synthea_schema = syntheaSchema) - sql <- SqlRender::translate(sql, targetDialect = connectionDetails$dbms) - DatabaseConnector::executeSql(conn, sql) + 
writeLines("Dropping Synthea tables...") + sql <- + paste( + "drop table @synthea_schema.", + tablesToDrop, + ";", + collapse = "\n", + sep = "" + ) + sql <- SqlRender::render(sql, synthea_schema = syntheaSchema) + sql <- + SqlRender::translate(sql, targetDialect = connectionDetails$dbms) + DatabaseConnector::executeSql(conn, sql) } else { - print(sprintf("No synthea tables to drop in schema %s",syntheaSchema)) + print(sprintf("No synthea tables to drop in schema %s", syntheaSchema)) } - + on.exit(DatabaseConnector::disconnect(conn)) } diff --git a/R/LoadEventTables.r b/R/LoadEventTables.r index 0dd71e5..9ffe996 100644 --- a/R/LoadEventTables.r +++ b/R/LoadEventTables.r @@ -3,23 +3,23 @@ #' @description This function loads the CDM Event tables with Synthea data. #' #' @details This function assumes \cr\code{createCDMTables()}, \cr\code{createSyntheaTables()}, \cr\code{LoadSyntheaTables()}, -#' and \cr\code{LoadVocabTables()} have all been run. +#' \cr\code{LoadVocabTables()}, and \cr\code{CreateMapAndRollupTables()} have all been run. #' #' @param connectionDetails An R object of type\cr\code{connectionDetails} created using the #' function \code{createConnectionDetails} in the #' \code{DatabaseConnector} package. #' @param cdmSchema The name of the database schema that will contain the CDM. #' Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. #' @param syntheaSchema The name of the database schema that contain the Synthea #' instance. Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. #' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4". 
#' @param syntheaVersion The version of Synthea used to generate the csv files. -#' Currently "2.7.0" and "3.0.0" are supported. -#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthentic health database. +#' Currently "2.7.0","3.0.0","3.1.0" and "3.2.0" are supported. +#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database. #' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea. #' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI #' @param cdmSourceDescription The description of the source data. Default is generic Synthea description. @@ -39,7 +39,7 @@ LoadEventTables <- function(connectionDetails, cdmHolder = "OHDSI", cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.", createIndices = FALSE, - sqlOnly = FALSE) + sqlOnly = FALSE) { # Determine which sql scripts to run based on the given version. # The path is relative to inst/sql/sql_server. @@ -51,36 +51,29 @@ LoadEventTables <- function(connectionDetails, stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".") } - supportedSyntheaVersions <- c("2.7.0", "3.0.0") + supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0") if (!(syntheaVersion %in% supportedSyntheaVersions)) - stop("Invalid Synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.") + stop( + "Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported." 
+ ) if (createIndices) { - print("Creating Indices on CDM Tables....") + print("Creating Indices on CDM Tables....") - indexSQLFile <- CommonDataModel::writeIndex( - targetDialect = connectionDetails$dbms, - cdmVersion = cdmVersion, - cdmDatabaseSchema = cdmSchema, - outputfolder = tempdir()) + indexSQLFile <- CommonDataModel::writeIndex( + targetDialect = connectionDetails$dbms, + cdmVersion = cdmVersion, + cdmDatabaseSchema = cdmSchema, + outputfolder = tempdir() + ) - indexDDL <- SqlRender::readSql(paste0(tempdir(),"/",indexSQLFile)) - conn <- DatabaseConnector::connect(connectionDetails) - DatabaseConnector::executeSql(conn,indexDDL) - DatabaseConnector::disconnect(conn) - print("Index Creation Complete.") + indexDDL <- SqlRender::readSql(paste0(tempdir(), "/", indexSQLFile)) + conn <- DatabaseConnector::connect(connectionDetails) + DatabaseConnector::executeSql(conn, indexDDL) + DatabaseConnector::disconnect(conn) + print("Index Creation Complete.") } - - # Create Vocabulary mapping tables - CreateVocabMapTables(connectionDetails, cdmSchema, cdmVersion, sqlOnly) - - # Perform visit rollup logic and create auxiliary tables - CreateVisitRollupTables(connectionDetails, - cdmSchema, - syntheaSchema, - cdmVersion, - sqlOnly) if (!sqlOnly) { conn <- DatabaseConnector::connect(connectionDetails) @@ -100,6 +93,28 @@ LoadEventTables <- function(connectionDetails, } } + # location + fileQuery <- "insert_location.sql" + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = file.path(sqlFilePath, fileQuery), + packageName = "ETLSyntheaBuilder", + dbms = connectionDetails$dbms, + cdm_schema = cdmSchema, + synthea_schema = syntheaSchema + ) + runStep(sql, fileQuery) + + # care_site + fileQuery <- "insert_care_site.sql" + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = file.path(sqlFilePath, fileQuery), + packageName = "ETLSyntheaBuilder", + dbms = connectionDetails$dbms, + cdm_schema = cdmSchema, + synthea_schema = syntheaSchema + ) + runStep(sql, fileQuery) + 
# person fileQuery <- "insert_person.sql" sql <- SqlRender::loadRenderTranslateSql( @@ -243,7 +258,7 @@ LoadEventTables <- function(connectionDetails, cdm_source_name = cdmSourceName, cdm_source_abbreviation = cdmSourceAbbreviation, cdm_holder = cdmHolder, - source_description = cdmSourceDescription + source_description = paste("Synthea version: ", syntheaVersion, " ", cdmSourceDescription) ) runStep(sql, fileQuery) @@ -277,25 +292,25 @@ LoadEventTables <- function(connectionDetails, dbms = connectionDetails$dbms, cdm_schema = cdmSchema, synthea_schema = syntheaSchema, - synthea_version = syntheaVersion + synthea_version = syntheaVersion ) runStep(sql, fileQuery) # cost - if (syntheaVersion == "2.7.0") - fileQuery <- "insert_cost_v270.sql" - else if (syntheaVersion == "3.0.0") - fileQuery <- "insert_cost_v300.sql" - - sql <- SqlRender::loadRenderTranslateSql( - sqlFilename = file.path(sqlFilePath, fileQuery), - packageName = "ETLSyntheaBuilder", - dbms = connectionDetails$dbms, - cdm_schema = cdmSchema, - synthea_schema = syntheaSchema - ) - runStep(sql, fileQuery) - + if (syntheaVersion == "2.7.0") + fileQuery <- "insert_cost_v270.sql" + else if (syntheaVersion %in% c("3.0.0", "3.1.0", "3.2.0")) + fileQuery <- "insert_cost_v300.sql" + + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = file.path(sqlFilePath, fileQuery), + packageName = "ETLSyntheaBuilder", + dbms = connectionDetails$dbms, + cdm_schema = cdmSchema, + synthea_schema = syntheaSchema + ) + runStep(sql, fileQuery) + if (!sqlOnly) { DatabaseConnector::disconnect(conn) } diff --git a/R/LoadSyntheaTables.r b/R/LoadSyntheaTables.r index ca356bb..2f1341b 100644 --- a/R/LoadSyntheaTables.r +++ b/R/LoadSyntheaTables.r @@ -11,7 +11,7 @@ #' \cr\code{./run_synthea -p 1000} #' #' You can enable csv records in src/main/resources/synthea.properties by setting exporter.csv.export = true. -#' As of the time of this writing the csv files can be found at synthe/output/csv. 
+#' As of the time of this writing the csv files can be found at synthea/output/csv. #' For more details: \href{https://github.com/synthetichealth/synthea/wiki/Basic-Setup-and-Running}{Synthea Basic Setup} #' #' @param connectionDetails An R object of type\cr\code{connectionDetails} created using the @@ -32,16 +32,15 @@ LoadSyntheaTables <- syntheaSchema, syntheaFileLoc, bulkLoad = FALSE) -{ - + { csvList <- list.files(syntheaFileLoc, pattern = "*.csv") - + conn <- DatabaseConnector::connect(connectionDetails) for (csv in csvList) { syntheaTable <- data.table::fread( - file = paste0(syntheaFileLoc,"/",csv), + file = paste0(syntheaFileLoc, "/", csv), stringsAsFactors = FALSE, header = TRUE, sep = ",", @@ -51,32 +50,49 @@ LoadSyntheaTables <- writeLines(paste0("Loading: ", csv)) # experiencing type conversion errors and need to explicitly case some columns - if ("START" %in% colnames(syntheaTable)) - syntheaTable$START <- - as.Date(syntheaTable$START, format = "%Y-%m-%d") - if ("STOP" %in% colnames(syntheaTable)) + if ("START" %in% colnames(syntheaTable)) { + syntheaTable$START <- + as.Date(syntheaTable$START, format = "%Y-%m-%d") + } + if ("STOP" %in% colnames(syntheaTable)) { syntheaTable$STOP <- - as.Date(syntheaTable$STOP, format = "%Y-%m-%d") - if ("DATE" %in% colnames(syntheaTable)) + as.Date(syntheaTable$STOP, format = "%Y-%m-%d") + } + if ("DATE" %in% colnames(syntheaTable)) { syntheaTable$DATE <- - as.Date(syntheaTable$DATE, format = "%Y-%m-%d") - if ("BIRTHDATE" %in% colnames(syntheaTable)) + as.Date(syntheaTable$DATE, format = "%Y-%m-%d") + } + if ("START_DATE" %in% colnames(syntheaTable)) { + syntheaTable$START_DATE <- + as.Date(syntheaTable$START_DATE, format = "%Y-%m-%d") + } + if ("END_DATE" %in% colnames(syntheaTable)) { + syntheaTable$END_DATE <- + as.Date(syntheaTable$END_DATE, format = "%Y-%m-%d") + } + if ("BIRTHDATE" %in% colnames(syntheaTable)) { syntheaTable$BIRTHDATE <- - as.Date(syntheaTable$BIRTHDATE, format = "%Y-%m-%d") - if ("DEATHDATE" 
%in% colnames(syntheaTable)) + as.Date(syntheaTable$BIRTHDATE, format = "%Y-%m-%d") + } + if ("DEATHDATE" %in% colnames(syntheaTable)) { syntheaTable$DEATHDATE <- - as.Date(syntheaTable$DEATHDATE, format = "%Y-%m-%d") - if ("CODE" %in% colnames(syntheaTable)) + as.Date(syntheaTable$DEATHDATE, format = "%Y-%m-%d") + } + if ("CODE" %in% colnames(syntheaTable)) { syntheaTable$CODE <- as.character(syntheaTable$CODE) - if ("REASONCODE" %in% colnames(syntheaTable)) + } + if ("REASONCODE" %in% colnames(syntheaTable)) { syntheaTable$REASONCODE <- - as.character(syntheaTable$REASONCODE) - if ("PHONE" %in% colnames(syntheaTable)) + as.character(syntheaTable$REASONCODE) + } + if ("PHONE" %in% colnames(syntheaTable)) { syntheaTable$PHONE <- - as.character(syntheaTable$PHONE) - if ("UTILIZATION" %in% colnames(syntheaTable)) + as.character(syntheaTable$PHONE) + } + if ("UTILIZATION" %in% colnames(syntheaTable)) { syntheaTable$UTILIZATION <- - as.numeric(syntheaTable$UTILIZATION) + as.numeric(syntheaTable$UTILIZATION) + } suppressWarnings({ DatabaseConnector::insertTable( @@ -85,7 +101,7 @@ LoadSyntheaTables <- data = as.data.frame(syntheaTable), dropTableIfExists = FALSE, createTable = FALSE, - bulkLoad = bulkLoad, + bulkLoad = bulkLoad, progressBar = TRUE ) }) diff --git a/R/LoadVocabFromCsv.r b/R/LoadVocabFromCsv.r index 3da247d..c2b6bf2 100644 --- a/R/LoadVocabFromCsv.r +++ b/R/LoadVocabFromCsv.r @@ -54,9 +54,7 @@ LoadVocabFromCsv <- na.strings = "" ) - if (tolower(csv) == "concept.csv" || - tolower(csv) == "concept_relationship.csv" || - tolower(csv) == "drug_strength.csv") { + if (tolower(csv) == "concept.csv" || tolower(csv) == "concept_relationship.csv" || tolower(csv) == "drug_strength.csv") { writeLines(" - handling dates") vocabTable$valid_start_date <- as.Date(as.character(vocabTable$valid_start_date), "%Y%m%d") diff --git a/R/backupCDM.r b/R/backupCDM.r index 3c2a857..230455c 100644 --- a/R/backupCDM.r +++ b/R/backupCDM.r @@ -7,9 +7,9 @@ #' \code{DatabaseConnector} 
package. #' @param cdmSchema The name of the database schema that contains the CDM. #' Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. -#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "6.0.1". +#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "5.4.0". #' #'@export @@ -18,10 +18,10 @@ backupCDM <- function(connectionDetails, cdmSchema, cdmVersion) { if (cdmVersion == "5.3.1") sqlFilePath <- "cdm_version/v531" - else if (cdmVersion == "6.0.0") - sqlFilePath <- "cdm_version/v600" + else if (cdmVersion == "5.4.0") + sqlFilePath <- "cdm_version/v540" else - stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"6.0.0\"") + stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"5.4.0\"") sql <- SqlRender::loadRenderTranslateSql( diff --git a/R/createExtraIndices.R b/R/createExtraIndices.R index 813c5b8..9d435d8 100644 --- a/R/createExtraIndices.R +++ b/R/createExtraIndices.R @@ -8,20 +8,20 @@ #' Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'. #' @param syntheaSchema The name of the Synthea database schema. Requires read and write permissions to this schema. On SQL #' Server, this should specify both the database and the schema, so for example 'synthea.dbo'. -#' @param syntheaVersion Your Synthea version. Currently "2.7.0" and "3.0.0" are supported. +#' @param syntheaVersion Your Synthea version. Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported. #' @param outputFolder Location of the SQL scripts if sqlOnly = TRUE. Default is NULL. #' @param sqlOnly A boolean that determines whether to create the indices or generate a SQL scripts. Default is FALSE. 
#' #' @details This function creates indices which have been found to speed up certain long-running INSERT queries in LoadEventTables, #' for some users. Indices are created on the intermediate vocabulary mapping tables; the person & provider CDM tables; -#' and the claims_transactions Synthea table (in Synthea 3.0.0). +#' and the claims_transactions Synthea table (in Synthea 3.0.0, 3.1.0 and 3.2.0). #' #' @importFrom utils head #' #' @export -createExtraIndices <- function(connectionDetails, +CreateExtraIndices <- function(connectionDetails, cdmSchema, syntheaSchema, syntheaVersion, diff --git a/R/createPrunedTables.r b/R/createPrunedTables.r index 00ae8a1..094818e 100644 --- a/R/createPrunedTables.r +++ b/R/createPrunedTables.r @@ -9,10 +9,10 @@ #' #' @param cdmSchema The name of the database schema that contains the CDM. #' Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. #' @param eventConceptId A vector of concept_ids returned from \code{getEventConceptId}. -#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "6.0.0". +#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "5.4.0". #' #'@export @@ -25,16 +25,16 @@ createPrunedTables <- { if (cdmVersion == "5.3.1") sqlFilePath <- "cdm_version/v531" - else if (cdmVersion == "6.0.0") - sqlFilePath <- "cdm_version/v600" + else if (cdmVersion == "5.4.0") + sqlFilePath <- "cdm_version/v540" else - stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"6.0.0\"") + stop("Unsupported CDM specified. 
Supported CDM versions are \"5.3.1\" and \"5.4.0\"") sql <- SqlRender::loadRenderTranslateSql( sqlFileName = paste0(sqlFilePath, "/create_pruned_tables.sql"), packageName = "ETLSyntheaBuilder", dbms = connectionDetails$dbms, - cdm_schema = cdmDatabaseSchema, + cdm_schema = cdmSchema, event_concept_id = eventConceptId ) diff --git a/R/exportToSQLite.r b/R/exportToSQLite.r index 7407187..35445f3 100644 --- a/R/exportToSQLite.r +++ b/R/exportToSQLite.r @@ -72,10 +72,14 @@ exportToSQLite <- sqlQuery <- paste0("select * from ", paste0(cdmSchema, ".", tableName), ";") translatedSql <- - SqlRender::translate(renderedSql, targetDialect = connectionDetails$dbms) + SqlRender::translate(sqlQuery, targetDialect = connectionDetails$dbms) writeLines(paste0("Fetching: ", tableName)) tableData <- DatabaseConnector::querySql(conn, translatedSql) - DatabaseConnector::insertTable(sqliteCon, toupper(tableName), tableData) + DatabaseConnector::insertTable( + connection = sqliteCon, + tableName = toupper(tableName), + data = tableData + ) } DatabaseConnector::disconnect(conn) diff --git a/R/getEventConceptId.r b/R/getEventConceptId.r index 22a7859..762b110 100644 --- a/R/getEventConceptId.r +++ b/R/getEventConceptId.r @@ -8,9 +8,9 @@ #' #' @param cdmSchema The name of the database schema that contains the CDM. #' Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. -#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "6.0.0". +#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "5.4.0". #' #'@export @@ -19,10 +19,10 @@ getEventConceptId <- { if (cdmVersion == "5.3.1") sqlFilePath <- "cdm_version/v531" - else if (cdmVersion == "6.0.0") - sqlFilePath <- "cdm_version/v600" + else if (cdmVersion == "5.4.0") + sqlFilePath <- "cdm_version/v540" else - stop("Unsupported CDM specified. 
Supported CDM versions are \"5.3.1\" and \"6.0.0\"") + stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"5.4.0\"") sql <- SqlRender::loadRenderTranslateSql( sqlFileName = paste0(sqlFilePath, "/get_event_concept_id.sql"), @@ -37,5 +37,5 @@ getEventConceptId <- on.exit(DatabaseConnector::disconnect(conn)) - return(eventConceptId) + eventConceptId } diff --git a/R/pruneCDM.r b/R/pruneCDM.r index 31e3937..649febd 100644 --- a/R/pruneCDM.r +++ b/R/pruneCDM.r @@ -7,9 +7,9 @@ #' \code{DatabaseConnector} package. #' @param cdmSchema The name of the database schema that contains the CDM. #' Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. -#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "6.0.0". +#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "5.4.0". #' #'@export diff --git a/R/restoreCDMTables.r b/R/restoreCDMTables.r index 0c63535..6816d71 100644 --- a/R/restoreCDMTables.r +++ b/R/restoreCDMTables.r @@ -7,9 +7,9 @@ #' \code{DatabaseConnector} package. #' @param cdmSchema The name of the database schema that contains the CDM #' instance. Requires read and write permissions to this database. On SQL -#' Server, this should specifiy both the database and the schema, +#' Server, this should specify both the database and the schema, #' so for example 'cdm_instance.dbo'. -#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "6.0.0" are supported. +#' @param cdmVersion The version of your CDM. Currently "5.3.1" and "5.4.0" are supported. 
#'@export @@ -18,10 +18,10 @@ restoreCDMTables <- { if (cdmVersion == "5.3.1") sqlFilePath <- "cdm_version/v531" - else if (cdmVersion == "6.0.0") - sqlFilePath <- "cdm_version/v600" + else if (cdmVersion == "5.4.0") + sqlFilePath <- "cdm_version/v540" else - stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"6.0.0\"") + stop("Unsupported CDM specified. Supported CDM versions are \"5.3.1\" and \"5.4.0\"") sql <- SqlRender::loadRenderTranslateSql( sqlFileName = paste0(sqlFilePath, "/restore_cdm_tables.sql"), diff --git a/README.md b/README.md index eab7457..58b603b 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,12 @@ # Utility to Load Synthea CSV data to OMOP CDM -## Currently supports CDM v5.3 and v5.4 + +## Currently supports CDM v5.3 and v5.4 Follow the steps on the [synthea wiki](https://github.com/synthetichealth/synthea/wiki) to run the program and generate the files. This builder works off of the csv files, not the fhir files. To do this the `exporter.csv.export` option in the `./src/main/resources/synthea.properties` file needs to be set to TRUE. ### Step by Step Example -```r +``` r devtools::install_github("OHDSI/ETL-Synthea") library(ETLSyntheaBuilder) @@ -14,7 +15,7 @@ Follow the steps on the [synthea wiki](https://github.com/synthetichealth/synthe # The ETLSyntheaBuilder package leverages the OHDSI/CommonDataModel package for CDM creation. # Valid CDM versions are determined by executing CommonDataModel::listSupportedVersions(). # The strings representing supported CDM versions are currently "5.3" and "5.4". - # The Synthea version we use in this example is 2.7.0. However, at this time we also support 3.0.0. + # The Synthea version we use in this example is 2.7.0. However, at this time we also support 3.0.0, 3.1.0 and 3.2.0. # Please note that Synthea's MASTER branch is always active and this package will be updated to support # future versions as possible. # The schema to load the Synthea tables is called "native". 
@@ -47,16 +48,19 @@ ETLSyntheaBuilder::CreateSyntheaTables(connectionDetails = cd, syntheaSchema = s ETLSyntheaBuilder::LoadSyntheaTables(connectionDetails = cd, syntheaSchema = syntheaSchema, syntheaFileLoc = syntheaFileLoc) ETLSyntheaBuilder::LoadVocabFromCsv(connectionDetails = cd, cdmSchema = cdmSchema, vocabFileLoc = vocabFileLoc) + +ETLSyntheaBuilder::CreateMapAndRollupTables(connectionDetails = cd, cdmSchema = cdmSchema, syntheaSchema = syntheaSchema, cdmVersion = cdmVersion, syntheaVersion = syntheaVersion) + +## Optional Step to create extra indices +ETLSyntheaBuilder::CreateExtraIndices(connectionDetails = cd, cdmSchema = cdmSchema, syntheaSchema = syntheaSchema, cdmVersion = cdmVersion, syntheaVersion = syntheaVersion) ETLSyntheaBuilder::LoadEventTables(connectionDetails = cd, cdmSchema = cdmSchema, syntheaSchema = syntheaSchema, cdmVersion = cdmVersion, syntheaVersion = syntheaVersion) - ``` ### Simulating Data with Synthea + For commented code used to convert the Synthea data see extras/codeToRun.R -For more information on Synthea visit: -https://synthetichealth.github.io/synthea/ +For more information on Synthea visit: https://synthetichealth.github.io/synthea/ -Get Synthea from GitHub: -https://github.com/synthetichealth/synthea +Get Synthea from GitHub: https://github.com/synthetichealth/synthea diff --git a/docs/404.html b/docs/404.html index 22887cb..96ef43e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ ETLSyntheaBuilder - 1.0 + 2.0 @@ -49,25 +49,28 @@