From eb86d2d0f0000a34a951dae5141f933681d834e6 Mon Sep 17 00:00:00 2001 From: Katy Sadowski Date: Fri, 8 Dec 2023 15:45:44 -0500 Subject: [PATCH] add index creation func. (#165) Co-authored-by: Katy Sadowski Co-authored-by: Evanette Burrows --- .Rbuildignore | 1 + NAMESPACE | 2 + R/createExtraIndices.R | 69 +++++++++++++++++++++++++++ inst/sql/sql_server/extra_indices.sql | 41 ++++++++++++++++ man/createExtraIndices.Rd | 39 +++++++++++++++ 5 files changed, 152 insertions(+) create mode 100644 R/createExtraIndices.R create mode 100644 inst/sql/sql_server/extra_indices.sql create mode 100644 man/createExtraIndices.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 4c2ae5b..12329da 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ inst/sqlite .gitignore ^doc$ ^\.github$ +extras diff --git a/NAMESPACE b/NAMESPACE index 2dfc131..60eaaf8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,9 +17,11 @@ export(LoadVocabFromSchema) export(TruncateEventTables) export(TruncateVocabTables) export(backupCDM) +export(createExtraIndices) export(createPrunedTables) export(exportToSQLite) export(getEventConceptId) export(pruneCDM) export(restoreCDMTables) import(dplyr) +importFrom(utils,head) diff --git a/R/createExtraIndices.R b/R/createExtraIndices.R new file mode 100644 index 0000000..813c5b8 --- /dev/null +++ b/R/createExtraIndices.R @@ -0,0 +1,69 @@ +#' @title Create Optional Extra Indices for ETL Performance +#' +#' @description This function creates indices for certain tables which may help to speed up LoadEventTables. +#' +#' @param connectionDetails An R object of type\cr\code{connectionDetails} created using the +#' function \code{createConnectionDetails} in the \code{DatabaseConnector} package. +#' @param cdmSchema The name of the CDM database schema. Requires read and write permissions to this schema. On SQL +#' Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'. +#' @param syntheaSchema The name of the Synthea database schema. Requires read and write permissions to this schema. On SQL +#' Server, this should specify both the database and the schema, so for example 'synthea.dbo'. +#' @param syntheaVersion Your Synthea version. Currently "2.7.0" and "3.0.0" are supported. +#' @param outputFolder Location of the SQL scripts if sqlOnly = TRUE. Default is NULL. +#' @param sqlOnly A boolean that determines whether to create the indices or generate a SQL scripts. Default is FALSE. +#' +#' @details This function creates indices which have been found to speed up certain long-running INSERT queries in LoadEventTables, +#' for some users. Indices are created on the intermediate vocabulary mapping tables; the person & provider CDM tables; +#' and the claims_transactions Synthea table (in Synthea 3.0.0). +#' +#' @importFrom utils head +#' +#' @export + + +createExtraIndices <- function(connectionDetails, + cdmSchema, + syntheaSchema, + syntheaVersion, + outputFolder = NULL, + sqlOnly = FALSE) { + sqlFilename <- "extra_indices.sql" + + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = sqlFilename, + packageName = "ETLSyntheaBuilder", + dbms = connectionDetails$dbms, + targetDialect = connectionDetails$dbms, + cdmDatabaseSchema = cdmSchema, + syntheaVersion = syntheaVersion, + syntheaSchema = syntheaSchema + ) + sqlQueries <- utils::head(unlist(strsplit(sql, ";")), -1) + + if (!sqlOnly) { + print("Creating Extra Indices....") + conn <- DatabaseConnector::connect(connectionDetails) + lapply(sqlQueries, function(query) { + tryCatch( + expr = { + DatabaseConnector::executeSql(conn, query) + }, + error = function(e) { + message(paste("Error in SQL:", query)) + message(e) + } + ) + }) + DatabaseConnector::disconnect(conn) + print("Index Creation Complete.") + } else { + if (is.null(outputFolder)) { + stop("Must specify an outputFolder location when using sqlOnly = TRUE") + } + if (!dir.exists(outputFolder)) { + dir.create(outputFolder) + } + writeLines(paste0("Saving to output/", sqlFilename)) + SqlRender::writeSql(sql, paste0("output/", sqlFilename)) + } +} diff --git a/inst/sql/sql_server/extra_indices.sql b/inst/sql/sql_server/extra_indices.sql new file mode 100644 index 0000000..7a703ec --- /dev/null +++ b/inst/sql/sql_server/extra_indices.sql @@ -0,0 +1,41 @@ +/* +@targetDialect Optional indices to speed up ETL. +*/ + +CREATE INDEX source_to_standard_vocab_map_sctditvisvitsctir ON @cdmDatabaseSchema.source_to_standard_vocab_map ( + source_code, + target_domain_id, + target_vocabulary_id, + source_vocabulary_id, + target_standard_concept, + target_invalid_reason +); + +CREATE INDEX source_to_standard_vocab_map_scsvsd ON @cdmDatabaseSchema.source_to_standard_vocab_map ( + source_code, + source_vocabulary_id, + source_domain_id +); + +CREATE INDEX source_to_source_vocab_map_scsvisdi ON @cdmDatabaseSchema.source_to_source_vocab_map ( + source_code, + source_vocabulary_id, + source_domain_id +); + +CREATE INDEX provider_psv ON @cdmDatabaseSchema.provider ( + provider_source_value +); + +CREATE INDEX person_psv ON @cdmDatabaseSchema.person ( + person_source_value +); + +{@syntheaVersion == '3.0.0'}?{ + CREATE INDEX claims_transactions_cpap ON @syntheaSchema.claims_transactions ( + claimid, + patientid, + appointmentid, + providerid + ); +} diff --git a/man/createExtraIndices.Rd b/man/createExtraIndices.Rd new file mode 100644 index 0000000..c0ce4ef --- /dev/null +++ b/man/createExtraIndices.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/createExtraIndices.R +\name{createExtraIndices} +\alias{createExtraIndices} +\title{Create Optional Extra Indices for ETL Performance} +\usage{ +createExtraIndices( + connectionDetails, + cdmSchema, + syntheaSchema, + syntheaVersion, + outputFolder = NULL, + sqlOnly = FALSE +) +} +\arguments{ +\item{connectionDetails}{An R object of type\cr\code{connectionDetails} created using the +function \code{createConnectionDetails} in the \code{DatabaseConnector} package.} + +\item{cdmSchema}{The name of the CDM database schema. Requires read and write permissions to this schema. On SQL +Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'.} + +\item{syntheaSchema}{The name of the Synthea database schema. Requires read and write permissions to this schema. On SQL +Server, this should specify both the database and the schema, so for example 'synthea.dbo'.} + +\item{syntheaVersion}{Your Synthea version. Currently "2.7.0" and "3.0.0" are supported.} + +\item{outputFolder}{Location of the SQL scripts if sqlOnly = TRUE. Default is NULL.} + +\item{sqlOnly}{A boolean that determines whether to create the indices or generate a SQL scripts. Default is FALSE.} +} +\description{ +This function creates indices for certain tables which may help to speed up LoadEventTables. +} +\details{ +This function creates indices which have been found to speed up certain long-running INSERT queries in LoadEventTables, + for some users. Indices are created on the intermediate vocabulary mapping tables; the person & provider CDM tables; + and the claims_transactions Synthea table (in Synthea 3.0.0). +}