Skip to content

Commit

Permalink
add index creation func. (#165)
Browse files Browse the repository at this point in the history
Co-authored-by: Katy Sadowski <[email protected]>
Co-authored-by: Evanette Burrows <[email protected]>
  • Loading branch information
3 people authored Dec 8, 2023
1 parent bfc7594 commit eb86d2d
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 0 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ inst/sqlite
.gitignore
^doc$
^\.github$
extras
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ export(LoadVocabFromSchema)
export(TruncateEventTables)
export(TruncateVocabTables)
export(backupCDM)
export(createExtraIndices)
export(createPrunedTables)
export(exportToSQLite)
export(getEventConceptId)
export(pruneCDM)
export(restoreCDMTables)
import(dplyr)
importFrom(utils,head)
69 changes: 69 additions & 0 deletions R/createExtraIndices.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' @title Create Optional Extra Indices for ETL Performance
#'
#' @description This function creates indices for certain tables which may help to speed up LoadEventTables.
#'
#' @param connectionDetails An R object of type\cr\code{connectionDetails} created using the
#' function \code{createConnectionDetails} in the \code{DatabaseConnector} package.
#' @param cdmSchema The name of the CDM database schema. Requires read and write permissions to this schema. On SQL
#' Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'.
#' @param syntheaSchema The name of the Synthea database schema. Requires read and write permissions to this schema. On SQL
#' Server, this should specify both the database and the schema, so for example 'synthea.dbo'.
#' @param syntheaVersion Your Synthea version. Currently "2.7.0" and "3.0.0" are supported.
#' @param outputFolder Location of the SQL scripts if sqlOnly = TRUE. Default is NULL.
#' @param sqlOnly A boolean that determines whether to create the indices or generate a SQL scripts. Default is FALSE.
#'
#' @details This function creates indices which have been found to speed up certain long-running INSERT queries in LoadEventTables,
#' for some users. Indices are created on the intermediate vocabulary mapping tables; the person & provider CDM tables;
#' and the claims_transactions Synthea table (in Synthea 3.0.0).
#'
#' @importFrom utils head
#'
#' @export


createExtraIndices <- function(connectionDetails,
cdmSchema,
syntheaSchema,
syntheaVersion,
outputFolder = NULL,
sqlOnly = FALSE) {
sqlFilename <- "extra_indices.sql"

sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = sqlFilename,
packageName = "ETLSyntheaBuilder",
dbms = connectionDetails$dbms,
targetDialect = connectionDetails$dbms,
cdmDatabaseSchema = cdmSchema,
syntheaVersion = syntheaVersion,
syntheaSchema = syntheaSchema
)
sqlQueries <- utils::head(unlist(strsplit(sql, ";")), -1)

if (!sqlOnly) {
print("Creating Extra Indices....")
conn <- DatabaseConnector::connect(connectionDetails)
lapply(sqlQueries, function(query) {
tryCatch(
expr = {
DatabaseConnector::executeSql(conn, query)
},
error = function(e) {
message(paste("Error in SQL:", query))
message(e)
}
)
})
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
} else {
if (is.null(outputFolder)) {
stop("Must specify an outputFolder location when using sqlOnly = TRUE")
}
if (!dir.exists(outputFolder)) {
dir.create(outputFolder)
}
writeLines(paste0("Saving to output/", sqlFilename))
SqlRender::writeSql(sql, paste0("output/", sqlFilename))
}
}
41 changes: 41 additions & 0 deletions inst/sql/sql_server/extra_indices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
@targetDialect Optional indices to speed up ETL.
*/

CREATE INDEX source_to_standard_vocab_map_sctditvisvitsctir ON @cdmDatabaseSchema.source_to_standard_vocab_map (
source_code,
target_domain_id,
target_vocabulary_id,
source_vocabulary_id,
target_standard_concept,
target_invalid_reason
);

CREATE INDEX source_to_standard_vocab_map_scsvsd ON @cdmDatabaseSchema.source_to_standard_vocab_map (
source_code,
source_vocabulary_id,
source_domain_id
);

CREATE INDEX source_to_source_vocab_map_scsvisdi ON @cdmDatabaseSchema.source_to_source_vocab_map (
source_code,
source_vocabulary_id,
source_domain_id
);

CREATE INDEX provider_psv ON @cdmDatabaseSchema.provider (
provider_source_value
);

CREATE INDEX person_psv ON @cdmDatabaseSchema.person (
person_source_value
);

{@syntheaVersion == '3.0.0'}?{
CREATE INDEX claims_transactions_cpap ON @syntheaSchema.claims_transactions (
claimid,
patientid,
appointmentid,
providerid
);
}
39 changes: 39 additions & 0 deletions man/createExtraIndices.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit eb86d2d

Please sign in to comment.