From 823af96d484a1ec075548ce181f52147cff54af5 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Thu, 10 Oct 2024 09:13:26 -0600 Subject: [PATCH 1/6] - remove old .Rd leftovers and update with new docs - let R-CMD sort NAMESPACE --- NAMESPACE | 1 - man/IPG2Lineage.Rd | 3 ++- man/acc2Lineage.Rd | 3 ++- man/acc2lin.Rd | 0 man/efetchIPG.Rd | 3 ++- man/efetch_ipg.Rd | 0 man/ipg2lin.Rd | 0 man/sink.reset.Rd | 0 man/sinkReset.Rd | 1 + 9 files changed, 7 insertions(+), 4 deletions(-) delete mode 100644 man/acc2lin.Rd delete mode 100644 man/efetch_ipg.Rd delete mode 100644 man/ipg2lin.Rd delete mode 100644 man/sink.reset.Rd diff --git a/NAMESPACE b/NAMESPACE index 50af36df..078f971b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,7 +77,6 @@ export(prepareColumnParams) export(prepareSingleColumnParams) export(proteinAcc2TaxID) export(proteinAcc2TaxID_old) -export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd index e24ab617..f8434c7f 100644 --- a/man/IPG2Lineage.Rd +++ b/man/IPG2Lineage.Rd @@ -38,7 +38,8 @@ This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} f Describe return, in detail } \description{ -Takes the resulting file of an efetch run on the ipg database and +Takes the resulting file +of an efetch run on the ipg database and Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index a24bdc9a..836a677f 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -38,7 +38,8 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL} Describe return, in detail } \description{ -This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set +This function combines 'efetchIPG()' +and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Function to map protein accession numbers to lineage diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index 6a5d85a4..5d2e8372 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -23,7 +23,8 @@ the ipg database} Describe return, in detail } \description{ -Perform efetch on the ipg database and write the results to out_path +Perform efetch on the ipg database +and write the results to out_path Perform efetch on the ipg database and write the results to out_path } diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd deleted file mode 100644 index e69de29b..00000000 diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd index 0285c0b2..e3fc7ce4 100644 --- a/man/sinkReset.Rd +++ b/man/sinkReset.Rd @@ -8,6 +8,7 @@ sinkReset() } \value{ No return, but run to close all outstanding \code{sink()}s +and handles any errors or warnings that occur during the process. } \description{ Sink Reset From 48b7fd697b6c6cac7826ae3f09d315025db1a438 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Sun, 13 Oct 2024 18:02:36 +0100 Subject: [PATCH 2/6] Update error handling to use rlang functions in acc2lin.R file - Replaced base R error handling with rlang functions: `abort()`, `warn()`, and `inform()`. - Improved clarity and consistency in error and warning messages. - Enhanced robustness with detailed context for errors and warnings. --- R/acc2lin.R | 209 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 141 insertions(+), 68 deletions(-) diff --git a/R/acc2lin.R b/R/acc2lin.R index 08cb7d76..bd5cc289 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -5,6 +5,7 @@ # suppressPackageStartupMessages(library(data.table)) # suppressPackageStartupMessages(library(tidyverse)) # suppressPackageStartupMessages(library(biomartr)) +suppressPackageStartupMessages(library(rlang)) # https://stackoverflow.com/questions/18730491/sink-does-not-release-file #' Sink Reset @@ -24,13 +25,18 @@ sinkReset <- function() { for (i in seq_len(sink.number())) { sink(NULL) } - print("All sinks closed") + inform("All sinks closed", class = "sink_reset_info") }, error = function(e) { - print(paste("Error: ", e$message)) + abort(paste("Error: ", e$message), class = "sink_reset_error") }, warning = function(w) { - print(paste("Warning: ", w$message)) + warn(paste("Warning: ", w$message), class = "sink_reset_warning") }, finally = { - print("resetSink function execution completed.") + # If any additional cleanup is needed, it can be done here + if (sink.number() > 0) { + # Additional cleanup if sinks are still open + inform("Some sinks remain open, ensure proper cleanup.", + class = "sink_cleanup_warning") + } }) } @@ -56,60 +62,64 @@ sinkReset <- function() { #' addLineage() #' } addLineage <- function(df, acc_col = "AccNum", assembly_path, - lineagelookup_path, ipgout_path = NULL, - plan = "sequential", ...) { + lineagelookup_path, ipgout_path = NULL, + plan = "sequential", ...) { # check for validate inputs if (!is.data.frame(df)) { - stop("Input 'df' must be a data frame.") + abort("Input 'df' must be a data frame.", class = "input_error") } if (!acc_col %in% colnames(df)) { - stop(paste("Column", acc_col, "not found in data frame.")) + abort(paste("Column", acc_col, + "not found in data frame."), class = "column_error") } # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - stop("Both 'assembly_path' and - 'lineagelookup_path' must be character strings.") + abort("Both 'assembly_path' and + 'lineagelookup_path' must be character strings.", + class = "path_type_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - stop(paste("Assembly file not found at:", assembly_path)) + abort(paste("Assembly file not found at:", + assembly_path), class = "file_not_found_error") } if (!file.exists(lineagelookup_path)) { - stop(paste("Lineage lookup file not found at:", lineagelookup_path)) + abort(paste("Lineage lookup file not found at:", + lineagelookup_path), class = "file_not_found_error") } - tryCatch({ - # Attempt to add lineages - acc_col <- sym(acc_col) - accessions <- df %>% pull(acc_col) - lins <- acc2Lineage( - accessions, assembly_path, lineagelookup_path, ipgout_path, plan - ) - - # Drop a lot of the unimportant columns for now? - # will make merging much easier - lins <- lins[, c( - "Strand", "Start", "Stop", "Nucleotide Accession", "Source", - "Id", "Strain" - ) := NULL] - lins <- unique(lins) - - # dup <- lins %>% group_by(Protein) %>% - # summarize(count = n()) %>% filter(count > 1) %>% - # pull(Protein) - - merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) - return(merged) - }, error = function(e) { - print(paste("Error: ", e$message)) - }, warning = function(w) { - print(paste("Warning: ", w$message)) - }, finally = { - print("addLineages function execution completed.") - }) + tryCatch({ + # Attempt to add lineages + acc_col <- sym(acc_col) + accessions <- df %>% pull(acc_col) + lins <- acc2Lineage( + accessions, assembly_path, lineagelookup_path, ipgout_path, plan + ) + + # Drop a lot of the unimportant columns for now? + # will make merging much easier + lins <- lins[, c( + "Strand", "Start", "Stop", "Nucleotide Accession", "Source", + "Id", "Strain" + ) := NULL] + lins <- unique(lins) + + # dup <- lins %>% group_by(Protein) %>% + # summarize(count = n()) %>% filter(count > 1) %>% + # pull(Protein) + + merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) + return(merged) + }, error = function(e) { + abort(paste("Error during lineage addition:", e$message), + class = "lineage_addition_error") + }, warning = function(w) { + warn(paste("Warning during lineage addition:", w$message), + class = "lineage_addition_warning") + }) } @@ -140,11 +150,11 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' acc2Lineage() #' } acc2Lineage <- function(accessions, assembly_path, - lineagelookup_path, ipgout_path = NULL, - plan = "sequential", ...) { + lineagelookup_path, ipgout_path = NULL, + plan = "sequential", ...) { tmp_ipg <- F if (is.null(ipgout_path)) { - tmp_ipg <- T + tmp_ipg <- TRUE ipgout_path <- tempfile("ipg", fileext = ".txt") } @@ -154,18 +164,41 @@ acc2Lineage <- function(accessions, assembly_path, efetchIPG(accessions, out_path = ipgout_path, plan) # Attempt to process IPG to lineages - lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, + assembly_path, lineagelookup_path) }, error = function(e) { - print(paste("An error occurred: ", e$message)) + abort( + message = paste("An error occurred during IPG fetching + or lineage processing:", e$message), + class = "lineage_processing_error", + # capturing the call stack + call = sys.call(), + # adding additional context + accessions = accessions, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path, + ipgout_path = ipgout_path, + plan = plan + ) }, warning = function(w) { - print(paste("Warning: ", w$message)) + warn( + message = paste("Warning during IPG fetching + or lineage processing:", w$message), + class = "lineage_processing_warning", + call = sys.call(), # capturing the call stack + accessions = accessions, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path, + ipgout_path = ipgout_path, + plan = plan + ) }, finally = { - print("acc2lin function execution completed.") + # Cleanup: delete temporary IPG file if it was created + if (tmp_ipg && file.exists(ipgout_path)) { + unlink(ipgout_path) + } }) - if (tmp_ipg) { - unlink(tempdir(), recursive = T) - } return(lins) } @@ -196,15 +229,18 @@ acc2Lineage <- function(accessions, assembly_path, efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { # Argument validation if (!is.character(accnums) || length(accnums) == 0) { - stop("Error: 'accnums' must be a non-empty character vector.") + abort("Error: 'accnums' must be a non-empty character vector.", + class = "validation_error") } if (!is.character(out_path) || nchar(out_path) == 0) { - stop("Error: 'out_path' must be a non-empty string.") + abort("Error: 'out_path' must be a non-empty string.", + class = "validation_error") } if (!is.function(plan)) { - stop("Error: 'plan' must be a valid plan function.") + abort("Error: 'plan' must be a valid plan function.", + class = "validation_error") } if (length(accnums) > 0) { partition <- function(in_data, groups) { @@ -249,11 +285,26 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { }) sink(NULL) }, error = function(e) { - print(paste("An error occurred: ", e$message)) + abort( + message = paste("An error occurred: ", e$message), + class = "fetch_error", + call = sys.call(), + accnums = accnums, + out_path = out_path, + plan = plan + ) }, warning = function(w) { - print(paste("Warning: ", w$message)) + warn( + message = paste("Warning: ", w$message), + class = "fetch_warning", + call = sys.call(), + accnums = accnums, + out_path = out_path, + plan = plan + ) }, finally = { - print("efetch_ipg function execution completed.") + # Ensure the sink is closed in case of errors + if (sink.number() > 0) sink(NULL) }) } } @@ -289,31 +340,38 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' IPG2Lineage() #' } #' -IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) { +IPG2Lineage <- function(accessions, ipg_file, + assembly_path, lineagelookup_path, ...) { # Argument validation for accessions if (!is.character(accessions) || length(accessions) == 0) { - stop("Input 'accessions' must be a non-empty character vector.") + abort("Input 'accessions' must be a non-empty + character vector.", class = "validation_error") } # check for validate inputs if (!is.character(ipg_file)) { - stop("Input 'ipg_file' must be a character string.") + abort("Input 'ipg_file' must be a + character string.", class = "validation_error") } + # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - stop("Both 'assembly_path' and - 'lineagelookup_path' must be character strings.") + abort("Both 'assembly_path' and 'lineagelookup_path' + must be character strings.", class = "validation_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - stop(paste("Assembly file not found at:", assembly_path)) + abort(paste("Assembly file not found at:", assembly_path), + class = "file_error") } if (!file.exists(lineagelookup_path)) { - stop(paste("Lineage lookup file not found at:", lineagelookup_path)) + abort(paste("Lineage lookup file not found at:", lineagelookup_path), + class = "file_error") } + # Process the IPG file try({ # Attempt to read the IPG file ipg_dt <- fread(ipg_file, sep = "\t", fill = T) @@ -332,12 +390,27 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, return(lins) }, error = function(e) { - print(paste("An error occurred: ", e$message)) + abort( + message = paste("An error occurred: ", e$message), + class = "processing_error", + call = sys.call(), + accessions = accessions, + ipg_file = ipg_file, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path + ) }, warning = function(w) { - print(paste("Warning: ", w$message)) - }, finally = { - print("ipg2lin function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + accessions = accessions, + ipg_file = ipg_file, + assembly_path = assembly_path, + lineagelookup_path = lineagelookup_path + ) }) + } From 6babffe95d2729857b921c9305f25dcbc0c0ed49 Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Tue, 15 Oct 2024 11:57:15 +0100 Subject: [PATCH 3/6] Update error handling to use rlang functions in R/assign_job_queue.R file - Replaced base R error handling with rlang functions: `abort()`, `warn()`, and `inform()`. - Improved clarity and consistency in error and warning messages. - Enhanced robustness with detailed context for errors and warnings. --- R/assign_job_queue.R | 227 +++++++++++++++++++++++++++++++------------ 1 file changed, 166 insertions(+), 61 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index c531fb09..df4f97e7 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -1,3 +1,4 @@ +suppressPackageStartupMessages(library(rlang)) # for now, we're using an env var, COMMON_SRC_ROOT, to specify this folder since # the working directory is changed in many parts of the current molevolvr # pipeline. @@ -22,11 +23,9 @@ make_opts2procs <- function() { ) return(opts2processes) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort(paste("Error: ", e$message), class = "Opts_to_process_error") }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("make_opts2procs function execution completed.") + warn(paste("Warning: ", w$message), class = "Opts_to_process_warning") }) } @@ -44,7 +43,7 @@ make_opts2procs <- function() { #' @export map_advanced_opts2procs <- function(advanced_opts) { if (!is.character(advanced_opts)) { - stop("Argument must be a character vector!") + abort("Argument must be a character vector!", class = "validation_error") } tryCatch({ # append 'always' to add procs that always run @@ -56,11 +55,19 @@ map_advanced_opts2procs <- function(advanced_opts) { procs <- opts2proc[idx] |> unlist() return(procs) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + class = "map_advanced_opts2procs_error", + call = sys.call(), + advanced_opts = advanced_opts + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("make_opts2procs function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "map_advanced_opts2procs_warning", + call = sys.call(), + advanced_opts = advanced_opts + ) }) } @@ -91,12 +98,14 @@ get_proc_medians <- function(dir_job_results) { tryCatch({ # Check if dir_job_results is a character string if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") + abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } # Check if dir_job_results exists if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) + abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } source(file.path(common_root, "molevol_scripts", "R", "metrics.R")) @@ -135,11 +144,10 @@ get_proc_medians <- function(dir_job_results) { as.list() return(list_proc_medians) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort(paste("Encountered an error: ", e$message), + class = "processing_error") }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("get_proc_medians function execution completed.") + warn(paste("Warning: ", w$message), class = "processing_warning") }) } @@ -165,15 +173,18 @@ write_proc_medians_table <- function(dir_job_results, filepath) { tryCatch({ # Error handling for input arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") + abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) + abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } if (!is.character(filepath) || length(filepath) != 1) { - stop("Input 'filepath' must be a single character string.") + abort("Input 'filepath' must be a single character string.", + class = "validation_error") } df_proc_medians <- get_proc_medians(dir_job_results) |> tibble::as_tibble() |> @@ -188,11 +199,21 @@ write_proc_medians_table <- function(dir_job_results, filepath) { readr::write_tsv(df_proc_medians, file = filepath) return(df_proc_medians) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("write_proc_medians_table function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) }) } @@ -222,12 +243,21 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { tryCatch({ # Error handling for dir_job_results arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - stop("Input 'dir_job_results' must be a single character string.") + abort( + message = "Input 'dir_job_results' must be a single character string.", + class = "validation_error", + dir_job_results = dir_job_results + ) } if (!dir.exists(dir_job_results)) { - stop(paste("The directory", dir_job_results, "does not exist.")) + abort( + message = paste("The directory", dir_job_results, "does not exist."), + class = "file_error", + dir_job_results = dir_job_results + ) } + if (is.null(filepath)) { filepath <- file.path(common_root, "molevol_scripts", @@ -235,20 +265,32 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { "job_proc_weights.yml") } if (!is.character(filepath) || length(filepath) != 1) { - stop("Input 'filepath' must be a single character string.") + abort( + message = "Input 'filepath' must be a single character string.", + class = "validation_error", + filepath = filepath + ) } medians <- get_proc_medians(dir_job_results) yaml::write_yaml(medians, filepath) }, error = function(e) { - message(paste("Encountered an error: "), e$message) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) }, warning = function(w) { - message(paste("Warning: "), w$message) - }, finally = { - message("write_proc_medians_table function execution completed.") - } - ) - + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + dir_job_results = dir_job_results, + filepath = filepath + ) + }) } #' Quickly get the runtime weights for MolEvolvR backend processes @@ -275,13 +317,24 @@ get_proc_weights <- function(medians_yml_path = NULL) { # attempt to read the weights from the YAML file produced by # write_proc_medians_yml() if (stringr::str_trim(medians_yml_path) == "") { - stop( - stringr::str_glue("medians_yml_path is empty - ({medians_yml_path}), returning default weights") + abort( + message = stringr::str_glue("medians_yml_path is empty + ({medians_yml_path}), returning default weights"), + class = "input_error", + medians_yml_path = medians_yml_path ) } proc_weights <- yaml::read_yaml(medians_yml_path) + + if (!is.list(proc_weights) || length(proc_weights) == 0) { + abort( + message = "The loaded YAML file does not + contain valid process weights.", + class = "file_error", + medians_yml_path = medians_yml_path + ) + } }, # to avoid fatal errors in reading the proc weights yaml, # some median process runtimes have been hardcoded based on @@ -318,10 +371,9 @@ get_proc_weights <- function(medians_yml_path = NULL) { #' "domain_architecture"), #' n_inputs = 3, n_hits = 50L) #' @export -advanced_opts2est_walltime <- function(advanced_opts, - n_inputs = 1L, - n_hits = NULL, - verbose = FALSE) { +advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, + n_hits = NULL, + verbose = FALSE) { tryCatch({ # to calculate est walltime for a homology search job, the number of hits @@ -331,26 +383,42 @@ advanced_opts2est_walltime <- function(advanced_opts, # Validate advanced_opts if (!is.character(advanced_opts)) { - stop("Argument 'advanced_opts' must be a character vector.") + abort( + message = "Argument 'advanced_opts' must be a character vector.", + class = "validation_error", + advanced_opts = advanced_opts + ) } # Validate n_inputs if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) { - stop("Argument 'n_inputs' must be a single positive numeric value.") + abort( + message = "Argument 'n_inputs' + must be a single positive numeric value.", + class = "validation_error", + n_inputs = n_inputs + ) } # Validate n_hits if homology_search is in advanced_opts if ("homology_search" %in% advanced_opts && - (is.null(n_hits)|| !is.numeric(n_hits) - || length(n_hits) != 1 || n_hits < 0)) { - stop("Argument 'n_hits' must be a single non-negative numeric value when - 'homology_search' is in 'advanced_opts'.") + (is.null(n_hits) || !is.numeric(n_hits) || + length(n_hits) != 1 || n_hits < 0)) { + abort( + message = "Argument 'n_hits' must be a single non-negative numeric + value when 'homology_search' is in 'advanced_opts'.", + class = "validation_error", + n_hits = n_hits + ) } # Get process weights proc_weights <- write_proc_medians_yml() if (!is.list(proc_weights)) { - stop("Process weights could not be retrieved correctly.") + abort( + message = "Process weights could not be retrieved correctly.", + class = "processing_error" + ) } # sort process weights by names and convert to vec @@ -389,12 +457,23 @@ advanced_opts2est_walltime <- function(advanced_opts, } return(est_walltime) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + advanced_opts = advanced_opts, + n_inputs = n_inputs, + n_hits = n_hits + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("advanced_opts2est_walltime - function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + advanced_opts = advanced_opts, + n_inputs = n_inputs, + n_hits = n_hits + ) }) } @@ -419,22 +498,44 @@ assign_job_queue <- function( t_cutoff = 21600 # 6 hours ) { tryCatch({ + # Validate t_sec_estimate if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) { - stop("Argument 't_sec_estimate' must be a single numeric value.") + abort( + message = "Argument 't_sec_estimate' must be a single numeric value.", + class = "validation_error", + t_sec_estimate = t_sec_estimate + ) } + # Validate t_cutoff if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) { - stop("Argument 't_cutoff' must be a single non-negative numeric value.") + abort( + message = "Argument 't_cutoff' must be a + single non-negative numeric value.", + class = "validation_error", + t_cutoff = t_cutoff + ) } + queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short") return(queue) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error: ", e$message), + class = "processing_error", + call = sys.call(), + t_sec_estimate = t_sec_estimate, + t_cutoff = t_cutoff + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("assign_job_queue function execution completed.") + warn( + message = paste("Warning: ", w$message), + class = "processing_warning", + call = sys.call(), + t_sec_estimate = t_sec_estimate, + t_cutoff = t_cutoff + ) }) } @@ -537,11 +638,15 @@ plot_estimated_walltimes <- function() { ) return(p) }, error = function(e) { - message(paste("Encountered an error: ", e$message)) + abort( + message = paste("Encountered an error:", e$message), + .internal = TRUE + ) }, warning = function(w) { - message(paste("Warning: ", w$message)) - }, finally = { - message("plot_estimated_walltimes function execution completed.") + warn( + message = paste("Warning:", w$message), + .internal = TRUE + ) }) } From 57a635671795984f5ace17076ef0029c6ff0336c Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Sun, 20 Oct 2024 12:01:02 +0100 Subject: [PATCH 4/6] Enhance error handling and validation across functions - Added robust error handling in run_deltablast and run_rpsblast functions. - Updated Roxygen documentation to import rlang::abort, rlang::warn and rlang::inform for better error management. - Refactored code for clarity and consistency based on the suggestion from the last review. --- NAMESPACE | 3 + R/acc2lin.R | 105 ++++++++++++++++++----------------- R/assign_job_queue.R | 128 ++++++++++++++++++++++--------------------- R/blastWrappers.R | 84 +++++++++++++++++++++------- 4 files changed, 184 insertions(+), 136 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 078f971b..9449e14b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -240,8 +240,11 @@ importFrom(readr,write_lines) importFrom(readr,write_tsv) importFrom(rentrez,entrez_fetch) importFrom(rlang,.data) +importFrom(rlang,abort) importFrom(rlang,as_string) +importFrom(rlang,inform) importFrom(rlang,sym) +importFrom(rlang,warn) importFrom(sendmailR,mime_part) importFrom(sendmailR,sendmail) importFrom(seqinr,dist.alignment) diff --git a/R/acc2lin.R b/R/acc2lin.R index bd5cc289..c1f3b34e 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -5,11 +5,13 @@ # suppressPackageStartupMessages(library(data.table)) # suppressPackageStartupMessages(library(tidyverse)) # suppressPackageStartupMessages(library(biomartr)) -suppressPackageStartupMessages(library(rlang)) + # https://stackoverflow.com/questions/18730491/sink-does-not-release-file #' Sink Reset #' +#' @importFrom rlang warn abort inform +#' #' @return No return, but run to close all outstanding `sink()`s #' and handles any errors or warnings that occur during the process. #' @@ -25,17 +27,17 @@ sinkReset <- function() { for (i in seq_len(sink.number())) { sink(NULL) } - inform("All sinks closed", class = "sink_reset_info") + rlang::inform("All sinks closed", class = "sink_reset_info") }, error = function(e) { - abort(paste("Error: ", e$message), class = "sink_reset_error") + rlang::abort(paste("Error: ", e$message), class = "sink_reset_error") }, warning = function(w) { - warn(paste("Warning: ", w$message), class = "sink_reset_warning") + rlang::warn(paste("Warning: ", w$message), class = "sink_reset_warning") }, finally = { # If any additional cleanup is needed, it can be done here if (sink.number() > 0) { # Additional cleanup if sinks are still open - inform("Some sinks remain open, ensure proper cleanup.", - class = "sink_cleanup_warning") + rlang::inform("Some sinks remain open, ensure proper cleanup.", + class = "sink_cleanup_warning") } }) } @@ -52,7 +54,7 @@ sinkReset <- function() { #' #' @importFrom dplyr pull #' @importFrom magrittr %>% -#' @importFrom rlang sym +#' @importFrom rlang sym warn abort inform #' #' @return Describe return, in detail #' @export @@ -66,30 +68,30 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, plan = "sequential", ...) { # check for validate inputs if (!is.data.frame(df)) { - abort("Input 'df' must be a data frame.", class = "input_error") + rlang::abort("Input 'df' must be a data frame.", class = "input_error") } if (!acc_col %in% colnames(df)) { - abort(paste("Column", acc_col, - "not found in data frame."), class = "column_error") + rlang::abort(paste("Column", acc_col, + "not found in data frame."), class = "column_error") } # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - abort("Both 'assembly_path' and - 'lineagelookup_path' must be character strings.", - class = "path_type_error") + rlang::abort("Both 'assembly_path' and + 'lineagelookup_path' must be character strings.", + class = "path_type_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - abort(paste("Assembly file not found at:", - assembly_path), class = "file_not_found_error") + rlang::abort(paste("Assembly file not found at:", + assembly_path), class = "file_not_found_error") } if (!file.exists(lineagelookup_path)) { - abort(paste("Lineage lookup file not found at:", - lineagelookup_path), class = "file_not_found_error") + rlang::abort(paste("Lineage lookup file not found at:", + lineagelookup_path), class = "file_not_found_error") } tryCatch({ # Attempt to add lineages @@ -99,7 +101,7 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, accessions, assembly_path, lineagelookup_path, ipgout_path, plan ) - # Drop a lot of the unimportant columns for now? + # Drop a lot of the unimportant columns for now? # will make merging much easier lins <- lins[, c( "Strand", "Start", "Stop", "Nucleotide Accession", "Source", @@ -107,18 +109,18 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, ) := NULL] lins <- unique(lins) - # dup <- lins %>% group_by(Protein) %>% + # dup <- lins %>% group_by(Protein) %>% # summarize(count = n()) %>% filter(count > 1) %>% # pull(Protein) merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE) return(merged) }, error = function(e) { - abort(paste("Error during lineage addition:", e$message), - class = "lineage_addition_error") + rlang::abort(paste("Error during lineage addition:", e$message), + class = "lineage_addition_error") }, warning = function(w) { - warn(paste("Warning during lineage addition:", w$message), - class = "lineage_addition_warning") + rlang::warn(paste("Warning during lineage addition:", w$message), + class = "lineage_addition_warning") }) } @@ -137,11 +139,13 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function #' @param lineagelookup_path String of the path to the lineage lookup file #' (taxid to lineage mapping). This file can be generated using the -#' @param ipgout_path Path to write the results +#' @param ipgout_path Path to write the results #' of the efetch run of the accessions #' on the ipg database. If NULL, the file will not be written. Defaults to NULL #' @param plan #' +#' @importFrom rlang warn abort inform +#' #' @return Describe return, in detail #' @export #' @@ -149,8 +153,8 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' \dontrun{ #' acc2Lineage() #' } -acc2Lineage <- function(accessions, assembly_path, - lineagelookup_path, ipgout_path = NULL, +acc2Lineage <- function(accessions, assembly_path, + lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { tmp_ipg <- F if (is.null(ipgout_path)) { @@ -167,12 +171,10 @@ acc2Lineage <- function(accessions, assembly_path, lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) }, error = function(e) { - abort( + rlang::abort( message = paste("An error occurred during IPG fetching or lineage processing:", e$message), class = "lineage_processing_error", - # capturing the call stack - call = sys.call(), # adding additional context accessions = accessions, assembly_path = assembly_path, @@ -181,11 +183,10 @@ acc2Lineage <- function(accessions, assembly_path, plan = plan ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning during IPG fetching or lineage processing:", w$message), class = "lineage_processing_warning", - call = sys.call(), # capturing the call stack accessions = accessions, assembly_path = assembly_path, lineagelookup_path = lineagelookup_path, @@ -218,6 +219,7 @@ acc2Lineage <- function(accessions, assembly_path, #' @importFrom furrr future_map #' @importFrom future plan #' @importFrom rentrez entrez_fetch +#' @importFrom rlang warn abort inform #' #' @return Describe return, in detail #' @export @@ -229,18 +231,18 @@ acc2Lineage <- function(accessions, assembly_path, efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { # Argument validation if (!is.character(accnums) || length(accnums) == 0) { - abort("Error: 'accnums' must be a non-empty character vector.", - class = "validation_error") + rlang::abort("Error: 'accnums' must be a non-empty character vector.", + class = "validation_error") } if (!is.character(out_path) || nchar(out_path) == 0) { - abort("Error: 'out_path' must be a non-empty string.", - class = "validation_error") + rlang::abort("Error: 'out_path' must be a non-empty string.", + class = "validation_error") } if (!is.function(plan)) { - abort("Error: 'plan' must be a valid plan function.", - class = "validation_error") + rlang::abort("Error: 'plan' must be a valid plan function.", + class = "validation_error") } if (length(accnums) > 0) { partition <- function(in_data, groups) { @@ -285,19 +287,17 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { }) sink(NULL) }, error = function(e) { - abort( + rlang::abort( message = paste("An error occurred: ", e$message), class = "fetch_error", - call = sys.call(), accnums = accnums, out_path = out_path, plan = plan ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "fetch_warning", - call = sys.call(), accnums = accnums, out_path = out_path, plan = plan @@ -331,6 +331,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' "create_lineage_lookup()" function #' #' @importFrom data.table fread +#' @importFrom rlang warn abort inform #' #' @return Describe return, in detail #' @export @@ -344,31 +345,31 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) { # Argument validation for accessions if (!is.character(accessions) || length(accessions) == 0) { - abort("Input 'accessions' must be a non-empty + rlang::abort("Input 'accessions' must be a non-empty character vector.", class = "validation_error") } # check for validate inputs if (!is.character(ipg_file)) { - abort("Input 'ipg_file' must be a + rlang::abort("Input 'ipg_file' must be a character string.", class = "validation_error") } # Ensure paths are character strings if (!is.character(assembly_path) || !is.character(lineagelookup_path)) { - abort("Both 'assembly_path' and 'lineagelookup_path' - must be character strings.", class = "validation_error") + rlang::abort("Both 'assembly_path' and 'lineagelookup_path' + must be character strings.", class = "validation_error") } # Ensure paths exist if (!file.exists(assembly_path)) { - abort(paste("Assembly file not found at:", assembly_path), - class = "file_error") + rlang::abort(paste("Assembly file not found at:", assembly_path), + class = "file_error") } if (!file.exists(lineagelookup_path)) { - abort(paste("Lineage lookup file not found at:", lineagelookup_path), - class = "file_error") + rlang::abort(paste("Lineage lookup file not found at:", lineagelookup_path), + class = "file_error") } # Process the IPG file @@ -390,20 +391,18 @@ IPG2Lineage <- function(accessions, ipg_file, return(lins) }, error = function(e) { - abort( + rlang::abort( message = paste("An error occurred: ", e$message), class = "processing_error", - call = sys.call(), accessions = accessions, ipg_file = ipg_file, assembly_path = assembly_path, lineagelookup_path = lineagelookup_path ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), accessions = accessions, ipg_file = ipg_file, assembly_path = assembly_path, diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index df4f97e7..8b227979 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -1,4 +1,4 @@ -suppressPackageStartupMessages(library(rlang)) + # for now, we're using an env var, COMMON_SRC_ROOT, to specify this folder since # the working directory is changed in many parts of the current molevolvr # pipeline. @@ -9,6 +9,8 @@ common_root <- Sys.getenv("COMMON_SRC_ROOT") #' Construct list where names (MolEvolvR advanced options) point to processes #' +#' @importFrom rlang warn abort inform +#' #' @return list where names (MolEvolvR advanced options) point to processes #' #' example: list_opts2procs <- make_opts2procs @@ -23,9 +25,10 @@ make_opts2procs <- function() { ) return(opts2processes) }, error = function(e) { - abort(paste("Error: ", e$message), class = "Opts_to_process_error") + rlang::abort(paste("Error: ", e$message), class = "Opts_to_process_error") }, warning = function(w) { - warn(paste("Warning: ", w$message), class = "Opts_to_process_warning") + rlang::warn(paste("Warning: ", w$message), + class = "Opts_to_process_warning") }) } @@ -34,6 +37,8 @@ make_opts2procs <- function() { #' #' @param advanced_opts character vector of MolEvolvR advanced options #' +#' @importFrom rlang warn abort inform +#' #' @return character vector of process names that will execute given #' the advanced options #' @@ -43,7 +48,8 @@ make_opts2procs <- function() { #' @export map_advanced_opts2procs <- function(advanced_opts) { if (!is.character(advanced_opts)) { - abort("Argument must be a character vector!", class = "validation_error") + rlang::abort("Argument must be a character vector!", + class = "validation_error") } tryCatch({ # append 'always' to add procs that always run @@ -55,17 +61,15 @@ map_advanced_opts2procs <- function(advanced_opts) { procs <- opts2proc[idx] |> unlist() return(procs) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "map_advanced_opts2procs_error", - call = sys.call(), advanced_opts = advanced_opts ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "map_advanced_opts2procs_warning", - call = sys.call(), advanced_opts = advanced_opts ) }) @@ -78,6 +82,7 @@ map_advanced_opts2procs <- function(advanced_opts) { #' directory #' #' @importFrom dplyr across everything select summarise +#' @importFrom rlang warn abort inform #' #' @return [list] names: processes; values: median runtime (seconds) #' @@ -98,14 +103,14 @@ get_proc_medians <- function(dir_job_results) { tryCatch({ # Check if dir_job_results is a character string if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - abort("Input 'dir_job_results' must be a single character string.", - class = "validation_error") + rlang::abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } # Check if dir_job_results exists if (!dir.exists(dir_job_results)) { - abort(paste("The directory", dir_job_results, "does not exist."), - class = "file_error") + rlang::abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } source(file.path(common_root, "molevol_scripts", "R", "metrics.R")) @@ -144,10 +149,10 @@ get_proc_medians <- function(dir_job_results) { as.list() return(list_proc_medians) }, error = function(e) { - abort(paste("Encountered an error: ", e$message), - class = "processing_error") + rlang::abort(paste("Encountered an error: ", e$message), + class = "processing_error") }, warning = function(w) { - warn(paste("Warning: ", w$message), class = "processing_warning") + rlang::warn(paste("Warning: ", w$message), class = "processing_warning") }) } @@ -161,6 +166,7 @@ get_proc_medians <- function(dir_job_results) { #' @importFrom tibble as_tibble #' @importFrom readr write_tsv #' @importFrom tidyr pivot_longer +#' @importFrom rlang warn abort inform #' #' @return [tbl_df] 2 columns: 1) process and 2) median seconds #' @@ -173,18 +179,18 @@ write_proc_medians_table <- function(dir_job_results, filepath) { tryCatch({ # Error handling for input arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - abort("Input 'dir_job_results' must be a single character string.", - class = "validation_error") + rlang::abort("Input 'dir_job_results' must be a single character string.", + class = "validation_error") } if (!dir.exists(dir_job_results)) { - abort(paste("The directory", dir_job_results, "does not exist."), - class = "file_error") + rlang::abort(paste("The directory", dir_job_results, "does not exist."), + class = "file_error") } if (!is.character(filepath) || length(filepath) != 1) { - abort("Input 'filepath' must be a single character string.", - class = "validation_error") + rlang::abort("Input 'filepath' must be a single character string.", + class = "validation_error") } df_proc_medians <- get_proc_medians(dir_job_results) |> tibble::as_tibble() |> @@ -199,18 +205,16 @@ write_proc_medians_table <- function(dir_job_results, filepath) { readr::write_tsv(df_proc_medians, file = filepath) return(df_proc_medians) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) @@ -226,10 +230,11 @@ write_proc_medians_table <- function(dir_job_results, filepath) { #' read location. #' #' @param dir_job_results [chr] path to MolEvolvR job_results directory -#' @param filepath [chr] path to save YAML file; if NULL, +#' @param filepath [chr] path to save YAML file; if NULL, #' uses ./molevol_scripts/log_data/job_proc_weights.yml #' #' @importFrom yaml write_yaml +#' @importFrom rlang warn abort inform #' #' @examples #' \dontrun{ @@ -243,7 +248,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { tryCatch({ # Error handling for dir_job_results arguments if (!is.character(dir_job_results) || length(dir_job_results) != 1) { - abort( + rlang::abort( message = "Input 'dir_job_results' must be a single character string.", class = "validation_error", dir_job_results = dir_job_results @@ -251,7 +256,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { } if (!dir.exists(dir_job_results)) { - abort( + rlang::abort( message = paste("The directory", dir_job_results, "does not exist."), class = "file_error", dir_job_results = dir_job_results @@ -265,7 +270,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { "job_proc_weights.yml") } if (!is.character(filepath) || length(filepath) != 1) { - abort( + rlang::abort( message = "Input 'filepath' must be a single character string.", class = "validation_error", filepath = filepath @@ -275,18 +280,16 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { medians <- get_proc_medians(dir_job_results) yaml::write_yaml(medians, filepath) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), dir_job_results = dir_job_results, filepath = filepath ) @@ -300,6 +303,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) { #' #' @importFrom stringr str_glue str_trim #' @importFrom yaml read_yaml +#' @importFrom rlang warn abort inform #' #' @return [list] names: processes; values: median runtime (seconds) #' @@ -317,9 +321,9 @@ get_proc_weights <- function(medians_yml_path = NULL) { # attempt to read the weights from the YAML file produced by # write_proc_medians_yml() if (stringr::str_trim(medians_yml_path) == "") { - abort( - message = stringr::str_glue("medians_yml_path is empty - ({medians_yml_path}), returning default weights"), + rlang::abort( + message = stringr::str_glue("medians_yml_path is empty + ({medians_yml_path}), returning default weights"), class = "input_error", medians_yml_path = medians_yml_path ) @@ -328,7 +332,7 @@ get_proc_weights <- function(medians_yml_path = NULL) { proc_weights <- yaml::read_yaml(medians_yml_path) if (!is.list(proc_weights) || length(proc_weights) == 0) { - abort( + rlang::abort( message = "The loaded YAML file does not contain valid process weights.", class = "file_error", @@ -364,6 +368,7 @@ get_proc_weights <- function(medians_yml_path = NULL) { #' #' @importFrom dplyr if_else #' @importFrom stringr str_glue +#' @importFrom rlang warn abort inform #' #' @return total estimated number of seconds a job will process (walltime) #' @@ -383,7 +388,7 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, # Validate advanced_opts if (!is.character(advanced_opts)) { - abort( + rlang::abort( message = "Argument 'advanced_opts' must be a character vector.", class = "validation_error", advanced_opts = advanced_opts @@ -392,8 +397,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, # Validate n_inputs if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) { - abort( - message = "Argument 'n_inputs' + rlang::abort( + message = "Argument 'n_inputs' must be a single positive numeric value.", class = "validation_error", n_inputs = n_inputs @@ -404,8 +409,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, if ("homology_search" %in% advanced_opts && (is.null(n_hits) || !is.numeric(n_hits) || length(n_hits) != 1 || n_hits < 0)) { - abort( - message = "Argument 'n_hits' must be a single non-negative numeric + rlang::abort( + message = "Argument 'n_hits' must be a single non-negative numeric value when 'homology_search' is in 'advanced_opts'.", class = "validation_error", n_hits = n_hits @@ -415,7 +420,7 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, # Get process weights proc_weights <- write_proc_medians_yml() if (!is.list(proc_weights)) { - abort( + rlang::abort( message = "Process weights could not be retrieved correctly.", class = "processing_error" ) @@ -437,9 +442,9 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, opts2procs <- make_opts2procs() # exclude the homology search processes for the homologous hits procs2exclude_for_homologs <- opts2procs[["homology_search"]] - procs_homologs <- procs_from_opts[!(procs_from_opts + procs_homologs <- procs_from_opts[!(procs_from_opts %in% procs2exclude_for_homologs)] - binary_proc_vec_homolog <- dplyr::if_else(all_procs + binary_proc_vec_homolog <- dplyr::if_else(all_procs %in% procs_homologs, 1L, 0L) # add the estimated walltime for processes run on the homologous hits est_walltime <- est_walltime + @@ -457,19 +462,17 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, } return(est_walltime) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), advanced_opts = advanced_opts, n_inputs = n_inputs, n_hits = n_hits ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), advanced_opts = advanced_opts, n_inputs = n_inputs, n_hits = n_hits @@ -486,6 +489,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L, #' @param t_long threshold value that defines the lower bound for assigning a #' job to the "long queue" #' +#' @importFrom rlang warn abort inform +#' #' @return a string of "short" or "long" #' #' example: @@ -500,7 +505,7 @@ assign_job_queue <- function( tryCatch({ # Validate t_sec_estimate if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) { - abort( + rlang::abort( message = "Argument 't_sec_estimate' must be a single numeric value.", class = "validation_error", t_sec_estimate = t_sec_estimate @@ -509,8 +514,8 @@ assign_job_queue <- function( # Validate t_cutoff if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) { - abort( - message = "Argument 't_cutoff' must be a + rlang::abort( + message = "Argument 't_cutoff' must be a single non-negative numeric value.", class = "validation_error", t_cutoff = t_cutoff @@ -521,18 +526,16 @@ assign_job_queue <- function( queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short") return(queue) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error: ", e$message), class = "processing_error", - call = sys.call(), t_sec_estimate = t_sec_estimate, t_cutoff = t_cutoff ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning: ", w$message), class = "processing_warning", - call = sys.call(), t_sec_estimate = t_sec_estimate, t_cutoff = t_cutoff ) @@ -548,6 +551,7 @@ assign_job_queue <- function( #' @importFrom dplyr mutate select #' @importFrom ggplot2 aes geom_line ggplot labs #' @importFrom tibble as_tibble +#' @importFrom rlang warn abort inform #' #' @return line plot object #' @@ -581,8 +585,8 @@ plot_estimated_walltimes <- function() { n_hits <- if ("homology_search" %in% advanced_opts) { 100 } else { - NULL - } + NULL + } est_walltime <- advanced_opts2est_walltime ( advanced_opts, n_inputs = i, @@ -627,8 +631,8 @@ plot_estimated_walltimes <- function() { # sec to hrs df_walltimes <- df_walltimes |> dplyr::mutate(est_walltime = est_walltime / 3600) - p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, - y = est_walltime, + p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, + y = est_walltime, color = advanced_opts)) + ggplot2::geom_line() + ggplot2::labs( @@ -638,12 +642,12 @@ plot_estimated_walltimes <- function() { ) return(p) }, error = function(e) { - abort( + rlang::abort( message = paste("Encountered an error:", e$message), .internal = TRUE ) }, warning = function(w) { - warn( + rlang::warn( message = paste("Warning:", w$message), .internal = TRUE ) diff --git a/R/blastWrappers.R b/R/blastWrappers.R index 15484a1b..95643e24 100755 --- a/R/blastWrappers.R +++ b/R/blastWrappers.R @@ -13,6 +13,8 @@ #' @param num_alignments #' @param num_threads #' +#' @importFrom rlang warn abort inform +#' #' @return #' @export #' @@ -23,23 +25,25 @@ run_deltablast <- function(deltablast_path, db_search_path, # Argument validation if (!file.exists(deltablast_path)) { - stop("The DELTABLAST executable path is invalid: ", deltablast_path) + rlang::abort(paste("The DELTABLAST executable path is invalid:", + deltablast_path)) } if (!dir.exists(db_search_path)) { - stop("The database search path is invalid: ", db_search_path) + rlang::abort(paste("The database search path is invalid:", db_search_path)) } if (!file.exists(query)) { - stop("The query file path is invalid: ", query) + rlang::abort(paste("The query file path is invalid:", query)) } if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { - stop("The evalue must be a positive number: ", evalue) + rlang::abort(paste("The evalue must be a positive number:", evalue)) } if (!is.numeric(num_alignments) || num_alignments <= 0) { - stop("The number of alignments must be a - positive integer: ", num_alignments) + rlang::abort(paste("The number of alignments must be a positive integer:", + num_alignments)) } if (!is.numeric(num_threads) || num_threads <= 0) { - stop("The number of threads must be a positive integer: ", num_threads) + rlang::abort(paste("The number of threads must be a positive integer:", + num_threads)) } start <- Sys.time() @@ -61,13 +65,28 @@ run_deltablast <- function(deltablast_path, db_search_path, ) print(Sys.time() - start) }, error = function(e) { - message(paste("Error in run_deltablast: ", e)) + rlang::abort( + message = paste("Error in run_deltablast:", e$message), + class = "processing_error", + deltablast_path = deltablast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_alignments = num_alignments, + num_threads = num_threads + ) }, warning = function(w) { - message(paste("Warning in run_deltablast: ", w)) - }, finally = { - message("run_deltablast completed") + rlang::warn( + message = paste("Warning in run_deltablast:", w$message), + class = "processing_warning", + deltablast_path = deltablast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_alignments = num_alignments, + num_threads = num_threads + ) }) - } @@ -81,6 +100,8 @@ run_deltablast <- function(deltablast_path, db_search_path, #' @param out #' @param num_threads #' +#' @importFrom rlang warn abort inform +#' #' @return #' @export #' @@ -90,19 +111,26 @@ run_rpsblast <- function(rpsblast_path, db_search_path, out, num_threads = 1) { # Argument validation if (!file.exists(rpsblast_path)) { - stop("The RPSBLAST executable path is invalid: ", rpsblast_path) + rlang::abort(paste("The RPSBLAST executable path is invalid:", + rpsblast_path), + class = "file_error") } if (!dir.exists(db_search_path)) { - stop("The database search path is invalid: ", db_search_path) + rlang::abort(paste("The database search path is invalid:", db_search_path), + class = "file_error") } if (!file.exists(query)) { - stop("The query file path is invalid: ", query) + rlang::abort(paste("The query file path is invalid:", query), + class = "file_error") } if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) { - stop("The evalue must be a positive number: ", evalue) + rlang::abort(paste("The evalue must be a positive number:", evalue), + class = "validation_error") } if (!is.numeric(num_threads) || num_threads <= 0) { - stop("The number of threads must be a positive integer: ", num_threads) + rlang::abort(paste("The number of threads must be a positive integer:", + num_threads), + class = "validation_error") } start <- Sys.time() @@ -123,11 +151,25 @@ run_rpsblast <- function(rpsblast_path, db_search_path, ) print(Sys.time() - start) }, error = function(e) { - message(paste("Error in run_rpsblast: ", e)) + rlang::abort( + message = paste("Error in run_rpsblast:", e$message), + class = "processing_error", + rpsblast_path = rpsblast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_threads = num_threads + ) }, warning = function(w) { - message(paste("Warning in run_rpsblast: ", w)) - }, finally = { - message("run_rpsblast completed") + rlang::warn( + message = paste("Warning in run_rpsblast:", w$message), + class = "processing_warning", + rpsblast_path = rpsblast_path, + db_search_path = db_search_path, + query = query, + out = out, + num_threads = num_threads + ) }) } From f4b50f4c387f7a797e630eb2b00c06d473ee75da Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 20:39:13 -0600 Subject: [PATCH 5/6] fix error introduced by merge --- R/assign_job_queue.R | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index 6f3dde17..e0c22ec6 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -127,33 +127,6 @@ calculateProcessRuntime <- function(dir_job_results) { dir.create(dirname(path_log_data), recursive = TRUE, showWarnings = FALSE) } - - # attempt to load pre-generated logdata - if (!file.exists(path_log_data)) { - logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) - save(logs, file = path_log_data) - } else { - load(path_log_data) # loads the logs object - } - df_log <- logs$df_log - procs <- c( - "dblast", "dblast_cleanup", "iprscan", - "ipr2lineage", "ipr2da", "blast_clust", - "clust2table" - ) - list_proc_medians <- df_log |> - dplyr::select(dplyr::all_of(procs)) |> - dplyr::summarise( - dplyr::across( - dplyr::everything(), - \(x) median(x, na.rm = TRUE) - ) - ) |> - as.list() - return(list_proc_medians) -} - - # attempt to load pre-generated logdata if (!file.exists(path_log_data)) { logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60) @@ -600,6 +573,7 @@ assignJobQueue <- function( #' dev/molevol_scripts/docs/estimate_walltimes.png", plot = p) #' @export plotEstimatedWallTimes <- function() { + tryCatch({ opts <- mapOption2Process() |> names() # get all possible submission permutations (powerset) get_powerset <- function(vec) { From dd86b3ce04e68345297ee2f0f095f2999ff286f1 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 29 Oct 2024 20:45:29 -0600 Subject: [PATCH 6/6] fix .Rd --- R/assign_job_queue.R | 18 ++++++++++++++++++ man/acc2Lineage.Rd | 3 +-- man/efetchIPG.Rd | 3 +-- man/sinkReset.Rd | 1 - 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index e0c22ec6..52af46bf 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -36,6 +36,9 @@ mapOption2Process <- function() { } +#' mapAdvOption2Process +#' +#' @description #' Use MolEvolvR advanced options to get associated processes #' #' @param advanced_opts character vector of MolEvolvR advanced options @@ -79,6 +82,9 @@ mapAdvOption2Process <- function(advanced_opts) { } +#' calculateProcessRuntime +#' +#' @description #' Scrape MolEvolvR logs and calculate median processes #' #' @param dir_job_results [chr] path to MolEvolvR job_results @@ -227,6 +233,9 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) { } +#' writeProcessRuntime2YML +#' +#' @description #' Compute median process runtimes, then write a YAML list of the processes and #' their median runtimes in seconds to the path specified by 'filepath'. #' @@ -304,6 +313,9 @@ writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) { }) } +#' getProcessRuntimeWeights +#' +#' @description #' Quickly get the runtime weights for MolEvolvR backend processes #' #' @param dir_job_results [chr] path to MolEvolvR job_results @@ -494,6 +506,9 @@ calculateEstimatedWallTimeFromOpts <- function(advanced_opts, } +#' assignJobQueue +#' +#' @description #' Decision function to assign job queue #' #' @param t_sec_estimate estimated number of seconds a job will process @@ -555,6 +570,9 @@ assignJobQueue <- function( } +#' plotEstimatedWallTimes +#' +#' @description #' Plot the estimated runtimes for different advanced options and number #' of inputs #' diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index fd4eeceb..ce499592 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -44,8 +44,7 @@ accessions. The dataframe includes relevant columns such as TaxID, GCA_ID, Protein, Protein Name, Species, and Lineage. } \description{ -This function combines 'efetchIPG()' -and 'IPG2Lineage()' to map a set +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Function to map protein accession numbers to lineage diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index eb5ca678..e55c342a 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -27,8 +27,7 @@ The function does not return a value but writes the efetch results directly to the specified \code{out_path}. } \description{ -Perform efetch on the ipg database -and write the results to out_path +Perform efetch on the ipg database and write the results to out_path Perform efetch on the ipg database and write the results to out_path } diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd index e3fc7ce4..0285c0b2 100644 --- a/man/sinkReset.Rd +++ b/man/sinkReset.Rd @@ -8,7 +8,6 @@ sinkReset() } \value{ No return, but run to close all outstanding \code{sink()}s -and handles any errors or warnings that occur during the process. } \description{ Sink Reset