From 823af96d484a1ec075548ce181f52147cff54af5 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Thu, 10 Oct 2024 09:13:26 -0600
Subject: [PATCH 1/6] - remove old .Rd leftovers and update with new docs - let
 R-CMD sort NAMESPACE

---
 NAMESPACE          | 1 -
 man/IPG2Lineage.Rd | 3 ++-
 man/acc2Lineage.Rd | 3 ++-
 man/acc2lin.Rd     | 0
 man/efetchIPG.Rd   | 3 ++-
 man/efetch_ipg.Rd  | 0
 man/ipg2lin.Rd     | 0
 man/sink.reset.Rd  | 0
 man/sinkReset.Rd   | 1 +
 9 files changed, 7 insertions(+), 4 deletions(-)
 delete mode 100644 man/acc2lin.Rd
 delete mode 100644 man/efetch_ipg.Rd
 delete mode 100644 man/ipg2lin.Rd
 delete mode 100644 man/sink.reset.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 50af36df..078f971b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -77,7 +77,6 @@ export(prepareColumnParams)
 export(prepareSingleColumnParams)
 export(proteinAcc2TaxID)
 export(proteinAcc2TaxID_old)
-export(prot2tax_old)
 export(removeAsterisks)
 export(removeEmptyRows)
 export(removeTails)
diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd
index e24ab617..f8434c7f 100644
--- a/man/IPG2Lineage.Rd
+++ b/man/IPG2Lineage.Rd
@@ -38,7 +38,8 @@ This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} f
 Describe return, in detail
 }
 \description{
-Takes the resulting file of an efetch run on the ipg database and
+Takes the resulting file
+of an efetch run on the ipg database and
 
 Takes the resulting file of an efetch run on the ipg database and
 append lineage, and taxid columns
diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd
index a24bdc9a..836a677f 100644
--- a/man/acc2Lineage.Rd
+++ b/man/acc2Lineage.Rd
@@ -38,7 +38,8 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL}
 Describe return, in detail
 }
 \description{
-This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
+This function combines 'efetchIPG()'
+and 'IPG2Lineage()' to map a set
 of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 
 Function to map protein accession numbers to lineage
diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd
deleted file mode 100644
index e69de29b..00000000
diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd
index 6a5d85a4..5d2e8372 100644
--- a/man/efetchIPG.Rd
+++ b/man/efetchIPG.Rd
@@ -23,7 +23,8 @@ the ipg database}
 Describe return, in detail
 }
 \description{
-Perform efetch on the ipg database and write the results to out_path
+Perform efetch on the ipg database
+and write the results to out_path
 
 Perform efetch on the ipg database and write the results to out_path
 }
diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd
deleted file mode 100644
index e69de29b..00000000
diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd
deleted file mode 100644
index e69de29b..00000000
diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd
deleted file mode 100644
index e69de29b..00000000
diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd
index 0285c0b2..e3fc7ce4 100644
--- a/man/sinkReset.Rd
+++ b/man/sinkReset.Rd
@@ -8,6 +8,7 @@ sinkReset()
 }
 \value{
 No return, but run to close all outstanding \code{sink()}s
+and handles any errors or warnings that occur during the process.
 }
 \description{
 Sink Reset

From 48b7fd697b6c6cac7826ae3f09d315025db1a438 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Sun, 13 Oct 2024 18:02:36 +0100
Subject: [PATCH 2/6] Update error handling to use rlang functions in acc2lin.R
 file

- Replaced base R error handling with rlang functions: `abort()`, `warn()`, and `inform()`.
- Improved clarity and consistency in error and warning messages.
- Enhanced robustness with detailed context for errors and warnings.
---
 R/acc2lin.R | 209 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 141 insertions(+), 68 deletions(-)

diff --git a/R/acc2lin.R b/R/acc2lin.R
index 08cb7d76..bd5cc289 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -5,6 +5,7 @@
 # suppressPackageStartupMessages(library(data.table))
 # suppressPackageStartupMessages(library(tidyverse))
 # suppressPackageStartupMessages(library(biomartr))
+suppressPackageStartupMessages(library(rlang))
 
 # https://stackoverflow.com/questions/18730491/sink-does-not-release-file
 #' Sink Reset
@@ -24,13 +25,18 @@ sinkReset <- function() {
     for (i in seq_len(sink.number())) {
       sink(NULL)
     }
-    print("All sinks closed")
+    inform("All sinks closed", class = "sink_reset_info")
   }, error = function(e) {
-    print(paste("Error: ", e$message))
+    abort(paste("Error: ", e$message), class = "sink_reset_error")
   }, warning = function(w) {
-    print(paste("Warning: ", w$message))
+    warn(paste("Warning: ", w$message), class = "sink_reset_warning")
   }, finally = {
-    print("resetSink function execution completed.")
+    # If any additional cleanup is needed, it can be done here
+    if (sink.number() > 0) {
+      # Additional cleanup if sinks are still open
+      inform("Some sinks remain open, ensure proper cleanup.",
+             class = "sink_cleanup_warning")
+    }
   })
 }
 
@@ -56,60 +62,64 @@ sinkReset <- function() {
 #' addLineage()
 #' }
 addLineage <- function(df, acc_col = "AccNum", assembly_path,
-                     lineagelookup_path, ipgout_path = NULL,
-                     plan = "sequential", ...) {
+                       lineagelookup_path, ipgout_path = NULL,
+                       plan = "sequential", ...) {
   # check for validate inputs
   if (!is.data.frame(df)) {
-    stop("Input 'df' must be a data frame.")
+    abort("Input 'df' must be a data frame.", class = "input_error")
   }
 
   if (!acc_col %in% colnames(df)) {
-    stop(paste("Column", acc_col, "not found in data frame."))
+    abort(paste("Column", acc_col, 
+                "not found in data frame."), class = "column_error")
   }
 
   # Ensure paths are character strings
   if (!is.character(assembly_path) || !is.character(lineagelookup_path)) {
-    stop("Both 'assembly_path' and 
-         'lineagelookup_path' must be character strings.")
+    abort("Both 'assembly_path' and 
+          'lineagelookup_path' must be character strings.",
+          class = "path_type_error")
   }
 
   # Ensure paths exist
   if (!file.exists(assembly_path)) {
-    stop(paste("Assembly file not found at:", assembly_path))
+    abort(paste("Assembly file not found at:",
+                assembly_path), class = "file_not_found_error")
   }
 
   if (!file.exists(lineagelookup_path)) {
-    stop(paste("Lineage lookup file not found at:", lineagelookup_path))
+    abort(paste("Lineage lookup file not found at:",
+                lineagelookup_path), class = "file_not_found_error")
   }
-    tryCatch({
-      # Attempt to add lineages
-      acc_col <- sym(acc_col)
-      accessions <- df %>% pull(acc_col)
-      lins <- acc2Lineage(
-        accessions, assembly_path, lineagelookup_path, ipgout_path, plan
-      )
-
-      # Drop a lot of the unimportant columns for now? 
-      # will make merging much easier
-      lins <- lins[, c(
-        "Strand", "Start", "Stop", "Nucleotide Accession", "Source",
-        "Id", "Strain"
-      ) := NULL]
-      lins <- unique(lins)
-
-      # dup <- lins %>% group_by(Protein) %>% 
-      # summarize(count = n()) %>% filter(count > 1) %>%
-      # pull(Protein)
-
-      merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE)
-      return(merged)
-    }, error = function(e) {
-      print(paste("Error: ", e$message))
-    }, warning = function(w) {
-      print(paste("Warning: ", w$message))
-    }, finally = {
-      print("addLineages function execution completed.")
-    })
+  tryCatch({
+    # Attempt to add lineages
+    acc_col <- sym(acc_col)
+    accessions <- df %>% pull(acc_col)
+    lins <- acc2Lineage(
+      accessions, assembly_path, lineagelookup_path, ipgout_path, plan
+    )
+
+    # Drop a lot of the unimportant columns for now? 
+    # will make merging much easier
+    lins <- lins[, c(
+      "Strand", "Start", "Stop", "Nucleotide Accession", "Source",
+      "Id", "Strain"
+    ) := NULL]
+    lins <- unique(lins)
+
+    # dup <- lins %>% group_by(Protein) %>% 
+    # summarize(count = n()) %>% filter(count > 1) %>%
+    # pull(Protein)
+
+    merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE)
+    return(merged)
+  }, error = function(e) {
+    abort(paste("Error during lineage addition:", e$message),
+          class = "lineage_addition_error")
+  }, warning = function(w) {
+    warn(paste("Warning during lineage addition:", w$message),
+         class = "lineage_addition_warning")
+  })
 
 }
 
@@ -140,11 +150,11 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 #' acc2Lineage()
 #' }
 acc2Lineage <- function(accessions, assembly_path, 
-                    lineagelookup_path, ipgout_path = NULL, 
-                    plan = "sequential", ...) {
+                        lineagelookup_path, ipgout_path = NULL, 
+                        plan = "sequential", ...) {
   tmp_ipg <- F
   if (is.null(ipgout_path)) {
-    tmp_ipg <- T
+    tmp_ipg <- TRUE
     ipgout_path <- tempfile("ipg", fileext = ".txt")
   }
 
@@ -154,18 +164,41 @@ acc2Lineage <- function(accessions, assembly_path,
     efetchIPG(accessions, out_path = ipgout_path, plan)
 
     # Attempt to process IPG to lineages
-    lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path)
+    lins <- IPG2Lineage(accessions, ipgout_path,
+                        assembly_path, lineagelookup_path)
   }, error = function(e) {
-    print(paste("An error occurred: ", e$message))
+    abort(
+      message = paste("An error occurred during IPG fetching
+                      or lineage processing:", e$message),
+      class = "lineage_processing_error",
+      # capturing the call stack
+      call = sys.call(),
+      # adding additional context
+      accessions = accessions,
+      assembly_path = assembly_path,
+      lineagelookup_path = lineagelookup_path,
+      ipgout_path = ipgout_path,
+      plan = plan
+    )
   }, warning = function(w) {
-    print(paste("Warning: ", w$message))
+    warn(
+      message = paste("Warning during IPG fetching
+                      or lineage processing:", w$message),
+      class = "lineage_processing_warning",
+      call = sys.call(), # capturing the call stack
+      accessions = accessions,
+      assembly_path = assembly_path,
+      lineagelookup_path = lineagelookup_path,
+      ipgout_path = ipgout_path,
+      plan = plan
+    )
   }, finally = {
-    print("acc2lin function execution completed.")
+    # Cleanup: delete temporary IPG file if it was created
+    if (tmp_ipg && file.exists(ipgout_path)) {
+      unlink(ipgout_path)
+    }
   })
 
-  if (tmp_ipg) {
-    unlink(tempdir(), recursive = T)
-  }
   return(lins)
 }
 
@@ -196,15 +229,18 @@ acc2Lineage <- function(accessions, assembly_path,
 efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
   # Argument validation
   if (!is.character(accnums) || length(accnums) == 0) {
-    stop("Error: 'accnums' must be a non-empty character vector.")
+    abort("Error: 'accnums' must be a non-empty character vector.",
+          class = "validation_error")
   }
 
   if (!is.character(out_path) || nchar(out_path) == 0) {
-    stop("Error: 'out_path' must be a non-empty string.")
+    abort("Error: 'out_path' must be a non-empty string.",
+          class = "validation_error")
   }
 
   if (!is.function(plan)) {
-    stop("Error: 'plan' must be a valid plan function.")
+    abort("Error: 'plan' must be a valid plan function.",
+          class = "validation_error")
   }
   if (length(accnums) > 0) {
     partition <- function(in_data, groups) {
@@ -249,11 +285,26 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
       })
       sink(NULL)
     }, error = function(e) {
-      print(paste("An error occurred: ", e$message))
+      abort(
+        message = paste("An error occurred: ", e$message),
+        class = "fetch_error",
+        call = sys.call(),
+        accnums = accnums,
+        out_path = out_path,
+        plan = plan
+      )
     }, warning = function(w) {
-      print(paste("Warning: ", w$message))
+      warn(
+        message = paste("Warning: ", w$message),
+        class = "fetch_warning",
+        call = sys.call(),
+        accnums = accnums,
+        out_path = out_path,
+        plan = plan
+      )
     }, finally = {
-      print("efetch_ipg function execution completed.")
+      # Ensure the sink is closed in case of errors
+      if (sink.number() > 0) sink(NULL)
     })
   }
 }
@@ -289,31 +340,38 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 #' IPG2Lineage()
 #' }
 #'
-IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) {
+IPG2Lineage <- function(accessions, ipg_file,
+                        assembly_path, lineagelookup_path, ...) {
   # Argument validation for accessions
   if (!is.character(accessions) || length(accessions) == 0) {
-    stop("Input 'accessions' must be a non-empty character vector.")
+    abort("Input 'accessions' must be a non-empty
+          character vector.", class = "validation_error")
   }
 
   # check for validate inputs
   if (!is.character(ipg_file)) {
-    stop("Input 'ipg_file' must be a character string.")
+    abort("Input 'ipg_file' must be a
+          character string.", class = "validation_error")
   }
+
   # Ensure paths are character strings
   if (!is.character(assembly_path) || !is.character(lineagelookup_path)) {
-    stop("Both 'assembly_path' and 
-         'lineagelookup_path' must be character strings.")
+    abort("Both 'assembly_path' and 'lineagelookup_path'
+          must be character strings.", class = "validation_error")
   }
 
   # Ensure paths exist
   if (!file.exists(assembly_path)) {
-    stop(paste("Assembly file not found at:", assembly_path))
+    abort(paste("Assembly file not found at:", assembly_path),
+          class = "file_error")
   }
 
   if (!file.exists(lineagelookup_path)) {
-    stop(paste("Lineage lookup file not found at:", lineagelookup_path))
+    abort(paste("Lineage lookup file not found at:", lineagelookup_path),
+          class = "file_error")
   }
 
+  # Process the IPG file
   try({
     # Attempt to read the IPG file
     ipg_dt <- fread(ipg_file, sep = "\t", fill = T)
@@ -332,12 +390,27 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path,
 
     return(lins)
   }, error = function(e) {
-    print(paste("An error occurred: ", e$message))
+    abort(
+      message = paste("An error occurred: ", e$message),
+      class = "processing_error",
+      call = sys.call(),
+      accessions = accessions,
+      ipg_file = ipg_file,
+      assembly_path = assembly_path,
+      lineagelookup_path = lineagelookup_path
+    )
   }, warning = function(w) {
-    print(paste("Warning: ", w$message))
-  }, finally = {
-    print("ipg2lin function execution completed.")
+    warn(
+      message = paste("Warning: ", w$message),
+      class = "processing_warning",
+      call = sys.call(),
+      accessions = accessions,
+      ipg_file = ipg_file,
+      assembly_path = assembly_path,
+      lineagelookup_path = lineagelookup_path
+    )
   })
+
 }
 
 

From 6babffe95d2729857b921c9305f25dcbc0c0ed49 Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Tue, 15 Oct 2024 11:57:15 +0100
Subject: [PATCH 3/6] Update error handling to use rlang functions in
 R/assign_job_queue.R file - Replaced base R error handling with rlang
 functions: `abort()`, `warn()`, and `inform()`. - Improved clarity and
 consistency in error and warning messages. - Enhanced robustness with
 detailed context for errors and warnings.

---
 R/assign_job_queue.R | 227 +++++++++++++++++++++++++++++++------------
 1 file changed, 166 insertions(+), 61 deletions(-)

diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index c531fb09..df4f97e7 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -1,3 +1,4 @@
+suppressPackageStartupMessages(library(rlang))
 # for now, we're using an env var, COMMON_SRC_ROOT, to specify this folder since
 # the working directory is changed in many parts of the current molevolvr
 # pipeline.
@@ -22,11 +23,9 @@ make_opts2procs <- function() {
     )
     return(opts2processes)
   }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
+    abort(paste("Error: ", e$message), class = "Opts_to_process_error")
   }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("make_opts2procs function execution completed.")
+    warn(paste("Warning: ", w$message), class = "Opts_to_process_warning")
   })
 
 }
@@ -44,7 +43,7 @@ make_opts2procs <- function() {
 #' @export
 map_advanced_opts2procs <- function(advanced_opts) {
   if (!is.character(advanced_opts)) {
-    stop("Argument must be a character vector!")
+    abort("Argument must be a character vector!", class = "validation_error")
   }
   tryCatch({
     # append 'always' to add procs that always run
@@ -56,11 +55,19 @@ map_advanced_opts2procs <- function(advanced_opts) {
     procs <- opts2proc[idx] |> unlist()
     return(procs)
   }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
+    abort(
+      message = paste("Encountered an error: ", e$message),
+      class = "map_advanced_opts2procs_error",
+      call = sys.call(),
+      advanced_opts = advanced_opts
+    )
   }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("make_opts2procs function execution completed.")
+    warn(
+      message = paste("Warning: ", w$message),
+      class = "map_advanced_opts2procs_warning",
+      call = sys.call(),
+      advanced_opts = advanced_opts
+    )
   })
 
 }
@@ -91,12 +98,14 @@ get_proc_medians <- function(dir_job_results) {
   tryCatch({
     # Check if dir_job_results is a character string
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      stop("Input 'dir_job_results' must be a single character string.")
+      abort("Input 'dir_job_results' must be a single character string.",
+            class = "validation_error")
     }
 
     # Check if dir_job_results exists
     if (!dir.exists(dir_job_results)) {
-      stop(paste("The directory", dir_job_results, "does not exist."))
+      abort(paste("The directory", dir_job_results, "does not exist."),
+            class = "file_error")
     }
 
     source(file.path(common_root, "molevol_scripts", "R", "metrics.R"))
@@ -135,11 +144,10 @@ get_proc_medians <- function(dir_job_results) {
       as.list()
     return(list_proc_medians)
   }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
+    abort(paste("Encountered an error: ", e$message),
+          class = "processing_error")
   }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("get_proc_medians function execution completed.")
+    warn(paste("Warning: ", w$message), class = "processing_warning")
   })
 
 }
@@ -165,15 +173,18 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
   tryCatch({
     # Error handling for input arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      stop("Input 'dir_job_results' must be a single character string.")
+      abort("Input 'dir_job_results' must be a single character string.",
+            class = "validation_error")
     }
 
     if (!dir.exists(dir_job_results)) {
-      stop(paste("The directory", dir_job_results, "does not exist."))
+      abort(paste("The directory", dir_job_results, "does not exist."),
+            class = "file_error")
     }
 
     if (!is.character(filepath) || length(filepath) != 1) {
-      stop("Input 'filepath' must be a single character string.")
+      abort("Input 'filepath' must be a single character string.",
+            class = "validation_error")
     }
     df_proc_medians <- get_proc_medians(dir_job_results) |>
       tibble::as_tibble() |>
@@ -188,11 +199,21 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
     readr::write_tsv(df_proc_medians, file = filepath)
     return(df_proc_medians)
   }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
+    abort(
+      message = paste("Encountered an error: ", e$message),
+      class = "processing_error",
+      call = sys.call(),
+      dir_job_results = dir_job_results,
+      filepath = filepath
+    )
   }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("write_proc_medians_table function execution completed.")
+    warn(
+      message = paste("Warning: ", w$message),
+      class = "processing_warning",
+      call = sys.call(),
+      dir_job_results = dir_job_results,
+      filepath = filepath
+    )
   })
 
 }
@@ -222,12 +243,21 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
   tryCatch({
     # Error handling for dir_job_results arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      stop("Input 'dir_job_results' must be a single character string.")
+      abort(
+        message = "Input 'dir_job_results' must be a single character string.",
+        class = "validation_error",
+        dir_job_results = dir_job_results
+      )
     }
 
     if (!dir.exists(dir_job_results)) {
-      stop(paste("The directory", dir_job_results, "does not exist."))
+      abort(
+        message = paste("The directory", dir_job_results, "does not exist."),
+        class = "file_error",
+        dir_job_results = dir_job_results
+      )
     }
+
     if (is.null(filepath)) {
       filepath <- file.path(common_root,
                             "molevol_scripts",
@@ -235,20 +265,32 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
                             "job_proc_weights.yml")
     }
     if (!is.character(filepath) || length(filepath) != 1) {
-      stop("Input 'filepath' must be a single character string.")
+      abort(
+        message = "Input 'filepath' must be a single character string.",
+        class = "validation_error",
+        filepath = filepath
+      )
     }
 
     medians <- get_proc_medians(dir_job_results)
     yaml::write_yaml(medians, filepath)
   }, error = function(e) {
-    message(paste("Encountered an error: "), e$message)
+    abort(
+      message = paste("Encountered an error: ", e$message),
+      class = "processing_error",
+      call = sys.call(),
+      dir_job_results = dir_job_results,
+      filepath = filepath
+    )
   }, warning = function(w) {
-    message(paste("Warning: "), w$message)
-  }, finally = {
-    message("write_proc_medians_table function execution completed.")
-  }
-  )
-
+    warn(
+      message = paste("Warning: ", w$message),
+      class = "processing_warning",
+      call = sys.call(),
+      dir_job_results = dir_job_results,
+      filepath = filepath
+    )
+  })
 }
 
 #' Quickly get the runtime weights for MolEvolvR backend processes
@@ -275,13 +317,24 @@ get_proc_weights <- function(medians_yml_path = NULL) {
     # attempt to read the weights from the YAML file produced by
     # write_proc_medians_yml()
     if (stringr::str_trim(medians_yml_path) == "") {
-      stop(
-        stringr::str_glue("medians_yml_path is empty 
-                          ({medians_yml_path}), returning default weights")
+      abort(
+        message = stringr::str_glue("medians_yml_path is empty 
+                          ({medians_yml_path}), returning default weights"),
+        class = "input_error",
+        medians_yml_path = medians_yml_path
       )
     }
 
     proc_weights <- yaml::read_yaml(medians_yml_path)
+
+    if (!is.list(proc_weights) || length(proc_weights) == 0) {
+      abort(
+        message = "The loaded YAML file does not
+                  contain valid process weights.",
+        class = "file_error",
+        medians_yml_path = medians_yml_path
+      )
+    }
   },
   # to avoid fatal errors in reading the proc weights yaml,
   # some median process runtimes have been hardcoded based on
@@ -318,10 +371,9 @@ get_proc_weights <- function(medians_yml_path = NULL) {
 #'                                       "domain_architecture"),
 #'                                       n_inputs = 3, n_hits = 50L)
 #' @export
-advanced_opts2est_walltime	 <- function(advanced_opts,
-                                                  n_inputs = 1L,
-                                                  n_hits = NULL,
-                                                  verbose = FALSE) {
+advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
+                                       n_hits = NULL,
+                                       verbose = FALSE) {
 
   tryCatch({
     # to calculate est walltime for a homology search job, the number of hits
@@ -331,26 +383,42 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
 
     # Validate advanced_opts
     if (!is.character(advanced_opts)) {
-      stop("Argument 'advanced_opts' must be a character vector.")
+      abort(
+        message = "Argument 'advanced_opts' must be a character vector.",
+        class = "validation_error",
+        advanced_opts = advanced_opts
+      )
     }
 
     # Validate n_inputs
     if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) {
-      stop("Argument 'n_inputs' must be a single positive numeric value.")
+      abort(
+        message = "Argument 'n_inputs' 
+                  must be a single positive numeric value.",
+        class = "validation_error",
+        n_inputs = n_inputs
+      )
     }
 
     # Validate n_hits if homology_search is in advanced_opts
     if ("homology_search" %in% advanced_opts &&
-          (is.null(n_hits)|| !is.numeric(n_hits)
-           || length(n_hits) != 1 || n_hits < 0)) {
-      stop("Argument 'n_hits' must be a single non-negative numeric value when 
-           'homology_search' is in 'advanced_opts'.")
+          (is.null(n_hits) || !is.numeric(n_hits) ||
+             length(n_hits) != 1 || n_hits < 0)) {
+      abort(
+        message = "Argument 'n_hits' must be a single non-negative numeric 
+        value when 'homology_search' is in 'advanced_opts'.",
+        class = "validation_error",
+        n_hits = n_hits
+      )
     }
 
     # Get process weights
     proc_weights <- write_proc_medians_yml()
     if (!is.list(proc_weights)) {
-      stop("Process weights could not be retrieved correctly.")
+      abort(
+        message = "Process weights could not be retrieved correctly.",
+        class = "processing_error"
+      )
     }
 
     # sort process weights by names and convert to vec
@@ -389,12 +457,23 @@ advanced_opts2est_walltime	 <- function(advanced_opts,
     }
     return(est_walltime)
   }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
+    abort(
+      message = paste("Encountered an error: ", e$message),
+      class = "processing_error",
+      call = sys.call(),
+      advanced_opts = advanced_opts,
+      n_inputs = n_inputs,
+      n_hits = n_hits
+    )
   }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("advanced_opts2est_walltime	 
-            function execution completed.")
+    warn(
+      message = paste("Warning: ", w$message),
+      class = "processing_warning",
+      call = sys.call(),
+      advanced_opts = advanced_opts,
+      n_inputs = n_inputs,
+      n_hits = n_hits
+    )
   })
 
 }
@@ -419,22 +498,44 @@ assign_job_queue <- function(
   t_cutoff = 21600 # 6 hours
 ) {
   tryCatch({
+    # Validate t_sec_estimate
     if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) {
-      stop("Argument 't_sec_estimate' must be a single numeric value.")
+      abort(
+        message = "Argument 't_sec_estimate' must be a single numeric value.",
+        class = "validation_error",
+        t_sec_estimate = t_sec_estimate
+      )
     }
 
+    # Validate t_cutoff
     if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) {
-      stop("Argument 't_cutoff' must be a single non-negative numeric value.")
+      abort(
+        message = "Argument 't_cutoff' must be a 
+                  single non-negative numeric value.",
+        class = "validation_error",
+        t_cutoff = t_cutoff
+      )
     }
 
+
     queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short")
     return(queue)
   }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
+    abort(
+      message = paste("Encountered an error: ", e$message),
+      class = "processing_error",
+      call = sys.call(),
+      t_sec_estimate = t_sec_estimate,
+      t_cutoff = t_cutoff
+    )
   }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("assign_job_queue function execution completed.")
+    warn(
+      message = paste("Warning: ", w$message),
+      class = "processing_warning",
+      call = sys.call(),
+      t_sec_estimate = t_sec_estimate,
+      t_cutoff = t_cutoff
+    )
   })
 
 }
@@ -537,11 +638,15 @@ plot_estimated_walltimes <- function() {
       )
     return(p)
   }, error = function(e) {
-    message(paste("Encountered an error: ", e$message))
+    abort(
+      message = paste("Encountered an error:", e$message),
+      .internal = TRUE
+    )
   }, warning = function(w) {
-    message(paste("Warning: ", w$message))
-  }, finally = {
-    message("plot_estimated_walltimes function execution completed.")
+    warn(
+      message = paste("Warning:", w$message),
+      .internal = TRUE
+    )
   })
 
 }

From 57a635671795984f5ace17076ef0029c6ff0336c Mon Sep 17 00:00:00 2001
From: Seyi Kuforiji <kuforiji98@gmail.com>
Date: Sun, 20 Oct 2024 12:01:02 +0100
Subject: [PATCH 4/6] Enhance error handling and validation across functions

- Added robust error handling in run_deltablast and run_rpsblast functions.
- Updated Roxygen documentation to import rlang::abort, rlang::warn and rlang::inform for better error management.
- Refactored code for clarity and consistency based on the suggestion from the last review.
---
 NAMESPACE            |   3 +
 R/acc2lin.R          | 105 ++++++++++++++++++-----------------
 R/assign_job_queue.R | 128 ++++++++++++++++++++++---------------------
 R/blastWrappers.R    |  84 +++++++++++++++++++++-------
 4 files changed, 184 insertions(+), 136 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 078f971b..9449e14b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -240,8 +240,11 @@ importFrom(readr,write_lines)
 importFrom(readr,write_tsv)
 importFrom(rentrez,entrez_fetch)
 importFrom(rlang,.data)
+importFrom(rlang,abort)
 importFrom(rlang,as_string)
+importFrom(rlang,inform)
 importFrom(rlang,sym)
+importFrom(rlang,warn)
 importFrom(sendmailR,mime_part)
 importFrom(sendmailR,sendmail)
 importFrom(seqinr,dist.alignment)
diff --git a/R/acc2lin.R b/R/acc2lin.R
index bd5cc289..c1f3b34e 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -5,11 +5,13 @@
 # suppressPackageStartupMessages(library(data.table))
 # suppressPackageStartupMessages(library(tidyverse))
 # suppressPackageStartupMessages(library(biomartr))
-suppressPackageStartupMessages(library(rlang))
+
 
 # https://stackoverflow.com/questions/18730491/sink-does-not-release-file
 #' Sink Reset
 #'
+#' @importFrom rlang warn abort inform
+#'
 #' @return No return, but run to close all outstanding `sink()`s
 #'         and handles any errors or warnings that occur during the process.
 #'
@@ -25,17 +27,17 @@ sinkReset <- function() {
     for (i in seq_len(sink.number())) {
       sink(NULL)
     }
-    inform("All sinks closed", class = "sink_reset_info")
+    rlang::inform("All sinks closed", class = "sink_reset_info")
   }, error = function(e) {
-    abort(paste("Error: ", e$message), class = "sink_reset_error")
+    rlang::abort(paste("Error: ", e$message), class = "sink_reset_error")
   }, warning = function(w) {
-    warn(paste("Warning: ", w$message), class = "sink_reset_warning")
+    rlang::warn(paste("Warning: ", w$message), class = "sink_reset_warning")
   }, finally = {
     # If any additional cleanup is needed, it can be done here
     if (sink.number() > 0) {
       # Additional cleanup if sinks are still open
-      inform("Some sinks remain open, ensure proper cleanup.",
-             class = "sink_cleanup_warning")
+      rlang::inform("Some sinks remain open, ensure proper cleanup.",
+                    class = "sink_cleanup_warning")
     }
   })
 }
@@ -52,7 +54,7 @@ sinkReset <- function() {
 #'
 #' @importFrom dplyr pull
 #' @importFrom magrittr %>%
-#' @importFrom rlang sym
+#' @importFrom rlang sym warn abort inform
 #'
 #' @return Describe return, in detail
 #' @export
@@ -66,30 +68,30 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
                        plan = "sequential", ...) {
   # check for validate inputs
   if (!is.data.frame(df)) {
-    abort("Input 'df' must be a data frame.", class = "input_error")
+    rlang::abort("Input 'df' must be a data frame.", class = "input_error")
   }
 
   if (!acc_col %in% colnames(df)) {
-    abort(paste("Column", acc_col, 
-                "not found in data frame."), class = "column_error")
+    rlang::abort(paste("Column", acc_col,
+                       "not found in data frame."), class = "column_error")
   }
 
   # Ensure paths are character strings
   if (!is.character(assembly_path) || !is.character(lineagelookup_path)) {
-    abort("Both 'assembly_path' and 
-          'lineagelookup_path' must be character strings.",
-          class = "path_type_error")
+    rlang::abort("Both 'assembly_path' and
+                 'lineagelookup_path' must be character strings.",
+                 class = "path_type_error")
   }
 
   # Ensure paths exist
   if (!file.exists(assembly_path)) {
-    abort(paste("Assembly file not found at:",
-                assembly_path), class = "file_not_found_error")
+    rlang::abort(paste("Assembly file not found at:",
+                       assembly_path), class = "file_not_found_error")
   }
 
   if (!file.exists(lineagelookup_path)) {
-    abort(paste("Lineage lookup file not found at:",
-                lineagelookup_path), class = "file_not_found_error")
+    rlang::abort(paste("Lineage lookup file not found at:",
+                       lineagelookup_path), class = "file_not_found_error")
   }
   tryCatch({
     # Attempt to add lineages
@@ -99,7 +101,7 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
       accessions, assembly_path, lineagelookup_path, ipgout_path, plan
     )
 
-    # Drop a lot of the unimportant columns for now? 
+    # Drop a lot of the unimportant columns for now?
     # will make merging much easier
     lins <- lins[, c(
       "Strand", "Start", "Stop", "Nucleotide Accession", "Source",
@@ -107,18 +109,18 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
     ) := NULL]
     lins <- unique(lins)
 
-    # dup <- lins %>% group_by(Protein) %>% 
+    # dup <- lins %>% group_by(Protein) %>%
     # summarize(count = n()) %>% filter(count > 1) %>%
     # pull(Protein)
 
     merged <- merge(df, lins, by.x = acc_col, by.y = "Protein", all.x = TRUE)
     return(merged)
   }, error = function(e) {
-    abort(paste("Error during lineage addition:", e$message),
-          class = "lineage_addition_error")
+    rlang::abort(paste("Error during lineage addition:", e$message),
+                 class = "lineage_addition_error")
   }, warning = function(w) {
-    warn(paste("Warning during lineage addition:", w$message),
-         class = "lineage_addition_warning")
+    rlang::warn(paste("Warning during lineage addition:", w$message),
+                class = "lineage_addition_warning")
   })
 
 }
@@ -137,11 +139,13 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 #' This file can be generated using the \link[MolEvolvR]{downloadAssemblySummary} function
 #' @param lineagelookup_path String of the path to the lineage lookup file
 #' (taxid to lineage mapping). This file can be generated using the
-#' @param ipgout_path Path to write the results 
+#' @param ipgout_path Path to write the results
 #'                    of the efetch run of the accessions
 #' on the ipg database. If NULL, the file will not be written. Defaults to NULL
 #' @param plan
 #'
+#' @importFrom rlang warn abort inform
+#'
 #' @return Describe return, in detail
 #' @export
 #'
@@ -149,8 +153,8 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 #' \dontrun{
 #' acc2Lineage()
 #' }
-acc2Lineage <- function(accessions, assembly_path, 
-                        lineagelookup_path, ipgout_path = NULL, 
+acc2Lineage <- function(accessions, assembly_path,
+                        lineagelookup_path, ipgout_path = NULL,
                         plan = "sequential", ...) {
   tmp_ipg <- F
   if (is.null(ipgout_path)) {
@@ -167,12 +171,10 @@ acc2Lineage <- function(accessions, assembly_path,
     lins <- IPG2Lineage(accessions, ipgout_path,
                         assembly_path, lineagelookup_path)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("An error occurred during IPG fetching
                       or lineage processing:", e$message),
       class = "lineage_processing_error",
-      # capturing the call stack
-      call = sys.call(),
       # adding additional context
       accessions = accessions,
       assembly_path = assembly_path,
@@ -181,11 +183,10 @@ acc2Lineage <- function(accessions, assembly_path,
       plan = plan
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning during IPG fetching
                       or lineage processing:", w$message),
       class = "lineage_processing_warning",
-      call = sys.call(), # capturing the call stack
       accessions = accessions,
       assembly_path = assembly_path,
       lineagelookup_path = lineagelookup_path,
@@ -218,6 +219,7 @@ acc2Lineage <- function(accessions, assembly_path,
 #' @importFrom furrr future_map
 #' @importFrom future plan
 #' @importFrom rentrez entrez_fetch
+#' @importFrom rlang warn abort inform
 #'
 #' @return Describe return, in detail
 #' @export
@@ -229,18 +231,18 @@ acc2Lineage <- function(accessions, assembly_path,
 efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
   # Argument validation
   if (!is.character(accnums) || length(accnums) == 0) {
-    abort("Error: 'accnums' must be a non-empty character vector.",
-          class = "validation_error")
+    rlang::abort("Error: 'accnums' must be a non-empty character vector.",
+                 class = "validation_error")
   }
 
   if (!is.character(out_path) || nchar(out_path) == 0) {
-    abort("Error: 'out_path' must be a non-empty string.",
-          class = "validation_error")
+    rlang::abort("Error: 'out_path' must be a non-empty string.",
+                 class = "validation_error")
   }
 
   if (!is.function(plan)) {
-    abort("Error: 'plan' must be a valid plan function.",
-          class = "validation_error")
+    rlang::abort("Error: 'plan' must be a valid plan function.",
+                 class = "validation_error")
   }
   if (length(accnums) > 0) {
     partition <- function(in_data, groups) {
@@ -285,19 +287,17 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
       })
       sink(NULL)
     }, error = function(e) {
-      abort(
+      rlang::abort(
         message = paste("An error occurred: ", e$message),
         class = "fetch_error",
-        call = sys.call(),
         accnums = accnums,
         out_path = out_path,
         plan = plan
       )
     }, warning = function(w) {
-      warn(
+      rlang::warn(
         message = paste("Warning: ", w$message),
         class = "fetch_warning",
-        call = sys.call(),
         accnums = accnums,
         out_path = out_path,
         plan = plan
@@ -331,6 +331,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 #' "create_lineage_lookup()" function
 #'
 #' @importFrom data.table fread
+#' @importFrom rlang warn abort inform
 #'
 #' @return Describe return, in detail
 #' @export
@@ -344,31 +345,31 @@ IPG2Lineage <- function(accessions, ipg_file,
                         assembly_path, lineagelookup_path, ...) {
   # Argument validation for accessions
   if (!is.character(accessions) || length(accessions) == 0) {
-    abort("Input 'accessions' must be a non-empty
+    rlang::abort("Input 'accessions' must be a non-empty
           character vector.", class = "validation_error")
   }
 
   # check for validate inputs
   if (!is.character(ipg_file)) {
-    abort("Input 'ipg_file' must be a
+    rlang::abort("Input 'ipg_file' must be a
           character string.", class = "validation_error")
   }
 
   # Ensure paths are character strings
   if (!is.character(assembly_path) || !is.character(lineagelookup_path)) {
-    abort("Both 'assembly_path' and 'lineagelookup_path'
-          must be character strings.", class = "validation_error")
+    rlang::abort("Both 'assembly_path' and 'lineagelookup_path'
+                 must be character strings.", class = "validation_error")
   }
 
   # Ensure paths exist
   if (!file.exists(assembly_path)) {
-    abort(paste("Assembly file not found at:", assembly_path),
-          class = "file_error")
+    rlang::abort(paste("Assembly file not found at:", assembly_path),
+                 class = "file_error")
   }
 
   if (!file.exists(lineagelookup_path)) {
-    abort(paste("Lineage lookup file not found at:", lineagelookup_path),
-          class = "file_error")
+    rlang::abort(paste("Lineage lookup file not found at:", lineagelookup_path),
+                 class = "file_error")
   }
 
   # Process the IPG file
@@ -390,20 +391,18 @@ IPG2Lineage <- function(accessions, ipg_file,
 
     return(lins)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("An error occurred: ", e$message),
       class = "processing_error",
-      call = sys.call(),
       accessions = accessions,
       ipg_file = ipg_file,
       assembly_path = assembly_path,
       lineagelookup_path = lineagelookup_path
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning: ", w$message),
       class = "processing_warning",
-      call = sys.call(),
       accessions = accessions,
       ipg_file = ipg_file,
       assembly_path = assembly_path,
diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index df4f97e7..8b227979 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -1,4 +1,4 @@
-suppressPackageStartupMessages(library(rlang))
+
 # for now, we're using an env var, COMMON_SRC_ROOT, to specify this folder since
 # the working directory is changed in many parts of the current molevolvr
 # pipeline.
@@ -9,6 +9,8 @@ common_root <- Sys.getenv("COMMON_SRC_ROOT")
 
 #' Construct list where names (MolEvolvR advanced options) point to processes
 #'
+#' @importFrom rlang warn abort inform
+#'
 #' @return list where names (MolEvolvR advanced options) point to processes
 #'
 #' example: list_opts2procs <- make_opts2procs
@@ -23,9 +25,10 @@ make_opts2procs <- function() {
     )
     return(opts2processes)
   }, error = function(e) {
-    abort(paste("Error: ", e$message), class = "Opts_to_process_error")
+    rlang::abort(paste("Error: ", e$message), class = "Opts_to_process_error")
   }, warning = function(w) {
-    warn(paste("Warning: ", w$message), class = "Opts_to_process_warning")
+    rlang::warn(paste("Warning: ", w$message),
+                class = "Opts_to_process_warning")
   })
 
 }
@@ -34,6 +37,8 @@ make_opts2procs <- function() {
 #'
 #' @param advanced_opts character vector of MolEvolvR advanced options
 #'
+#' @importFrom rlang warn abort inform
+#'
 #' @return character vector of process names that will execute given
 #' the advanced options
 #'
@@ -43,7 +48,8 @@ make_opts2procs <- function() {
 #' @export
 map_advanced_opts2procs <- function(advanced_opts) {
   if (!is.character(advanced_opts)) {
-    abort("Argument must be a character vector!", class = "validation_error")
+    rlang::abort("Argument must be a character vector!",
+                 class = "validation_error")
   }
   tryCatch({
     # append 'always' to add procs that always run
@@ -55,17 +61,15 @@ map_advanced_opts2procs <- function(advanced_opts) {
     procs <- opts2proc[idx] |> unlist()
     return(procs)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("Encountered an error: ", e$message),
       class = "map_advanced_opts2procs_error",
-      call = sys.call(),
       advanced_opts = advanced_opts
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning: ", w$message),
       class = "map_advanced_opts2procs_warning",
-      call = sys.call(),
       advanced_opts = advanced_opts
     )
   })
@@ -78,6 +82,7 @@ map_advanced_opts2procs <- function(advanced_opts) {
 #' directory
 #'
 #' @importFrom dplyr across everything select summarise
+#' @importFrom rlang warn abort inform
 #'
 #' @return [list] names: processes; values: median runtime (seconds)
 #'
@@ -98,14 +103,14 @@ get_proc_medians <- function(dir_job_results) {
   tryCatch({
     # Check if dir_job_results is a character string
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      abort("Input 'dir_job_results' must be a single character string.",
-            class = "validation_error")
+      rlang::abort("Input 'dir_job_results' must be a single character string.",
+                   class = "validation_error")
     }
 
     # Check if dir_job_results exists
     if (!dir.exists(dir_job_results)) {
-      abort(paste("The directory", dir_job_results, "does not exist."),
-            class = "file_error")
+      rlang::abort(paste("The directory", dir_job_results, "does not exist."),
+                         class = "file_error")
     }
 
     source(file.path(common_root, "molevol_scripts", "R", "metrics.R"))
@@ -144,10 +149,10 @@ get_proc_medians <- function(dir_job_results) {
       as.list()
     return(list_proc_medians)
   }, error = function(e) {
-    abort(paste("Encountered an error: ", e$message),
-          class = "processing_error")
+    rlang::abort(paste("Encountered an error: ", e$message),
+                 class = "processing_error")
   }, warning = function(w) {
-    warn(paste("Warning: ", w$message), class = "processing_warning")
+    rlang::warn(paste("Warning: ", w$message), class = "processing_warning")
   })
 
 }
@@ -161,6 +166,7 @@ get_proc_medians <- function(dir_job_results) {
 #' @importFrom tibble as_tibble
 #' @importFrom readr write_tsv
 #' @importFrom tidyr pivot_longer
+#' @importFrom rlang warn abort inform
 #'
 #' @return [tbl_df] 2 columns: 1) process and 2) median seconds
 #'
@@ -173,18 +179,18 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
   tryCatch({
     # Error handling for input arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      abort("Input 'dir_job_results' must be a single character string.",
-            class = "validation_error")
+      rlang::abort("Input 'dir_job_results' must be a single character string.",
+                   class = "validation_error")
     }
 
     if (!dir.exists(dir_job_results)) {
-      abort(paste("The directory", dir_job_results, "does not exist."),
-            class = "file_error")
+      rlang::abort(paste("The directory", dir_job_results, "does not exist."),
+                   class = "file_error")
     }
 
     if (!is.character(filepath) || length(filepath) != 1) {
-      abort("Input 'filepath' must be a single character string.",
-            class = "validation_error")
+      rlang::abort("Input 'filepath' must be a single character string.",
+                   class = "validation_error")
     }
     df_proc_medians <- get_proc_medians(dir_job_results) |>
       tibble::as_tibble() |>
@@ -199,18 +205,16 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
     readr::write_tsv(df_proc_medians, file = filepath)
     return(df_proc_medians)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("Encountered an error: ", e$message),
       class = "processing_error",
-      call = sys.call(),
       dir_job_results = dir_job_results,
       filepath = filepath
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning: ", w$message),
       class = "processing_warning",
-      call = sys.call(),
       dir_job_results = dir_job_results,
       filepath = filepath
     )
@@ -226,10 +230,11 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
 #' read location.
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results directory
-#' @param filepath [chr] path to save YAML file; if NULL, 
+#' @param filepath [chr] path to save YAML file; if NULL,
 #'                 uses ./molevol_scripts/log_data/job_proc_weights.yml
 #'
 #' @importFrom yaml write_yaml
+#' @importFrom rlang warn abort inform
 #'
 #' @examples
 #' \dontrun{
@@ -243,7 +248,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
   tryCatch({
     # Error handling for dir_job_results arguments
     if (!is.character(dir_job_results) || length(dir_job_results) != 1) {
-      abort(
+      rlang::abort(
         message = "Input 'dir_job_results' must be a single character string.",
         class = "validation_error",
         dir_job_results = dir_job_results
@@ -251,7 +256,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
     }
 
     if (!dir.exists(dir_job_results)) {
-      abort(
+      rlang::abort(
         message = paste("The directory", dir_job_results, "does not exist."),
         class = "file_error",
         dir_job_results = dir_job_results
@@ -265,7 +270,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
                             "job_proc_weights.yml")
     }
     if (!is.character(filepath) || length(filepath) != 1) {
-      abort(
+      rlang::abort(
         message = "Input 'filepath' must be a single character string.",
         class = "validation_error",
         filepath = filepath
@@ -275,18 +280,16 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
     medians <- get_proc_medians(dir_job_results)
     yaml::write_yaml(medians, filepath)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("Encountered an error: ", e$message),
       class = "processing_error",
-      call = sys.call(),
       dir_job_results = dir_job_results,
       filepath = filepath
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning: ", w$message),
       class = "processing_warning",
-      call = sys.call(),
       dir_job_results = dir_job_results,
       filepath = filepath
     )
@@ -300,6 +303,7 @@ write_proc_medians_yml <- function(dir_job_results, filepath = NULL) {
 #'
 #' @importFrom stringr str_glue str_trim
 #' @importFrom yaml read_yaml
+#' @importFrom rlang warn abort inform
 #'
 #' @return [list] names: processes; values: median runtime (seconds)
 #'
@@ -317,9 +321,9 @@ get_proc_weights <- function(medians_yml_path = NULL) {
     # attempt to read the weights from the YAML file produced by
     # write_proc_medians_yml()
     if (stringr::str_trim(medians_yml_path) == "") {
-      abort(
-        message = stringr::str_glue("medians_yml_path is empty 
-                          ({medians_yml_path}), returning default weights"),
+      rlang::abort(
+        message = stringr::str_glue("medians_yml_path is empty
+                                    ({medians_yml_path}), returning default weights"),
         class = "input_error",
         medians_yml_path = medians_yml_path
       )
@@ -328,7 +332,7 @@ get_proc_weights <- function(medians_yml_path = NULL) {
     proc_weights <- yaml::read_yaml(medians_yml_path)
 
     if (!is.list(proc_weights) || length(proc_weights) == 0) {
-      abort(
+      rlang::abort(
         message = "The loaded YAML file does not
                   contain valid process weights.",
         class = "file_error",
@@ -364,6 +368,7 @@ get_proc_weights <- function(medians_yml_path = NULL) {
 #'
 #' @importFrom dplyr if_else
 #' @importFrom stringr str_glue
+#' @importFrom rlang warn abort inform
 #'
 #' @return total estimated number of seconds a job will process (walltime)
 #'
@@ -383,7 +388,7 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
 
     # Validate advanced_opts
     if (!is.character(advanced_opts)) {
-      abort(
+      rlang::abort(
         message = "Argument 'advanced_opts' must be a character vector.",
         class = "validation_error",
         advanced_opts = advanced_opts
@@ -392,8 +397,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
 
     # Validate n_inputs
     if (!is.numeric(n_inputs) || length(n_inputs) != 1 || n_inputs <= 0) {
-      abort(
-        message = "Argument 'n_inputs' 
+      rlang::abort(
+        message = "Argument 'n_inputs'
                   must be a single positive numeric value.",
         class = "validation_error",
         n_inputs = n_inputs
@@ -404,8 +409,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
     if ("homology_search" %in% advanced_opts &&
           (is.null(n_hits) || !is.numeric(n_hits) ||
              length(n_hits) != 1 || n_hits < 0)) {
-      abort(
-        message = "Argument 'n_hits' must be a single non-negative numeric 
+      rlang::abort(
+        message = "Argument 'n_hits' must be a single non-negative numeric
         value when 'homology_search' is in 'advanced_opts'.",
         class = "validation_error",
         n_hits = n_hits
@@ -415,7 +420,7 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
     # Get process weights
     proc_weights <- write_proc_medians_yml()
     if (!is.list(proc_weights)) {
-      abort(
+      rlang::abort(
         message = "Process weights could not be retrieved correctly.",
         class = "processing_error"
       )
@@ -437,9 +442,9 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
       opts2procs <- make_opts2procs()
       # exclude the homology search processes for the homologous hits
       procs2exclude_for_homologs <- opts2procs[["homology_search"]]
-      procs_homologs <- procs_from_opts[!(procs_from_opts 
+      procs_homologs <- procs_from_opts[!(procs_from_opts
                                           %in% procs2exclude_for_homologs)]
-      binary_proc_vec_homolog <- dplyr::if_else(all_procs 
+      binary_proc_vec_homolog <- dplyr::if_else(all_procs
                                                 %in% procs_homologs, 1L, 0L)
       # add the estimated walltime for processes run on the homologous hits
       est_walltime <- est_walltime +
@@ -457,19 +462,17 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
     }
     return(est_walltime)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("Encountered an error: ", e$message),
       class = "processing_error",
-      call = sys.call(),
       advanced_opts = advanced_opts,
       n_inputs = n_inputs,
       n_hits = n_hits
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning: ", w$message),
       class = "processing_warning",
-      call = sys.call(),
       advanced_opts = advanced_opts,
       n_inputs = n_inputs,
       n_hits = n_hits
@@ -486,6 +489,8 @@ advanced_opts2est_walltime <- function(advanced_opts, n_inputs = 1L,
 #' @param t_long threshold value that defines the lower bound for assigning a
 #' job to the "long queue"
 #'
+#' @importFrom rlang warn abort inform
+#'
 #' @return a string of "short" or "long"
 #'
 #' example:
@@ -500,7 +505,7 @@ assign_job_queue <- function(
   tryCatch({
     # Validate t_sec_estimate
     if (!is.numeric(t_sec_estimate) || length(t_sec_estimate) != 1) {
-      abort(
+      rlang::abort(
         message = "Argument 't_sec_estimate' must be a single numeric value.",
         class = "validation_error",
         t_sec_estimate = t_sec_estimate
@@ -509,8 +514,8 @@ assign_job_queue <- function(
 
     # Validate t_cutoff
     if (!is.numeric(t_cutoff) || length(t_cutoff) != 1 || t_cutoff < 0) {
-      abort(
-        message = "Argument 't_cutoff' must be a 
+      rlang::abort(
+        message = "Argument 't_cutoff' must be a
                   single non-negative numeric value.",
         class = "validation_error",
         t_cutoff = t_cutoff
@@ -521,18 +526,16 @@ assign_job_queue <- function(
     queue <- ifelse(t_sec_estimate > t_cutoff, "long", "short")
     return(queue)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("Encountered an error: ", e$message),
       class = "processing_error",
-      call = sys.call(),
       t_sec_estimate = t_sec_estimate,
       t_cutoff = t_cutoff
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning: ", w$message),
       class = "processing_warning",
-      call = sys.call(),
       t_sec_estimate = t_sec_estimate,
       t_cutoff = t_cutoff
     )
@@ -548,6 +551,7 @@ assign_job_queue <- function(
 #' @importFrom dplyr mutate select
 #' @importFrom ggplot2 aes geom_line ggplot labs
 #' @importFrom tibble as_tibble
+#' @importFrom rlang warn abort inform
 #'
 #' @return line plot object
 #'
@@ -581,8 +585,8 @@ plot_estimated_walltimes <- function() {
             n_hits <- if ("homology_search" %in% advanced_opts) {
               100
             } else {
-                NULL
-              }
+              NULL
+            }
             est_walltime <- advanced_opts2est_walltime	(
               advanced_opts,
               n_inputs = i,
@@ -627,8 +631,8 @@ plot_estimated_walltimes <- function() {
     # sec to hrs
     df_walltimes <- df_walltimes |>
       dplyr::mutate(est_walltime = est_walltime / 3600)
-    p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs, 
-                                                    y = est_walltime, 
+    p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs,
+                                                    y = est_walltime,
                                                     color = advanced_opts)) +
       ggplot2::geom_line() +
       ggplot2::labs(
@@ -638,12 +642,12 @@ plot_estimated_walltimes <- function() {
       )
     return(p)
   }, error = function(e) {
-    abort(
+    rlang::abort(
       message = paste("Encountered an error:", e$message),
       .internal = TRUE
     )
   }, warning = function(w) {
-    warn(
+    rlang::warn(
       message = paste("Warning:", w$message),
       .internal = TRUE
     )
diff --git a/R/blastWrappers.R b/R/blastWrappers.R
index 15484a1b..95643e24 100755
--- a/R/blastWrappers.R
+++ b/R/blastWrappers.R
@@ -13,6 +13,8 @@
 #' @param num_alignments
 #' @param num_threads
 #'
+#' @importFrom rlang warn abort inform
+#'
 #' @return
 #' @export
 #'
@@ -23,23 +25,25 @@ run_deltablast <- function(deltablast_path, db_search_path,
 
   # Argument validation
   if (!file.exists(deltablast_path)) {
-    stop("The DELTABLAST executable path is invalid: ", deltablast_path)
+    rlang::abort(paste("The DELTABLAST executable path is invalid:",
+                       deltablast_path))
   }
   if (!dir.exists(db_search_path)) {
-    stop("The database search path is invalid: ", db_search_path)
+    rlang::abort(paste("The database search path is invalid:", db_search_path))
   }
   if (!file.exists(query)) {
-    stop("The query file path is invalid: ", query)
+    rlang::abort(paste("The query file path is invalid:", query))
   }
   if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) {
-    stop("The evalue must be a positive number: ", evalue)
+    rlang::abort(paste("The evalue must be a positive number:", evalue))
   }
   if (!is.numeric(num_alignments) || num_alignments <= 0) {
-    stop("The number of alignments must be a 
-         positive integer: ", num_alignments)
+    rlang::abort(paste("The number of alignments must be a positive integer:",
+                       num_alignments))
   }
   if (!is.numeric(num_threads) || num_threads <= 0) {
-    stop("The number of threads must be a positive integer: ", num_threads)
+    rlang::abort(paste("The number of threads must be a positive integer:",
+                       num_threads))
   }
 
   start <- Sys.time()
@@ -61,13 +65,28 @@ run_deltablast <- function(deltablast_path, db_search_path,
     )
     print(Sys.time() - start)
   }, error = function(e) {
-    message(paste("Error in run_deltablast: ", e))
+    rlang::abort(
+      message = paste("Error in run_deltablast:", e$message),
+      class = "processing_error",
+      deltablast_path = deltablast_path,
+      db_search_path = db_search_path,
+      query = query,
+      out = out,
+      num_alignments = num_alignments,
+      num_threads = num_threads
+    )
   }, warning = function(w) {
-    message(paste("Warning in run_deltablast: ", w))
-  }, finally = {
-    message("run_deltablast completed")
+    rlang::warn(
+      message = paste("Warning in run_deltablast:", w$message),
+      class = "processing_warning",
+      deltablast_path = deltablast_path,
+      db_search_path = db_search_path,
+      query = query,
+      out = out,
+      num_alignments = num_alignments,
+      num_threads = num_threads
+    )
   })
-
 }
 
 
@@ -81,6 +100,8 @@ run_deltablast <- function(deltablast_path, db_search_path,
 #' @param out
 #' @param num_threads
 #'
+#' @importFrom rlang warn abort inform
+#'
 #' @return
 #' @export
 #'
@@ -90,19 +111,26 @@ run_rpsblast <- function(rpsblast_path, db_search_path,
                          out, num_threads = 1) {
   # Argument validation
   if (!file.exists(rpsblast_path)) {
-    stop("The RPSBLAST executable path is invalid: ", rpsblast_path)
+    rlang::abort(paste("The RPSBLAST executable path is invalid:",
+                       rpsblast_path),
+                 class = "file_error")
   }
   if (!dir.exists(db_search_path)) {
-    stop("The database search path is invalid: ", db_search_path)
+    rlang::abort(paste("The database search path is invalid:", db_search_path),
+                 class = "file_error")
   }
   if (!file.exists(query)) {
-    stop("The query file path is invalid: ", query)
+    rlang::abort(paste("The query file path is invalid:", query),
+                 class = "file_error")
   }
   if (!is.numeric(as.numeric(evalue)) || as.numeric(evalue) <= 0) {
-    stop("The evalue must be a positive number: ", evalue)
+    rlang::abort(paste("The evalue must be a positive number:", evalue),
+          class = "validation_error")
   }
   if (!is.numeric(num_threads) || num_threads <= 0) {
-    stop("The number of threads must be a positive integer: ", num_threads)
+    rlang::abort(paste("The number of threads must be a positive integer:",
+                       num_threads),
+                 class = "validation_error")
   }
 
   start <- Sys.time()
@@ -123,11 +151,25 @@ run_rpsblast <- function(rpsblast_path, db_search_path,
     )
     print(Sys.time() - start)
   }, error = function(e) {
-    message(paste("Error in run_rpsblast: ", e))
+    rlang::abort(
+      message = paste("Error in run_rpsblast:", e$message),
+      class = "processing_error",
+      rpsblast_path = rpsblast_path,
+      db_search_path = db_search_path,
+      query = query,
+      out = out,
+      num_threads = num_threads
+    )
   }, warning = function(w) {
-    message(paste("Warning in run_rpsblast: ", w))
-  }, finally = {
-    message("run_rpsblast completed")
+    rlang::warn(
+      message = paste("Warning in run_rpsblast:", w$message),
+      class = "processing_warning",
+      rpsblast_path = rpsblast_path,
+      db_search_path = db_search_path,
+      query = query,
+      out = out,
+      num_threads = num_threads
+    )
   })
 
 }

From f4b50f4c387f7a797e630eb2b00c06d473ee75da Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 29 Oct 2024 20:39:13 -0600
Subject: [PATCH 5/6] fix error introduced by merge

---
 R/assign_job_queue.R | 28 +---------------------------
 1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index 6f3dde17..e0c22ec6 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -127,33 +127,6 @@ calculateProcessRuntime <- function(dir_job_results) {
     dir.create(dirname(path_log_data),
                 recursive = TRUE, showWarnings = FALSE)
   }
-
-  # attempt to load pre-generated logdata
-  if (!file.exists(path_log_data)) {
-    logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60)
-    save(logs, file = path_log_data)
-  } else {
-    load(path_log_data) # loads the logs object
-  }
-  df_log <- logs$df_log
-  procs <- c(
-    "dblast", "dblast_cleanup", "iprscan",
-    "ipr2lineage", "ipr2da", "blast_clust",
-    "clust2table"
-  )
-  list_proc_medians <- df_log |>
-    dplyr::select(dplyr::all_of(procs)) |>
-    dplyr::summarise(
-      dplyr::across(
-        dplyr::everything(),
-        \(x) median(x, na.rm = TRUE)
-      )
-    ) |>
-    as.list()
-  return(list_proc_medians)
-}
-
-
     # attempt to load pre-generated logdata
     if (!file.exists(path_log_data)) {
       logs <- aggregate_logs(dir_job_results, latest_date = Sys.Date() - 60)
@@ -600,6 +573,7 @@ assignJobQueue <- function(
 #'                 dev/molevol_scripts/docs/estimate_walltimes.png", plot = p)
 #' @export
 plotEstimatedWallTimes <- function() {
+  tryCatch({
     opts <- mapOption2Process() |> names()
     # get all possible submission permutations (powerset)
     get_powerset <- function(vec) {

From dd86b3ce04e68345297ee2f0f095f2999ff286f1 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Tue, 29 Oct 2024 20:45:29 -0600
Subject: [PATCH 6/6] fix .Rd

---
 R/assign_job_queue.R | 18 ++++++++++++++++++
 man/acc2Lineage.Rd   |  3 +--
 man/efetchIPG.Rd     |  3 +--
 man/sinkReset.Rd     |  1 -
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index e0c22ec6..52af46bf 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -36,6 +36,9 @@ mapOption2Process <- function() {
 
 }
 
+#' mapAdvOption2Process
+#' 
+#' @description
 #' Use MolEvolvR advanced options to get associated processes
 #'
 #' @param advanced_opts character vector of MolEvolvR advanced options
@@ -79,6 +82,9 @@ mapAdvOption2Process <- function(advanced_opts) {
 
 }
 
+#' calculateProcessRuntime
+#' 
+#' @description
 #' Scrape MolEvolvR logs and calculate median processes
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results
@@ -227,6 +233,9 @@ writeProcessRuntime2TSV <- function(dir_job_results, filepath) {
 
 }
 
+#' writeProcessRuntime2YML
+#' 
+#' @description
 #' Compute median process runtimes, then write a YAML list of the processes and
 #' their median runtimes in seconds to the path specified by 'filepath'.
 #'
@@ -304,6 +313,9 @@ writeProcessRuntime2YML <- function(dir_job_results, filepath = NULL) {
   })
 }
 
+#' getProcessRuntimeWeights
+#' 
+#' @description
 #' Quickly get the runtime weights for MolEvolvR backend processes
 #'
 #' @param dir_job_results [chr] path to MolEvolvR job_results
@@ -494,6 +506,9 @@ calculateEstimatedWallTimeFromOpts	 <- function(advanced_opts,
 }
 
 
+#' assignJobQueue
+#' 
+#' @description
 #' Decision function to assign job queue
 #'
 #' @param t_sec_estimate estimated number of seconds a job will process
@@ -555,6 +570,9 @@ assignJobQueue <- function(
 
 }
 
+#' plotEstimatedWallTimes
+#' 
+#' @description
 #' Plot the estimated runtimes for different advanced options and number
 #' of inputs
 #'
diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd
index fd4eeceb..ce499592 100644
--- a/man/acc2Lineage.Rd
+++ b/man/acc2Lineage.Rd
@@ -44,8 +44,7 @@ accessions. The dataframe includes relevant columns such as TaxID, GCA_ID,
 Protein, Protein Name, Species, and Lineage.
 }
 \description{
-This function combines 'efetchIPG()'
-and 'IPG2Lineage()' to map a set
+This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
 of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
 
 Function to map protein accession numbers to lineage
diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd
index eb5ca678..e55c342a 100644
--- a/man/efetchIPG.Rd
+++ b/man/efetchIPG.Rd
@@ -27,8 +27,7 @@ The function does not return a value but writes the efetch results
 directly to the specified \code{out_path}.
 }
 \description{
-Perform efetch on the ipg database
-and write the results to out_path
+Perform efetch on the ipg database and write the results to out_path
 
 Perform efetch on the ipg database and write the results to out_path
 }
diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd
index e3fc7ce4..0285c0b2 100644
--- a/man/sinkReset.Rd
+++ b/man/sinkReset.Rd
@@ -8,7 +8,6 @@ sinkReset()
 }
 \value{
 No return, but run to close all outstanding \code{sink()}s
-and handles any errors or warnings that occur during the process.
 }
 \description{
 Sink Reset