Noble-Lab · Lilferrit · Aug 21, 2024 · Jul 30, 2024 · Jul 30, 2024 · Jul 31, 2024
diff --git a/.github/ISSUE_TEMPLATE/bug_report_template.md b/.github/ISSUE_TEMPLATE/bug_report_template.md
@@ -0,0 +1,55 @@
+---
+name: Bug Report
+about: Submit a Casanovo Bug Report
+labels: bug
+---
+
+## Describe the Issue
+A clear and concise description of what the issue/bug is.
+
+## Steps To Reproduce
+Steps to reproduce the incorrect behavior.
+
+## Expected Behavior
+A clear and concise description of what you expected to happen.
+
+## Terminal Output (If Applicable)
+Provide any applicable console output in between the tick marks below.
+
+```
+
+```
+
+## Environment
+- OS: [e.g. Windows 11, Windows 10, macOS 14, Ubuntu 24.04]
+- Casanovo Version: [e.g. 4.2.1]
+- Hardware Used (CPU or GPU; if GPU, also include the GPU model and CUDA version): [e.g. GPU: NVIDIA GeForce RTX 2070, CUDA Version: 12.5]
+
+### Checking GPU Version
+
+The GPU model can be checked by typing `nvidia-smi` into a terminal/console window.
+An example of how to use this command is shown below.
+In this case, the CUDA version is 12.5 and the GPU model is GeForce RTX 2070.
+
+
+```
+(casanovo_env) C:\Users\<user>\OneDrive\Documents\casanovo>nvidia-smi
+Fri Aug  2 12:34:57 2024       
++-----------------------------------------------------------------------------------------+
+| NVIDIA-SMI 555.99                 Driver Version: 555.99         CUDA Version: 12.5     |
+|-----------------------------------------+------------------------+----------------------+
+| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
+| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
+|                                         |                        |               MIG M. |
+|=========================================+========================+======================|
+|   0  NVIDIA GeForce RTX 2070 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
+| N/A   60C    P8             16W /   90W |    1059MiB /   8192MiB |      0%      Default |
+|                                         |                        |                  N/A |
++-----------------------------------------+------------------------+----------------------+
+```
+
+## Additional Context
+Add any other context about the problem here.
+
+## Attach Files
+Please attach all input files used and the full Casanovo log file.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,10 +11,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - During training, model checkpoints will be saved at the end of each training epoch in addition to the checkpoints saved at the end of every validation run.
 - Besides as a local file, model weights can be specified from a URL. Upon initial download, the weights file is cached for future re-use.
 
+### Changed
+
+- Removed the `evaluate` sub-command, and all model evaluation functionality has been moved to the `sequence` command using the new `--evaluate` flag.
+
 ### Fixed
 
 - Precursor charges are exported as integers instead of floats in the mzTab output file, in compliance with the mzTab specification.
 
+### Removed
+
+- Removed the `save_top_k` option from the Casanovo config; the model with the lowest validation loss during training will now be saved to a fixed filename, `<output_root>.best.ckpt`.
+
 ## [4.2.1] - 2024-06-25
 
 ### Fixed

diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py
@@ -128,64 +128,50 @@ def main() -> None:
     nargs=-1,
     type=click.Path(exists=True, dir_okay=False),
 )
+@click.option(
+    "--evaluate",
+    "-e",
+    is_flag=True,
+    default=False,
+    help="""
+    Run in evaluation mode. When this flag is set the peptide and amino
+    acid precision will be calculated and logged at the end of the sequencing
+    run. All input files must be annotated MGF files if running in evaluation
+    mode.
+    """,
+)
 def sequence(
     peak_path: Tuple[str],
     model: Optional[str],
     config: Optional[str],
     output: Optional[str],
     verbosity: str,
+    evaluate: bool,
 ) -> None:
     """De novo sequence peptides from tandem mass spectra.
 
-    PEAK_PATH must be one or more mzMl, mzXML, or MGF files from which
-    to sequence peptides.
+    PEAK_PATH must be one or more mzML, mzXML, or MGF files from which
+    to sequence peptides. If evaluate is set to True PEAK_PATH must be
+    one or more annotated MGF files.
     """
     output = setup_logging(output, verbosity)
     config, model = setup_model(model, config, output, False)
     start_time = time.time()
     with ModelRunner(config, model) as runner:
-        logger.info("Sequencing peptides from:")
+        logger.info(
+            "Sequencing %speptides from:",
+            "and evaluating " if evaluate else "",
+        )
         for peak_file in peak_path:
             logger.info("  %s", peak_file)
 
-        runner.predict(peak_path, output)
+        runner.predict(peak_path, output, evaluate=evaluate)
         psms = runner.writer.psms
         utils.log_sequencing_report(
             psms, start_time=start_time, end_time=time.time()
         )
 
 
-@main.command(cls=_SharedParams)
-@click.argument(
-    "annotated_peak_path",
-    required=True,
-    nargs=-1,
-    type=click.Path(exists=True, dir_okay=False),
-)
-def evaluate(
-    annotated_peak_path: Tuple[str],
-    model: Optional[str],
-    config: Optional[str],
-    output: Optional[str],
-    verbosity: str,
-) -> None:
-    """Evaluate de novo peptide sequencing performance.
-
-    ANNOTATED_PEAK_PATH must be one or more annoated MGF files,
-    such as those provided by MassIVE-KB.
-    """
-    output = setup_logging(output, verbosity)
-    config, model = setup_model(model, config, output, False)
-    start_time = time.time()
-    with ModelRunner(config, model) as runner:
-        logger.info("Sequencing and evaluating peptides from:")
-        for peak_file in annotated_peak_path:
-            logger.info("  %s", peak_file)
-
-        runner.evaluate(annotated_peak_path)
-        utils.log_run_report(start_time=start_time, end_time=time.time())
-
-
 @main.command(cls=_SharedParams)
 @click.argument(
     "train_peak_path",

diff --git a/casanovo/config.py b/casanovo/config.py
@@ -18,6 +18,7 @@
 _config_deprecated = dict(
     every_n_train_steps="val_check_interval",
     max_iters="cosine_schedule_period_iters",
+    save_top_k=None,
 )
 
 
@@ -74,7 +75,6 @@ class Config:
         top_match=int,
         max_epochs=int,
         num_sanity_val_steps=int,
-        save_top_k=int,
         model_save_folder_path=str,
         val_check_interval=int,
         calculate_precision=bool,
@@ -96,12 +96,20 @@ def __init__(self, config_file: Optional[str] = None):
                 # Remap deprecated config entries.
                 for old, new in _config_deprecated.items():
                     if old in self._user_config:
-                        self._user_config[new] = self._user_config.pop(old)
-                        warnings.warn(
-                            f"Deprecated config option '{old}' remapped to "
-                            f"'{new}'",
-                            DeprecationWarning,
-                        )
+                        if new is not None:
+                            self._user_config[new] = self._user_config.pop(old)
+                            warning_msg = (
+                                f"Deprecated config option '{old}' "
+                                f"remapped to '{new}'"
+                            )
+                        else:
+                            del self._user_config[old]
+                            warning_msg = (
+                                f"Deprecated config option '{old}' "
+                                "is no longer in use"
+                            )
+
+                        warnings.warn(warning_msg, DeprecationWarning)
                 # Check for missing entries in config file.
                 config_missing = self._params.keys() - self._user_config.keys()
                 if len(config_missing) > 0:

diff --git a/casanovo/config.yaml b/casanovo/config.yaml
@@ -42,9 +42,6 @@ random_seed: 454
 n_log: 1
 # Tensorboard directory to use for keeping track of training metrics.
 tb_summarywriter:
-# Save the top k model checkpoints during training. -1 saves all, and leaving
-# this field empty saves none.
-save_top_k: 5
 # Path to saved checkpoints.
 model_save_folder_path: ""
 # Model validation and checkpointing frequency in training steps.

diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py
@@ -83,7 +83,9 @@ def __getitem__(
             The unique spectrum identifier, formed by its original peak file and
             identifier (index or scan number) therein.
         """
-        mz_array, int_array, precursor_mz, precursor_charge = self.index[idx]
+        mz_array, int_array, precursor_mz, precursor_charge = self.index[idx][
+            :4
+        ]
         spectrum = self._process_peaks(
             mz_array, int_array, precursor_mz, precursor_charge
         )