From a6cb0ce6ad6f6df16c42b1327969290e5af1bbd8 Mon Sep 17 00:00:00 2001
From: Wout Bittremieux <bittremieux@users.noreply.github.com>
Date: Thu, 22 Feb 2024 10:13:24 +0100
Subject: [PATCH] Rename max_iters to cosine_schedule_period_iters (#300)

* Rename max_iters to cosine_schedule_period_iters

* Add deprecated config option unit test

* Fix missed rename

* Proper linting

* Remove unnecessary logging

* Test that checkpoints with deprecated config options can be loaded

* Minor change

* Add test for fine-tuning with deprecated config options

* Remove deprecated hyperparameters during model loading

* Include deprecated hyperparameter warning

* Test whether the warning is issued

* Verify that the deprecated option is removed

* Fix comments

* Avoid defining deprecated options twice

* Remap previous renamed config option `every_n_train_steps`

* Update changelog

---------

Co-authored-by: melihyilmaz <yilmazmelih97@gmail.com>
---
 CHANGELOG.md                    |  8 +++
 casanovo/config.py              | 20 +++++++-
 casanovo/config.yaml            | 89 ++++++++++++++++-----------------
 casanovo/denovo/model.py        | 64 +++++++++++++++---------
 casanovo/denovo/model_runner.py | 12 ++---
 tests/conftest.py               |  2 +-
 tests/unit_tests/test_config.py | 12 +++++
 tests/unit_tests/test_runner.py | 32 +++++++++++-
 8 files changed, 160 insertions(+), 79 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9f5f939..040e87e3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ## [Unreleased]
 
+### Added
+
+- A deprecation warning will be issued when deprecated config options are used in the config file or in the model weights file.
+
+### Changed
+
+- Config option `max_iters` has been renamed to `cosine_schedule_period_iters` to better reflect that it controls the number of iterations for the cosine half period of the learning rate.
+
 ## [4.1.0] - 2024-02-16
 
 ### Changed
diff --git a/casanovo/config.py b/casanovo/config.py
index 817766ac..792da35a 100644
--- a/casanovo/config.py
+++ b/casanovo/config.py
@@ -2,6 +2,7 @@
 
 import logging
 import shutil
+import warnings
 from pathlib import Path
 from typing import Optional, Dict, Callable, Tuple, Union
 
@@ -12,6 +13,14 @@
 logger = logging.getLogger("casanovo")
 
 
+# FIXME: This contains deprecated config options to be removed in the next major
+#  version update.
+_config_deprecated = dict(
+    every_n_train_steps="val_check_interval",
+    max_iters="cosine_schedule_period_iters",
+)
+
+
 class Config:
     """The Casanovo configuration options.
 
@@ -56,7 +65,7 @@ class Config:
         tb_summarywriter=str,
         train_label_smoothing=float,
         warmup_iters=int,
-        max_iters=int,
+        cosine_schedule_period_iters=int,
         learning_rate=float,
         weight_decay=float,
         train_batch_size=int,
@@ -84,6 +93,15 @@ def __init__(self, config_file: Optional[str] = None):
         else:
             with Path(config_file).open() as f_in:
                 self._user_config = yaml.safe_load(f_in)
+                # Remap deprecated config entries.
+                for old, new in _config_deprecated.items():
+                    if old in self._user_config:
+                        self._user_config[new] = self._user_config.pop(old)
+                        warnings.warn(
+                            f"Deprecated config option '{old}' remapped to "
+                            f"'{new}'",
+                            DeprecationWarning,
+                        )
                 # Check for missing entries in config file.
                 config_missing = self._params.keys() - self._user_config.keys()
                 if len(config_missing) > 0:
diff --git a/casanovo/config.yaml b/casanovo/config.yaml
index 24bf4623..c7186ff7 100644
--- a/casanovo/config.yaml
+++ b/casanovo/config.yaml
@@ -4,30 +4,29 @@
 ###
 
 ###
-# The following parameters can be modified when running inference or
-# when fine-tuning an existing Casanovo model.
+# The following parameters can be modified when running inference or when
+# fine-tuning an existing Casanovo model.
 ###
 
-# Max absolute difference allowed with respect to observed precursor m/z
+# Max absolute difference allowed with respect to observed precursor m/z.
 # Predictions outside the tolerance range are assigned a negative peptide score.
 precursor_mass_tol: 50  # ppm
-# Isotopes to consider when comparing predicted and observed precursor m/z's
+# Isotopes to consider when comparing predicted and observed precursor m/z's.
 isotope_error_range: [0, 1]
-# The minimum length of predicted peptides
+# The minimum length of predicted peptides.
 min_peptide_len: 6
-# Number of spectra in one inference batch
+# Number of spectra in one inference batch.
 predict_batch_size: 1024
-# Number of beams used in beam search
+# Number of beams used in beam search.
 n_beams: 1
-# Number of PSMs for each spectrum
+# Number of PSMs for each spectrum.
 top_match: 1
 # The hardware accelerator to use. Must be one of:
-# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto"
+# "cpu", "gpu", "tpu", "ipu", "hpu", "mps", or "auto".
 accelerator: "auto"
-# The devices to use. Can be set to a positive number int,
-# or the value -1 to indicate all available devices should be used,
-# If left empty, the appropriate number will be automatically
-# selected for automatic selected on the chosen accelerator.
+# The devices to use. Can be set to a positive number int, or the value -1 to
+# indicate all available devices should be used. If left empty, the appropriate
+# number will be automatically selected based on the chosen accelerator.
 devices:
 
 ###
@@ -35,72 +34,72 @@ devices:
 # Casanovo model from scratch.
 ###
 
-# Random seed to ensure reproducible results
+# Random seed to ensure reproducible results.
 random_seed: 454
 
 # OUTPUT OPTIONS
-# Logging frequency in training steps
+# Logging frequency in training steps.
 n_log: 1
-# Tensorboard directory to use for keeping track of training metrics
+# Tensorboard directory to use for keeping track of training metrics.
 tb_summarywriter:
-# Save the top k model checkpoints during training. -1 saves all, and
-# leaving this field empty saves none.
+# Save the top k model checkpoints during training. -1 saves all, and leaving
+# this field empty saves none.
 save_top_k: 5
-# Path to saved checkpoints
+# Path to saved checkpoints.
 model_save_folder_path: ""
-# Model validation and checkpointing frequency in training steps
+# Model validation and checkpointing frequency in training steps.
 val_check_interval: 50_000
 
 # SPECTRUM PROCESSING OPTIONS
-# Number of the most intense peaks to retain, any remaining peaks are discarded
+# Number of the most intense peaks to retain, any remaining peaks are discarded.
 n_peaks: 150
-# Min peak m/z allowed, peaks with smaller m/z are discarded
+# Min peak m/z allowed, peaks with smaller m/z are discarded.
 min_mz: 50.0
-# Max peak m/z allowed, peaks with larger m/z are discarded
+# Max peak m/z allowed, peaks with larger m/z are discarded.
 max_mz: 2500.0
-# Min peak intensity allowed, less intense peaks are discarded
+# Min peak intensity allowed, less intense peaks are discarded.
 min_intensity: 0.01
-# Max absolute m/z difference allowed when removing the precursor peak
+# Max absolute m/z difference allowed when removing the precursor peak.
 remove_precursor_tol: 2.0  # Da
-# Max precursor charge allowed, spectra with larger charge are skipped
+# Max precursor charge allowed, spectra with larger charge are skipped.
 max_charge: 10
 
 # MODEL ARCHITECTURE OPTIONS
-# Dimensionality of latent representations, i.e. peak embeddings
+# Dimensionality of latent representations, i.e. peak embeddings.
 dim_model: 512
-# Number of attention heads
+# Number of attention heads.
 n_head: 8
-# Dimensionality of fully connected layers
+# Dimensionality of fully connected layers.
 dim_feedforward: 1024
-# Number of transformer layers in spectrum encoder and peptide decoder
+# Number of transformer layers in spectrum encoder and peptide decoder.
 n_layers: 9
-# Dropout rate for model weights
+# Dropout rate for model weights.
 dropout: 0.0
-# Number of dimensions to use for encoding peak intensity
-# Projected up to ``dim_model`` by default and summed with the peak m/z encoding
+# Number of dimensions to use for encoding peak intensity.
+# Projected up to `dim_model` by default and summed with the peak m/z encoding.
 dim_intensity:
-# Max decoded peptide length
+# Max decoded peptide length.
 max_length: 100
-# Number of warmup iterations for learning rate scheduler
+# The number of iterations for the linear warm-up of the learning rate.
 warmup_iters: 100_000
-# Max number of iterations for learning rate scheduler
-max_iters: 600_000
-# Learning rate for weight updates during training
+# The number of iterations for the cosine half period of the learning rate.
+cosine_schedule_period_iters: 600_000
+# Learning rate for weight updates during training.
 learning_rate: 5e-4
-# Regularization term for weight updates
+# Regularization term for weight updates.
 weight_decay: 1e-5
-# Amount of label smoothing when computing the training loss
+# Amount of label smoothing when computing the training loss.
 train_label_smoothing: 0.01
 
 # TRAINING/INFERENCE OPTIONS
-# Number of spectra in one training batch
+# Number of spectra in one training batch.
 train_batch_size: 32
-# Max number of training epochs
+# Max number of training epochs.
 max_epochs: 30
-# Number of validation steps to run before training begins
+# Number of validation steps to run before training begins.
 num_sanity_val_steps: 0
-# Calculate peptide and amino acid precision during training. this
-# is expensive, so we recommend against it.
+# Calculate peptide and amino acid precision during training.
+# This is expensive, so we recommend against it.
 calculate_precision: False
 
 # AMINO ACID AND MODIFICATION VOCABULARY
diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py
index 9ea9cb23..50d43047 100644
--- a/casanovo/denovo/model.py
+++ b/casanovo/denovo/model.py
@@ -3,6 +3,7 @@
 import collections
 import heapq
 import logging
+import warnings
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 import depthcharge.masses
@@ -14,6 +15,7 @@
 from depthcharge.components import ModelMixin, PeptideDecoder, SpectrumEncoder
 
 from . import evaluate
+from .. import config
 from ..data import ms_io
 
 logger = logging.getLogger("casanovo")
@@ -46,7 +48,7 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
         linear layer, then summed with the m/z encoding for each peak.
     max_length : int
         The maximum peptide length to decode.
-    residues: Union[Dict[str, float], str]
+    residues : Union[Dict[str, float], str]
         The amino acid dictionary and their masses. By default ("canonical) this
         is only the 20 canonical amino acids, with cysteine carbamidomethylated.
         If "massivekb", this dictionary will include the modifications found in
@@ -65,24 +67,24 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
         < precursor_mass_tol`
     min_peptide_len : int
         The minimum length of predicted peptides.
-    n_beams: int
+    n_beams : int
         Number of beams used during beam search decoding.
-    top_match: int
+    top_match : int
         Number of PSMs to return for each spectrum.
     n_log : int
         The number of epochs to wait between logging messages.
-    tb_summarywriter: Optional[str]
+    tb_summarywriter : Optional[str]
         Folder path to record performance metrics during training. If ``None``,
         don't use a ``SummaryWriter``.
-    train_label_smoothing: float
+    train_label_smoothing : float
         Smoothing factor when calculating the training loss.
-    warmup_iters: int
-        The number of warm up iterations for the learning rate scheduler.
-    max_iters: int
-        The total number of iterations for the learning rate scheduler.
-    out_writer: Optional[str]
+    warmup_iters : int
+        The number of iterations for the linear warm-up of the learning rate.
+    cosine_schedule_period_iters : int
+        The number of iterations for the cosine half period of the learning rate.
+    out_writer : Optional[str]
         The output writer for the prediction results.
-    calculate_precision: bool
+    calculate_precision : bool
         Calculate the validation set precision during training.
         This is expensive.
     **kwargs : Dict
@@ -111,7 +113,7 @@ def __init__(
         ] = None,
         train_label_smoothing: float = 0.01,
         warmup_iters: int = 100_000,
-        max_iters: int = 600_000,
+        cosine_schedule_period_iters: int = 600_000,
         out_writer: Optional[ms_io.MztabWriter] = None,
         calculate_precision: bool = False,
         **kwargs: Dict,
@@ -144,7 +146,15 @@ def __init__(
         self.val_celoss = torch.nn.CrossEntropyLoss(ignore_index=0)
         # Optimizer settings.
         self.warmup_iters = warmup_iters
-        self.max_iters = max_iters
+        self.cosine_schedule_period_iters = cosine_schedule_period_iters
+        # `kwargs` also holds unrecognized arguments, including deprecated
+        # ones. Drop only those actually present, warning once per option.
+        for k in config._config_deprecated.keys() & kwargs.keys():
+            kwargs.pop(k)
+            warnings.warn(
+                f"Deprecated hyperparameter '{k}' removed from the model.",
+                DeprecationWarning,
+            )
         self.opt_kwargs = kwargs
 
         # Data properties.
@@ -960,29 +970,33 @@ def configure_optimizers(
         optimizer = torch.optim.Adam(self.parameters(), **self.opt_kwargs)
         # Apply learning rate scheduler per step.
         lr_scheduler = CosineWarmupScheduler(
-            optimizer, warmup=self.warmup_iters, max_iters=self.max_iters
+            optimizer, self.warmup_iters, self.cosine_schedule_period_iters
         )
         return [optimizer], {"scheduler": lr_scheduler, "interval": "step"}
 
 
 class CosineWarmupScheduler(torch.optim.lr_scheduler._LRScheduler):
     """
-    Learning rate scheduler with linear warm up followed by cosine shaped decay.
+    Learning rate scheduler with linear warm-up followed by cosine shaped decay.
 
     Parameters
     ----------
     optimizer : torch.optim.Optimizer
         Optimizer object.
-    warmup : int
-        The number of warm up iterations.
-    max_iters : torch.optim
-        The total number of iterations.
+    warmup_iters : int
+        The number of iterations for the linear warm-up of the learning rate.
+    cosine_schedule_period_iters : int
+        The number of iterations for the cosine half period of the learning rate.
     """
 
     def __init__(
-        self, optimizer: torch.optim.Optimizer, warmup: int, max_iters: int
+        self,
+        optimizer: torch.optim.Optimizer,
+        warmup_iters: int,
+        cosine_schedule_period_iters: int,
     ):
-        self.warmup, self.max_iters = warmup, max_iters
+        self.warmup_iters = warmup_iters
+        self.cosine_schedule_period_iters = cosine_schedule_period_iters
         super().__init__(optimizer)
 
     def get_lr(self):
@@ -990,9 +1004,11 @@ def get_lr(self):
         return [base_lr * lr_factor for base_lr in self.base_lrs]
 
     def get_lr_factor(self, epoch):
-        lr_factor = 0.5 * (1 + np.cos(np.pi * epoch / self.max_iters))
-        if epoch <= self.warmup:
-            lr_factor *= epoch / self.warmup
+        lr_factor = 0.5 * (
+            1 + np.cos(np.pi * epoch / self.cosine_schedule_period_iters)
+        )
+        if epoch <= self.warmup_iters:
+            lr_factor *= epoch / self.warmup_iters
         return lr_factor
 
 
diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py
index 3253419a..4bd2165e 100644
--- a/casanovo/denovo/model_runner.py
+++ b/casanovo/denovo/model_runner.py
@@ -204,8 +204,8 @@ def initialize_model(self, train: bool) -> None:
         Parameters
         ----------
         train : bool
-            Determines whether to set the model up for model training
-            or evaluation / inference.
+            Determines whether to set the model up for model training or
+            evaluation / inference.
         """
         model_params = dict(
             dim_model=self.config.dim_model,
@@ -226,14 +226,14 @@ def initialize_model(self, train: bool) -> None:
             tb_summarywriter=self.config.tb_summarywriter,
             train_label_smoothing=self.config.train_label_smoothing,
             warmup_iters=self.config.warmup_iters,
-            max_iters=self.config.max_iters,
+            cosine_schedule_period_iters=self.config.cosine_schedule_period_iters,
             lr=self.config.learning_rate,
             weight_decay=self.config.weight_decay,
             out_writer=self.writer,
             calculate_precision=self.config.calculate_precision,
         )
 
-        # Reconfigurable non-architecture related parameters for a loaded model
+        # Reconfigurable non-architecture related parameters for a loaded model.
         loaded_model_params = dict(
             max_length=self.config.max_length,
             precursor_mass_tol=self.config.precursor_mass_tol,
@@ -245,7 +245,7 @@ def initialize_model(self, train: bool) -> None:
             tb_summarywriter=self.config.tb_summarywriter,
             train_label_smoothing=self.config.train_label_smoothing,
             warmup_iters=self.config.warmup_iters,
-            max_iters=self.config.max_iters,
+            cosine_schedule_period_iters=self.config.cosine_schedule_period_iters,
             lr=self.config.learning_rate,
             weight_decay=self.config.weight_decay,
             out_writer=self.writer,
@@ -300,7 +300,7 @@ def initialize_model(self, train: bool) -> None:
             except RuntimeError:
                 raise RuntimeError(
                     "Weights file incompatible with the current version of "
-                    "Casanovo. "
+                    "Casanovo."
                 )
 
     def initialize_data_module(
diff --git a/tests/conftest.py b/tests/conftest.py
index 3345824e..02a6d0f2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -193,7 +193,7 @@ def tiny_config(tmp_path):
         "n_layers": 1,
         "train_label_smoothing": 0.01,
         "warmup_iters": 1,
-        "max_iters": 1,
+        "cosine_schedule_period_iters": 1,
         "max_epochs": 20,
         "val_check_interval": 1,
         "model_save_folder_path": str(tmp_path),
diff --git a/tests/unit_tests/test_config.py b/tests/unit_tests/test_config.py
index 1924d122..fbe10eee 100644
--- a/tests/unit_tests/test_config.py
+++ b/tests/unit_tests/test_config.py
@@ -37,3 +37,15 @@ def test_override(tmp_path, tiny_config):
 
     with pytest.raises(KeyError):
         Config(filename)
+
+
+def test_deprecated(tmp_path, tiny_config):
+    filename = str(tmp_path / "config_deprecated.yml")
+    with open(tiny_config, "r") as f_in, open(filename, "w") as f_out:
+        cfg = yaml.safe_load(f_in)
+        # Insert deprecated config option.
+        cfg["max_iters"] = 1
+        yaml.safe_dump(cfg, f_out)
+
+    with pytest.warns(DeprecationWarning):
+        Config(filename)
diff --git a/tests/unit_tests/test_runner.py b/tests/unit_tests/test_runner.py
index efaceb6b..7febf3f7 100644
--- a/tests/unit_tests/test_runner.py
+++ b/tests/unit_tests/test_runner.py
@@ -62,7 +62,7 @@ def test_save_and_load_weights(tmp_path, mgf_small, tiny_config):
     other_config = Config(tiny_config)
     other_config.n_layers = 50  # lol
     other_config.n_beams = 12
-    other_config.max_iters = 2
+    other_config.cosine_schedule_period_iters = 2
     with torch.device("meta"):
         # Now load the weights into a new model
         # The device should be meta for all the weights.
@@ -72,7 +72,7 @@ def test_save_and_load_weights(tmp_path, mgf_small, tiny_config):
     obs_layers = runner.model.encoder.transformer_encoder.num_layers
     assert obs_layers == 1  # Match the original arch.
     assert runner.model.n_beams == 12  # Match the config
-    assert runner.model.max_iters == 2  # Match the config
+    assert runner.model.cosine_schedule_period_iters == 2  # Match the config
     assert next(runner.model.parameters()).device == torch.device("meta")
 
     # If the Trainer correctly moves the weights to the accelerator,
@@ -99,6 +99,34 @@ def test_save_and_load_weights(tmp_path, mgf_small, tiny_config):
         runner.evaluate([mgf_small])
 
 
+def test_save_and_load_weights_deprecated(tmp_path, mgf_small, tiny_config):
+    """Test saving and loading weights with deprecated config options."""
+    config = Config(tiny_config)
+    config.max_epochs = 1
+    config.cosine_schedule_period_iters = 5
+    ckpt = tmp_path / "test.ckpt"
+
+    with ModelRunner(config=config) as runner:
+        runner.train([mgf_small], [mgf_small])
+        runner.trainer.save_checkpoint(ckpt)
+
+    # Replace the new config option with the deprecated one.
+    ckpt_data = torch.load(ckpt)
+    ckpt_data["hyper_parameters"]["max_iters"] = 5
+    del ckpt_data["hyper_parameters"]["cosine_schedule_period_iters"]
+    torch.save(ckpt_data, str(ckpt))
+
+    # Inference.
+    with ModelRunner(config=config, model_filename=str(ckpt)) as runner:
+        runner.initialize_model(train=False)
+        assert runner.model.cosine_schedule_period_iters == 5
+    # Fine-tuning.
+    with ModelRunner(config=config, model_filename=str(ckpt)) as runner:
+        with pytest.warns(DeprecationWarning):
+            runner.train([mgf_small], [mgf_small])
+            assert "max_iters" not in runner.model.opt_kwargs
+
+
 def test_calculate_precision(tmp_path, mgf_small, tiny_config):
     """Test that this parameter is working correctly."""
     config = Config(tiny_config)