From a2a53eecb5f8c9b09e746cd2d36f2f2e7f36e6c6 Mon Sep 17 00:00:00 2001
From: Misko
Date: Tue, 29 Oct 2024 11:44:03 -0700
Subject: [PATCH] add hydra freeze backbone option (#898)

---
 src/fairchem/core/models/base.py          |   5 +
 tests/core/e2e/test_e2e_finetune_hydra.py | 119 +++++++++++++++++++++-
 2 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/src/fairchem/core/models/base.py b/src/fairchem/core/models/base.py
index ab3c95afa..32865e0ef 100644
--- a/src/fairchem/core/models/base.py
+++ b/src/fairchem/core/models/base.py
@@ -242,6 +242,7 @@ def __init__(
         finetune_config: dict | None = None,
         otf_graph: bool = True,
         pass_through_head_outputs: bool = False,
+        freeze_backbone: bool = False,
     ):
         super().__init__()
         self.device = None
@@ -282,6 +283,10 @@ def __init__(
                 "Backbone not specified and not found in the starting checkpoint"
             )
 
+        if freeze_backbone:
+            for param in self.backbone.parameters():
+                param.requires_grad = False
+
         if heads is not None:
             heads = copy.deepcopy(heads)
             # Iterate through outputs_cfg and create heads
diff --git a/tests/core/e2e/test_e2e_finetune_hydra.py b/tests/core/e2e/test_e2e_finetune_hydra.py
index 9a36e09ef..4dc2e2efc 100644
--- a/tests/core/e2e/test_e2e_finetune_hydra.py
+++ b/tests/core/e2e/test_e2e_finetune_hydra.py
@@ -5,10 +5,11 @@
 from pathlib import Path
 
 import pytest
-from fairchem.core.scripts.convert_hydra_to_release import convert_fine_tune_checkpoint
 import torch
 from test_e2e_commons import _run_main, oc20_lmdb_train_and_val_from_paths
 
+from fairchem.core.scripts.convert_hydra_to_release import convert_fine_tune_checkpoint
+
 
 @pytest.fixture()
 def tutorial_val_src(tutorial_dataset_path):
@@ -104,12 +105,122 @@ def verify_release_checkpoint(release_yaml_fn, release_checkpoint_fn, ft_state_d
     assert os.path.isfile(ck_release_ft_afterload_path)
     ft_after_state_dict = torch.load(ck_release_ft_afterload_path)["state_dict"]
     for key in ft_after_state_dict:
-        if key.startswith("module.backbone"):
-            assert torch.allclose(ft_after_state_dict[key], ft_state_dict[key])
-        elif key.startswith("module.output_heads") and key.endswith("weight"):
+        if (
+            key.startswith("module.backbone")
+            or key.startswith("module.output_heads")
+            and key.endswith("weight")
+        ):
             assert torch.allclose(ft_after_state_dict[key], ft_state_dict[key])
 
 
+def test_finetune_hydra_freeze_backbone(tutorial_val_src):
+    with tempfile.TemporaryDirectory() as orig_ckpt_dir:
+        starting_ckpt = make_checkpoint(orig_ckpt_dir, tutorial_val_src, 0)
+        old_state_dict = torch.load(starting_ckpt)["state_dict"]
+
+        # Test to make sure that without freezing, the backbone weights change
+        with tempfile.TemporaryDirectory() as ft_temp_dir:
+            ft_yml = Path("tests/core/models/test_configs/test_finetune_hydra.yml")
+            ck_ft_path = os.path.join(ft_temp_dir, "checkpoint_ft.pt")
+            model_config = {
+                "name": "hydra",
+                "finetune_config": {"starting_checkpoint": starting_ckpt},
+                "heads": {
+                    "energy": {"module": "equiformer_v2_energy_head"},
+                    "forces": {"module": "equiformer_v2_force_head"},
+                },
+            }
+
+            _run_main(
+                ft_temp_dir,
+                ft_yml,
+                update_dict_with={
+                    "optim": {
+                        "max_epochs": 1,
+                        "eval_every": 8,
+                        "batch_size": 1,
+                        "num_workers": 0,
+                        "lr_initial": 10.0,
+                    },
+                    "dataset": oc20_lmdb_train_and_val_from_paths(
+                        train_src=str(tutorial_val_src),
+                        val_src=str(tutorial_val_src),
+                        test_src=str(tutorial_val_src),
+                        otf_norms=False,
+                    ),
+                    "model": model_config,
+                },
+                update_run_args_with={"seed": 1000},
+                save_checkpoint_to=ck_ft_path,
+                world_size=1,
+            )
+
+            assert os.path.isfile(ck_ft_path)
+            ft_ckpt = torch.load(ck_ft_path)
+            assert "config" in ft_ckpt
+            assert ft_ckpt["config"]["model"]["name"] == "hydra"
+            # check that both the backbone and the head weights changed
+            ft_state_dict = ft_ckpt["state_dict"]
+            for key in ft_state_dict:
+                if key.startswith("module.backbone") and ".weight" in key:
+                    # backbone weights should have changed
+                    assert not torch.allclose(ft_state_dict[key], old_state_dict[key])
+                elif key.startswith("module.output_heads") and key.endswith("weight"):
+                    # head weights should differ because the heads are newly
+                    # initialized (with a different seed)
+                    assert not torch.allclose(ft_state_dict[key], old_state_dict[key])
+
+        # Test to make sure that with freeze_backbone the backbone weights are unchanged
+        with tempfile.TemporaryDirectory() as ft_temp_dir:
+            ft_yml = Path("tests/core/models/test_configs/test_finetune_hydra.yml")
+            ck_ft_path = os.path.join(ft_temp_dir, "checkpoint_ft.pt")
+            model_config = {
+                "name": "hydra",
+                "finetune_config": {"starting_checkpoint": starting_ckpt},
+                "heads": {
+                    "energy": {"module": "equiformer_v2_energy_head"},
+                    "forces": {"module": "equiformer_v2_force_head"},
+                },
+                "freeze_backbone": True,
+            }
+
+            _run_main(
+                ft_temp_dir,
+                ft_yml,
+                update_dict_with={
+                    "optim": {
+                        "max_epochs": 1,
+                        "eval_every": 8,
+                        "batch_size": 1,
+                        "num_workers": 0,
+                    },
+                    "dataset": oc20_lmdb_train_and_val_from_paths(
+                        train_src=str(tutorial_val_src),
+                        val_src=str(tutorial_val_src),
+                        test_src=str(tutorial_val_src),
+                        otf_norms=False,
+                    ),
+                    "model": model_config,
+                },
+                update_run_args_with={"seed": 1000},
+                save_checkpoint_to=ck_ft_path,
+                world_size=1,
+            )
+
+            assert os.path.isfile(ck_ft_path)
+            ft_ckpt = torch.load(ck_ft_path)
+            assert "config" in ft_ckpt
+            assert ft_ckpt["config"]["model"]["name"] == "hydra"
+            # check that the backbone weights are unchanged and the head weights changed
+            ft_state_dict = ft_ckpt["state_dict"]
+            for key in ft_state_dict:
+                if key.startswith("module.backbone"):
+                    # frozen backbone weights should be unchanged
+                    assert torch.allclose(ft_state_dict[key], old_state_dict[key])
+                elif key.startswith("module.output_heads") and key.endswith("weight"):
+                    # head weights should differ because the heads are newly
+                    # initialized (with a different seed)
+                    assert not torch.allclose(ft_state_dict[key], old_state_dict[key])
+
+
 def test_finetune_hydra_retain_backbone(tutorial_val_src):
     with tempfile.TemporaryDirectory() as orig_ckpt_dir:
         starting_ckpt = make_checkpoint(orig_ckpt_dir, tutorial_val_src, 0)
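
Usage note: with this patch applied, freezing the pretrained backbone during
fine-tuning is a single flag in the hydra model config. A minimal sketch,
mirroring the test above; the checkpoint path is a placeholder and everything
except "freeze_backbone" already existed before this patch:

    # Hypothetical fine-tuning model config; the checkpoint path is a placeholder.
    model_config = {
        "name": "hydra",
        "finetune_config": {"starting_checkpoint": "/path/to/pretrained_checkpoint.pt"},
        "heads": {
            "energy": {"module": "equiformer_v2_energy_head"},
            "forces": {"module": "equiformer_v2_force_head"},
        },
        # New in this patch: sets requires_grad=False on every backbone parameter,
        # so only the freshly initialized output heads receive gradient updates.
        "freeze_backbone": True,
    }

Because the flag only disables gradients on the backbone parameters, the frozen
weights come out of training unchanged, which is exactly what
test_finetune_hydra_freeze_backbone asserts with torch.allclose.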