From 1f0f631f612b3cf273b3b84fe3c8094367348354 Mon Sep 17 00:00:00 2001
From: rayg1234 <7001989+rayg1234@users.noreply.github.com>
Date: Mon, 19 Aug 2024 16:16:05 -0700
Subject: [PATCH] Fuse all hydras (#814)

* make hydra compat with multitask
* remove finetune hydra
* fix tests
* update comment
* update logic slightly
* ruff
* fix test
* add map location
* ruff
* fix tests
* get device from input in hydra
* update ocp_hydra_example.yml
* add logging
* update
---
 configs/ocp_hydra_example.yml | 2 +
 src/fairchem/core/common/utils.py | 19 ++
 src/fairchem/core/models/base.py | 119 +++++++-----
 src/fairchem/core/models/finetune_hydra.py | 177 ------------------
 src/fairchem/core/trainers/base_trainer.py | 8 -
 src/fairchem/core/trainers/ocp_trainer.py | 15 +-
 tests/core/e2e/test_e2e_finetune_hydra.py | 49 +++--
 tests/core/e2e/test_s2efs.py | 2 +-
 .../models/test_configs/test_dpp_hydra.yml | 3 +
 .../test_configs/test_equiformerv2_hydra.yml | 2 +
 .../models/test_configs/test_escn_hydra.yml | 2 +
 .../test_configs/test_finetune_hydra.yml | 2 +
 .../test_configs/test_gemnet_dt_hydra.yml | 3 +
 .../test_gemnet_dt_hydra_grad.yml | 3 +
 .../test_configs/test_gemnet_oc_hydra.yml | 2 +
 .../test_gemnet_oc_hydra_grad.yml | 3 +
 .../models/test_configs/test_painn_hydra.yml | 2 +
 tests/core/models/test_equiformer_v2.py | 4 +-
 18 files changed, 146 insertions(+), 271 deletions(-)
 delete mode 100644 src/fairchem/core/models/finetune_hydra.py

diff --git a/configs/ocp_hydra_example.yml b/configs/ocp_hydra_example.yml
index dbcadeff3..10373ad61 100755
--- a/configs/ocp_hydra_example.yml
+++ b/configs/ocp_hydra_example.yml
@@ -22,9 +22,11 @@ logger: wandb
 
 outputs:
   energy:
+    property: energy
     shape: 1
     level: system
   forces:
+    property: forces
    irrep_dim: 1
     level: atom
     train_on_free_atoms: True
diff --git a/src/fairchem/core/common/utils.py b/src/fairchem/core/common/utils.py
index 669449f0b..e762dfeb5 100644
--- a/src/fairchem/core/common/utils.py
+++ b/src/fairchem/core/common/utils.py
@@ -10,6 +10,7 @@
 import ast
 import collections
 import copy
+import errno
 import importlib
 import itertools
 import json
@@ -38,6 +39,7 @@
 from torch_scatter import scatter, segment_coo, segment_csr
 
 import fairchem.core
+from fairchem.core.common.registry import registry
 from fairchem.core.modules.loss import AtomwiseL2Loss, L2MAELoss
 
 if TYPE_CHECKING:
@@ -1370,3 +1372,20 @@ def get_loss_module(loss_name):
         raise NotImplementedError(f"Unknown loss function name: {loss_name}")
 
     return loss_fn
+
+
+def load_model_and_weights_from_checkpoint(checkpoint_path: str) -> nn.Module:
+    if not os.path.isfile(checkpoint_path):
+        raise FileNotFoundError(
+            errno.ENOENT, "Checkpoint file not found", checkpoint_path
+        )
+    logging.info(f"Loading checkpoint from: {checkpoint_path}")
+    checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+    # this assumes the checkpoint also contains the config with the full model in it
+    # TODO: need to schematize how we save and load the config from checkpoint
+    config = checkpoint["config"]["model"]
+    name = config.pop("name")
+    model = registry.get_model_class(name)(**config)
+    matched_dict = match_state_dict(model.state_dict(), checkpoint["state_dict"])
+    load_state_dict(model, matched_dict, strict=True)
+    return model
diff --git a/src/fairchem/core/models/base.py b/src/fairchem/core/models/base.py
index c070fea4e..480ee7d02 100644
--- a/src/fairchem/core/models/base.py
+++ b/src/fairchem/core/models/base.py
@@ -9,7 +9,7 @@
 
 import copy
 import logging
-from abc import ABC, ABCMeta, abstractmethod
+from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
@@ -21,6 +21,7 @@
 from fairchem.core.common.utils import (
     compute_neighbors,
     get_pbc_distances,
+    load_model_and_weights_from_checkpoint,
     radius_graph_pbc,
 )
 
@@ -232,64 +233,79 @@ def forward(self, data: Batch) -> dict[str, torch.Tensor]:
         return
 
 
-class HydraInterface(ABC):
-    # a hydra has a backbone and heads
-    @abstractmethod
-    def get_backbone(self) -> BackboneInterface:
-        raise not NotImplementedError
-
-    @abstractmethod
-    def get_heads(self) -> dict[str, HeadInterface]:
-        raise not NotImplementedError
-
-
 @registry.register_model("hydra")
-class HydraModel(nn.Module, GraphModelMixin, HydraInterface):
+class HydraModel(nn.Module, GraphModelMixin):
     def __init__(
         self,
-        backbone: dict,
-        heads: dict,
+        backbone: dict | None = None,
+        heads: dict | None = None,
+        finetune_config: dict | None = None,
         otf_graph: bool = True,
+        pass_through_head_outputs: bool = False,
     ):
         super().__init__()
+        self.device = None
         self.otf_graph = otf_graph
-        self.device = "cpu"
-        # make a copy so we don't modify the original config
-        backbone = copy.deepcopy(backbone)
-        heads = copy.deepcopy(heads)
-
-        backbone_model_name = backbone.pop("model")
-        self.backbone: BackboneInterface = registry.get_model_class(
-            backbone_model_name
-        )(
-            **backbone,
-        )
-
-        # Iterate through outputs_cfg and create heads
-        self.output_heads: dict[str, HeadInterface] = {}
-
-        head_names_sorted = sorted(heads.keys())
-        for head_name in head_names_sorted:
-            head_config = heads[head_name]
-            if "module" not in head_config:
-                raise ValueError(
-                    f"{head_name} head does not specify module to use for the head"
-                )
-
-            module_name = head_config.pop("module")
-            self.output_heads[head_name] = registry.get_model_class(module_name)(
-                self.backbone,
-                **head_config,
-            )
-
-        self.output_heads = torch.nn.ModuleDict(self.output_heads)
-
-    def to(self, *args, **kwargs):
-        if "device" in kwargs:
-            self.device = kwargs["device"]
-        return super().to(*args, **kwargs)
+        # This is required for hydras with models that have multiple outputs per head. Since we will deprecate
+        # the old config system at some point, this option avoids major modifications to the trainer,
+        # which expects the output names directly instead of head_name.property_name
+        self.pass_through_head_outputs = pass_through_head_outputs
+
+        # if finetune_config is provided, then attempt to load the model from the given finetune checkpoint
+        starting_model = None
+        if finetune_config is not None:
+            starting_model: HydraModel = load_model_and_weights_from_checkpoint(finetune_config["starting_checkpoint"])
+            logging.info(f"Found and loaded fine-tuning checkpoint: {finetune_config['starting_checkpoint']} (Note we are NOT loading the training state from this checkpoint, only parts of the model and weights)")
+            assert isinstance(starting_model, HydraModel), "Can only finetune starting from other hydra models!"
+
+        if backbone is not None:
+            backbone = copy.deepcopy(backbone)
+            backbone_model_name = backbone.pop("model")
+            self.backbone: BackboneInterface = registry.get_model_class(
+                backbone_model_name
+            )(
+                **backbone,
+            )
+        elif starting_model is not None:
+            self.backbone = starting_model.backbone
+            logging.info(f"User did not specify a backbone, using the backbone from the starting checkpoint {self.backbone}")
+        else:
+            raise RuntimeError("Backbone not specified and not found in the starting checkpoint")
+
+        if heads is not None:
+            heads = copy.deepcopy(heads)
+            # Iterate through the heads config and create heads
+            self.output_heads: dict[str, HeadInterface] = {}
+
+            head_names_sorted = sorted(heads.keys())
+            assert len(set(head_names_sorted)) == len(head_names_sorted), "Head names must be unique!"
+            for head_name in head_names_sorted:
+                head_config = heads[head_name]
+                if "module" not in head_config:
+                    raise ValueError(
+                        f"{head_name} head does not specify module to use for the head"
+                    )
+
+                module_name = head_config.pop("module")
+                self.output_heads[head_name] = registry.get_model_class(module_name)(
+                    self.backbone,
+                    **head_config,
+                )
+
+            self.output_heads = torch.nn.ModuleDict(self.output_heads)
+        elif starting_model is not None:
+            self.output_heads = starting_model.output_heads
+            logging.info(f"User did not specify heads, using the output heads from the starting checkpoint {self.output_heads}")
+        else:
+            raise RuntimeError("Heads not specified and not found in the starting checkpoint")
 
     def forward(self, data: Batch):
+        # lazily get the device from the input to use with amp; at least one input must be a tensor to figure out its device
+        if not self.device:
+            device_from_tensors = {x.device.type for x in data.values() if isinstance(x, torch.Tensor)}
+            assert len(device_from_tensors) == 1, f"all inputs must be on the same device, found the following devices {device_from_tensors}"
+            self.device = device_from_tensors.pop()
+
         emb = self.backbone(data)
         # Predict all output properties for all structures in the batch for now.
out = {} @@ -297,12 +313,11 @@ def forward(self, data: Batch): with torch.autocast( device_type=self.device, enabled=self.output_heads[k].use_amp ): - out.update(self.output_heads[k](data, emb)) + if self.pass_through_head_outputs: + out.update(self.output_heads[k](data, emb)) + else: + out[k] = self.output_heads[k](data, emb) return out - def get_backbone(self) -> BackboneInterface: - return self.backbone - def get_heads(self) -> dict[str, HeadInterface]: - return self.output_heads diff --git a/src/fairchem/core/models/finetune_hydra.py b/src/fairchem/core/models/finetune_hydra.py deleted file mode 100644 index 6c271e24e..000000000 --- a/src/fairchem/core/models/finetune_hydra.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import annotations - -import copy -import errno -import logging -import os -from enum import Enum -from typing import TYPE_CHECKING - -import torch -from torch import nn - -from fairchem.core.common.registry import registry -from fairchem.core.common.utils import load_state_dict, match_state_dict -from fairchem.core.models.base import BackboneInterface, HeadInterface, HydraInterface - -if TYPE_CHECKING: - from torch_geometric.data import Batch - -FTHYDRA_NAME = "finetune_hydra" - -class FineTuneMode(Enum): - # in DATA_ONLY, we load the entire model and only finetune on new data - DATA_ONLY = 1 - # in this mode, we only load the Backbone and feed the output of the backbone - # to new heads that are specified - RETAIN_BACKBONE_ONLY = 2 - - -def get_model_config_from_checkpoint(checkpoint_path: str) -> dict: - if not os.path.isfile(checkpoint_path): - raise FileNotFoundError( - errno.ENOENT, "Checkpoint file not found", checkpoint_path - ) - checkpoint = torch.load(checkpoint_path) - return checkpoint["config"]["model"] - - -def load_hydra_model(checkpoint_path: str) -> HydraInterface: - if not os.path.isfile(checkpoint_path): - raise FileNotFoundError( - errno.ENOENT, "Checkpoint file not found", checkpoint_path - ) - logging.info(f"Loading checkpoint from: {checkpoint_path}") - checkpoint = torch.load(checkpoint_path) - config = checkpoint["config"]["model"] - name = config.pop("name") - hydra_model = registry.get_model_class(name)(**config) - assert isinstance( - hydra_model, HydraInterface - ), "Can only load models with the HydraInterface" - matched_dict = match_state_dict(hydra_model.state_dict(), checkpoint["state_dict"]) - load_state_dict(hydra_model, matched_dict, strict=True) - return hydra_model - - -class FTConfig: - FT_CONFIG_NAME = "finetune_config" - STARTING_CHECKPOINT = "starting_checkpoint" - STARTING_MODEL = "starting_model" - MODE = "mode" - HEADS = "heads" - - def __init__(self, config: dict): - self.config = config - self._mode = FineTuneMode[self.config[FTConfig.MODE]] - assert ( - (FTConfig.STARTING_CHECKPOINT in self.config) - or (FTConfig.STARTING_MODEL in self.config) - ), "Either a starting checkpoint or a starting model must be provided!" - assert FTConfig.MODE in self.config - if self._mode == FineTuneMode.RETAIN_BACKBONE_ONLY: - # in this mode, we keep the backbone but attach new output heads specified in head config - assert ( - FTConfig.HEADS in self.config - ), "heads cannot be empty when using RETAIN_BACKBONE_ONLY mode!" 
- - def load_model(self) -> nn.Module: - # if provided a hydra config to start, build from the starting hydra model - # this assumes the weights are loaded from the state_dict in the checkpoint.pt file instead - # so no actual weights are loaded here - if FTConfig.STARTING_MODEL in self.config: - # register model from hydra_config - config_copy = copy.deepcopy(self.config[FTConfig.STARTING_MODEL]) - name = config_copy.pop("name") - hydra_model = registry.get_model_class(name)(**config_copy) - # if provided a checkpoint to start then load the model and weights from the given checkpoint - # this happens used in the beginning of a finetuning run - elif FTConfig.STARTING_CHECKPOINT in self.config: - hydra_model: HydraInterface = load_hydra_model( - self.config[FTConfig.STARTING_CHECKPOINT] - ) - assert isinstance(hydra_model, HydraInterface) - - num_params = sum(p.numel() for p in hydra_model.parameters()) - logging.info(f"Loaded Original hydra model with {num_params} params") - return hydra_model - - def get_standalone_config(self) -> dict: - # replace a config with a checkpoint with one that has the model config only - # this is required for standalone prediction (so we don't need to ship the original checkpoint), - # multi-round finetuning, and better robustness - standalone_config = { - "name": FTHYDRA_NAME, - FTConfig.FT_CONFIG_NAME: self.config, - } - if FTConfig.STARTING_CHECKPOINT in self.config: - # modify the config to store the original model config inside model attrs - # so we dont need the checkpoint again when loading from checkpoint - new_config = copy.deepcopy(self.config) - new_config[FTConfig.STARTING_MODEL] = ( - get_model_config_from_checkpoint( - self.config[FTConfig.STARTING_CHECKPOINT] - ) - ) - standalone_config[FTConfig.FT_CONFIG_NAME] = new_config - return standalone_config - - @property - def mode(self) -> FineTuneMode: - return self._mode - - @property - def head_config(self) -> dict: - return copy.deepcopy(self.config[FTConfig.HEADS]) - - -@registry.register_model(FTHYDRA_NAME) -class FineTuneHydra(nn.Module, HydraInterface): - def __init__(self, finetune_config: dict): - super().__init__() - ft_config = FTConfig(finetune_config) - logging.info(f"Initializing FineTuneHydra model in {ft_config.mode} mode") - hydra_model: HydraInterface = ft_config.load_model() - self.backbone: BackboneInterface = hydra_model.get_backbone() - - if ft_config.mode == FineTuneMode.DATA_ONLY: - # in this mode, we just use the model as is and train on it with new data - self.output_heads: dict[str, HeadInterface] = hydra_model.get_heads() - elif ft_config.mode == FineTuneMode.RETAIN_BACKBONE_ONLY: - # in this mode, we keep the backbone but attach new output heads specified in head config - self.output_heads: dict[str, HeadInterface] = {} - heads_config = ft_config.head_config - head_names_sorted = sorted(heads_config.keys()) - for head_name in head_names_sorted: - head_config = heads_config[head_name] - if "module" not in head_config: - raise ValueError( - f"{head_name} head does not specify module to use for the head" - ) - - module_name = head_config.pop("module") - self.output_heads[head_name] = registry.get_model_class(module_name)( - self.backbone, - **head_config, - ) - num_params = sum( - p.numel() for p in self.output_heads[head_name].parameters() - ) - logging.info( - f"Attaching new output head: {module_name} with {num_params} params" - ) - self.output_heads = torch.nn.ModuleDict(self.output_heads) - - - def forward(self, data: Batch): - emb = self.backbone(data) - out = {} - 
for k in self.output_heads: - out.update(self.output_heads[k](data, emb)) - return out - - def get_backbone(self) -> BackboneInterface: - return self.backbone - - def get_heads(self) -> dict[str, HeadInterface]: - return self.output_heads diff --git a/src/fairchem/core/trainers/base_trainer.py b/src/fairchem/core/trainers/base_trainer.py index a31421a75..d84a2c12f 100644 --- a/src/fairchem/core/trainers/base_trainer.py +++ b/src/fairchem/core/trainers/base_trainer.py @@ -44,7 +44,6 @@ update_config, ) from fairchem.core.datasets.base_dataset import create_dataset -from fairchem.core.models.finetune_hydra import FineTuneHydra, FTConfig from fairchem.core.modules.evaluator import Evaluator from fairchem.core.modules.exponential_moving_average import ExponentialMovingAverage from fairchem.core.modules.loss import DDPLoss @@ -716,13 +715,6 @@ def save( training_state: bool = True, ) -> str | None: if not self.is_debug and distutils.is_master(): - # if we are using a FineTune-able model, then we need to modify the config to remove - # the original starting checkpoint so it can be loaded standalone, can move this to save function - if isinstance(self.model, FineTuneHydra): - self.config["model"] = FTConfig( - self.config["model"][FTConfig.FT_CONFIG_NAME] - ).get_standalone_config() - state = { "state_dict": self.model.state_dict(), "normalizers": { diff --git a/src/fairchem/core/trainers/ocp_trainer.py b/src/fairchem/core/trainers/ocp_trainer.py index 662341bdc..0ced35bef 100644 --- a/src/fairchem/core/trainers/ocp_trainer.py +++ b/src/fairchem/core/trainers/ocp_trainer.py @@ -251,9 +251,18 @@ def _forward(self, batch): for target_key in self.output_targets: ### Target property is a direct output of the model if target_key in out: - pred = out[target_key] - ## Target property is a derived output of the model. Construct the - ## parent property + if isinstance(out[target_key], torch.Tensor): + pred = out[target_key] + elif isinstance(out[target_key], dict): + # if output is a nested dictionary (in the case of hydra models), we attempt to retrieve it using the property name + # ie: "output_head_name.property" + assert "property" in self.output_targets[target_key], \ + f"we need to know which property to match the target to, please specify the property field in the task config, current config: {self.output_targets[target_key]}" + property = self.output_targets[target_key]["property"] + pred = out[target_key][property] + + ## TODO: deprecate the following logic? + ## Otherwise, assume target property is a derived output of the model. 
Construct the parent property else: _max_rank = 0 for subtarget_key in self.output_targets[target_key]["decomposition"]: diff --git a/tests/core/e2e/test_e2e_finetune_hydra.py b/tests/core/e2e/test_e2e_finetune_hydra.py index 91f2abd49..df9def3b0 100644 --- a/tests/core/e2e/test_e2e_finetune_hydra.py +++ b/tests/core/e2e/test_e2e_finetune_hydra.py @@ -8,8 +8,6 @@ import torch from test_e2e_commons import _run_main, oc20_lmdb_train_and_val_from_paths -from fairchem.core.models.finetune_hydra import FTHYDRA_NAME, FineTuneMode, FTConfig - @pytest.fixture() def tutorial_val_src(tutorial_dataset_path): @@ -49,7 +47,7 @@ def run_main_with_ft_hydra(tempdir: str, yaml: str, data_src: str, run_args: dict, - ft_config: str, + model_config: str, output_checkpoint: str): _run_main( tempdir, @@ -68,10 +66,7 @@ def run_main_with_ft_hydra(tempdir: str, test_src=str(data_src), otf_norms=False, ), - "model": { - "name": FTHYDRA_NAME, - FTConfig.FT_CONFIG_NAME: ft_config, - } + "model": model_config, }, update_run_args_with=run_args, save_checkpoint_to=output_checkpoint, @@ -87,9 +82,9 @@ def test_finetune_hydra_retain_backbone(tutorial_val_src): with tempfile.TemporaryDirectory() as ft_temp_dir: ft_yml = Path("tests/core/models/test_configs/test_finetune_hydra.yml") ck_ft_path = os.path.join(ft_temp_dir, "checkpoint_ft.pt") - ft_config = { - "mode": FineTuneMode.RETAIN_BACKBONE_ONLY.name, - "starting_checkpoint": starting_ckpt, + model_config = { + "name" : "hydra", + "finetune_config": {'starting_checkpoint': starting_ckpt}, "heads": { "energy": { "module": "equiformer_v2_energy_head" @@ -103,12 +98,12 @@ def test_finetune_hydra_retain_backbone(tutorial_val_src): yaml = ft_yml, data_src = tutorial_val_src, run_args = {"seed": 1000}, - ft_config = ft_config, + model_config = model_config, output_checkpoint = ck_ft_path) assert os.path.isfile(ck_ft_path) ft_ckpt = torch.load(ck_ft_path) assert "config" in ft_ckpt - assert ft_ckpt["config"]["model"]["name"] == FTHYDRA_NAME + assert ft_ckpt["config"]["model"]["name"] == "hydra" # check that the backbone weights are the same, and other weights are not the same new_state_dict = ft_ckpt["state_dict"] for key in new_state_dict: @@ -128,28 +123,26 @@ def test_finetune_hydra_data_only(tutorial_val_src): with tempfile.TemporaryDirectory() as ft_temp_dir: ft_yml = Path("tests/core/models/test_configs/test_finetune_hydra.yml") ck_ft_path = os.path.join(ft_temp_dir, "checkpoint_ft.pt") - ft_config = { - "mode": FineTuneMode.DATA_ONLY.name, - "starting_checkpoint": starting_ckpt, + model_config = { + "name" : "hydra", + "finetune_config": {'starting_checkpoint': starting_ckpt}, } run_main_with_ft_hydra(tempdir = ft_temp_dir, yaml = ft_yml, data_src = tutorial_val_src, run_args = {"seed": 1000}, - ft_config = ft_config, + model_config = model_config, output_checkpoint = ck_ft_path) assert os.path.isfile(ck_ft_path) ft_ckpt = torch.load(ck_ft_path) assert "config" in ft_ckpt config_model = ft_ckpt["config"]["model"] - assert config_model["name"] == FTHYDRA_NAME + assert config_model["name"] == "hydra" # check that the entire model weights are the same new_state_dict = ft_ckpt["state_dict"] assert len(new_state_dict) == len(old_state_dict) for key in new_state_dict: assert torch.allclose(new_state_dict[key], old_state_dict[key]) - # check the new checkpoint contains a hydra model - assert FTConfig.STARTING_MODEL in config_model[FTConfig.FT_CONFIG_NAME] def test_finetune_from_finetunehydra(tutorial_val_src): @@ -159,15 +152,15 @@ def 
test_finetune_from_finetunehydra(tutorial_val_src): with tempfile.TemporaryDirectory() as finetune_run1_dir: ft_yml = Path("tests/core/models/test_configs/test_finetune_hydra.yml") ck_ft_path = os.path.join(finetune_run1_dir, "checkpoint_ft.pt") - ft_config_1 = { - "mode": FineTuneMode.DATA_ONLY.name, - "starting_checkpoint": starting_ckpt, + model_config_1 = { + "name" : "hydra", + "finetune_config": {'starting_checkpoint': starting_ckpt}, } run_main_with_ft_hydra(tempdir = finetune_run1_dir, yaml = ft_yml, data_src = tutorial_val_src, run_args = {"seed": 1000}, - ft_config = ft_config_1, + model_config = model_config_1, output_checkpoint = ck_ft_path) assert os.path.isfile(ck_ft_path) @@ -175,20 +168,20 @@ def test_finetune_from_finetunehydra(tutorial_val_src): ######################################################################################## with tempfile.TemporaryDirectory() as finetune_run2_dir: ck_ft2_path = os.path.join(finetune_run2_dir, "checkpoint_ft.pt") - ft_config_2 = { - "mode": FineTuneMode.DATA_ONLY.name, - "starting_checkpoint": ck_ft_path, + model_config_2 = { + "name" : "hydra", + "finetune_config": {'starting_checkpoint': ck_ft_path}, } run_main_with_ft_hydra(tempdir = finetune_run2_dir, yaml = ft_yml, data_src = tutorial_val_src, run_args = {"seed": 1000}, - ft_config = ft_config_2, + model_config = model_config_2, output_checkpoint = ck_ft2_path) ft_ckpt2 = torch.load(ck_ft2_path) assert "config" in ft_ckpt2 config_model = ft_ckpt2["config"]["model"] - assert config_model["name"] == FTHYDRA_NAME + assert config_model["name"] == "hydra" old_state_dict = torch.load(ck_ft_path)["state_dict"] new_state_dict = ft_ckpt2["state_dict"] # the state dicts should still be identical because we made the LR = 0.0 diff --git a/tests/core/e2e/test_s2efs.py b/tests/core/e2e/test_s2efs.py index 94b0862ed..037979e60 100644 --- a/tests/core/e2e/test_s2efs.py +++ b/tests/core/e2e/test_s2efs.py @@ -44,7 +44,7 @@ def test_smoke_s2efs_predict( {"forces": {"fn": "l2mae", "coefficient": 100}}, {"stress": {"fn": "mae", "coefficient": 100}}, ], - "outputs": {"stress": {"level": "system", "irrep_dim": 2}}, + "outputs": {"stress": {"level": "system", "irrep_dim": 2, "property": "stress"}}, "evaluation_metrics": {"metrics": {"stress": ["mae"]}}, "dataset": { "train": { diff --git a/tests/core/models/test_configs/test_dpp_hydra.yml b/tests/core/models/test_configs/test_dpp_hydra.yml index e41a39141..24f13e6ad 100755 --- a/tests/core/models/test_configs/test_dpp_hydra.yml +++ b/tests/core/models/test_configs/test_dpp_hydra.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True @@ -52,6 +54,7 @@ model: heads: energy: module: dimenetplusplus_energy_and_force_head + pass_through_head_outputs: True # *** Important note *** # The total number of gpus used for this run was 256. 
diff --git a/tests/core/models/test_configs/test_equiformerv2_hydra.yml b/tests/core/models/test_configs/test_equiformerv2_hydra.yml index 1852799f5..9747eec80 100644 --- a/tests/core/models/test_configs/test_equiformerv2_hydra.yml +++ b/tests/core/models/test_configs/test_equiformerv2_hydra.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True diff --git a/tests/core/models/test_configs/test_escn_hydra.yml b/tests/core/models/test_configs/test_escn_hydra.yml index c51d46fc3..8d730dad0 100644 --- a/tests/core/models/test_configs/test_escn_hydra.yml +++ b/tests/core/models/test_configs/test_escn_hydra.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True diff --git a/tests/core/models/test_configs/test_finetune_hydra.yml b/tests/core/models/test_configs/test_finetune_hydra.yml index a5f1dc51b..e1be6d20b 100644 --- a/tests/core/models/test_configs/test_finetune_hydra.yml +++ b/tests/core/models/test_configs/test_finetune_hydra.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True diff --git a/tests/core/models/test_configs/test_gemnet_dt_hydra.yml b/tests/core/models/test_configs/test_gemnet_dt_hydra.yml index 036ed689f..4b5c239fc 100644 --- a/tests/core/models/test_configs/test_gemnet_dt_hydra.yml +++ b/tests/core/models/test_configs/test_gemnet_dt_hydra.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True @@ -73,6 +75,7 @@ model: forces: module: gemnet_t_force_head + optim: batch_size: 8 eval_batch_size: 8 diff --git a/tests/core/models/test_configs/test_gemnet_dt_hydra_grad.yml b/tests/core/models/test_configs/test_gemnet_dt_hydra_grad.yml index 358dd1c86..e8e4eca9c 100644 --- a/tests/core/models/test_configs/test_gemnet_dt_hydra_grad.yml +++ b/tests/core/models/test_configs/test_gemnet_dt_hydra_grad.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True @@ -71,6 +73,7 @@ model: heads: energy_and_forces: module: gemnet_t_energy_and_grad_force_head + pass_through_head_outputs: True optim: batch_size: 8 diff --git a/tests/core/models/test_configs/test_gemnet_oc_hydra.yml b/tests/core/models/test_configs/test_gemnet_oc_hydra.yml index 716718e3e..a58d328bd 100644 --- a/tests/core/models/test_configs/test_gemnet_oc_hydra.yml +++ b/tests/core/models/test_configs/test_gemnet_oc_hydra.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True diff --git a/tests/core/models/test_configs/test_gemnet_oc_hydra_grad.yml b/tests/core/models/test_configs/test_gemnet_oc_hydra_grad.yml index 90001488b..88cb493f4 100644 --- a/tests/core/models/test_configs/test_gemnet_oc_hydra_grad.yml +++ b/tests/core/models/test_configs/test_gemnet_oc_hydra_grad.yml @@ -3,9 +3,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True @@ -96,6 +98,7 @@ model: energy: module: gemnet_oc_energy_and_grad_force_head 
num_global_out_layers: 2 + pass_through_head_outputs: True optim: batch_size: 5 diff --git a/tests/core/models/test_configs/test_painn_hydra.yml b/tests/core/models/test_configs/test_painn_hydra.yml index 2c4731742..a6b26c5d4 100644 --- a/tests/core/models/test_configs/test_painn_hydra.yml +++ b/tests/core/models/test_configs/test_painn_hydra.yml @@ -2,9 +2,11 @@ trainer: forces outputs: energy: + property: energy shape: 1 level: system forces: + property: forces irrep_dim: 1 level: atom train_on_free_atoms: True diff --git a/tests/core/models/test_equiformer_v2.py b/tests/core/models/test_equiformer_v2.py index 54d58db1c..2f0903608 100644 --- a/tests/core/models/test_equiformer_v2.py +++ b/tests/core/models/test_equiformer_v2.py @@ -264,12 +264,12 @@ def test_eqv2_hydra_activation_checkpoint(): # way to do this is save the rng state and reset it after stepping the first model start_rng_state = torch.random.get_rng_state() outputs_no_ac = no_ac_model(inputs) - torch.autograd.backward(outputs_no_ac["energy"].sum() + outputs_no_ac["forces"].sum()) + torch.autograd.backward(outputs_no_ac["energy"]["energy"].sum() + outputs_no_ac["forces"]["forces"].sum()) # reset the rng state to the beginning torch.random.set_rng_state(start_rng_state) outptuts_ac = ac_model(inputs) - torch.autograd.backward(outptuts_ac["energy"].sum() + outptuts_ac["forces"].sum()) + torch.autograd.backward(outptuts_ac["energy"]["energy"].sum() + outptuts_ac["forces"]["forces"].sum()) # assert all the gradients are identical between the model with checkpointing and no checkpointing ac_model_grad_dict = {name:p.grad for name, p in ac_model.named_parameters() if p.grad is not None}
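A minimal sketch of the model section of a fine-tuning config under the fused hydra model, pieced together from the model_config dicts used in tests/core/e2e/test_e2e_finetune_hydra.py above. The checkpoint path is a placeholder, and the heads block is optional: when it (or the backbone) is omitted, HydraModel reuses the heads (or backbone) stored in the starting checkpoint.

model:
  name: hydra
  finetune_config:
    starting_checkpoint: /path/to/checkpoint.pt  # placeholder path, not part of this patch
  heads:
    energy:
      module: equiformer_v2_energy_head
    # additional heads (e.g. a force head) are declared the same way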