From cafa32a117a76cb47d57c70c9460505c93f75d3d Mon Sep 17 00:00:00 2001
From: Kristof Schroeder
Date: Fri, 3 May 2024 01:28:19 +0200
Subject: [PATCH 1/7] Fix missing move to model device for EkfacInfluence
 implementation

---
 .../influence/torch/influence_function_model.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index 46a5fa16e..4a6cb638c 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -1195,7 +1195,7 @@ def _get_kfac_blocks(
             data, disable=not self.progress, desc="K-FAC blocks - batch progress"
         ):
             data_len += x.shape[0]
-            pred_y = self.model(x)
+            pred_y = self.model(x.to(self.model_device))
             loss = empirical_cross_entropy_loss_fn(pred_y)
             loss.backward()
@@ -1319,7 +1319,7 @@ def _update_diag(
             data, disable=not self.progress, desc="Update Diagonal - batch progress"
         ):
             data_len += x.shape[0]
-            pred_y = self.model(x)
+            pred_y = self.model(x.to(self.model_device))
             loss = empirical_cross_entropy_loss_fn(pred_y)
             loss.backward()
@@ -1526,7 +1526,10 @@ def influences_from_factors_by_layer(
             influences = {}
             for layer_id, layer_z_test in z_test_factors.items():
                 end_idx = start_idx + layer_z_test.shape[1]
-                influences[layer_id] = layer_z_test @ total_grad[:, start_idx:end_idx].T
+                influences[layer_id] = (
+                    layer_z_test.to(self.model_device)
+                    @ total_grad[:, start_idx:end_idx].T
+                )
                 start_idx = end_idx
             return influences
         elif mode == InfluenceMode.Perturbation:
@@ -1539,7 +1542,7 @@
                 end_idx = start_idx + layer_z_test.shape[1]
                 influences[layer_id] = torch.einsum(
                     "ia,j...a->ij...",
-                    layer_z_test,
+                    layer_z_test.to(self.model_device),
                     total_mixed_grad[:, start_idx:end_idx],
                 )
                 start_idx = end_idx
             return influences
@@ -1626,7 +1629,7 @@ def explore_hessian_regularization(
             being dictionaries containing the influences for each layer of the model,
             with the layer name as key.
""" - grad = self._loss_grad(x, y) + grad = self._loss_grad(x.to(self.model_device), y.to(self.model_device)) influences_by_reg_value = {} for reg_value in regularization_values: reg_factors = self._solve_hvp_by_layer( From a151422d31dff79624b5750a42d280b5ab7ee061 Mon Sep 17 00:00:00 2001 From: Kristof Schroeder Date: Fri, 3 May 2024 01:35:01 +0200 Subject: [PATCH 2/7] Update CHANGELOG.md --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52bc910a4..abea5f5ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## Unreleased + +### Fixed + +- Fixed missing move of tensors to model device in `EkfacInfluence` + implementation [PR #570](https://github.com/aai-institute/pyDVL/pull/570) + ## 0.9.1 - Bug fixes, logging improvement ### Fixed From 36ea3bada2221febe1b1cb75ff687ad122ce2fdb Mon Sep 17 00:00:00 2001 From: Kristof Schroeder Date: Fri, 3 May 2024 11:31:39 +0200 Subject: [PATCH 3/7] Add device move in influence_from_factors method in base class TorchInfluenceFunctionModel --- src/pydvl/influence/torch/influence_function_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py index 4a6cb638c..08fd64a14 100644 --- a/src/pydvl/influence/torch/influence_function_model.py +++ b/src/pydvl/influence/torch/influence_function_model.py @@ -303,13 +303,13 @@ def influences_from_factors( """ if mode == InfluenceMode.Up: return ( - z_test_factors + z_test_factors.to(self.model_device) @ self._loss_grad(x.to(self.model_device), y.to(self.model_device)).T ) elif mode == InfluenceMode.Perturbation: return torch.einsum( "ia,j...a->ij...", - z_test_factors, + z_test_factors.to(self.model_device), self._flat_loss_mixed_grad( x.to(self.model_device), y.to(self.model_device) ), From 919e73f17063aaaa00515a67ec3b3d17338f51bb Mon Sep 17 00:00:00 2001 From: Kristof Schroeder Date: Fri, 3 May 2024 12:16:53 +0200 Subject: [PATCH 4/7] Overwrite `to` method of `CgInfluence`, add `to` method to preconditoners, fix wrong device for indices array in block CG implementation --- .../influence/torch/influence_function_model.py | 9 ++++++++- src/pydvl/influence/torch/pre_conditioner.py | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py index 46a5fa16e..b4ec964cc 100644 --- a/src/pydvl/influence/torch/influence_function_model.py +++ b/src/pydvl/influence/torch/influence_function_model.py @@ -706,7 +706,9 @@ def mat_mat(x: torch.Tensor): R = (rhs - mat_mat(X)).T Z = R if self.pre_conditioner is None else self.pre_conditioner.solve(R) P, _, _ = torch.linalg.svd(Z, full_matrices=False) - active_indices = torch.as_tensor(list(range(X.shape[-1])), dtype=torch.long) + active_indices = torch.as_tensor( + list(range(X.shape[-1])), dtype=torch.long, device=self.model_device + ) maxiter = self.maxiter if self.maxiter is not None else len(rhs) * 10 y_norm = torch.linalg.norm(rhs, dim=1) @@ -758,6 +760,11 @@ def mat_mat(x: torch.Tensor): return X.T + def to(self, device: torch.device): + if self.pre_conditioner is not None: + self.pre_conditioner = self.pre_conditioner.to(device) + return super().to(device) + class LissaInfluence(TorchInfluenceFunctionModel): r""" diff --git a/src/pydvl/influence/torch/pre_conditioner.py b/src/pydvl/influence/torch/pre_conditioner.py index 
--- a/src/pydvl/influence/torch/pre_conditioner.py
+++ b/src/pydvl/influence/torch/pre_conditioner.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
 from typing import Callable, Optional
 
@@ -70,6 +72,11 @@ def solve(self, rhs: torch.Tensor):
     def _solve(self, rhs: torch.Tensor):
         pass
 
+    @abstractmethod
+    def to(self, device: torch.device) -> PreConditioner:
+        """Implement this to move the (potentially fitted) preconditioner to a
+        specific device"""
+
 
 class JacobiPreConditioner(PreConditioner):
     r"""
@@ -141,6 +148,11 @@ def _solve(self, rhs: torch.Tensor):
 
         return rhs * inv_diag.unsqueeze(-1)
 
+    def to(self, device: torch.device) -> JacobiPreConditioner:
+        if self._diag is not None:
+            self._diag = self._diag.to(device)
+        return self
+
 
 class NystroemPreConditioner(PreConditioner):
     r"""
@@ -233,3 +245,8 @@ def _solve(self, rhs: torch.Tensor):
             result = result.squeeze()
 
         return result
+
+    def to(self, device: torch.device) -> NystroemPreConditioner:
+        if self._low_rank_approx is not None:
+            self._low_rank_approx = self._low_rank_approx.to(device)
+        return self

From 6375afe31d900698bb30ac42a69657d65fbdc31c Mon Sep 17 00:00:00 2001
From: Kristof Schroeder
Date: Fri, 3 May 2024 12:24:26 +0200
Subject: [PATCH 5/7] Update CHANGELOG.md

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52bc910a4..e2d4bf923 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## Unreleased
+
+### Fixed
+
+- Missing move to device of `preconditioner` in `CgInfluence` implementation
+  [PR #572](https://github.com/aai-institute/pyDVL/pull/572)
+
 ## 0.9.1 - Bug fixes, logging improvement
 
 ### Fixed

From 18d4fb8ebd1acf0ab12a81bc5543e2619e313cc2 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder
Date: Fri, 3 May 2024 13:25:22 +0200
Subject: [PATCH 6/7] Add functionality to set a device fixture depending on
 the availability of cuda and user input (pytest --with-cuda)

---
 CONTRIBUTING.md                               |   7 +
 tests/conftest.py                             |   6 +
 tests/influence/torch/conftest.py             |  12 ++
 tests/influence/torch/test_influence_model.py | 143 ++++++++++++------
 4 files changed, 121 insertions(+), 47 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 56d8ead7b..ecd1288de 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -131,6 +131,13 @@ There are a few important arguments:
 - `--slow-tests` enables running slow tests. See below for a description
   of slow tests.
 
+- `--with-cuda` sets the device fixture in [tests/influence/torch/conftest.py](
+  tests/influence/torch/conftest.py) to `cuda` if it is available.
+  Using this fixture within tests, you can run parts of your tests on a `cuda`
+  device. Be aware that you still have to handle device placement manually
+  within each specific test; setting this flag does not automatically run
+  all tests on a GPU.
+
 ### Markers
 
 We use a few different markers to differentiate between tests and runs
diff --git a/tests/conftest.py b/tests/conftest.py
index b08f09377..d8594c314 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -48,6 +48,12 @@ def pytest_addoption(parser):
         default=False,
         help="Disable reporting. Verbose mode takes precedence.",
     )
+    parser.addoption(
+        "--with-cuda",
+        action="store_true",
+        default=False,
+        help="Set device fixture to 'cuda' if available",
+    )
 
 
 @pytest.fixture
diff --git a/tests/influence/torch/conftest.py b/tests/influence/torch/conftest.py
index b16a2d856..37459f1cc 100644
--- a/tests/influence/torch/conftest.py
+++ b/tests/influence/torch/conftest.py
@@ -1,5 +1,6 @@
 from typing import Tuple
 
+import pytest
 import torch
 from numpy.typing import NDArray
 from torch.optim import LBFGS
@@ -59,3 +60,14 @@ def closure():
 def torch_linear_model_to_numpy(model: torch.nn.Linear) -> Tuple[NDArray, NDArray]:
     model.eval()
     return model.weight.data.numpy(), model.bias.data.numpy()
+
+
+@pytest.fixture(scope="session")
+def device(request):
+    import torch
+
+    use_cuda = request.config.getoption("--with-cuda")
+    if use_cuda and torch.cuda.is_available():
+        return torch.device("cuda")
+    else:
+        return torch.device("cpu")
diff --git a/tests/influence/torch/test_influence_model.py b/tests/influence/torch/test_influence_model.py
index 0631c60fc..d2203a84e 100644
--- a/tests/influence/torch/test_influence_model.py
+++ b/tests/influence/torch/test_influence_model.py
@@ -340,6 +340,7 @@ def test_influence_linear_model(
     rtol,
     mode: InfluenceMode,
     train_set_size: int,
+    device: torch.device,
     hessian_reg: float = 0.1,
     test_set_size: int = 20,
     problem_dimension: Tuple[int, int] = (4, 20),
@@ -373,16 +374,20 @@ def test_influence_linear_model(
     train_data_set = TensorDataset(*list(map(torch.from_numpy, train_data)))
     train_data_loader = DataLoader(train_data_set, batch_size=40, num_workers=0)
-    influence = influence_factory(linear_layer, loss, train_data_loader, hessian_reg)
+    influence = influence_factory(
+        linear_layer.to(device), loss, train_data_loader, hessian_reg
+    )
 
     x_train, y_train = tuple(map(torch.from_numpy, train_data))
     x_test, y_test = tuple(map(torch.from_numpy, test_data))
-    influence_values = influence.influences(
-        x_test, y_test, x_train, y_train, mode=mode
-    ).numpy()
-    sym_influence_values = influence.influences(
-        x_train, y_train, x_train, y_train, mode=mode
-    ).numpy()
+    influence_values = (
+        influence.influences(x_test, y_test, x_train, y_train, mode=mode).cpu().numpy()
+    )
+    sym_influence_values = (
+        influence.influences(x_train, y_train, x_train, y_train, mode=mode)
+        .cpu()
+        .numpy()
+    )
 
     with pytest.raises(ValueError):
         influence.influences(x_test, y_test, x=x_train, mode=mode)
@@ -431,6 +436,7 @@ def test_influences_lissa(
     ],
     direct_influences,
     influence_factory,
+    device,
 ):
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
@@ -438,11 +444,15 @@ def test_influences_lissa(
     train_dataloader = DataLoader(
         TensorDataset(x_train, y_train), batch_size=test_case.batch_size
     )
     influence_model = influence_factory(
-        model, loss, train_dataloader, test_case.hessian_reg
+        model.to(device), loss, train_dataloader, test_case.hessian_reg
+    )
+    approx_influences = (
+        influence_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
     )
-    approx_influences = influence_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
 
     assert not np.any(np.isnan(approx_influences))
@@ -497,9 +507,10 @@ def test_influences_low_rank(
     direct_sym_influences,
     direct_factors,
     influence_factory,
+    device: torch.device,
 ):
-    atol = 1e-8
-    rtol = 1e-5
+    atol = 1e-7
+    rtol = 1e-4
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
     num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
@@ -509,7 +520,7 @@ def test_influences_low_rank(
     )
 
     influence_func_model = influence_factory(
-        model,
+        model.to(device),
         loss,
         test_case.hessian_reg,
         num_parameters - 1,
@@ -525,33 +536,47 @@ def test_influences_low_rank(
     influence_func_model = influence_func_model.fit(train_dataloader)
-    low_rank_influence = influence_func_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    low_rank_influence = (
+        influence_func_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
 
-    sym_low_rank_influence = influence_func_model.influences(
-        x_train, y_train, mode=test_case.mode
-    ).numpy()
+    sym_low_rank_influence = (
+        influence_func_model.influences(x_train, y_train, mode=test_case.mode)
+        .cpu()
+        .numpy()
+    )
 
     low_rank_factors = influence_func_model.influence_factors(x_test, y_test)
     assert np.allclose(
         direct_factors,
-        influence_func_model.influence_factors(x_train, y_train).numpy(),
+        influence_func_model.influence_factors(x_train, y_train).cpu().numpy(),
         atol=atol,
         rtol=rtol,
     )
 
     if test_case.mode is InfluenceMode.Up:
-        low_rank_influence_transpose = influence_func_model.influences(
-            x_train, y_train, x_test, y_test, mode=test_case.mode
-        ).numpy()
+        low_rank_influence_transpose = (
+            influence_func_model.influences(
+                x_train, y_train, x_test, y_test, mode=test_case.mode
+            )
+            .cpu()
+            .numpy()
+        )
         assert np.allclose(
             low_rank_influence_transpose, low_rank_influence.swapaxes(0, 1)
         )
 
-    low_rank_values_from_factors = influence_func_model.influences_from_factors(
-        low_rank_factors, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    low_rank_values_from_factors = (
+        influence_func_model.influences_from_factors(
+            low_rank_factors, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
     assert np.allclose(direct_influences, low_rank_influence, atol=atol, rtol=rtol)
     assert np.allclose(
         direct_sym_influences, sym_low_rank_influence, atol=atol, rtol=rtol
     )
@@ -578,6 +603,7 @@ def test_influences_ekfac(
     ],
     direct_influences,
     direct_sym_influences,
+    device: torch.device,
 ):
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
@@ -589,7 +615,7 @@ def test_influences_ekfac(
     ekfac_influence = EkfacInfluence(
         model,
         update_diagonal=True,
        hessian_regularization=test_case.hessian_reg,
-    )
+    ).to(device)
 
     with pytest.raises(NotFittedException):
         ekfac_influence.influences(
@@ -604,9 +630,13 @@ def test_influences_ekfac(
             ekfac_influence.fit(train_dataloader)
     elif isinstance(loss, nn.CrossEntropyLoss):
         ekfac_influence = ekfac_influence.fit(train_dataloader)
-    ekfac_influence_values = ekfac_influence.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    ekfac_influence_values = (
+        ekfac_influence.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
 
     ekfac_influences_by_layer = ekfac_influence.influences_by_layer(
         x_test, y_test, x_train, y_train, mode=test_case.mode
     )
 
     accumulated_inf_by_layer = np.zeros_like(ekfac_influence_values)
     for layer, infl in ekfac_influences_by_layer.items():
-        accumulated_inf_by_layer += infl.detach().numpy()
+        accumulated_inf_by_layer += infl.detach().cpu().numpy()
 
-    ekfac_self_influence = ekfac_influence.influences(
-        x_train, y_train, mode=test_case.mode
-    ).numpy()
+    ekfac_self_influence = (
+        ekfac_influence.influences(x_train, y_train, mode=test_case.mode)
+        .cpu()
+        .numpy()
+    )
 
     ekfac_factors = ekfac_influence.influence_factors(x_test, y_test)
-    influence_from_factors = ekfac_influence.influences_from_factors(
-        ekfac_factors, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    influence_from_factors = (
+        ekfac_influence.influences_from_factors(
+            ekfac_factors, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
 
     assert np.allclose(ekfac_influence_values, influence_from_factors)
     assert np.allclose(ekfac_influence_values, accumulated_inf_by_layer)
-    check_influence_correlations(direct_influences, ekfac_influence_values)
-    check_influence_correlations(direct_sym_influences, ekfac_self_influence)
+    check_influence_correlations(
+        direct_influences, ekfac_influence_values, threshold=0.94
+    )
+    check_influence_correlations(
+        direct_sym_influences, ekfac_self_influence, threshold=0.94
+    )
 
 
 @pytest.mark.torch
@@ -656,6 +696,7 @@ def test_influences_cg(
     direct_factors,
     use_block_cg: bool,
     pre_conditioner: PreConditioner,
+    device: torch.device,
 ):
     model, loss, x_train, y_train, x_test, y_test = model_and_data
 
@@ -663,7 +704,7 @@ def test_influences_cg(
     train_dataloader = DataLoader(
         TensorDataset(x_train, y_train), batch_size=test_case.batch_size
     )
     influence_model = CgInfluence(
-        model,
+        model.to(device),
         loss,
         test_case.hessian_reg,
         maxiter=5,
@@ -672,9 +713,13 @@ def test_influences_cg(
     )
     influence_model = influence_model.fit(train_dataloader)
-    approx_influences = influence_model.influences(
-        x_test, y_test, x_train, y_train, mode=test_case.mode
-    ).numpy()
+    approx_influences = (
+        influence_model.influences(
+            x_test, y_test, x_train, y_train, mode=test_case.mode
+        )
+        .cpu()
+        .numpy()
+    )
 
     assert not np.any(np.isnan(approx_influences))
@@ -701,7 +746,11 @@ def test_influences_cg(
     # check that block variant returns the correct vector, if only one right hand side
     # is provided
     if use_block_cg:
-        single_influence = influence_model.influence_factors(
-            x_train[0].unsqueeze(0), y_train[0].unsqueeze(0)
-        ).numpy()
+        single_influence = (
+            influence_model.influence_factors(
+                x_train[0].unsqueeze(0), y_train[0].unsqueeze(0)
+            )
+            .cpu()
+            .numpy()
+        )
         assert np.allclose(single_influence, direct_factors[0], atol=1e-6, rtol=1e-4)

From 4cf4ac2a8cf4a8a86dbc3c5caa9a25b59c775faa Mon Sep 17 00:00:00 2001
From: Kristof Schroeder
Date: Fri, 3 May 2024 13:32:45 +0200
Subject: [PATCH 7/7] Update CHANGELOG.md

---
 CHANGELOG.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52bc910a4..a0e27a8d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## Unreleased
+
+### Added
+
+- Add a device fixture for `pytest` which, depending on availability and
+  user input (`pytest --with-cuda`), resolves to a `cuda` device
+  [PR #574](https://github.com/aai-institute/pyDVL/pull/574)
+
 ## 0.9.1 - Bug fixes, logging improvement
 
 ### Fixed
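
For illustration, a minimal sketch (not part of the patches) of how the
session-scoped `device` fixture from tests/influence/torch/conftest.py is
meant to be used inside a test; the model, test name and assertion here are
hypothetical placeholders:

import torch
import torch.nn as nn


def test_model_runs_on_device(device: torch.device):
    # `device` resolves to "cuda" only when `pytest --with-cuda` is passed
    # and a GPU is actually available; otherwise it falls back to "cpu".
    model = nn.Linear(4, 2).to(device)
    x = torch.randn(8, 4, device=device)

    # Tensors computed on a GPU must be moved back to the CPU before
    # converting to numpy, mirroring the `.cpu().numpy()` calls added
    # throughout test_influence_model.py above.
    out = model(x).detach().cpu().numpy()
    assert out.shape == (8, 2)

As the CONTRIBUTING.md note stresses, the fixture only supplies the device;
each test remains responsible for moving its models and tensors with
`.to(device)` and for calling `.cpu()` before any numpy conversion.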