From f40b8aa886f74c83748c9f808bef9bd1d4827f0f Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 10 Dec 2023 12:36:24 +0100 Subject: [PATCH 01/11] Replace all uses of maybe_progress by tqdm directly --- notebooks/support/torch.py | 4 ++-- src/pydvl/influence/general.py | 15 ++++++++------- src/pydvl/influence/torch/torch_differentiable.py | 10 +++++----- src/pydvl/reporting/scores.py | 5 +++-- src/pydvl/value/least_core/__init__.py | 2 +- src/pydvl/value/least_core/montecarlo.py | 4 ++-- src/pydvl/value/least_core/naive.py | 10 +++++++--- src/pydvl/value/oob/oob.py | 7 ++++--- src/pydvl/value/shapley/gt.py | 5 +++-- src/pydvl/value/shapley/knn.py | 5 +++-- src/pydvl/value/shapley/montecarlo.py | 2 +- src/pydvl/value/shapley/naive.py | 11 ++++++----- 12 files changed, 45 insertions(+), 35 deletions(-) diff --git a/notebooks/support/torch.py b/notebooks/support/torch.py index 7eeec22e0..16d055223 100644 --- a/notebooks/support/torch.py +++ b/notebooks/support/torch.py @@ -12,9 +12,9 @@ from torch.optim.lr_scheduler import _LRScheduler from torch.utils.data import DataLoader from torchvision.models import ResNet18_Weights, resnet18 +from tqdm.auto import tqdm from pydvl.influence.torch import as_tensor -from pydvl.utils import maybe_progress from .types import Losses @@ -124,7 +124,7 @@ def fit_torch_model( train_loss = [] val_loss = [] - for epoch in maybe_progress(range(num_epochs), progress, desc="Model fitting"): + for epoch in tqdm(range(num_epochs), disable=not progress, desc="Model fitting"): batch_loss = [] for train_batch in training_data: batch_x, batch_y = train_batch diff --git a/src/pydvl/influence/general.py b/src/pydvl/influence/general.py index f5170346c..bf1ee6f94 100644 --- a/src/pydvl/influence/general.py +++ b/src/pydvl/influence/general.py @@ -13,7 +13,8 @@ from enum import Enum from typing import Any, Callable, Dict, Generator, Optional, Type -from ..utils import maybe_progress +from tqdm.auto import tqdm + from .inversion import InversionMethod, solve_hvp from .twice_differentiable import ( DataLoaderType, @@ -93,8 +94,8 @@ def compute_influence_factors( cat = tensor_util.cat def test_grads() -> Generator[TensorType, None, None]: - for x_test, y_test in maybe_progress( - test_data, progress, desc="Batch Test Gradients" + for x_test, y_test in tqdm( + test_data, disable=not progress, desc="Batch Test Gradients" ): yield stack( [ @@ -167,8 +168,8 @@ def compute_influences_up( einsum = tensor_util.einsum def train_grads() -> Generator[TensorType, None, None]: - for x, y in maybe_progress( - input_data, progress, desc="Batch Split Input Gradients" + for x, y in tqdm( + input_data, disable=not progress, desc="Batch Split Input Gradients" ): yield stack( [model.grad(inpt, target) for inpt, target in zip(unsqueeze(x, 1), y)] @@ -232,9 +233,9 @@ def compute_influences_pert( shape = tensor_util.shape all_pert_influences = [] - for x, y in maybe_progress( + for x, y in tqdm( input_data, - progress, + disable=not progress, desc="Batch Influence Perturbation", ): for i in range(len(x)): diff --git a/src/pydvl/influence/torch/torch_differentiable.py b/src/pydvl/influence/torch/torch_differentiable.py index f93ebe23b..a1d85ebfe 100644 --- a/src/pydvl/influence/torch/torch_differentiable.py +++ b/src/pydvl/influence/torch/torch_differentiable.py @@ -25,8 +25,8 @@ from torch import autograd from torch.autograd import Variable from torch.utils.data import DataLoader +from tqdm.auto import tqdm -from ...utils import maybe_progress from ..inversion import InversionMethod, 
InversionRegistry from ..twice_differentiable import ( InverseHvpResult, @@ -192,7 +192,7 @@ def mvp( z = (grad_xy * Variable(v)).sum(dim=1) mvp = [] - for i in maybe_progress(range(len(z)), progress, desc="MVP"): + for i in tqdm(range(len(z)), disable=not progress, desc="MVP"): mvp.append( flatten_tensors_to_vector( autograd.grad(z[i], backprop_on, retain_graph=True) @@ -578,7 +578,7 @@ def solve_batch_cg( total_grad_xy = torch.empty(0) total_points = 0 - for x, y in maybe_progress(training_data, progress, desc="Batch Train Gradients"): + for x, y in tqdm(training_data, disable=not progress, desc="Batch Train Gradients"): grad_xy = model.grad(x, y, create_graph=True) if total_grad_xy.nelement() == 0: total_grad_xy = torch.zeros_like(grad_xy) @@ -592,7 +592,7 @@ def solve_batch_cg( batch_cg = torch.zeros_like(b) info = {} - for idx, bi in enumerate(maybe_progress(b, progress, desc="Conjugate gradient")): + for idx, bi in enumerate(tqdm(b, disable=not progress, desc="Conjugate gradient")): batch_result, batch_info = solve_cg( reg_hvp, bi, x0=x0, rtol=rtol, atol=atol, maxiter=maxiter ) @@ -724,7 +724,7 @@ def lissa_step( """ return b + (1 - dampen) * h - reg_hvp(h) / scale - for _ in maybe_progress(range(maxiter), progress, desc="Lissa"): + for _ in tqdm(range(maxiter), disable=not progress, desc="Lissa"): x, y = next(iter(shuffled_training_data)) grad_xy = model.grad(x, y, create_graph=True) reg_hvp = ( diff --git a/src/pydvl/reporting/scores.py b/src/pydvl/reporting/scores.py index b12e52248..99d6525d8 100644 --- a/src/pydvl/reporting/scores.py +++ b/src/pydvl/reporting/scores.py @@ -2,8 +2,9 @@ import numpy as np from numpy.typing import NDArray +from tqdm.auto import tqdm -from pydvl.utils import Utility, maybe_progress +from pydvl.utils import Utility from pydvl.value.result import ValuationResult __all__ = ["compute_removal_score"] @@ -44,7 +45,7 @@ def compute_removal_score( # We sort in descending order if we want to remove the best values values.sort(reverse=remove_best) - for pct in maybe_progress(percentages, display=progress, desc="Removal Scores"): + for pct in tqdm(percentages, disable=not progress, desc="Removal Scores"): n_removal = int(pct * len(u.data)) indices = values.indices[n_removal:] score = u(indices) diff --git a/src/pydvl/value/least_core/__init__.py b/src/pydvl/value/least_core/__init__.py index 8451dd4f0..abf34c623 100644 --- a/src/pydvl/value/least_core/__init__.py +++ b/src/pydvl/value/least_core/__init__.py @@ -96,7 +96,7 @@ def compute_least_core_values( solver_options.update(kwargs) if mode == LeastCoreMode.MonteCarlo: - # TODO fix progress showing and maybe_progress in remote case + # TODO fix progress showing in remote case progress = False if n_iterations is None: raise ValueError("n_iterations cannot be None for Monte Carlo Least Core") diff --git a/src/pydvl/value/least_core/montecarlo.py b/src/pydvl/value/least_core/montecarlo.py index 88f518253..88dc11ded 100644 --- a/src/pydvl/value/least_core/montecarlo.py +++ b/src/pydvl/value/least_core/montecarlo.py @@ -4,10 +4,10 @@ import numpy as np from numpy.typing import NDArray +from tqdm.auto import tqdm from pydvl.parallel import MapReduceJob, ParallelConfig, effective_n_jobs from pydvl.utils.numeric import random_powerset -from pydvl.utils.progress import maybe_progress from pydvl.utils.types import Seed from pydvl.utils.utility import Utility from pydvl.value.least_core.common import LeastCoreProblem, lc_solve_problem @@ -175,7 +175,7 @@ def _montecarlo_least_core( A_lb = np.zeros((n_iterations, n)) 
for i, subset in enumerate( - maybe_progress(power_set, progress, total=n_iterations, position=job_id) + tqdm(power_set, disable=not progress, total=n_iterations, position=job_id) ): indices: NDArray[np.bool_] = np.zeros(n, dtype=bool) indices[list(subset)] = True diff --git a/src/pydvl/value/least_core/naive.py b/src/pydvl/value/least_core/naive.py index 4a6a941f2..31de56ef1 100644 --- a/src/pydvl/value/least_core/naive.py +++ b/src/pydvl/value/least_core/naive.py @@ -4,8 +4,9 @@ import numpy as np from numpy.typing import NDArray +from tqdm.auto import tqdm -from pydvl.utils import Utility, maybe_progress, powerset +from pydvl.utils import Utility, powerset from pydvl.value.least_core.common import LeastCoreProblem, lc_solve_problem from pydvl.value.result import ValuationResult @@ -104,8 +105,11 @@ def lc_prepare_problem(u: Utility, progress: bool = False) -> LeastCoreProblem: logger.debug("Iterating over all subsets") utility_values = np.zeros(powerset_size) for i, subset in enumerate( - maybe_progress( - powerset(u.data.indices), progress, total=powerset_size - 1, position=0 + tqdm( + powerset(u.data.indices), + display=progress, + total=powerset_size - 1, + position=0, ) ): indices: NDArray[np.bool_] = np.zeros(n, dtype=bool) diff --git a/src/pydvl/value/oob/oob.py b/src/pydvl/value/oob/oob.py index b0bfe6e95..71e32ff2a 100644 --- a/src/pydvl/value/oob/oob.py +++ b/src/pydvl/value/oob/oob.py @@ -12,8 +12,9 @@ from numpy.typing import NDArray from sklearn.base import is_classifier, is_regressor from sklearn.ensemble import BaggingClassifier, BaggingRegressor +from tqdm.auto import tqdm -from pydvl.utils import Seed, Utility, maybe_progress +from pydvl.utils import Seed, Utility from pydvl.utils.types import LossFunction from pydvl.value.result import ValuationResult @@ -112,8 +113,8 @@ def compute_data_oob( bag.fit(u.data.x_train, u.data.y_train) - for est, samples in maybe_progress( - zip(bag.estimators_, bag.estimators_samples_), progress, total=n_est + for est, samples in tqdm( + zip(bag.estimators_, bag.estimators_samples_), disable=not progress, total=n_est ): # The bottleneck is the bag fitting not this part so TQDM is not very useful here oob_idx = np.setxor1d(u.data.indices, np.unique(samples)) array_loss = loss( diff --git a/src/pydvl/value/shapley/gt.py b/src/pydvl/value/shapley/gt.py index 2d3be7710..7d15c49c6 100644 --- a/src/pydvl/value/shapley/gt.py +++ b/src/pydvl/value/shapley/gt.py @@ -30,9 +30,10 @@ import numpy as np from numpy.random import SeedSequence from numpy.typing import NDArray +from tqdm.auto import tqdm from pydvl.parallel import MapReduceJob, ParallelConfig, effective_n_jobs -from pydvl.utils import Utility, maybe_progress +from pydvl.utils import Utility from pydvl.utils.numeric import random_subset_of_size from pydvl.utils.status import Status from pydvl.utils.types import Seed, ensure_seed_sequence @@ -155,7 +156,7 @@ def _group_testing_shapley( ) # indicator vars uu = np.empty(n_samples) # utilities - for t in maybe_progress(n_samples, progress=progress, position=job_id): + for t in tqdm(n_samples, disable=not progress, position=job_id): k = rng.choice(const.kk, size=1, p=const.q).item() s = random_subset_of_size(u.data.indices, k, seed=rng) uu[t] = u(s) diff --git a/src/pydvl/value/shapley/knn.py b/src/pydvl/value/shapley/knn.py index 5356ab946..5e3a28ae9 100644 --- a/src/pydvl/value/shapley/knn.py +++ b/src/pydvl/value/shapley/knn.py @@ -19,8 +19,9 @@ import numpy as np from numpy.typing import NDArray from sklearn.neighbors import 
KNeighborsClassifier, NearestNeighbors +from tqdm.auto import tqdm -from pydvl.utils import Utility, maybe_progress +from pydvl.utils import Utility from pydvl.utils.status import Status from pydvl.value.result import ValuationResult @@ -76,7 +77,7 @@ def knn_shapley(u: Utility, *, progress: bool = True) -> ValuationResult: n = len(u.data) yt = u.data.y_train iterator = enumerate(zip(u.data.y_test, indices), start=1) - for j, (y, ii) in maybe_progress(iterator, progress): + for j, (y, ii) in tqdm(iterator, disable=not progress): value_at_x = int(yt[ii[-1]] == y) / n values[ii[-1]] += (value_at_x - values[ii[-1]]) / j for i in range(n - 2, n_neighbors, -1): # farthest to closest diff --git a/src/pydvl/value/shapley/montecarlo.py b/src/pydvl/value/shapley/montecarlo.py index ac66520fb..e6f1dbf2a 100644 --- a/src/pydvl/value/shapley/montecarlo.py +++ b/src/pydvl/value/shapley/montecarlo.py @@ -54,7 +54,7 @@ from deprecate import deprecated from numpy.random import SeedSequence from numpy.typing import NDArray -from tqdm import tqdm +from tqdm.auto import tqdm from pydvl.parallel import ( CancellationPolicy, diff --git a/src/pydvl/value/shapley/naive.py b/src/pydvl/value/shapley/naive.py index 46f1c11a5..8d0653fba 100644 --- a/src/pydvl/value/shapley/naive.py +++ b/src/pydvl/value/shapley/naive.py @@ -5,9 +5,10 @@ import numpy as np from numpy.typing import NDArray +from tqdm.auto import tqdm from pydvl.parallel import MapReduceJob, ParallelConfig -from pydvl.utils import Utility, maybe_progress, powerset +from pydvl.utils import Utility, powerset from pydvl.utils.status import Status from pydvl.value.result import ValuationResult @@ -44,9 +45,9 @@ def permutation_exact_shapley(u: Utility, *, progress: bool = True) -> Valuation ) values = np.zeros(n) - for p in maybe_progress( + for p in tqdm( permutations(u.data.indices), - progress, + disable=not progress, desc="Permutation", total=math.factorial(n), ): @@ -77,9 +78,9 @@ def _combinatorial_exact_shapley( subset: NDArray[np.int_] = np.setxor1d( u.data.indices, [i], assume_unique=True ).astype(np.int_) - for s in maybe_progress( + for s in tqdm( powerset(subset), - progress, + disable=not progress, desc=f"Index {i}", total=2 ** (n - 1), position=0, From efca4238661ee25da05c30a1b7373ab09eebcb86 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 10 Dec 2023 12:37:12 +0100 Subject: [PATCH 02/11] Delete progress module --- src/pydvl/utils/__init__.py | 1 - src/pydvl/utils/progress.py | 74 ------------------------------------- 2 files changed, 75 deletions(-) delete mode 100644 src/pydvl/utils/progress.py diff --git a/src/pydvl/utils/__init__.py b/src/pydvl/utils/__init__.py index 245c596dd..7beac241f 100644 --- a/src/pydvl/utils/__init__.py +++ b/src/pydvl/utils/__init__.py @@ -3,7 +3,6 @@ from .config import * from .dataset import * from .numeric import * -from .progress import * from .score import * from .status import * from .types import * diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py deleted file mode 100644 index 52493f27a..000000000 --- a/src/pydvl/utils/progress.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -!!! Warning - This module is deprecated and will be removed in a future release. - It implements a wrapper for the [tqdm](https://tqdm.github.io/) progress bar - iterator for easy toggling, but this functionality is already provided by - the `disable` argument of `tqdm`. 
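For reference, the toggling equivalence this deprecation note relies on can be sketched as follows; `items` and `show_progress` are illustrative placeholder names, not part of pyDVL's API:

```python
from tqdm.auto import tqdm

items = range(100)
show_progress = False  # placeholder flag, analogous to maybe_progress's `display`

# tqdm's built-in `disable` flag covers what the deleted wrapper provided:
# a real progress bar when enabled, a transparent pass-through when not.
for item in tqdm(items, disable=not show_progress, desc="Work"):
    pass  # process `item`
```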
-""" -import collections.abc -from typing import Iterable, Iterator, Union - -from tqdm.auto import tqdm - -__all__ = ["maybe_progress"] - - -class MockProgress(collections.abc.Iterator): - """A Naive mock class to use with maybe_progress and tqdm. - Mocked methods don't support return values. - Mocked properties don't do anything - """ - - class MiniMock: - def __call__(self, *args, **kwargs): - pass - - def __add__(self, other): - pass - - def __sub__(self, other): - pass - - def __mul__(self, other): - pass - - def __floordiv__(self, other): - pass - - def __truediv__(self, other): - pass - - def __init__(self, iterator: Union[Iterator, Iterable]): - # Since there is no _it in __dict__ at this point, doing here - # self._it = iterator - # results in a call to __getattr__() and the assignment fails, so we - # use __dict__ instead - self.__dict__["_it"] = iterator - - def __iter__(self): - return iter(self._it) - - def __next__(self): - return next(self._it) - - def __getattr__(self, key): - return self.MiniMock() - - def __setattr__(self, key, value): - pass - - -def maybe_progress( - it: Union[int, Iterable, Iterator], display: bool = False, **kwargs -) -> Union[tqdm, MockProgress]: - """Returns either a tqdm progress bar or a mock object which wraps the - iterator as well, but ignores any accesses to methods or properties. - - Args: - it: the iterator to wrap - display: set to True to return a tqdm bar - kwargs: Keyword arguments that will be forwarded to tqdm - """ - if isinstance(it, int): - it = range(it) # type: ignore - return tqdm(it, **kwargs) if display else MockProgress(it) From 4ad3eebbcaf2c19576407174ce4b3d73b237192e Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 10 Dec 2023 13:07:50 +0100 Subject: [PATCH 03/11] Create repeat_indices helper and use it in _owen_sampling_shapley and _combinatorial_montecarlo_shapley --- src/pydvl/utils/__init__.py | 1 + src/pydvl/utils/progress.py | 30 +++++++++++++++++++++++++++ src/pydvl/value/shapley/montecarlo.py | 11 +++++----- src/pydvl/value/shapley/owen.py | 12 +++++------ 4 files changed, 41 insertions(+), 13 deletions(-) create mode 100644 src/pydvl/utils/progress.py diff --git a/src/pydvl/utils/__init__.py b/src/pydvl/utils/__init__.py index 7beac241f..245c596dd 100644 --- a/src/pydvl/utils/__init__.py +++ b/src/pydvl/utils/__init__.py @@ -3,6 +3,7 @@ from .config import * from .dataset import * from .numeric import * +from .progress import * from .score import * from .status import * from .types import * diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py new file mode 100644 index 000000000..bbcfca682 --- /dev/null +++ b/src/pydvl/utils/progress.py @@ -0,0 +1,30 @@ +from collections.abc import Iterator +from itertools import cycle, takewhile +from typing import Collection + +from tqdm.auto import tqdm + +from pydvl.value.result import ValuationResult +from pydvl.value.stopping import StoppingCriterion + +__all__ = ["repeat_indices"] + + +def repeat_indices( + indices: Collection[int], result: ValuationResult, done: StoppingCriterion, **kwargs +) -> Iterator[int]: + """Helper function to cycle indefinitely over a collection of indices + until the stopping criterion is satisfied while displaying progress. + + Args: + indices: Collection of indices that will be cycled until done. + result: Object containing the current results. + done: Stopping criterion. + kwargs: Keyword arguments passed to tqdm. 
+ """ + with tqdm(total=100, unit="%", **kwargs) as pbar: + it = takewhile(lambda _: not done(result), cycle(indices)) + for i in it: + yield i + pbar.update(100 * done.completion() - pbar.n) + pbar.refresh() diff --git a/src/pydvl/value/shapley/montecarlo.py b/src/pydvl/value/shapley/montecarlo.py index e6f1dbf2a..aabc2d813 100644 --- a/src/pydvl/value/shapley/montecarlo.py +++ b/src/pydvl/value/shapley/montecarlo.py @@ -47,7 +47,6 @@ import operator from concurrent.futures import FIRST_COMPLETED, Future, wait from functools import reduce -from itertools import cycle, takewhile from typing import Optional, Sequence, Union import numpy as np @@ -65,6 +64,7 @@ init_parallel_backend, ) from pydvl.utils.numeric import random_powerset +from pydvl.utils.progress import repeat_indices from pydvl.utils.types import Seed, ensure_seed_sequence from pydvl.utils.utility import Utility from pydvl.value.result import ValuationResult @@ -281,11 +281,10 @@ def _combinatorial_montecarlo_shapley( ) rng = np.random.default_rng(seed) - repeat_indices = takewhile(lambda _: not done(result), cycle(indices)) - pbar = tqdm(disable=not progress, position=job_id, total=100, unit="%") - for idx in repeat_indices: - pbar.n = 100 * done.completion() - pbar.refresh() + + for idx in repeat_indices( + indices, result=result, done=done, disable=not progress, position=job_id + ): # Randomly sample subsets of full dataset without idx subset = np.setxor1d(u.data.indices, [idx], assume_unique=True) s = next(random_powerset(subset, n_samples=1, seed=rng)) diff --git a/src/pydvl/value/shapley/owen.py b/src/pydvl/value/shapley/owen.py index 07b9e972b..2d7cde6ba 100644 --- a/src/pydvl/value/shapley/owen.py +++ b/src/pydvl/value/shapley/owen.py @@ -9,15 +9,14 @@ import operator from enum import Enum from functools import reduce -from itertools import cycle, takewhile from typing import Optional, Sequence import numpy as np from numpy.typing import NDArray -from tqdm import tqdm from pydvl.parallel import MapReduceJob, ParallelConfig from pydvl.utils import Utility, random_powerset +from pydvl.utils.progress import repeat_indices from pydvl.utils.types import Seed from pydvl.value import ValuationResult from pydvl.value.stopping import MinUpdates @@ -76,11 +75,10 @@ def _owen_sampling_shapley( rng = np.random.default_rng(seed) done = MinUpdates(1) - repeat_indices = takewhile(lambda _: not done(result), cycle(indices)) - pbar = tqdm(disable=not progress, position=job_id, total=100, unit="%") - for idx in repeat_indices: - pbar.n = 100 * done.completion() - pbar.refresh() + + for idx in repeat_indices( + indices, result=result, done=done, disable=not progress, position=job_id + ): e = np.zeros(max_q) subset = np.setxor1d(u.data.indices, [idx], assume_unique=True) for j, q in enumerate(q_steps): From d1aa6adb25719cad0a39df6a4c761e5fa487ca11 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 10 Dec 2023 13:34:53 +0100 Subject: [PATCH 04/11] Fix type hints --- src/pydvl/value/least_core/naive.py | 2 +- src/pydvl/value/shapley/gt.py | 4 ++-- src/pydvl/value/shapley/naive.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pydvl/value/least_core/naive.py b/src/pydvl/value/least_core/naive.py index 31de56ef1..9467292b3 100644 --- a/src/pydvl/value/least_core/naive.py +++ b/src/pydvl/value/least_core/naive.py @@ -115,6 +115,6 @@ def lc_prepare_problem(u: Utility, progress: bool = False) -> LeastCoreProblem: indices: NDArray[np.bool_] = np.zeros(n, dtype=bool) indices[list(subset)] = True A_lb[i, indices] = 
1 - utility_values[i] = u(subset) + utility_values[i] = u(subset) # type: ignore return LeastCoreProblem(utility_values, A_lb) diff --git a/src/pydvl/value/shapley/gt.py b/src/pydvl/value/shapley/gt.py index 7d15c49c6..34b3b00c1 100644 --- a/src/pydvl/value/shapley/gt.py +++ b/src/pydvl/value/shapley/gt.py @@ -30,7 +30,7 @@ import numpy as np from numpy.random import SeedSequence from numpy.typing import NDArray -from tqdm.auto import tqdm +from tqdm.auto import trange from pydvl.parallel import MapReduceJob, ParallelConfig, effective_n_jobs from pydvl.utils import Utility @@ -156,7 +156,7 @@ def _group_testing_shapley( ) # indicator vars uu = np.empty(n_samples) # utilities - for t in tqdm(n_samples, disable=not progress, position=job_id): + for t in trange(n_samples, disable=not progress, position=job_id): k = rng.choice(const.kk, size=1, p=const.q).item() s = random_subset_of_size(u.data.indices, k, seed=rng) uu[t] = u(s) diff --git a/src/pydvl/value/shapley/naive.py b/src/pydvl/value/shapley/naive.py index 8d0653fba..bd69432eb 100644 --- a/src/pydvl/value/shapley/naive.py +++ b/src/pydvl/value/shapley/naive.py @@ -64,7 +64,7 @@ def permutation_exact_shapley(u: Utility, *, progress: bool = True) -> Valuation def _combinatorial_exact_shapley( - indices: NDArray, u: Utility, progress: bool + indices: NDArray[np.int_], u: Utility, progress: bool ) -> NDArray: """Helper function for [combinatorial_exact_shapley()][pydvl.value.shapley.naive.combinatorial_exact_shapley]. @@ -85,7 +85,7 @@ def _combinatorial_exact_shapley( total=2 ** (n - 1), position=0, ): - local_values[i] += (u({i}.union(s)) - u(s)) / math.comb(n - 1, len(s)) + local_values[i] += (u({i}.union(s)) - u(s)) / math.comb(n - 1, len(s)) # type: ignore return local_values / n From 27a85d14d4943a01585921d2058740cdb5871ed2 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Mon, 11 Dec 2023 09:41:34 +0100 Subject: [PATCH 05/11] Ignore some tqdm related type hints --- src/pydvl/value/least_core/naive.py | 4 ++-- src/pydvl/value/shapley/naive.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pydvl/value/least_core/naive.py b/src/pydvl/value/least_core/naive.py index 9467292b3..f97021678 100644 --- a/src/pydvl/value/least_core/naive.py +++ b/src/pydvl/value/least_core/naive.py @@ -104,10 +104,10 @@ def lc_prepare_problem(u: Utility, progress: bool = False) -> LeastCoreProblem: logger.debug("Iterating over all subsets") utility_values = np.zeros(powerset_size) - for i, subset in enumerate( + for i, subset in enumerate( # type: ignore tqdm( powerset(u.data.indices), - display=progress, + disable=not progress, total=powerset_size - 1, position=0, ) diff --git a/src/pydvl/value/shapley/naive.py b/src/pydvl/value/shapley/naive.py index bd69432eb..031925681 100644 --- a/src/pydvl/value/shapley/naive.py +++ b/src/pydvl/value/shapley/naive.py @@ -78,7 +78,7 @@ def _combinatorial_exact_shapley( subset: NDArray[np.int_] = np.setxor1d( u.data.indices, [i], assume_unique=True ).astype(np.int_) - for s in tqdm( + for s in tqdm( # type: ignore powerset(subset), disable=not progress, desc=f"Index {i}", From 0dbacaeec0027d53b30d32b08d965cb87462f033 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Mon, 11 Dec 2023 09:54:08 +0100 Subject: [PATCH 06/11] Fix circular import --- src/pydvl/utils/progress.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py index bbcfca682..7bd62aff4 100644 --- a/src/pydvl/utils/progress.py +++ 
b/src/pydvl/utils/progress.py @@ -1,17 +1,22 @@ from collections.abc import Iterator from itertools import cycle, takewhile -from typing import Collection +from typing import TYPE_CHECKING, Collection from tqdm.auto import tqdm -from pydvl.value.result import ValuationResult from pydvl.value.stopping import StoppingCriterion +if TYPE_CHECKING: + from pydvl.value.result import ValuationResult + __all__ = ["repeat_indices"] def repeat_indices( - indices: Collection[int], result: ValuationResult, done: StoppingCriterion, **kwargs + indices: Collection[int], + result: "ValuationResult", + done: StoppingCriterion, + **kwargs ) -> Iterator[int]: """Helper function to cycle indefinitely over a collection of indices until the stopping criterion is satisfied while displaying progress. From 166f06829859a1d78e5a0189a5e8f9837f64a5c0 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Wed, 13 Dec 2023 13:13:40 +0100 Subject: [PATCH 07/11] Fix import errors --- src/pydvl/utils/progress.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py index 7bd62aff4..03207de67 100644 --- a/src/pydvl/utils/progress.py +++ b/src/pydvl/utils/progress.py @@ -1,13 +1,11 @@ -from collections.abc import Iterator from itertools import cycle, takewhile -from typing import TYPE_CHECKING, Collection +from typing import TYPE_CHECKING, Collection, Iterator from tqdm.auto import tqdm -from pydvl.value.stopping import StoppingCriterion - if TYPE_CHECKING: from pydvl.value.result import ValuationResult + from pydvl.value.stopping import StoppingCriterion __all__ = ["repeat_indices"] @@ -15,7 +13,7 @@ def repeat_indices( indices: Collection[int], result: "ValuationResult", - done: StoppingCriterion, + done: "StoppingCriterion", **kwargs ) -> Iterator[int]: """Helper function to cycle indefinitely over a collection of indices From a5a617b8ff5434ca93853e3a4f468585ab51fd6a Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 17 Dec 2023 20:59:54 +0100 Subject: [PATCH 08/11] Fixes --- .../torch/influence_function_model.py | 8 +- .../influence/torch/torch_differentiable.py | 858 ------------------ 2 files changed, 5 insertions(+), 861 deletions(-) delete mode 100644 src/pydvl/influence/torch/torch_differentiable.py diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py index 17fcb50f5..3b7c0a688 100644 --- a/src/pydvl/influence/torch/influence_function_model.py +++ b/src/pydvl/influence/torch/influence_function_model.py @@ -13,8 +13,10 @@ import torch from torch import nn as nn from torch.utils.data import DataLoader +from tqdm.auto import tqdm + +from pydvl.utils.progress import log_duration -from ...utils import log_duration, maybe_progress from ..base_influence_function_model import ( InfluenceFunctionModel, InfluenceMode, @@ -522,7 +524,7 @@ def reg_hvp(v: torch.Tensor): batch_cg = torch.zeros_like(rhs) for idx, bi in enumerate( - maybe_progress(rhs, self.progress, desc="Conjugate gradient") + tqdm(rhs, disable=not self.progress, desc="Conjugate gradient") ): batch_result = self._solve_cg( reg_hvp, @@ -689,7 +691,7 @@ def lissa_step( create_batch_hvp_function(self.model, self.loss), in_dims=(None, None, None, 0), ) - for _ in maybe_progress(range(self.maxiter), self.progress, desc="Lissa"): + for _ in tqdm(range(self.maxiter), disable=not self.progress, desc="Lissa"): x, y = next(iter(shuffled_training_data)) # grad_xy = model.grad(x, y, create_graph=True) reg_hvp = ( diff --git 
a/src/pydvl/influence/torch/torch_differentiable.py b/src/pydvl/influence/torch/torch_differentiable.py deleted file mode 100644 index a1d85ebfe..000000000 --- a/src/pydvl/influence/torch/torch_differentiable.py +++ /dev/null @@ -1,858 +0,0 @@ -""" -Contains methods for differentiating a pyTorch model. Most of the methods focus -on ways to calculate matrix vector products. Moreover, it contains several -methods to invert the Hessian vector product. These are used to calculate the -influence of a training point on the model. - -## References - -[^1]: Koh, P.W., Liang, P., 2017. - [Understanding Black-box Predictions via Influence Functions](https://proceedings.mlr.press/v70/koh17a.html). - In: Proceedings of the 34th International Conference on Machine Learning, pp. 1885–1894. PMLR. -[^2]: Agarwal, N., Bullins, B., Hazan, E., 2017. - [Second-Order Stochastic Optimization for Machine Learning in Linear Time](https://www.jmlr.org/papers/v18/16-491.html). - In: Journal of Machine Learning Research, Vol. 18, pp. 1–40. JMLR. -""" -import logging -from dataclasses import dataclass -from functools import partial -from typing import Callable, Generator, List, Optional, Sequence, Tuple, Union - -import torch -import torch.nn as nn -from numpy.typing import NDArray -from scipy.sparse.linalg import ArpackNoConvergence -from torch import autograd -from torch.autograd import Variable -from torch.utils.data import DataLoader -from tqdm.auto import tqdm - -from ..inversion import InversionMethod, InversionRegistry -from ..twice_differentiable import ( - InverseHvpResult, - TensorUtilities, - TwiceDifferentiable, -) -from .functional import get_hvp_function -from .util import align_structure, as_tensor, flatten_tensors_to_vector - -__all__ = [ - "TorchTwiceDifferentiable", - "solve_linear", - "solve_batch_cg", - "solve_lissa", - "solve_arnoldi", - "lanzcos_low_rank_hessian_approx", - "model_hessian_low_rank", -] - -logger = logging.getLogger(__name__) - - -class TorchTwiceDifferentiable(TwiceDifferentiable[torch.Tensor]): - r""" - Wraps a [torch.nn.Module][torch.nn.Module] - and a loss function and provides methods to compute gradients and - second derivative of the loss wrt. the model parameters - - Args: - model: A (differentiable) function. - loss: A differentiable scalar loss \( L(\hat{y}, y) \), - mapping a prediction and a target to a real value. - """ - - def __init__( - self, - model: nn.Module, - loss: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], - ): - - if model.training: - logger.warning( - "Passed model not in evaluation mode. This can create several issues in influence " - "computation, e.g. due to batch normalization. Please call model.eval() before " - "computing influences." - ) - self.loss = loss - self.model = model - first_param = next(model.parameters()) - self.device = first_param.device - self.dtype = first_param.dtype - - @classmethod - def tensor_type(cls): - return torch.Tensor - - @property - def parameters(self) -> List[torch.Tensor]: - """ - Returns: - All model parameters that require differentiating. - """ - - return [param for param in self.model.parameters() if param.requires_grad] - - @property - def num_params(self) -> int: - """ - Get the number of parameters of model f. - - Returns: - int: Number of parameters. - """ - return sum([p.numel() for p in self.parameters]) - - def grad( - self, x: torch.Tensor, y: torch.Tensor, create_graph: bool = False - ) -> torch.Tensor: - r""" - Calculates gradient of model parameters with respect to the model parameters. 
- - Args: - x: A matrix [NxD] representing the features \( x_i \). - y: A matrix [NxK] representing the target values \( y_i \). - create_graph (bool): If True, the resulting gradient tensor can be used for further differentiation. - - Returns: - An array [P] with the gradients of the model. - """ - - x = x.to(self.device) - y = y.to(self.device) - - if create_graph and not x.requires_grad: - x = x.requires_grad_(True) - - loss_value = self.loss(torch.squeeze(self.model(x)), torch.squeeze(y)) - grad_f = torch.autograd.grad( - loss_value, self.parameters, create_graph=create_graph - ) - return flatten_tensors_to_vector(grad_f) - - def hessian(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: - r""" - Calculates the explicit hessian of model parameters given data \(x\) and \(y\). - - Args: - x: A matrix [NxD] representing the features \(x_i\). - y: A matrix [NxK] representing the target values \(y_i\). - - Returns: - A tensor representing the hessian of the loss with respect to the model parameters. - """ - - def model_func(param): - outputs = torch.func.functional_call( - self.model, - align_structure( - {k: p for k, p in self.model.named_parameters() if p.requires_grad}, - param, - ), - (x.to(self.device),), - ) - return self.loss(outputs, y.to(self.device)) - - params = flatten_tensors_to_vector( - p.detach() for p in self.model.parameters() if p.requires_grad - ) - return torch.func.hessian(model_func)(params) # type: ignore - - @staticmethod - def mvp( - grad_xy: torch.Tensor, - v: torch.Tensor, - backprop_on: Union[torch.Tensor, Sequence[torch.Tensor]], - *, - progress: bool = False, - ) -> torch.Tensor: - r""" - Calculates the second-order derivative of the model along directions v. - This second-order derivative can be selected through the `backprop_on` argument. - - Args: - grad_xy: An array [P] holding the gradients of the model parameters with respect to input - \(x\) and labels \(y\), where P is the number of parameters of the model. - It is typically obtained through `self.grad`. - v: An array ([DxP] or even one-dimensional [D]) which multiplies the matrix, - where D is the number of directions. - progress: If True, progress will be printed. - backprop_on: Tensor used in the second backpropagation - (the first one is defined via grad_xy). - - Returns: - A matrix representing the implicit matrix-vector product of the model along the given directions. - The output shape is [DxM], with M being the number of elements of `backprop_on`. - """ - - device = grad_xy.device - v = as_tensor(v, warn=False).to(device) - if v.ndim == 1: - v = v.unsqueeze(0) - - z = (grad_xy * Variable(v)).sum(dim=1) - - mvp = [] - for i in tqdm(range(len(z)), disable=not progress, desc="MVP"): - mvp.append( - flatten_tensors_to_vector( - autograd.grad(z[i], backprop_on, retain_graph=True) - ) - ) - return torch.stack([grad.contiguous().view(-1) for grad in mvp]).detach() - - -@dataclass -class LowRankProductRepresentation: - r""" - Representation of a low rank product of the form \(H = V D V^T\), - where D is a diagonal matrix and V is orthogonal. - - Args: - eigen_vals: Diagonal of D. - projections: The matrix V. 
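To make the factorization concrete, here is a minimal sketch of how such a pair of factors acts on a vector, using random stand-ins; the shapes and variable names are assumptions for illustration, not methods of this class:

```python
import torch

n_params, rank = 100, 10
eigen_vals = torch.rand(rank) + 0.1  # diagonal of D, bounded away from zero
projections, _ = torch.linalg.qr(torch.randn(n_params, rank))  # orthonormal V

v = torch.randn(n_params)
hv = projections @ (eigen_vals * (projections.T @ v))      # V D V^T v
hinv_v = projections @ ((projections.T @ v) / eigen_vals)  # V D^{-1} V^T v
```

The inverse application \(V D^{-1} V^T v\) is the same pattern `solve_arnoldi` applies further below with `torch.diag_embed`.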
- """ - - eigen_vals: torch.Tensor - projections: torch.Tensor - - @property - def device(self) -> torch.device: - return ( - self.eigen_vals.device - if hasattr(self.eigen_vals, "device") - else torch.device("cpu") - ) - - def to(self, device: torch.device): - """ - Move the representing tensors to a device - """ - return LowRankProductRepresentation( - self.eigen_vals.to(device), self.projections.to(device) - ) - - def __post_init__(self): - if self.eigen_vals.device != self.projections.device: - raise ValueError("eigen_vals and projections must be on the same device.") - - -def lanzcos_low_rank_hessian_approx( - hessian_vp: Callable[[torch.Tensor], torch.Tensor], - matrix_shape: Tuple[int, int], - hessian_perturbation: float = 0.0, - rank_estimate: int = 10, - krylov_dimension: Optional[int] = None, - tol: float = 1e-6, - max_iter: Optional[int] = None, - device: Optional[torch.device] = None, - eigen_computation_on_gpu: bool = False, - torch_dtype: Optional[torch.dtype] = None, -) -> LowRankProductRepresentation: - r""" - Calculates a low-rank approximation of the Hessian matrix of a scalar-valued - function using the implicitly restarted Lanczos algorithm, i.e.: - - \[ H_{\text{approx}} = V D V^T\] - - where \(D\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian - and \(V\) contains the corresponding eigenvectors. - - Args: - hessian_vp: A function that takes a vector and returns the product of - the Hessian of the loss function. - matrix_shape: The shape of the matrix, represented by the hessian vector - product. - hessian_perturbation: Regularization parameter added to the - Hessian-vector product for numerical stability. - rank_estimate: The number of eigenvalues and corresponding eigenvectors - to compute. Represents the desired rank of the Hessian approximation. - krylov_dimension: The number of Krylov vectors to use for the Lanczos - method. If not provided, it defaults to - \( \min(\text{model.num_parameters}, \max(2 \times \text{rank_estimate} + 1, 20)) \). - tol: The stopping criteria for the Lanczos algorithm, which stops when - the difference in the approximated eigenvalue is less than `tol`. - Defaults to 1e-6. - max_iter: The maximum number of iterations for the Lanczos method. If - not provided, it defaults to \( 10 \cdot \text{model.num_parameters}\). - device: The device to use for executing the hessian vector product. - eigen_computation_on_gpu: If True, tries to execute the eigen pair - approximation on the provided device via [cupy](https://cupy.dev/) - implementation. Ensure that either your model is small enough, or you - use a small rank_estimate to fit your device's memory. If False, the - eigen pair approximation is executed on the CPU with scipy's wrapper to - ARPACK. - torch_dtype: If not provided, the current torch default dtype is used for - conversion to torch. - - Returns: - A [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation] - instance that contains the top (up until rank_estimate) eigenvalues - and corresponding eigenvectors of the Hessian. 
- """ - - torch_dtype = torch.get_default_dtype() if torch_dtype is None else torch_dtype - - if eigen_computation_on_gpu: - try: - import cupy as cp - from cupyx.scipy.sparse.linalg import LinearOperator, eigsh - from torch.utils.dlpack import from_dlpack, to_dlpack - except ImportError as e: - raise ImportError( - f"Try to install missing dependencies or set eigen_computation_on_gpu to False: {e}" - ) - - if device is None: - raise ValueError( - "Without setting an explicit device, cupy is not supported" - ) - - def to_torch_conversion_function(x: cp.NDArray) -> torch.Tensor: - return from_dlpack(x.toDlpack()).to(torch_dtype) - - def mv(x): - x = to_torch_conversion_function(x) - y = hessian_vp(x) + hessian_perturbation * x - return cp.from_dlpack(to_dlpack(y)) - - else: - from scipy.sparse.linalg import LinearOperator, eigsh - - def mv(x): - x_torch = torch.as_tensor(x, device=device, dtype=torch_dtype) - y: NDArray = ( - (hessian_vp(x_torch) + hessian_perturbation * x_torch) - .detach() - .cpu() - .numpy() - ) - return y - - to_torch_conversion_function = partial(torch.as_tensor, dtype=torch_dtype) - - try: - eigen_vals, eigen_vecs = eigsh( - LinearOperator(matrix_shape, matvec=mv), - k=rank_estimate, - maxiter=max_iter, - tol=tol, - ncv=krylov_dimension, - return_eigenvectors=True, - ) - - except ArpackNoConvergence as e: - logger.warning( - f"ARPACK did not converge for parameters {max_iter=}, {tol=}, {krylov_dimension=}, " - f"{rank_estimate=}. \n Returning the best approximation found so far. Use those with care or " - f"modify parameters.\n Original error: {e}" - ) - - eigen_vals, eigen_vecs = e.eigenvalues, e.eigenvectors - - eigen_vals = to_torch_conversion_function(eigen_vals) - eigen_vecs = to_torch_conversion_function(eigen_vecs) - - return LowRankProductRepresentation(eigen_vals, eigen_vecs) - - -def model_hessian_low_rank( - model: TorchTwiceDifferentiable, - training_data: DataLoader, - hessian_perturbation: float = 0.0, - rank_estimate: int = 10, - krylov_dimension: Optional[int] = None, - tol: float = 1e-6, - max_iter: Optional[int] = None, - eigen_computation_on_gpu: bool = False, -) -> LowRankProductRepresentation: - r""" - Calculates a low-rank approximation of the Hessian matrix of the model's loss function using the implicitly - restarted Lanczos algorithm, i.e. - - \[ H_{\text{approx}} = V D V^T\] - - where \(D\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian - and \(V\) contains the corresponding eigenvectors. - - - Args: - model: A PyTorch model instance that is twice differentiable, wrapped into `TorchTwiceDifferential`. - The Hessian will be calculated with respect to this model's parameters. - training_data: A DataLoader instance that provides the model's training data. - Used in calculating the Hessian-vector products. - hessian_perturbation: Optional regularization parameter added to the Hessian-vector product - for numerical stability. - rank_estimate: The number of eigenvalues and corresponding eigenvectors to compute. - Represents the desired rank of the Hessian approximation. - krylov_dimension: The number of Krylov vectors to use for the Lanczos method. - If not provided, it defaults to min(model.num_parameters, max(2*rank_estimate + 1, 20)). - tol: The stopping criteria for the Lanczos algorithm, which stops when the difference - in the approximated eigenvalue is less than `tol`. Defaults to 1e-6. - max_iter: The maximum number of iterations for the Lanczos method. 
If not provided, it defaults to - 10*model.num_parameters. - eigen_computation_on_gpu: If True, tries to execute the eigen pair approximation on the provided - device via cupy implementation. - Make sure, that either your model is small enough or you use a - small rank_estimate to fit your device's memory. - If False, the eigen pair approximation is executed on the CPU by scipy wrapper to - ARPACK. - - Returns: - A [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation] - instance that contains the top (up until rank_estimate) eigenvalues - and corresponding eigenvectors of the Hessian. - """ - raw_hvp = get_hvp_function( - model.model, model.loss, training_data, use_hessian_avg=True - ) - - return lanzcos_low_rank_hessian_approx( - hessian_vp=raw_hvp, - matrix_shape=(model.num_params, model.num_params), - hessian_perturbation=hessian_perturbation, - rank_estimate=rank_estimate, - krylov_dimension=krylov_dimension, - tol=tol, - max_iter=max_iter, - device=model.device if hasattr(model, "device") else None, - eigen_computation_on_gpu=eigen_computation_on_gpu, - ) - - -class TorchTensorUtilities(TensorUtilities[torch.Tensor, TorchTwiceDifferentiable]): - twice_differentiable_type = TorchTwiceDifferentiable - - @staticmethod - def einsum(equation: str, *operands) -> torch.Tensor: - """Sums the product of the elements of the input :attr:`operands` along dimensions specified using a notation - based on the Einstein summation convention. - """ - return torch.einsum(equation, *operands) - - @staticmethod - def cat(a: Sequence[torch.Tensor], **kwargs) -> torch.Tensor: - """Concatenates a sequence of tensors into a single torch tensor""" - return torch.cat(a, **kwargs) # type: ignore - - @staticmethod - def stack(a: Sequence[torch.Tensor], **kwargs) -> torch.Tensor: - """Stacks a sequence of tensors into a single torch tensor""" - return torch.stack(a, **kwargs) # type: ignore - - @staticmethod - def unsqueeze(x: torch.Tensor, dim: int) -> torch.Tensor: - """ - Add a singleton dimension at a specified position in a tensor. - - Args: - x: A PyTorch tensor. - dim: The position at which to add the singleton dimension. Zero-based indexing. - - Returns: - A new tensor with an additional singleton dimension. 
- """ - - return x.unsqueeze(dim) - - @staticmethod - def get_element(x: torch.Tensor, idx: int) -> torch.Tensor: - return x[idx] - - @staticmethod - def slice(x: torch.Tensor, start: int, stop: int, axis: int = 0) -> torch.Tensor: - slicer = [slice(None) for _ in x.shape] - slicer[axis] = slice(start, stop) - return x[tuple(slicer)] - - @staticmethod - def shape(x: torch.Tensor) -> Tuple[int, ...]: - return x.shape # type:ignore - - @staticmethod - def reshape(x: torch.Tensor, shape: Tuple[int, ...]) -> torch.Tensor: - return x.reshape(shape) - - @staticmethod - def cat_gen( - a: Generator[torch.Tensor, None, None], - resulting_shape: Tuple[int, ...], - model: TorchTwiceDifferentiable, - axis: int = 0, - ) -> torch.Tensor: - result = torch.empty(resulting_shape, dtype=model.dtype, device=model.device) - - start_idx = 0 - for x in a: - stop_idx = start_idx + x.shape[axis] - - slicer = [slice(None) for _ in resulting_shape] - slicer[axis] = slice(start_idx, stop_idx) - - result[tuple(slicer)] = x - - start_idx = stop_idx - - return result - - -@InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Direct) -def solve_linear( - model: TorchTwiceDifferentiable, - training_data: DataLoader, - b: torch.Tensor, - hessian_perturbation: float = 0.0, -) -> InverseHvpResult: - r""" - Given a model and training data, it finds x such that \(Hx = b\), with \(H\) being the model hessian. - - Args: - model: A model wrapped in the TwiceDifferentiable interface. - training_data: A DataLoader containing the training data. - b: A vector or matrix, the right hand side of the equation \(Hx = b\). - hessian_perturbation: Regularization of the hessian. - - Returns: - Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], - having an array that solves the inverse problem, i.e. it returns \(x\) such that \(Hx = b\), - and a dictionary containing information about the solution. - """ - - all_x, all_y = [], [] - for x, y in training_data: - all_x.append(x) - all_y.append(y) - hessian = model.hessian(torch.cat(all_x), torch.cat(all_y)) - matrix = hessian + hessian_perturbation * torch.eye( - model.num_params, device=model.device - ) - info = {"hessian": hessian} - try: - x = torch.linalg.solve(matrix, b.T).T - except torch.linalg.LinAlgError as e: - raise RuntimeError( - f"Direct inversion failed, possibly due to the Hessian being singular. " - f"Consider increasing the parameter 'hessian_perturbation' (currently: {hessian_perturbation}). \n{e}" - ) - return InverseHvpResult(x=x, info=info) - - -@InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Cg) -def solve_batch_cg( - model: TorchTwiceDifferentiable, - training_data: DataLoader, - b: torch.Tensor, - hessian_perturbation: float = 0.0, - *, - x0: Optional[torch.Tensor] = None, - rtol: float = 1e-7, - atol: float = 1e-7, - maxiter: Optional[int] = None, - progress: bool = False, -) -> InverseHvpResult: - r""" - Given a model and training data, it uses conjugate gradient to calculate the - inverse of the Hessian Vector Product. More precisely, it finds x such that \(Hx = - b\), with \(H\) being the model hessian. For more info, see - [Wikipedia](https://en.wikipedia.org/wiki/Conjugate_gradient_method). - - Args: - model: A model wrapped in the TwiceDifferentiable interface. - training_data: A DataLoader containing the training data. - b: A vector or matrix, the right hand side of the equation \(Hx = b\). - hessian_perturbation: Regularization of the hessian. - x0: Initial guess for hvp. 
If None, defaults to b. - rtol: Maximum relative tolerance of result. - atol: Absolute tolerance of result. - maxiter: Maximum number of iterations. If None, defaults to 10*len(b). - progress: If True, display progress bars. - - Returns: - Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], - having a matrix of shape [NxP] with each line being a solution of \(Ax=b\), - and a dictionary containing information about the convergence of CG, - one entry for each line of the matrix. - """ - if len(training_data) == 0: - raise ValueError("Training dataloader must not be empty.") - - total_grad_xy = torch.empty(0) - total_points = 0 - - for x, y in tqdm(training_data, disable=not progress, desc="Batch Train Gradients"): - grad_xy = model.grad(x, y, create_graph=True) - if total_grad_xy.nelement() == 0: - total_grad_xy = torch.zeros_like(grad_xy) - total_grad_xy += grad_xy * len(x) - total_points += len(x) - - backprop_on = model.parameters - reg_hvp = lambda v: model.mvp( - total_grad_xy / total_points, v, backprop_on - ) + hessian_perturbation * v.type(torch.float64) - batch_cg = torch.zeros_like(b) - info = {} - - for idx, bi in enumerate(tqdm(b, disable=not progress, desc="Conjugate gradient")): - batch_result, batch_info = solve_cg( - reg_hvp, bi, x0=x0, rtol=rtol, atol=atol, maxiter=maxiter - ) - batch_cg[idx] = batch_result - info[f"batch_{idx}"] = batch_info - return InverseHvpResult(x=batch_cg, info=info) - - -def solve_cg( - hvp: Callable[[torch.Tensor], torch.Tensor], - b: torch.Tensor, - *, - x0: Optional[torch.Tensor] = None, - rtol: float = 1e-7, - atol: float = 1e-7, - maxiter: Optional[int] = None, -) -> InverseHvpResult: - r""" - Conjugate gradient solver for the Hessian vector product. - - Args: - hvp: A callable Hvp, operating with tensors of size N. - b: A vector or matrix, the right hand side of the equation \(Hx = b\). - x0: Initial guess for hvp. - rtol: Maximum relative tolerance of result. - atol: Absolute tolerance of result. - maxiter: Maximum number of iterations. If None, defaults to 10*len(b). - - Returns: - Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], - with a vector x, solution of \(Ax=b\), and a dictionary containing - information about the convergence of CG. - """ - - if x0 is None: - x0 = torch.clone(b) - if maxiter is None: - maxiter = len(b) * 10 - - y_norm = torch.sum(torch.matmul(b, b)).item() - stopping_val = max([rtol**2 * y_norm, atol**2]) - - x = x0 - p = r = (b - hvp(x)).squeeze().type(torch.float64) - gamma = torch.sum(torch.matmul(r, r)).item() - optimal = False - - for k in range(maxiter): - if gamma < stopping_val: - optimal = True - break - Ap = hvp(p).squeeze() - alpha = gamma / torch.sum(torch.matmul(p, Ap)).item() - x += alpha * p - r -= alpha * Ap - gamma_ = torch.sum(torch.matmul(r, r)).item() - beta = gamma_ / gamma - gamma = gamma_ - p = r + beta * p - - info = {"niter": k, "optimal": optimal, "gamma": gamma} - return InverseHvpResult(x=x, info=info) - - -@InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Lissa) -def solve_lissa( - model: TorchTwiceDifferentiable, - training_data: DataLoader, - b: torch.Tensor, - hessian_perturbation: float = 0.0, - *, - maxiter: int = 1000, - dampen: float = 0.0, - scale: float = 10.0, - h0: Optional[torch.Tensor] = None, - rtol: float = 1e-4, - progress: bool = False, -) -> InverseHvpResult: - r""" - Uses LISSA, Linear time Stochastic Second-Order Algorithm, to iteratively - approximate the inverse Hessian. 
More precisely, it finds x s.t. \(Hx = b\), - with \(H\) being the model's second derivative wrt. the parameters. - This is done with the update - - \[H^{-1}_{j+1} b = b + (I - d)\, H^{-1}_j b - \frac{H\, H^{-1}_j b}{s},\] - - where \(I\) is the identity matrix, \(d\) is a dampening term and \(s\) a scaling - factor that are applied to help convergence. For details, see - (Koh and Liang, 2017)[^1] and the original paper - (Agarwal et al., 2017)[^2]. - - Args: - model: A model wrapped in the TwiceDifferentiable interface. - training_data: A DataLoader containing the training data. - b: A vector or matrix, the right hand side of the equation \(Hx = b\). - hessian_perturbation: Regularization of the hessian. - maxiter: Maximum number of iterations. - dampen: Dampening factor, defaults to 0 for no dampening. - scale: Scaling factor, defaults to 10. - h0: Initial guess for hvp. - rtol: Tolerance to use for early stopping. - progress: If True, display progress bars. - - Returns: - Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], with a matrix of shape [NxP] with each line being a solution of \(Hx=b\), - and a dictionary containing information about the accuracy of the solution. - """ - - if h0 is None: - h_estimate = torch.clone(b) - else: - h_estimate = h0 - shuffled_training_data = DataLoader( - training_data.dataset, training_data.batch_size, shuffle=True - ) - - def lissa_step( - h: torch.Tensor, reg_hvp: Callable[[torch.Tensor], torch.Tensor] - ) -> torch.Tensor: - """Given an estimate of the hessian inverse and the regularised hessian - vector product, it computes the next estimate. - - Args: - h: An estimate of the hessian inverse. - reg_hvp: Regularised hessian vector product. - - Returns: - The next estimate of the hessian inverse. - """ - return b + (1 - dampen) * h - reg_hvp(h) / scale - - for _ in tqdm(range(maxiter), disable=not progress, desc="Lissa"): - x, y = next(iter(shuffled_training_data)) - grad_xy = model.grad(x, y, create_graph=True) - reg_hvp = ( - lambda v: model.mvp(grad_xy, v, model.parameters) + hessian_perturbation * v - ) - residual = lissa_step(h_estimate, reg_hvp) - h_estimate - h_estimate += residual - if torch.isnan(h_estimate).any(): - raise RuntimeError("NaNs in h_estimate. Increase scale or dampening.") - max_residual = torch.max(torch.abs(residual / h_estimate)) - if max_residual < rtol: - break - mean_residual = torch.mean(torch.abs(residual / h_estimate)) - logger.info( - f"Terminated Lissa with {max_residual*100:.2f} % max residual." - f" Mean residual: {mean_residual*100:.5f} %" - ) - info = { - "max_perc_residual": max_residual * 100, - "mean_perc_residual": mean_residual * 100, - } - return InverseHvpResult(x=h_estimate / scale, info=info) - - -@InversionRegistry.register(TorchTwiceDifferentiable, InversionMethod.Arnoldi) -def solve_arnoldi( - model: TorchTwiceDifferentiable, - training_data: DataLoader, - b: torch.Tensor, - hessian_perturbation: float = 0.0, - *, - rank_estimate: int = 10, - krylov_dimension: Optional[int] = None, - low_rank_representation: Optional[LowRankProductRepresentation] = None, - tol: float = 1e-6, - max_iter: Optional[int] = None, - eigen_computation_on_gpu: bool = False, -) -> InverseHvpResult: - r""" - Solves the linear system Hx = b, where H is the Hessian of the model's loss function and b is the given - right-hand side vector.
It employs the [implicitly restarted Arnoldi method](https://en.wikipedia.org/wiki/Arnoldi_iteration) for - computing a partial eigen decomposition, which is used for the inversion, i.e. - - \[x = V D^{-1} V^T b\] - - where \(D\) is a diagonal matrix with the top (in absolute value) `rank_estimate` eigenvalues of the Hessian - and \(V\) contains the corresponding eigenvectors. - - Args: - model: A PyTorch model instance that is twice differentiable, wrapped into - [TorchTwiceDifferentiable][pydvl.influence.torch.torch_differentiable.TorchTwiceDifferentiable]. - The Hessian will be calculated with respect to this model's parameters. - training_data: A DataLoader instance that provides the model's training data. - Used in calculating the Hessian-vector products. - b: The right-hand side vector in the system Hx = b. - hessian_perturbation: Optional regularization parameter added to the Hessian-vector - product for numerical stability. - rank_estimate: The number of eigenvalues and corresponding eigenvectors to compute. - Represents the desired rank of the Hessian approximation. - krylov_dimension: The number of Krylov vectors to use for the Lanczos method. - Defaults to min(model's number of parameters, max(2 times rank_estimate + 1, 20)). - low_rank_representation: An instance of - [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation] - containing a previously computed low-rank representation of the Hessian. If provided, all other parameters - are ignored; otherwise, a new low-rank representation is computed - using provided parameters. - tol: The stopping criteria for the Lanczos algorithm. - Ignored if `low_rank_representation` is provided. - max_iter: The maximum number of iterations for the Lanczos method. - Ignored if `low_rank_representation` is provided. - eigen_computation_on_gpu: If True, tries to execute the eigen pair approximation on the model's device - via a cupy implementation. Ensure the model size or rank_estimate is appropriate for device memory. - If False, the eigen pair approximation is executed on the CPU by the scipy wrapper to ARPACK. - - Returns: - Instance of [InverseHvpResult][pydvl.influence.torch.torch_differentiable.InverseHvpResult], - having the solution vector x that satisfies the system \(Ax = b\), - where \(A\) is a low-rank approximation of the Hessian \(H\) of the model's loss function, and an instance - of [LowRankProductRepresentation][pydvl.influence.torch.torch_differentiable.LowRankProductRepresentation], - which represents the approximation of H. - """ - - b_device = b.device if hasattr(b, "device") else torch.device("cpu") - - if low_rank_representation is None: - if b_device.type == "cuda" and not eigen_computation_on_gpu: - raise ValueError( - "Using 'eigen_computation_on_gpu=False' while 'b' is on a 'cuda' device is not supported.
" - "To address this, consider the following options:\n" - " - Set eigen_computation_on_gpu=True if your model and data are small enough " - "and if 'cupy' is available in your environment.\n" - " - Move 'b' to the CPU with b.to('cpu').\n" - " - Precompute a low rank representation and move it to the 'b' device using:\n" - " low_rank_representation = model_hessian_low_rank(model, training_data, ..., " - "eigen_computation_on_gpu=False).to(b.device)" - ) - - low_rank_representation = model_hessian_low_rank( - model, - training_data, - hessian_perturbation=hessian_perturbation, - rank_estimate=rank_estimate, - krylov_dimension=krylov_dimension, - tol=tol, - max_iter=max_iter, - eigen_computation_on_gpu=eigen_computation_on_gpu, - ) - else: - if b_device.type != low_rank_representation.device.type: - raise RuntimeError( - f"The devices for 'b' and 'low_rank_representation' do not match.\n" - f" - 'b' is on device: {b_device}\n" - f" - 'low_rank_representation' is on device: {low_rank_representation.device}\n" - f"\nTo resolve this, consider moving 'low_rank_representation' to '{b_device}' by using:\n" - f"low_rank_representation = low_rank_representation.to(b.device)" - ) - - logger.info("Using provided low rank representation, ignoring other parameters") - - result = low_rank_representation.projections @ ( - torch.diag_embed(1.0 / low_rank_representation.eigen_vals) - @ (low_rank_representation.projections.t() @ b.t()) - ) - return InverseHvpResult( - x=result.t(), - info={ - "eigenvalues": low_rank_representation.eigen_vals, - "eigenvectors": low_rank_representation.projections, - }, - ) From 0d41145b3cf7ae32745f6ee5c5573441ace2a348 Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 17 Dec 2023 21:06:52 +0100 Subject: [PATCH 09/11] Delete module added by mistake --- src/pydvl/influence/general.py | 327 --------------------------------- 1 file changed, 327 deletions(-) delete mode 100644 src/pydvl/influence/general.py diff --git a/src/pydvl/influence/general.py b/src/pydvl/influence/general.py deleted file mode 100644 index bf1ee6f94..000000000 --- a/src/pydvl/influence/general.py +++ /dev/null @@ -1,327 +0,0 @@ -""" -This module contains influence calculation functions for general -models, as introduced in (Koh and Liang, 2017)[^1]. - -## References - -[^1]: Koh, P.W., Liang, P., 2017. - [Understanding Black-box Predictions via Influence Functions](https://proceedings.mlr.press/v70/koh17a.html). - In: Proceedings of the 34th International Conference on Machine Learning, pp. 1885–1894. PMLR. -""" -import logging -from copy import deepcopy -from enum import Enum -from typing import Any, Callable, Dict, Generator, Optional, Type - -from tqdm.auto import tqdm - -from .inversion import InversionMethod, solve_hvp -from .twice_differentiable import ( - DataLoaderType, - InverseHvpResult, - TensorType, - TensorUtilities, - TwiceDifferentiable, -) - -__all__ = ["compute_influences", "InfluenceType", "compute_influence_factors"] - -logger = logging.getLogger(__name__) - - -class InfluenceType(str, Enum): - r""" - Enum representation for the types of influence. 
-
-    Attributes:
-        Up: Up-weighting a training point, see section 2.1 of
-            (Koh and Liang, 2017)[^1].
-        Perturbation: Perturb a training point, see section 2.2 of
-            (Koh and Liang, 2017)[^1].
-
-    """
-
-    Up = "up"
-    Perturbation = "perturbation"
-
-
-def compute_influence_factors(
-    model: TwiceDifferentiable,
-    training_data: DataLoaderType,
-    test_data: DataLoaderType,
-    inversion_method: InversionMethod,
-    *,
-    hessian_perturbation: float = 0.0,
-    progress: bool = False,
-    **kwargs: Any,
-) -> InverseHvpResult:
-    r"""
-    Calculates influence factors of a model for training and test data.
-
-    Given a test point \(z_{test} = (x_{test}, y_{test})\), a loss \(L(z_{test}, \theta)\)
-    (\(\theta\) being the parameters of the model) and the Hessian of the model \(H_{\theta}\),
-    influence factors are defined as:
-
-    \[
-    s_{test} = H_{\theta}^{-1} \operatorname{grad}_{\theta} L(z_{test}, \theta).
-    \]
-
-    They are used for efficient influence calculation. This method first (implicitly) calculates
-    the Hessian and then (explicitly) finds the influence factors for the model using the given
-    inversion method. The parameter `hessian_perturbation` is used to regularize the inversion of
-    the Hessian. For more info, refer to (Koh and Liang, 2017)[^1], paragraph 3.
-
-    Args:
-        model: A model wrapped in the TwiceDifferentiable interface.
-        training_data: DataLoader containing the training data.
-        test_data: DataLoader containing the test data.
-        inversion_method: Name of the method for computing inverse hessian vector products.
-        hessian_perturbation: Regularization of the hessian.
-        progress: If True, display progress bars.
-
-    Returns:
-        Instance of [InverseHvpResult][pydvl.influence.twice_differentiable.InverseHvpResult], with an array
-        of size (N, D) containing the influence factors for each dimension (D) and test sample (N), and a
-        dictionary with information on the inversion.
-
-    """
-
-    tensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(
-        model
-    )
-
-    stack = tensor_util.stack
-    unsqueeze = tensor_util.unsqueeze
-    cat_gen = tensor_util.cat_gen
-    cat = tensor_util.cat
-
-    def test_grads() -> Generator[TensorType, None, None]:
-        for x_test, y_test in tqdm(
-            test_data, disable=not progress, desc="Batch Test Gradients"
-        ):
-            yield stack(
-                [
-                    model.grad(inpt, target)
-                    for inpt, target in zip(unsqueeze(x_test, 1), y_test)
-                ]
-            )  # type:ignore
-
-    try:
-        # in case test_data is a torch DataLoader created from a Dataset,
-        # we can pre-allocate the result tensor to reduce memory consumption
-        resulting_shape = (len(test_data.dataset), model.num_params)  # type:ignore
-        rhs = cat_gen(
-            test_grads(), resulting_shape, model  # type:ignore
-        )  # type:ignore
-    except Exception as e:
-        logger.warning(
-            f"Failed to pre-allocate result tensor: {e}\n"
-            f"Evaluating all resulting tensors and concatenating"
-        )
-        rhs = cat(list(test_grads()))
-
-    return solve_hvp(
-        inversion_method,
-        model,
-        training_data,
-        rhs,
-        hessian_perturbation=hessian_perturbation,
-        **kwargs,
-    )
-
-
-def compute_influences_up(
-    model: TwiceDifferentiable,
-    input_data: DataLoaderType,
-    influence_factors: TensorType,
-    *,
-    progress: bool = False,
-) -> TensorType:
-    r"""
-    Given the model, the training points, and the influence factors, this function calculates the
-    influences using the up-weighting method.
-
-    The procedure involves two main steps:
-    1. Calculating the gradients of the model with respect to each training sample
-       (\(\operatorname{grad}_{\theta} L\), where \(L\) is the loss of a single point and \(\theta\) are the
-       parameters of the model).
-    2. Multiplying each gradient with the influence factors, as sketched below.
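Step 2 reduces to a single matrix product over the parameter axis once the factors and the per-sample training gradients are materialized as dense tensors. A minimal sketch, where all sizes are illustrative assumptions; in `compute_influences_up` the same contraction is the `einsum("ta,va->tv", ...)` call:

```python
import torch

n_test, n_train, n_params = 3, 5, 7  # illustrative sizes only

influence_factors = torch.randn(n_test, n_params)  # one row of s_test per test point
train_grads = torch.randn(n_train, n_params)       # one gradient row per training point

# Contract over the parameter axis, i.e. a plain matmul:
influences = influence_factors @ train_grads.T
assert influences.shape == (n_test, n_train)
```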
-
-    For a detailed description of the methodology, see section 2.1 of (Koh and Liang, 2017)[^1].
-
-    Args:
-        model: A model that implements the TwiceDifferentiable interface.
-        input_data: DataLoader containing the samples for which the influence will be calculated.
-        influence_factors: Array containing pre-computed influence factors.
-        progress: If set to True, progress bars will be displayed during computation.
-
-    Returns:
-        An array of shape [NxM], where N is the number of influence factors, and M is the number of input samples.
-    """
-
-    tensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable(
-        model
-    )
-
-    stack = tensor_util.stack
-    unsqueeze = tensor_util.unsqueeze
-    cat_gen = tensor_util.cat_gen
-    cat = tensor_util.cat
-    einsum = tensor_util.einsum
-
-    def train_grads() -> Generator[TensorType, None, None]:
-        for x, y in tqdm(
-            input_data, disable=not progress, desc="Batch Split Input Gradients"
-        ):
-            yield stack(
-                [model.grad(inpt, target) for inpt, target in zip(unsqueeze(x, 1), y)]
-            )  # type:ignore
-
-    try:
-        # in case input_data is a torch DataLoader created from a Dataset,
-        # we can pre-allocate the result tensor to reduce memory consumption
-        resulting_shape = (len(input_data.dataset), model.num_params)  # type:ignore
-        train_grad_tensor = cat_gen(
-            train_grads(), resulting_shape, model  # type:ignore
-        )  # type:ignore
-    except Exception as e:
-        logger.warning(
-            f"Failed to pre-allocate result tensor: {e}\n"
-            f"Evaluating all resulting tensors and concatenating"
-        )
-        train_grad_tensor = cat([x for x in train_grads()])  # type:ignore
-
-    return einsum("ta,va->tv", influence_factors, train_grad_tensor)  # type:ignore
-
-
-def compute_influences_pert(
-    model: TwiceDifferentiable,
-    input_data: DataLoaderType,
-    influence_factors: TensorType,
-    *,
-    progress: bool = False,
-) -> TensorType:
-    r"""
-    Calculates the influence values based on the influence factors and training samples using the perturbation method.
-
-    The process involves two main steps:
-    1. Calculating the gradient of the model with respect to each training sample
-       (\(\operatorname{grad}_{\theta} L\), where \(L\) is the loss of the model for a single data point and \(\theta\)
-       are the parameters of the model).
-    2. Using the method [TwiceDifferentiable.mvp][pydvl.influence.twice_differentiable.TwiceDifferentiable.mvp]
-       to efficiently compute the product of the
-       influence factors and \(\operatorname{grad}_x \operatorname{grad}_{\theta} L\).
-
-    For a detailed methodology, see section 2.2 of (Koh and Liang, 2017)[^1].
-
-    Args:
-        model: A model that implements the TwiceDifferentiable interface.
-        input_data: DataLoader containing the samples for which the influence will be calculated.
-        influence_factors: Array containing pre-computed influence factors.
-        progress: If set to True, progress bars will be displayed during computation.
-
-    Returns:
-        A 3D array with shape [NxMxP], where N is the number of influence factors,
-        M is the number of input samples, and P is the number of features.
- """ - - tensor_util: Type[TensorUtilities] = TensorUtilities.from_twice_differentiable( - model - ) - stack = tensor_util.stack - tu_slice = tensor_util.slice - reshape = tensor_util.reshape - get_element = tensor_util.get_element - shape = tensor_util.shape - - all_pert_influences = [] - for x, y in tqdm( - input_data, - disable=not progress, - desc="Batch Influence Perturbation", - ): - for i in range(len(x)): - tensor_x = tu_slice(x, i, i + 1) - grad_xy = model.grad(tensor_x, get_element(y, i), create_graph=True) - perturbation_influences = model.mvp( - grad_xy, - influence_factors, - backprop_on=tensor_x, - ) - all_pert_influences.append( - reshape(perturbation_influences, (-1, *shape(get_element(x, i)))) - ) - - return stack(all_pert_influences, axis=1) # type:ignore - - -influence_type_registry: Dict[InfluenceType, Callable[..., TensorType]] = { - InfluenceType.Up: compute_influences_up, - InfluenceType.Perturbation: compute_influences_pert, -} - - -def compute_influences( - differentiable_model: TwiceDifferentiable, - training_data: DataLoaderType, - *, - test_data: Optional[DataLoaderType] = None, - input_data: Optional[DataLoaderType] = None, - inversion_method: InversionMethod = InversionMethod.Direct, - influence_type: InfluenceType = InfluenceType.Up, - hessian_regularization: float = 0.0, - progress: bool = False, - **kwargs: Any, -) -> TensorType: # type: ignore # ToDO fix typing - r""" - Calculates the influence of each input data point on the specified test points. - - This method operates in two primary stages: - 1. Computes the influence factors for all test points concerning the model and its training data. - 2. Uses these factors to derive the influences over the complete set of input data. - - The influence calculation relies on the twice-differentiable nature of the provided model. - - Args: - differentiable_model: A model bundled with its corresponding loss in the `TwiceDifferentiable` wrapper. - training_data: DataLoader instance supplying the training data. This data is pivotal in computing the - Hessian matrix for the model's loss. - test_data: DataLoader instance with the test samples. Defaults to `training_data` if None. - input_data: DataLoader instance holding samples whose influences need to be computed. Defaults to - `training_data` if None. - inversion_method: An enumeration value determining the approach for inverting matrices - or computing inverse operations, see [.inversion.InversionMethod] - progress: A boolean indicating whether progress bars should be displayed during computation. - influence_type: Determines the methodology for computing influences. - Valid choices include 'up' (for up-weighting) and 'perturbation'. - For an in-depth understanding, see (Koh and Liang, 2017)1. - hessian_regularization: A lambda value used in Hessian regularization. The regularized Hessian, \( H_{reg} \), - is computed as \( H + \lambda \times I \), where \( I \) is the identity matrix and \( H \) - is the simple, unmodified Hessian. This regularization is typically utilized for more - sophisticated models to ensure that the Hessian remains positive definite. - - Returns: - The shape of this array varies based on the `influence_type`. If 'up', the shape is [NxM], where - N denotes the number of test points and M denotes the number of training points. Conversely, if the - influence_type is 'perturbation', the shape is [NxMxP], with P representing the number of input features. 
- """ - - if input_data is None: - input_data = deepcopy(training_data) - if test_data is None: - test_data = deepcopy(training_data) - - influence_factors, _ = compute_influence_factors( - differentiable_model, - training_data, - test_data, - inversion_method, - hessian_perturbation=hessian_regularization, - progress=progress, - **kwargs, - ) - - return influence_type_registry[influence_type]( - differentiable_model, - input_data, - influence_factors, - progress=progress, - ) From 8576ac87c7bfdb44cced8508ebfcf697bbf1dcda Mon Sep 17 00:00:00 2001 From: Anes Benmerzoug Date: Sun, 17 Dec 2023 21:08:19 +0100 Subject: [PATCH 10/11] Update changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 136136bba..967555de2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ## Unreleased ### Added + - New influence function interface `InfluenceFunctionModel` - Data parallel computation with `DaskInfluenceCalculator` [PR #26](https://github.com/aai-institute/pyDVL/issues/26) @@ -14,11 +15,12 @@ ### Changed +- Simplify display of computation progress + [PR #466](https://github.com/aai-institute/pyDVL/pull/466) - Improve readme and explain better the examples [PR #465](https://github.com/aai-institute/pyDVL/pull/465) - Simplify and improve tests, add CodeCov code coverage [PR #429](https://github.com/aai-institute/pyDVL/pull/429) -- - **Breaking Changes** - Removed `compute_influences` and all related code. Replaced by new `InfluenceFunctionModel` interface. Removed modules: From 504a17294e4e588554ab01d403c1d1da7c6988da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristof=20Schr=C3=B6der?= Date: Mon, 18 Dec 2023 11:58:01 +0100 Subject: [PATCH 11/11] Add comment about avoiding circular import, use progress=True in test --- src/pydvl/utils/progress.py | 1 + tests/value/shapley/test_montecarlo.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py index 9be62fac2..8a49f08aa 100644 --- a/src/pydvl/utils/progress.py +++ b/src/pydvl/utils/progress.py @@ -6,6 +6,7 @@ from tqdm.auto import tqdm +# This is needed to avoid circular import errors if TYPE_CHECKING: from pydvl.value.result import ValuationResult from pydvl.value.stopping import StoppingCriterion diff --git a/tests/value/shapley/test_montecarlo.py b/tests/value/shapley/test_montecarlo.py index b2b558461..d95cdce9e 100644 --- a/tests/value/shapley/test_montecarlo.py +++ b/tests/value/shapley/test_montecarlo.py @@ -65,8 +65,8 @@ def test_analytic_montecarlo_shapley( mode=fun, n_jobs=n_jobs, config=parallel_config, - progress=False, seed=seed, + progress=True, **kwargs )