aai-institute · AnesBenmerzoug · Dec 18, 2023 · Dec 10, 2023 · Dec 10, 2023 · Dec 10, 2023
diff --git a/notebooks/support/torch.py b/notebooks/support/torch.py
@@ -12,9 +12,9 @@
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.data import DataLoader
 from torchvision.models import ResNet18_Weights, resnet18
+from tqdm.auto import tqdm
 
 from pydvl.influence.torch import as_tensor
-from pydvl.utils import maybe_progress
 
 from .types import Losses
 
@@ -124,7 +124,7 @@ def fit_torch_model(
     train_loss = []
     val_loss = []
 
-    for epoch in maybe_progress(range(num_epochs), progress, desc="Model fitting"):
+    for epoch in tqdm(range(num_epochs), disable=not progress, desc="Model fitting"):
         batch_loss = []
         for train_batch in training_data:
             batch_x, batch_y = train_batch

diff --git a/src/pydvl/influence/general.py b/src/pydvl/influence/general.py
@@ -13,7 +13,8 @@
 from enum import Enum
 from typing import Any, Callable, Dict, Generator, Optional, Type
 
-from ..utils import maybe_progress
+from tqdm.auto import tqdm
+
 from .inversion import InversionMethod, solve_hvp
 from .twice_differentiable import (
     DataLoaderType,
@@ -93,8 +94,8 @@ def compute_influence_factors(
     cat = tensor_util.cat
 
     def test_grads() -> Generator[TensorType, None, None]:
-        for x_test, y_test in maybe_progress(
-            test_data, progress, desc="Batch Test Gradients"
+        for x_test, y_test in tqdm(
+            test_data, disable=not progress, desc="Batch Test Gradients"
         ):
             yield stack(
                 [
@@ -167,8 +168,8 @@ def compute_influences_up(
     einsum = tensor_util.einsum
 
     def train_grads() -> Generator[TensorType, None, None]:
-        for x, y in maybe_progress(
-            input_data, progress, desc="Batch Split Input Gradients"
+        for x, y in tqdm(
+            input_data, disable=not progress, desc="Batch Split Input Gradients"
         ):
             yield stack(
                 [model.grad(inpt, target) for inpt, target in zip(unsqueeze(x, 1), y)]
@@ -232,9 +233,9 @@ def compute_influences_pert(
     shape = tensor_util.shape
 
     all_pert_influences = []
-    for x, y in maybe_progress(
+    for x, y in tqdm(
         input_data,
-        progress,
+        disable=not progress,
         desc="Batch Influence Perturbation",
     ):
         for i in range(len(x)):

diff --git a/src/pydvl/influence/torch/torch_differentiable.py b/src/pydvl/influence/torch/torch_differentiable.py
@@ -25,8 +25,8 @@
 from torch import autograd
 from torch.autograd import Variable
 from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
 
-from ...utils import maybe_progress
 from ..inversion import InversionMethod, InversionRegistry
 from ..twice_differentiable import (
     InverseHvpResult,
@@ -192,7 +192,7 @@ def mvp(
         z = (grad_xy * Variable(v)).sum(dim=1)
 
         mvp = []
-        for i in maybe_progress(range(len(z)), progress, desc="MVP"):
+        for i in tqdm(range(len(z)), disable=not progress, desc="MVP"):
             mvp.append(
                 flatten_tensors_to_vector(
                     autograd.grad(z[i], backprop_on, retain_graph=True)
@@ -578,7 +578,7 @@ def solve_batch_cg(
     total_grad_xy = torch.empty(0)
     total_points = 0
 
-    for x, y in maybe_progress(training_data, progress, desc="Batch Train Gradients"):
+    for x, y in tqdm(training_data, disable=not progress, desc="Batch Train Gradients"):
         grad_xy = model.grad(x, y, create_graph=True)
         if total_grad_xy.nelement() == 0:
             total_grad_xy = torch.zeros_like(grad_xy)
@@ -592,7 +592,7 @@ def solve_batch_cg(
     batch_cg = torch.zeros_like(b)
     info = {}
 
-    for idx, bi in enumerate(maybe_progress(b, progress, desc="Conjugate gradient")):
+    for idx, bi in enumerate(tqdm(b, disable=not progress, desc="Conjugate gradient")):
         batch_result, batch_info = solve_cg(
             reg_hvp, bi, x0=x0, rtol=rtol, atol=atol, maxiter=maxiter
         )
@@ -724,7 +724,7 @@ def lissa_step(
         """
         return b + (1 - dampen) * h - reg_hvp(h) / scale
 
-    for _ in maybe_progress(range(maxiter), progress, desc="Lissa"):
+    for _ in tqdm(range(maxiter), disable=not progress, desc="Lissa"):
         x, y = next(iter(shuffled_training_data))
         grad_xy = model.grad(x, y, create_graph=True)
         reg_hvp = (

diff --git a/src/pydvl/reporting/scores.py b/src/pydvl/reporting/scores.py
@@ -2,8 +2,9 @@
 
 import numpy as np
 from numpy.typing import NDArray
+from tqdm.auto import tqdm
 
-from pydvl.utils import Utility, maybe_progress
+from pydvl.utils import Utility
 from pydvl.value.result import ValuationResult
 
 __all__ = ["compute_removal_score"]
@@ -44,7 +45,7 @@ def compute_removal_score(
     # We sort in descending order if we want to remove the best values
     values.sort(reverse=remove_best)
 
-    for pct in maybe_progress(percentages, display=progress, desc="Removal Scores"):
+    for pct in tqdm(percentages, disable=not progress, desc="Removal Scores"):
         n_removal = int(pct * len(u.data))
         indices = values.indices[n_removal:]
         score = u(indices)

diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py
@@ -1,74 +1,33 @@
-"""
-!!! Warning
-    This module is deprecated and will be removed in a future release.
-    It implements a wrapper for the [tqdm](https://tqdm.github.io/) progress bar
-    iterator for easy toggling, but this functionality is already provided by
-    the `disable` argument of `tqdm`.
-"""
-import collections.abc
-from typing import Iterable, Iterator, Union
+from itertools import cycle, takewhile
+from typing import TYPE_CHECKING, Collection, Iterator
 
 from tqdm.auto import tqdm
 
-__all__ = ["maybe_progress"]
+if TYPE_CHECKING:
+    from pydvl.value.result import ValuationResult
+    from pydvl.value.stopping import StoppingCriterion
 
-
-class MockProgress(collections.abc.Iterator):
-    """A Naive mock class to use with maybe_progress and tqdm.
-    Mocked methods don't support return values.
-    Mocked properties don't do anything
-    """
-
-    class MiniMock:
-        def __call__(self, *args, **kwargs):
-            pass
-
-        def __add__(self, other):
-            pass
-
-        def __sub__(self, other):
-            pass
-
-        def __mul__(self, other):
-            pass
-
-        def __floordiv__(self, other):
-            pass
-
-        def __truediv__(self, other):
-            pass
-
-    def __init__(self, iterator: Union[Iterator, Iterable]):
-        # Since there is no _it in __dict__ at this point, doing here
-        # self._it = iterator
-        # results in a call to __getattr__() and the assignment fails, so we
-        # use __dict__ instead
-        self.__dict__["_it"] = iterator
-
-    def __iter__(self):
-        return iter(self._it)
-
-    def __next__(self):
-        return next(self._it)
-
-    def __getattr__(self, key):
-        return self.MiniMock()
-
-    def __setattr__(self, key, value):
-        pass
+__all__ = ["repeat_indices"]
 
 
-def maybe_progress(
-    it: Union[int, Iterable, Iterator], display: bool = False, **kwargs
-) -> Union[tqdm, MockProgress]:
-    """Returns either a tqdm progress bar or a mock object which wraps the
-    iterator as well, but ignores any accesses to methods or properties.
+def repeat_indices(
+    indices: Collection[int],
+    result: "ValuationResult",
+    done: "StoppingCriterion",
+    **kwargs
+) -> Iterator[int]:
+    """Helper function to cycle repeatedly over a collection of indices
+    until the stopping criterion is satisfied, while displaying progress.
 
     Args:
-        it: the iterator to wrap
-        display: set to True to return a tqdm bar
-        kwargs: Keyword arguments that will be forwarded to tqdm
+        indices: Collection of indices that is cycled over until done.
+        result: Current results, passed to `done` to test for completion.
+        done: Stopping criterion; its `completion()` sets the bar position.
+        kwargs: Extra keyword arguments for tqdm (not `total` or `unit`).
     """
-    if isinstance(it, int):
-        it = range(it)  # type: ignore
-    return tqdm(it, **kwargs) if display else MockProgress(it)
+    with tqdm(total=100, unit="%", **kwargs) as pbar:
+        it = takewhile(lambda _: not done(result), cycle(indices))
+        for i in it:
+            yield i
+            pbar.update(100 * done.completion() - pbar.n)
+            pbar.refresh()
diff --git a/src/pydvl/value/least_core/__init__.py b/src/pydvl/value/least_core/__init__.py
@@ -96,7 +96,7 @@ def compute_least_core_values(
             solver_options.update(kwargs)
 
     if mode == LeastCoreMode.MonteCarlo:
-        # TODO fix progress showing and maybe_progress in remote case
+        # TODO fix progress showing in remote case
         progress = False
         if n_iterations is None:
             raise ValueError("n_iterations cannot be None for Monte Carlo Least Core")

diff --git a/src/pydvl/value/least_core/montecarlo.py b/src/pydvl/value/least_core/montecarlo.py
@@ -4,10 +4,10 @@
 
 import numpy as np
 from numpy.typing import NDArray
+from tqdm.auto import tqdm
 
 from pydvl.parallel import MapReduceJob, ParallelConfig, effective_n_jobs
 from pydvl.utils.numeric import random_powerset
-from pydvl.utils.progress import maybe_progress
 from pydvl.utils.types import Seed
 from pydvl.utils.utility import Utility
 from pydvl.value.least_core.common import LeastCoreProblem, lc_solve_problem
@@ -175,7 +175,7 @@ def _montecarlo_least_core(
     A_lb = np.zeros((n_iterations, n))
 
     for i, subset in enumerate(
-        maybe_progress(power_set, progress, total=n_iterations, position=job_id)
+        tqdm(power_set, disable=not progress, total=n_iterations, position=job_id)
     ):
         indices: NDArray[np.bool_] = np.zeros(n, dtype=bool)
         indices[list(subset)] = True

diff --git a/src/pydvl/value/least_core/naive.py b/src/pydvl/value/least_core/naive.py
@@ -4,8 +4,9 @@
 
 import numpy as np
 from numpy.typing import NDArray
+from tqdm.auto import tqdm
 
-from pydvl.utils import Utility, maybe_progress, powerset
+from pydvl.utils import Utility, powerset
 from pydvl.value.least_core.common import LeastCoreProblem, lc_solve_problem
 from pydvl.value.result import ValuationResult
 
@@ -103,14 +104,17 @@ def lc_prepare_problem(u: Utility, progress: bool = False) -> LeastCoreProblem:
 
     logger.debug("Iterating over all subsets")
     utility_values = np.zeros(powerset_size)
-    for i, subset in enumerate(
-        maybe_progress(
-            powerset(u.data.indices), progress, total=powerset_size - 1, position=0
+    for i, subset in enumerate(  # type: ignore
+        tqdm(
+            powerset(u.data.indices),
+            disable=not progress,
+            total=powerset_size - 1,
+            position=0,
         )
     ):
         indices: NDArray[np.bool_] = np.zeros(n, dtype=bool)
         indices[list(subset)] = True
         A_lb[i, indices] = 1
-        utility_values[i] = u(subset)
+        utility_values[i] = u(subset)  # type: ignore
 
     return LeastCoreProblem(utility_values, A_lb)
diff --git a/src/pydvl/value/oob/oob.py b/src/pydvl/value/oob/oob.py
@@ -12,8 +12,9 @@
 from numpy.typing import NDArray
 from sklearn.base import is_classifier, is_regressor
 from sklearn.ensemble import BaggingClassifier, BaggingRegressor
+from tqdm.auto import tqdm
 
-from pydvl.utils import Seed, Utility, maybe_progress
+from pydvl.utils import Seed, Utility
 from pydvl.utils.types import LossFunction
 from pydvl.value.result import ValuationResult
 
@@ -112,8 +113,8 @@ def compute_data_oob(
 
     bag.fit(u.data.x_train, u.data.y_train)
 
-    for est, samples in maybe_progress(
-        zip(bag.estimators_, bag.estimators_samples_), progress, total=n_est
+    for est, samples in tqdm(
+        zip(bag.estimators_, bag.estimators_samples_), disable=not progress, total=n_est
     ):  # The bottleneck is the bag fitting not this part so TQDM is not very useful here
         oob_idx = np.setxor1d(u.data.indices, np.unique(samples))
         array_loss = loss(

diff --git a/src/pydvl/value/shapley/gt.py b/src/pydvl/value/shapley/gt.py
@@ -30,9 +30,10 @@
 import numpy as np
 from numpy.random import SeedSequence
 from numpy.typing import NDArray
+from tqdm.auto import trange
 
 from pydvl.parallel import MapReduceJob, ParallelConfig, effective_n_jobs
-from pydvl.utils import Utility, maybe_progress
+from pydvl.utils import Utility
 from pydvl.utils.numeric import random_subset_of_size
 from pydvl.utils.status import Status
 from pydvl.utils.types import Seed, ensure_seed_sequence
@@ -155,7 +156,7 @@ def _group_testing_shapley(
     )  # indicator vars
     uu = np.empty(n_samples)  # utilities
 
-    for t in maybe_progress(n_samples, progress=progress, position=job_id):
+    for t in trange(n_samples, disable=not progress, position=job_id):
         k = rng.choice(const.kk, size=1, p=const.q).item()
         s = random_subset_of_size(u.data.indices, k, seed=rng)
         uu[t] = u(s)

diff --git a/src/pydvl/value/shapley/knn.py b/src/pydvl/value/shapley/knn.py
@@ -19,8 +19,9 @@
 import numpy as np
 from numpy.typing import NDArray
 from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors
+from tqdm.auto import tqdm
 
-from pydvl.utils import Utility, maybe_progress
+from pydvl.utils import Utility
 from pydvl.utils.status import Status
 from pydvl.value.result import ValuationResult
 
@@ -76,7 +77,7 @@ def knn_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:
     n = len(u.data)
     yt = u.data.y_train
     iterator = enumerate(zip(u.data.y_test, indices), start=1)
-    for j, (y, ii) in maybe_progress(iterator, progress):
+    for j, (y, ii) in tqdm(iterator, disable=not progress):
         value_at_x = int(yt[ii[-1]] == y) / n
         values[ii[-1]] += (value_at_x - values[ii[-1]]) / j
         for i in range(n - 2, n_neighbors, -1):  # farthest to closest

diff --git a/src/pydvl/value/shapley/montecarlo.py b/src/pydvl/value/shapley/montecarlo.py
@@ -47,14 +47,13 @@
 import operator
 from concurrent.futures import FIRST_COMPLETED, Future, wait
 from functools import reduce
-from itertools import cycle, takewhile
 from typing import Optional, Sequence, Union
 
 import numpy as np
 from deprecate import deprecated
 from numpy.random import SeedSequence
 from numpy.typing import NDArray
-from tqdm import tqdm
+from tqdm.auto import tqdm
 
 from pydvl.parallel import (
     CancellationPolicy,
@@ -65,6 +64,7 @@
     init_parallel_backend,
 )
 from pydvl.utils.numeric import random_powerset
+from pydvl.utils.progress import repeat_indices
 from pydvl.utils.types import Seed, ensure_seed_sequence
 from pydvl.utils.utility import Utility
 from pydvl.value.result import ValuationResult
@@ -281,11 +281,10 @@ def _combinatorial_montecarlo_shapley(
     )
 
     rng = np.random.default_rng(seed)
-    repeat_indices = takewhile(lambda _: not done(result), cycle(indices))
-    pbar = tqdm(disable=not progress, position=job_id, total=100, unit="%")
-    for idx in repeat_indices:
-        pbar.n = 100 * done.completion()
-        pbar.refresh()
+
+    for idx in repeat_indices(
+        indices, result=result, done=done, disable=not progress, position=job_id
+    ):
         # Randomly sample subsets of full dataset without idx
         subset = np.setxor1d(u.data.indices, [idx], assume_unique=True)
         s = next(random_powerset(subset, n_samples=1, seed=rng))