From 5ac062ae79f657d432d50854827a33650927438b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 10 Apr 2024 22:58:56 +0000
Subject: [PATCH 01/11] Bump transformers from 4.36.0 to 4.38.0

Bumps [transformers](https://github.com/huggingface/transformers) from 4.36.0 to 4.38.0.
- [Release notes](https://github.com/huggingface/transformers/releases)
- [Commits](https://github.com/huggingface/transformers/compare/v4.36.0...v4.38.0)

---
updated-dependencies:
- dependency-name: transformers
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 requirements-notebooks.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-notebooks.txt b/requirements-notebooks.txt
index 04c7cf350..23c03b420 100644
--- a/requirements-notebooks.txt
+++ b/requirements-notebooks.txt
@@ -3,5 +3,5 @@ distributed==2023.4.0
 pillow==10.3.0
 torch==2.0.1
 torchvision==0.15.2
-transformers==4.36.0
+transformers==4.38.0
 zarr==2.16.1

From fd463330da4a5c6d122dfe0a50508f1ad87abd36 Mon Sep 17 00:00:00 2001
From: Miguel de Benito Delgado <m.debenito@appliedai.de>
Date: Fri, 12 Apr 2024 19:50:46 +0200
Subject: [PATCH 02/11] =?UTF-8?q?Bump=20version:=200.9.0=20=E2=86=92=200.9?=
 =?UTF-8?q?.1.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg      | 2 +-
 setup.py              | 2 +-
 src/pydvl/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 0336abca1..db49fd207 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.9.0
+current_version = 0.9.1.dev0
 commit = False
 tag = False
 allow_dirty = False
diff --git a/setup.py b/setup.py
index c06657f1d..6c3fe2533 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     package_data={"pydvl": ["py.typed"]},
     packages=find_packages(where="src"),
     include_package_data=True,
-    version="0.9.0",
+    version="0.9.1.dev0",
     description="The Python Data Valuation Library",
     install_requires=[
         line
diff --git a/src/pydvl/__init__.py b/src/pydvl/__init__.py
index a0bef3141..cabed8f98 100644
--- a/src/pydvl/__init__.py
+++ b/src/pydvl/__init__.py
@@ -7,4 +7,4 @@
 The two main modules you will want to look at are [value][pydvl.value] and
 [influence][pydvl.influence].
 """
-__version__ = "0.9.0"
+__version__ = "0.9.1.dev0"

From 1f0ac18269c8b93c29edef8da8fffe15d5fb9193 Mon Sep 17 00:00:00 2001
From: Miguel de Benito Delgado <m.debenito@appliedai.de>
Date: Fri, 12 Apr 2024 20:13:28 +0200
Subject: [PATCH 03/11] [skip ci] Update citation file to latest release

---
 CITATION.cff | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index 7ca971f99..241536c7d 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -27,6 +27,6 @@ keywords:
   - Banzhaf index
 license: LGPL-3.0
 commit: 0e929ae121820b0014bf245da1b21032186768cb
-version: v0.7.0
-doi: 10.5281/zenodo.8311583
-date-released: '2023-09-02'
+version: v0.9.0
+doi: 10.5281/zenodo.10966754
+date-released: '2024-04-12'

From 599f7370b383f84b3e989d9f932bb8b5e13a7e4a Mon Sep 17 00:00:00 2001
From: Miguel de Benito Delgado <m.debenito@appliedai.de>
Date: Fri, 12 Apr 2024 20:21:52 +0200
Subject: [PATCH 04/11] [skip ci] Cosmetic

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d194f74ad..d49b8aef7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # Changelog
 
-## 0.9.0 🆕 New methods, better docs and bugfixes 📚🐞
+## 0.9.0 - 🆕 New methods, better docs and bugfixes 📚🐞
 
 ### Added
 

From ad697c233491773baba97b1ab06524875d02ee0e Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Wed, 17 Apr 2024 10:36:36 +0200
Subject: [PATCH 05/11] Make default argument for config None and instantiate
 in init

---
 src/pydvl/parallel/futures/__init__.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/pydvl/parallel/futures/__init__.py b/src/pydvl/parallel/futures/__init__.py
index c75d04299..0659137ca 100644
--- a/src/pydvl/parallel/futures/__init__.py
+++ b/src/pydvl/parallel/futures/__init__.py
@@ -21,7 +21,7 @@
 )
 def init_executor(
     max_workers: Optional[int] = None,
-    config: ParallelConfig = ParallelConfig(),
+    config: Optional[ParallelConfig] = None,
     **kwargs,
 ) -> Generator[Executor, None, None]:
     """Initializes a futures executor for the given parallel configuration.
@@ -50,6 +50,10 @@ def init_executor(
         assert results == [1, 2, 3, 4, 5]
         ```
     """
+
+    if config is None:
+        config = ParallelConfig()
+
     try:
         cls = ParallelBackend.BACKENDS[config.backend]
         with cls.executor(max_workers=max_workers, config=config, **kwargs) as e:

From 2bcf3f6e9facbdcbe3c90c609da0d5aad0c22245 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Wed, 17 Apr 2024 10:46:48 +0200
Subject: [PATCH 06/11] Update CHANGELOG.md

---
 CHANGELOG.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d49b8aef7..0092f6045 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## Unreleased
+
+### Fixed
+
+- `FutureWarning` for `ParallelConfig` was always raised, even when the object
+  was never explicitly instantiated
+  [PR #562](https://github.com/aai-institute/pyDVL/pull/562)
+
 ## 0.9.0 - 🆕 New methods, better docs and bugfixes 📚🐞
 
 ### Added

From f0d94b0ebc687b848d4d943e5db214903c7eb074 Mon Sep 17 00:00:00 2001
From: Miguel de Benito Delgado <m.debenito@appliedai.de>
Date: Fri, 19 Apr 2024 14:52:02 +0200
Subject: [PATCH 07/11] Some tweaks to the readme

---
 README.md | 267 ++++++++++++++++++++++--------------------------------
 1 file changed, 109 insertions(+), 158 deletions(-)

diff --git a/README.md b/README.md
index 2f7296018..97cf5f22d 100644
--- a/README.md
+++ b/README.md
@@ -16,10 +16,8 @@
     <a href="https://zenodo.org/badge/latestdoi/354117916"><img src="https://zenodo.org/badge/354117916.svg" alt="DOI"></a>
 </p>
 
-**pyDVL** collects algorithms for **Data Valuation** and **Influence Function** computation.
-
-Refer to the [Methods](https://pydvl.org/devel/getting-started/methods/)
-page of our documentation for a list of all implemented methods. 
+**pyDVL** collects algorithms for **Data Valuation** and **Influence Function**
+computation. Here is the list of [all methods implemented](https://pydvl.org/devel/getting-started/methods/).
 
 **Data Valuation** for machine learning is the task of assigning a scalar
 to each element of a training set which reflects its contribution to the final
@@ -29,7 +27,7 @@ pyDVL focuses on model-dependent methods.
 
 <div align="center" style="text-align:center;">
     <img
-        width="70%"
+        width="60%"
         align="center"
         style="display: block; margin-left: auto; margin-right: auto;"
         src="https://pydvl.org/devel/value/img/mclc-best-removal-10k-natural.svg"
@@ -48,7 +46,7 @@ of training samples over individual test points.
 
 <div align="center" style="text-align:center;">
     <img
-        width="70%"
+        width="60%"
         align="center"
         style="display: block; margin-left: auto; margin-right: auto;"
         src="https://pydvl.org/devel/examples/img/influence_functions_example.png"
@@ -82,180 +80,133 @@ $ pip install pyDVL[influence]
 ```
 
 For more instructions and information refer to [Installing pyDVL
-](https://pydvl.org/stable/getting-started/#installation) in the
-documentation.
+](https://pydvl.org/stable/getting-started/#installation) in the documentation.
 
 # Usage
 
-In the following subsections, we will showcase the usage of pyDVL
-for Data Valuation and Influence Functions using simple examples.
-
-For more instructions and information refer to [Getting
-Started](https://pydvl.org/stable/getting-started/first-steps/) in
-the documentation.
-We provide several examples for data valuation
-(e.g. [Shapley Data Valuation](https://pydvl.org/stable/examples/shapley_basic_spotify/))
-and for influence functions
-(e.g. [Influence Functions for Neural Networks](https://pydvl.org/stable/examples/influence_imagenet/))
-with details on the algorithms and their applications.
+Please read [Getting
+Started](https://pydvl.org/stable/getting-started/first-steps/) in the
+documentation for more instructions. We provide several examples for data
+valuation and for influence functions in our [Example
+Gallery](https://pydvl.org/stable/examples/).
 
 ## Influence Functions
 
-For influence computation, follow these steps:
-
-1. Import the necessary packages (The exact packages depend on your specific use case).
-
-   ```python
-   import torch
-   from torch import nn
-   from torch.utils.data import DataLoader, TensorDataset
-   
-   from pydvl.influence.torch import DirectInfluence
-   from pydvl.influence.torch.util import NestedTorchCatAggregator, TorchNumpyConverter
-   from pydvl.influence import SequentialInfluenceCalculator
-   ```
-
+1. Import the necessary packages (the exact ones depend on your specific use case).
 2. Create PyTorch data loaders for your train and test splits.
-
-   ```python
-   input_dim = (5, 5, 5)
-   output_dim = 3
-   train_x = torch.rand((10, *input_dim))
-   train_y = torch.rand((10, output_dim))
-   test_x = torch.rand((5, *input_dim))
-   test_y = torch.rand((5, output_dim))
-
-   train_data_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=2)
-   test_data_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=1)
-   ```
-
-3. Instantiate your neural network model.
-
-   ```python
-   nn_architecture = nn.Sequential(
-     nn.Conv2d(in_channels=5, out_channels=3, kernel_size=3),
-     nn.Flatten(),
-     nn.Linear(27, 3),
+3. Instantiate your neural network model and define your loss function.
+4. Instantiate an `InfluenceFunctionModel` and fit it to the training data.
+5. For small input data, you can call the `influences()` method on the fitted
+   instance. The result is a tensor of shape `(training samples, test samples)`
+   that contains at index `(i, j)` the influence of training sample `i` on
+   test sample `j`.
+6. For larger datasets, wrap the model into a "calculator" and call methods on
+   it. This splits the computation into smaller chunks and allows for lazy
+   evaluation and out-of-core computation.
+
+The higher the absolute value of the influence of a training sample
+on a test sample, the more influential it is for the chosen test sample, model
+and data loaders. The sign of the influence determines whether it is 
+useful (positive) or harmful (negative).
+
+> **Note** pyDVL currently only supports PyTorch for Influence Functions. We plan
+> to add support for Jax next.
+
+```python
+import torch
+from torch import nn
+from torch.utils.data import DataLoader, TensorDataset
+
+from pydvl.influence import SequentialInfluenceCalculator
+from pydvl.influence.torch import DirectInfluence
+from pydvl.influence.torch.util import (
+   NestedTorchCatAggregator,
+   TorchNumpyConverter,
    )
-   ```
-
-4. Define your loss:
-
-   ```python
-   loss = nn.MSELoss()
-   ```
-
-5. Instantiate an `InfluenceFunctionModel` and fit it to the training data
 
-   ```python
-   infl_model = DirectInfluence(nn_architecture, loss, hessian_regularization=0.01)
-   infl_model = infl_model.fit(train_data_loader)
-   ```
+input_dim = (5, 5, 5)
+output_dim = 3
+train_x, train_y = torch.rand((10, *input_dim)), torch.rand((10, output_dim))
+test_x, test_y = torch.rand((5, *input_dim)), torch.rand((5, output_dim))
+train_data_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=2)
+test_data_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=1)
+model = nn.Sequential(
+  nn.Conv2d(in_channels=5, out_channels=3, kernel_size=3),
+  nn.Flatten(),
+  nn.Linear(27, 3),
+  )
+loss = nn.MSELoss()
 
-6. For small input data call influence method on the fitted instance. 
-
-   ```python
-   influences = infl_model.influences(test_x, test_y, train_x, train_y)
-   ```
-   The result is a tensor of shape `(training samples x test samples)`
-   that contains at index `(i, j`) the influence of training sample `i` on
-   test sample `j`.
+infl_model = DirectInfluence(model, loss, hessian_regularization=0.01)
+infl_model = infl_model.fit(train_data_loader)
 
-7. For larger data, wrap the model into a
-   calculator and call methods on the calculator.
-   ```python
-   infl_calc = SequentialInfluenceCalculator(infl_model)
-   
-    # Lazy object providing arrays batch-wise in a sequential manner
-   lazy_influences = infl_calc.influences(test_data_loader, train_data_loader)
+# For small datasets, instantiate the full influence matrix:
+influences = infl_model.influences(test_x, test_y, train_x, train_y)
 
-   # Trigger computation and pull results to memory
-   influences = lazy_influences.compute(aggregator=NestedTorchCatAggregator())
+# For larger datasets, use the Influence calculators:
+infl_calc = SequentialInfluenceCalculator(infl_model)
 
-   # Trigger computation and write results batch-wise to disk
-   lazy_influences.to_zarr("influences_result", TorchNumpyConverter())
-   ```
-   
+# Lazy object providing arrays batch-wise in a sequential manner
+lazy_influences = infl_calc.influences(test_data_loader, train_data_loader)
 
-   The higher the absolute value of the influence of a training sample
-   on a test sample, the more influential it is for the chosen test sample, model
-   and data loaders. The sign of the influence determines whether it is 
-   useful (positive) or harmful (negative).
+# Trigger computation and pull results to memory
+influences = lazy_influences.compute(aggregator=NestedTorchCatAggregator())
 
-> **Note** pyDVL currently only support PyTorch for Influence Functions. 
-> We are planning to add support for Jax and perhaps TensorFlow or even Keras.
+# Trigger computation and write results batch-wise to disk
+lazy_influences.to_zarr("influences_result", TorchNumpyConverter())
+```
 
 ## Data Valuation
 
 The steps required to compute data values for your samples are:
 
-1. Import the necessary packages (The exact packages depend on your specific use case).
-
-   ```python
-   import matplotlib.pyplot as plt
-   from sklearn.datasets import load_breast_cancer
-   from sklearn.linear_model import LogisticRegression
-   from pydvl.utils import Dataset, Scorer, Utility
-   from pydvl.value import (
-      compute_shapley_values,
-      ShapleyMode,
-      MaxUpdates,
-   )
-   ```
- 
+1. Import the necessary packages (the exact ones will depend on your specific
+   use case).
 2. Create a `Dataset` object with your train and test splits.
-
-   ```python
-   data = Dataset.from_sklearn(
-       load_breast_cancer(),
-       train_size=10,
-       stratify_by_target=True,
-       random_state=16,
-   )
-   ```
-
 3. Create an instance of a `SupervisedModel` (basically any sklearn compatible
-   predictor).
-
-   ```python
-   model = LogisticRegression()
-   ```  
-
-4. Create a `Utility` object to wrap the Dataset, the model and a scoring
-   function.
-
-   ```python
-   u = Utility(
-      model,
-      data,
-      Scorer("accuracy", default=0.0)
-   )
-   ```
-
-5. Use one of the methods defined in the library to compute the values.
-   In our example, we will use *Permutation Montecarlo Shapley*,
-   an approximate method for computing Data Shapley values.
-
-   ```python
-   values = compute_shapley_values(
-      u,
-      mode=ShapleyMode.PermutationMontecarlo,
-      done=MaxUpdates(100),
-      seed=16,  
-      progress=True
-   )
-   ```
-   The result is a variable of type `ValuationResult` that contains
-   the indices and their values as well as other attributes.
-
-   The higher the value for an index, the more important it is for the chosen
-   model, dataset and scorer.
-
-6. (Optional) Convert the valuation result to a dataframe and analyze and visualize the values.
-
-   ```python
-   df = values.to_dataframe(column="data_value")
-   ```
+   predictor), and wrap it in a `Utility` object together with the data and a
+   scoring function.
+4. Use one of the methods defined in the library to compute the values. In the
+   example below, we will use *Permutation Montecarlo Shapley*, an approximate
+   method for computing Data Shapley values. The result is a variable of type
+   `ValuationResult` that contains the indices and their values as well as other
+   attributes.
+5. Convert the valuation result to a dataframe, and analyze and visualize the
+   values.
+
+The higher the value for an index, the more important it is for the chosen
+model, dataset and scorer. Conversely, low-value points could be mislabelled,
+or out-of-distribution, and dropping them can improve the model's performance.
+
+```python
+from sklearn.datasets import load_breast_cancer
+from sklearn.linear_model import LogisticRegression
+
+from pydvl.utils import Dataset, Scorer, Utility
+from pydvl.value import (MaxUpdates, RelativeTruncation,
+                         permutation_montecarlo_shapley)
+
+data = Dataset.from_sklearn(
+  load_breast_cancer(),
+  train_size=10,
+  stratify_by_target=True,
+  random_state=16,
+  )
+model = LogisticRegression()
+u = Utility(
+  model,
+  data,
+  Scorer("accuracy", default=0.0)
+  )
+values = permutation_montecarlo_shapley(
+  u,
+  truncation=RelativeTruncation(u, 0.05),
+  done=MaxUpdates(1000),
+  seed=16,
+  progress=True
+  )
+df = values.to_dataframe(column="data_value")
+```
 
 # Contributing
 

From a033f8707d47b2d279b8e24a620e38dbb6d295df Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Sun, 21 Apr 2024 22:02:00 +0200
Subject: [PATCH 08/11] Improve duration logger to take the logging level as
 input

---
 src/pydvl/utils/progress.py | 40 +++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/pydvl/utils/progress.py b/src/pydvl/utils/progress.py
index 8a49f08aa..9ad931938 100644
--- a/src/pydvl/utils/progress.py
+++ b/src/pydvl/utils/progress.py
@@ -39,19 +39,33 @@ def repeat_indices(
             pbar.refresh()
 
 
-def log_duration(func):
+def log_duration(_func=None, *, log_level=logging.DEBUG):
     """
-    Decorator to log execution time of a function
+    Decorator to log execution time of a function with a configurable logging level.
+    It can be used with or without specifying a log level.
     """
 
-    @wraps(func)
-    def wrapper_log_duration(*args, **kwargs):
-        func_name = func.__qualname__
-        logger.info(f"Function '{func_name}' is starting.")
-        start_time = time()
-        result = func(*args, **kwargs)
-        duration = time() - start_time
-        logger.info(f"Function '{func_name}' completed. Duration: {duration:.2f} sec")
-        return result
-
-    return wrapper_log_duration
+    def decorator_log_duration(func):
+        @wraps(func)
+        def wrapper_log_duration(*args, **kwargs):
+            func_name = func.__qualname__
+            duration_logger = logging.getLogger(func_name)
+            duration_logger.setLevel(log_level)
+            duration_logger.log(log_level, f"Function '{func_name}' is starting.")
+            start_time = time()
+            result = func(*args, **kwargs)
+            duration = time() - start_time
+            duration_logger.log(
+                log_level,
+                f"Function '{func_name}' completed. " f"Duration: {duration:.2f} sec",
+            )
+            return result
+
+        return wrapper_log_duration
+
+    if _func is None:
+        # Used with parentheses, e.g. @log_duration(log_level=...): return decorator
+        return decorator_log_duration
+    else:
+        # If log_duration was called with a function, apply decorator directly
+        return decorator_log_duration(_func)

From f09f020ac76616fc96e030388a654b66edc4b0bc Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Sun, 21 Apr 2024 22:02:55 +0200
Subject: [PATCH 09/11] Set log_duration level for fit methods to INFO

---
 src/pydvl/influence/torch/influence_function_model.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/pydvl/influence/torch/influence_function_model.py b/src/pydvl/influence/torch/influence_function_model.py
index f85c0d4f0..46a5fa16e 100644
--- a/src/pydvl/influence/torch/influence_function_model.py
+++ b/src/pydvl/influence/torch/influence_function_model.py
@@ -363,6 +363,7 @@ def is_fitted(self):
         except AttributeError:
             return False
 
+    @log_duration(log_level=logging.INFO)
     def fit(self, data: DataLoader) -> DirectInfluence:
         """
         Compute the hessian matrix based on a provided dataloader.
@@ -500,6 +501,7 @@ def is_fitted(self):
         except AttributeError:
             return False
 
+    @log_duration(log_level=logging.INFO)
     def fit(self, data: DataLoader) -> CgInfluence:
         self.train_dataloader = data
         if self.pre_conditioner is not None:
@@ -816,6 +818,7 @@ def is_fitted(self):
         except AttributeError:
             return False
 
+    @log_duration(log_level=logging.INFO)
     def fit(self, data: DataLoader) -> LissaInfluence:
         self.train_dataloader = data
         return self
@@ -948,6 +951,7 @@ def is_fitted(self):
         except AttributeError:
             return False
 
+    @log_duration(log_level=logging.INFO)
     def fit(self, data: DataLoader) -> ArnoldiInfluence:
         r"""
         Fitting corresponds to the computation of the low rank decomposition
@@ -1204,6 +1208,7 @@ def _get_kfac_blocks(
 
         return forward_x, grad_y
 
+    @log_duration(log_level=logging.INFO)
     def fit(self, data: DataLoader) -> EkfacInfluence:
         """
         Compute the KFAC blocks for each layer of the model, using the provided data.
@@ -1712,6 +1717,7 @@ def is_fitted(self):
         except AttributeError:
             return False
 
+    @log_duration(log_level=logging.INFO)
     def fit(self, data: DataLoader):
         self.low_rank_representation = model_hessian_nystroem_approximation(
             self.model, self.loss, data, self.rank

From 898cdc15968699e2acb7b985fafe9cc194d2b171 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Sun, 21 Apr 2024 22:06:44 +0200
Subject: [PATCH 10/11] Add log_duration decorator with level INFO to lazy
 object compute and to_zarr methods

---
 src/pydvl/influence/array.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/pydvl/influence/array.py b/src/pydvl/influence/array.py
index a82b380b8..d549eee9d 100644
--- a/src/pydvl/influence/array.py
+++ b/src/pydvl/influence/array.py
@@ -7,6 +7,7 @@
 using the Zarr library.
 """
 
+import logging
 from abc import ABC, abstractmethod
 from typing import Callable, Generator, Generic, List, Optional, Tuple, Union
 
@@ -14,6 +15,7 @@
 from numpy.typing import NDArray
 from zarr.storage import StoreLike
 
+from ..utils import log_duration
 from .base_influence_function_model import TensorType
 
 
@@ -119,6 +121,7 @@ def __init__(
     ):
         self.generator_factory = generator_factory
 
+    @log_duration(log_level=logging.INFO)
     def compute(self, aggregator: Optional[SequenceAggregator] = None):
         """
         Computes and optionally aggregates the chunks of the array using the provided
@@ -139,6 +142,7 @@ def compute(self, aggregator: Optional[SequenceAggregator] = None):
             aggregator = ListAggregator()
         return aggregator(self.generator_factory())
 
+    @log_duration(log_level=logging.INFO)
     def to_zarr(
         self,
         path_or_url: Union[str, StoreLike],
@@ -223,6 +227,7 @@ def __init__(
     ):
         self.generator_factory = generator_factory
 
+    @log_duration(log_level=logging.INFO)
     def compute(self, aggregator: Optional[NestedSequenceAggregator] = None):
         """
         Computes and optionally aggregates the chunks of the array using the provided
@@ -244,6 +249,7 @@ def compute(self, aggregator: Optional[NestedSequenceAggregator] = None):
             aggregator = NestedListAggregator()
         return aggregator(self.generator_factory())
 
+    @log_duration(log_level=logging.INFO)
     def to_zarr(
         self,
         path_or_url: Union[str, StoreLike],

From f5bc6c9cf65c3ea11b49e7c5a9becc2d63e545a4 Mon Sep 17 00:00:00 2001
From: Kristof Schroeder <kristof_schroeder@web.de>
Date: Mon, 22 Apr 2024 11:19:39 +0200
Subject: [PATCH 11/11] =?UTF-8?q?Bump=20version:=200.9.1.dev0=20=E2=86=92?=
 =?UTF-8?q?=200.9.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg      | 2 +-
 setup.py              | 2 +-
 src/pydvl/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index db49fd207..f27d47b90 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.9.1.dev0
+current_version = 0.9.1
 commit = False
 tag = False
 allow_dirty = False
diff --git a/setup.py b/setup.py
index 6c3fe2533..805a93f22 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     package_data={"pydvl": ["py.typed"]},
     packages=find_packages(where="src"),
     include_package_data=True,
-    version="0.9.1.dev0",
+    version="0.9.1",
     description="The Python Data Valuation Library",
     install_requires=[
         line
diff --git a/src/pydvl/__init__.py b/src/pydvl/__init__.py
index cabed8f98..ba22fa08d 100644
--- a/src/pydvl/__init__.py
+++ b/src/pydvl/__init__.py
@@ -7,4 +7,4 @@
 The two main modules you will want to look at are [value][pydvl.value] and
 [influence][pydvl.influence].
 """
-__version__ = "0.9.1.dev0"
+__version__ = "0.9.1"