diff --git a/src/pydvl/utils/utility.py b/src/pydvl/utils/utility.py index 10bc7ebf4..2773b96a1 100644 --- a/src/pydvl/utils/utility.py +++ b/src/pydvl/utils/utility.py @@ -179,7 +179,8 @@ def _utility(self, indices: FrozenSet) -> float: """Clones the model, fits it on a subset of the training data and scores it on the test data. - If the object is constructed with `enable_cache = True`, results are + If an instance of [CacheBackend][pydvl.utils.caching.base.CacheBackend] + is passed during construction, results are memoized to avoid duplicate computation. This is useful in particular when computing utilities of permutations of indices or when randomly sampling from the powerset of indices. diff --git a/tests/value/conftest.py b/tests/value/conftest.py index 3eaa3d672..33e58bf64 100644 --- a/tests/value/conftest.py +++ b/tests/value/conftest.py @@ -72,7 +72,6 @@ def score(self, x: NDArray, y: NDArray) -> float: score_range=(0, x.sum() / x.max()), catch_errors=False, show_warnings=True, - enable_cache=False, ) @@ -122,7 +121,9 @@ def linear_shapley(cache, linear_dataset, scorer, n_jobs): if u is None: u = Utility( - LinearRegression(), data=linear_dataset, scorer=scorer, enable_cache=False + LinearRegression(), + data=linear_dataset, + scorer=scorer, ) exact_values = combinatorial_exact_shapley(u, progress=False, n_jobs=n_jobs) cache.set(u_cache_key, u) diff --git a/tests/value/shapley/test_knn.py b/tests/value/shapley/test_knn.py index 1ca7a1fbc..cf935f347 100644 --- a/tests/value/shapley/test_knn.py +++ b/tests/value/shapley/test_knn.py @@ -40,7 +40,10 @@ def knn_loss_function(labels, predictions, n_classes=3): ) utility = Utility( - model, data=data, scorer=scorer, show_warnings=False, enable_cache=False + model, + data=data, + scorer=scorer, + show_warnings=False, ) exact_values = combinatorial_exact_shapley( utility, progress=False, n_jobs=min(len(data), available_cpus())