Commit

Markus Semmler committed Apr 11, 2023
1 parent 2aed995 commit 65599cc
Showing 3 changed files with 68 additions and 5 deletions.
5 changes: 3 additions & 2 deletions src/pydvl/utils/numeric.py
@@ -156,14 +156,15 @@ def random_powerset_group_conditional(
     if n_samples is None:
         n_samples = np.iinfo(np.int32).max
 
+    unique_labels = np.unique(labels)
     while total <= n_samples:
 
         subsets: List[NDArray[T]] = []
-        for label in labels:
+        for label in unique_labels:
             label_indices = np.asarray(np.where(labels == label)[0])
             subset_length = int(
                 rng.integers(
-                    min(min_elements, len(label_indices) - 1), len(label_indices)
+                    min(min_elements, len(label_indices)), len(label_indices) + 1
                 )
             )
             subsets.append(random_subset_of_size(s[label_indices], subset_length))
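For context (not part of the commit): np.random.Generator.integers(low, high) draws from the half-open interval [low, high), so the old bounds could never yield the full group size and always produced an empty subset for single-element groups, while the corrected bounds allow any size from min_elements (capped at the group size) up to and including the whole group. A minimal sketch of the difference:

import numpy as np

rng = np.random.default_rng(42)
label_indices = np.arange(5)  # indices of a group with five members
min_elements = 1

# Old bounds: the exclusive upper limit means the full group (size 5) is never drawn.
old_size = rng.integers(min(min_elements, len(label_indices) - 1), len(label_indices))

# New bounds: sizes range from min_elements up to and including the group size.
new_size = rng.integers(min(min_elements, len(label_indices)), len(label_indices) + 1)

print(old_size, new_size)  # old_size is in {1, ..., 4}, new_size is in {1, ..., 5}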
2 changes: 1 addition & 1 deletion src/pydvl/value/shapley/classwise.py
@@ -151,7 +151,7 @@ def _class_wise_shapley_worker(
     final_score = u(train_set)
     prev_score = 0.0
 
-    for i, _ in enumerate(label_set):
+    for i in range(len(permutation_label_set)):
 
         if np.abs(prev_score - final_score) < eps:
             score = prev_score
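The loop being corrected here follows the usual truncation pattern from permutation-based Shapley sampling: walk along the permuted indices and stop evaluating the utility once the running score is within eps of the score on the full training set, since further marginal contributions are negligible. A self-contained sketch of that pattern, with a hypothetical utility callable standing in for pydvl's internal machinery:

import numpy as np

def marginals_along_permutation(utility, permutation, full_score, eps=1e-4):
    # Marginal contributions of each index along one permutation, truncated
    # once the running score is within eps of the utility of the full set.
    values = np.zeros(len(permutation), dtype=float)
    prev_score = 0.0
    for i, idx in enumerate(permutation):
        if np.abs(prev_score - full_score) < eps:
            score = prev_score  # truncate: adding more points barely changes the utility
        else:
            score = utility(permutation[: i + 1])
        values[idx] = score - prev_score
        prev_score = score
    return values

Here utility would be any callable mapping a set of training indices to a score; the commit's fix only changes which index set the real worker iterates over, not the pattern itself.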
66 changes: 64 additions & 2 deletions tests/value/shapley/test_classwise.py
@@ -9,9 +9,15 @@
 import numpy as np
 import pytest
 from numpy._typing import NDArray
+from sklearn.metrics import accuracy_score
 
-from pydvl.utils import SupervisedModel
-from pydvl.value.shapley.classwise import _estimate_in_out_cls_accuracy
+from pydvl.utils import Dataset, SupervisedModel, Utility
+from pydvl.value import MaxChecks
+from pydvl.value.shapley.classwise import (
+    CSScorer,
+    _class_wise_shapley_worker,
+    _estimate_in_out_cls_accuracy,
+)
 
 
 @pytest.fixture(scope="function")
@@ -50,3 +56,59 @@ def test_estimate_in_out_cls_accuracy(
     in_cls_acc_1, out_of_cls_acc_1 = _estimate_in_out_cls_accuracy(mock_model, x, y, 1)
     assert in_cls_acc_1 == out_of_cls_acc_0
     assert in_cls_acc_0 == out_of_cls_acc_1
+
+
+@pytest.fixture(scope="function")
+def dataset_cs_shapley() -> Dataset:
+    """
+    A simple dataset for testing the class wise shapley value.
+    """
+    x_train = np.arange(1, 5).reshape([-1, 1])
+    y_train = np.array([0, 0, 1, 1])
+    x_test = x_train
+    y_test = np.array([0, 0, 0, 1])
+    return Dataset(x_train, y_train, x_test, y_test)
+
+
+@pytest.fixture(scope="function")
+def linear_regression_classifier() -> SupervisedModel:
+    """
+    A classifier based on linear regression, so that a closed form solution exists
+    """
+
+    class _LinearRegressionBasedClassifier(SupervisedModel):
+        def __init__(self):
+            self._beta = None
+
+        def fit(self, x: NDArray, y: NDArray) -> float:
+            v = x[:, 0]
+            self._beta = np.dot(v, y) / np.dot(v, v)
+            return -1
+
+        def predict(self, x: NDArray) -> NDArray:
+            if self._beta is None:
+                raise AttributeError("Model not fitted")
+
+            x = x[:, 0]
+            probs = self._beta * x
+            return np.clip(np.round(probs), 0, 1).astype(int)
+
+        def score(self, x: NDArray, y: NDArray) -> float:
+            pred_y = self.predict(x)
+            return np.sum(pred_y == y) / 4
+
+    return _LinearRegressionBasedClassifier()
+
+
+def test_cs_shapley_exact_solution(
+    dataset_cs_shapley: Dataset, linear_regression_classifier: SupervisedModel
+):
+    n_samples = 100
+    scorer = CSScorer()
+    utility = Utility(
+        linear_regression_classifier, dataset_cs_shapley, scorer, catch_errors=False
+    )
+    valuation_result = _class_wise_shapley_worker(
+        dataset_cs_shapley.indices, utility, done=MaxChecks(n_samples)
+    )
+    print(valuation_result)
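As a quick sanity check on the new fixtures (own arithmetic, not part of the commit): with no intercept term, the closed-form slope fitted on the fixture's training data is beta = x·y / x·x = 7/30, and the resulting predictions can be verified by hand against the test labels:

import numpy as np

x = np.arange(1, 5)                    # fixture inputs: [1, 2, 3, 4]
y_train = np.array([0, 0, 1, 1])
y_test = np.array([0, 0, 0, 1])

beta = np.dot(x, y_train) / np.dot(x, x)               # 7 / 30 ≈ 0.2333
preds = np.clip(np.round(beta * x), 0, 1).astype(int)  # [0, 0, 1, 1]

print(np.mean(preds == y_test))  # 0.75, the score the fixture classifier reports on the test split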
