From 4bd92ec7cf3aafca1e656787429d59fda57d999f Mon Sep 17 00:00:00 2001 From: Markus Semmler Date: Sat, 12 Aug 2023 23:21:28 +0200 Subject: [PATCH] Implement algorithm from paper `CS-Shapley: Class-wise Shapley Values for Data Valuation in Classification` (https://arxiv.org/abs/2211.06800) --- CHANGELOG.md | 2 + docs/30-data-valuation.rst | 46 ++ src/pydvl/utils/config.py | 1 + src/pydvl/utils/dataset.py | 9 +- src/pydvl/utils/numeric.py | 84 ++- src/pydvl/utils/score.py | 151 ++++- src/pydvl/utils/util.py | 14 + src/pydvl/value/result.py | 26 +- src/pydvl/value/shapley/__init__.py | 1 + src/pydvl/value/shapley/classwise.py | 251 ++++++++ src/pydvl/value/shapley/montecarlo.py | 187 +++++- src/pydvl/value/shapley/truncated.py | 22 +- src/pydvl/value/stopping.py | 10 +- tests/conftest.py | 24 +- tests/misc.py | 36 ++ tests/utils/conftest.py | 22 + tests/utils/test_numeric.py | 27 + tests/utils/test_score.py | 126 ++++- tests/value/shapley/test_classwise.py | 786 ++++++++++++++++++++++++++ 19 files changed, 1780 insertions(+), 45 deletions(-) create mode 100644 src/pydvl/utils/util.py create mode 100644 src/pydvl/value/shapley/classwise.py create mode 100644 tests/misc.py create mode 100644 tests/value/shapley/test_classwise.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 14aca878a..f625f407c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ [PR #382](https://github.com/appliedAI-Initiative/pyDVL/pull/382) - Decouple ray.init from ParallelConfig [PR #373](https://github.com/appliedAI-Initiative/pyDVL/pull/383) +- **New Method**: Add classwise Shapley algorithm. + [PR #338](https://github.com/appliedAI-Initiative/pyDVL/pull/338) ## 0.6.1 - 🏗 Bug fixes and small improvement diff --git a/docs/30-data-valuation.rst b/docs/30-data-valuation.rst index b3fd5018e..565f17906 100644 --- a/docs/30-data-valuation.rst +++ b/docs/30-data-valuation.rst @@ -359,6 +359,52 @@ useful in applications. 
u=utility, mode="truncated_montecarlo", done=MaxUpdates(1000) ) +Classwise Shapley +^^^^^^^^^^^^^^^^^^ + +A different scheme applicable to classification problems first appeared in +:footcite:t:`schoch_csshapley_2022`. The key insight is that samples can be beneficial +for overall performance, while being detrimental for their own class. This could be an +indication of some problem with the data. CS-Shapley changes the utility to account for +this effect by decomposing it into a product of two functions: one gives +priority to in-class accuracy, while the other adds a slight discount which +increases as the out-of-class accuracy increases. + +The value is computed as: + +$$ +v_u(x_i) \approx \frac{1}{K \cdot L} +\sum_{S^{(k)}_{-y_i} \subseteq T_{-y_i} \setminus \{i\}} +\sum_{\sigma^{(l)} \in \Pi(T_{y_i} \setminus \{i\})} +[u( \sigma_{\colon i} \cup \{i\} | S_{-y_i} ) +- u( \sigma_{\colon i} | S_{-y_i})] +$$ + +where $K$ is the number of subsets $S^{(k)}_{-y_i}$ sampled from the class complement +set $T_{-y_i}$ of class $y_i$ and $L$ is the number of permutations sampled from the class +indices set $T_{y_i}$. The scoring function used has the form + +$$u(S_{y_i}|S_{-y_i}) = a_S(D_{y_i}) \exp\{a_S(D_{-y_i})\}.$$ + +This can be further customised, but that form is shown by the authors to have certain +desirable properties. + +.. code-block:: python + + from pydvl.utils import Dataset, Utility + from pydvl.value import compute_classwise_shapley_values + + model = ... + scoring = ClasswiseScorer("accuracy") + data = Dataset(...) 
+ utility = Utility(model, data, scoring) + values = compute_classwise_shapley_values( + utility, + done=HistoryDeviation(n_steps=500, rtol=1e-3), + n_resample_complement_sets=10, + normalize_values=True + ) + Exact Shapley for KNN ^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/pydvl/utils/config.py b/src/pydvl/utils/config.py index 36b9ab647..675c1df02 100644 --- a/src/pydvl/utils/config.py +++ b/src/pydvl/utils/config.py @@ -25,6 +25,7 @@ class ParallelConfig: address: Optional[Union[str, Tuple[str, int]]] = None n_cpus_local: Optional[int] = None logging_level: int = logging.WARNING + _temp_dir: Optional[str] = None def __post_init__(self) -> None: if self.address is not None and self.n_cpus_local is not None: diff --git a/src/pydvl/utils/dataset.py b/src/pydvl/utils/dataset.py index 980957cbc..cab59416b 100644 --- a/src/pydvl/utils/dataset.py +++ b/src/pydvl/utils/dataset.py @@ -222,6 +222,10 @@ def indices(self): """ return self._indices + @indices.setter + def indices(self, indices: np.ndarray): + self._indices = indices + @property def data_names(self): """Names of each individual datapoint. 
@@ -410,11 +414,6 @@ def __init__( def __len__(self): return len(self.groups) - @property - def indices(self): - """Indices of the groups.""" - return self._indices - # FIXME this is a misnomer, should be `names` in `Dataset` so that here it # makes sense @property diff --git a/src/pydvl/utils/numeric.py b/src/pydvl/utils/numeric.py index c639da82b..5e5904c56 100644 --- a/src/pydvl/utils/numeric.py +++ b/src/pydvl/utils/numeric.py @@ -4,8 +4,22 @@ """ from __future__ import annotations +import logging +import os +import random +import time from itertools import chain, combinations -from typing import Collection, Generator, Iterator, Optional, Tuple, TypeVar, overload +from typing import ( + Collection, + Generator, + Iterator, + List, + Optional, + Tuple, + TypeVar, + cast, + overload, +) import numpy as np from numpy.typing import NDArray @@ -17,10 +31,15 @@ "random_matrix_with_condition_number", "random_subset", "random_powerset", + "random_powerset_group_conditional", "random_subset_of_size", "top_k_value_accuracy", ] + +logger = logging.getLogger(__name__) + + T = TypeVar("T", bound=np.generic) @@ -110,6 +129,69 @@ def random_powerset( total += 1 +def random_powerset_group_conditional( + s: NDArray[T], + groups: NDArray[np.int_], + min_elements_per_group: int = 1, +) -> Generator[NDArray[T], None, None]: + """ + Draw infinite random group-conditional subsets from the passed set s. It is ensured + that in each sampled set, each unique group is represented at least ``min_elements`` + times. The groups are specified as integers for all elements of the set separately. + + :param s: Vector of size N representing the set to sample elements from. + :param groups: Vector of size N containing the group as an integer for each element. + :param min_elements_per_group: The minimum number of elements for each group. + + :return: Generated draw from the power set of s with ``min_elements`` of each group. 
+ :raises: TypeError: If the data ``s`` or ``groups`` is not a NumPy array. + :raises: ValueError: If the lengths of ``s`` and ``groups`` differ or + ``min_elements_per_group`` is smaller than 0. + """ + if not isinstance(s, np.ndarray): + raise TypeError("Set must be an NDArray") + + if not isinstance(groups, np.ndarray): + raise TypeError("Labels must be an NDArray") + + if len(groups) != len(s): + raise ValueError("Set and labels have to be of same size.") + + if min_elements_per_group < 0: + raise ValueError( + f"Parameter min_elements={min_elements_per_group} needs to be bigger or equal to 0." + ) + + if min_elements_per_group == 0: + logger.warning( + "It is recommended to ensure at least one element of each group is" + " contained in the sampled and yielded set." + ) + + rng = np.random.default_rng() + unique_labels = np.unique(groups) + + while True: + subsets: List[NDArray[T]] = [] + for label in unique_labels: + label_indices = np.asarray(np.where(groups == label)[0]) + subset_length = int( + rng.integers( + min(min_elements_per_group, len(label_indices)), + len(label_indices) + 1, + ) + ) + if subset_length > 0: + subsets.append(random_subset_of_size(s[label_indices], subset_length)) + + if len(subsets) > 0: + subset = np.concatenate(tuple(subsets)) + rng.shuffle(subset) + yield subset + else: + yield np.array([], dtype=s.dtype) + + def random_subset_of_size(s: NDArray[T], size: int) -> NDArray[T]: """Samples a random subset of given size uniformly from the powerset of ``s``. diff --git a/src/pydvl/utils/score.py b/src/pydvl/utils/score.py index 933706d98..ca02b1ccb 100644 --- a/src/pydvl/utils/score.py +++ b/src/pydvl/utils/score.py @@ -2,7 +2,7 @@ This module provides a :class:`Scorer` class that wraps scoring functions with additional information. -Scorers can be constructed in the same way as in scikit-learn: either from +Scorers can be constructed in the same way as in scikit-learn: either from known strings or from a callable. Greater values must be better. 
If they are not, a negated version can be used, see scikit-learn's `make_scorer() `_. @@ -17,11 +17,17 @@ import numpy as np from numpy.typing import NDArray from scipy.special import expit -from sklearn.metrics import get_scorer +from sklearn.metrics import accuracy_score, get_scorer, make_scorer from pydvl.utils.types import SupervisedModel -__all__ = ["Scorer", "compose_score", "squashed_r2", "squashed_variance"] +__all__ = [ + "Scorer", + "ClasswiseScorer", + "compose_score", + "squashed_r2", + "squashed_variance", +] class ScorerCallable(Protocol): @@ -58,7 +64,7 @@ class Scorer: def __init__( self, scoring: Union[str, ScorerCallable], - default: float = np.nan, + default: float = 0.0, range: Tuple = (-np.inf, np.inf), name: Optional[str] = None, ): @@ -81,6 +87,143 @@ def __repr__(self): return f"{capitalized_name} (scorer={self._scorer})" +class ClasswiseScorer(Scorer): + """A Scorer which is applicable for valuation in classification problems. Its value + is based on in-cls and out-of-cls score :footcite:t:`schoch_csshapley_2022`. For + each class ``label`` it separates the elements into two groups, namely in-cls + instances and out-of-cls instances. The value function itself than estimates the + in-cls metric discounted by the out-of-cls metric. In other words the value function + for each element of one class is conditioned on the out-of-cls instances (or a + subset of it). The form of the value function can be written as + + .. math:: + v_{y_i}(D) = f(a_S(D_{y_i}))) * g(a_S(D_{-y_i}))) + + where f and g are continuous, monotonic functions and D is the test set. + + in order to produce meaningful results. For further reference see also section four + of :footcite:t:`schoch_csshapley_2022`. + + :param default: Score used when a model cannot be fit, e.g. when too little data is + passed, or errors arise. + :param range: Numerical range of the score function. 
Some Monte Carlo methods can + use this to estimate the number of samples required for a certain quality of + approximation. If not provided, it can be read from the ``scoring`` object if it + provides it, for instance if it was constructed with + :func:`~pydvl.utils.score.compose_score`. + :param in_class_discount_fn: Continuous, monotonic increasing function used to + discount the in-class score. + :param out_of_class_discount_fn: Continuous, monotonic increasing function used to + discount the out-of-class score. + :param initial_label: Set initial label (Doesn't require to set parameter ``label`` + on ``ClasswiseScorer`` in first iteration) + :param name: Name of the scorer. If not provided, the name of the passed + function will be prefixed by 'classwise '. + + .. versionadded:: 0.7.0 + """ + + def __init__( + self, + scoring: str = "accuracy", + default: float = 0.0, + range: Tuple[float, float] = (-np.inf, np.inf), + in_class_discount_fn: Callable[[float], float] = lambda x: x, + out_of_class_discount_fn: Callable[[float], float] = np.exp, + initial_label: Optional[int] = None, + name: Optional[str] = None, + ): + disc_score_in_cls = in_class_discount_fn(range[1]) + disc_score_out_of_cls = out_of_class_discount_fn(range[1]) + transformed_range = (0, disc_score_in_cls * disc_score_out_of_cls) + super().__init__( + scoring, + range=transformed_range, + default=default, + name=name or f"classwise {scoring}", + ) + self._in_cls_discount_fn = in_class_discount_fn + self._out_of_cls_discount_fn = out_of_class_discount_fn + self.label = initial_label + + def __str__(self): + return self._name + + def __call__( + self: "ClasswiseScorer", + model: SupervisedModel, + x_test: NDArray[np.float_], + y_test: NDArray[np.int_], + ) -> float: + """ + :param model: Model used for computing the score on the validation set. + :param x_test: Array containing the features of the classification problem. 
+ :param y_test: Array containing the labels of the classification problem. + :return: Calculated score. + """ + in_cls_score, out_of_cls_score = self.estimate_in_cls_and_out_of_cls_score( + model, x_test, y_test + ) + disc_score_in_cls = self._in_cls_discount_fn(in_cls_score) + disc_score_out_of_cls = self._out_of_cls_discount_fn(out_of_cls_score) + return disc_score_in_cls * disc_score_out_of_cls + + def estimate_in_cls_and_out_of_cls_score( + self, + model: SupervisedModel, + x_test: NDArray[np.float_], + y_test: NDArray[np.int_], + rescale_scores: bool = True, + ) -> Tuple[float, float]: + r""" + Computes in-class and out-of-class scores using the provided scoring function, + which can be expressed as: + + .. math:: + a_S(D=\{(\hat{x}_1, \hat{y}_1), \dots, (\hat{x}_K, \hat{y}_K)\}) &= + \frac{1}{N} \sum_k s(y(\hat{x}_k), \hat{y}_k) + + In this context, the computation is performed twice: once on D_i and once on D_o + to calculate the in-class and out-of-class scores. Here, D_i contains only + samples with the specified 'label' from the validation set, while D_o contains + all other samples. By default, the scores are scaled to have the same order of + magnitude. In such cases, the raw scores are multiplied by: + + .. math:: + N_{y_i} = \frac{a_S(D_{y_i})}{a_S(D_{y_i})+a_S(D_{-y_i})} \quad \text{and} + \quad N_{-y_i} = \frac{a_S(D_{-y_i})}{a_S(D_{y_i})+a_S(D_{-y_i})} + + :param model: Model used for computing the score on the validation set. + :param x_test: Array containing the features of the classification problem. + :param y_test: Array containing the labels of the classification problem. + :param rescale_scores: If set to True, the scores will be denormalized. This is + particularly useful when the inner score is calculated by an estimator of + the form 1/N sum_i x_i. + :return: Tuple containing the in-class and out-of-class scores. 
+ """ + scorer = self._scorer + label_set_match = y_test == self.label + label_set = np.where(label_set_match)[0] + num_classes = len(np.unique(y_test)) + + if len(label_set) == 0: + return 0, 1 / max(1, num_classes - 1) + + complement_label_set = np.where(~label_set_match)[0] + in_cls_score = scorer(model, x_test[label_set], y_test[label_set]) + out_of_cls_score = scorer( + model, x_test[complement_label_set], y_test[complement_label_set] + ) + + if rescale_scores: + n_in_cls = np.count_nonzero(y_test == self.label) + n_out_of_cls = len(y_test) - n_in_cls + in_cls_score *= n_in_cls / (n_in_cls + n_out_of_cls) + out_of_cls_score *= n_out_of_cls / (n_in_cls + n_out_of_cls) + + return in_cls_score, out_of_cls_score + + def compose_score( scorer: Scorer, transformation: Callable[[float], float], diff --git a/src/pydvl/utils/util.py b/src/pydvl/utils/util.py new file mode 100644 index 000000000..d556b4d28 --- /dev/null +++ b/src/pydvl/utils/util.py @@ -0,0 +1,14 @@ +import numpy as np +from numpy.typing import NDArray + + +def arr_or_writeable_copy(arr: NDArray) -> NDArray: + """Return a copy of ``arr`` if it's not writeable, otherwise return ``arr``. + + :param arr: Array to copy if it's not writeable. + :return: Copy of ``arr`` if it's not writeable, otherwise ``arr``. 
+ """ + if not arr.flags.writeable: + return np.copy(arr) + + return arr diff --git a/src/pydvl/value/result.py b/src/pydvl/value/result.py index 219b8ea90..f66a514e7 100644 --- a/src/pydvl/value/result.py +++ b/src/pydvl/value/result.py @@ -66,6 +66,7 @@ from pydvl.utils.dataset import Dataset from pydvl.utils.numeric import running_moments from pydvl.utils.status import Status +from pydvl.utils.util import arr_or_writeable_copy try: import pandas # Try to import here for the benefit of mypy @@ -234,8 +235,12 @@ def __init__( self._algorithm = algorithm self._status = Status(status) # Just in case we are given a string - self._values = values - self._variances = np.zeros_like(values) if variances is None else variances + self._values = arr_or_writeable_copy(values) + self._variances = ( + np.zeros_like(values) + if variances is None + else arr_or_writeable_copy(variances) + ) self._counts = np.ones_like(values) if counts is None else counts self._sort_order = None self._extra_values = extra_values or {} @@ -526,10 +531,14 @@ def __add__(self, other: "ValuationResult") -> "ValuationResult": xm[other_pos] = other._values vm[other_pos] = other._variances + # np.maximum(1, n + m) avoids a division by zero when n = m = 0 + n_m_sum = np.maximum(1, n + m) + # Sample mean of n+m samples from two means of n and m samples - xnm = (n * xn + m * xm) / (n + m) + xnm = (n * xn + m * xm) / n_m_sum + # Sample variance of n+m samples from two sample variances of n and m samples - vnm = (n * (vn + xn**2) + m * (vm + xm**2)) / (n + m) - xnm**2 + vnm = (n * (vn + xn**2) + m * (vm + xm**2)) / n_m_sum - xnm**2 if np.any(vnm < 0): if np.any(vnm < -1e-6): @@ -610,6 +619,15 @@ def update(self, idx: int, new_value: float) -> "ValuationResult": ) return self + def scale(self, coefficient: float, indices: Optional[NDArray[IndexT]] = None): + """ + Scales the values and variances of the result by a coefficient. + :param coefficient: Coefficient to scale by. + :param indices: Indices to scale. 
If None, all values are scaled. + """ + self._values[self._sort_positions[indices]] *= coefficient + self._variances[self._sort_positions[indices]] *= coefficient**2 + def get(self, idx: Integral) -> ValueItem: """Retrieves a ValueItem by data index, as opposed to sort index, like the indexing operator. diff --git a/src/pydvl/value/shapley/__init__.py b/src/pydvl/value/shapley/__init__.py index 6f93cd60e..db5802f25 100644 --- a/src/pydvl/value/shapley/__init__.py +++ b/src/pydvl/value/shapley/__init__.py @@ -8,6 +8,7 @@ from ..result import * from ..stopping import * +from .classwise import * from .common import * from .gt import * from .knn import * diff --git a/src/pydvl/value/shapley/classwise.py b/src/pydvl/value/shapley/classwise.py new file mode 100644 index 000000000..9dd9f5ad6 --- /dev/null +++ b/src/pydvl/value/shapley/classwise.py @@ -0,0 +1,251 @@ +""" +Implementation of the algorithm footcite:t:`schoch_csshapley_2022`. +""" +import logging +import numbers +from concurrent.futures import FIRST_COMPLETED, wait +from copy import copy +from typing import cast + +import numpy as np + +from pydvl.utils import ( + ParallelConfig, + Utility, + effective_n_jobs, + init_executor, + init_parallel_backend, +) + +__all__ = [ + "compute_classwise_shapley_values", +] + +from tqdm import tqdm + +from pydvl.utils.score import ClasswiseScorer +from pydvl.value.result import ValuationResult +from pydvl.value.shapley.montecarlo import permutation_montecarlo_classwise_shapley +from pydvl.value.shapley.truncated import TruncationPolicy +from pydvl.value.stopping import MaxChecks, StoppingCriterion + +logger = logging.getLogger(__name__) + + +def compute_classwise_shapley_values( + u: Utility, + *, + done: StoppingCriterion, + truncation: TruncationPolicy, + normalize_values: bool = True, + n_resample_complement_sets: int = 1, + use_default_scorer_value: bool = True, + min_elements_per_label: int = 1, + n_jobs: int = 1, + config: ParallelConfig = ParallelConfig(), + progress: 
bool = False, +) -> ValuationResult: + """ + Computes the classwise Shapley value by parallel processing. Independent workers + are spawned to process the data in parallel. Once the data is aggregated, the values + can be optionally normalized, depending on ``normalize_values``. + + :param u: Utility object containing model, data, and scoring function. The scoring + function should be of type :class:`~pydvl.utils.score.ClassWiseScorer`. + :param done: Function that checks whether the computation needs to stop. + :param truncation: Callable function that decides whether to interrupt processing a + permutation and set subsequent marginals to zero. + :param normalize_values: Indicates whether to normalize the values by the variation + in each class times their in-class accuracy. + :param n_resample_complement_sets: Number of times to resample the complement set + for each permutation. + :param use_default_scorer_value: Use default scorer value even if additional_indices + is not None. + :param min_elements_per_label: The minimum number of elements for each opposite + label. + :param n_jobs: Number of parallel jobs to run. + :param config: Parallel configuration. + :param progress: Whether to display progress bars for each job. + :return: ValuationResult object containing computed data values. 
+ """ + + _check_classwise_shapley_utility(u) + + parallel_backend = init_parallel_backend(config) + u_ref = parallel_backend.put(u) + # This represents the number of jobs that are running + n_jobs = effective_n_jobs(n_jobs, config) + # This determines the total number of submitted jobs + # including the ones that are running + n_submitted_jobs = 2 * n_jobs + + pbar = tqdm(disable=not progress, position=0, total=100, unit="%") + accumulated_result = ValuationResult.zeros( + algorithm="classwise_shapley", + indices=u.data.indices, + data_names=u.data.data_names, + ) + terminate_exec = False + with init_executor(max_workers=n_jobs, config=config) as executor: + futures = set() + # Initial batch of computations + for _ in range(n_submitted_jobs): + future = executor.submit( + _classwise_shapley_one_step, + u_ref, + truncation=truncation, + n_resample_complement_sets=n_resample_complement_sets, + use_default_scorer_value=use_default_scorer_value, + min_elements_per_label=min_elements_per_label, + ) + futures.add(future) + while futures: + # Wait for the next futures to complete. 
+ completed_futures, futures = wait( + futures, timeout=60, return_when=FIRST_COMPLETED + ) + for future in completed_futures: + accumulated_result += future.result() + if done(accumulated_result): + terminate_exec = True + break + + pbar.n = 100 * done.completion() + pbar.refresh() + if terminate_exec: + break + + # Submit more computations + # The goal is to always have `n_jobs` + # computations running + for _ in range(n_submitted_jobs - len(futures)): + future = executor.submit( + _classwise_shapley_one_step, + u_ref, + truncation=truncation, + n_resample_complement_sets=n_resample_complement_sets, + use_default_scorer_value=use_default_scorer_value, + min_elements_per_label=min_elements_per_label, + ) + futures.add(future) + + result = accumulated_result + if normalize_values: + result = _normalize_classwise_shapley_values(result, u) + + return result + + +def _classwise_shapley_one_step( + u: Utility, + *, + truncation: TruncationPolicy, + n_resample_complement_sets: int = 1, + use_default_scorer_value: bool = True, + min_elements_per_label: int = 1, +) -> ValuationResult: + """Computes classwise Shapley value using truncated Monte Carlo permutation + sampling for the subsets. + + :param u: Utility object containing model, data, and scoring function. The scoring + function should be of type :class:`~pydvl.utils.score.ClassWiseScorer`. + :param truncation: Callable function that decides whether to interrupt processing a + permutation and set subsequent marginals to zero. + :param n_resample_complement_sets: Number of times to resample the complement set + for each permutation. + :param use_default_scorer_value: Use default scorer value even if additional_indices + is not None. + :param min_elements_per_label: The minimum number of elements for each opposite + label. + :return: ValuationResult object containing computed data values. 
+ """ + result = ValuationResult.zeros( + algorithm="classwise_shapley", + indices=u.data.indices, + data_names=u.data.data_names, + ) + x_train, y_train = u.data.get_training_data(u.data.indices) + unique_labels = np.unique(y_train) + scorer = cast(ClasswiseScorer, copy(u.scorer)) + u.scorer = scorer + + for label in unique_labels: + u.scorer.label = label + result += permutation_montecarlo_classwise_shapley( + u, + label, + done=MaxChecks(n_resample_complement_sets - 1), + truncation=truncation, + use_default_scorer_value=use_default_scorer_value, + min_elements_per_label=min_elements_per_label, + ) + + return result + + +def _check_classwise_shapley_utility(u: Utility): + """ + Verifies if the provided utility object supports classwise Shapley values. + + :param u: Utility object containing model, data, and scoring function. The scoring + function should be of type :class:`~pydvl.utils.score.ClassWiseScorer`. + :raises: ValueError: If ``u.data`` is not a classification problem. + :raises: ValueError: If ``u.scorer`` is not an instance of + :class:`~pydvl.utils.score.ClassWiseScorer` + """ + + dim_correct = u.data.y_train.ndim == 1 and u.data.y_test.ndim == 1 + is_integral = all( + map( + lambda v: isinstance(v, numbers.Integral), (*u.data.y_train, *u.data.y_test) + ) + ) + if not dim_correct or not is_integral: + raise ValueError( + "The supplied dataset has to be a 1-dimensional classification dataset." + ) + + if not isinstance(u.scorer, ClasswiseScorer): + raise ValueError( + "Please set a subclass of ClassWiseScorer object as scorer object of the" + " utility. See scoring argument of Utility." + ) + + +def _normalize_classwise_shapley_values( + result: ValuationResult, + u: Utility, +) -> ValuationResult: + """ + Normalize a valuation result specific to classwise Shapley. + + Each value corresponds to a class c and gets normalized by multiplying + `in-class-score / sigma`. 
In this context `sigma` is the magnitude of all values + belonging to the currently viewed class. See :footcite:t:`schoch_csshapley_2022` for + more details. + + :param result: ValuationResult object to be normalized. + :param u: Utility object containing model, data, and scoring function. The scoring + function should be of type :class:`~pydvl.utils.score.ClasswiseScorer`. + """ + y_train = u.data.y_train + unique_labels = np.unique(np.concatenate((y_train, u.data.y_test))) + scorer = cast(ClasswiseScorer, u.scorer) + + u.model.fit(u.data.x_train, u.data.y_train) + + for label in unique_labels: + scorer.label = label + active_elements = y_train == label + indices_label_set = np.where(active_elements)[0] + indices_label_set = u.data.indices[indices_label_set] + + in_cls_acc, _ = scorer.estimate_in_cls_and_out_of_cls_score( + u.model, u.data.x_test, u.data.y_test + ) + + sigma = np.sum(result.values[indices_label_set]) + if sigma != 0: + result.scale(in_cls_acc / sigma, indices=indices_label_set) + + return result diff --git a/src/pydvl/value/shapley/montecarlo.py b/src/pydvl/value/shapley/montecarlo.py index ad43edad1..7eef96032 100644 --- a/src/pydvl/value/shapley/montecarlo.py +++ b/src/pydvl/value/shapley/montecarlo.py @@ -35,12 +35,14 @@ import operator from functools import reduce from itertools import cycle, takewhile -from typing import Sequence +from typing import Optional, Sequence, Tuple import numpy as np +from numpy._typing import NDArray from numpy.typing import NDArray from tqdm import tqdm +from pydvl.utils import Utility, random_powerset_group_conditional from pydvl.utils.config import ParallelConfig from pydvl.utils.numeric import random_powerset from pydvl.utils.parallel import MapReduceJob @@ -51,7 +53,11 @@ logger = logging.getLogger(__name__) -__all__ = ["permutation_montecarlo_shapley", "combinatorial_montecarlo_shapley"] +__all__ = [ + "permutation_montecarlo_shapley", + 
"permutation_montecarlo_classwise_shapley", + "combinatorial_montecarlo_shapley", +] def _permutation_montecarlo_shapley( @@ -87,20 +93,11 @@ def _permutation_montecarlo_shapley( while not done(result): pbar.n = 100 * done.completion() pbar.refresh() - prev_score = 0.0 permutation = np.random.permutation(u.data.indices) - permutation_done = False - truncation.reset() - for i, idx in enumerate(permutation): - if permutation_done: - score = prev_score - else: - score = u(permutation[: i + 1]) - marginal = score - prev_score - result.update(idx, marginal) - prev_score = score - if not permutation_done and truncation(i, score): - permutation_done = True + result += _permutation_montecarlo_shapley_rollout( + u, permutation, truncation=truncation, algorithm_name=algorithm_name + ) + return result @@ -152,6 +149,146 @@ def permutation_montecarlo_shapley( return map_reduce_job() +def permutation_montecarlo_classwise_shapley( + u: Utility, + label: int, + *, + done: StoppingCriterion, + truncation: TruncationPolicy, + use_default_scorer_value: bool = True, + min_elements_per_label: int = 1, +) -> ValuationResult: + """ + Samples a random subset of the complement set and computes the truncated Monte Carlo + estimator. + + :param u: Utility object containing model, data, and scoring function. The scoring + function should be of type :class:`~pydvl.utils.score.ClassWiseScorer`. + :param done: Function checking whether computation needs to stop. + :param label: The label for which to sample the complement (e.g. all other labels) + :param truncation: Callable which decides whether to interrupt processing a + permutation and set all subsequent marginals to zero. + :param use_default_scorer_value: Use default scorer value even if additional_indices + is not None. + :param min_elements_per_label: The minimum number of elements for each opposite + label. + :return: ValuationResult object containing computed data values. 
+ """ + + algorithm_name = "classwise_shapley" + result = ValuationResult.zeros( + algorithm="classwise_shapley", + indices=u.data.indices, + data_names=u.data.data_names, + ) + + _, y_train = u.data.get_training_data(u.data.indices) + class_indices_set, class_complement_indices_set = split_indices_by_label( + u.data.indices, + y_train, + label, + ) + _, complement_y_train = u.data.get_training_data(class_complement_indices_set) + indices_permutation = np.random.permutation(class_indices_set) + + for subset_idx, subset_complement in enumerate( + random_powerset_group_conditional( + class_complement_indices_set, + complement_y_train, + min_elements_per_group=min_elements_per_label, + ) + ): + result += _permutation_montecarlo_shapley_rollout( + u, + indices_permutation, + additional_indices=subset_complement, + truncation=truncation, + algorithm_name=algorithm_name, + use_default_scorer_value=use_default_scorer_value, + ) + if done(result): + break + + return result + + +def _permutation_montecarlo_shapley_rollout( + u: Utility, + permutation: NDArray[np.int_], + *, + truncation: TruncationPolicy, + algorithm_name: str, + additional_indices: Optional[NDArray[np.int_]] = None, + use_default_scorer_value: bool = True, +) -> ValuationResult: + """ + A truncated version of a permutation-based MC estimator for classwise Shapley + values. It generates a permutation p[i] of the class label indices and iterates over + all subsets starting from the empty set to the full set of indices. + + :param u: Utility object containing model, data, and scoring function. The scoring + function should to be of type :class:`~pydvl.utils.score.ClassWiseScorer`. + :param permutation: Permutation of indices to be considered. + :param truncation: Callable which decides whether to interrupt processing a + permutation and set all subsequent marginals to zero. + :param additional_indices: Set of additional indices for data points which should be + always considered. 
+ :param use_default_scorer_value: Use default scorer value even if additional_indices + is not None. + :return: ValuationResult object containing computed data values. + """ + if ( + additional_indices is not None + and len(np.intersect1d(permutation, additional_indices)) > 0 + ): + raise ValueError( + "The class label set and the complement set have to be disjoint." + ) + + result = ValuationResult.zeros( + algorithm=algorithm_name, + indices=u.data.indices, + data_names=u.data.data_names, + ) + + prev_score = ( + u.default_score + if ( + use_default_scorer_value + or additional_indices is None + or additional_indices is not None + and len(additional_indices) == 0 + ) + else u(additional_indices) + ) + + # hack to calculate the correct value in reset. + if additional_indices is not None: + old_indices = u.data.indices + u.data.indices = np.sort(np.concatenate((permutation, additional_indices))) + truncation.reset(u) + u.data.indices = old_indices + else: + truncation.reset(u) + + is_terminated = False + for i, idx in enumerate(permutation): + if is_terminated or (is_terminated := truncation(i, prev_score)): + score = prev_score + else: + score = u( + np.concatenate((permutation[: i + 1], additional_indices)) + if additional_indices is not None and len(additional_indices) > 0 + else permutation[: i + 1] + ) + + marginal = score - prev_score + result.update(idx, marginal) + prev_score = score + + return result + + def _combinatorial_montecarlo_shapley( indices: Sequence[int], u: Utility, @@ -246,3 +383,23 @@ def combinatorial_montecarlo_shapley( config=config, ) return map_reduce_job() + + +def split_indices_by_label( + indices: NDArray[np.int_], labels: NDArray[np.int_], label: int +) -> Tuple[NDArray[np.int_], NDArray[np.int_]]: + """ + Splits the indices into two sets based on the value of ``label``: those samples + with and without that label. + + :param indices: The indices to be used for referring to the data. 
+ :param labels: Corresponding labels for the indices. + :param label: Label to be used for splitting. + :return: Tuple with two sets of indices. + """ + active_elements = labels == label + class_indices_set = np.where(active_elements)[0] + class_complement_indices_set = np.where(~active_elements)[0] + class_indices_set = indices[class_indices_set] + class_complement_indices_set = indices[class_complement_indices_set] + return class_indices_set, class_complement_indices_set diff --git a/src/pydvl/value/shapley/truncated.py b/src/pydvl/value/shapley/truncated.py index 23b871699..2945c95bf 100644 --- a/src/pydvl/value/shapley/truncated.py +++ b/src/pydvl/value/shapley/truncated.py @@ -1,6 +1,7 @@ import abc import logging from concurrent.futures import FIRST_COMPLETED, wait +from typing import Optional import numpy as np from deprecate import deprecated @@ -48,7 +49,7 @@ def _check(self, idx: int, score: float) -> bool: ... @abc.abstractmethod - def reset(self): + def reset(self, u: Optional[Utility] = None): """Reset the policy to a state ready for a new permutation.""" ... 
@@ -71,7 +72,7 @@ class NoTruncation(TruncationPolicy): def _check(self, idx: int, score: float) -> bool: return False - def reset(self): + def reset(self, u: Optional[Utility] = None): pass @@ -94,7 +95,7 @@ def _check(self, idx: int, score: float) -> bool: self.count += 1 return self.count >= self.max_marginals - def reset(self): + def reset(self, u: Optional[Utility] = None): self.count = 0 @@ -111,14 +112,18 @@ class RelativeTruncation(TruncationPolicy): def __init__(self, u: Utility, rtol: float): super().__init__() self.rtol = rtol - logger.info("Computing total utility for permutation truncation.") - self.total_utility = u(u.data.indices) + self.total_utility = self.reset(u) + self._u = u def _check(self, idx: int, score: float) -> bool: return np.allclose(score, self.total_utility, rtol=self.rtol) - def reset(self): - pass + def reset(self, u: Optional[Utility] = None) -> float: + if u is None: + u = self._u + + self.total_utility = u(u.data.indices) + return self.total_utility class BootstrapTruncation(TruncationPolicy): @@ -134,7 +139,6 @@ class BootstrapTruncation(TruncationPolicy): def __init__(self, u: Utility, n_samples: int, sigmas: float = 1): super().__init__() self.n_samples = n_samples - logger.info("Computing total utility for permutation truncation.") self.total_utility = u(u.data.indices) self.count: int = 0 self.variance: float = 0 @@ -155,7 +159,7 @@ def _check(self, idx: int, score: float) -> bool: self.sigmas * np.sqrt(self.variance) ) - def reset(self): + def reset(self, u: Optional[Utility] = None): self.count = 0 self.variance = self.mean = 0 diff --git a/src/pydvl/value/stopping.py b/src/pydvl/value/stopping.py index 09ba84475..b235d2067 100644 --- a/src/pydvl/value/stopping.py +++ b/src/pydvl/value/stopping.py @@ -279,13 +279,13 @@ class MaxChecks(StoppingCriterion): def __init__(self, n_checks: Optional[int], modify_result: bool = True): super().__init__(modify_result=modify_result) - if n_checks is not None and n_checks < 1: - raise 
def completion(self) -> float:
    """
    Fraction of the allowed number of checks that has been used so far.

    :return: ``0.0`` if no limit is set (``n_checks`` is ``None``), ``1.0`` if
        the limit is zero, and otherwise ``_count / n_checks`` capped at ``1.0``.
    """
    if self.n_checks is None:
        return 0.0
    if self.n_checks == 0:
        # A limit of zero is accepted by the constructor; dividing by it would
        # raise ZeroDivisionError. With no checks allowed there is nothing
        # left to do, so report full completion.
        return 1.0
    return min(1.0, self._count / self.n_checks)
class ClosedFormLinearClassifier:
    """
    Minimal one-feature classifier used as a test double.

    ``fit`` computes a single coefficient ``beta = <x, y> / <x, x>`` from the
    first feature column, and ``predict`` rounds ``beta * x`` and clips the
    result to the labels ``{0, 1}``.
    """

    def __init__(self):
        # None marks the model as not fitted yet.
        self._beta = None

    def fit(self, x: NDArray, y: NDArray) -> float:
        """
        Fits the coefficient in closed form using only the first column of ``x``.

        :param x: Feature matrix; only column 0 is used.
        :param y: Target labels, one per row of ``x``.
        :return: Always ``-1`` (placeholder value).
        """
        v = x[:, 0]
        # NOTE: if ``<v, v>`` is zero (e.g. an empty training set) this is 0/0,
        # which numpy evaluates to nan rather than raising.
        self._beta = np.dot(v, y) / np.dot(v, v)
        return -1

    def predict(self, x: NDArray) -> NDArray:
        """
        Predicts a label in ``{0, 1}`` for every row of ``x``.

        :param x: Feature matrix; only column 0 is used.
        :return: Integer array of predicted labels.
        :raises AttributeError: If the model has not been fitted.
        """
        if self._beta is None:
            raise AttributeError("Model not fitted")

        x = x[:, 0]
        probs = self._beta * x
        # The small offset pushes exact .5 values upwards before rounding.
        return np.clip(np.round(probs + 1e-10), 0, 1).astype(int)

    def score(self, x: NDArray, y: NDArray) -> float:
        """
        Computes the accuracy of the predictions for ``x`` against ``y``.

        :param x: Feature matrix; only column 0 is used.
        :param y: True labels, one per row of ``x``.
        :return: Fraction of correctly predicted samples; ``0.0`` if ``y`` is
            empty.
        """
        pred_y = self.predict(x)
        n_samples = len(y)
        if n_samples == 0:
            return 0.0
        # Normalise by the actual number of samples instead of a fixed 4.
        return float(np.sum(pred_y == y) / n_samples)
@pytest.mark.parametrize("min_elements", [1, 2])
@pytest.mark.parametrize("elements_per_group", [10])
@pytest.mark.parametrize("num_groups", [3])
@pytest.mark.parametrize("check_num_samples", [10])
def test_random_powerset_group_conditional(
    min_elements: int,
    elements_per_group: int,
    num_groups: int,
    check_num_samples: int,
):
    """
    Checks that every subset drawn by ``random_powerset_group_conditional``
    only contains elements of the base set, covers all groups and holds at
    least ``min_elements`` elements of each group.

    :param min_elements: Minimum number of elements required per group.
    :param elements_per_group: Number of elements assigned to each group.
    :param num_groups: Number of distinct groups.
    :param check_num_samples: Number of sampled subsets to verify.
    """
    s = np.arange(num_groups * elements_per_group)
    groups = np.arange(num_groups).repeat(elements_per_group)
    all_groups = np.unique(groups)
    sampler = random_powerset_group_conditional(s, groups, min_elements)

    n_checked = 0
    # Zipping with a bounded range inspects exactly ``check_num_samples``
    # subsets and never pulls an extra one from the sampler.
    for _, subset in zip(range(check_num_samples), sampler):
        n_checked += 1
        assert np.all(np.isin(subset, s))
        # array_equal also handles the case where a group is missing, i.e. the
        # two arrays have different lengths.
        assert np.array_equal(np.unique(groups[subset]), all_groups)

        for group in all_groups:
            assert np.sum(group == groups[subset]) >= min_elements

    # Guard against passing vacuously when nothing was sampled.
    assert n_checked > 0
a/tests/utils/test_score.py +++ b/tests/utils/test_score.py @@ -1,7 +1,19 @@ +from typing import Dict, Tuple, cast + import numpy as np +import pandas as pd +import pytest from numpy.typing import NDArray -from pydvl.utils.score import Scorer, compose_score, squashed_r2, squashed_variance +from pydvl.utils import Utility, powerset +from pydvl.utils.score import ( + ClasswiseScorer, + Scorer, + compose_score, + squashed_r2, + squashed_variance, +) +from tests.misc import ThresholdClassifier sigmoid = lambda x: 1 / (1 + np.exp(-x)) @@ -69,3 +81,115 @@ def test_squashed_variance(): X = np.array([[1, 2], [3, 4]]) model = FittedLinearModel(coef) assert sigmoid(1.0) == squashed_variance(model, X, X @ coef) + + +@pytest.mark.parametrize( + "dataset_alt_seq_simple", + [((101, 0.3, 0.4))], + indirect=True, +) +def test_cs_scorer_on_dataset_alt_seq_simple(dataset_alt_seq_simple): + """ + Tests the class wise scorer. + """ + + scorer = ClasswiseScorer("accuracy", initial_label=0) + assert str(scorer) == "classwise accuracy" + assert repr(scorer) == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score))" + + x, y, info = dataset_alt_seq_simple + n_element = len(x) + target_in_cls_acc_0 = (info["left_margin"] * 100 + 1) / n_element + target_out_of_cls_acc_0 = (info["right_margin"] * 100 + 1) / n_element + + model = ThresholdClassifier() + in_cls_acc_0, out_of_cls_acc_0 = scorer.estimate_in_cls_and_out_of_cls_score( + model, x, y + ) + assert np.isclose(in_cls_acc_0, target_in_cls_acc_0) + assert np.isclose(out_of_cls_acc_0, target_out_of_cls_acc_0) + + scorer.label = 1 + in_cls_acc_1, out_of_cls_acc_1 = scorer.estimate_in_cls_and_out_of_cls_score( + model, x, y + ) + assert in_cls_acc_1 == out_of_cls_acc_0 + assert in_cls_acc_0 == out_of_cls_acc_1 + + scorer.label = 0 + value = scorer(model, x, y) + assert np.isclose(value, in_cls_acc_0 * np.exp(out_of_cls_acc_0)) + + scorer.label = 1 + value = scorer(model, x, y) + assert np.isclose(value, in_cls_acc_1 * 
np.exp(out_of_cls_acc_1)) + + +def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score( + linear_classifier_cs_scorer: Utility, +): + subsets_zero = list(powerset(np.array((0, 1)))) + subsets_one = list(powerset(np.array((2, 3)))) + subsets_zero = [tuple(s) for s in subsets_zero] + subsets_one = [tuple(s) for s in subsets_one] + target_betas = pd.DataFrame( + [ + [np.nan, 1 / 3, 1 / 4, 7 / 25], + [0, 3 / 10, 4 / 17, 7 / 26], + [0, 3 / 13, 1 / 5, 7 / 29], + [0, 3 / 14, 4 / 21, 7 / 30], + ], + index=subsets_zero, + columns=subsets_one, + ) + target_accuracies_zero = pd.DataFrame( + [ + [0, 1 / 4, 1 / 4, 1 / 4], + [3 / 4, 1 / 4, 1 / 2, 1 / 4], + [3 / 4, 1 / 2, 1 / 2, 1 / 2], + [3 / 4, 1 / 2, 1 / 2, 1 / 2], + ], + index=subsets_zero, + columns=subsets_one, + ) + target_accuracies_one = pd.DataFrame( + [ + [0, 1 / 4, 1 / 4, 1 / 4], + [0, 1 / 4, 1 / 4, 1 / 4], + [0, 1 / 4, 1 / 4, 1 / 4], + [0, 1 / 4, 1 / 4, 1 / 4], + ], + index=subsets_zero, + columns=subsets_one, + ) + model = linear_classifier_cs_scorer.model + scorer = cast(ClasswiseScorer, linear_classifier_cs_scorer.scorer) + scorer.label = 0 + + for set_zero_idx in range(len(subsets_zero)): + for set_one_idx in range(len(subsets_one)): + indices = list(subsets_zero[set_zero_idx] + subsets_one[set_one_idx]) + ( + x_train, + y_train, + ) = linear_classifier_cs_scorer.data.get_training_data(indices) + linear_classifier_cs_scorer.model.fit(x_train, y_train) + fitted_beta = linear_classifier_cs_scorer.model._beta # noqa + target_beta = target_betas.iloc[set_zero_idx, set_one_idx] + assert ( + np.isnan(fitted_beta) + if np.isnan(target_beta) + else fitted_beta == target_beta + ) + + ( + x_test, + y_test, + ) = linear_classifier_cs_scorer.data.get_test_data() + in_cls_acc_0, in_cls_acc_1 = scorer.estimate_in_cls_and_out_of_cls_score( + model, x_test, y_test + ) + assert ( + in_cls_acc_0 == target_accuracies_zero.iloc[set_zero_idx, set_one_idx] + ) + assert in_cls_acc_1 == target_accuracies_one.iloc[set_zero_idx, 
set_one_idx] diff --git a/tests/value/shapley/test_classwise.py b/tests/value/shapley/test_classwise.py new file mode 100644 index 000000000..1d263a7d3 --- /dev/null +++ b/tests/value/shapley/test_classwise.py @@ -0,0 +1,786 @@ +""" +Test cases for the class wise shapley value. +""" +import random +from random import seed +from typing import Dict, Tuple + +import numpy as np +import pytest + +from pydvl.utils import Utility +from pydvl.value import MaxChecks, ValuationResult +from pydvl.value.shapley.classwise import compute_classwise_shapley_values +from pydvl.value.shapley.truncated import NoTruncation +from tests.value import check_values + + +@pytest.fixture(scope="function") +def linear_classifier_cs_scorer_args_exact_solution_use_default_score() -> Tuple[ + Dict, ValuationResult, Dict +]: + r""" + Returns the exact solution for the class wise shapley value of the training and + validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture. + + =========================== + CS-Shapley Manual Derivation + =========================== + + :Author: Markus Semmler + :Date: August 2023 + + Dataset description + =================== + + We have a training and a test dataset. We want to model a simple XOR dataset. The + development set :math:`D` is given by + + .. math:: + \begin{aligned} + \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\ + \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\ + \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\ + \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\ + \end{aligned} + + and the training set :math:`T` is given by + + .. math:: + \begin{aligned} + x_0 &= 1 \quad &y_0 = 0 \\ + x_1 &= 2 \quad &y_1 = 0 \\ + x_2 &= 3 \quad &y_2 = 1 \\ + x_3 &= 4 \quad &y_3 = 1 \\ + \end{aligned} + + Note that the training set and the development set contain the same + inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2` + + Model + ===== + + We use an adapted version of linear regression + + .. 
math:: y = \max(0, \min(1, \text{round}(\beta^T x))) + + for classification, with the closed form solution + + .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)} + + Fitted model + ============ + + The hyperparameters for all combinations are + + .. container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}` + & :math:`\frac{7}{25}` + | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` & + :math:`\frac{4}{17}` & :math:`\frac{7}{26}` + | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` & + :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}` + | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` & + :math:`\frac{4}{21}` & :math:`\frac{7}{30}` + + Accuracy tables on development set :math:`D` + ============================================ + + (*) Note that the algorithm described in the paper overwrites these + values with 0. + + .. container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` & + :math:`\frac{1}{2}` & :math:`\frac{1}{4}` + | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & + :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}` + | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & + :math:`\frac{1}{2}` & :math:`\frac{1}{2}` + + .. 
container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}` + | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + + CS-Shapley + ========== + + The formulas of the algorithm are given by + + .. math:: + + \begin{aligned} + \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i}) + - v_{y_i}(\pi_{:i} | S_{-y_i}) \\ + \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!} + \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\ + \phi_i &= \frac{1}{2^{|T_{-y_i}|}-1} \left [\sum_{\emptyset \subset S_{-y_i} + \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ] + \end{aligned} + + Valuation of :math:`x_0` + ======================== + + .. math:: + \begin{aligned} + \delta((x_0, x_1), \{ x_2 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\ + \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{2} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\ + \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0 + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_0 | \{ x_2 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ + \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ + \left [ \phi_0 | \{ x_2, x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} + \end{aligned} + + .. math:: \phi_0 = \frac{1}{6} e^\frac{1}{4} \approx 0.214 + + Valuation of :math:`x_1` + ======================== + + .. 
math:: + \begin{aligned} + \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{2} e^\frac{1}{4} \\ + \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad + \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{2} e^\frac{1}{4} \\ + \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{2} e^\frac{1}{4} + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_1 | \{ x_2 \} \right] &= \frac{3}{8} e^\frac{1}{4} \\ + \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ + \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{3}{8} e^\frac{1}{4} + \end{aligned} + + .. math:: \phi_1 = \frac{1}{3} e^\frac{1}{4} \approx 0.428 + + Valuation of :math:`x_2` + ======================== + + .. math:: + \begin{aligned} + \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_3, x_2), \{ x_0 \}, 2) + &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad + \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\ + \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad + \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0 + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_2 | \{ x_0 \} \right] + &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} + \end{aligned} + + .. math:: \phi_2 = \frac{1}{12} e^\frac{1}{4} + \frac{1}{24} e^\frac{1}{2} \approx 0.1757 + + Valuation of :math:`x_3` + ======================== + + ..
math:: + \begin{aligned} + \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} + \end{aligned} + + .. math:: \phi_3 = \frac{1}{8} e^\frac{1}{2} \approx 0.2061 + """ + return ( + { + "normalize_values": False, + }, + ValuationResult( + values=np.array( + [ + 1 / 6 * np.exp(1 / 4), + 1 / 3 * np.exp(1 / 4), + 1 / 12 * np.exp(1 / 4) + 1 / 24 * np.exp(1 / 2), + 1 / 8 * np.exp(1 / 2), + ] + ) + ), + {"atol": 0.05}, + ) + + +@pytest.fixture(scope="function") +def linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm( + linear_classifier_cs_scorer_args_exact_solution_use_default_score: Tuple[ + Dict, ValuationResult, Dict + ] +) -> Tuple[Dict, ValuationResult, Dict]: + """ + Same as :func:`linear_classifier_cs_scorer_args_exact_solution_use_default_score` + but with normalization. The values of label c are normalized by the in-class score + of label c divided by the sum of values of that specific label. 
+ """ + values = linear_classifier_cs_scorer_args_exact_solution_use_default_score[1].values + label_zero_coefficient = 1 / np.exp(1 / 4) + label_one_coefficient = 1 / (1 / 3 * np.exp(1 / 4) + 2 / 3 * np.exp(1 / 2)) + + return ( + { + "normalize_values": True, + }, + ValuationResult( + values=np.array( + [ + values[0] * label_zero_coefficient, + values[1] * label_zero_coefficient, + values[2] * label_one_coefficient, + values[3] * label_one_coefficient, + ] + ) + ), + {"atol": 0.05}, + ) + + +@pytest.fixture(scope="function") +def linear_classifier_cs_scorer_args_exact_solution_use_add_idx() -> Tuple[ + Dict, ValuationResult, Dict +]: + r""" + Returns the exact solution for the class wise shapley value of the training and + validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture. + + =========================== + CS-Shapley Manual Derivation + =========================== + + :Author: Markus Semmler + :Date: August 2023 + + Dataset description + =================== + + We have a training and a test dataset. We want to model a simple XOR dataset. The + development set :math:`D` is given by + + .. math:: + \begin{aligned} + \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\ + \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\ + \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\ + \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\ + \end{aligned} + + and the training set :math:`T` is given by + + .. math:: + \begin{aligned} + x_0 &= 1 \quad &y_0 = 0 \\ + x_1 &= 2 \quad &y_1 = 0 \\ + x_2 &= 3 \quad &y_2 = 1 \\ + x_3 &= 4 \quad &y_3 = 1 \\ + \end{aligned} + + Note that the training set and the development set contain the same + inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2` + + Model + ===== + + We use an adapted version of linear regression + + .. math:: y = \max(0, \min(1, \text{round}(\beta^T x))) + + for classification, with the closed form solution + + .. 
math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)} + + Fitted model + ============ + + The hyperparameters for all combinations are + + .. container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}` + & :math:`\frac{7}{25}` + | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` & + :math:`\frac{4}{17}` & :math:`\frac{7}{26}` + | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` & + :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}` + | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` & + :math:`\frac{4}{21}` & :math:`\frac{7}{30}` + + Accuracy tables on development set :math:`D` + ============================================ + + .. container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` & + :math:`\frac{1}{2}` & :math:`\frac{1}{4}` + | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & + :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}` + | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & + :math:`\frac{1}{2}` & :math:`\frac{1}{2}` + + .. 
container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}` + | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + + CS-Shapley + ========== + + The formulas of the algorithm are given by + + .. math:: + + \begin{aligned} + \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i}) + - v_{y_i}(\pi_{:i} | S_{-y_i}) \\ + \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!} + \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\ + \phi_i &= \frac{1}{2^{|T_{-y_i}|}-1} \left [\sum_{\emptyset \subset S_{-y_i} + \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ] + \end{aligned} + + Valuation of :math:`x_0` + ======================== + + .. math:: + \begin{aligned} + \delta((x_0, x_1), \{ x_2 \}, 0) &= 0 &\quad + \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\ + \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\ + \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= 0 &\quad + \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0 + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_0 | \{ x_2 \} \right] &= 0 \\ + \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ + \left [ \phi_0 | \{ x_2, x_3 \} \right] &= 0 + \end{aligned} + + .. math:: \phi_0 = \frac{1}{24} e^\frac{1}{4} \approx 0.0535 + + Valuation of :math:`x_1` + ======================== + + .. 
math:: + \begin{aligned} + \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ + \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad + \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ + \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_1 | \{ x_2 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ + \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ + \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} + \end{aligned} + + .. math:: \phi_1 = \frac{5}{24} e^\frac{1}{4} \approx 0.2675 + + Valuation of :math:`x_2` + ======================== + + .. math:: + \begin{aligned} + \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_3, x_2), \{ x_0 \}, 2) + &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad + \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\ + \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad + \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0 + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_2 | \{ x_0 \} \right] + &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} + \end{aligned} + + .. math:: \phi_2 = \frac{1}{12} e^\frac{1}{4} + \frac{1}{24} e^\frac{1}{2} \approx 0.1757 + + Valuation of :math:`x_3` + ======================== + + ..
math:: + \begin{aligned} + \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} + \end{aligned} + + .. math:: \phi_3 = \frac{1}{8} e^\frac{1}{2} \approx 0.2061 + """ + return ( + { + "use_default_scorer_value": False, + "normalize_values": False, + }, + ValuationResult( + values=np.array( + [ + 1 / 24 * np.exp(1 / 4), + 5 / 24 * np.exp(1 / 4), + 1 / 12 * np.exp(1 / 4) + 1 / 24 * np.exp(1 / 2), + 1 / 8 * np.exp(1 / 2), + ] + ) + ), + {"atol": 0.05}, + ) + + +@pytest.fixture(scope="function") +def linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set() -> Tuple[ + Dict, ValuationResult, Dict +]: + r""" + Returns the exact solution for the class wise shapley value of the training and + validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture. + + =========================== + CS-Shapley Manual Derivation + =========================== + + :Author: Markus Semmler + :Date: August 2023 + + Dataset description + =================== + + We have a training and a test dataset. We want to model a simple XOR dataset. The + development set :math:`D` is given by + + .. math:: + \begin{aligned} + \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\ + \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\ + \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\ + \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\ + \end{aligned} + + and the training set :math:`T` is given by + + .. 
math:: + \begin{aligned} + x_0 &= 1 \quad &y_0 = 0 \\ + x_1 &= 2 \quad &y_1 = 0 \\ + x_2 &= 3 \quad &y_2 = 1 \\ + x_3 &= 4 \quad &y_3 = 1 \\ + \end{aligned} + + Note that the training set and the development set contain the same + inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2` + + Model + ===== + + We use an adapted version of linear regression + + .. math:: y = \max(0, \min(1, \text{round}(\beta^T x))) + + for classification, with the closed form solution + + .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)} + + Fitted model + ============ + + The hyperparameters for all combinations are + + .. container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}` + & :math:`\frac{7}{25}` + | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` & + :math:`\frac{4}{17}` & :math:`\frac{7}{26}` + | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` & + :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}` + | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` & + :math:`\frac{4}{21}` & :math:`\frac{7}{30}` + + Accuracy tables on development set :math:`D` + ============================================ + + .. container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` & + :math:`\frac{1}{2}` & :math:`\frac{1}{4}` + | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & + :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}` + | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & + :math:`\frac{1}{2}` & :math:`\frac{1}{2}` + + .. 
container:: tabular + + | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & + :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & + :math:`\{x_2, x_3\}` + | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}` + | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` & + :math:`\frac{1}{4}` & :math:`\frac{1}{4}` + + CS-Shapley + ========== + + The formulas of the algorithm are given by + + .. math:: + + \begin{aligned} + \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i}) + - v_{y_i}(\pi_{:i} | S_{-y_i}) \\ + \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!} + \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\ + \phi_i &= \frac{1}{2^{|T_{-y_i}|}} \left [\sum_{S_{-y_i} + \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ] + \end{aligned} + + Valuation of :math:`x_0` + ======================== + + .. math:: + \begin{aligned} + \delta((x_0, x_1), \emptyset, 0) &= \frac{3}{4} &\quad + \delta((x_1, x_0), \emptyset, 0) &= 0 \\ + \delta((x_0, x_1), \{ x_2 \}, 0) &= 0 &\quad + \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\ + \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\ + \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= 0 &\quad + \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0 + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_0 | \emptyset \right] &= \frac{3}{8} \\ + \left [ \phi_0 | \{ x_2 \} \right] &= 0 \\ + \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ + \left [ \phi_0 | \{ x_2, x_3 \} \right] &= 0 + \end{aligned} + + .. math:: \phi_0 = \frac{3}{32} + \frac{1}{32} e^\frac{1}{4} \approx 0.1339 + + Valuation of :math:`x_1` + ======================== + + .. 
math:: + \begin{aligned} + \delta((x_0, x_1), \emptyset, 1) &= 0 &\quad + \delta((x_1, x_0), \emptyset, 1) &= \frac{3}{4} \\ + \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ + \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad + \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ + \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_1 | \emptyset \right] &= \frac{3}{8} \\ + \left [ \phi_1 | \{ x_2 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ + \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ + \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} + \end{aligned} + + .. math:: \phi_1 = \frac{3}{32} + \frac{5}{32} e^\frac{1}{4} \approx 0.2944 + + Valuation of :math:`x_2` + ======================== + + .. math:: + \begin{aligned} + \delta((x_2, x_3), \emptyset, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_3, x_2), \emptyset, 2) &= 0 \\ + \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad + \delta((x_3, x_2), \{ x_0 \}, 2) + &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad + \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\ + \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad + \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0 + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_2 | \emptyset \right] &= \frac{1}{8} e^\frac{1}{4} \\ + \left [ \phi_2 | \{ x_0 \} \right] + &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} + \end{aligned} + + ..
math:: + \phi_2 = \frac{5}{32} e^\frac{1}{4} + \frac{1}{32} e^\frac{1}{2} \approx 0.2522 + + Valuation of :math:`x_3` + ======================== + + .. math:: + \begin{aligned} + \delta((x_2, x_3), \emptyset, 3) &= 0 &\quad + \delta((x_3, x_2), \emptyset, 3) &= \frac{1}{4} e^\frac{1}{4} \\ + \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ + \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad + \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} + \end{aligned} + + .. math:: + \begin{aligned} + \left [ \phi_3 | \emptyset \right] &= \frac{1}{8} e^\frac{1}{4} \\ + \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ + \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} + \end{aligned} + + .. math:: + \phi_3 = \frac{1}{32} e^\frac{1}{4} + \frac{3}{32} e^\frac{1}{2} \approx 0.1947 + """ + return ( + { + "use_default_scorer_value": False, + "min_elements_per_label": 0, + "normalize_values": False, + }, + ValuationResult( + values=np.array( + [ + 3 / 32 + 1 / 32 * np.exp(1 / 4), + 3 / 32 + 5 / 32 * np.exp(1 / 4), + 5 / 32 * np.exp(1 / 4) + 1 / 32 * np.exp(1 / 2), + 1 / 32 * np.exp(1 / 4) + 3 / 32 * np.exp(1 / 2), + ] + ) + ), + {"atol": 0.05}, + ) + + +@pytest.mark.parametrize("n_samples", [500], ids=lambda x: "n_samples={}".format(x)) +@pytest.mark.parametrize( + "n_resample_complement_sets", + [1], + ids=lambda x: "n_resample_complement_sets={}".format(x), +) +@pytest.mark.parametrize( + "linear_classifier_cs_scorer_args_exact_solution", + [ + "linear_classifier_cs_scorer_args_exact_solution_use_default_score", + "linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm", + "linear_classifier_cs_scorer_args_exact_solution_use_add_idx", +
"linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set", + ], +) +def test_classwise_shapley( + linear_classifier_cs_scorer: Utility, + linear_classifier_cs_scorer_args_exact_solution: Tuple[Dict, ValuationResult], + n_samples: int, + n_resample_complement_sets: int, + request, +): + """Check computed class-wise Shapley values against a fixture's expected result. + + ``linear_classifier_cs_scorer_args_exact_solution`` holds the *name* of a fixture + (one of the strings parametrized above); ``request.getfixturevalue`` resolves it + to the triple ``(args, exact_solution, check_args)``. ``args`` is forwarded to + ``compute_classwise_shapley_values`` and ``check_args`` to ``check_values``. + The last assertion requires every entry of ``values.counts`` to equal + ``n_samples * n_resample_complement_sets``. + """ + args, exact_solution, check_args = request.getfixturevalue( + linear_classifier_cs_scorer_args_exact_solution + ) + values = compute_classwise_shapley_values( + linear_classifier_cs_scorer, + done=MaxChecks(n_samples - 1), + truncation=NoTruncation(), + n_resample_complement_sets=n_resample_complement_sets, + **args, + progress=True, + ) + check_values(values, exact_solution, **check_args) + assert np.all(values.counts == n_samples * n_resample_complement_sets)